# NOTE(review): the lines below are non-Python residue from scraping the
# Hugging Face Spaces page (status header, file size, commit-hash gutter,
# line-number gutter). They are preserved as comments so the file parses.
# Spaces: / Paused / Paused / File size: 2,719 Bytes
# 4a36ad2 7c603fe 3ad38de 7c603fe ea6fb88 7c603fe 3ad38de cf91668 7c603fe 3ad38de fa3a00b 3ad38de 7c603fe 3ad38de 7c603fe bc8831f 3ad38de bc8831f 3ad38de bc8831f 3ad38de bc8831f 3ad38de bc8831f 3ad38de bc8831f 91c5333 bc8831f 3ad38de bc8831f 3ad38de bc8831f 91c5333 7c603fe 3ad38de 7c603fe 3ad38de 7c603fe 3ad38de 7c603fe 3ad38de 7c603fe 3ad38de 4a36ad2
import requests
import torch
# from googletrans import Translator
from transformers import pipeline
from deep_translator import GoogleTranslator
import time
import os
VECTOR_API_URL = os.getenv('API_URL')
# translator = Translator()

# Multilingual sentiment classifier (XLM-RoBERTa fine-tuned on tweets).
# NOTE(review): constructing a pipeline downloads/loads model weights, so this
# runs heavy work at import time — confirm that is intended for this app.
sentiment_model = pipeline(
    'sentiment-analysis',
    model='cardiffnlp/twitter-xlm-roberta-base-sentiment',
    tokenizer='cardiffnlp/twitter-xlm-roberta-base-sentiment',
    device=0 if torch.cuda.is_available() else -1  # GPU 0 when available, else CPU
)

# Zero-shot classifier used by classify_comment() to label comments as
# interrogative vs. non-interrogative.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=0 if torch.cuda.is_available() else -1
)
def classify_comment(text):
    """Label a comment as ``"interrogative"`` or ``"non-interrogative"``.

    The comment is translated to English first, then scored by the
    module-level zero-shot ``classifier``. Every failure path (empty input,
    translation error, empty translation, classification error) falls back
    to ``"non-interrogative"``.
    """
    fallback = "non-interrogative"

    # Guard clause: nothing to classify.
    if not text:
        print("Received empty text for classification.")
        return fallback

    print(f"Classifying comment: {text}")

    # Step 1: translate to English (the zero-shot model works best in English).
    try:
        english = GoogleTranslator(source='auto', target="en").translate(text)
        print(f"Translated text: {english}")
    except Exception as err:
        print(f"Translation failed: {err}")
        return fallback

    if not english:
        print("Translation returned empty text.")
        return fallback

    # Step 2: zero-shot classification over the two candidate labels.
    labels = ["interrogative", "non-interrogative"]
    try:
        outcome = classifier(english, labels, clean_up_tokenization_spaces=True)
        print(f"Classification result: {outcome}")
    except Exception as err:
        print(f"Classification failed: {err}")
        return fallback

    # Labels come back sorted by score; the first one is the winner.
    winner = outcome['labels'][0]
    print(f"Top class: {winner}")
    return winner
def retrieve_from_vdb(query, timeout=30):
    """Query the FastAPI vector-search service and return its result list.

    Args:
        query: Free-text query forwarded to the service's ``/search/`` endpoint.
        timeout: Seconds to wait for the HTTP round trip. New backward-compatible
            parameter — the original call had no timeout and could hang forever.

    Returns:
        The ``results`` list from the JSON response, or ``[]`` on any HTTP or
        network error (consistent with the existing non-200 branch).
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        response = requests.post(
            f"{VECTOR_API_URL}/search/",
            json={"query": query},
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Connection/timeout errors previously propagated to the caller,
        # which contradicted this function's return-[]-on-error contract.
        print(f"Ошибка при поиске: {e}")
        return []
    if response.status_code == 200:
        results = response.json().get("results", [])
        print(f"Получено {len(results)} результатов: {results}")
        return results
    print(f"Ошибка при поиске: {response.text}")
    return []
def analyze_sentiment(comments, batch_size=50):
    """Run the multilingual sentiment model over *comments* in batches.

    Args:
        comments: List of comment strings to score.
        batch_size: Comments per model call. New backward-compatible parameter
            generalizing the previously hard-coded batch size of 50.

    Returns:
        List of pipeline result dicts, one per input comment, in input order.
    """
    print("Начинаем анализ настроений.")
    results = []
    for i in range(0, len(comments), batch_size):
        batch = comments[i:i + batch_size]
        print(f"Анализируем батч с {i} по {i + len(batch)} комментарий: {batch}")
        batch_results = sentiment_model(batch)
        print(f"Результаты батча: {batch_results}")
        results.extend(batch_results)
        time.sleep(1)  # throttle between batches to avoid overload
    # BUG FIX: the original string lacked the f-prefix, so it printed the
    # literal text "{results}" instead of the collected results.
    print(f"Анализ настроений завершен. Общие результаты: {results}")
    return results
# (end of file — stray "|" scrape residue removed)