Spaces:
Paused
Paused
File size: 2,980 Bytes
7c603fe ea6fb88 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe bc8831f 7e822e7 bc8831f 7e822e7 bc8831f 7e822e7 bc8831f 7e822e7 7c603fe 7e822e7 7c603fe a7df9f5 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import requests
import torch
from transformers import pipeline
from deep_translator import GoogleTranslator
import time
import os
# Base URL of the companion FastAPI vector-search service (used by retrieve_from_vdb).
# NOTE(review): will be None if the API_URL env var is unset — confirm deployment sets it.
VECTOR_API_URL = os.getenv('API_URL')

# English-only sentiment classifier (DistilBERT fine-tuned on SST-2).
# Inputs are machine-translated to English first — see analyze_sentiment.
sentiment_model = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    tokenizer='distilbert-base-uncased-finetuned-sst-2-english',
    device=0 if torch.cuda.is_available() else -1  # GPU 0 if available, else CPU
)

# Zero-shot NLI classifier used by classify_comment to detect questions.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=0 if torch.cuda.is_available() else -1
)
def classify_comment(text):
    """Label a comment as "interrogative" or "non-interrogative".

    The text is machine-translated to English, then scored by the
    module-level zero-shot ``classifier`` against the two candidate labels.
    Any failure (empty input, translation error, classifier error) degrades
    to the safe default label rather than raising.
    """
    fallback = "non-interrogative"

    # Guard: nothing to classify.
    if not text:
        print("Received empty text for classification.")
        return fallback
    print(f"Classifying comment: {text}")

    # Step 1: translate to English; the zero-shot model is English-only.
    try:
        translated_text = GoogleTranslator(source='auto', target="en").translate(text)
    except Exception as e:
        print(f"Translation failed: {e}")
        return fallback
    print(f"Translated text: {translated_text}")
    if not translated_text:
        print("Translation returned empty text.")
        return fallback

    # Step 2: zero-shot classification over the two candidate labels.
    try:
        result = classifier(
            translated_text,
            ["interrogative", "non-interrogative"],
            clean_up_tokenization_spaces=True,
        )
    except Exception as e:
        print(f"Classification failed: {e}")
        return fallback
    print(f"Classification result: {result}")

    # Labels are returned sorted by score; the first one wins.
    top_class = result['labels'][0]
    print(f"Top class: {top_class}")
    return top_class
def retrieve_from_vdb(query):
    """Query the FastAPI vector-search service for documents matching *query*.

    Args:
        query: Free-text search string forwarded to the ``/search/`` endpoint.

    Returns:
        The service's ``results`` list, or ``[]`` on any HTTP or network error.
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        # Bound the request so a hung service cannot block the caller forever.
        response = requests.post(
            f"{VECTOR_API_URL}/search/", json={"query": query}, timeout=30
        )
    except requests.RequestException as e:
        # Network-level failures (DNS, refused connection, timeout) previously
        # propagated uncaught; degrade to "no results" like the HTTP-error path.
        print(f"Ошибка при поиске: {e}")
        return []
    if response.status_code == 200:
        results = response.json().get("results", [])
        print(f"Получено {len(results)} результатов: {results}")
        return results
    print(f"Ошибка при поиске: {response.text}")
    return []
def analyze_sentiment(comments, batch_size=50):
    """Run sentiment analysis over *comments*, translating each batch to English.

    Args:
        comments: Sequence of comment strings (any language).
        batch_size: Number of comments translated and scored per batch
            (default 50, the original hard-coded value).

    Returns:
        List of pipeline output dicts, one per comment, in input order.
        An empty input yields an empty list.
    """
    print("Начинаем анализ настроений.")
    results = []
    # Hoisted: one translator instance for the whole run instead of
    # constructing a new GoogleTranslator per comment.
    translator = GoogleTranslator(source='auto', target="en")
    for i in range(0, len(comments), batch_size):
        batch = comments[i:i + batch_size]
        print(f"Анализируем батч с {i} по {i + len(batch)} комментарий: {batch}")
        # Translate comments into English before sentiment analysis
        translated_batch = [translator.translate(comment) for comment in batch]
        print(f"Переведённый батч: {translated_batch}")
        batch_results = sentiment_model(translated_batch)
        print(f"Результаты батча: {batch_results}")
        results.extend(batch_results)
        time.sleep(1)  # Throttle between batches to avoid overloading the services.
    print(f"Анализ настроений завершен. Общие результаты: {results}")
    return results
|