File size: 2,496 Bytes
7c603fe
 
ea6fb88
7c603fe
ea6fb88
7c603fe
 
 
 
ea6fb88
7c603fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc8831f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c603fe
 
 
 
 
 
a7df9f5
7c603fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import requests
import torch
# from googletrans import Translator
from transformers import pipeline
from deep_translator import GoogleTranslator
import time
import os 
# Base URL of the external vector-search FastAPI service, read once at import
# time; None when the API_URL environment variable is unset — TODO confirm it
# is always set in deployment (retrieve_from_vdb would build a "None/search/" URL).
VECTOR_API_URL = os.getenv('API_URL')

# translator = Translator()


# Multilingual sentiment-analysis pipeline (XLM-RoBERTa fine-tuned on Twitter
# data). device=0 selects the first GPU when CUDA is available, -1 means CPU.
# NOTE: constructed at import time, so loading the model weights happens on
# first import of this module.
sentiment_model = pipeline(
    'sentiment-analysis',
    model='cardiffnlp/twitter-xlm-roberta-base-sentiment',
    tokenizer='cardiffnlp/twitter-xlm-roberta-base-sentiment',
    device=0 if torch.cuda.is_available() else -1
)



# Zero-shot classification pipeline used by classify_comment to decide whether
# a (translated) comment is interrogative. Same GPU/CPU selection as above.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=0 if torch.cuda.is_available() else -1
)


def classify_comment(text):
    """Label a comment as 'interrogative' or 'non-interrogative'.

    The text is first translated to English with GoogleTranslator, then run
    through the module-level zero-shot ``classifier``.  Empty input and any
    translation/classification failure fall back to 'non-interrogative'.
    """
    fallback = "non-interrogative"

    # Guard: nothing to classify (covers None and "").
    if not text:
        print("Received empty text for classification.")
        return fallback

    print(f"Classifying comment: {text}")

    # Best-effort translation; the classifier labels are English, so the
    # input must be English too.
    try:
        english = GoogleTranslator(source='auto', target="en").translate(text)
    except Exception as e:
        print(f"Translation failed: {e}")
        return fallback

    if not english:
        print("Translation returned empty text.")
        return fallback
    print(f"Translated text: {english}")

    # Zero-shot classification over the two candidate labels; the first
    # label in the result is the highest-scoring one.
    try:
        outcome = classifier(english, ["interrogative", "non-interrogative"], clean_up_tokenization_spaces=True)
    except Exception as e:
        print(f"Classification failed: {e}")
        return fallback

    return outcome['labels'][0]


def retrieve_from_vdb(query):
    """Search the external vector DB via the FastAPI service.

    Sends ``query`` to ``{VECTOR_API_URL}/search/`` and returns the list under
    the response's ``results`` key.  Returns an empty list on any non-200
    status, timeout, or connection failure — callers always get a list.
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        # Explicit timeout: requests.post has none by default, so a hung
        # service would otherwise block the caller forever.
        response = requests.post(
            f"{VECTOR_API_URL}/search/",
            json={"query": query},
            timeout=30,
        )
    except requests.RequestException as e:
        # Connection errors / timeouts previously propagated and crashed the
        # caller; degrade to the same empty-result contract as HTTP errors.
        print(f"Ошибка при поиске: {e}")
        return []
    if response.status_code == 200:
        results = response.json().get("results", [])
        print(f"Получено {len(results)} результатов.")
        return results
    print(f"Ошибка при поиске: {response.text}")
    return []


def analyze_sentiment(comments, batch_size=50):
    """Run the module-level sentiment model over ``comments`` in batches.

    Args:
        comments: list of comment strings to score.
        batch_size: comments per model call; defaults to 50, the previously
            hard-coded batch size, so existing callers are unaffected.

    Returns:
        A flat list with one sentiment-model result per input comment
        (empty list for empty input).
    """
    print("Начинаем анализ настроений.")
    results = []
    for start in range(0, len(comments), batch_size):
        batch = comments[start:start + batch_size]
        print(f"Анализируем батч с {start} по {start + len(batch)} комментарий.")
        results.extend(sentiment_model(batch))
        # Throttle between batches to avoid overloading the model service;
        # no need to sleep after the final batch.
        if start + batch_size < len(comments):
            time.sleep(1)
    print("Анализ настроений завершен.")
    return results