File size: 2,719 Bytes
4a36ad2
7c603fe
 
3ad38de
7c603fe
ea6fb88
7c603fe
3ad38de
cf91668
7c603fe
3ad38de
fa3a00b
3ad38de
 
 
 
 
 
7c603fe
3ad38de
 
 
 
 
7c603fe
 
bc8831f
3ad38de
bc8831f
3ad38de
bc8831f
 
3ad38de
bc8831f
3ad38de
bc8831f
 
3ad38de
bc8831f
91c5333
bc8831f
 
3ad38de
bc8831f
3ad38de
bc8831f
91c5333
7c603fe
3ad38de
7c603fe
 
3ad38de
 
 
 
 
 
 
 
 
 
 
7c603fe
3ad38de
7c603fe
 
 
3ad38de
 
 
 
7c603fe
3ad38de
4a36ad2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

import requests
import torch
# from googletrans import Translator
from transformers import pipeline
from deep_translator import GoogleTranslator
import time
import os 
VECTOR_API_URL = os.getenv('API_URL')

# translator = Translator()

# Run on GPU 0 when CUDA is available, otherwise fall back to CPU (-1).
_DEVICE = 0 if torch.cuda.is_available() else -1

# Multilingual sentiment classifier (XLM-RoBERTa fine-tuned on Twitter data).
sentiment_model = pipeline(
    'sentiment-analysis',
    model='cardiffnlp/twitter-xlm-roberta-base-sentiment',
    tokenizer='cardiffnlp/twitter-xlm-roberta-base-sentiment',
    device=_DEVICE,
)

# Zero-shot classifier used to decide whether a comment is a question.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=_DEVICE,
)

def classify_comment(text):
    """Classify *text* as "interrogative" or "non-interrogative".

    The comment is first machine-translated to English, then run through the
    zero-shot classifier. On empty input, failed translation, or a classifier
    error the safe default "non-interrogative" is returned.
    """
    # Guard: nothing to classify.
    if not text:
        print("Received empty text for classification.")
        return "non-interrogative"
    print(f"Classifying comment: {text}")

    # Step 1: translate to English (the zero-shot model is English-only).
    try:
        translated_text = GoogleTranslator(source='auto', target="en").translate(text)
        print(f"Translated text: {translated_text}")
    except Exception as e:
        print(f"Translation failed: {e}")
        return "non-interrogative"
    # Translator may return None/"" for some inputs — treat as failure.
    if not translated_text:
        print("Translation returned empty text.")
        return "non-interrogative"

    # Step 2: zero-shot classification over the two candidate labels.
    try:
        result = classifier(
            translated_text,
            ["interrogative", "non-interrogative"],
            clean_up_tokenization_spaces=True,
        )
        print(f"Classification result: {result}")
    except Exception as e:
        print(f"Classification failed: {e}")
        return "non-interrogative"

    # Labels come back sorted by score; the first one is the winner.
    top_class = result['labels'][0]
    print(f"Top class: {top_class}")
    return top_class

def retrieve_from_vdb(query, timeout=10):
    """Query the FastAPI vector-search service and return its result list.

    Args:
        query: Search string sent to the service's /search/ endpoint.
        timeout: Seconds to wait for the HTTP request (default 10). The
            original call had no timeout, so a hung service blocked forever.

    Returns:
        The "results" list from the JSON response, or [] on any HTTP status
        error or network failure (the function is best-effort by design).
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        # timeout= keeps a dead/slow service from hanging the caller forever.
        response = requests.post(
            f"{VECTOR_API_URL}/search/",
            json={"query": query},
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Network-level failure (connection refused, timeout, DNS, ...):
        # keep the best-effort contract and return an empty result set.
        print(f"Ошибка при поиске: {e}")
        return []
    if response.status_code == 200:
        results = response.json().get("results", [])
        print(f"Получено {len(results)} результатов: {results}")
        return results
    else:
        print(f"Ошибка при поиске: {response.text}")
        return []

def analyze_sentiment(comments, batch_size=50):
    """Run the sentiment model over *comments* in fixed-size batches.

    Args:
        comments: List of comment strings to analyze.
        batch_size: Comments per model call (default 50, the previously
            hard-coded value).

    Returns:
        List with one sentiment result per comment, in input order.
    """
    print("Начинаем анализ настроений.")
    results = []
    for i in range(0, len(comments), batch_size):
        batch = comments[i:i + batch_size]
        print(f"Анализируем батч с {i} по {i + len(batch)} комментарий: {batch}")
        batch_results = sentiment_model(batch)
        print(f"Результаты батча: {batch_results}")
        results.extend(batch_results)
        time.sleep(1)  # Задержка для предотвращения перегрузки
    # BUG FIX: the original literal was missing the f-prefix, so the text
    # "{results}" was printed instead of the actual results.
    print(f"Анализ настроений завершен. Общие результаты: {results}")
    return results