Spaces:
Paused
Paused
File size: 2,980 Bytes
7c603fe ea6fb88 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe bc8831f 7e822e7 bc8831f 7e822e7 bc8831f 7e822e7 bc8831f 7e822e7 7c603fe 7e822e7 7c603fe a7df9f5 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe 7e822e7 7c603fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import requests
import torch
from transformers import pipeline
from deep_translator import GoogleTranslator
import time
import os
# Base URL of the companion FastAPI vector-search service (used by retrieve_from_vdb).
# NOTE(review): will be None if the API_URL env var is unset — confirm deployment sets it.
VECTOR_API_URL = os.getenv('API_URL')

# English-only sentiment classifier (DistilBERT fine-tuned on SST-2).
# Inputs are machine-translated to English first — see analyze_sentiment.
sentiment_model = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    tokenizer='distilbert-base-uncased-finetuned-sst-2-english',
    device=0 if torch.cuda.is_available() else -1  # GPU 0 if available, else CPU
)

# Zero-shot NLI classifier used by classify_comment to detect questions.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=0 if torch.cuda.is_available() else -1
)
def classify_comment(text):
    """Label a comment as "interrogative" or "non-interrogative".

    The text is machine-translated to English, then scored by the
    module-level zero-shot ``classifier`` against the two candidate labels.
    Any failure (empty input, translation error, classifier error) degrades
    to the safe default label rather than raising.
    """
    fallback = "non-interrogative"

    # Guard: nothing to classify.
    if not text:
        print("Received empty text for classification.")
        return fallback
    print(f"Classifying comment: {text}")

    # Step 1: translate to English; the zero-shot model is English-only.
    try:
        translated_text = GoogleTranslator(source='auto', target="en").translate(text)
    except Exception as e:
        print(f"Translation failed: {e}")
        return fallback
    print(f"Translated text: {translated_text}")
    if not translated_text:
        print("Translation returned empty text.")
        return fallback

    # Step 2: zero-shot classification over the two candidate labels.
    try:
        result = classifier(
            translated_text,
            ["interrogative", "non-interrogative"],
            clean_up_tokenization_spaces=True,
        )
    except Exception as e:
        print(f"Classification failed: {e}")
        return fallback
    print(f"Classification result: {result}")

    # Labels are returned sorted by score; the first one wins.
    top_class = result['labels'][0]
    print(f"Top class: {top_class}")
    return top_class
def retrieve_from_vdb(query):
    """Query the FastAPI vector-search service for documents matching *query*.

    Args:
        query: Free-text search string forwarded to the ``/search/`` endpoint.

    Returns:
        The service's ``results`` list, or ``[]`` on any HTTP or network error.
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        # Bound the request so a hung service cannot block the caller forever.
        response = requests.post(
            f"{VECTOR_API_URL}/search/", json={"query": query}, timeout=30
        )
    except requests.RequestException as e:
        # Network-level failures (DNS, refused connection, timeout) previously
        # propagated uncaught; degrade to "no results" like the HTTP-error path.
        print(f"Ошибка при поиске: {e}")
        return []
    if response.status_code == 200:
        results = response.json().get("results", [])
        print(f"Получено {len(results)} результатов: {results}")
        return results
    print(f"Ошибка при поиске: {response.text}")
    return []
def analyze_sentiment(comments, batch_size=50):
    """Run sentiment analysis over *comments*, translating each batch to English.

    Args:
        comments: Sequence of comment strings (any language).
        batch_size: Number of comments translated and scored per batch
            (default 50, the original hard-coded value).

    Returns:
        List of pipeline output dicts, one per comment, in input order.
        An empty input yields an empty list.
    """
    print("Начинаем анализ настроений.")
    results = []
    # Hoisted: one translator instance for the whole run instead of
    # constructing a new GoogleTranslator per comment.
    translator = GoogleTranslator(source='auto', target="en")
    for i in range(0, len(comments), batch_size):
        batch = comments[i:i + batch_size]
        print(f"Анализируем батч с {i} по {i + len(batch)} комментарий: {batch}")
        # Translate comments into English before sentiment analysis
        translated_batch = [translator.translate(comment) for comment in batch]
        print(f"Переведённый батч: {translated_batch}")
        batch_results = sentiment_model(translated_batch)
        print(f"Результаты батча: {batch_results}")
        results.extend(batch_results)
        time.sleep(1)  # Throttle between batches to avoid overloading the services.
    print(f"Анализ настроений завершен. Общие результаты: {results}")
    return results
|