Commit: Update func_ai.py (func_ai.py changed, +19 −17 lines)
@@ -1,31 +1,26 @@
|
|
1 |
import requests
|
2 |
import torch
|
3 |
-
# from googletrans import Translator
|
4 |
from transformers import pipeline
|
5 |
from deep_translator import GoogleTranslator
|
6 |
import time
|
7 |
import os
|
8 |
-
VECTOR_API_URL = os.getenv('API_URL')
|
9 |
-
|
10 |
-
# translator = Translator()
|
11 |
|
|
|
12 |
|
|
|
13 |
sentiment_model = pipeline(
|
14 |
'sentiment-analysis',
|
15 |
-
model='
|
16 |
-
tokenizer='
|
17 |
device=0 if torch.cuda.is_available() else -1
|
18 |
)
|
19 |
|
20 |
-
|
21 |
-
|
22 |
classifier = pipeline(
|
23 |
"zero-shot-classification",
|
24 |
model="valhalla/distilbart-mnli-12-6",
|
25 |
device=0 if torch.cuda.is_available() else -1
|
26 |
)
|
27 |
|
28 |
-
|
29 |
def classify_comment(text):
|
30 |
if not text:
|
31 |
print("Received empty text for classification.")
|
@@ -33,43 +28,50 @@ def classify_comment(text):
|
|
33 |
print(f"Classifying comment: {text}")
|
34 |
try:
|
35 |
translated_text = GoogleTranslator(source='auto', target="en").translate(text)
|
|
|
36 |
except Exception as e:
|
37 |
print(f"Translation failed: {e}")
|
38 |
return "non-interrogative"
|
39 |
if not translated_text:
|
40 |
print("Translation returned empty text.")
|
41 |
return "non-interrogative"
|
42 |
-
|
43 |
try:
|
44 |
result = classifier(translated_text, ["interrogative", "non-interrogative"], clean_up_tokenization_spaces=True)
|
|
|
45 |
except Exception as e:
|
46 |
print(f"Classification failed: {e}")
|
47 |
return "non-interrogative"
|
|
|
48 |
top_class = result['labels'][0]
|
|
|
49 |
return top_class
|
50 |
|
51 |
-
|
52 |
def retrieve_from_vdb(query):
|
53 |
print(f"Отправка запроса к FastAPI сервису: {query}")
|
54 |
response = requests.post(f"{VECTOR_API_URL}/search/", json={"query": query})
|
55 |
if response.status_code == 200:
|
56 |
results = response.json().get("results", [])
|
57 |
-
print(f"Получено {len(results)}
|
58 |
return results
|
59 |
else:
|
60 |
print(f"Ошибка при поиске: {response.text}")
|
61 |
return []
|
62 |
|
63 |
-
|
64 |
def analyze_sentiment(comments):
|
65 |
print("Начинаем анализ настроений.")
|
66 |
results = []
|
67 |
for i in range(0, len(comments), 50):
|
68 |
batch = comments[i:i + 50]
|
69 |
-
print(f"Анализируем батч с {i} по {i + len(batch)}
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
results.extend(batch_results)
|
72 |
time.sleep(1) # Задержка для предотвращения перегрузки
|
73 |
-
print("Анализ настроений завершен.")
|
74 |
return results
|
75 |
-
|
|
|
1 |
import requests
|
2 |
import torch
|
|
|
3 |
from transformers import pipeline
|
4 |
from deep_translator import GoogleTranslator
|
5 |
import time
|
6 |
import os
|
|
|
|
|
|
|
7 |
|
8 |
+
# Base URL of the FastAPI vector-search service (see retrieve_from_vdb).
VECTOR_API_URL = os.getenv('API_URL')

# Pick GPU 0 when CUDA is available, else CPU (-1). Computed once so both
# pipelines are guaranteed to run on the same device.
_DEVICE = 0 if torch.cuda.is_available() else -1

# English-only sentiment classifier; callers translate input to English
# first (see analyze_sentiment), so an English model is sufficient.
sentiment_model = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    tokenizer='distilbert-base-uncased-finetuned-sst-2-english',
    device=_DEVICE
)

# Zero-shot classifier used to tag comments as interrogative or not
# (see classify_comment).
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=_DEVICE
)
|
23 |
|
|
|
24 |
def classify_comment(text):
    """Classify *text* as "interrogative" or "non-interrogative".

    The text is translated to English first (the zero-shot model is
    English-only), then scored by the module-level ``classifier``.
    Every failure path — empty input, translation error, empty
    translation, classification error — falls back to the safe default
    "non-interrogative".
    """
    if not text:
        print("Received empty text for classification.")
        # Guard: without this early return, empty text would fall through
        # into translation/classification with nothing to classify.
        return "non-interrogative"
    print(f"Classifying comment: {text}")
    try:
        translated_text = GoogleTranslator(source='auto', target="en").translate(text)
        print(f"Translated text: {translated_text}")
    except Exception as e:
        print(f"Translation failed: {e}")
        return "non-interrogative"
    if not translated_text:
        print("Translation returned empty text.")
        return "non-interrogative"

    try:
        result = classifier(translated_text, ["interrogative", "non-interrogative"], clean_up_tokenization_spaces=True)
        print(f"Classification result: {result}")
    except Exception as e:
        print(f"Classification failed: {e}")
        return "non-interrogative"

    # Pipeline labels are ordered by score, so index 0 is the best match.
    top_class = result['labels'][0]
    print(f"Top class: {top_class}")
    return top_class
49 |
|
|
|
50 |
def retrieve_from_vdb(query):
    """Query the FastAPI vector-search service for *query*.

    POSTs to ``{VECTOR_API_URL}/search/`` and returns the service's
    "results" list; on any non-200 response the error body is logged
    and an empty list is returned instead.
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    resp = requests.post(f"{VECTOR_API_URL}/search/", json={"query": query})
    # Guard clause: bail out early on any non-success status.
    if resp.status_code != 200:
        print(f"Ошибка при поиске: {resp.text}")
        return []
    found = resp.json().get("results", [])
    print(f"Получено {len(found)} результатов: {found}")
    return found
|
60 |
|
|
|
61 |
def analyze_sentiment(comments, batch_size=50):
    """Run sentiment analysis over *comments* and return one result per comment.

    Comments are processed in batches of ``batch_size`` (default 50,
    matching the original hard-coded value), translated to English first
    because the sentiment model is English-only, with a one-second pause
    between batches to throttle load. Returns a list of pipeline result
    dicts in input order; an empty input yields an empty list.
    """
    print("Начинаем анализ настроений.")
    results = []
    for i in range(0, len(comments), batch_size):
        batch = comments[i:i + batch_size]
        print(f"Анализируем батч с {i} по {i + len(batch)} комментарий: {batch}")

        # Translate the batch to English before sentiment analysis.
        # The translator is loop-invariant within a batch, so build it
        # once per batch instead of once per comment.
        translator = GoogleTranslator(source='auto', target="en")
        translated_batch = [translator.translate(comment) for comment in batch]
        print(f"Переведённый батч: {translated_batch}")

        batch_results = sentiment_model(translated_batch)
        print(f"Результаты батча: {batch_results}")
        results.extend(batch_results)
        time.sleep(1)  # throttle between batches to avoid overloading services
    print(f"Анализ настроений завершен. Общие результаты: {results}")
    return results
|
|