Uniaff committed on
Commit
7e822e7
·
verified ·
1 Parent(s): a7df9f5

Update func_ai.py

Browse files
Files changed (1) hide show
  1. func_ai.py +19 -17
func_ai.py CHANGED
@@ -1,31 +1,26 @@
1
  import requests
2
  import torch
3
- # from googletrans import Translator
4
  from transformers import pipeline
5
  from deep_translator import GoogleTranslator
6
  import time
7
  import os
8
# Base URL of the vector-search service, read from the API_URL environment
# variable (None when the variable is not set).
VECTOR_API_URL = os.getenv('API_URL')

# Both pipelines run on CUDA device 0 when torch reports CUDA, else on CPU (-1).
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

# The sentiment pipeline loads its model and tokenizer from the same checkpoint.
_SENTIMENT_CHECKPOINT = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'

sentiment_model = pipeline(
    'sentiment-analysis',
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    device=_PIPELINE_DEVICE,
)

# Zero-shot classifier that classify_comment scores candidate labels with.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=_PIPELINE_DEVICE,
)
27
 
28
-
def classify_comment(text):
    """Classify a comment as "interrogative" or "non-interrogative".

    The text is translated to English with GoogleTranslator (source language
    auto-detected) and then scored by the module-level zero-shot
    ``classifier`` against the two candidate labels.

    Args:
        text: The comment to classify.

    Returns:
        The top-ranked label from the classifier. Falls back to
        "non-interrogative" when the input is empty or whitespace-only,
        when translation fails or yields nothing, or when classification
        fails.
    """
    fallback = "non-interrogative"

    # A whitespace-only string is truthy but has nothing to classify, so it
    # is rejected here instead of being sent to the translator.
    if not text or (isinstance(text, str) and not text.strip()):
        print("Received empty text for classification.")
        return fallback

    print(f"Classifying comment: {text}")
    try:
        translated_text = GoogleTranslator(source='auto', target="en").translate(text)
    except Exception as e:
        # Best effort: any translation error degrades to the fallback label.
        print(f"Translation failed: {e}")
        return fallback
    if not translated_text:
        print("Translation returned empty text.")
        return fallback
    print(f"Translated text: {translated_text}")

    try:
        result = classifier(
            translated_text,
            ["interrogative", "non-interrogative"],
            clean_up_tokenization_spaces=True,
        )
    except Exception as e:
        # Best effort: any classification error degrades to the fallback label.
        print(f"Classification failed: {e}")
        return fallback

    # The first entry of 'labels' is taken as the winning label.
    return result['labels'][0]
50
 
51
-
def retrieve_from_vdb(query):
    """Query the vector-search service and return its results.

    Sends a POST request to ``{VECTOR_API_URL}/search/`` with the JSON body
    ``{"query": query}``.

    Args:
        query: The search query forwarded to the service.

    Returns:
        The value stored under "results" in the JSON response (``[]`` when
        the key is absent). Also returns ``[]`` when the request cannot be
        completed, the response status is not 200, or the body is not
        valid JSON.
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        # Without an explicit timeout, requests waits indefinitely for an
        # unresponsive server.
        response = requests.post(
            f"{VECTOR_API_URL}/search/",
            json={"query": query},
            timeout=30,
        )
    except requests.RequestException as e:
        # Covers connection errors, timeouts and an unusable URL (for
        # example when API_URL is unset); report it like any other failure.
        print(f"Ошибка при поиске: {e}")
        return []

    if response.status_code != 200:
        print(f"Ошибка при поиске: {response.text}")
        return []

    try:
        results = response.json().get("results", [])
    except ValueError as e:
        # The service answered 200 but the body could not be parsed as JSON.
        print(f"Ошибка при поиске: {e}")
        return []
    print(f"Получено {len(results)} результатов.")
    return results
62
 
63
-
def analyze_sentiment(comments):
    """Run the module-level sentiment pipeline over comments in batches.

    Args:
        comments: A sliceable sequence of comment strings.

    Returns:
        A list with the outputs of ``sentiment_model`` for every batch,
        concatenated in input order.
    """
    print("Начинаем анализ настроений.")
    batch_size = 50
    total = len(comments)
    collected = []
    start = 0
    while start < total:
        chunk = comments[start:start + batch_size]
        stop = start + len(chunk)
        print(f"Анализируем батч с {start} по {stop} комментарий.")
        collected += sentiment_model(chunk)
        # Pause between batches to avoid overloading.
        time.sleep(1)
        start += batch_size
    print("Анализ настроений завершен.")
    return collected
75
-
 
1
  import requests
2
  import torch
 
3
  from transformers import pipeline
4
  from deep_translator import GoogleTranslator
5
  import time
6
  import os
 
 
 
7
 
8
# Base URL of the vector-search service, read from the API_URL environment
# variable (None when the variable is not set).
VECTOR_API_URL = os.getenv('API_URL')

# Both pipelines run on CUDA device 0 when torch reports CUDA, else on CPU (-1).
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

# Checkpoint for the sentiment pipeline; model and tokenizer share it.
# analyze_sentiment translates comments to English before calling this model.
_SENTIMENT_CHECKPOINT = 'distilbert-base-uncased-finetuned-sst-2-english'

sentiment_model = pipeline(
    'sentiment-analysis',
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    device=_PIPELINE_DEVICE,
)

# Zero-shot classifier that classify_comment scores candidate labels with.
classifier = pipeline(
    "zero-shot-classification",
    model="valhalla/distilbart-mnli-12-6",
    device=_PIPELINE_DEVICE,
)
23
 
 
def classify_comment(text):
    """Classify a comment as "interrogative" or "non-interrogative".

    The text is translated to English with GoogleTranslator (source language
    auto-detected) and then scored by the module-level zero-shot
    ``classifier`` against the two candidate labels. Intermediate values
    are printed for debugging.

    Args:
        text: The comment to classify.

    Returns:
        The top-ranked label from the classifier. Falls back to
        "non-interrogative" when the input is empty or whitespace-only,
        when translation fails or yields nothing, or when classification
        fails.
    """
    fallback = "non-interrogative"

    # A whitespace-only string is truthy but has nothing to classify, so it
    # is rejected here instead of being sent to the translator.
    if not text or (isinstance(text, str) and not text.strip()):
        print("Received empty text for classification.")
        return fallback

    print(f"Classifying comment: {text}")
    try:
        translated_text = GoogleTranslator(source='auto', target="en").translate(text)
        print(f"Translated text: {translated_text}")
    except Exception as e:
        # Best effort: any translation error degrades to the fallback label.
        print(f"Translation failed: {e}")
        return fallback
    if not translated_text:
        print("Translation returned empty text.")
        return fallback

    try:
        result = classifier(
            translated_text,
            ["interrogative", "non-interrogative"],
            clean_up_tokenization_spaces=True,
        )
        print(f"Classification result: {result}")
    except Exception as e:
        # Best effort: any classification error degrades to the fallback label.
        print(f"Classification failed: {e}")
        return fallback

    # The first entry of 'labels' is taken as the winning label.
    top_class = result['labels'][0]
    print(f"Top class: {top_class}")
    return top_class
49
 
 
def retrieve_from_vdb(query):
    """Query the vector-search service and return its results.

    Sends a POST request to ``{VECTOR_API_URL}/search/`` with the JSON body
    ``{"query": query}``.

    Args:
        query: The search query forwarded to the service.

    Returns:
        The value stored under "results" in the JSON response (``[]`` when
        the key is absent). Also returns ``[]`` when the request cannot be
        completed, the response status is not 200, or the body is not
        valid JSON.
    """
    print(f"Отправка запроса к FastAPI сервису: {query}")
    try:
        # Without an explicit timeout, requests waits indefinitely for an
        # unresponsive server.
        response = requests.post(
            f"{VECTOR_API_URL}/search/",
            json={"query": query},
            timeout=30,
        )
    except requests.RequestException as e:
        # Covers connection errors, timeouts and an unusable URL (for
        # example when API_URL is unset); report it like any other failure.
        print(f"Ошибка при поиске: {e}")
        return []

    if response.status_code != 200:
        print(f"Ошибка при поиске: {response.text}")
        return []

    try:
        results = response.json().get("results", [])
    except ValueError as e:
        # The service answered 200 but the body could not be parsed as JSON.
        print(f"Ошибка при поиске: {e}")
        return []
    print(f"Получено {len(results)} результатов: {results}")
    return results
60
 
 
def _translate_or_keep(translator, comment):
    """Translate *comment* with *translator*, keeping the original on failure."""
    try:
        translated = translator.translate(comment)
    except Exception as e:
        # One untranslatable comment must not abort the whole analysis.
        print(f"Translation failed: {e}")
        return comment
    # An empty or None translation is replaced by the original text so the
    # sentiment model always receives one input per comment.
    return translated or comment


def analyze_sentiment(comments):
    """Translate comments to English and run the sentiment pipeline in batches.

    Comments are processed 50 at a time. Each comment is translated with
    GoogleTranslator (source language auto-detected); a comment whose
    translation fails or comes back empty is passed to the model untranslated,
    so the result list stays aligned one-to-one with ``comments``.

    Args:
        comments: A sliceable sequence of comment strings.

    Returns:
        A list with the outputs of ``sentiment_model`` for every batch,
        concatenated in input order.
    """
    print("Начинаем анализ настроений.")
    results = []
    for i in range(0, len(comments), 50):
        batch = comments[i:i + 50]
        print(f"Анализируем батч с {i} по {i + len(batch)} комментарий: {batch}")

        # One translator per batch instead of one per comment.
        translator = GoogleTranslator(source='auto', target="en")
        translated_batch = [_translate_or_keep(translator, comment) for comment in batch]
        print(f"Переведённый батч: {translated_batch}")

        batch_results = sentiment_model(translated_batch)
        print(f"Результаты батча: {batch_results}")
        results.extend(batch_results)
        # Pause between batches to avoid overloading.
        time.sleep(1)
    print(f"Анализ настроений завершен. Общие результаты: {results}")
    return results