Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -34,6 +34,9 @@ class Vision2030Assistant:
     def __init__(self):
         """Initialize the Vision 2030 Assistant with basic knowledge"""
         logger.info("Initializing Vision 2030 Assistant...")
+
+        # Load QA pipelines for English & Arabic
+        self._load_qa_pipelines()
 
         # Initialize embedding models
         self.load_embedding_models()
@@ -57,7 +60,38 @@ class Vision2030Assistant:
         self.has_pdf_content = False
 
         logger.info("Vision 2030 Assistant initialized successfully")
-
+
+    @spaces.GPU
+    def _load_qa_pipelines(self):
+        """
+        Load or initialize QA models for English and Arabic.
+        You can choose any Hugging Face QA model; below are just examples.
+        """
+        logger.info("Loading QA pipelines...")
+        try:
+            # English QA pipeline
+            self.qa_pipeline_en = pipeline(
+                "question-answering",
+                model="distilbert-base-cased-distilled-squad",
+                tokenizer="distilbert-base-cased-distilled-squad",
+                device=0 if has_gpu else -1  # Use GPU if available
+            )
+
+            # Arabic QA pipeline
+            # For Arabic, you can use a model like `aubmindlab/bert-base-arabertv02-qa`:
+            self.qa_pipeline_ar = pipeline(
+                "question-answering",
+                model="aubmindlab/bert-base-arabertv02-qa",
+                tokenizer="aubmindlab/bert-base-arabertv02-qa",
+                device=0 if has_gpu else -1
+            )
+
+            logger.info("QA pipelines loaded successfully.")
+        except Exception as e:
+            logger.error(f"Error loading QA pipelines: {str(e)}")
+            self.qa_pipeline_en = None
+            self.qa_pipeline_ar = None
+
     @spaces.GPU
     def load_embedding_models(self):
         """Load embedding models for retrieval"""
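Note on the hunk above: `@spaces.GPU` is the ZeroGPU decorator that borrows a GPU only while the decorated function runs, and `has_gpu` is not defined anywhere in this diff, so it is presumably set elsewhere in app.py (e.g. from `torch.cuda.is_available()`). A minimal standalone sketch of how such a pipeline behaves, assuming the same English checkpoint:

```python
import torch
from transformers import pipeline

# Assumption: app.py defines a flag like this at module level.
has_gpu = torch.cuda.is_available()

qa = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    device=0 if has_gpu else -1,
)

# Extractive QA returns a span copied out of the context, plus metadata:
# {"answer": ..., "score": ..., "start": ..., "end": ...}
result = qa(
    question="When was Vision 2030 announced?",
    context="Saudi Vision 2030 is a strategic framework announced on 25 April 2016.",
)
print(result["answer"], round(result["score"], 3))
```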
@@ -369,71 +403,72 @@ class Vision2030Assistant:
         return ""
 
     def generate_response(self, user_input):
-        """Generate
+        """Generate a more detailed answer using a QA pipeline if available."""
         if not user_input or user_input.strip() == "":
             return ""
 
         start_time = time.time()
 
-        # Default response in case of failure
         default_response = {
             "en": "I apologize, but I couldn't process your request properly. Please try again.",
             "ar": "أعتذر، لم أتمكن من معالجة طلبك بشكل صحيح. الرجاء المحاولة مرة أخرى."
         }
 
         try:
-            # Detect language
+            # 1) Detect language
             try:
-
-
-                lang = "en"
+                lang_detected = detect(user_input)
+                lang = "ar" if lang_detected == "ar" else "en"
             except:
-                lang = "en"  #
+                lang = "en"  # fallback
 
             logger.info(f"Detected language: {lang}")
 
-            #
-
-
-
-
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-
-
-
-
+            # 2) Retrieve relevant context (could be from PDF or base knowledge)
+            context = self.retrieve_context(user_input, lang)
+
+            # 3) Decide whether to use QA pipeline or fallback
+            if lang == "ar" and self.qa_pipeline_ar is not None and context:
+                # Use Arabic QA pipeline
+                try:
+                    answer = self.qa_pipeline_ar(question=user_input, context=context)
+                    reply = answer["answer"].strip()
+
+                    # If the QA model returns something too short or obviously unhelpful,
+                    # you can fallback to the original context-based approach:
+                    if len(reply) < 2:
+                        reply = context  # fallback to returning the raw context
+                except Exception as e:
+                    logger.error(f"Error in Arabic QA pipeline: {str(e)}")
+                    # fallback
+                    reply = context if context else "لم أتمكن من العثور على معلومات كافية حول هذا السؤال."
+
+            elif lang == "en" and self.qa_pipeline_en is not None and context:
+                # Use English QA pipeline
+                try:
+                    answer = self.qa_pipeline_en(question=user_input, context=context)
+                    reply = answer["answer"].strip()
+                    if len(reply) < 2:
+                        reply = context
+                except Exception as e:
+                    logger.error(f"Error in English QA pipeline: {str(e)}")
+                    reply = context if context else "I couldn't find enough information about this question."
+
+            else:
+                # 4) If no QA pipeline or no context, fallback to your existing approach
+                # e.g., returning context or a short fallback message.
+
+                if lang == "ar":
                     reply = context if context else "لم أتمكن من العثور على معلومات كافية حول هذا السؤال."
-
-
-            context = self.retrieve_context(user_input, lang)
-            reply = context if context else "I couldn't find enough information about this question."
+                else:
+                    reply = context if context else "I couldn't find enough information about this question."
 
-            # Record
+            # 5) Record metrics and return
             response_time = time.time() - start_time
             self.metrics["response_times"].append(response_time)
-
             logger.info(f"Generated response in {response_time:.2f}s")
 
-            # Store the interaction
+            # Store the interaction
             interaction = {
                 "timestamp": datetime.now().isoformat(),
                 "user_input": user_input,
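`detect` in the hunk above is not defined in this diff; it presumably comes from the `langdetect` package imported elsewhere in app.py. A small self-contained sketch of the same detection-with-fallback logic (the wrapper function name is illustrative, not from the file):

```python
from langdetect import detect  # pip install langdetect

def detect_lang(text: str) -> str:
    """Return "ar" for Arabic input, otherwise default to "en"."""
    try:
        return "ar" if detect(text) == "ar" else "en"
    except Exception:
        # detect() raises on empty or undecidable input,
        # mirroring the bare except fallback in the diff.
        return "en"

print(detect_lang("ما هي رؤية 2030؟"))       # -> ar
print(detect_lang("What is Vision 2030?"))   # -> en
```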
@@ -447,8 +482,10 @@ class Vision2030Assistant:
 
         except Exception as e:
             logger.error(f"Error generating response: {str(e)}")
+            # fallback to default
             return default_response.get(lang, default_response["en"])
 
+
     def evaluate_factual_accuracy(self, response, reference):
         """Simple evaluation of factual accuracy by keyword matching"""
         # This is a simplified approach - in production, use more sophisticated methods
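The body of `evaluate_factual_accuracy` is cut off by the hunk boundary above. As a rough illustration of the keyword-matching idea its docstring describes (the tokenization and scoring below are assumptions, not the file's actual code):

```python
import re

def evaluate_factual_accuracy(response: str, reference: str) -> float:
    """Fraction of reference keywords that appear in the response.

    A deliberately simple proxy for factual accuracy; production
    systems should prefer entailment models or human evaluation.
    """
    tokenize = lambda s: set(re.findall(r"\w+", s.lower()))
    ref_keywords = tokenize(reference)
    if not ref_keywords:
        return 0.0
    return len(ref_keywords & tokenize(response)) / len(ref_keywords)

print(evaluate_factual_accuracy(
    "Vision 2030 was announced in 2016.",
    "announced in 2016",
))  # -> 1.0
```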