abdull4h committed c8b0d13 (verified) · Parent(s): d69f501

Update app.py

Files changed (1): app.py (+258 -62)

app.py CHANGED
@@ -1,3 +1,19 @@
+# Force install sentencepiece
+import sys
+import subprocess
+
+def install_package(package):
+    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
+
+try:
+    import sentencepiece
+    print("SentencePiece is already installed")
+except ImportError:
+    print("Installing SentencePiece...")
+    install_package("sentencepiece==0.1.99")
+    print("SentencePiece installed successfully")
+
+# Import other required libraries
 import gradio as gr
 import os
 import re
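The install-at-import guard above works on Spaces, but it imports the package just to probe for it; pinning `sentencepiece` in the Space's requirements.txt avoids the runtime install entirely. If the guard is kept in code, a lighter-weight variant (a sketch, assuming the same pinned version) can probe without importing:

    import importlib.util
    import subprocess
    import sys

    # Probe for the module without importing it; install the pinned build if absent
    if importlib.util.find_spec("sentencepiece") is None:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece==0.1.99"])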
@@ -5,20 +21,27 @@ import torch
 import numpy as np
 from pathlib import Path
 import PyPDF2
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
 from sentence_transformers import SentenceTransformer
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 from langchain.schema import Document
 from langchain.embeddings import HuggingFaceEmbeddings
-import spaces  # Add this import for Hugging Face Spaces
+import spaces
+
+# Global variables to store model state
+model = None
+tokenizer = None
+assistant = None
+model_type = "primary"  # Track if we're using primary or fallback model
 
 # Create the Vision 2030 Assistant class
 class Vision2030Assistant:
-    def __init__(self, model, tokenizer, vector_store):
+    def __init__(self, model, tokenizer, vector_store, model_type="primary"):
         self.model = model
         self.tokenizer = tokenizer
         self.vector_store = vector_store
+        self.model_type = model_type
         self.conversation_history = []
 
     def answer(self, user_query):
@@ -40,8 +63,11 @@
         # Retrieve relevant contexts
         contexts = retrieve_context(enhanced_query, self.vector_store, top_k=5)
 
-        # Generate response
-        response = generate_response(user_query, contexts, self.model, self.tokenizer, language)
+        # Generate response based on model type
+        if self.model_type == "primary":
+            response = generate_response_primary(user_query, contexts, self.model, self.tokenizer, language)
+        else:
+            response = generate_response_fallback(user_query, contexts, self.model, self.tokenizer, language)
 
         # Add response to conversation history
         self.conversation_history.append({"role": "assistant", "content": response})
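The if/else above keeps the two generators apart, but the app also tracks a third model_type ("mbart") that silently falls through to the fallback path. A hypothetical table-driven dispatch, sketched under that assumption (the helper name `_generate` is illustrative, not part of the commit), would make the routing explicit:

    # Hypothetical refactor: route on model_type explicitly instead of via if/else
    GENERATORS = {
        "primary": generate_response_primary,
        "fallback": generate_response_fallback,
        "mbart": generate_response_fallback,  # mBART currently shares the fallback path
    }

    def _generate(self, user_query, contexts, language):
        fn = GENERATORS.get(self.model_type, generate_response_fallback)
        return fn(user_query, contexts, self.model, self.tokenizer, language)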
@@ -87,9 +113,9 @@ def retrieve_context(query, vector_store, top_k=5):
 
     return contexts
 
-@spaces.GPU  # Add decorator for GPU usage
-def generate_response(query, contexts, model, tokenizer, language="auto"):
-    """Generate a response using retrieved contexts with ALLaM-specific formatting"""
+@spaces.GPU
+def generate_response_primary(query, contexts, model, tokenizer, language="auto"):
+    """Generate a response using ALLaM model"""
     # Auto-detect language if not specified
     if language == "auto":
         language = detect_language(query)
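`@spaces.GPU` is the ZeroGPU mechanism: the Space holds no GPU at rest, and a device is attached only while a decorated call runs. A minimal sketch, assuming a ZeroGPU Space (the `duration` argument is optional):

    import spaces
    import torch

    @spaces.GPU(duration=120)  # seconds of GPU time requested per call
    def gpu_probe() -> str:
        # CUDA is only guaranteed to be available inside a decorated call
        return f"CUDA available: {torch.cuda.is_available()}"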
@@ -149,6 +175,53 @@ Question: {query} [/INST]</s>"""
     # Fallback response
     return "I apologize, but I encountered an error while generating a response."
 
+@spaces.GPU
+def generate_response_fallback(query, contexts, model, tokenizer, language="auto"):
+    """Generate a response using the fallback model (BLOOM or mBART)"""
+    # Auto-detect language if not specified
+    if language == "auto":
+        language = detect_language(query)
+
+    # Format the prompt based on language
+    if language == "arabic":
+        system_prompt = (
+            "أنت مساعد افتراضي يهتم برؤية السعودية 2030. استخدم السياق التالي للإجابة على السؤال: "
+        )
+    else:
+        system_prompt = (
+            "You are a virtual assistant for Saudi Vision 2030. Use the following context to answer the question: "
+        )
+
+    # Combine retrieved contexts
+    context_text = "\n\n".join([f"Document: {ctx['content']}" for ctx in contexts])
+
+    # Format prompt for fallback model (simpler format)
+    prompt = f"{system_prompt}\n\nContext:\n{context_text}\n\nQuestion: {query}\n\nAnswer:"
+
+    try:
+        # Generate with fallback model
+        inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(model.device)
+
+        outputs = model.generate(
+            inputs.input_ids,
+            attention_mask=inputs.attention_mask,
+            max_length=inputs.input_ids.shape[1] + 512,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+        # For most models, this is how we extract the response
+        response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+
+        # Cleanup and return
+        return response.strip()
+
+    except Exception as e:
+        print(f"Error during fallback generation: {e}")
+        return "I apologize, but I encountered an error while generating a response with the fallback model."
+
 def process_pdf_files(pdf_files):
     """Process PDF files and create documents"""
     documents = []
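One detail in the generation call above: `max_length=inputs.input_ids.shape[1] + 512` caps total length at prompt plus 512 tokens. `max_new_tokens` expresses the same budget directly; a sketch reusing `model`, `inputs`, and `tokenizer` from the function above:

    # Equivalent generation budget without re-deriving the prompt length
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )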
@@ -217,24 +290,67 @@ def create_vector_store(documents):
     vector_store = FAISS.from_documents(chunks, embedding_function)
     return vector_store
 
-# Variables to store state
-model = None
-tokenizer = None
-assistant = None
+# Attempt to create mock documents if none are available yet
+def create_mock_documents():
+    """Create mock documents about Vision 2030"""
+    documents = []
+
+    # Sample content about Vision 2030 in both languages
+    samples = [
+        {
+            "content": "رؤية السعودية 2030 هي خطة استراتيجية تهدف إلى تنويع الاقتصاد السعودي وتقليل الاعتماد على النفط مع تطوير قطاعات مختلفة مثل الصحة والتعليم والسياحة.",
+            "source": "vision2030_overview_ar.txt"
+        },
+        {
+            "content": "Saudi Vision 2030 is a strategic framework aiming to diversify Saudi Arabia's economy and reduce dependence on oil, while developing sectors like health, education, and tourism.",
+            "source": "vision2030_overview_en.txt"
+        },
+        {
+            "content": "تشمل الأهداف الاقتصادية لرؤية 2030 زيادة مساهمة القطاع الخاص من 40% إلى 65% من الناتج المحلي الإجمالي، ورفع نسبة الصادرات غير النفطية من 16% إلى 50% من الناتج المحلي الإجمالي غير النفطي، وخفض البطالة إلى 7%.",
+            "source": "economic_goals_ar.txt"
+        },
+        {
+            "content": "The economic goals of Vision 2030 include increasing private sector contribution from 40% to 65% of GDP, raising non-oil exports from 16% to 50%, and reducing unemployment from 11.6% to 7%.",
+            "source": "economic_goals_en.txt"
+        },
+        {
+            "content": "تركز رؤية 2030 على زيادة مشاركة المرأة في سوق العمل من 22% إلى 30% بحلول عام 2030، مع توفير فرص متساوية في التعليم والعمل.",
+            "source": "women_empowerment_ar.txt"
+        },
+        {
+            "content": "Vision 2030 emphasizes increasing women's participation in the workforce from 22% to 30% by 2030, while providing equal opportunities in education and employment.",
+            "source": "women_empowerment_en.txt"
+        }
+    ]
+
+    # Create documents from samples
+    for sample in samples:
+        doc = Document(
+            page_content=sample["content"],
+            metadata={"source": sample["source"], "filename": sample["source"]}
+        )
+        documents.append(doc)
+
+    print(f"Created {len(documents)} mock documents")
+    return documents
 
-# Load the model and tokenizer
-@spaces.GPU  # Add decorator for GPU usage
-def load_model_and_tokenizer():
-    global model, tokenizer
+@spaces.GPU
+def load_primary_model():
+    """Load the ALLaM-7B model with error handling"""
+    global model, tokenizer, model_type
 
-    if model is not None and tokenizer is not None:
-        return "Model already loaded"
+    if model is not None and tokenizer is not None and model_type == "primary":
+        return "Primary model (ALLaM-7B) already loaded"
 
     model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
-    print(f"Loading model: {model_name}")
+    print(f"Loading primary model: {model_name}")
 
     try:
-        # First attempt with AutoTokenizer
+        # Try to import sentencepiece explicitly first
+        import sentencepiece as spm
+        print("SentencePiece imported successfully")
+
+        # First attempt with AutoTokenizer and explicit trust_remote_code
         tokenizer = AutoTokenizer.from_pretrained(
             model_name,
             trust_remote_code=True,
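The mock documents exist so the retrieval path can be exercised without uploading PDFs: they become LangChain `Document`s and feed the same `create_vector_store` as real uploads. A self-contained sketch of that flow; the embedding model name is an assumption, since `create_vector_store`'s actual choice sits outside this diff:

    from langchain.schema import Document
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    docs = [Document(page_content="Saudi Vision 2030 is a strategic framework...",
                     metadata={"source": "vision2030_overview_en.txt"})]
    # Assumed multilingual embedder; the app's real setting may differ
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    store = FAISS.from_documents(docs, embeddings)
    print(store.similarity_search("economic goals of Vision 2030", k=1)[0].page_content)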
@@ -244,32 +360,64 @@ def load_model_and_tokenizer():
         # Load model with appropriate settings for ALLaM
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            torch_dtype=torch.bfloat16,  # Use bfloat16 for better compatibility
+            torch_dtype=torch.bfloat16,
             trust_remote_code=True,
             device_map="auto",
         )
 
-        return "Model loaded successfully with AutoTokenizer!"
+        model_type = "primary"
+        return "Primary model (ALLaM-7B) loaded successfully!"
 
     except Exception as e:
-        error_msg = f"First loading attempt failed: {e}"
+        error_msg = f"Primary model loading failed: {e}"
         print(error_msg)
+        return error_msg
 
-        try:
-            # Try with specific tokenizer class if the first attempt fails
-            from transformers import LlamaTokenizer
-
-            tokenizer = LlamaTokenizer.from_pretrained(model_name)
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16,
-                trust_remote_code=True,
-                device_map="auto",
-            )
-
-            return "Model loaded successfully with LlamaTokenizer!"
-        except Exception as e2:
-            return f"Both loading attempts failed. Error 1: {e}. Error 2: {e2}"
+@spaces.GPU
+def load_fallback_model():
+    """Load the fallback model (BLOOM-7B1) when ALLaM fails"""
+    global model, tokenizer, model_type
+
+    if model is not None and tokenizer is not None and model_type == "fallback":
+        return "Fallback model already loaded"
+
+    try:
+        print("Loading fallback model: BLOOM-7B1...")
+
+        # Use BLOOM model as fallback (it doesn't need SentencePiece)
+        tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-7b1")
+        model = AutoModelForCausalLM.from_pretrained(
+            "bigscience/bloom-7b1",
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            load_in_8bit=True  # Reduce memory usage
+        )
+
+        model_type = "fallback"
+        return "Fallback model (BLOOM-7B1) loaded successfully!"
+    except Exception as e:
+        return f"Fallback model loading failed: {e}"
+
+def load_mbart_model():
+    """Load mBART as a second fallback option"""
+    global model, tokenizer, model_type
+
+    try:
+        print("Loading mBART multilingual model...")
+
+        model_name = "facebook/mbart-large-50-many-to-many-mmt"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForSeq2SeqLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            load_in_8bit=True
+        )
+
+        model_type = "mbart"
+        return "mBART multilingual model loaded successfully!"
+    except Exception as e:
+        return f"mBART model loading failed: {e}"
 
 # Gradio Interface Functions
 def process_pdfs(pdf_files):
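Passing `load_in_8bit=True` straight to `from_pretrained` requires `bitsandbytes` and was later deprecated in favor of a quantization config object. A sketch of the newer form, assuming a transformers version that ships `BitsAndBytesConfig`:

    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    quant_config = BitsAndBytesConfig(load_in_8bit=True)  # needs bitsandbytes installed
    tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-7b1")
    model = AutoModelForCausalLM.from_pretrained(
        "bigscience/bloom-7b1",
        quantization_config=quant_config,
        device_map="auto",
    )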
@@ -285,27 +433,44 @@ def process_pdfs(pdf_files):
 
     # Ensure model is loaded
     if model is None or tokenizer is None:
-        load_status = load_model_and_tokenizer()
-        if "successfully" not in load_status.lower():
-            return f"Model loading failed: {load_status}"
+        return "Please load a model first (primary or fallback) before processing documents."
 
     # Create vector store
     vector_store = create_vector_store(documents)
 
     # Initialize assistant
-    assistant = Vision2030Assistant(model, tokenizer, vector_store)
+    assistant = Vision2030Assistant(model, tokenizer, vector_store, model_type)
 
     return f"Successfully processed {len(documents)} documents. The assistant is ready to use!"
 
-@spaces.GPU  # Add decorator for GPU usage
+def use_mock_documents():
+    """Use mock documents when no PDFs are available"""
+    documents = create_mock_documents()
+
+    global assistant, model, tokenizer
+
+    # Ensure model is loaded
+    if model is None or tokenizer is None:
+        return "Please load a model first (primary or fallback) before using mock documents."
+
+    # Create vector store
+    vector_store = create_vector_store(documents)
+
+    # Initialize assistant
+    assistant = Vision2030Assistant(model, tokenizer, vector_store, model_type)
+
+    return "Successfully initialized with mock Vision 2030 documents. The assistant is ready for testing!"
+
+@spaces.GPU
 def answer_query(message, history):
     global assistant
 
     if assistant is None:
-        return "Please upload and process Vision 2030 PDF documents first."
+        return [(message, "Please load a model and process documents first (or use mock documents for testing).")]
 
     response = assistant.answer(message)
-    return response
+    history.append((message, response))
+    return history
 
 def reset_chat():
     global assistant
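Note that when no assistant is loaded, `answer_query` returns a fresh one-turn list, which wipes any history already shown in the Chatbot. A sketch that preserves prior turns, assuming the tuple-style history this Chatbot uses and the module-level `assistant` global:

    def answer_query(message, history):
        history = history or []
        if assistant is None:
            history.append((message, "Please load a model and process documents first."))
            return history
        history.append((message, assistant.answer(message)))
        return history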
@@ -316,31 +481,62 @@ def reset_chat():
     reset_message = assistant.reset_conversation()
     return reset_message
 
+def restart_factory():
+    return "Restarting the application... Please reload the page in a few seconds."
+
 # Create Gradio interface
 with gr.Blocks(title="Vision 2030 Virtual Assistant") as demo:
     gr.Markdown("# Vision 2030 Virtual Assistant")
     gr.Markdown("Ask questions about Saudi Vision 2030 goals, projects, and progress in Arabic or English.")
 
     with gr.Tab("Setup"):
-        gr.Markdown("## Step 1: Load the Model")
-        load_btn = gr.Button("Load ALLaM-7B Model", variant="primary")
-        load_output = gr.Textbox(label="Load Status")
-        load_btn.click(load_model_and_tokenizer, inputs=[], outputs=load_output)
+        gr.Markdown("## Step 1: Load a Model")
+        with gr.Row():
+            with gr.Column():
+                primary_btn = gr.Button("Load ALLaM-7B Model (Primary)", variant="primary")
+                primary_output = gr.Textbox(label="Primary Model Status")
+                primary_btn.click(load_primary_model, inputs=[], outputs=primary_output)
+
+            with gr.Column():
+                fallback_btn = gr.Button("Load BLOOM-7B1 (Fallback)", variant="secondary")
+                fallback_output = gr.Textbox(label="Fallback Model Status")
+                fallback_btn.click(load_fallback_model, inputs=[], outputs=fallback_output)
+
+            with gr.Column():
+                mbart_btn = gr.Button("Load mBART (Alternative)", variant="secondary")
+                mbart_output = gr.Textbox(label="mBART Model Status")
+                mbart_btn.click(load_mbart_model, inputs=[], outputs=mbart_output)
+
+        gr.Markdown("## Step 2: Prepare Documents")
+        with gr.Row():
+            with gr.Column():
+                pdf_files = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDF Documents")
+                process_btn = gr.Button("Process Documents", variant="primary")
+                process_output = gr.Textbox(label="Processing Status")
+                process_btn.click(process_pdfs, inputs=[pdf_files], outputs=process_output)
+
+            with gr.Column():
+                mock_btn = gr.Button("Use Mock Documents (for testing)", variant="secondary")
+                mock_output = gr.Textbox(label="Mock Documents Status")
+                mock_btn.click(use_mock_documents, inputs=[], outputs=mock_output)
 
-        gr.Markdown("## Step 2: Upload Vision 2030 Documents")
-        pdf_files = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDF Documents")
-        process_btn = gr.Button("Process Documents", variant="primary")
-        process_output = gr.Textbox(label="Processing Status")
-        process_btn.click(process_pdfs, inputs=[pdf_files], outputs=process_output)
+        gr.Markdown("## Troubleshooting")
+        restart_btn = gr.Button("Restart Application", variant="secondary")
+        restart_output = gr.Textbox(label="Restart Status")
+        restart_btn.click(restart_factory, inputs=[], outputs=restart_output)
+        restart_btn.click(None, [], None, _js="() => {setTimeout(() => {location.reload()}, 5000)}")
 
     with gr.Tab("Chat"):
-        chatbot = gr.Chatbot(label="Conversation")
-        message = gr.Textbox(
-            label="Ask a question about Vision 2030 (in Arabic or English)",
-            placeholder="What are the main goals of Vision 2030?",
-            lines=2
-        )
-        submit_btn = gr.Button("Submit", variant="primary")
+        chatbot = gr.Chatbot(label="Conversation", height=500)
+
+        with gr.Row():
+            message = gr.Textbox(
+                label="Ask a question about Vision 2030 (in Arabic or English)",
+                placeholder="What are the main goals of Vision 2030?",
+                lines=2
+            )
+            submit_btn = gr.Button("Submit", variant="primary")
+
         reset_btn = gr.Button("Reset Conversation")
 
     gr.Markdown("### Example Questions")
@@ -375,7 +571,7 @@
     submit_btn.click(answer_query, inputs=[message, chatbot], outputs=[chatbot])
     message.submit(answer_query, inputs=[message, chatbot], outputs=[chatbot])
     reset_btn.click(reset_chat, inputs=[], outputs=[reset_output])
-    reset_btn.click(lambda: None, inputs=[], outputs=[chatbot], postprocess=False)
+    reset_btn.click(lambda: None, inputs=[], outputs=[chatbot], postprocess=lambda: [])
 
 # Launch the app
 demo.launch()
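A last note on the reset wiring: `postprocess` is a boolean flag on Gradio event listeners, not a callable, so `postprocess=lambda: []` only takes effect by being truthy. Returning an empty list from the function itself is the usual way to clear a Chatbot; a sketch:

    reset_btn.click(lambda: [], inputs=[], outputs=[chatbot])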