# Hugging Face Spaces status: Running on Zero
# Vision 2030 Virtual Assistant with RAG and Evaluation Framework
# Modified for Hugging Face Spaces compatibility
import json
import logging
import os
import re
import time
from datetime import datetime

import faiss
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PyPDF2
from langdetect import detect
from sentence_transformers import SentenceTransformer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
# Configure logging: INFO and above goes to the console stream, which is
# what the Hugging Face Spaces log viewer captures.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
    ],
)

# Module-wide logger shared by the assistant class and the Gradio callbacks.
logger = logging.getLogger('vision2030_assistant')
class Vision2030Assistant:
    """Bilingual (Arabic/English) question-answering assistant for Vision 2030.

    No language model is used.  A question is answered either by a small
    table of keyword rules with canned answers or, when no rule matches, by
    returning the text chunks retrieved from a per-language FAISS index
    built from sentence-transformer embeddings.  The class also collects
    simple evaluation metrics (response times, user ratings and a
    keyword-overlap "factual accuracy" score).
    """

    # Number of chunks retrieve_context returns unless the caller overrides it.
    DEFAULT_TOP_K = 2

    # Tokeniser and stopword lists used by evaluate_factual_accuracy.
    _WORD_RE = re.compile(r'\b\w+\b')
    _ENGLISH_STOPWORDS = frozenset(
        {"the", "is", "a", "an", "and", "or", "of", "to", "in", "for", "with", "by", "on", "at"}
    )
    _ARABIC_STOPWORDS = frozenset({"في", "من", "إلى", "على", "و", "هي", "هو", "عن", "مع"})

    # Keyword rules, checked in order: (substrings, canned answer).
    # Arabic rules are matched against the raw input.
    _ARABIC_RULES = (
        (("ركائز", "اركان"),
         "الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح."),
        (("نيوم",),
         "نيوم هي مدينة ذكية مخططة عبر الحدود في مقاطعة تبوك شمال غرب المملكة العربية السعودية، وهي مشروع رئيسي من رؤية 2030."),
        (("البحر الأحمر", "البحر الاحمر"),
         "مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي."),
        (("المرأة", "النساء"),
         "تهدف رؤية 2030 إلى زيادة مشاركة المرأة في القوى العاملة من 22٪ إلى 30٪."),
        (("القدية",),
         "القدية هي مشروع ترفيهي ضخم يتم بناؤه في الرياض كجزء من رؤية 2030."),
    )

    # English rules are matched against the lower-cased input.  The generic
    # "key" substring is deliberately LAST: it appears in questions about
    # every topic ("Is NEOM a key project?"), so the topic-specific rules
    # must get the first chance to match.
    _ENGLISH_RULES = (
        (("pillar",),
         "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation."),
        (("neom",),
         "NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030."),
        (("red sea",),
         "The Red Sea Project is a Vision 2030 initiative to develop luxury tourism destinations across 50 islands off Saudi Arabia's Red Sea coast."),
        (("women", "female"),
         "Vision 2030 aims to increase women's participation in the workforce from 22% to 30%."),
        (("qiddiya",),
         "Qiddiya is a entertainment mega-project being built in Riyadh as part of Vision 2030."),
        (("key",),
         "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation."),
    )

    # Replies used when neither a rule nor retrieval produced an answer.
    _NOT_FOUND = {
        "en": "I couldn't find enough information about this question.",
        "ar": "لم أتمكن من العثور على معلومات كافية حول هذا السؤال.",
    }

    def __init__(self, pdf_path=None, eval_data_path=None):
        """
        Initialize the Vision 2030 Assistant with embedding models and evaluation framework

        Args:
            pdf_path: Path to the Vision 2030 PDF document.  When missing or
                not an existing file, built-in sample texts are indexed.
            eval_data_path: Path to a JSON evaluation dataset.  When missing
                or not an existing file, built-in sample questions are used.
        """
        logger.info("Initializing Vision 2030 Assistant...")

        # Defined up front so retrieval can always test them for None.
        self.english_index = None
        self.arabic_index = None

        # Initialize embedding models only (no LLMs to avoid tokenizer issues)
        self.load_embedding_models()

        # Load documents (load_and_process_documents builds the indices itself)
        if pdf_path and os.path.exists(pdf_path):
            self.load_and_process_documents(pdf_path)
        else:
            self._create_sample_data()
            self._create_indices()

        # Setup evaluation framework
        if eval_data_path and os.path.exists(eval_data_path):
            with open(eval_data_path, 'r', encoding='utf-8') as f:
                self.eval_data = json.load(f)
        else:
            self._create_sample_eval_data()

        self.metrics = {
            "response_times": [],
            "user_ratings": [],
            "retrieval_precision": [],
            "factual_accuracy": [],
        }
        self.response_history = []
        # Full feedback records (the ratings alone also go into self.metrics).
        self.feedback_history = []
        logger.info("Vision 2030 Assistant initialized successfully")

    def load_embedding_models(self):
        """Load the Arabic and English sentence-embedding models.

        Raises:
            Exception: whatever SentenceTransformer raises; it is logged and
                re-raised because the assistant cannot work without models.
        """
        logger.info("Loading embedding models...")
        try:
            self.arabic_embedder = SentenceTransformer('CAMeL-Lab/bert-base-arabic-camelbert-ca')
            self.english_embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
            logger.info("Embedding models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading embedding models: {str(e)}")
            raise

    @staticmethod
    def _detect_language(text):
        """Classify *text* as "ar" (Arabic) or "en" (anything else).

        Text whose letters are mostly Arabic-script is classified directly,
        because langdetect is non-deterministic on short inputs and may label
        short Arabic questions as another Arabic-script language.  Other text
        is passed to langdetect; any detection failure falls back to "en".
        """
        if not isinstance(text, str) or not text.strip():
            return "en"
        letters = [ch for ch in text if ch.isalpha()]
        arabic_letters = sum(1 for ch in letters if "\u0600" <= ch <= "\u06FF")
        if arabic_letters and arabic_letters * 2 > len(letters):
            return "ar"
        try:
            return "ar" if detect(text) == "ar" else "en"
        except Exception:
            # langdetect raises on inputs without usable features
            # (digits/punctuation only); treat those as English.
            return "en"

    def load_and_process_documents(self, pdf_path):
        """Load the Vision 2030 PDF, split it into chunks and index them.

        Chunks are paragraphs separated by blank lines; each is routed to the
        Arabic or English corpus by language.  If reading fails, or the PDF
        yields no text at all, the built-in sample data is indexed instead.
        """
        logger.info(f"Processing Vision 2030 document from {pdf_path}")
        # Initialize empty document lists
        self.english_texts = []
        self.arabic_texts = []
        try:
            # Extract text from PDF
            with open(pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                # extract_text() may return None/"" for image-only pages.
                full_text = "".join(
                    (page.extract_text() or "") + "\n" for page in reader.pages
                )

            # Split into chunks (simple approach - could be improved with better text segmentation)
            chunks = [chunk.strip() for chunk in re.split(r'\n\s*\n', full_text) if chunk.strip()]
            if not chunks:
                # Without text there is nothing to index; use the fallback path.
                raise ValueError("no extractable text found in PDF")

            # Detect language and add to appropriate list
            for chunk in chunks:
                if self._detect_language(chunk) == "ar":
                    self.arabic_texts.append(chunk)
                else:  # Default to English for other languages
                    self.english_texts.append(chunk)
            logger.info(f"Processed {len(self.arabic_texts)} Arabic and {len(self.english_texts)} English chunks")

            # Create FAISS indices
            self._create_indices()
        except Exception as e:
            logger.error(f"Error processing PDF: {str(e)}")
            logger.info("Using fallback sample data")
            self._create_sample_data()
            self._create_indices()

    def _create_sample_data(self):
        """Create sample Vision 2030 data if PDF processing fails"""
        logger.info("Creating sample Vision 2030 data")
        # English sample texts
        self.english_texts = [
            "Vision 2030 is Saudi Arabia's strategic framework to reduce dependence on oil, diversify the economy, and develop public sectors.",
            "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation.",
            "The Saudi Public Investment Fund (PIF) plays a crucial role in Vision 2030 by investing in strategic sectors.",
            "NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030.",
            "Vision 2030 aims to increase women's participation in the workforce from 22% to 30%.",
            "The Red Sea Project is a Vision 2030 initiative to develop luxury tourism destinations across 50 islands off Saudi Arabia's Red Sea coast.",
            "Qiddiya is a entertainment mega-project being built in Riyadh as part of Vision 2030.",
            "Vision 2030 targets increasing the private sector's contribution to GDP from 40% to 65%.",
            "One goal of Vision 2030 is to increase foreign direct investment from 3.8% to 5.7% of GDP.",
            "Vision 2030 includes plans to develop the digital infrastructure and support for tech startups in Saudi Arabia.",
        ]
        # Arabic sample texts (same content as English)
        self.arabic_texts = [
            "رؤية 2030 هي الإطار الاستراتيجي للمملكة العربية السعودية للحد من الاعتماد على النفط وتنويع الاقتصاد وتطوير القطاعات العامة.",
            "الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح.",
            "يلعب صندوق الاستثمارات العامة السعودي دورًا محوريًا في رؤية 2030 من خلال الاستثمار في القطاعات الاستراتيجية.",
            "نيوم هي مدينة ذكية مخططة عبر الحدود في مقاطعة تبوك شمال غرب المملكة العربية السعودية، وهي مشروع رئيسي من رؤية 2030.",
            "تهدف رؤية 2030 إلى زيادة مشاركة المرأة في القوى العاملة من 22٪ إلى 30٪.",
            "مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي.",
            "القدية هي مشروع ترفيهي ضخم يتم بناؤه في الرياض كجزء من رؤية 2030.",
            "تستهدف رؤية 2030 زيادة مساهمة القطاع الخاص في الناتج المحلي الإجمالي من 40٪ إلى 65٪.",
            "أحد أهداف رؤية 2030 هو زيادة الاستثمار الأجنبي المباشر من 3.8٪ إلى 5.7٪ من الناتج المحلي الإجمالي.",
            "تتضمن رؤية 2030 خططًا لتطوير البنية التحتية الرقمية والدعم للشركات الناشئة التكنولوجية في المملكة العربية السعودية.",
        ]

    @staticmethod
    def _build_index(embedder, texts, label):
        """Embed *texts* and return ``(vectors, index)`` for one language.

        Returns ``([], None)`` when there is nothing to index so callers can
        test the index for None instead of hitting a missing attribute.
        """
        if not texts:
            logger.warning(f"No {label} texts to index")
            return [], None
        # One batched encode call; FAISS needs a contiguous float32 matrix.
        matrix = np.ascontiguousarray(embedder.encode(list(texts)), dtype="float32")
        index = faiss.IndexFlatL2(matrix.shape[1])
        index.add(matrix)
        logger.info(f"Created {label} index with {len(matrix)} vectors")
        return list(matrix), index

    def _create_indices(self):
        """Create FAISS indices for fast text retrieval.

        Sets ``english_vectors``/``english_index`` and
        ``arabic_vectors``/``arabic_index``; an index is None when the
        corresponding corpus is empty.

        Raises:
            Exception: embedding or FAISS errors are logged and re-raised.
        """
        logger.info("Creating FAISS indices for text retrieval")
        try:
            self.english_vectors, self.english_index = self._build_index(
                self.english_embedder, self.english_texts, "English"
            )
            self.arabic_vectors, self.arabic_index = self._build_index(
                self.arabic_embedder, self.arabic_texts, "Arabic"
            )
        except Exception as e:
            logger.error(f"Error creating FAISS indices: {str(e)}")
            raise

    def _create_sample_eval_data(self):
        """Create sample evaluation data with ground truth"""
        self.eval_data = [
            {
                "question": "What are the key pillars of Vision 2030?",
                "lang": "en",
                "reference_answer": "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation.",
            },
            {
                "question": "ما هي الركائز الرئيسية لرؤية 2030؟",
                "lang": "ar",
                "reference_answer": "الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح.",
            },
            {
                "question": "What is NEOM?",
                "lang": "en",
                "reference_answer": "NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030.",
            },
            {
                "question": "ما هو مشروع البحر الأحمر؟",
                "lang": "ar",
                "reference_answer": "مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي.",
            },
            {
                "question": "What are the goals for women's workforce participation?",
                "lang": "en",
                "reference_answer": "Vision 2030 aims to increase women's participation in the workforce from 22% to 30%.",
            },
            {
                "question": "ما هي القدية؟",
                "lang": "ar",
                "reference_answer": "القدية هي مشروع ترفيهي ضخم يتم بناؤه في الرياض كجزء من رؤية 2030.",
            },
        ]
        logger.info(f"Created {len(self.eval_data)} sample evaluation examples")

    def retrieve_context(self, query, lang, k=None):
        """Retrieve relevant context for a query based on language.

        Args:
            query: The user's question.
            lang: "ar" selects the Arabic corpus; anything else the English one.
            k: Number of chunks to retrieve (defaults to DEFAULT_TOP_K).

        Returns:
            The retrieved chunks joined by newlines, or "" when the corpus is
            empty or retrieval fails (failures are logged, not raised).
        """
        start_time = time.time()
        top_k = self.DEFAULT_TOP_K if k is None else k
        try:
            if lang == "ar":
                embedder, index, texts = self.arabic_embedder, self.arabic_index, self.arabic_texts
            else:
                embedder, index, texts = self.english_embedder, self.english_index, self.english_texts

            if index is None or not texts:
                logger.warning(f"No indexed texts available for language '{lang}'")
                return ""

            query_vec = np.asarray([embedder.encode(query)], dtype="float32")
            _, neighbours = index.search(query_vec, k=top_k)
            # FAISS pads with -1 when fewer than k vectors exist.
            context = "\n".join(texts[i] for i in neighbours[0] if 0 <= i < len(texts))

            retrieval_time = time.time() - start_time
            logger.info(f"Retrieved context in {retrieval_time:.2f}s")
            return context
        except Exception as e:
            logger.error(f"Error retrieving context: {str(e)}")
            return ""

    @classmethod
    def _match_canned_reply(cls, user_input, lang):
        """Return the canned answer of the first matching keyword rule, or None."""
        if lang == "ar":
            haystack, rules = user_input, cls._ARABIC_RULES
        else:
            haystack, rules = user_input.lower(), cls._ENGLISH_RULES
        for keywords, answer in rules:
            if any(keyword in haystack for keyword in keywords):
                return answer
        return None

    def generate_response(self, user_input):
        """Generate a response to user input using retrieval and predefined responses for evaluation.

        The response time is appended to ``metrics["response_times"]`` and the
        interaction to ``response_history``.  Errors never propagate: a
        language-appropriate apology is returned instead.
        """
        start_time = time.time()
        # Default response in case of failure
        default_response = {
            "en": "I apologize, but I couldn't process your request properly. Please try again.",
            "ar": "أعتذر، لم أتمكن من معالجة طلبك بشكل صحيح. الرجاء المحاولة مرة أخرى.",
        }
        # Resolved before the try block so the error path can always rely on it.
        lang = self._detect_language(user_input)
        try:
            logger.info(f"Detected language: {lang}")
            # Retrieve relevant context
            context = self.retrieve_context(user_input, lang)
            # Simplified response generation for HF Spaces: keyword rules
            # first, then the retrieved context, then a "not found" message.
            reply = self._match_canned_reply(user_input, lang)
            if reply is None:
                reply = context if context else self._NOT_FOUND[lang]
        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            reply = default_response.get(lang, default_response["en"])

        # Record response time
        response_time = time.time() - start_time
        self.metrics["response_times"].append(response_time)
        logger.info(f"Generated response in {response_time:.2f}s")

        # Store the interaction for later evaluation
        interaction = {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "response": reply,
            "language": lang,
            "response_time": response_time,
        }
        self.response_history.append(interaction)
        return reply

    def evaluate_factual_accuracy(self, response, reference):
        """Simple evaluation of factual accuracy by keyword matching.

        Returns the fraction of the reference's non-stopword tokens that also
        occur in the response (0 when the reference has no such tokens).
        """
        # This is a simplified approach - in production, use more sophisticated methods
        stopwords = self._ENGLISH_STOPWORDS | self._ARABIC_STOPWORDS
        keywords_reference = set(self._WORD_RE.findall(reference.lower())) - stopwords
        keywords_response = set(self._WORD_RE.findall(response.lower())) - stopwords
        if not keywords_reference:
            return 0
        return len(keywords_reference & keywords_response) / len(keywords_reference)

    def evaluate_on_test_set(self):
        """Evaluate the assistant on the test set.

        Returns:
            dict with ``average_factual_accuracy`` and ``average_response_time``
            computed over THIS run only, plus ``detailed_results`` (one dict
            per question).  Per-question accuracies are also appended to
            ``metrics["factual_accuracy"]`` as a running history.
        """
        logger.info("Running evaluation on test set")
        eval_results = []
        # Response times recorded from here on belong to this evaluation run.
        first_timing = len(self.metrics["response_times"])
        for example in self.eval_data:
            # Generate response
            response = self.generate_response(example["question"])
            # Calculate factual accuracy
            accuracy = self.evaluate_factual_accuracy(response, example["reference_answer"])
            eval_results.append({
                "question": example["question"],
                "reference": example["reference_answer"],
                "response": response,
                "factual_accuracy": accuracy,
            })
            self.metrics["factual_accuracy"].append(accuracy)

        # Average over this run so the summary matches detailed_results even
        # after earlier evaluations or chat traffic.
        run_accuracies = [item["factual_accuracy"] for item in eval_results]
        run_timings = self.metrics["response_times"][first_timing:]
        avg_accuracy = sum(run_accuracies) / len(run_accuracies) if run_accuracies else 0
        avg_response_time = sum(run_timings) / len(run_timings) if run_timings else 0

        results = {
            "average_factual_accuracy": avg_accuracy,
            "average_response_time": avg_response_time,
            "detailed_results": eval_results,
        }
        logger.info(f"Evaluation results: Factual accuracy = {avg_accuracy:.2f}, Avg response time = {avg_response_time:.2f}s")
        return results

    def visualize_evaluation_results(self, results):
        """Generate visualization of evaluation results.

        Draws accuracy per question (top) and mean accuracy per language
        (bottom) and returns the matplotlib Figure.  With no detailed results
        the figure only carries a "No evaluation results" note.
        """
        # Create a DataFrame from the detailed results
        df = pd.DataFrame(results["detailed_results"])

        # Explicit axes instead of pyplot's implicit "current axes", which is
        # shared global state.
        fig, (ax_question, ax_language) = plt.subplots(2, 1, figsize=(12, 8))
        # Detach from pyplot's figure registry so repeated evaluations do not
        # accumulate open figures; the Figure object itself stays usable.
        plt.close(fig)

        if df.empty:
            for ax in (ax_question, ax_language):
                ax.text(0.5, 0.5, "No evaluation results", ha='center', va='center',
                        transform=ax.transAxes)
            return fig

        average = results["average_factual_accuracy"]

        # Bar chart of factual accuracy by question
        ax_question.bar(range(len(df)), df["factual_accuracy"], color="skyblue")
        ax_question.axhline(y=average, color='r', linestyle='-', label=f"Avg: {average:.2f}")
        ax_question.set_xlabel("Question Index")
        ax_question.set_ylabel("Factual Accuracy")
        ax_question.set_title("Factual Accuracy by Question")
        ax_question.set_ylim(0, 1.1)
        ax_question.legend()

        # Add language information and group by language
        df["language"] = df["question"].apply(
            lambda q: "Arabic" if self._detect_language(q) == "ar" else "English"
        )
        lang_accuracy = df.groupby("language")["factual_accuracy"].mean()

        # Bar chart of accuracy by language
        bar_colors = ["lightblue", "lightgreen"][:len(lang_accuracy)]
        ax_language.bar(lang_accuracy.index, lang_accuracy.values, color=bar_colors)
        ax_language.axhline(y=average, color='r', linestyle='-', label=f"Overall: {average:.2f}")
        ax_language.set_xlabel("Language")
        ax_language.set_ylabel("Average Factual Accuracy")
        ax_language.set_title("Factual Accuracy by Language")
        ax_language.set_ylim(0, 1.1)
        ax_language.legend()

        # Add value labels
        for i, v in enumerate(lang_accuracy):
            ax_language.text(i, v + 0.05, f"{v:.2f}", ha='center')

        fig.tight_layout()
        return fig

    def record_user_feedback(self, user_input, response, rating, feedback_text=""):
        """Record user feedback for a response.

        The rating is appended to ``metrics["user_ratings"]`` and the full
        record to ``feedback_history``.  Always returns True.
        """
        feedback = {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "response": response,
            "rating": rating,
            "feedback_text": feedback_text,
        }
        self.metrics["user_ratings"].append(rating)
        # In a production system, store this in a database
        self.feedback_history.append(feedback)
        logger.info(f"Recorded user feedback: rating={rating}")
        return True
# Create the Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for the assistant and return it (not launched).

    The UI has three tabs: chat with feedback, evaluation on the test set,
    and PDF upload.  All callbacks share one ``Vision2030Assistant`` held in
    this function's scope; uploading a PDF replaces that shared instance.
    """
    # Initialize the assistant
    assistant = Vision2030Assistant()

    def chat(message, history):
        """Answer *message* and append the (question, answer) pair to the history."""
        # Gradio may pass None for an empty chatbot; work on a copy.
        history = list(history) if history else []
        if not message or not message.strip():
            return history, ""
        reply = assistant.generate_response(message)
        history.append((message, reply))
        # Second output clears the input textbox.
        return history, ""

    def provide_feedback(history, rating, feedback_text):
        """Attach a rating (and optional comment) to the latest chat exchange."""
        if not history:
            return "No conversation found to rate."
        last_question, last_reply = history[-1][0], history[-1][1]
        assistant.record_user_feedback(last_question, last_reply, rating, feedback_text)
        return f"Thank you for your feedback! (Rating: {rating}/5)"

    def run_evaluation():
        """Run the test-set evaluation; return a text summary and a figure."""
        results = assistant.evaluate_on_test_set()
        # Create summary text
        lines = [
            "",
            "Evaluation Results:",
            "------------------",
            f"Total questions evaluated: {len(results['detailed_results'])}",
            f"Overall factual accuracy: {results['average_factual_accuracy']:.2f}",
            f"Average response time: {results['average_response_time']:.4f} seconds",
            "Detailed Results:",
            "",
        ]
        for i, result in enumerate(results['detailed_results']):
            lines.extend([
                f"Q{i+1}: {result['question']}",
                f"Reference: {result['reference']}",
                f"Response: {result['response']}",
                f"Accuracy: {result['factual_accuracy']:.2f}",
                "-" * 40,
            ])
        summary = "\n".join(lines) + "\n"
        # Return both the results summary and visualization
        fig = assistant.visualize_evaluation_results(results)
        return summary, fig

    def process_uploaded_file(file):
        """Rebuild the shared assistant from an uploaded PDF."""
        # nonlocal (not global): the other callbacks read the enclosing
        # function's variable, so that is the binding that must be replaced.
        nonlocal assistant
        if file is None:
            return "No file uploaded. Using sample data."
        # Depending on the Gradio version/config the upload arrives either as
        # a path string or as a file-like object exposing `.name`.
        pdf_path = file if isinstance(file, str) else file.name
        assistant = Vision2030Assistant(pdf_path=pdf_path)
        return f"Successfully processed {pdf_path}. The assistant is ready to use."

    # Create the Gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("# Vision 2030 Virtual Assistant 🌟")
        gr.Markdown("Ask questions about Saudi Arabia's Vision 2030 in both Arabic and English")

        with gr.Tab("Chat"):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Your Question", placeholder="Ask about Vision 2030...")
            with gr.Row():
                submit_btn = gr.Button("Submit")
                clear_btn = gr.Button("Clear Chat")
            gr.Markdown("### Provide Feedback")
            with gr.Row():
                rating = gr.Slider(minimum=1, maximum=5, step=1, value=3, label="Rate the Response (1-5)")
                feedback_text = gr.Textbox(label="Additional Comments (Optional)")
            feedback_btn = gr.Button("Submit Feedback")
            feedback_result = gr.Textbox(label="Feedback Status")

        with gr.Tab("Evaluation"):
            evaluate_btn = gr.Button("Run Evaluation on Test Set")
            eval_output = gr.Textbox(label="Evaluation Results", lines=20)
            eval_chart = gr.Plot(label="Evaluation Metrics")

        with gr.Tab("Upload PDF"):
            file_input = gr.File(label="Upload Vision 2030 PDF")
            upload_result = gr.Textbox(label="Upload Status")
            upload_btn = gr.Button("Process PDF")

        # Set up event handlers
        msg.submit(chat, [msg, chatbot], [chatbot, msg])
        submit_btn.click(chat, [msg, chatbot], [chatbot, msg])
        clear_btn.click(lambda: [], None, chatbot)
        feedback_btn.click(provide_feedback, [chatbot, rating, feedback_text], feedback_result)
        evaluate_btn.click(run_evaluation, None, [eval_output, eval_chart])
        upload_btn.click(process_uploaded_file, [file_input], upload_result)

    return demo
# Build the app at import time so `demo` stays available as a module-level
# name, but only start the server when this file is run as a script.
demo = create_gradio_interface()

if __name__ == "__main__":
    demo.launch()