Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -140,8 +140,16 @@ def simple_process_pdfs(pdf_paths):
|
|
140 |
"""Process PDF documents and return document objects"""
|
141 |
documents = []
|
142 |
|
|
|
|
|
|
|
143 |
for pdf_path in pdf_paths:
|
144 |
try:
|
|
|
|
|
|
|
|
|
|
|
145 |
text = ""
|
146 |
with open(pdf_path, 'rb') as file:
|
147 |
reader = PyPDF2.PdfReader(file)
|
@@ -161,6 +169,8 @@ def simple_process_pdfs(pdf_paths):
|
|
161 |
print(f"Warning: No text extracted from {pdf_path}")
|
162 |
except Exception as e:
|
163 |
print(f"Error processing {pdf_path}: {e}")
|
|
|
|
|
164 |
|
165 |
print(f"Processed {len(documents)} PDF documents")
|
166 |
return documents
|
@@ -414,26 +424,21 @@ comprehensive_evaluation_data = [
|
|
414 |
# Gradio Interface
|
415 |
def initialize_system():
|
416 |
"""Initialize the Vision 2030 Assistant system"""
|
417 |
-
#
|
418 |
-
|
419 |
-
# and if vector stores are already created
|
420 |
-
|
421 |
-
# Define paths
|
422 |
-
model_dir = "models"
|
423 |
-
vector_store_dir = "vector_stores"
|
424 |
-
pdf_dir = "pdf_data"
|
425 |
|
426 |
-
|
427 |
-
os.
|
428 |
-
os.makedirs(pdf_dir, exist_ok=True)
|
429 |
-
|
430 |
-
# Check if we need to download PDFs
|
431 |
-
pdf_files = ["vision2030_docs/saudi_vision203.pdf", "vision2030_docs/saudi_vision2030_ar.pdf"]
|
432 |
|
433 |
-
#
|
434 |
-
|
|
|
|
|
435 |
|
436 |
# Process PDFs and create vector store
|
|
|
|
|
|
|
437 |
if os.path.exists(os.path.join(vector_store_dir, "index.faiss")):
|
438 |
print("Loading existing vector store...")
|
439 |
embedding_function = HuggingFaceEmbeddings(
|
@@ -443,6 +448,8 @@ def initialize_system():
|
|
443 |
else:
|
444 |
print("Creating new vector store...")
|
445 |
documents = simple_process_pdfs(pdf_files)
|
|
|
|
|
446 |
vector_store = create_vector_store(documents)
|
447 |
vector_store.save_local(vector_store_dir)
|
448 |
|
@@ -505,14 +512,19 @@ def run_evaluation_on_sample(assistant, sample_index=0):
|
|
505 |
# Evaluate response
|
506 |
evaluation_results = evaluate_response(query, response, reference)
|
507 |
|
508 |
-
# Format for display
|
509 |
-
metrics_str = "\n".join([f"{k}: {v}" for k, v in evaluation_results.items()])
|
510 |
-
|
511 |
return query, response, reference, evaluation_results, sources, category, language
|
512 |
|
513 |
-
def qualitative_evaluation_interface(assistant):
|
514 |
"""Create a Gradio interface for qualitative evaluation"""
|
515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
sample_options = [f"{i+1}. {item['query'][:50]}..." for i, item in enumerate(comprehensive_evaluation_data)]
|
517 |
|
518 |
with gr.Blocks(title="Vision 2030 Assistant - Qualitative Evaluation") as interface:
|
@@ -596,8 +608,11 @@ def qualitative_evaluation_interface(assistant):
|
|
596 |
query, response, reference, metrics, sources, category, language = run_evaluation_on_sample(assistant, index)
|
597 |
sources_str = ", ".join(sources)
|
598 |
return query, response, reference, metrics, sources_str, category, language
|
599 |
-
except:
|
600 |
-
|
|
|
|
|
|
|
601 |
|
602 |
eval_button.click(
|
603 |
handle_sample_selection,
|
@@ -614,6 +629,7 @@ def qualitative_evaluation_interface(assistant):
|
|
614 |
)
|
615 |
|
616 |
# Custom evaluation event handlers
|
|
|
617 |
def handle_custom_evaluation(query, reference):
|
618 |
if not query:
|
619 |
return "Please enter a query", "", {}
|
@@ -639,6 +655,7 @@ def qualitative_evaluation_interface(assistant):
|
|
639 |
)
|
640 |
|
641 |
# Conversation mode event handlers
|
|
|
642 |
def handle_conversation(message, history):
|
643 |
if not message:
|
644 |
return history, "", ""
|
@@ -672,19 +689,72 @@ def qualitative_evaluation_interface(assistant):
|
|
672 |
|
673 |
# Main function to run in Hugging Face Space
|
674 |
def main():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
675 |
# Initialize the system
|
676 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
677 |
assistant = initialize_system()
|
|
|
|
|
678 |
interface = qualitative_evaluation_interface(assistant)
|
|
|
|
|
679 |
interface.launch()
|
680 |
except Exception as e:
|
681 |
-
print(f"Error
|
|
|
|
|
|
|
682 |
# Create a simple error interface
|
683 |
-
gr.
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
688 |
|
689 |
if __name__ == "__main__":
|
690 |
main()
|
|
|
140 |
"""Process PDF documents and return document objects"""
|
141 |
documents = []
|
142 |
|
143 |
+
print(f"Attempting to process PDFs: {pdf_paths}")
|
144 |
+
print(f"Current directory contents: {os.listdir('.')}")
|
145 |
+
|
146 |
for pdf_path in pdf_paths:
|
147 |
try:
|
148 |
+
if not os.path.exists(pdf_path):
|
149 |
+
print(f"Warning: {pdf_path} does not exist")
|
150 |
+
continue
|
151 |
+
|
152 |
+
print(f"Processing {pdf_path}...")
|
153 |
text = ""
|
154 |
with open(pdf_path, 'rb') as file:
|
155 |
reader = PyPDF2.PdfReader(file)
|
|
|
169 |
print(f"Warning: No text extracted from {pdf_path}")
|
170 |
except Exception as e:
|
171 |
print(f"Error processing {pdf_path}: {e}")
|
172 |
+
import traceback
|
173 |
+
traceback.print_exc()
|
174 |
|
175 |
print(f"Processed {len(documents)} PDF documents")
|
176 |
return documents
|
|
|
424 |
# Gradio Interface
|
425 |
def initialize_system():
|
426 |
"""Initialize the Vision 2030 Assistant system"""
|
427 |
+
# Define paths for PDF files in the root directory
|
428 |
+
pdf_files = ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
+
# Print available files for debugging
|
431 |
+
print("Files in current directory:", os.listdir("."))
|
|
|
|
|
|
|
|
|
432 |
|
433 |
+
# Check if PDFs exist
|
434 |
+
for pdf_file in pdf_files:
|
435 |
+
if not os.path.exists(pdf_file):
|
436 |
+
print(f"Warning: {pdf_file} not found")
|
437 |
|
438 |
# Process PDFs and create vector store
|
439 |
+
vector_store_dir = "vector_stores"
|
440 |
+
os.makedirs(vector_store_dir, exist_ok=True)
|
441 |
+
|
442 |
if os.path.exists(os.path.join(vector_store_dir, "index.faiss")):
|
443 |
print("Loading existing vector store...")
|
444 |
embedding_function = HuggingFaceEmbeddings(
|
|
|
448 |
else:
|
449 |
print("Creating new vector store...")
|
450 |
documents = simple_process_pdfs(pdf_files)
|
451 |
+
if not documents:
|
452 |
+
raise ValueError("No documents were processed successfully. Cannot continue.")
|
453 |
vector_store = create_vector_store(documents)
|
454 |
vector_store.save_local(vector_store_dir)
|
455 |
|
|
|
512 |
# Evaluate response
|
513 |
evaluation_results = evaluate_response(query, response, reference)
|
514 |
|
|
|
|
|
|
|
515 |
return query, response, reference, evaluation_results, sources, category, language
|
516 |
|
517 |
+
def qualitative_evaluation_interface(assistant=None):
|
518 |
"""Create a Gradio interface for qualitative evaluation"""
|
519 |
|
520 |
+
# If assistant is None, create a simplified interface
|
521 |
+
if assistant is None:
|
522 |
+
with gr.Blocks(title="Vision 2030 Assistant - Initialization Error") as interface:
|
523 |
+
gr.Markdown("# Vision 2030 Assistant - Initialization Error")
|
524 |
+
gr.Markdown("There was an error initializing the assistant. Please check the logs for details.")
|
525 |
+
gr.Textbox(label="Status", value="System initialization failed")
|
526 |
+
return interface
|
527 |
+
|
528 |
sample_options = [f"{i+1}. {item['query'][:50]}..." for i, item in enumerate(comprehensive_evaluation_data)]
|
529 |
|
530 |
with gr.Blocks(title="Vision 2030 Assistant - Qualitative Evaluation") as interface:
|
|
|
608 |
query, response, reference, metrics, sources, category, language = run_evaluation_on_sample(assistant, index)
|
609 |
sources_str = ", ".join(sources)
|
610 |
return query, response, reference, metrics, sources_str, category, language
|
611 |
+
except Exception as e:
|
612 |
+
print(f"Error in handle_sample_selection: {e}")
|
613 |
+
import traceback
|
614 |
+
traceback.print_exc()
|
615 |
+
return f"Error processing selection: {e}", "", "", {}, "", "", ""
|
616 |
|
617 |
eval_button.click(
|
618 |
handle_sample_selection,
|
|
|
629 |
)
|
630 |
|
631 |
# Custom evaluation event handlers
|
632 |
+
@spaces.GPU # Use GPU for custom evaluation
|
633 |
def handle_custom_evaluation(query, reference):
|
634 |
if not query:
|
635 |
return "Please enter a query", "", {}
|
|
|
655 |
)
|
656 |
|
657 |
# Conversation mode event handlers
|
658 |
+
@spaces.GPU # Use GPU for conversation handling
|
659 |
def handle_conversation(message, history):
|
660 |
if not message:
|
661 |
return history, "", ""
|
|
|
689 |
|
690 |
# Main function to run in Hugging Face Space
|
691 |
def main():
    """Launch the Vision 2030 Assistant, or a diagnostics page if startup fails.

    Prints a short report of the working directory, warns about any expected
    PDF that is missing, then initializes the assistant, builds the
    qualitative-evaluation interface and launches it. If any of those steps
    raises, the error and its traceback are printed and a minimal Gradio page
    is launched instead, showing the error text, the directory listing and a
    button that re-checks the expected PDF files.
    """
    # Single declaration of the PDFs the app expects in the working directory;
    # shared by the pre-flight warning loop and the diagnostics button.
    expected_pdfs = ("saudi_vision203.pdf", "saudi_vision2030_ar.pdf")
    banner = "=" * 50

    # Start with a debugging report
    print(banner)
    print("SYSTEM INITIALIZATION")
    print(banner)
    print("Current directory:", os.getcwd())
    print("Files in directory:", os.listdir("."))
    print(banner)

    # Initialize the system
    try:
        # Missing PDFs only produce a warning here; initialization still runs.
        for pdf_name in expected_pdfs:
            if not os.path.exists(pdf_name):
                print(f"Warning: {pdf_name} not found!")

        print("Starting system initialization...")
        assistant = initialize_system()

        print("Creating interface...")
        interface = qualitative_evaluation_interface(assistant)

        print("Launching interface...")
        interface.launch()
    except Exception as err:
        print(f"Error during initialization: {err}")
        import traceback
        traceback.print_exc()

        # Create a simple error interface
        with gr.Blocks(title="Vision 2030 Assistant - Error") as error_ui:
            gr.Markdown("# Vision 2030 Assistant - Initialization Error")
            gr.Markdown("There was an error initializing the assistant.")

            # Display error details
            gr.Textbox(
                value=f"Error: {err!s}",
                label="Error Details",
                lines=5,
            )

            # Show file system status
            directory_listing = "\n".join(os.listdir("."))
            gr.Textbox(
                value=directory_listing,
                label="Files in Directory",
                lines=10,
            )

            def check_pdfs():
                """Return one status line per expected PDF (found with size in MB, or not found)."""

                def describe(path):
                    if not os.path.exists(path):
                        return f"{path}: Not found"
                    size_mb = os.path.getsize(path) / (1024 * 1024)  # bytes -> MB
                    return f"{path}: Found ({size_mb:.2f} MB)"

                return "\n".join(describe(path) for path in expected_pdfs)

            # Button that reports the PDF status on demand
            check_btn = gr.Button("Check PDF Files")
            pdf_status = gr.Textbox(label="PDF Status", lines=3)
            check_btn.click(check_pdfs, inputs=[], outputs=[pdf_status])

        error_ui.launch()
|
758 |
|
759 |
if __name__ == "__main__":
|
760 |
main()
|