# app.py
"""Streamlit front end for the Advanced RAG system.

The page wires together three project components: ``AdvancedRAG`` (document
processing and question answering), ``metamask_connector`` (wallet connection
info) and ``voice_input_component`` (speech transcript), and renders a chat
interface on top of them.
"""
import atexit
import os
import shutil

import streamlit as st
import torch

from advanced_rag import AdvancedRAG
from metamask_component import metamask_connector
from voice_component import voice_input_component

_INTRO_MARKDOWN = """
This application allows you to:
- Upload and process PDF documents
- Verify document authenticity on blockchain using MetaMask
- Ask questions using voice or text input
- Choose between direct retrieval or enhanced LLM-powered answers
"""

_LLM_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.2",
    "google/gemma-7b-it",
    "google/flan-t5-xl",
    "Salesforce/xgen-7b-8k-inst",
    "tiiuae/falcon-7b-instruct",
]

_EMBEDDING_MODELS = [
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
]

_DEFAULT_PROMPT_PLACEHOLDER = "Ask a question about your PDFs..."


# Helper function to initialize session state
def initialize_session_state():
    """Initialize Streamlit session state variables.

    Only keys that are missing are created, so values survive script reruns.
    """
    # Built on every call so each new session gets its own fresh list.
    defaults = {
        "rag": None,
        "messages": [],
        "temp_dir": None,
        "metamask_connected": False,
        "retrieval_method": "enhanced",
        # Transcript waiting to be submitted as a question.
        "voice_transcript": "",
        # Last transcript accepted from the voice component; used to avoid
        # submitting the same component value again on a later rerun.
        "last_voice_transcript": "",
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


# Helper function to clean up temporary files
def cleanup_temp_files():
    """Remove the session's temporary directory, if one is recorded.

    Registered with ``atexit`` by the entry point. Cleanup is best-effort:
    failures are printed, never raised.
    """
    # NOTE(review): this runs at interpreter exit, where Streamlit session
    # state may no longer be reachable - confirm. The lookup is guarded so an
    # unavailable session cannot raise out of the exit hook.
    try:
        temp_dir = st.session_state.get("temp_dir")
    except Exception as e:
        print(f"Error cleaning up temporary directory: {e}")
        return

    if not temp_dir or not os.path.exists(temp_dir):
        return
    try:
        shutil.rmtree(temp_dir)
        print(f"Cleaned up temporary directory: {temp_dir}")
    except OSError as e:
        print(f"Error cleaning up temporary directory: {e}")


def _build_rag(llm_model, embedding_model, chunk_size, chunk_overlap,
               use_gpu, use_blockchain, contract_address, metamask_info):
    """Create an ``AdvancedRAG`` and pass it the current MetaMask info.

    ``contract_address`` is forwarded only when ``use_blockchain`` is true;
    otherwise ``None`` is passed. Returns the new instance.
    """
    rag = AdvancedRAG(
        llm_model_name=llm_model,
        embedding_model_name=embedding_model,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        use_gpu=use_gpu,
        use_blockchain=use_blockchain,
        contract_address=contract_address if use_blockchain else None,
    )
    # Update with current MetaMask connection if available
    if use_blockchain and metamask_info:
        rag.update_blockchain_connection(metamask_info)
    return rag


def _render_assistant_content(content):
    """Render one assistant reply inside the current chat message.

    ``content`` is either a plain value shown as markdown, or a dict with an
    ``"answer"`` key and optional ``"query_time"``, ``"method"``,
    ``"blockchain_log"`` and ``"sources"`` keys. Used for both stored history
    and fresh replies so the two always look the same.
    """
    if not isinstance(content, dict):
        st.markdown(content)
        return

    st.markdown(content["answer"])
    if "query_time" in content:
        st.caption(f"Response time: {content['query_time']:.2f} seconds")
    if "method" in content:
        method_name = (
            "Direct Retrieval" if content["method"] == "direct"
            else "Enhanced Retrieval"
        )
        st.caption(f"Method: {method_name}")

    # Display blockchain log if available
    blockchain_log = content.get("blockchain_log")
    if blockchain_log:
        st.success(
            "✅ Query logged on blockchain | Transaction: "
            f"{blockchain_log['tx_hash'][:10]}..."
        )

    # Display sources in expander
    sources = content.get("sources")
    if sources:
        with st.expander("📄 View Sources"):
            for number, source in enumerate(sources, start=1):
                st.markdown(f"**Source {number}: {source['source']}**")
                # Show blockchain verification if available
                if source.get("blockchain"):
                    st.success(
                        "✅ Verified on blockchain | TX: "
                        f"{source['blockchain']['tx_hash'][:10]}..."
                    )
                st.text(source["content"])
                st.divider()


def _render_metamask_section():
    """Render the MetaMask section and return the connector's info.

    Also records the connection flag in session state and, when a RAG system
    already exists, forwards the connection info to it.
    """
    st.header("🦊 MetaMask Connection")
    st.markdown(
        "Connect your MetaMask wallet to verify documents and log queries "
        "on the blockchain."
    )

    # Add MetaMask connector and get connection info
    metamask_info = metamask_connector()

    # Display MetaMask connection status
    if metamask_info and metamask_info.get("connected"):
        st.success(f"✅ MetaMask Connected: {metamask_info.get('address')}")
        st.info(f"Network: {metamask_info.get('network_name')}")
        st.session_state.metamask_connected = True
    else:
        st.warning(
            "⚠️ MetaMask not connected. Please connect your wallet to use "
            "blockchain features."
        )
        st.session_state.metamask_connected = False

    # Update RAG system with MetaMask connection if needed
    if st.session_state.rag and metamask_info:
        if st.session_state.rag.update_blockchain_connection(metamask_info):
            st.success("RAG system updated with MetaMask connection")

    return metamask_info


def _render_sidebar(metamask_info):
    """Render the configuration and upload sidebar.

    Handles model selection, blockchain options, the "Initialize System"
    button and PDF processing. Results are stored in ``st.session_state.rag``.
    """
    wallet_connected = bool(metamask_info and metamask_info.get("connected"))

    with st.sidebar:
        st.header("⚙️ Configuration")

        # GPU Detection
        gpu_available = torch.cuda.is_available()
        if gpu_available:
            try:
                gpu_info = torch.cuda.get_device_properties(0)
                st.success(
                    f"GPU detected: {gpu_info.name} "
                    f"({gpu_info.total_memory / 1024**3:.1f} GB)"
                )
            except Exception as e:
                st.warning(f"GPU detected but couldn't get properties: {str(e)}")
                st.info("Running with limited GPU information")
        else:
            st.warning("No GPU detected. Running in CPU mode.")

        # Model selection
        st.subheader("Model Selection")
        llm_model = st.selectbox("LLM Model", options=_LLM_MODELS, index=0)
        embedding_model = st.selectbox(
            "Embedding Model",
            options=_EMBEDDING_MODELS,
            index=1,  # all-MiniLM-L6-v2 is smaller and faster
        )
        use_gpu = st.checkbox("Use GPU Acceleration", value=gpu_available)

        # Blockchain configuration
        st.subheader("🔗 Blockchain Configuration")
        use_blockchain = st.checkbox("Enable Blockchain Verification", value=True)
        contract_address = None
        if use_blockchain:
            # Placeholder default - set CONTRACT_ADDRESS to your deployed contract
            contract_address = os.environ.get("CONTRACT_ADDRESS", "0x123abc...")
            st.info(f"Using pre-deployed contract: {contract_address[:10]}...")

            # Display MetaMask connection status in sidebar
            if wallet_connected:
                # The info may be marked connected without an address; fall
                # back to "" so the slice cannot fail on None.
                address = metamask_info.get("address") or ""
                st.success(f"✅ MetaMask Connected: {address[:10]}...")
            else:
                st.warning(
                    "⚠️ MetaMask not connected. Please connect your wallet above."
                )

        # Advanced options
        with st.expander("Advanced Options"):
            chunk_size = st.slider("Chunk Size", 100, 2000, 1000)
            chunk_overlap = st.slider("Chunk Overlap", 0, 500, 200)

        rag_settings = dict(
            llm_model=llm_model,
            embedding_model=embedding_model,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            use_gpu=use_gpu and gpu_available,
            use_blockchain=use_blockchain,
            contract_address=contract_address,
            metamask_info=metamask_info,
        )

        # Initialize button
        if st.button("Initialize System"):
            with st.spinner("Initializing RAG system..."):
                if use_blockchain and not contract_address:
                    st.error("Contract address is required for blockchain integration")
                else:
                    st.session_state.rag = _build_rag(**rag_settings)
                    st.success(
                        f"System initialized with {embedding_model} "
                        f"on {st.session_state.rag.device}"
                    )
                    if use_blockchain:
                        if wallet_connected:
                            st.success("Blockchain verification enabled with MetaMask")
                        else:
                            st.warning(
                                "Blockchain verification enabled but MetaMask "
                                "not connected"
                            )

        st.header("📄 Upload Documents")
        uploaded_files = st.file_uploader(
            "Select PDFs", type="pdf", accept_multiple_files=True
        )

        # The button is only drawn once at least one file has been selected.
        if uploaded_files and st.button("Process PDFs"):
            if not st.session_state.rag:
                with st.spinner("Initializing RAG system..."):
                    st.session_state.rag = _build_rag(**rag_settings)

            if st.session_state.rag.process_pdfs(uploaded_files):
                metrics = st.session_state.rag.get_performance_metrics()
                if metrics:
                    _render_metrics(metrics)


def _render_metrics(metrics):
    """Show the success notice and the performance-metrics expander."""
    st.success("PDFs processed successfully!")
    blockchain_state = "Enabled" if metrics["blockchain_enabled"] else "Disabled"
    connected_state = "Yes" if metrics.get("blockchain_connected") else "No"
    with st.expander("💹 Performance Metrics"):
        st.markdown(f"**Documents processed:** {metrics['documents_processed']} chunks")
        st.markdown(
            f"**Index building time:** {metrics['index_building_time']:.2f} seconds"
        )
        st.markdown(
            f"**Total processing time:** {metrics['total_processing_time']:.2f} seconds"
        )
        st.markdown(f"**Memory used:** {metrics['memory_used_gb']:.2f} GB")
        st.markdown(f"**Device used:** {metrics['device']}")
        st.markdown(f"**Blockchain verification:** {blockchain_state}")
        st.markdown(f"**Blockchain connected:** {connected_state}")


def _render_retrieval_selector():
    """Render the two retrieval-method buttons and the blockchain notice."""
    st.header("🔍 Retrieval Method")
    direct_col, enhanced_col = st.columns(2)

    with direct_col:
        if st.button(
            "📄 Direct Retrieval",
            help="Get raw document chunks without LLM processing",
            use_container_width=True,
        ):
            st.session_state.retrieval_method = "direct"
            st.info(
                "Using Direct Retrieval: Raw document passages will be "
                "returned without LLM processing"
            )

    with enhanced_col:
        if st.button(
            "🧠 Enhanced Retrieval",
            help="Process results through LLM for comprehensive answers",
            use_container_width=True,
        ):
            st.session_state.retrieval_method = "enhanced"
            st.info(
                "Using Enhanced Retrieval: Documents will be processed by "
                "LLM to generate comprehensive answers"
            )

    # Blockchain verification info
    if st.session_state.rag and st.session_state.rag.use_blockchain:
        if st.session_state.metamask_connected:
            st.info(
                "🔗 Blockchain verification is enabled with MetaMask. "
                "Documents are cryptographically verified and queries are "
                "logged with immutable audit trail."
            )
        else:
            st.warning(
                "🔗 Blockchain verification is enabled but MetaMask is not "
                "connected. Please connect your MetaMask wallet to use "
                "blockchain features."
            )


def _render_chat_history():
    """Render the stored conversation and return the chat container."""
    st.header("💬 Chat")
    chat_container = st.container(height=400, border=True)
    with chat_container:
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                if message["role"] == "user":
                    st.markdown(message["content"])
                else:
                    _render_assistant_content(message["content"])
    return chat_container


def _collect_voice_input():
    """Render the voice section and queue any new transcript."""
    st.header("🎤 Voice Input")
    st.markdown("You can ask questions using your voice or type them below.")

    voice_transcript = voice_input_component()
    cleaned = voice_transcript.strip() if voice_transcript else ""

    if not cleaned:
        # Component reports nothing: allow the same phrase to be spoken again.
        st.session_state.last_voice_transcript = ""
        return

    # NOTE(review): custom components usually keep returning their last value
    # on later reruns - verify against voice_component. Accepting a transcript
    # only when it changes stops one utterance being asked again on every
    # unrelated rerun (button clicks, uploads, ...).
    if cleaned != st.session_state.last_voice_transcript:
        st.session_state.last_voice_transcript = cleaned
        st.session_state.voice_transcript = cleaned
        st.success(f"Voice input received: {voice_transcript}")


def _answer_question(user_input, chat_container):
    """Append ``user_input`` to the chat, obtain a reply and render both."""
    st.session_state.messages.append({"role": "user", "content": user_input})

    with chat_container:
        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            rag = st.session_state.rag
            if not rag:
                # Check if system is initialized
                reply = "Please initialize the system and process PDFs first."
            elif rag.vector_store:
                # Get response using the selected retrieval method
                reply = rag.ask(user_input, method=st.session_state.retrieval_method)
            else:
                reply = "Please upload and process PDF files first."

            st.session_state.messages.append({"role": "assistant", "content": reply})
            _render_assistant_content(reply)


# Streamlit UI
def main():
    """Render the whole page: wallet, sidebar, retrieval choice and chat."""
    st.set_page_config(
        page_title="Advanced RAG with MetaMask and Voice",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.title("🚀 Advanced RAG System with Blockchain Verification and Voice Input")
    st.markdown(_INTRO_MARKDOWN)

    # Initialize session state
    initialize_session_state()

    metamask_info = _render_metamask_section()
    _render_sidebar(metamask_info)
    _render_retrieval_selector()
    chat_container = _render_chat_history()
    _collect_voice_input()

    # Chat input - show the pending voice transcript as the placeholder
    prompt = st.chat_input(
        st.session_state.voice_transcript or _DEFAULT_PROMPT_PLACEHOLDER
    )

    # Prioritize text input over voice input
    user_input = prompt or st.session_state.voice_transcript
    if user_input:
        # Consume the pending transcript before answering. The previous code
        # cleared it and called st.rerun() first, which discarded the voice
        # question without ever answering it.
        st.session_state.voice_transcript = ""
        _answer_question(user_input, chat_container)


# Main entry point
if __name__ == "__main__":
    # Register cleanup function
    # NOTE(review): Streamlit re-executes this script on every rerun, so the
    # hook is presumably registered many times - confirm whether that matters.
    atexit.register(cleanup_temp_files)
    main()