Update app.py
app.py
CHANGED
Old version (removed lines are prefixed with -; lines the diff view cut off mid-token are kept as shown, and - […] marks removed lines whose content is not recoverable):

@@ -1,235 +1,28 @@
- #
import os
import tempfile
import shutil
import PyPDF2
import streamlit as st
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
- from langchain_huggingface import HuggingFaceEmbeddings
- from langchain_community.llms import HuggingFaceHub
import time
import psutil
import uuid
import atexit
- import
- import
- from web3 import Web3
-
- # Set page configuration
- st.set_page_config(
-     page_title="RAG System",
-     layout="wide",
-     initial_sidebar_state="expanded"
- )
-
- # Custom CSS for better UI
- def load_css():
-     st.markdown("""
-     <style>
-     /* Main layout styling */
-     .main {
-         background-color: #f9fafb;
-     }
-
-     /* Card styling */
-     .card {
-         border-radius: 10px;
-         background-color: white;
-         padding: 20px;
-         box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
-         margin-bottom: 20px;
-     }
-
-     /* Two-column layout */
-     .answer-section {
-         background-color: white;
-         border-radius: 10px;
-         padding: 20px;
-         margin-bottom: 15px;
-         border-left: 4px solid #4CAF50;
-         box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
-     }
-
-     .sources-section {
-         background-color: white;
-         border-radius: 10px;
-         padding: 15px;
-         margin-bottom: 15px;
-         border-left: 4px solid #2196F3;
-         box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
-     }
-
-     .source-item {
-         padding: 10px;
-         border-radius: 5px;
-         background-color: #f8f9fa;
-         margin-bottom: 10px;
-         border: 1px solid #eee;
-     }
-
-     .source-header {
-         font-weight: bold;
-         margin-bottom: 5px;
-         display: flex;
-         justify-content: space-between;
-     }
-
-     .verified-badge {
-         background-color: #4CAF50;
-         color: white;
-         padding: 2px 8px;
-         border-radius: 10px;
-         font-size: 0.8em;
-     }
-
-     /* Method selection styling */
-     .method-container {
-         display: flex;
-         gap: 10px;
-         margin-bottom: 15px;
-     }
-
-     .method-button {
-         flex: 1;
-         text-align: center;
-         padding: 10px;
-         border-radius: 5px;
-         cursor: pointer;
-         transition: all 0.3s;
-     }
-
-     .direct-method {
-         background-color: #e3f2fd;
-         border: 1px solid #bbdefb;
-         color: #1976D2;
-     }
-
-     .direct-method:hover {
-         background-color: #bbdefb;
-     }
-
-     .enhanced-method {
-         background-color: #e8f5e9;
-         border: 1px solid #c8e6c9;
-         color: #388E3C;
-     }
-
-     .enhanced-method:hover {
-         background-color: #c8e6c9;
-     }
-
-     .method-active {
-         box-shadow: 0 0 0 2px #3f51b5;
-     }
-
-     /* Voice button styling */
-     .voice-button {
-         width: 50px;
-         height: 50px;
-         border-radius: 50%;
-         background-color: #f44336;
-         color: white;
-         display: flex;
-         align-items: center;
-         justify-content: center;
-         cursor: pointer;
-         box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
-         transition: all 0.3s;
-         margin: 0 auto;
-     }
-
-     .voice-button:hover {
-         transform: scale(1.05);
-         box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
-     }
-
-     /* Header styling */
-     h1, h2, h3 {
-         color: #333;
-     }
-
-     /* Button styling */
-     .stButton>button {
-         border-radius: 5px;
-         font-weight: 500;
-     }
-     </style>
-     """, unsafe_allow_html=True)

- # Simple blockchain utility
- class BlockchainVerifier:
-     def __init__(self, contract_address=None):
-         self.contract_address = contract_address
-         self.is_connected = False
-         self.user_address = None
-
-     def connect_wallet(self, wallet_address):
-         """Simulate connecting to a wallet"""
-         self.is_connected = True
-         self.user_address = wallet_address
-         return True
-
-     def compute_file_hash(self, file_path):
-         """Compute SHA-256 hash of file"""
-         sha256_hash = hashlib.sha256()
-         with open(file_path, "rb") as f:
-             for byte_block in iter(lambda: f.read(4096), b""):
-                 sha256_hash.update(byte_block)
-         return sha256_hash.hexdigest()
-
-     def verify_document(self, document_id, file_path):
-         """Simulate document verification on blockchain"""
-         if not self.is_connected:
-             return {"status": False, "error": "Wallet not connected"}
-
-         # Calculate hash
-         document_hash = self.compute_file_hash(file_path)
-
-         # Simulate transaction
-         tx_hash = "0x" + "".join([format(i, "02x") for i in os.urandom(32)])
-
-         return {
-             "status": True,
-             "tx_hash": tx_hash,
-             "document_id": document_id,
-             "document_hash": document_hash,
-             "block_number": 12345678
-         }
-
-     def log_query(self, query_text, answer_text):
-         """Simulate logging a query on blockchain"""
-         if not self.is_connected:
-             return {"status": False, "error": "Wallet not connected"}
-
-         # Create query data and hash
-         query_id = f"query_{int(time.time())}"
-         query_data = {
-             "query": query_text,
-             "answer": answer_text,
-             "timestamp": int(time.time())
-         }
-         query_hash = hashlib.sha256(json.dumps(query_data).encode()).hexdigest()
-
-         # Simulate transaction
-         tx_hash = "0x" + "".join([format(i, "02x") for i in os.urandom(32)])
-
-         return {
-             "status": True,
-             "tx_hash": tx_hash,
-             "query_id": query_id,
-             "query_hash": query_hash,
-             "block_number": 12345678
-         }

-
- class OptimizedRAG:
    def __init__(self,
-                  llm_model_name="
                  embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
                  chunk_size=1000,
                  chunk_overlap=200,
@@ -237,7 +30,16 @@ class OptimizedRAG:
                  use_blockchain=False,
                  contract_address=None):
        """
-         Initialize the RAG system
        """
        self.llm_model_name = llm_model_name
        self.embedding_model_name = embedding_model_name
@@ -246,6 +48,7 @@

        # Device selection for embeddings
        self.device = "cuda" if self.use_gpu else "cpu"

        # Initialize text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(
@@ -260,7 +63,7 @@
            model_kwargs={"device": self.device}
        )

-         # Initialize LLM using HuggingFaceHub
        try:
            # Use HF_TOKEN from environment variables
            hf_token = os.environ.get("HF_TOKEN")
@@ -270,7 +73,7 @@
            self.llm = HuggingFaceHub(
                repo_id=llm_model_name,
                huggingfacehub_api_token=hf_token,
-                 model_kwargs={"temperature": 0.7, "max_length":
            )
        except Exception as e:
            st.error(f"Error initializing LLM: {str(e)}")
@@ -278,42 +81,60 @@
            # Fallback to a smaller model
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-small",
-                 model_kwargs={"temperature": 0.7, "max_length":
            )

-         # Initialize vector store
        self.vector_store = None
        self.documents_processed = 0
        self.processing_times = {}

-         # Initialize blockchain
        self.blockchain = None
        if use_blockchain:
- […]

-     def
-         """
-         if self.blockchain:
- […]
        return False

    def process_pdfs(self, pdf_files):
-         """Process PDF files
        all_docs = []

        with st.status("Processing PDF files...") as status:
-             # Create temporary directory
            temp_dir = tempfile.mkdtemp()
            st.session_state['temp_dir'] = temp_dir

-             #
            start_time = time.time()
            mem_before = psutil.virtual_memory().used / (1024 * 1024 * 1024)  # GB

-             # Process each PDF
            for i, pdf_file in enumerate(pdf_files):
                try:
- […]
                    pdf_path = os.path.join(temp_dir, pdf_file.name)
                    with open(pdf_path, "wb") as f:
                        f.write(pdf_file.getbuffer())
@@ -330,130 +151,107 @@
                    if page_text:
                        text += page_text + "\n\n"

-                     # Create
                    docs = [Document(page_content=text, metadata={"source": pdf_file.name})]
                    split_docs = self.text_splitter.split_documents(docs)
                    all_docs.extend(split_docs)

-                     # Verify on blockchain if enabled
                    if self.use_blockchain and self.blockchain and self.blockchain.is_connected:
- […]
-                             #
- […]
-                                 "
-                                 "document_id": document_id,
-                                 "document_hash": verification.get("document_hash", ""),
-                                 "tx_hash": verification.get("tx_hash", ""),
-                                 "block_number": verification.get("block_number", 0)
-                             }

                except Exception as e:
                    st.sidebar.error(f"Error processing {pdf_file.name}: {str(e)}")

-             # Create vector store
            if all_docs:
                status.update(label="Building vector index...")
                try:
                    index_start_time = time.time()
                    self.vector_store = FAISS.from_documents(all_docs, self.embeddings)
-                     index_time = time.time() - index_start_time

- […]
                    mem_used = mem_after - mem_before

-                     # Save performance metrics
                    total_time = time.time() - start_time
                    self.processing_times["index_building"] = index_time
                    self.processing_times["total_time"] = total_time
                    self.processing_times["memory_used_gb"] = mem_used
                    self.documents_processed = len(all_docs)

-                     status.update(label=f"Completed processing {len(all_docs)} chunks", state="complete")
                    return True
                except Exception as e:
                    st.error(f"Error creating vector store: {str(e)}")
                    return False
            else:
                status.update(label="No content extracted from PDFs", state="error")
                return False

-     def
-         """
        if not self.vector_store:
            return "Please upload and process PDF files first."

        try:
-             #
-             query_start_time = time.time()
-
-             # Retrieve relevant documents
-             retriever = self.vector_store.as_retriever(search_kwargs={"k": 5})
-             docs = retriever.get_relevant_documents(query)
-
-             # Format sources and answer
-             sources = []
-             answer = "Here are the most relevant passages:\n\n"
-
-             for i, doc in enumerate(docs):
-                 # Get blockchain info if available
-                 blockchain_info = None
-                 if "blockchain" in doc.metadata:
-                     blockchain_info = {
-                         "verified": doc.metadata["blockchain"]["verified"],
-                         "document_id": doc.metadata["blockchain"]["document_id"],
-                         "tx_hash": doc.metadata["blockchain"]["tx_hash"]
-                     }
-
-                 # Add to answer and sources
-                 answer += f"Passage {i+1} (from {doc.metadata.get('source', 'Unknown')}):\n{doc.page_content}\n\n"
-                 sources.append({
-                     "content": doc.page_content,
-                     "source": doc.metadata.get("source", "Unknown"),
-                     "blockchain": blockchain_info
-                 })
-
-             # Calculate query time
-             query_time = time.time() - query_start_time
-
-             # Log query to blockchain if enabled
-             blockchain_log = None
-             if self.use_blockchain and self.blockchain and self.blockchain.is_connected:
-                 log_result = self.blockchain.log_query(query, answer)
-                 if log_result.get("status"):
-                     blockchain_log = {
-                         "logged": True,
-                         "query_id": log_result.get("query_id", ""),
-                         "tx_hash": log_result.get("tx_hash", "")
-                     }
-
-             return {
-                 "answer": answer,
-                 "sources": sources,
-                 "query_time": query_time,
-                 "blockchain_log": blockchain_log,
-                 "method": "direct"
-             }
-
-         except Exception as e:
-             st.error(f"Error in direct retrieval: {str(e)}")
-             return f"Error: {str(e)}"
-
-     def enhanced_retrieval(self, query):
-         """Enhanced retrieval - processes through LLM for better answers"""
-         if not self.vector_store:
-             return "Please upload and process PDF files first."
-
-         try:
-             # Create prompt template
            prompt_template = """
- […]

            Context:
            {context}
@@ -467,20 +265,23 @@
                input_variables=["context", "question"]
            )

-             # Start timing
            query_start_time = time.time()

            # Create QA chain
            qa = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
-                 chain_type_kwargs=
                return_source_documents=True
            )

            # Get answer
- […]
            answer = response["result"]
            source_docs = response["source_documents"]
@@ -490,7 +291,7 @@
            # Format sources
            sources = []
            for i, doc in enumerate(source_docs):
-                 #
                blockchain_info = None
                if "blockchain" in doc.metadata:
                    blockchain_info = {
@@ -500,345 +301,327 @@
                    }

                sources.append({
-                     "content": doc.page_content,
                    "source": doc.metadata.get("source", "Unknown"),
                    "blockchain": blockchain_info
                })

-             # Log query to blockchain if enabled
            blockchain_log = None
            if self.use_blockchain and self.blockchain and self.blockchain.is_connected:
- […]
            return {
                "answer": answer,
                "sources": sources,
                "query_time": query_time,
-                 "blockchain_log": blockchain_log
-                 "method": "enhanced"
            }

        except Exception as e:
-             st.error(f"Error
            return f"Error: {str(e)}"

-     def
-         """
-         if
-             return
- […]

# Helper function to initialize session state
def initialize_session_state():
-     """Initialize Streamlit session state variables"""
    if "rag" not in st.session_state:
        st.session_state.rag = None
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "temp_dir" not in st.session_state:
        st.session_state.temp_dir = None
-     if "
-         st.session_state.
-     if "wallet_address" not in st.session_state:
-         st.session_state.wallet_address = None
-     if "retrieval_method" not in st.session_state:
-         st.session_state.retrieval_method = "enhanced"
-     if "current_answer" not in st.session_state:
-         st.session_state.current_answer = None

# Helper function to clean up temporary files
def cleanup_temp_files():
-     """Clean up temporary files when application exits"""
    if st.session_state.get('temp_dir') and os.path.exists(st.session_state.temp_dir):
        try:
            shutil.rmtree(st.session_state.temp_dir)
        except Exception as e:
            print(f"Error cleaning up temporary directory: {e}")

- # Create a simple wallet connector UI
- def wallet_connector():
-     st.sidebar.subheader("Blockchain Connection")
-
-     if st.session_state.wallet_connected:
-         st.sidebar.success(f"Connected: {st.session_state.wallet_address[:10]}...")
-         if st.sidebar.button("Disconnect Wallet"):
-             st.session_state.wallet_connected = False
-             st.session_state.wallet_address = None
-             st.rerun()
-     else:
-         st.sidebar.info("Connect wallet to verify documents on blockchain")
-         if st.sidebar.button("Connect Wallet"):
-             # Generate a mock wallet address
-             wallet_address = "0x" + "".join([format(i, "02x") for i in os.urandom(20)])
-             st.session_state.wallet_address = wallet_address
-             st.session_state.wallet_connected = True
-
-             # Connect to RAG system if initialized
-             if st.session_state.rag:
-                 st.session_state.rag.connect_wallet(wallet_address)
-
-             st.rerun()

- #
def main():
- […]

    # Initialize session state
    initialize_session_state()

-     #
-     st.
-     st.markdown(""
- […]

-     # Sidebar for configuration
    with st.sidebar:
-         wallet_connector()
-
-         # System configuration
-         st.sidebar.subheader("System Configuration")

        # GPU Detection
        gpu_available = torch.cuda.is_available()
        if gpu_available:
- […]
        else:
-             st.

-         # Model selection
-         llm_model = st.
            "LLM Model",
            options=[
                "google/flan-t5-base",
-                 "
-                 "distilbert/distilgpt2",
-                 "google/flan-ul2"
            ],
            index=0
        )

-         embedding_model = st.
            "Embedding Model",
            options=[
                "sentence-transformers/all-MiniLM-L6-v2",
-                 "sentence-transformers/paraphrase-MiniLM-
-                 "sentence-transformers/all-mpnet-base-v2"
            ],
-             index=
        )

-         use_gpu = st.
- […]

-         #
- […]

        # Initialize button
-         if st.
-             with st.spinner("
- […]

-         st.
-         uploaded_files = st.sidebar.file_uploader("Select PDFs", type="pdf", accept_multiple_files=True)

-         if uploaded_files and st.
            if not st.session_state.rag:
-                 with st.spinner("Initializing system
-                     st.session_state.rag =
                        llm_model_name=llm_model,
                        embedding_model_name=embedding_model,
-                         chunk_size=
-                         chunk_overlap=
                        use_gpu=use_gpu and gpu_available,
                        use_blockchain=use_blockchain,
                        contract_address=contract_address if use_blockchain else None
                    )

-                     #
-                     if
-                         st.session_state.rag.

            success = st.session_state.rag.process_pdfs(uploaded_files)
            if success:
-                 st.
- […]

-     if st.session_state.
- […]
-             <div class="{direct_class}" onclick="this.classList.add('method-active')">
-                 Direct Retrieval
-             </div>
-             """, unsafe_allow_html=True):
-             st.session_state.retrieval_method = "direct"
-             st.rerun()
-
-         with col2:
-             enhanced_class = "method-button enhanced-method"
-             if st.session_state.retrieval_method == "enhanced":
-                 enhanced_class += " method-active"
-
-             if st.markdown(f"""
-             <div class="{enhanced_class}" onclick="this.classList.add('method-active')">
-                 Enhanced Answers
-             </div>
-             """, unsafe_allow_html=True):
-                 st.session_state.retrieval_method = "enhanced"
-                 st.rerun()
-
-         # Method description
-         if st.session_state.retrieval_method == "direct":
-             st.info("**Direct Retrieval**: Shows raw document passages. Fast and transparent.")
-         else:
-             st.info("**Enhanced Answers**: Processes content through AI for better quality answers.")

-     #
- […]
-             <div style="margin-top: 10px; color: #666;">Click to speak</div>
-         </div>
-         """, unsafe_allow_html=True)
-
-         if st.button("Simulate Voice Input"):
-             user_input = "What are the main topics covered in the documents?"
-             st.info(f"Voice input received: {user_input}")
-             st.rerun()

-     #
- […]
-         # Check if system is initialized
-         if not st.session_state.rag:
-             st.error("Please initialize the system and process PDFs first.")

-             with st.spinner("Generating answer..."):
-                 # Get retrieval method
-                 method = st.session_state.retrieval_method
-
-                 # Get answer
-                 response = st.session_state.rag.ask(user_input, method=method)
-                 st.session_state.messages.append({"role": "assistant", "content": response})

-     #
- […]
-         # Display current answer
-         if st.session_state.current_answer and isinstance(st.session_state.current_answer, dict):
-             answer = st.session_state.current_answer
-
-             st.markdown("""
-             <div class="answer-section">
-                 <h3>Answer</h3>
-                 <div style="white-space: pre-line;">
-                     {answer_text}
-                 </div>
-                 <div style="margin-top: 10px; font-size: 0.8em; color: #666;">
-                     Method: {method_name} | Time: {query_time:.2f}s
-                 </div>
-             </div>
-             """.format(
-                 answer_text=answer["answer"],
-                 method_name="Direct Retrieval" if answer["method"] == "direct" else "Enhanced Answer",
-                 query_time=answer["query_time"]
-             ), unsafe_allow_html=True)
-
-             # Blockchain verification display
-             if "blockchain_log" in answer and answer["blockchain_log"]:
-                 blockchain_log = answer["blockchain_log"]
-                 st.success(f"Query logged on blockchain | Transaction: {blockchain_log['tx_hash'][:10]}...")
-
-     # Sources column
-     with sources_col:
-         st.markdown("### Sources")
-
-         if st.session_state.current_answer and isinstance(st.session_state.current_answer, dict):
-             answer = st.session_state.current_answer
-
-             # Display sources
-             if "sources" in answer and answer["sources"]:
-                 for i, source in enumerate(answer["sources"]):
-                     verified_badge = ""
-                     if source.get("blockchain"):
-                         verified_badge = '<span class="verified-badge">Verified</span>'
- […]
        else:
-             st.

# Main entry point
if __name__ == "__main__":

New version (added lines are prefixed with +; unchanged regions collapsed by the diff view are marked […]):

+ # main_metamask.py
import os
import tempfile
import shutil
import PyPDF2
import streamlit as st
import torch
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
import time
import psutil
import uuid
import atexit
+ from blockchain_utils_metamask import BlockchainManagerMetaMask
+ from metamask_component import metamask_connector


+ class BlockchainEnabledRAG:
    def __init__(self,
+                  llm_model_name="mistralai/Mistral-7B-Instruct-v0.2",
                  embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
                  chunk_size=1000,
                  chunk_overlap=200,
[…]
                  use_blockchain=False,
                  contract_address=None):
        """
+         Initialize the GPU-efficient RAG system with MetaMask blockchain integration.
+
+         Args:
+             llm_model_name: The HuggingFace model for text generation
+             embedding_model_name: The HuggingFace model for embeddings
+             chunk_size: Size of document chunks
+             chunk_overlap: Overlap between chunks
+             use_gpu: Whether to use GPU acceleration
+             use_blockchain: Whether to enable blockchain verification
+             contract_address: Address of the deployed RAG Document Verifier contract
        """
        self.llm_model_name = llm_model_name
        self.embedding_model_name = embedding_model_name
[…]

        # Device selection for embeddings
        self.device = "cuda" if self.use_gpu else "cpu"
+         st.sidebar.info(f"Using device: {self.device}")

        # Initialize text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(
[…]
            model_kwargs={"device": self.device}
        )

+         # Initialize LLM using HuggingFaceHub instead of Ollama
        try:
            # Use HF_TOKEN from environment variables
            hf_token = os.environ.get("HF_TOKEN")
[…]
            self.llm = HuggingFaceHub(
                repo_id=llm_model_name,
                huggingfacehub_api_token=hf_token,
+                 model_kwargs={"temperature": 0.7, "max_length": 1024}
            )
        except Exception as e:
            st.error(f"Error initializing LLM: {str(e)}")
[…]
            # Fallback to a smaller model
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-small",
+                 model_kwargs={"temperature": 0.7, "max_length": 512}
            )

+         # Initialize vector store
        self.vector_store = None
        self.documents_processed = 0
+
+         # Monitoring stats
        self.processing_times = {}

+         # Initialize blockchain manager if enabled
        self.blockchain = None
        if use_blockchain:
+             try:
+                 self.blockchain = BlockchainManagerMetaMask(
+                     contract_address=contract_address
+                 )
+                 st.sidebar.success("Blockchain manager initialized. Please connect MetaMask to continue.")
+             except Exception as e:
+                 st.sidebar.error(f"Failed to initialize blockchain manager: {str(e)}")
+                 self.use_blockchain = False

+     def update_blockchain_connection(self, metamask_info):
+         """Update blockchain connection with MetaMask info."""
+         if self.blockchain and metamask_info:
+             self.blockchain.update_connection(
+                 is_connected=metamask_info.get("connected", False),
+                 user_address=metamask_info.get("address"),
+                 network_id=metamask_info.get("network_id")
+             )
+             return self.blockchain.is_connected
        return False

    def process_pdfs(self, pdf_files):
+         """Process PDF files, create a vector store, and verify documents on blockchain."""
        all_docs = []

        with st.status("Processing PDF files...") as status:
+             # Create temporary directory for file storage
            temp_dir = tempfile.mkdtemp()
            st.session_state['temp_dir'] = temp_dir

+             # Monitor processing time and memory usage
            start_time = time.time()
+
+             # Track memory before processing
            mem_before = psutil.virtual_memory().used / (1024 * 1024 * 1024)  # GB

+             # Process each PDF file
            for i, pdf_file in enumerate(pdf_files):
                try:
+                     file_start_time = time.time()
+
+                     # Save uploaded file to temp directory
                    pdf_path = os.path.join(temp_dir, pdf_file.name)
                    with open(pdf_path, "wb") as f:
                        f.write(pdf_file.getbuffer())
[…]
                    if page_text:
                        text += page_text + "\n\n"

+                     # Create documents
                    docs = [Document(page_content=text, metadata={"source": pdf_file.name})]
+
+                     # Split documents into chunks
                    split_docs = self.text_splitter.split_documents(docs)
+
                    all_docs.extend(split_docs)

+                     # Verify document on blockchain if enabled and connected
                    if self.use_blockchain and self.blockchain and self.blockchain.is_connected:
+                         try:
+                             # Create a unique document ID
+                             document_id = f"{pdf_file.name}_{uuid.uuid4().hex[:8]}"
+
+                             # Verify document on blockchain
+                             status.update(label=f"Verifying {pdf_file.name} on blockchain...")
+                             verification = self.blockchain.verify_document(document_id, pdf_path)

+                             if verification.get('status'):  # Success
+                                 st.sidebar.success(f"{pdf_file.name} verified on blockchain")
+                                 if 'tx_hash' in verification:
+                                     st.sidebar.info(f"Transaction: {verification['tx_hash'][:10]}...")

+                                 # Add blockchain metadata to documents
+                                 for doc in split_docs:
+                                     doc.metadata["blockchain"] = {
+                                         "verified": True,
+                                         "document_id": document_id,
+                                         "document_hash": verification.get("document_hash", ""),
+                                         "tx_hash": verification.get("tx_hash", ""),
+                                         "block_number": verification.get("block_number", 0)
+                                     }
+                             else:
+                                 st.sidebar.warning(f"Failed to verify {pdf_file.name} on blockchain")
+                                 if 'error' in verification:
+                                     st.sidebar.error(f"Error: {verification['error']}")
+                         except Exception as e:
+                             st.sidebar.error(f"Blockchain verification error: {str(e)}")
+                     elif self.use_blockchain:
+                         st.sidebar.warning("MetaMask not connected. Document not verified on blockchain.")
+
+                     file_end_time = time.time()
+                     processing_time = file_end_time - file_start_time
+
+                     st.sidebar.success(f"Processed {pdf_file.name}: {len(split_docs)} chunks in {processing_time:.2f}s")
+                     self.processing_times[pdf_file.name] = {
+                         "chunks": len(split_docs),
+                         "time": processing_time
+                     }
+
                except Exception as e:
                    st.sidebar.error(f"Error processing {pdf_file.name}: {str(e)}")

+             # Create vector store if we have documents
            if all_docs:
                status.update(label="Building vector index...")
                try:
+                     # Record the time taken to build the index
                    index_start_time = time.time()
+
+                     # Create the vector store using FAISS
                    self.vector_store = FAISS.from_documents(all_docs, self.embeddings)

+                     index_end_time = time.time()
+                     index_time = index_end_time - index_start_time
+
+                     # Track memory after processing
+                     mem_after = psutil.virtual_memory().used / (1024 * 1024 * 1024)  # GB
                    mem_used = mem_after - mem_before

                    total_time = time.time() - start_time
+
+                     status.update(label=f"Completed processing {len(all_docs)} chunks in {total_time:.2f}s", state="complete")
+
+                     # Save performance metrics
                    self.processing_times["index_building"] = index_time
                    self.processing_times["total_time"] = total_time
                    self.processing_times["memory_used_gb"] = mem_used
                    self.documents_processed = len(all_docs)

                    return True
                except Exception as e:
                    st.error(f"Error creating vector store: {str(e)}")
+                     status.update(label="Error creating vector store", state="error")
                    return False
            else:
                status.update(label="No content extracted from PDFs", state="error")
                return False

+     def ask(self, query):
+         """Ask a question and get an answer based on the PDFs with blockchain logging."""
        if not self.vector_store:
            return "Please upload and process PDF files first."

        try:
+             # Custom prompt
            prompt_template = """
+             You are an AI assistant that provides accurate information based on PDF documents.
+
+             Use the following context to answer the question. Be detailed and precise in your answer.
+             If the answer is not in the context, say "I don't have enough information to answer this question."

            Context:
            {context}
[…]
                input_variables=["context", "question"]
            )

+             # Start timing the query
            query_start_time = time.time()

            # Create QA chain
+             chain_type_kwargs = {"prompt": PROMPT}
            qa = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
+                 chain_type_kwargs=chain_type_kwargs,
                return_source_documents=True
            )

            # Get answer
+             with st.status("Searching documents and generating answer..."):
+                 response = qa({"query": query})
+
            answer = response["result"]
            source_docs = response["source_documents"]

[…]
            # Format sources
            sources = []
            for i, doc in enumerate(source_docs):
+                 # Extract blockchain verification info if available
                blockchain_info = None
                if "blockchain" in doc.metadata:
                    blockchain_info = {
[…]
                    }

                sources.append({
+                     "content": doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content,
                    "source": doc.metadata.get("source", "Unknown"),
                    "blockchain": blockchain_info
                })

+             # Log query to blockchain if enabled and connected
            blockchain_log = None
            if self.use_blockchain and self.blockchain and self.blockchain.is_connected:
+                 try:
+                     with st.status("Logging query to blockchain..."):
+                         log_result = self.blockchain.log_query(query, answer)
+
+                         if log_result.get("status"):  # Success
+                             blockchain_log = {
+                                 "logged": True,
+                                 "query_id": log_result.get("query_id", ""),
+                                 "tx_hash": log_result.get("tx_hash", ""),
+                                 "block_number": log_result.get("block_number", 0)
+                             }
+                         else:
+                             st.error(f"Error logging to blockchain: {log_result.get('error', 'Unknown error')}")
+                 except Exception as e:
+                     st.error(f"Error logging to blockchain: {str(e)}")
+
            return {
                "answer": answer,
                "sources": sources,
                "query_time": query_time,
+                 "blockchain_log": blockchain_log
            }

        except Exception as e:
+             st.error(f"Error generating answer: {str(e)}")
            return f"Error: {str(e)}"

+     def get_performance_metrics(self):
+         """Return performance metrics for the RAG system."""
+         if not self.processing_times:
+             return None
+
+         return {
+             "documents_processed": self.documents_processed,
+             "index_building_time": self.processing_times.get("index_building", 0),
+             "total_processing_time": self.processing_times.get("total_time", 0),
+             "memory_used_gb": self.processing_times.get("memory_used_gb", 0),
+             "device": self.device,
+             "embedding_model": self.embedding_model_name,
+             "blockchain_enabled": self.use_blockchain,
+             "blockchain_connected": self.blockchain.is_connected if self.blockchain else False
+         }
+

# Helper function to initialize session state
def initialize_session_state():
+     """Initialize Streamlit session state variables."""
    if "rag" not in st.session_state:
        st.session_state.rag = None
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "temp_dir" not in st.session_state:
        st.session_state.temp_dir = None
+     if "metamask_connected" not in st.session_state:
+         st.session_state.metamask_connected = False

# Helper function to clean up temporary files
def cleanup_temp_files():
+     """Clean up temporary files when application exits."""
    if st.session_state.get('temp_dir') and os.path.exists(st.session_state.temp_dir):
        try:
            shutil.rmtree(st.session_state.temp_dir)
+             print(f"Cleaned up temporary directory: {st.session_state.temp_dir}")
        except Exception as e:
            print(f"Error cleaning up temporary directory: {e}")


+ # Streamlit UI
def main():
+     st.set_page_config(page_title="Blockchain-Enabled RAG System", layout="wide")
+
+     st.title("GPU-Accelerated PDF Question Answering with MetaMask Blockchain Verification")
+     st.markdown("Upload PDFs, verify them on blockchain with MetaMask, and ask questions with audit log")

    # Initialize session state
    initialize_session_state()

+     # MetaMask Connection Section
+     st.header("MetaMask Connection")
+     st.markdown("Connect your MetaMask wallet to verify documents and log queries on the blockchain.")
+
+     # Add MetaMask connector and get connection info
+     metamask_info = metamask_connector()
+
+     # Display MetaMask connection status
+     if metamask_info and metamask_info.get("connected"):
+         st.success(f"MetaMask Connected: {metamask_info.get('address')}")
+         st.info(f"Network: {metamask_info.get('network_name')}")
+         st.session_state.metamask_connected = True
+     else:
+         st.warning("MetaMask not connected. Please connect your wallet to use blockchain features.")
+         st.session_state.metamask_connected = False
+
+     # Update RAG system with MetaMask connection if needed
+     if st.session_state.rag and metamask_info:
+         is_connected = st.session_state.rag.update_blockchain_connection(metamask_info)
+         if is_connected:
+             st.success("RAG system updated with MetaMask connection")

+     # Sidebar for configuration and file upload
    with st.sidebar:
+         st.header("Configuration")

        # GPU Detection
        gpu_available = torch.cuda.is_available()
        if gpu_available:
+             try:
+                 gpu_info = torch.cuda.get_device_properties(0)
+                 st.success(f"GPU detected: {gpu_info.name} ({gpu_info.total_memory / 1024**3:.1f} GB)")
+             except Exception as e:
+                 st.warning(f"GPU detected but couldn't get properties: {str(e)}")
+                 st.info("Running with limited GPU information")
        else:
+             st.warning("No GPU detected. Running in CPU mode.")

+         # Model selection
+         llm_model = st.selectbox(
            "LLM Model",
            options=[
+                 "mistralai/Mistral-7B-Instruct-v0.2",
                "google/flan-t5-base",
+                 "tiiuae/falcon-7b-instruct"
            ],
            index=0
        )

+         embedding_model = st.selectbox(
            "Embedding Model",
            options=[
+                 "sentence-transformers/all-mpnet-base-v2",
                "sentence-transformers/all-MiniLM-L6-v2",
+                 "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
            ],
+             index=1  # all-MiniLM-L6-v2 is smaller and faster
        )

+         use_gpu = st.checkbox("Use GPU Acceleration", value=gpu_available)
+
+         # Blockchain configuration
+         st.header("Blockchain Configuration")
+         use_blockchain = st.checkbox("Enable Blockchain Verification", value=True)
+
+         if use_blockchain:
+             contract_address = st.text_input("Contract Address",
+                                              value="0x0000000000000000000000000000000000000000")
+
+             # Display MetaMask connection status in sidebar
+             if metamask_info and metamask_info.get("connected"):
+                 st.success(f"MetaMask Connected: {metamask_info.get('address')[:10]}...")
+             else:
+                 st.warning("MetaMask not connected. Please connect your wallet above.")
+
+             if not contract_address or contract_address == "0x0000000000000000000000000000000000000000":
+                 st.error("Please deploy the contract and enter its address")

+         # Advanced options
+         with st.expander("Advanced Options"):
+             chunk_size = st.slider("Chunk Size", 100, 2000, 1000)
+             chunk_overlap = st.slider("Chunk Overlap", 0, 500, 200)

        # Initialize button
+         if st.button("Initialize System"):
+             with st.spinner("Initializing RAG system..."):
+                 if use_blockchain and not contract_address:
+                     st.error("Contract address is required for blockchain integration")
+                 else:
+                     st.session_state.rag = BlockchainEnabledRAG(
+                         llm_model_name=llm_model,
+                         embedding_model_name=embedding_model,
+                         chunk_size=chunk_size,
+                         chunk_overlap=chunk_overlap,
+                         use_gpu=use_gpu and gpu_available,
+                         use_blockchain=use_blockchain,
+                         contract_address=contract_address if use_blockchain else None
+                     )
+
+                     # Update with current MetaMask connection if available
+                     if use_blockchain and metamask_info:
+                         st.session_state.rag.update_blockchain_connection(metamask_info)
+
+                     st.success(f"System initialized with {embedding_model} on {st.session_state.rag.device}")
+                     if use_blockchain:
+                         if metamask_info and metamask_info.get("connected"):
+                             st.success("Blockchain verification enabled with MetaMask")
+                         else:
+                             st.warning("Blockchain verification enabled but MetaMask not connected")

+         st.header("Upload Documents")
+         uploaded_files = st.file_uploader("Select PDFs", type="pdf", accept_multiple_files=True)

+         if uploaded_files and st.button("Process PDFs"):
            if not st.session_state.rag:
+                 with st.spinner("Initializing RAG system..."):
+                     st.session_state.rag = BlockchainEnabledRAG(
                        llm_model_name=llm_model,
                        embedding_model_name=embedding_model,
+                         chunk_size=chunk_size,
+                         chunk_overlap=chunk_overlap,
                        use_gpu=use_gpu and gpu_available,
                        use_blockchain=use_blockchain,
                        contract_address=contract_address if use_blockchain else None
                    )

+                     # Update with current MetaMask connection if available
+                     if use_blockchain and metamask_info:
+                         st.session_state.rag.update_blockchain_connection(metamask_info)

            success = st.session_state.rag.process_pdfs(uploaded_files)
            if success:
+                 metrics = st.session_state.rag.get_performance_metrics()
+                 if metrics:
+                     st.success("PDFs processed successfully!")
+                     with st.expander("Performance Metrics"):
+                         st.markdown(f"**Documents processed:** {metrics['documents_processed']} chunks")
+                         st.markdown(f"**Index building time:** {metrics['index_building_time']:.2f} seconds")
+                         st.markdown(f"**Total processing time:** {metrics['total_processing_time']:.2f} seconds")
+                         st.markdown(f"**Memory used:** {metrics['memory_used_gb']:.2f} GB")
+                         st.markdown(f"**Device used:** {metrics['device']}")
+                         st.markdown(f"**Blockchain verification:** {'Enabled' if metrics['blockchain_enabled'] else 'Disabled'}")
+                         st.markdown(f"**Blockchain connected:** {'Yes' if metrics.get('blockchain_connected') else 'No'}")

+     # Blockchain verification info
+     if st.session_state.rag and st.session_state.rag.use_blockchain:
+         if st.session_state.metamask_connected:
+             st.info("Blockchain verification is enabled with MetaMask. Documents are cryptographically verified and queries are logged with immutable audit trail.")
+         else:
+             st.warning("Blockchain verification is enabled but MetaMask is not connected. Please connect your MetaMask wallet to use blockchain features.")

+     # Display chat messages
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             if message["role"] == "user":
+                 st.markdown(message["content"])
+             else:
+                 if isinstance(message["content"], dict):
+                     st.markdown(message["content"]["answer"])
+
+                     if "query_time" in message["content"]:
+                         st.caption(f"Response time: {message['content']['query_time']:.2f} seconds")
+
+                     # Display blockchain log if available
+                     if "blockchain_log" in message["content"] and message["content"]["blockchain_log"]:
+                         blockchain_log = message["content"]["blockchain_log"]
+                         st.success(f"Query logged on blockchain | Transaction: {blockchain_log['tx_hash'][:10]}...")
+
+                     # Display sources in expander
+                     if "sources" in message["content"] and message["content"]["sources"]:
+                         with st.expander("View Sources"):
+                             for i, source in enumerate(message["content"]["sources"]):
+                                 st.markdown(f"**Source {i+1}: {source['source']}**")
+
+                                 # Show blockchain verification if available
+                                 if source.get("blockchain"):
+                                     st.success(f"Verified on blockchain | TX: {source['blockchain']['tx_hash'][:10]}...")
+
+                                 st.text(source["content"])
+                                 st.divider()
+                 else:
+                     st.markdown(message["content"])

+     # Chat input
+     if prompt := st.chat_input("Ask a question about your PDFs..."):
+         # Add user message to chat
+         st.session_state.messages.append({"role": "user", "content": prompt})

+         # Display user message
+         with st.chat_message("user"):
+             st.markdown(prompt)

+         # Check if system is initialized
+         if not st.session_state.rag:
+             with st.chat_message("assistant"):
+                 message = "Please initialize the system and process PDFs first."
+                 st.markdown(message)
+                 st.session_state.messages.append({"role": "assistant", "content": message})

+         # Get response if vector store is ready
+         elif st.session_state.rag.vector_store:
+             with st.chat_message("assistant"):
+                 response = st.session_state.rag.ask(prompt)
+                 st.session_state.messages.append({"role": "assistant", "content": response})

+                 if isinstance(response, dict):
+                     st.markdown(response["answer"])

+                     if "query_time" in response:
+                         st.caption(f"Response time: {response['query_time']:.2f} seconds")

+                     # Display blockchain log if available
+                     if "blockchain_log" in response and response["blockchain_log"]:
+                         blockchain_log = response["blockchain_log"]
+                         st.success(f"Query logged on blockchain | Transaction: {blockchain_log['tx_hash'][:10]}...")

+                     # Display sources in expander
+                     if "sources" in response and response["sources"]:
+                         with st.expander("View Sources"):
+                             for i, source in enumerate(response["sources"]):
+                                 st.markdown(f"**Source {i+1}: {source['source']}**")
+
+                                 # Show blockchain verification if available
+                                 if source.get("blockchain"):
+                                     st.success(f"Verified on blockchain | TX: {source['blockchain']['tx_hash'][:10]}...")
+
+                                 st.text(source["content"])
+                                 st.divider()
+                 else:
+                     st.markdown(response)
        else:
+             with st.chat_message("assistant"):
+                 message = "Please upload and process PDF files first."
+                 st.markdown(message)
+                 st.session_state.messages.append({"role": "assistant", "content": message})
+

# Main entry point
if __name__ == "__main__":
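
Note on the imported MetaMask component: `metamask_connector` comes from a `metamask_component` module that is not part of this change, so only its return shape can be read off the calls in main() (keys "connected", "address", "network_id", "network_name"). A minimal local stand-in, useful for running the app without a browser wallet, might look like the sketch below; the checkbox, the placeholder address, and the network values are assumptions, not the real component.

# Hypothetical stand-in for metamask_component.metamask_connector.
# The real component (not shown in this diff) would render a MetaMask connect
# button and report the wallet state; this stub only fakes that dict.
import streamlit as st

def metamask_connector():
    """Return a dict shaped like the one main() expects from the real component."""
    if st.sidebar.checkbox("Simulate MetaMask connection"):
        return {
            "connected": True,
            "address": "0x0000000000000000000000000000000000000001",  # placeholder address
            "network_id": 11155111,     # assumed test network id
            "network_name": "Sepolia",  # assumed test network name
        }
    return {"connected": False}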
|
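Likewise, `BlockchainManagerMetaMask` is imported from `blockchain_utils_metamask`, which is also outside this diff. The sketch below is a hypothetical stub that exposes only the interface the new class relies on (`update_connection`, `is_connected`, `verify_document`, `log_query`, and the dict keys read from their results); the file hashing mirrors the removed BlockchainVerifier.compute_file_hash, while the fake transaction hash and block number are assumptions for local testing, not the real on-chain behaviour.

# Hypothetical stub of blockchain_utils_metamask.BlockchainManagerMetaMask.
# The real module would build and submit transactions signed by MetaMask.
import hashlib
import os
import time

class BlockchainManagerMetaMask:
    def __init__(self, contract_address=None):
        self.contract_address = contract_address
        self.is_connected = False
        self.user_address = None
        self.network_id = None

    def update_connection(self, is_connected, user_address=None, network_id=None):
        # Called by BlockchainEnabledRAG.update_blockchain_connection()
        self.is_connected = is_connected
        self.user_address = user_address
        self.network_id = network_id

    def _fake_tx_hash(self):
        # Random 32-byte hex string standing in for a real transaction hash
        return "0x" + os.urandom(32).hex()

    def verify_document(self, document_id, file_path):
        if not self.is_connected:
            return {"status": False, "error": "Wallet not connected"}
        # SHA-256 of the uploaded file, as in the removed BlockchainVerifier
        sha256 = hashlib.sha256()
        with open(file_path, "rb") as f:
            for block in iter(lambda: f.read(4096), b""):
                sha256.update(block)
        return {
            "status": True,
            "document_id": document_id,
            "document_hash": sha256.hexdigest(),
            "tx_hash": self._fake_tx_hash(),
            "block_number": 0,  # placeholder
        }

    def log_query(self, query_text, answer_text):
        if not self.is_connected:
            return {"status": False, "error": "Wallet not connected"}
        return {
            "status": True,
            "query_id": f"query_{int(time.time())}",
            "tx_hash": self._fake_tx_hash(),
            "block_number": 0,  # placeholder
        }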