vikramronavrsc committed on
Commit
06709d1
·
verified ·
1 Parent(s): 635881c

Upload 2 files

Browse files
Files changed (2) hide show
  1. main-metamask.py +631 -0
  2. metamask-requirements.txt +13 -0
main-metamask.py ADDED
@@ -0,0 +1,631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main_metamask.py
2
+ import os
3
+ import tempfile
4
+ import shutil
5
+ import PyPDF2
6
+ import streamlit as st
7
+ import torch
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.llms import HuggingFaceHub
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain_community.vectorstores import FAISS
12
+ from langchain.chains import RetrievalQA
13
+ from langchain.docstore.document import Document
14
+ from langchain.prompts import PromptTemplate
15
+ import time
16
+ import psutil
17
+ import uuid
18
+ import atexit
19
+ from blockchain_utils_metamask import BlockchainManagerMetaMask
20
+ from metamask_component import metamask_connector
21
+
22
+
23
class BlockchainEnabledRAG:
    """PDF question answering (RAG) with optional MetaMask-backed verification.

    The instance owns a text splitter, an embedding model, a hosted LLM and,
    once ``process_pdfs`` has succeeded, an in-memory FAISS index.  When
    blockchain support is enabled and a wallet is connected, processed
    documents are registered through ``BlockchainManagerMetaMask`` and every
    answered query is logged through it.
    """

    # Prompt handed to the "stuff" chain; {context} and {question} are filled
    # in by RetrievalQA at query time.
    _PROMPT_TEMPLATE = (
        "\n"
        "You are an AI assistant that provides accurate information based on PDF documents.\n"
        "\n"
        "Use the following context to answer the question. Be detailed and precise in your answer.\n"
        "If the answer is not in the context, say \"I don't have enough information to answer this question.\"\n"
        "\n"
        "Context:\n"
        "{context}\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "Answer:\n"
    )

    # Source excerpts shown in the UI are cut to this many characters.
    _SOURCE_PREVIEW_CHARS = 300

    def __init__(self,
                 llm_model_name="mistralai/Mistral-7B-Instruct-v0.2",
                 embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
                 chunk_size=1000,
                 chunk_overlap=200,
                 use_gpu=True,
                 use_blockchain=False,
                 contract_address=None):
        """
        Initialize the GPU-efficient RAG system with MetaMask blockchain integration.

        Args:
            llm_model_name: The HuggingFace model for text generation
            embedding_model_name: The HuggingFace model for embeddings
            chunk_size: Size of document chunks
            chunk_overlap: Overlap between chunks; reduced automatically when
                it is larger than ``chunk_size``
            use_gpu: Whether to use GPU acceleration
            use_blockchain: Whether to enable blockchain verification
            contract_address: Address of the deployed RAG Document Verifier contract
        """
        self.llm_model_name = llm_model_name
        self.embedding_model_name = embedding_model_name
        self.use_gpu = use_gpu and torch.cuda.is_available()
        self.use_blockchain = use_blockchain

        # Device selection for embeddings
        self.device = "cuda" if self.use_gpu else "cpu"
        st.sidebar.info(f"Using device: {self.device}")

        # The splitter refuses an overlap larger than the chunk size, so
        # repair that combination instead of crashing during construction.
        effective_overlap = self._resolve_chunk_overlap(chunk_size, chunk_overlap)
        if effective_overlap != chunk_overlap:
            st.sidebar.warning(
                f"Chunk overlap {chunk_overlap} exceeds chunk size {chunk_size}; "
                f"using {effective_overlap} instead."
            )

        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=effective_overlap,
            length_function=len,
        )

        self.embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model_name,
            model_kwargs={"device": self.device}
        )

        # Initialize LLM using HuggingFaceHub instead of Ollama.  The token is
        # read before the try block so the fallback below can reuse it.
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            st.warning("No HuggingFace token found. Using model without authentication.")
        try:
            self.llm = HuggingFaceHub(
                repo_id=llm_model_name,
                huggingfacehub_api_token=hf_token,
                model_kwargs={"temperature": 0.7, "max_length": 1024}
            )
        except Exception as e:
            st.error(f"Error initializing LLM: {str(e)}")
            st.info("Trying to initialize with default model...")
            # Fallback to a smaller model, authenticated the same way.
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-small",
                huggingfacehub_api_token=hf_token,
                model_kwargs={"temperature": 0.7, "max_length": 512}
            )

        # Populated by process_pdfs().
        self.vector_store = None
        self.documents_processed = 0

        # Per-file entries plus "index_building", "total_time" and
        # "memory_used_gb" once an index has been built.
        self.processing_times = {}

        # Initialize blockchain manager if enabled
        self.blockchain = None
        if use_blockchain:
            try:
                self.blockchain = BlockchainManagerMetaMask(
                    contract_address=contract_address
                )
                st.sidebar.success("Blockchain manager initialized. Please connect MetaMask to continue.")
            except Exception as e:
                st.sidebar.error(f"Failed to initialize blockchain manager: {str(e)}")
                self.use_blockchain = False

    @staticmethod
    def _resolve_chunk_overlap(chunk_size, chunk_overlap):
        """Return an overlap the text splitter will accept for ``chunk_size``.

        An overlap larger than the chunk size is replaced by one fifth of the
        chunk size, the same ratio as the constructor defaults (200 / 1000).
        Any other value is returned unchanged.
        """
        if chunk_overlap > chunk_size:
            return chunk_size // 5
        return chunk_overlap

    @staticmethod
    def _safe_filename(name):
        """Reduce an uploaded file name to a bare base name.

        The name comes from the client, so directory components (with either
        slash style) are dropped before it is joined onto the scratch
        directory.  Names that reduce to nothing usable get a random name.
        """
        base = os.path.basename(str(name).replace("\\", "/"))
        if base in ("", ".", ".."):
            return f"upload_{uuid.uuid4().hex}.pdf"
        return base

    @staticmethod
    def _extract_pdf_text(pdf_path):
        """Return the text of every page that yields any, each followed by a blank line."""
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            page_texts = (page.extract_text() for page in reader.pages)
            return "".join(f"{page_text}\n\n" for page_text in page_texts if page_text)

    @classmethod
    def _format_source(cls, doc):
        """Build the UI-facing summary of one retrieved chunk."""
        content = doc.page_content
        if len(content) > cls._SOURCE_PREVIEW_CHARS:
            content = content[:cls._SOURCE_PREVIEW_CHARS] + "..."

        # Only chunks that went through a successful verification carry this.
        chain_meta = doc.metadata.get("blockchain")
        blockchain_info = None
        if chain_meta is not None:
            blockchain_info = {
                "verified": chain_meta.get("verified", False),
                "document_id": chain_meta.get("document_id", ""),
                "tx_hash": chain_meta.get("tx_hash", ""),
            }

        return {
            "content": content,
            "source": doc.metadata.get("source", "Unknown"),
            "blockchain": blockchain_info,
        }

    def update_blockchain_connection(self, metamask_info):
        """Forward MetaMask state to the blockchain manager.

        Returns the manager's ``is_connected`` flag afterwards, or ``False``
        when there is no manager or no MetaMask info.
        """
        if self.blockchain and metamask_info:
            self.blockchain.update_connection(
                is_connected=metamask_info.get("connected", False),
                user_address=metamask_info.get("address"),
                network_id=metamask_info.get("network_id")
            )
            return self.blockchain.is_connected
        return False

    def _blockchain_ready(self):
        """True when blockchain use is enabled and the manager reports a connection."""
        return bool(self.use_blockchain and self.blockchain and self.blockchain.is_connected)

    def _verify_document(self, display_name, pdf_path, split_docs, status):
        """Register one PDF on the blockchain and tag its chunks on success.

        Failures are reported in the sidebar and never abort processing.
        """
        try:
            # Create a unique document ID
            document_id = f"{display_name}_{uuid.uuid4().hex[:8]}"

            status.update(label=f"Verifying {display_name} on blockchain...")
            verification = self.blockchain.verify_document(document_id, pdf_path)

            if verification.get('status'):  # Success
                st.sidebar.success(f"✅ {display_name} verified on blockchain")
                if 'tx_hash' in verification:
                    st.sidebar.info(f"Transaction: {verification['tx_hash'][:10]}...")

                # The chunk objects are shared with the caller's list, so the
                # metadata added here ends up in the vector store.
                for doc in split_docs:
                    doc.metadata["blockchain"] = {
                        "verified": True,
                        "document_id": document_id,
                        "document_hash": verification.get("document_hash", ""),
                        "tx_hash": verification.get("tx_hash", ""),
                        "block_number": verification.get("block_number", 0)
                    }
            else:
                st.sidebar.warning(f"❌ Failed to verify {display_name} on blockchain")
                if 'error' in verification:
                    st.sidebar.error(f"Error: {verification['error']}")
        except Exception as e:
            st.sidebar.error(f"Blockchain verification error: {str(e)}")

    def process_pdfs(self, pdf_files):
        """Process PDF files, create a vector store, and verify documents on blockchain.

        Args:
            pdf_files: Uploaded file objects exposing ``name`` and ``getbuffer()``.

        Returns:
            ``True`` when a vector index was built, ``False`` when no text
            could be extracted or index construction failed.
        """
        all_docs = []

        with st.status("Processing PDF files...") as status:
            # A previous run leaves its scratch directory registered in the
            # session; remove it so repeated processing does not pile up
            # copies of the uploads on disk.
            previous_dir = st.session_state.get('temp_dir')
            if previous_dir and os.path.isdir(previous_dir):
                shutil.rmtree(previous_dir, ignore_errors=True)

            temp_dir = tempfile.mkdtemp()
            st.session_state['temp_dir'] = temp_dir

            start_time = time.time()

            # System-wide memory in use before processing, in GB.
            mem_before = psutil.virtual_memory().used / (1024 * 1024 * 1024)

            for i, pdf_file in enumerate(pdf_files):
                try:
                    file_start_time = time.time()

                    # Save uploaded file to temp directory under a sanitized name.
                    pdf_path = os.path.join(temp_dir, self._safe_filename(pdf_file.name))
                    with open(pdf_path, "wb") as f:
                        f.write(pdf_file.getbuffer())

                    status.update(label=f"Processing {pdf_file.name} ({i+1}/{len(pdf_files)})...")

                    text = self._extract_pdf_text(pdf_path)

                    docs = [Document(page_content=text, metadata={"source": pdf_file.name})]
                    split_docs = self.text_splitter.split_documents(docs)
                    all_docs.extend(split_docs)

                    # Verify document on blockchain if enabled and connected
                    if self._blockchain_ready():
                        self._verify_document(pdf_file.name, pdf_path, split_docs, status)
                    elif self.use_blockchain:
                        st.sidebar.warning("MetaMask not connected. Document not verified on blockchain.")

                    processing_time = time.time() - file_start_time

                    st.sidebar.success(f"Processed {pdf_file.name}: {len(split_docs)} chunks in {processing_time:.2f}s")
                    self.processing_times[pdf_file.name] = {
                        "chunks": len(split_docs),
                        "time": processing_time
                    }

                except Exception as e:
                    # One unreadable file must not stop the remaining ones.
                    st.sidebar.error(f"Error processing {pdf_file.name}: {str(e)}")

            if not all_docs:
                status.update(label="No content extracted from PDFs", state="error")
                return False

            status.update(label="Building vector index...")
            try:
                index_start_time = time.time()
                self.vector_store = FAISS.from_documents(all_docs, self.embeddings)
                index_time = time.time() - index_start_time

                # Difference in system-wide usage; other processes influence it.
                mem_after = psutil.virtual_memory().used / (1024 * 1024 * 1024)
                mem_used = mem_after - mem_before

                total_time = time.time() - start_time

                status.update(label=f"Completed processing {len(all_docs)} chunks in {total_time:.2f}s", state="complete")

                # Save performance metrics
                self.processing_times["index_building"] = index_time
                self.processing_times["total_time"] = total_time
                self.processing_times["memory_used_gb"] = mem_used
                self.documents_processed = len(all_docs)

                return True
            except Exception as e:
                st.error(f"Error creating vector store: {str(e)}")
                status.update(label="Error creating vector store", state="error")
                return False

    def _log_query(self, query, answer):
        """Log a query/answer pair on the blockchain.

        Returns the log record on success, otherwise ``None`` (not enabled,
        not connected, or logging failed; failures are shown via ``st.error``).
        """
        if not self._blockchain_ready():
            return None

        try:
            with st.status("Logging query to blockchain..."):
                log_result = self.blockchain.log_query(query, answer)

            if log_result.get("status"):  # Success
                return {
                    "logged": True,
                    "query_id": log_result.get("query_id", ""),
                    "tx_hash": log_result.get("tx_hash", ""),
                    "block_number": log_result.get("block_number", 0)
                }
            st.error(f"Error logging to blockchain: {log_result.get('error', 'Unknown error')}")
        except Exception as e:
            st.error(f"Error logging to blockchain: {str(e)}")
        return None

    def ask(self, query):
        """Ask a question and get an answer based on the PDFs with blockchain logging.

        Returns:
            A dict with ``answer``, ``sources``, ``query_time`` and
            ``blockchain_log`` on success.  A plain string is returned when no
            index exists yet or when answering fails; callers distinguish the
            two with ``isinstance(result, dict)``.
        """
        if not self.vector_store:
            return "Please upload and process PDF files first."

        try:
            prompt = PromptTemplate(
                template=self._PROMPT_TEMPLATE,
                input_variables=["context", "question"]
            )

            # Start timing the query
            query_start_time = time.time()

            qa = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
                chain_type_kwargs={"prompt": prompt},
                return_source_documents=True
            )

            with st.status("Searching documents and generating answer..."):
                response = qa({"query": query})

            answer = response["result"]
            source_docs = response["source_documents"]

            # Measured before blockchain logging so it reflects retrieval
            # and generation only.
            query_time = time.time() - query_start_time

            sources = [self._format_source(doc) for doc in source_docs]
            blockchain_log = self._log_query(query, answer)

            return {
                "answer": answer,
                "sources": sources,
                "query_time": query_time,
                "blockchain_log": blockchain_log
            }

        except Exception as e:
            st.error(f"Error generating answer: {str(e)}")
            return f"Error: {str(e)}"

    def get_performance_metrics(self):
        """Return performance metrics for the RAG system, or ``None`` if nothing was processed."""
        if not self.processing_times:
            return None

        return {
            "documents_processed": self.documents_processed,
            "index_building_time": self.processing_times.get("index_building", 0),
            "total_processing_time": self.processing_times.get("total_time", 0),
            "memory_used_gb": self.processing_times.get("memory_used_gb", 0),
            "device": self.device,
            "embedding_model": self.embedding_model_name,
            "blockchain_enabled": self.use_blockchain,
            "blockchain_connected": self.blockchain.is_connected if self.blockchain else False
        }
354
+
355
+
356
+ # Helper function to initialize session state
357
def initialize_session_state():
    """Seed the Streamlit session with every key the app reads later.

    Keys that already exist are left untouched, so values survive reruns.
    """
    initial_values = (
        ("rag", None),                  # BlockchainEnabledRAG instance, once created
        ("messages", []),               # chat history
        ("temp_dir", None),             # scratch directory holding uploaded PDFs
        ("metamask_connected", False),  # last known wallet connection state
    )
    for key, initial in initial_values:
        if key not in st.session_state:
            st.session_state[key] = initial
367
+
368
+ # Helper function to clean up temporary files
369
def cleanup_temp_files():
    """Delete the session's scratch directory, if one exists.

    Meant to run as an exit hook: failures are printed rather than raised.
    """
    scratch_dir = st.session_state.get('temp_dir')
    if not scratch_dir or not os.path.exists(scratch_dir):
        return

    try:
        shutil.rmtree(scratch_dir)
        print(f"Cleaned up temporary directory: {scratch_dir}")
    except Exception as e:
        print(f"Error cleaning up temporary directory: {e}")
377
+
378
+
379
+ # Streamlit UI
380
def _create_rag(llm_model, embedding_model, chunk_size, chunk_overlap,
                use_gpu, use_blockchain, contract_address, metamask_info):
    """Build a BlockchainEnabledRAG and hand it the current MetaMask state."""
    rag = BlockchainEnabledRAG(
        llm_model_name=llm_model,
        embedding_model_name=embedding_model,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        use_gpu=use_gpu,
        use_blockchain=use_blockchain,
        contract_address=contract_address if use_blockchain else None
    )

    # Update with current MetaMask connection if available
    if use_blockchain and metamask_info:
        rag.update_blockchain_connection(metamask_info)
    return rag


def _render_assistant_content(content):
    """Render one assistant reply: either a plain string or an answer dict from ``ask``."""
    if not isinstance(content, dict):
        st.markdown(content)
        return

    st.markdown(content["answer"])

    if "query_time" in content:
        st.caption(f"Response time: {content['query_time']:.2f} seconds")

    # Display blockchain log if available
    blockchain_log = content.get("blockchain_log")
    if blockchain_log:
        st.success(f"✅ Query logged on blockchain | Transaction: {blockchain_log['tx_hash'][:10]}...")

    # Display sources in expander
    sources = content.get("sources")
    if sources:
        with st.expander("📄 View Sources"):
            for i, source in enumerate(sources):
                st.markdown(f"**Source {i+1}: {source['source']}**")

                # Show blockchain verification if available
                if source.get("blockchain"):
                    st.success(f"✅ Verified on blockchain | TX: {source['blockchain']['tx_hash'][:10]}...")

                st.text(source["content"])
                st.divider()


# Streamlit UI
def main():
    """Render the whole Streamlit page: wallet status, sidebar configuration,
    document upload/processing and the chat interface.

    Streamlit re-executes this function on every interaction; anything that
    must survive a rerun lives in ``st.session_state``.
    """
    st.set_page_config(page_title="Blockchain-Enabled RAG System", layout="wide")

    st.title("🚀 GPU-Accelerated PDF Question Answering with MetaMask Blockchain Verification")
    st.markdown("Upload PDFs, verify them on blockchain with MetaMask, and ask questions with audit log")

    # Initialize session state
    initialize_session_state()

    # MetaMask Connection Section
    st.header("🦊 MetaMask Connection")
    st.markdown("Connect your MetaMask wallet to verify documents and log queries on the blockchain.")

    # Add MetaMask connector and get connection info
    metamask_info = metamask_connector()
    wallet_connected = bool(metamask_info and metamask_info.get("connected"))

    # Display MetaMask connection status
    if wallet_connected:
        st.success(f"✅ MetaMask Connected: {metamask_info.get('address')}")
        st.info(f"Network: {metamask_info.get('network_name')}")
    else:
        st.warning("⚠️ MetaMask not connected. Please connect your wallet to use blockchain features.")
    st.session_state.metamask_connected = wallet_connected

    # Update RAG system with MetaMask connection if needed
    if st.session_state.rag and metamask_info:
        is_connected = st.session_state.rag.update_blockchain_connection(metamask_info)
        if is_connected:
            st.success("RAG system updated with MetaMask connection")

    # Sidebar for configuration and file upload
    with st.sidebar:
        st.header("⚙️ Configuration")

        # GPU Detection
        gpu_available = torch.cuda.is_available()
        if gpu_available:
            try:
                gpu_info = torch.cuda.get_device_properties(0)
                st.success(f"GPU detected: {gpu_info.name} ({gpu_info.total_memory / 1024**3:.1f} GB)")
            except Exception as e:
                st.warning(f"GPU detected but couldn't get properties: {str(e)}")
                st.info("Running with limited GPU information")
        else:
            st.warning("No GPU detected. Running in CPU mode.")

        # Model selection
        llm_model = st.selectbox(
            "LLM Model",
            options=[
                "mistralai/Mistral-7B-Instruct-v0.2",
                "google/flan-t5-base",
                "tiiuae/falcon-7b-instruct"
            ],
            index=0
        )

        embedding_model = st.selectbox(
            "Embedding Model",
            options=[
                "sentence-transformers/all-mpnet-base-v2",
                "sentence-transformers/all-MiniLM-L6-v2",
                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
            ],
            index=1  # all-MiniLM-L6-v2 is smaller and faster
        )

        use_gpu = st.checkbox("Use GPU Acceleration", value=gpu_available)

        # Blockchain configuration
        st.header("🔗 Blockchain Configuration")
        use_blockchain = st.checkbox("Enable Blockchain Verification", value=True)

        # Always bound, so later code never depends on the checkbox state
        # to know whether the name exists.
        contract_address = None
        if use_blockchain:
            contract_address = st.text_input("Contract Address",
                                             value="0x0000000000000000000000000000000000000000")

            # Display MetaMask connection status in sidebar
            if wallet_connected:
                # A connected wallet may still report no address; avoid
                # slicing None.
                short_address = str(metamask_info.get('address') or '')[:10]
                st.success(f"✅ MetaMask Connected: {short_address}...")
            else:
                st.warning("⚠️ MetaMask not connected. Please connect your wallet above.")

            if not contract_address or contract_address == "0x0000000000000000000000000000000000000000":
                st.error("Please deploy the contract and enter its address")

        # Advanced options
        with st.expander("Advanced Options"):
            chunk_size = st.slider("Chunk Size", 100, 2000, 1000)
            chunk_overlap = st.slider("Chunk Overlap", 0, 500, 200)

        # Initialize button
        if st.button("Initialize System"):
            with st.spinner("Initializing RAG system..."):
                if use_blockchain and not contract_address:
                    st.error("Contract address is required for blockchain integration")
                else:
                    st.session_state.rag = _create_rag(
                        llm_model, embedding_model, chunk_size, chunk_overlap,
                        use_gpu and gpu_available, use_blockchain,
                        contract_address, metamask_info
                    )

                    st.success(f"System initialized with {embedding_model} on {st.session_state.rag.device}")
                    if use_blockchain:
                        if wallet_connected:
                            st.success("Blockchain verification enabled with MetaMask")
                        else:
                            st.warning("Blockchain verification enabled but MetaMask not connected")

        st.header("📄 Upload Documents")
        uploaded_files = st.file_uploader("Select PDFs", type="pdf", accept_multiple_files=True)

        if uploaded_files and st.button("Process PDFs"):
            # Processing implicitly initializes the system with the current
            # sidebar settings when the user skipped "Initialize System".
            if not st.session_state.rag:
                with st.spinner("Initializing RAG system..."):
                    st.session_state.rag = _create_rag(
                        llm_model, embedding_model, chunk_size, chunk_overlap,
                        use_gpu and gpu_available, use_blockchain,
                        contract_address, metamask_info
                    )

            success = st.session_state.rag.process_pdfs(uploaded_files)
            if success:
                metrics = st.session_state.rag.get_performance_metrics()
                if metrics:
                    st.success("PDFs processed successfully!")
                    with st.expander("💹 Performance Metrics"):
                        st.markdown(f"**Documents processed:** {metrics['documents_processed']} chunks")
                        st.markdown(f"**Index building time:** {metrics['index_building_time']:.2f} seconds")
                        st.markdown(f"**Total processing time:** {metrics['total_processing_time']:.2f} seconds")
                        st.markdown(f"**Memory used:** {metrics['memory_used_gb']:.2f} GB")
                        st.markdown(f"**Device used:** {metrics['device']}")
                        st.markdown(f"**Blockchain verification:** {'Enabled' if metrics['blockchain_enabled'] else 'Disabled'}")
                        st.markdown(f"**Blockchain connected:** {'Yes' if metrics.get('blockchain_connected') else 'No'}")

    # Blockchain verification info
    if st.session_state.rag and st.session_state.rag.use_blockchain:
        if st.session_state.metamask_connected:
            st.info("🔗 Blockchain verification is enabled with MetaMask. Documents are cryptographically verified and queries are logged with immutable audit trail.")
        else:
            st.warning("🔗 Blockchain verification is enabled but MetaMask is not connected. Please connect your MetaMask wallet to use blockchain features.")

    # Display chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            if message["role"] == "user":
                st.markdown(message["content"])
            else:
                _render_assistant_content(message["content"])

    # Chat input
    if prompt := st.chat_input("Ask a question about your PDFs..."):
        # Add user message to chat
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)

        rag = st.session_state.rag
        if not rag:
            # System was never initialized.
            reply = "Please initialize the system and process PDFs first."
        elif rag.vector_store:
            reply = None
        else:
            # Initialized, but no documents indexed yet.
            reply = "Please upload and process PDF files first."

        with st.chat_message("assistant"):
            if reply is None:
                # ask() renders its own progress widgets inside this bubble.
                reply = rag.ask(prompt)
                st.session_state.messages.append({"role": "assistant", "content": reply})
                _render_assistant_content(reply)
            else:
                st.markdown(reply)
                st.session_state.messages.append({"role": "assistant", "content": reply})
624
+
625
+
626
+ # Main entry point
627
if __name__ == "__main__":
    # Remove the upload scratch directory when the interpreter shuts down.
    atexit.register(cleanup_temp_files)

    # Build the page.
    main()
metamask-requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.30.0
2
+ langchain>=0.0.292
3
+ langchain-huggingface>=0.0.3
4
+ langchain-community>=0.0.3
5
+ python-dotenv>=1.0.0
6
+ transformers>=4.33.3
7
+ faiss-cpu>=1.7.4
8
+ sentence-transformers>=2.2.2
9
+ PyPDF2>=3.0.1
10
+ psutil>=5.9.5
11
+ web3>=6.10.0
12
+ uuid>=1.30
13
+ huggingface-hub>=0.17.3