Upload 2 files
Browse files- main-metamask.py +631 -0
- metamask-requirements.txt +13 -0
main-metamask.py
ADDED
@@ -0,0 +1,631 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# main_metamask.py
|
2 |
+
import os
|
3 |
+
import tempfile
|
4 |
+
import shutil
|
5 |
+
import PyPDF2
|
6 |
+
import streamlit as st
|
7 |
+
import torch
|
8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
9 |
+
from langchain_community.llms import HuggingFaceHub
|
10 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11 |
+
from langchain_community.vectorstores import FAISS
|
12 |
+
from langchain.chains import RetrievalQA
|
13 |
+
from langchain.docstore.document import Document
|
14 |
+
from langchain.prompts import PromptTemplate
|
15 |
+
import time
|
16 |
+
import psutil
|
17 |
+
import uuid
|
18 |
+
import atexit
|
19 |
+
from blockchain_utils_metamask import BlockchainManagerMetaMask
|
20 |
+
from metamask_component import metamask_connector
|
21 |
+
|
22 |
+
|
23 |
+
class BlockchainEnabledRAG:
    """PDF question answering (RAG) with optional MetaMask blockchain auditing.

    The instance owns a text splitter, an embeddings model, a HuggingFace Hub
    LLM and (after ``process_pdfs``) a FAISS vector store. When blockchain
    support is enabled and a wallet is connected, processed documents are
    verified through ``BlockchainManagerMetaMask`` and answered queries are
    logged through it.
    """

    # Model used when the requested LLM cannot be initialized.
    _FALLBACK_LLM = "google/flan-t5-small"

    # File name used when an upload's name has no usable base component.
    _DEFAULT_UPLOAD_NAME = "upload.pdf"

    # Prompt for the "stuff" QA chain; {context} and {question} are filled by LangChain.
    _PROMPT_TEMPLATE = (
        "\n"
        "You are an AI assistant that provides accurate information based on PDF documents.\n"
        "\n"
        "Use the following context to answer the question. Be detailed and precise in your answer.\n"
        "If the answer is not in the context, say \"I don't have enough information to answer this question.\"\n"
        "\n"
        "Context:\n"
        "{context}\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "Answer:\n"
    )

    def __init__(self,
                 llm_model_name="mistralai/Mistral-7B-Instruct-v0.2",
                 embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
                 chunk_size=1000,
                 chunk_overlap=200,
                 use_gpu=True,
                 use_blockchain=False,
                 contract_address=None):
        """
        Initialize the GPU-efficient RAG system with MetaMask blockchain integration.

        Args:
            llm_model_name: The HuggingFace model for text generation
            embedding_model_name: The HuggingFace model for embeddings
            chunk_size: Size of document chunks
            chunk_overlap: Overlap between chunks
            use_gpu: Whether to use GPU acceleration
            use_blockchain: Whether to enable blockchain verification
            contract_address: Address of the deployed RAG Document Verifier contract
        """
        self.llm_model_name = llm_model_name
        self.embedding_model_name = embedding_model_name
        # GPU is only used when requested AND actually present.
        self.use_gpu = use_gpu and torch.cuda.is_available()
        self.use_blockchain = use_blockchain

        # Device selection for embeddings
        self.device = "cuda" if self.use_gpu else "cpu"
        st.sidebar.info(f"Using device: {self.device}")

        # Initialize text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )

        # Initialize embeddings model
        self.embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model_name,
            model_kwargs={"device": self.device},
        )

        # Use HF_TOKEN from environment variables. Read outside the try block
        # so the fallback below can reuse the same credentials.
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            st.warning("No HuggingFace token found. Using model without authentication.")

        # Initialize LLM using HuggingFaceHub instead of Ollama
        try:
            self.llm = HuggingFaceHub(
                repo_id=llm_model_name,
                huggingfacehub_api_token=hf_token,
                model_kwargs={"temperature": 0.7, "max_length": 1024},
            )
        except Exception as e:
            st.error(f"Error initializing LLM: {str(e)}")
            st.info("Trying to initialize with default model...")
            # Fallback to a smaller model, authenticated the same way as the
            # primary attempt (the token was previously dropped here).
            self.llm = HuggingFaceHub(
                repo_id=self._FALLBACK_LLM,
                huggingfacehub_api_token=hf_token,
                model_kwargs={"temperature": 0.7, "max_length": 512},
            )

        # Vector store is built lazily by process_pdfs().
        self.vector_store = None
        self.documents_processed = 0

        # Monitoring stats: per-file entries plus aggregate keys
        # ("index_building", "total_time", "memory_used_gb").
        self.processing_times = {}

        # Initialize blockchain manager if enabled
        self.blockchain = None
        if use_blockchain:
            try:
                self.blockchain = BlockchainManagerMetaMask(
                    contract_address=contract_address
                )
                st.sidebar.success("Blockchain manager initialized. Please connect MetaMask to continue.")
            except Exception as e:
                st.sidebar.error(f"Failed to initialize blockchain manager: {str(e)}")
                # Degrade to plain RAG rather than failing construction.
                self.use_blockchain = False

    def update_blockchain_connection(self, metamask_info):
        """Push MetaMask connection details into the blockchain manager.

        Args:
            metamask_info: Mapping read for the keys ``connected``, ``address``
                and ``network_id``.

        Returns:
            The manager's ``is_connected`` value after the update, or ``False``
            when there is no manager or no connection info.
        """
        if not (self.blockchain and metamask_info):
            return False

        self.blockchain.update_connection(
            is_connected=metamask_info.get("connected", False),
            user_address=metamask_info.get("address"),
            network_id=metamask_info.get("network_id"),
        )
        return self.blockchain.is_connected

    @staticmethod
    def _safe_upload_name(name):
        """Return only the base file name of an upload so it cannot escape the temp directory."""
        # Treat backslashes as separators too: the name comes from the client.
        base = os.path.basename(str(name).replace("\\", "/"))
        if base in ("", ".", ".."):
            return BlockchainEnabledRAG._DEFAULT_UPLOAD_NAME
        return base

    @staticmethod
    def _extract_pdf_text(pdf_path):
        """Return the text of every page that yields text, each followed by a blank line."""
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages with no extractable text are skipped.
            return "".join(f"{page_text}\n\n" for page_text in page_texts if page_text)

    def _verify_on_blockchain(self, file_name, pdf_path, split_docs, status):
        """Verify one document on-chain and tag its chunks with the verification result."""
        if not self.use_blockchain:
            return

        if not (self.blockchain and self.blockchain.is_connected):
            st.sidebar.warning("MetaMask not connected. Document not verified on blockchain.")
            return

        try:
            # Create a unique document ID
            document_id = f"{file_name}_{uuid.uuid4().hex[:8]}"

            status.update(label=f"Verifying {file_name} on blockchain...")
            verification = self.blockchain.verify_document(document_id, pdf_path)

            if verification.get('status'):  # Success
                st.sidebar.success(f"✅ {file_name} verified on blockchain")
                if 'tx_hash' in verification:
                    st.sidebar.info(f"Transaction: {verification['tx_hash'][:10]}...")

                # The chunks are the same objects already collected by the
                # caller, so this metadata reaches the vector store.
                for doc in split_docs:
                    doc.metadata["blockchain"] = {
                        "verified": True,
                        "document_id": document_id,
                        "document_hash": verification.get("document_hash", ""),
                        "tx_hash": verification.get("tx_hash", ""),
                        "block_number": verification.get("block_number", 0),
                    }
            else:
                st.sidebar.warning(f"❌ Failed to verify {file_name} on blockchain")
                if 'error' in verification:
                    st.sidebar.error(f"Error: {verification['error']}")
        except Exception as e:
            # Verification is best-effort; the document is still indexed.
            st.sidebar.error(f"Blockchain verification error: {str(e)}")

    def process_pdfs(self, pdf_files):
        """Process PDF files, create a vector store, and verify documents on blockchain.

        Args:
            pdf_files: Uploaded files exposing ``.name`` and ``.getbuffer()``.

        Returns:
            ``True`` when a vector store was built, ``False`` when no content
            was extracted or index creation failed.
        """
        all_docs = []

        with st.status("Processing PDF files...") as status:
            # Drop the directory left by a previous run so repeated processing
            # does not accumulate orphaned temp directories.
            # NOTE(review): assumes nothing keeps reading earlier uploads from disk — confirm.
            previous_dir = st.session_state.get('temp_dir')
            if previous_dir and os.path.isdir(previous_dir):
                shutil.rmtree(previous_dir, ignore_errors=True)

            # Create temporary directory for file storage
            temp_dir = tempfile.mkdtemp()
            st.session_state['temp_dir'] = temp_dir

            # Monitor processing time and memory usage
            start_time = time.time()
            mem_before = psutil.virtual_memory().used / (1024 ** 3)  # GB

            total_files = len(pdf_files)
            for position, pdf_file in enumerate(pdf_files, start=1):
                try:
                    file_start_time = time.time()

                    # Save uploaded file to temp directory
                    pdf_path = os.path.join(temp_dir, self._safe_upload_name(pdf_file.name))
                    with open(pdf_path, "wb") as f:
                        f.write(pdf_file.getbuffer())

                    status.update(label=f"Processing {pdf_file.name} ({position}/{total_files})...")

                    # Extract text, wrap it in one Document, then chunk it.
                    text = self._extract_pdf_text(pdf_path)
                    docs = [Document(page_content=text, metadata={"source": pdf_file.name})]
                    split_docs = self.text_splitter.split_documents(docs)
                    all_docs.extend(split_docs)

                    # Verify document on blockchain if enabled and connected
                    self._verify_on_blockchain(pdf_file.name, pdf_path, split_docs, status)

                    processing_time = time.time() - file_start_time

                    st.sidebar.success(f"Processed {pdf_file.name}: {len(split_docs)} chunks in {processing_time:.2f}s")
                    self.processing_times[pdf_file.name] = {
                        "chunks": len(split_docs),
                        "time": processing_time,
                    }
                except Exception as e:
                    # One bad file must not abort the remaining uploads.
                    st.sidebar.error(f"Error processing {pdf_file.name}: {str(e)}")

            if not all_docs:
                status.update(label="No content extracted from PDFs", state="error")
                return False

            status.update(label="Building vector index...")
            try:
                # Record the time taken to build the index
                index_start_time = time.time()
                self.vector_store = FAISS.from_documents(all_docs, self.embeddings)
                index_time = time.time() - index_start_time

                # Track memory after processing
                mem_after = psutil.virtual_memory().used / (1024 ** 3)  # GB
                mem_used = mem_after - mem_before

                total_time = time.time() - start_time

                status.update(label=f"Completed processing {len(all_docs)} chunks in {total_time:.2f}s", state="complete")

                # Save performance metrics
                self.processing_times["index_building"] = index_time
                self.processing_times["total_time"] = total_time
                self.processing_times["memory_used_gb"] = mem_used
                self.documents_processed = len(all_docs)

                return True
            except Exception as e:
                st.error(f"Error creating vector store: {str(e)}")
                status.update(label="Error creating vector store", state="error")
                return False

    def ask(self, query):
        """Ask a question and get an answer based on the PDFs with blockchain logging.

        Args:
            query: The user's question.

        Returns:
            On success, a dict with ``answer``, ``sources``, ``query_time`` and
            ``blockchain_log`` (``None`` unless the query was logged on-chain).
            A plain string when no vector store exists yet or when answering
            fails; callers distinguish the two with ``isinstance``.
        """
        if not self.vector_store:
            return "Please upload and process PDF files first."

        try:
            prompt = PromptTemplate(
                template=self._PROMPT_TEMPLATE,
                input_variables=["context", "question"],
            )

            # Start timing the query
            query_start_time = time.time()

            # Create QA chain over the top-4 retrieved chunks.
            qa = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(search_kwargs={"k": 4}),
                chain_type_kwargs={"prompt": prompt},
                return_source_documents=True,
            )

            # Get answer
            with st.status("Searching documents and generating answer..."):
                response = qa({"query": query})

            answer = response["result"]
            source_docs = response["source_documents"]

            # Calculate query time (excludes the blockchain logging below).
            query_time = time.time() - query_start_time

            # Format sources
            sources = []
            for doc in source_docs:
                # Extract blockchain verification info if available
                blockchain_info = None
                if "blockchain" in doc.metadata:
                    chain_meta = doc.metadata["blockchain"]
                    blockchain_info = {
                        "verified": chain_meta["verified"],
                        "document_id": chain_meta["document_id"],
                        "tx_hash": chain_meta["tx_hash"],
                    }

                content = doc.page_content
                sources.append({
                    # Long chunks are truncated to a 300-character preview.
                    "content": content[:300] + "..." if len(content) > 300 else content,
                    "source": doc.metadata.get("source", "Unknown"),
                    "blockchain": blockchain_info,
                })

            # Log query to blockchain if enabled and connected
            blockchain_log = None
            if self.use_blockchain and self.blockchain and self.blockchain.is_connected:
                try:
                    with st.status("Logging query to blockchain..."):
                        log_result = self.blockchain.log_query(query, answer)

                    if log_result.get("status"):  # Success
                        blockchain_log = {
                            "logged": True,
                            "query_id": log_result.get("query_id", ""),
                            "tx_hash": log_result.get("tx_hash", ""),
                            "block_number": log_result.get("block_number", 0),
                        }
                    else:
                        st.error(f"Error logging to blockchain: {log_result.get('error', 'Unknown error')}")
                except Exception as e:
                    # Logging is best-effort; the answer is still returned.
                    st.error(f"Error logging to blockchain: {str(e)}")

            return {
                "answer": answer,
                "sources": sources,
                "query_time": query_time,
                "blockchain_log": blockchain_log,
            }

        except Exception as e:
            st.error(f"Error generating answer: {str(e)}")
            return f"Error: {str(e)}"

    def get_performance_metrics(self):
        """Return performance metrics for the RAG system.

        Returns:
            ``None`` until something has been processed; otherwise a dict of
            aggregate timings, memory delta, device and blockchain status.
        """
        if not self.processing_times:
            return None

        timings = self.processing_times
        return {
            "documents_processed": self.documents_processed,
            "index_building_time": timings.get("index_building", 0),
            "total_processing_time": timings.get("total_time", 0),
            "memory_used_gb": timings.get("memory_used_gb", 0),
            "device": self.device,
            "embedding_model": self.embedding_model_name,
            "blockchain_enabled": self.use_blockchain,
            "blockchain_connected": self.blockchain.is_connected if self.blockchain else False,
        }
|
354 |
+
|
355 |
+
|
356 |
+
# Helper function to initialize session state
|
357 |
+
def initialize_session_state():
    """Initialize Streamlit session state variables.

    Each key is only set when absent, so values from earlier script runs in
    the same session are preserved.
    """
    # Built on every call so the "messages" list is never shared between sessions.
    defaults = {
        "rag": None,                  # BlockchainEnabledRAG instance, created on demand
        "messages": [],               # chat history entries with "role" and "content"
        "temp_dir": None,             # directory holding the uploaded PDFs
        "metamask_connected": False,  # last known wallet connection status
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
367 |
+
|
368 |
+
# Helper function to clean up temporary files
|
369 |
+
def cleanup_temp_files():
    """Clean up temporary files when application exits.

    Removes the directory recorded under ``temp_dir`` in the Streamlit session
    state, if it still exists. Failures are printed rather than raised because
    this function is intended to run as an exit handler.
    """
    try:
        temp_dir = st.session_state.get('temp_dir')
    except Exception as e:
        # NOTE(review): session state may be unavailable when this runs outside
        # a Streamlit script run (e.g. from atexit) — confirm; either way an
        # exit handler should not raise.
        print(f"Error cleaning up temporary directory: {e}")
        return

    if not temp_dir or not os.path.exists(temp_dir):
        return

    try:
        shutil.rmtree(temp_dir)
        print(f"Cleaned up temporary directory: {temp_dir}")
    except OSError as e:
        print(f"Error cleaning up temporary directory: {e}")
|
377 |
+
|
378 |
+
|
379 |
+
# Streamlit UI
|
380 |
+
def _init_rag(settings, use_blockchain, metamask_info):
    """Create the RAG system, store it in session state and sync the wallet connection."""
    st.session_state.rag = BlockchainEnabledRAG(**settings)

    # Update with current MetaMask connection if available
    if use_blockchain and metamask_info:
        st.session_state.rag.update_blockchain_connection(metamask_info)


def _render_assistant_message(content):
    """Render one assistant reply: a plain string, or an answer dict with timing, log and sources."""
    if not isinstance(content, dict):
        st.markdown(content)
        return

    st.markdown(content["answer"])

    if "query_time" in content:
        st.caption(f"Response time: {content['query_time']:.2f} seconds")

    # Display blockchain log if available
    blockchain_log = content.get("blockchain_log")
    if blockchain_log:
        st.success(f"✅ Query logged on blockchain | Transaction: {blockchain_log['tx_hash'][:10]}...")

    # Display sources in expander
    sources = content.get("sources")
    if sources:
        with st.expander("📚 View Sources"):
            for number, source in enumerate(sources, start=1):
                st.markdown(f"**Source {number}: {source['source']}**")

                # Show blockchain verification if available
                if source.get("blockchain"):
                    st.success(f"✅ Verified on blockchain | TX: {source['blockchain']['tx_hash'][:10]}...")

                st.text(source["content"])
                st.divider()


def main():
    """Render the Streamlit UI: wallet connection, sidebar configuration, PDF processing and chat."""
    st.set_page_config(page_title="Blockchain-Enabled RAG System", layout="wide")

    st.title("🔐 GPU-Accelerated PDF Question Answering with MetaMask Blockchain Verification")
    st.markdown("Upload PDFs, verify them on blockchain with MetaMask, and ask questions with audit log")

    # Initialize session state
    initialize_session_state()

    # MetaMask Connection Section
    st.header("🦊 MetaMask Connection")
    st.markdown("Connect your MetaMask wallet to verify documents and log queries on the blockchain.")

    # Add MetaMask connector and get connection info
    metamask_info = metamask_connector()
    wallet_connected = bool(metamask_info and metamask_info.get("connected"))

    # Display MetaMask connection status
    if wallet_connected:
        st.success(f"✅ MetaMask Connected: {metamask_info.get('address')}")
        st.info(f"Network: {metamask_info.get('network_name')}")
    else:
        st.warning("⚠️ MetaMask not connected. Please connect your wallet to use blockchain features.")
    st.session_state.metamask_connected = wallet_connected

    # Update RAG system with MetaMask connection if needed
    if st.session_state.rag and metamask_info:
        if st.session_state.rag.update_blockchain_connection(metamask_info):
            st.success("RAG system updated with MetaMask connection")

    # Sidebar for configuration and file upload
    with st.sidebar:
        st.header("⚙️ Configuration")

        # GPU Detection
        gpu_available = torch.cuda.is_available()
        if gpu_available:
            try:
                gpu_info = torch.cuda.get_device_properties(0)
                st.success(f"GPU detected: {gpu_info.name} ({gpu_info.total_memory / 1024**3:.1f} GB)")
            except Exception as e:
                st.warning(f"GPU detected but couldn't get properties: {str(e)}")
                st.info("Running with limited GPU information")
        else:
            st.warning("No GPU detected. Running in CPU mode.")

        # Model selection
        llm_model = st.selectbox(
            "LLM Model",
            options=[
                "mistralai/Mistral-7B-Instruct-v0.2",
                "google/flan-t5-base",
                "tiiuae/falcon-7b-instruct",
            ],
            index=0,
        )

        embedding_model = st.selectbox(
            "Embedding Model",
            options=[
                "sentence-transformers/all-mpnet-base-v2",
                "sentence-transformers/all-MiniLM-L6-v2",
                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            ],
            index=1,  # all-MiniLM-L6-v2 is smaller and faster
        )

        use_gpu = st.checkbox("Use GPU Acceleration", value=gpu_available)

        # Blockchain configuration
        st.header("🔗 Blockchain Configuration")
        use_blockchain = st.checkbox("Enable Blockchain Verification", value=True)

        # Always bound, so later code never depends on the checkbox having been ticked.
        contract_address = None
        if use_blockchain:
            contract_address = st.text_input("Contract Address",
                                             value="0x0000000000000000000000000000000000000000")

            # Display MetaMask connection status in sidebar
            if wallet_connected:
                # A connected wallet may still report no address; avoid slicing None.
                short_address = (metamask_info.get('address') or '')[:10]
                st.success(f"✅ MetaMask Connected: {short_address}...")
            else:
                st.warning("⚠️ MetaMask not connected. Please connect your wallet above.")

            if not contract_address or contract_address == "0x0000000000000000000000000000000000000000":
                st.error("Please deploy the contract and enter its address")

        # Advanced options
        with st.expander("Advanced Options"):
            chunk_size = st.slider("Chunk Size", 100, 2000, 1000)
            chunk_overlap = st.slider("Chunk Overlap", 0, 500, 200)

        # Constructor arguments shared by both places that create the RAG system.
        rag_settings = {
            "llm_model_name": llm_model,
            "embedding_model_name": embedding_model,
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
            "use_gpu": use_gpu and gpu_available,
            "use_blockchain": use_blockchain,
            "contract_address": contract_address if use_blockchain else None,
        }

        # Initialize button
        if st.button("Initialize System"):
            with st.spinner("Initializing RAG system..."):
                if use_blockchain and not contract_address:
                    st.error("Contract address is required for blockchain integration")
                else:
                    _init_rag(rag_settings, use_blockchain, metamask_info)

                    st.success(f"System initialized with {embedding_model} on {st.session_state.rag.device}")
                    if use_blockchain:
                        if wallet_connected:
                            st.success("Blockchain verification enabled with MetaMask")
                        else:
                            st.warning("Blockchain verification enabled but MetaMask not connected")

        st.header("📄 Upload Documents")
        uploaded_files = st.file_uploader("Select PDFs", type="pdf", accept_multiple_files=True)

        # The button is only rendered once at least one file has been selected.
        if uploaded_files and st.button("Process PDFs"):
            if not st.session_state.rag:
                # Lazily create the system with the current sidebar settings.
                with st.spinner("Initializing RAG system..."):
                    _init_rag(rag_settings, use_blockchain, metamask_info)

            success = st.session_state.rag.process_pdfs(uploaded_files)
            if success:
                metrics = st.session_state.rag.get_performance_metrics()
                if metrics:
                    st.success("PDFs processed successfully!")
                    with st.expander("💹 Performance Metrics"):
                        st.markdown(f"**Documents processed:** {metrics['documents_processed']} chunks")
                        st.markdown(f"**Index building time:** {metrics['index_building_time']:.2f} seconds")
                        st.markdown(f"**Total processing time:** {metrics['total_processing_time']:.2f} seconds")
                        st.markdown(f"**Memory used:** {metrics['memory_used_gb']:.2f} GB")
                        st.markdown(f"**Device used:** {metrics['device']}")
                        st.markdown(f"**Blockchain verification:** {'Enabled' if metrics['blockchain_enabled'] else 'Disabled'}")
                        st.markdown(f"**Blockchain connected:** {'Yes' if metrics.get('blockchain_connected') else 'No'}")

    # Blockchain verification info
    if st.session_state.rag and st.session_state.rag.use_blockchain:
        if st.session_state.metamask_connected:
            st.info("🔒 Blockchain verification is enabled with MetaMask. Documents are cryptographically verified and queries are logged with immutable audit trail.")
        else:
            st.warning("🔒 Blockchain verification is enabled but MetaMask is not connected. Please connect your MetaMask wallet to use blockchain features.")

    # Display chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            if message["role"] == "user":
                st.markdown(message["content"])
            else:
                _render_assistant_message(message["content"])

    # Chat input
    if prompt := st.chat_input("Ask a question about your PDFs..."):
        # Add user message to chat
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)

        # Pick the reply: a hint when the system is not ready, otherwise a real answer.
        if not st.session_state.rag:
            response = "Please initialize the system and process PDFs first."
        elif st.session_state.rag.vector_store:
            response = None  # computed inside the chat bubble so status widgets render there
        else:
            response = "Please upload and process PDF files first."

        with st.chat_message("assistant"):
            if response is None:
                response = st.session_state.rag.ask(prompt)
                st.session_state.messages.append({"role": "assistant", "content": response})
                _render_assistant_message(response)
            else:
                st.markdown(response)
                st.session_state.messages.append({"role": "assistant", "content": response})
|
624 |
+
|
625 |
+
|
626 |
+
# Main entry point
|
627 |
+
if __name__ == "__main__":
    # Register the cleanup function once per process. Streamlit re-executes
    # this script on every user interaction, so an unguarded register() call
    # would stack one more identical exit handler on each rerun. The marker
    # lives on the atexit module because script globals do not survive a rerun.
    if not getattr(atexit, "_rag_cleanup_registered", False):
        atexit.register(cleanup_temp_files)
        atexit._rag_cleanup_registered = True

    main()
|
metamask-requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit>=1.30.0
|
2 |
+
langchain>=0.0.292
|
3 |
+
langchain-huggingface>=0.0.3
|
4 |
+
langchain-community>=0.0.3
|
5 |
+
python-dotenv>=1.0.0
|
6 |
+
transformers>=4.33.3
|
7 |
+
faiss-cpu>=1.7.4
|
8 |
+
sentence-transformers>=2.2.2
|
9 |
+
PyPDF2>=3.0.1
|
10 |
+
psutil>=5.9.5
|
11 |
+
web3>=6.10.0
|
12 |
+
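# uuid>=1.30  -- not needed: uuid ships with the Python standard library; the PyPI "uuid" package is an obsolete backport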
|
13 |
+
huggingface-hub>=0.17.3
|