# Page header captured with the file (not part of the program):
# shiningcreature's picture
# Update app.py
# 305d189 verified
import os
import zipfile
import streamlit as st
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Step 1: Extract ZIP
def extract_zip(zip_path, extract_to):
    """Unpack the knowledge-base archive once and report the outcome in the UI.

    Nothing happens when ``zip_path`` does not exist or ``extract_to`` already
    exists, so repeated script runs do not unpack the archive again.

    Args:
        zip_path: Path to the ZIP archive.
        extract_to: Directory the archive is unpacked into.

    Returns:
        None. Success is reported with ``st.success`` and failure with
        ``st.error``.
    """
    import shutil  # local import: only needed to clean up after a failure

    if not os.path.exists(zip_path) or os.path.exists(extract_to):
        return

    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_to)
    except (zipfile.BadZipFile, OSError) as exc:
        # extract_to did not exist before this call, so anything there now is
        # a partial extraction. Remove it; otherwise the existence check above
        # would skip extraction on every later run.
        shutil.rmtree(extract_to, ignore_errors=True)
        st.error(f"Could not extract {zip_path}: {exc}")
        return

    st.success("Knowledge Base extracted successfully!")
# Step 2: Auto-detect folder that contains .md files
def find_md_folder(base_path):
    """Return the first directory under ``base_path`` with a visible ``.md`` file.

    The tree is walked top-down with sub-directories visited in sorted order,
    so the result does not depend on filesystem listing order.

    Args:
        base_path: Root directory to search. A missing directory simply
            yields no match.

    Returns:
        The path of the first matching directory, or ``None`` if no directory
        contains a visible Markdown file.
    """
    for root, dirs, files in os.walk(base_path):
        # Sorting in place controls the order os.walk descends in.
        dirs.sort()
        # Dot-files are ignored: archives made on macOS carry "._name.md"
        # stubs under __MACOSX, and DirectoryLoader skips hidden files by
        # default, so a folder holding only those would load nothing.
        if any(
            name.endswith(".md") and not name.startswith(".")
            for name in files
        ):
            return root
    return None
# Step 3: Load and embed knowledge base
@st.cache_resource
def load_knowledge_base(folder_path):
    """Build a FAISS vector index from the Markdown files in ``folder_path``.

    Files matching ``*.md`` are loaded as text, split into 500-character
    chunks with a 50-character overlap, embedded with a sentence-transformers
    model and indexed with FAISS. Every failure is reported with ``st.error``.

    Args:
        folder_path: Directory containing the Markdown knowledge base.

    Returns:
        The FAISS vector store, or ``None`` if any stage failed.

    NOTE(review): the function is wrapped in ``st.cache_resource``, so a
    ``None`` result is presumably cached like any other value; confirm
    whether a transient failure should instead be retried on the next run.
    """
    # Decode explicitly as UTF-8 rather than the platform default encoding,
    # which differs between systems.
    loader = DirectoryLoader(
        folder_path,
        glob="*.md",
        loader_cls=TextLoader,
        loader_kwargs={"encoding": "utf-8"},
    )
    try:
        docs = loader.load()
    except Exception as e:  # UI boundary: report instead of crashing the app
        st.error(f"Error loading documents: {e}")
        return None
    if not docs:
        st.error("No documents found in the knowledge base folder.")
        return None

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    split_docs = splitter.split_documents(docs)
    if not split_docs:
        st.error("Failed to split documents.")
        return None

    model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
    try:
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
    except Exception as e:  # e.g. the model could not be loaded
        st.error(f"Error loading embedding model: {e}")
        return None

    try:
        db = FAISS.from_documents(split_docs, embeddings)
    except Exception as e:
        st.error(f"Error creating FAISS index: {e}")
        return None
    return db
# Streamlit UI
# The title emoji (U+1F4D8, blue book) is written as an escape so it survives
# being saved or copied with the wrong text encoding.
st.title("\U0001F4D8 Fitlytic Chatbot")

# Step 4: Extract ZIP if needed
zip_path = "Knowledge_Base.zip"
extract_to = "Knowledge_Base"
extract_zip(zip_path, extract_to)

# Step 5: Find folder containing .md files
md_folder = find_md_folder(extract_to)
if not md_folder:
    st.error("Could not find any Markdown files in the extracted folder.")
    st.stop()

# Step 6: Load knowledge base
db = load_knowledge_base(md_folder)
if db is None:
    # load_knowledge_base has already reported the reason.
    st.stop()

# Step 7: User interaction
query = st.text_input("Ask me anything about Fitlytic:")
# Ignore empty and whitespace-only input so a blank query is never searched.
if query and query.strip():
    results = db.similarity_search(query, k=1)
    if results:
        st.success(results[0].page_content)
    else:
        st.error("!!Sorry, I couldn't find an answer. Try rephrasing it.")