"""Streamlit chatbot that answers questions from a Markdown knowledge base.

The script extracts ``Knowledge_Base.zip`` (once), locates the folder holding
the ``.md`` files, embeds them into a FAISS index, and returns the single most
similar chunk for each user question.
"""

import os
import zipfile
from typing import Optional

import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

ZIP_PATH = "Knowledge_Base.zip"
EXTRACT_DIR = "Knowledge_Base"
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L6-v2"
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50


# Step 1: Extract ZIP
def extract_zip(zip_path: str, extract_to: str) -> None:
    """Extract ``zip_path`` into ``extract_to`` unless it was already extracted.

    Nothing happens when the archive is missing or when ``extract_to`` already
    exists, so repeated Streamlit reruns do not unpack the archive again.

    Args:
        zip_path: Path to the ZIP archive.
        extract_to: Directory the archive contents are written to.
    """
    if os.path.exists(zip_path) and not os.path.exists(extract_to):
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_to)
        st.success("Knowledge Base extracted successfully!")


# Step 2: Auto-detect folder that contains .md files
def find_md_folder(base_path: str) -> Optional[str]:
    """Return the first directory under ``base_path`` that contains a ``.md`` file.

    Args:
        base_path: Root directory to search (walked with ``os.walk``).

    Returns:
        The path of the first matching directory, or ``None`` when no
        Markdown file is found (including when ``base_path`` does not exist,
        in which case ``os.walk`` yields nothing).
    """
    for root, _dirs, files in os.walk(base_path):
        if any(file.endswith(".md") for file in files):
            return root
    return None


# Step 3: Load and embed knowledge base
@st.cache_resource
def load_knowledge_base(folder_path: str) -> Optional[FAISS]:
    """Build a FAISS index from the Markdown files directly inside ``folder_path``.

    Args:
        folder_path: Directory whose ``*.md`` files are loaded (non-recursive
            glob).

    Returns:
        The FAISS vector store, or ``None`` when no documents were loaded,
        splitting produced no chunks, or index creation failed. In each
        failure case an error is shown in the Streamlit UI.
    """
    loader = DirectoryLoader(
        folder_path,
        glob="*.md",
        loader_cls=TextLoader,
        # Decode as UTF-8 instead of the platform default so the same files
        # load identically everywhere; fall back to encoding detection only
        # if UTF-8 decoding fails.
        loader_kwargs={"encoding": "utf-8", "autodetect_encoding": True},
    )
    docs = loader.load()
    if not docs:
        st.error("No documents found in the knowledge base folder.")
        return None

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    split_docs = splitter.split_documents(docs)
    if not split_docs:
        st.error("Failed to split documents.")
        return None

    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    try:
        return FAISS.from_documents(split_docs, embeddings)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"Error creating FAISS index: {e}")
        return None


# Streamlit UI
st.title("📘 Fitlytic Chatbot")

# Step 4: Extract ZIP if needed
zip_path = ZIP_PATH
extract_to = EXTRACT_DIR
extract_zip(zip_path, extract_to)

# Step 5: Find folder containing .md files
md_folder = find_md_folder(extract_to)

# Step 6: Load knowledge base
if md_folder:
    db = load_knowledge_base(md_folder)
else:
    st.error("Could not find any Markdown files in the extracted folder.")
    st.stop()

# Step 7: User interaction
# Compare against None explicitly: load_knowledge_base signals failure with
# None, and the check should not depend on the vector store's truthiness.
if db is not None:
    query = st.text_input("Ask me anything about Fitlytic:")
    if query:
        results = db.similarity_search(query, k=1)
        if results:
            st.success(results[0].page_content)
        else:
            st.error("!!Sorry, I couldn't find an answer. Try rephrasing it.")
else:
    st.stop()