|
import os |
|
import zipfile |
|
import streamlit as st |
|
from langchain_community.document_loaders import DirectoryLoader, TextLoader |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
|
|
|
def extract_zip(zip_path, extract_to):
    """Unpack the archive at *zip_path* into *extract_to* on first use.

    Nothing happens when the archive is missing or when *extract_to*
    already exists, so an existing extraction is never overwritten.
    After a successful extraction a Streamlit success message is shown.

    Args:
        zip_path: Path of the zip archive to unpack.
        extract_to: Directory that receives the archive contents.

    Returns:
        None.
    """
    # Guard clause: bail out unless the archive is present and the
    # destination has not been created yet.
    if not os.path.exists(zip_path) or os.path.exists(extract_to):
        return

    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_to)
    st.success("Knowledge Base extracted successfully!")
|
|
|
|
|
def find_md_folder(base_path):
    """Return the first directory under *base_path* that holds a ``.md`` file.

    The tree is traversed with ``os.walk``; the first visited directory
    that directly contains at least one file whose name ends in ``.md``
    is returned.

    Args:
        base_path: Root directory of the search.

    Returns:
        The path of the matching directory, or None when no Markdown file
        is found anywhere under *base_path*.
    """
    markdown_dirs = (
        current_dir
        for current_dir, _subdirs, filenames in os.walk(base_path)
        if any(name.endswith(".md") for name in filenames)
    )
    # The generator is lazy, so the walk stops at the first match.
    return next(markdown_dirs, None)
|
|
|
|
|
@st.cache_resource
def load_knowledge_base(folder_path):
    """Build a FAISS similarity index from the Markdown files in a folder.

    Steps:
      1. Load every file matching ``*.md`` in *folder_path* with
         ``DirectoryLoader`` / ``TextLoader``.
      2. Split the documents with ``RecursiveCharacterTextSplitter``
         (``chunk_size=500``, ``chunk_overlap=50``).
      3. Embed the chunks with the
         ``sentence-transformers/paraphrase-MiniLM-L6-v2`` model and index
         them with ``FAISS.from_documents``.

    The function is wrapped in ``st.cache_resource``.

    Args:
        folder_path: Directory containing the Markdown knowledge base.

    Returns:
        The FAISS vector store, or None when no documents were loaded,
        splitting produced no chunks, or index creation raised. Each
        failure is reported through ``st.error`` before returning.
    """
    documents = DirectoryLoader(
        folder_path, glob="*.md", loader_cls=TextLoader
    ).load()
    if not documents:
        st.error("No documents found in the knowledge base folder.")
        return None

    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = chunker.split_documents(documents)
    if not chunks:
        st.error("Failed to split documents.")
        return None

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-MiniLM-L6-v2"
    )

    # Only index construction is guarded; any failure there is shown in
    # the UI instead of propagating.
    try:
        index = FAISS.from_documents(chunks, embedder)
    except Exception as exc:
        st.error(f"Error creating FAISS index: {exc}")
        return None
    return index
|
|
|
|
|
# --- Page header -----------------------------------------------------------
st.title("π Fitlytic Chatbot")

# --- Knowledge base preparation --------------------------------------------
# Unpack the bundled archive (a no-op when it was already extracted).
zip_path = "Knowledge_Base.zip"
extract_to = "Knowledge_Base"
extract_zip(zip_path, extract_to)

# Locate the directory inside the extraction target that holds the Markdown
# files, since the archive may wrap them in sub-folders.
md_folder = find_md_folder(extract_to)

if not md_folder:
    st.error("Could not find any Markdown files in the extracted folder.")
    st.stop()
else:
    db = load_knowledge_base(md_folder)

# --- Question answering ----------------------------------------------------
if not db:
    # Index construction failed; the loader has already reported why.
    st.stop()
else:
    query = st.text_input("Ask me anything about Fitlytic:")
    if query:
        # Show only the single closest chunk as the answer.
        results = db.similarity_search(query, k=1)
        if not results:
            st.error("!!Sorry, I couldn't find an answer. Try rephrasing it.")
        else:
            st.success(results[0].page_content)
|
|