Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ from langchain_community.document_loaders import DirectoryLoader, TextLoader
|
|
5 |
from langchain_community.vectorstores import FAISS
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
-
from transformers import AutoTokenizer
|
9 |
|
10 |
# Step 1: Extract ZIP
|
11 |
def extract_zip(zip_path, extract_to):
|
@@ -14,7 +13,14 @@ def extract_zip(zip_path, extract_to):
|
|
14 |
zip_ref.extractall(extract_to)
|
15 |
st.success("✅ Knowledge Base extracted successfully!")
|
16 |
|
17 |
-
# Step 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
@st.cache_resource
|
19 |
def load_knowledge_base(folder_path):
|
20 |
loader = DirectoryLoader(folder_path, glob="*.md", loader_cls=TextLoader)
|
@@ -29,11 +35,8 @@ def load_knowledge_base(folder_path):
|
|
29 |
st.error("❌ Failed to split documents.")
|
30 |
return None
|
31 |
|
32 |
-
# Use AutoTokenizer to load the appropriate tokenizer for the model
|
33 |
model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
|
34 |
-
|
35 |
-
|
36 |
-
embeddings = HuggingFaceEmbeddings(model_name=model_name, tokenizer=tokenizer)
|
37 |
|
38 |
try:
|
39 |
db = FAISS.from_documents(split_docs, embeddings)
|
@@ -45,19 +48,22 @@ def load_knowledge_base(folder_path):
|
|
45 |
# Streamlit UI
|
46 |
st.title("π Fitlytic Chatbot")
|
47 |
|
48 |
-
# Step
|
49 |
zip_path = "Knowledge_Base.zip"
|
50 |
extract_to = "Knowledge_Base"
|
51 |
extract_zip(zip_path, extract_to)
|
52 |
|
53 |
-
# Step
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
56 |
else:
|
57 |
-
st.error("β
|
58 |
st.stop()
|
59 |
|
60 |
-
# Step
|
61 |
if db:
|
62 |
query = st.text_input("Ask me anything about Fitlytic:")
|
63 |
if query:
|
|
|
5 |
from langchain_community.vectorstores import FAISS
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
8 |
|
9 |
# Step 1: Extract ZIP
|
10 |
def extract_zip(zip_path, extract_to):
|
|
|
13 |
zip_ref.extractall(extract_to)
|
14 |
st.success("✅ Knowledge Base extracted successfully!")
|
15 |
|
16 |
+
# Step 2: Auto-detect folder that contains .md files
|
17 |
+
def find_md_folder(base_path):
|
18 |
+
for root, dirs, files in os.walk(base_path):
|
19 |
+
if any(file.endswith(".md") for file in files):
|
20 |
+
return root
|
21 |
+
return None
|
22 |
+
|
23 |
+
# Step 3: Load and embed knowledge base
|
24 |
@st.cache_resource
|
25 |
def load_knowledge_base(folder_path):
|
26 |
loader = DirectoryLoader(folder_path, glob="*.md", loader_cls=TextLoader)
|
|
|
35 |
st.error("❌ Failed to split documents.")
|
36 |
return None
|
37 |
|
|
|
38 |
model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
|
39 |
+
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
|
|
|
|
40 |
|
41 |
try:
|
42 |
db = FAISS.from_documents(split_docs, embeddings)
|
|
|
48 |
# Streamlit UI
|
49 |
st.title("π Fitlytic Chatbot")
|
50 |
|
51 |
+
# Step 4: Extract ZIP if needed
|
52 |
zip_path = "Knowledge_Base.zip"
|
53 |
extract_to = "Knowledge_Base"
|
54 |
extract_zip(zip_path, extract_to)
|
55 |
|
56 |
+
# Step 5: Find folder containing .md files
|
57 |
+
md_folder = find_md_folder(extract_to)
|
58 |
+
|
59 |
+
# Step 6: Load knowledge base
|
60 |
+
if md_folder:
|
61 |
+
db = load_knowledge_base(md_folder)
|
62 |
else:
|
63 |
+
st.error("❌ Could not find any Markdown files in the extracted folder.")
|
64 |
st.stop()
|
65 |
|
66 |
+
# Step 7: User interaction
|
67 |
if db:
|
68 |
query = st.text_input("Ask me anything about Fitlytic:")
|
69 |
if query:
|