shiningcreature committed on
Commit
8db2f7b
·
verified ·
1 Parent(s): b4a972b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -5,7 +5,6 @@ from langchain_community.document_loaders import DirectoryLoader, TextLoader
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from transformers import AutoTokenizer
9
 
10
  # Step 1: Extract ZIP
11
  def extract_zip(zip_path, extract_to):
@@ -14,7 +13,14 @@ def extract_zip(zip_path, extract_to):
14
  zip_ref.extractall(extract_to)
15
  st.success("✅ Knowledge Base extracted successfully!")
16
 
17
- # Step 2: Load and embed knowledge base
 
 
 
 
 
 
 
18
  @st.cache_resource
19
  def load_knowledge_base(folder_path):
20
  loader = DirectoryLoader(folder_path, glob="*.md", loader_cls=TextLoader)
@@ -29,11 +35,8 @@ def load_knowledge_base(folder_path):
29
  st.error("❌ Failed to split documents.")
30
  return None
31
 
32
- # Use AutoTokenizer to load the appropriate tokenizer for the model
33
  model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
34
- tokenizer = AutoTokenizer.from_pretrained(model_name)
35
-
36
- embeddings = HuggingFaceEmbeddings(model_name=model_name, tokenizer=tokenizer)
37
 
38
  try:
39
  db = FAISS.from_documents(split_docs, embeddings)
@@ -45,19 +48,22 @@ def load_knowledge_base(folder_path):
45
  # Streamlit UI
46
  st.title("📘 Fitlytic Chatbot")
47
 
48
- # Step 3: Extract ZIP if needed
49
  zip_path = "Knowledge_Base.zip"
50
  extract_to = "Knowledge_Base"
51
  extract_zip(zip_path, extract_to)
52
 
53
- # Step 4: Load knowledge base
54
- if os.path.exists(extract_to):
55
- db = load_knowledge_base(os.path.join(extract_to, "Knowledge_Base"))
 
 
 
56
  else:
57
- st.error("❌ ZIP file not found or extraction failed.")
58
  st.stop()
59
 
60
- # Step 5: User interaction
61
  if db:
62
  query = st.text_input("Ask me anything about Fitlytic:")
63
  if query:
 
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
8
 
9
  # Step 1: Extract ZIP
10
  def extract_zip(zip_path, extract_to):
 
13
  zip_ref.extractall(extract_to)
14
  st.success("✅ Knowledge Base extracted successfully!")
15
 
16
+ # Step 2: Auto-detect folder that contains .md files
17
+ def find_md_folder(base_path):
18
+ for root, dirs, files in os.walk(base_path):
19
+ if any(file.endswith(".md") for file in files):
20
+ return root
21
+ return None
22
+
23
+ # Step 3: Load and embed knowledge base
24
  @st.cache_resource
25
  def load_knowledge_base(folder_path):
26
  loader = DirectoryLoader(folder_path, glob="*.md", loader_cls=TextLoader)
 
35
  st.error("❌ Failed to split documents.")
36
  return None
37
 
 
38
  model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
39
+ embeddings = HuggingFaceEmbeddings(model_name=model_name)
 
 
40
 
41
  try:
42
  db = FAISS.from_documents(split_docs, embeddings)
 
48
  # Streamlit UI
49
  st.title("📘 Fitlytic Chatbot")
50
 
51
+ # Step 4: Extract ZIP if needed
52
  zip_path = "Knowledge_Base.zip"
53
  extract_to = "Knowledge_Base"
54
  extract_zip(zip_path, extract_to)
55
 
56
+ # Step 5: Find folder containing .md files
57
+ md_folder = find_md_folder(extract_to)
58
+
59
+ # Step 6: Load knowledge base
60
+ if md_folder:
61
+ db = load_knowledge_base(md_folder)
62
  else:
63
+ st.error("❌ Could not find any Markdown files in the extracted folder.")
64
  st.stop()
65
 
66
+ # Step 7: User interaction
67
  if db:
68
  query = st.text_input("Ask me anything about Fitlytic:")
69
  if query: