# NOTE(review): removed HuggingFace Spaces page-scrape residue that preceded
# this line (status text, commit hashes, line-number gutter) — it was not part
# of the program and made the file invalid Python.
from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import markdowm as md
import base64
# Load environment variables (API keys / tokens) from a local .env file.
load_dotenv()

# Chat models the user can pick from (HuggingFace Inference API model ids).
llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]

# Embedding models available for indexing uploaded documents.
embed_models = [
    "BAAI/bge-small-en-v1.5",
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder",
    "BAAI/bge-large-en",
]

# Global state shared between the Gradio callbacks below.
selected_llm_model_name = llm_models[0]      # current chat model (set_llm_model)
selected_embed_model_name = embed_models[0]  # default embedding model
vector_index = None                          # built by load_files()

# LlamaParse converts every supported upload type to markdown text.
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
_supported_extensions = [
    '.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx',
    '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg',
]
# Map every supported extension to the same LlamaParse instance.
file_extractor = dict.fromkeys(_supported_extensions, parser)
def load_files(file_path: str, embed_model_name: str):
    """Parse and index the uploaded document so it can be queried.

    Builds the global ``vector_index`` from *file_path* using the selected
    HuggingFace embedding model, and returns a status string for the UI.
    """
    global vector_index
    try:
        # LlamaParse (via file_extractor) handles all supported formats.
        document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
        filename = os.path.basename(file_path)
        # Bug fix: previously returned the literal "(unknown)" and left
        # `filename` unused — show the actual uploaded file's name.
        return f"Ready to give response on {filename}"
    except Exception as e:
        # Surface the failure in the status textbox instead of crashing the app.
        return f"An error occurred: {e}"
def set_llm_model(selected_model):
    """Remember the chat model chosen in the dropdown and confirm the choice."""
    global selected_llm_model_name
    selected_llm_model_name = selected_model
    confirmation = f"Model set to: {selected_model}"
    return confirmation
def respond(message, history):
    """Answer *message* against the indexed document.

    Returns ``(history, history)`` so that the first value updates the
    ``gr.Chatbot`` component and the second updates the ``gr.State``.

    Bug fix: the original returned the bot reply *string* in the first slot,
    but the click/submit bindings map that slot to the Chatbot component,
    which expects the full list of (user, bot) message pairs — every branch
    now appends to ``history`` and returns the list.
    """
    try:
        llm = HuggingFaceInferenceAPI(
            model_name=selected_llm_model_name,
            contextWindow=8192,
            maxTokens=1024,
            temperature=0.3,
            topP=0.9,
            frequencyPenalty=0.5,
            presencePenalty=0.5,
            token=os.getenv("TOKEN")
        )
        if vector_index is None:
            # No document has been indexed yet — prompt the user.
            history.append((message, "Please upload a file first."))
            return history, history
        query_engine = vector_index.as_query_engine(llm=llm)
        bot_message = str(query_engine.query(message))
        history.append((message, bot_message))
        # Lightweight console log of each exchange for debugging.
        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {bot_message}\n")
        return history, history
    except Exception as e:
        # Show the error inside the chat instead of crashing the app.
        history.append((message, f"An error occurred: {e}"))
        return history, history
def encode_image(image_path):
    """Read the file at *image_path* and return its contents base64-encoded as text."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')
# Base64-encode the logo images so they can be inlined as data URIs in the footer.
github_logo_encoded = encode_image("Images/github-logo.png")
linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
website_logo_encoded = encode_image("Images/ai-logo.png")

# Static markdown shown in the UI tabs.
description = "### Welcome to **DocBot** - Ask Questions Based on Your Uploaded Documents"
guide = "> Step 1: Upload\n> Step 2: Select Embedding\n> Step 3: Select LLM\n> Step 4: Ask Questions"

# Footer HTML: each <img> embeds one of the base64-encoded logos above.
footer = f"""
<center>
<a href="https://github.com" target="_blank"><img src="data:image/png;base64,{github_logo_encoded}" height="30"/></a>
<a href="https://linkedin.com" target="_blank"><img src="data:image/png;base64,{linkedin_logo_encoded}" height="30"/></a>
<a href="https://yourwebsite.com" target="_blank"><img src="data:image/png;base64,{website_logo_encoded}" height="30"/></a>
</center>
"""
# Gradio UI
# Two-tab layout: an intro tab and the main DocBot tab. The DocBot tab has a
# narrow control column (upload / embedding / LLM selection) and a wide chat
# column, wired to the callbacks defined above.
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
    gr.Markdown("# DocBot")
    with gr.Tabs():
        with gr.TabItem("Intro"):
            gr.Markdown(description)
        with gr.TabItem("DocBot"):
            with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
                gr.Markdown(guide)
            with gr.Row():
                with gr.Column(scale=1):
                    # Left column: document upload and model-selection controls.
                    file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
                    embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
                    with gr.Row():
                        btn = gr.Button("Submit", variant='primary')
                        clear = gr.ClearButton()
                    output = gr.Text(label='Vector Index')
                    llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
                    model_selected_output = gr.Text(label="Model selected")
                with gr.Column(scale=3):
                    # Right column: the chat interface itself.
                    chatbot_ui = gr.Chatbot(height=500)
                    message = gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
                    submit_btn = gr.Button("Send")
        # Bind logic
        # Dropdown change stores the chosen LLM; Submit indexes the upload;
        # Clear resets the three upload-related components to None.
        llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown, outputs=model_selected_output)
        btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
        clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
        # Chat logic
        # Both the Send button and pressing Enter in the textbox route the
        # message (plus conversation state) through respond().
        state = gr.State([])
        submit_btn.click(fn=respond, inputs=[message, state], outputs=[chatbot_ui, state])
        message.submit(fn=respond, inputs=[message, state], outputs=[chatbot_ui, state])
    gr.HTML(footer)
if __name__ == "__main__":
    # Bug fix: removed a trailing " |" page-scrape artifact that was a syntax
    # error. share=True publishes a temporary public Gradio link.
    demo.launch(share=True)