Upload 13 files
- app.py +142 -0
- config/appConfig.py +11 -0
- config/config.py +10 -0
- data/document_loader.py +24 -0
- data/pdf_reader.py +31 -0
- globals.py +6 -0
- requirements.txt +11 -0
- retriever/chat_manager.py +43 -0
- retriever/chunk_documents.py +49 -0
- retriever/document_manager.py +113 -0
- retriever/llm_manager.py +116 -0
- retriever/vector_store_manager.py +90 -0
- utils/document_utils.py +56 -0
app.py
ADDED
@@ -0,0 +1,142 @@
import logging
import gradio as gr
from utils.document_utils import initialize_logging
from retriever.chat_manager import chat_response
# Note: DocumentManager is initialized in config/appConfig.py and shared via globals
from globals import app_config

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
initialize_logging()

def load_sample_question(question):
    return question

def clear_selection():
    return [], "", []  # Reset doc_selector to an empty list

def process_uploaded_file(file, current_selection):
    """Process an uploaded file using DocumentManager and update the UI."""
    status, page_list, filename, _ = app_config.doc_manager.process_document(file.name if file else None)

    # Update the current selection to include the new file if it is not already present
    updated_selection = current_selection if current_selection else []
    if filename and filename not in updated_selection:
        updated_selection.append(filename)

    return (
        status,
        page_list,
        gr.update(choices=app_config.doc_manager.get_uploaded_documents(), value=updated_selection)
    )

def update_doc_selector(selected_docs):
    """Keep selected documents in sync."""
    return selected_docs

# UI Configuration
models = ["gemma2-9b-it", "llama-guard-3-8b", "qwen-2.5-32b"]
example_questions = [
    "What is communication server?",
    "Show me an example of a configuration file.",
    "How to create Protected File Directories?",
    "What are the attributes of the Azureblobstorage port?",
    "What is Mediator help?",
]
all_questions = [
    "Can you explain Communication Server architecture?",
    "Why does the other instance of my multi-instance qmgr seem to hang after a failover? Queue manager will not start after failover.",
    "Explain the concept of blockchain.",
    "What is the capital of France?",
    "Do Surface Porosity and Pore Size Influence Mechanical Properties and Cellular Response to PEEK?",
    "How does a vaccine work?",
    "Tell me the step-by-step instructions for front-door installation.",
    "What are the risk factors for heart disease?",
]

with gr.Blocks() as interface:
    interface.title = "🤖 IntelliDoc: AI Document Explorer"
    gr.Markdown("""
    # 🤖 IntelliDoc: AI Document Explorer
    **AI Document Explorer** allows you to upload PDF documents and interact with them using AI-powered analysis and summarization. Ask questions, extract key insights, and gain a deeper understanding of your documents effortlessly.
    """)
    with gr.Row():
        # Left Sidebar
        with gr.Column(scale=2):
            gr.Markdown("## Upload and Select Document")
            upload_btn = gr.File(label="Upload PDF Document", file_types=[".pdf"])
            doc_selector = gr.Dropdown(
                choices=app_config.doc_manager.get_uploaded_documents(),
                label="Documents",
                multiselect=True,
                value=[]  # Initial value as an empty list
            )
            model_selector = gr.Dropdown(choices=models, label="Models", interactive=True)
            clear_btn = gr.Button("Clear Selection")
            upload_status = gr.Textbox(label="Upload Status", interactive=False)

            # Process the uploaded file and update the UI
            upload_btn.change(
                process_uploaded_file,
                inputs=[upload_btn, doc_selector],
                outputs=[
                    upload_status,
                    gr.State(),   # page_list
                    doc_selector  # Update choices and value together
                ]
            )
            clear_btn.click(
                clear_selection,
                outputs=[doc_selector, upload_status, gr.State()]
            )
            # Reinitialize the LLM when the model selection changes
            model_selector.change(
                app_config.gen_llm.reinitialize_llm,
                inputs=[model_selector],
                outputs=[upload_status]
            )

        # Middle Section (Chat & LLM Response)
        with gr.Column(scale=6):
            gr.Markdown("## Chat with document(s)")
            chat_history = gr.Textbox(label="Chat History", interactive=False, lines=26, elem_id="chat-history", elem_classes=["chat-box"])
            with gr.Row():
                chat_input = gr.Textbox(show_label=False, placeholder="Ask additional questions about the document...", elem_id="chat-input", lines=3)
                chat_btn = gr.Button("🚀 Send", variant="primary", elem_id="send-button", scale=0)
            chat_btn.click(chat_response, inputs=[chat_input, doc_selector, chat_history], outputs=chat_history).then(
                lambda: "",  # Return an empty string to clear chat_input
                outputs=chat_input
            )

        # Right Sidebar (Sample Questions & History)
        with gr.Column(scale=2):
            gr.Markdown("## Frequently asked questions:")
            with gr.Column():
                gr.Examples(
                    examples=example_questions,
                    inputs=chat_input,
                    label=""
                )
            question_dropdown = gr.Dropdown(
                label="",
                choices=all_questions,
                interactive=True,
                info="Choose a question from the dropdown to populate the query box."
            )

            gr.Markdown("## Logs")
            history = gr.Textbox(label="Previous Queries", interactive=False)

    gr.HTML("""
    <style>
    .chat-box textarea {
        max-height: 600px !important;
        overflow-y: auto !important;
        resize: vertical;
        white-space: pre-wrap; /* Keeps formatting */
    }
    </style>
    """)

if __name__ == "__main__":
    interface.launch()

config/appConfig.py
ADDED
@@ -0,0 +1,11 @@
import logging
from retriever.llm_manager import LLMManager
from retriever.document_manager import DocumentManager

class AppConfig:
    def __init__(self):
        # Initialize LLMManager with the default model ("gemma2-9b-it")
        self.gen_llm = LLMManager()
        # Initialize DocumentManager (a single instance shared across the app)
        self.doc_manager = DocumentManager()
        logging.info("AppConfig initialized with LLMManager")

config/config.py
ADDED
@@ -0,0 +1,10 @@
class ConfigConstants:
    # Constants related to datasets and models
    DATA_SET_PATH = '/persistent/'
    EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L3-v2"
    RE_RANKER_MODEL_NAME = 'cross-encoder/ms-marco-electra-base'
    GENERATION_MODEL_NAME = 'mixtral-8x7b-32768'
    GENERATION_MODELS = ["llama3-8b-8192", "qwen-2.5-32b", "mixtral-8x7b-32768", "gemma2-9b-it"]
    DEFAULT_CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200

data/document_loader.py
ADDED
@@ -0,0 +1,24 @@
# document_loader.py
import os
from typing import Optional

class DocumentLoader:
    def __init__(self):
        self.uploaded_file = None

    def load_file(self, file_path: str) -> Optional[str]:
        """
        Load the uploaded PDF file and validate it.
        Returns the file path if valid, None otherwise.
        """
        if not file_path:
            return None

        if not file_path.lower().endswith('.pdf'):
            raise ValueError("Only PDF files are supported")

        if not os.path.exists(file_path):
            raise FileNotFoundError("File does not exist")

        self.uploaded_file = file_path
        return file_path

data/pdf_reader.py
ADDED
@@ -0,0 +1,31 @@
# pdf_reader.py
import PyPDF2
from typing import List

class PDFReader:
    def __init__(self):
        self.page_list = []

    def read_pdf(self, file_path: str) -> List[str]:
        """
        Read PDF content and return a list of pages.
        Each element in the list is the text content of a page.
        """
        try:
            # Open and read the PDF file
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                num_pages = len(pdf_reader.pages)

                # Extract text from each page
                self.page_list = []
                for page_num in range(num_pages):
                    page = pdf_reader.pages[page_num]
                    text = page.extract_text()
                    if text:  # Only add non-empty pages
                        self.page_list.append(text.strip())

            return self.page_list

        except Exception as e:
            raise Exception(f"Error reading PDF: {str(e)}")

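A minimal usage sketch for PDFReader follows; the sample.pdf path is illustrative and assumed to exist locally.

from data.pdf_reader import PDFReader

reader = PDFReader()
pages = reader.read_pdf("sample.pdf")  # hypothetical path
print(f"Extracted {len(pages)} non-empty pages")
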
globals.py
ADDED
@@ -0,0 +1,6 @@
import logging
from config.appConfig import AppConfig

# Initialize AppConfig (this also initializes the LLMManager with the default model)
app_config = AppConfig()
logging.info("Global app_config initialized")

requirements.txt
ADDED
@@ -0,0 +1,11 @@
transformers
torch
faiss-cpu
sentence-transformers
langchain
llama-index
langchain-community
langchain_groq
langchain-huggingface
gradio
PyPDF2

retriever/chat_manager.py
ADDED
@@ -0,0 +1,43 @@
import logging
from typing import List
from globals import app_config

def chat_response(query: str, selected_docs: List[str], history: str) -> str:
    """
    Generate a chat response based on the user's query and the selected documents.

    Args:
        query (str): The user's query.
        selected_docs (List[str]): List of selected document filenames from the dropdown.
        history (str): The chat history.

    Returns:
        str: Updated chat history with the new response.
    """
    if not query:
        return history + "\nResponse: Please enter a query." if history else "Response: Please enter a query."

    if not selected_docs:
        return history + "\nResponse: Please select at least one document." if history else "Response: Please select at least one document."

    # Retrieve the top 5 chunks based on the query and the selected documents
    top_k_results = app_config.doc_manager.retrieve_top_k(query, selected_docs, k=5)

    if not top_k_results:
        return history + f"\nUser: {query}\nResponse: No relevant information found in the selected documents." if history else f"User: {query}\nResponse: No relevant information found in the selected documents."

    # Send the top K results to the LLM to generate a response
    try:
        llm_response, source_docs = app_config.gen_llm.generate_response(query, top_k_results)
    except Exception as e:
        return history + f"\nUser: {query}\nResponse: Error generating response: {str(e)}" if history else f"User: {query}\nResponse: Error generating response: {str(e)}"

    # Format the response for the chat history
    response = f"{llm_response}\n"
    # (Optional) Append source snippets to the response:
    # for i, doc in enumerate(source_docs, 1):
    #     doc_id = doc.metadata.get('doc_id', 'Unknown')
    #     filename = next((name for name, d_id in app_config.doc_manager.document_ids.items() if d_id == doc_id), 'Unknown')
    #     response += f"{i}. {filename}: {doc.page_content[:100]}...\n"

    return history + f"\nUser: {query}\nResponse: {response}" if history else f"User: {query}\nResponse: {response}"

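A minimal sketch of driving chat_response outside the Gradio UI, assuming GROQ_API_KEY is set and a local guide.pdf (hypothetical filename) is available to index first:

from globals import app_config
from retriever.chat_manager import chat_response

# Hypothetical: process a local PDF first so it can be selected by filename
status, pages, filename, doc_id = app_config.doc_manager.process_document("guide.pdf")
history = chat_response("What is communication server?", [filename], history="")
print(history)
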
retriever/chunk_documents.py
ADDED
@@ -0,0 +1,49 @@
import logging
from langchain.text_splitter import RecursiveCharacterTextSplitter
import hashlib

def chunk_documents(page_list, doc_id, chunk_size=1000, chunk_overlap=200):
    """
    Chunk a list of page contents into smaller segments with document ID metadata.

    Args:
        page_list (list): List of strings, each string being the content of a page.
        doc_id (str): Unique identifier for the document.
        chunk_size (int): Maximum size of each chunk (default: 1000 characters).
        chunk_overlap (int): Overlap between chunks (default: 200 characters).

    Returns:
        list: List of dictionaries, each containing 'text', 'source', and 'doc_id'.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    documents = []
    seen_hashes = set()  # Track hashes of chunks to avoid duplicates

    for page_num, page_content in enumerate(page_list, start=1):  # Start page numbering at 1
        if not page_content or not isinstance(page_content, str):
            continue  # Skip empty or invalid pages

        # Split the page content into chunks
        chunks = text_splitter.split_text(page_content)

        for i, chunk in enumerate(chunks):
            # Generate a unique hash for the chunk
            chunk_hash = hashlib.sha256(chunk.encode()).hexdigest()

            # Skip if the chunk is a duplicate
            if chunk_hash in seen_hashes:
                continue

            # Create a source identifier (e.g., "doc_123_page_1_chunk_0")
            source = f"doc_{doc_id}_page_{page_num}_chunk_{i}"

            # Add the chunk with doc_id as metadata
            documents.append({
                'text': chunk,
                'source': source,
                'doc_id': doc_id
            })
            seen_hashes.add(chunk_hash)

    logging.info(f"Chunking done: produced {len(documents)} chunks")
    return documents

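A minimal sketch of exercising chunk_documents on its own; the page texts and doc ID below are made up:

from retriever.chunk_documents import chunk_documents

pages = [
    "Chapter 1. The communication server routes messages between endpoints. " * 20,
    "Chapter 2. Configuration files define ports and protected directories. " * 20,
]
chunks = chunk_documents(pages, doc_id="demo-doc", chunk_size=500, chunk_overlap=100)
print(len(chunks), chunks[0]['source'])  # e.g. 'doc_demo-doc_page_1_chunk_0'
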
retriever/document_manager.py
ADDED
@@ -0,0 +1,113 @@
import logging
import os
from typing import Any, Dict, List
import uuid
from data.document_loader import DocumentLoader
from data.pdf_reader import PDFReader
from retriever.chunk_documents import chunk_documents
from retriever.vector_store_manager import VectorStoreManager

class DocumentManager:
    def __init__(self):
        self.doc_loader = DocumentLoader()
        self.pdf_reader = PDFReader()
        self.vector_manager = VectorStoreManager()
        self.uploaded_documents = {}
        self.chunked_documents = {}
        self.document_ids = {}
        logging.info("DocumentManager initialized")

    def process_document(self, file):
        """
        Process an uploaded file: load, read the PDF, chunk, and store in the vector store.
        Returns: (status_message, page_list, filename, doc_id)
        """
        try:
            if file is None:
                return "No file uploaded", [], None, None

            logging.info(f"Processing file: {file}")

            # Load and validate the file
            file_path = self.doc_loader.load_file(file)
            filename = os.path.basename(file_path)

            # Read the PDF content
            page_list = self.pdf_reader.read_pdf(file_path)

            # Store the uploaded document
            self.uploaded_documents[filename] = file_path

            # Generate a unique document ID
            doc_id = str(uuid.uuid4())
            self.document_ids[filename] = doc_id

            # Chunk the pages
            chunks = chunk_documents(page_list, doc_id, chunk_size=1000, chunk_overlap=200)
            self.chunked_documents[filename] = chunks

            # Add the chunks to the vector store
            self.vector_manager.add_documents(chunks)

            return (
                f"Successfully loaded {filename} with {len(page_list)} pages",
                page_list,
                filename,
                doc_id
            )

        except Exception as e:
            logging.error(f"Error processing document: {str(e)}")
            return f"Error: {str(e)}", [], None, None

    def get_uploaded_documents(self):
        """Return the list of uploaded document filenames."""
        return list(self.uploaded_documents.keys())

    def get_chunks(self, filename):
        """Return the chunks for a given filename."""
        return self.chunked_documents.get(filename, [])

    def get_document_id(self, filename):
        """Return the document ID for a given filename."""
        return self.document_ids.get(filename, None)

    def retrieve_top_k(self, query: str, selected_docs: List[str], k: int = 5) -> List[Dict[str, Any]]:
        """
        Retrieve the top K chunks across the selected documents based on the user's query.

        Args:
            query (str): The user's query.
            selected_docs (List[str]): List of selected document filenames from the dropdown.
            k (int): Number of top results to return (default is 5).

        Returns:
            List[Dict[str, Any]]: List of top K chunks with their text, metadata, and scores.
        """
        if not selected_docs:
            logging.warning("No documents selected for retrieval")
            return []

        all_results = []
        for filename in selected_docs:
            doc_id = self.get_document_id(filename)
            if not doc_id:
                logging.warning(f"No document ID found for filename: {filename}")
                continue

            # Search for relevant chunks within this document
            results = self.vector_manager.search(query, doc_id, k=k)
            all_results.extend(results)

        # FAISS similarity_search_with_score returns L2 distances, where lower
        # scores are more similar, so sort ascending and take the top K
        all_results.sort(key=lambda x: x['score'])
        top_k_results = all_results[:k]

        # Log the retrieved documents
        logging.info("Retrieved top K documents:")
        for i, result in enumerate(top_k_results, 1):
            doc_id = result['metadata'].get('doc_id', 'Unknown')
            filename = next((name for name, d_id in self.document_ids.items() if d_id == doc_id), 'Unknown')
            logging.info(f"{i}. Filename: {filename}, Doc ID: {doc_id}, Score: {result['score']:.4f}, Text: {result['text'][:100]}...")

        return top_k_results

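A minimal end-to-end sketch of DocumentManager; the sample.pdf path is illustrative, and the first run downloads the embedding model:

from retriever.document_manager import DocumentManager

manager = DocumentManager()
status, pages, filename, doc_id = manager.process_document("sample.pdf")  # hypothetical path
print(status)
if filename:
    hits = manager.retrieve_top_k("What is a communication server?", [filename], k=3)
    for hit in hits:
        print(hit['score'], hit['text'][:80])
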
retriever/llm_manager.py
ADDED
@@ -0,0 +1,116 @@
import logging
import os
from typing import List, Dict, Any, Tuple
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever

class LLMManager:
    DEFAULT_MODEL = "gemma2-9b-it"  # Default model name

    def __init__(self):
        self.generation_llm = None
        logging.info("LLMManager initialized")

        # Initialize the default model during construction
        try:
            self.initialize_generation_llm(self.DEFAULT_MODEL)
            logging.info(f"Initialized default LLM model: {self.DEFAULT_MODEL}")
        except ValueError as e:
            logging.error(f"Failed to initialize default LLM model: {str(e)}")

    def initialize_generation_llm(self, model_name: str) -> None:
        """
        Initialize the generation LLM using the Groq API.

        Args:
            model_name (str): The name of the model to use for generation.

        Raises:
            ValueError: If GROQ_API_KEY is not set.
        """
        api_key = os.getenv("GROQ_API_KEY")  # Never hardcode API keys in source
        if not api_key:
            raise ValueError("GROQ_API_KEY is not set. Please add it to your environment variables.")

        os.environ["GROQ_API_KEY"] = api_key
        self.generation_llm = ChatGroq(model=model_name, temperature=0.7)
        self.generation_llm.name = model_name
        logging.info(f"Generation LLM {model_name} initialized")

    def reinitialize_llm(self, model_name: str) -> str:
        """
        Reinitialize the LLM with a new model name.

        Args:
            model_name (str): The name of the new model to initialize.

        Returns:
            str: Status message indicating success or failure.
        """
        try:
            self.initialize_generation_llm(model_name)
            return f"LLM model changed to {model_name}"
        except ValueError as e:
            logging.error(f"Failed to reinitialize LLM with model {model_name}: {str(e)}")
            return f"Error: Failed to change LLM model: {str(e)}"

    def generate_response(self, question: str, relevant_docs: List[Dict[str, Any]]) -> Tuple[str, List[Document]]:
        """
        Generate a response using the generation LLM based on the question and relevant documents.

        Args:
            question (str): The user's query.
            relevant_docs (List[Dict[str, Any]]): Relevant document chunks with text, metadata, and scores.

        Returns:
            Tuple[str, List[Document]]: The LLM's response and the source documents used.

        Raises:
            ValueError: If the generation LLM is not initialized.
            Exception: If there's an error during the QA chain invocation.
        """
        if not self.generation_llm:
            raise ValueError("Generation LLM is not initialized. Call initialize_generation_llm first.")

        # Convert the relevant documents into LangChain Document objects
        documents = [
            Document(page_content=doc['text'], metadata=doc['metadata'])
            for doc in relevant_docs
        ]

        # Create a proper retriever by subclassing BaseRetriever
        class SimpleRetriever(BaseRetriever):
            def __init__(self, docs: List[Document], **kwargs):
                super().__init__(**kwargs)  # Pass kwargs to BaseRetriever
                self._docs = docs  # Use a private attribute to store the docs
                logging.debug(f"SimpleRetriever initialized with {len(docs)} documents")

            def _get_relevant_documents(self, query: str) -> List[Document]:
                logging.debug(f"SimpleRetriever._get_relevant_documents called with query: {query}")
                return self._docs

            async def _aget_relevant_documents(self, query: str) -> List[Document]:
                logging.debug(f"SimpleRetriever._aget_relevant_documents called with query: {query}")
                return self._docs

        # Instantiate the retriever
        retriever = SimpleRetriever(docs=documents)

        # Create a retrieval-based question-answering chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.generation_llm,
            retriever=retriever,
            return_source_documents=True
        )

        try:
            result = qa_chain.invoke({"query": question})
            response = result['result']
            source_docs = result['source_documents']
            logging.info(f"Generated response for question: {question} : {response}")
            return response, source_docs
        except Exception as e:
            logging.error(f"Error during QA chain invocation: {str(e)}")
            raise e

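A minimal sketch of calling LLMManager directly, assuming GROQ_API_KEY is exported with a real key and the chunk dictionaries match the shape produced by chunk_documents:

import os
os.environ.setdefault("GROQ_API_KEY", "<your-key>")  # placeholder; use a real key from the Groq console

from retriever.llm_manager import LLMManager

manager = LLMManager()  # initializes the default "gemma2-9b-it" model
chunks = [{
    'text': "The communication server routes messages between client and server endpoints.",
    'metadata': {'source': 'doc_demo_page_1_chunk_0', 'doc_id': 'demo'},
    'score': 0.42,
}]
answer, sources = manager.generate_response("What does the communication server do?", chunks)
print(answer)
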
retriever/vector_store_manager.py
ADDED
@@ -0,0 +1,90 @@
import os
import logging
from config.config import ConfigConstants
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

class VectorStoreManager:
    def __init__(self, embedding_path="embeddings.faiss"):
        """
        Initialize the vector store manager.

        Args:
            embedding_path (str): Path to save/load the FAISS index.
        """
        self.embedding_path = embedding_path
        self.embedding_model = HuggingFaceEmbeddings(model_name=ConfigConstants.EMBEDDING_MODEL_NAME)
        self.vector_store = self._initialize_vector_store()

    def _initialize_vector_store(self):
        """Initialize or load the FAISS vector store."""
        if os.path.exists(self.embedding_path):
            logging.info("Loading embeddings from local file")
            return FAISS.load_local(
                self.embedding_path,
                self.embedding_model,
                allow_dangerous_deserialization=True
            )
        else:
            logging.info("Creating new vector store")
            # Return an empty vector store; it will be populated when documents are added
            return FAISS.from_texts(
                texts=[""],  # Dummy text to initialize the index
                embedding=self.embedding_model,
                metadatas=[{"source": "init", "doc_id": "init"}]
            )

    def add_documents(self, documents):
        """
        Add new documents to the vector store and save it.

        Args:
            documents (list): List of dictionaries with 'text', 'source', and 'doc_id'.
        """
        if not documents:
            return

        texts = [doc['text'] for doc in documents]
        metadatas = [{'source': doc['source'], 'doc_id': doc['doc_id']} for doc in documents]

        logging.info("Adding new documents to vector store")
        self.vector_store.add_texts(
            texts=texts,
            metadatas=metadatas
        )
        self.vector_store.save_local(self.embedding_path)
        logging.info(f"Vector store updated and saved to {self.embedding_path}")

    def search(self, query, doc_id, k=4):
        """
        Search the vector store for relevant chunks, filtered by doc_id.

        Args:
            query (str): The user's query.
            doc_id (str): The document ID to filter by.
            k (int): Number of results to return.

        Returns:
            list: List of relevant document chunks with metadata and scores.
        """
        if not self.vector_store:
            return []

        try:
            # Define a filter function to match doc_id
            filter_fn = lambda metadata: metadata['doc_id'] == doc_id

            # Perform a similarity search with the filter
            results = self.vector_store.similarity_search_with_score(
                query=query,
                k=k,
                filter=filter_fn
            )

            # Format the results
            return [{'text': doc.page_content, 'metadata': doc.metadata, 'score': score} for doc, score in results]

        except Exception as e:
            logging.error(f"Error during vector store search: {str(e)}")
            return []

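A minimal standalone sketch of the vector store round trip; the texts, IDs, and demo.faiss path are illustrative, and the first run downloads the embedding model:

from retriever.vector_store_manager import VectorStoreManager

store = VectorStoreManager(embedding_path="demo.faiss")  # hypothetical path
store.add_documents([
    {'text': "Protected file directories restrict access to configuration files.",
     'source': "doc_demo_page_1_chunk_0", 'doc_id': "demo"},
])
for hit in store.search("How are files protected?", doc_id="demo", k=2):
    print(hit['score'], hit['text'])
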
utils/document_utils.py
ADDED
@@ -0,0 +1,56 @@
import logging
from typing import List

logs = []

class Document:
    def __init__(self, metadata, page_content):
        self.metadata = metadata
        self.page_content = page_content

def apply_sentence_keys_documents(relevant_docs: List[Document]):
    """Split each document into sentences keyed by document index and sentence letter (e.g. '0a', '0b')."""
    result = []
    for relevant_doc_index, relevant_doc in enumerate(relevant_docs):
        sentences = []
        for sentence_index, sentence in enumerate(relevant_doc.page_content.split(".")):
            # Keys run '0a', '0b', ...; this assumes at most 26 sentences per document
            sentences.append([str(relevant_doc_index) + chr(97 + sentence_index), sentence])
        result.append(sentences)

    return result

def apply_sentence_keys_response(input_string):
    """Split a response into sentences keyed by letter (e.g. 'a', 'b')."""
    sentences = input_string.split('. ')
    result = [[chr(97 + i), sentence] for i, sentence in enumerate(sentences)]
    return result

def initialize_logging():
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Custom log handler to capture logs and append them to the logs list
    class LogHandler(logging.Handler):
        def emit(self, record):
            log_entry = self.format(record)
            logs.append(log_entry)

    # Add the custom log handler to the logger
    log_handler = LogHandler()
    log_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(log_handler)

def get_logs():
    """Retrieve logs for display."""
    return "\n".join(logs[-100:])  # Only show the last 100 log entries

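A quick sketch of the sentence-keying helpers on made-up inputs:

from utils.document_utils import Document, apply_sentence_keys_documents, apply_sentence_keys_response

docs = [Document(metadata={'doc_id': 'demo'}, page_content="First sentence. Second sentence.")]
print(apply_sentence_keys_documents(docs))
# [[['0a', 'First sentence'], ['0b', ' Second sentence'], ['0c', '']]]
print(apply_sentence_keys_response("Answer part one. Answer part two."))
# [['a', 'Answer part one'], ['b', 'Answer part two.']]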