Spaces:
Sleeping
Sleeping
import datasets | |
from langchain.docstore.document import Document | |
from langchain_community.retrievers import BM25Retriever | |
from langchain.tools import Tool | |
def load_guest_dataset():
    """Build one ``Document`` per invitee from the gala guest dataset.

    Reads the ``train`` split of ``agents-course/unit3-invitees`` and renders
    each row's name, relation, description and email as a four-line text
    block. The guest's name is additionally stored in the document metadata.

    Returns:
        A list of ``Document`` objects, one per dataset row, in dataset order.
    """
    invitees = datasets.load_dataset("agents-course/unit3-invitees", split="train")

    documents = []
    for guest in invitees:
        # One labelled line per field; joined below into the page content.
        profile_lines = (
            f"Name: {guest['name']}",
            f"Relation: {guest['relation']}",
            f"Description: {guest['description']}",
            f"Email: {guest['email']}",
        )
        documents.append(
            Document(
                page_content="\n".join(profile_lines),
                metadata={"name": guest["name"]},
            )
        )
    return documents
# Build the guest documents once, when this module is first imported.
docs = load_guest_dataset()

# Module-level BM25 retriever over those documents; extract_text queries it.
bm25_retriever = BM25Retriever.from_documents(docs)
def extract_text(query: str) -> str:
    """Look up gala guest information for a name or relation query.

    Passes ``query`` to the module-level ``bm25_retriever`` and returns the
    page content of the first document it yields.

    Args:
        query: Free-text search string, e.g. a guest's name or relation.

    Returns:
        The text of the first retrieved document, or the fixed message
        ``"No matching guest information found."`` when the retriever
        returns no documents.
    """
    matches = bm25_retriever.invoke(query)
    if not matches:
        return "No matching guest information found."
    # Only the first hit is surfaced; any further results are ignored.
    return matches[0].page_content
# Wrap the lookup function as a LangChain Tool so it can be handed to an agent.
guest_info_tool = Tool(
    name="guest_info_retriever",
    description="Retrieves detailed information about gala guests based on their name or relation.",
    func=extract_text,
)


if __name__ == "__main__":
    # Manual smoke check when the file is run directly as a script.
    query = "Marie"
    print(f"query: {query}:\nretrieval: {extract_text(query)}")