Spaces:
Sleeping
Sleeping
File size: 1,582 Bytes
38812af e235492 38812af e235492 8507438 38812af e235492 38812af e235492 38812af e235492 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import datasets
from langchain.docstore.document import Document
from langchain_community.retrievers import BM25Retriever
from langchain.tools import Tool
def load_guest_dataset():
    """Build one ``Document`` per invitee from the guest dataset.

    Reads the ``train`` split of ``agents-course/unit3-invitees`` through
    ``datasets.load_dataset``. For every record, the ``name``, ``relation``,
    ``description`` and ``email`` fields are rendered as four labelled lines
    of text, and the guest's name is also stored in the document metadata.

    Returns:
        A list of ``Document`` objects, one per dataset record, in the
        order the dataset yields them.
    """
    guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")

    documents = []
    for guest in guest_dataset:
        # One "Label: value" line per field; joined with newlines below.
        profile_lines = [
            f"Name: {guest['name']}",
            f"Relation: {guest['relation']}",
            f"Description: {guest['description']}",
            f"Email: {guest['email']}"
        ]
        documents.append(
            Document(
                page_content="\n".join(profile_lines),
                metadata={"name": guest["name"]},
            )
        )
    return documents
# Module-level setup: these two statements run when the module is imported.
# Load every guest record as a Document (see load_guest_dataset).
docs = load_guest_dataset()
# Build the BM25 retriever over those documents; extract_text queries it.
bm25_retriever = BM25Retriever.from_documents(docs)
def extract_text(query: str) -> str:
    """Look up the guest entry that best matches *query*.

    The query is handed to the module-level ``bm25_retriever``; only the
    text of the first document it returns is passed back to the caller.

    Args:
        query: Free-text search string, e.g. a guest's name or relation.

    Returns:
        The ``page_content`` of the first retrieved document, or the fixed
        message ``"No matching guest information found."`` when the
        retriever returns nothing.
    """
    matches = bm25_retriever.invoke(query)
    if not matches:
        return "No matching guest information found."

    # Only the first hit is surfaced; any further results are ignored.
    top_match = matches[0]
    return top_match.page_content
# Wrap extract_text in a Tool object so it can be handed to an agent.
# The tool is registered under the name "guest_info_retriever" with the
# description string given below.
guest_info_tool = Tool(
    name="guest_info_retriever",
    func=extract_text,
    description="Retrieves detailed information about gala guests based on their name or relation."
)
if __name__ == "__main__":
    # Manual smoke test: runs only when this file is executed as a script,
    # never when it is imported for guest_info_tool.
    query = "Marie"
    print(f"query: {query}:\nretrieval: {extract_text(query)}")