ragflow-enterprise-search-app-Cybersoft

Running

App Files Files Community

ragflow-enterprise-search-app-Cybersoft / generator.py

hoangkha1810

Update generator.py

fd6b30d verified 29 days ago

raw

history blame contribute delete

3.67 kB

	import os
	import requests
	from retriever import retrieve_docs
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from numpy import dot
	from numpy.linalg import norm

	# Gemini API key, read from the environment so the secret never lives in
	# source control (on Hugging Face Spaces, define it as a Space secret named
	# GEMINI_API_KEY). Falls back to an empty string when unset, in which case
	# the API rejects the request and the caller reports the error.
	# NOTE: any key that was ever committed to this file must be treated as
	# compromised and revoked/rotated.
	API_KEY = os.environ.get("GEMINI_API_KEY", "")

	# Sentence-transformers model used to score document relevance.
	_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
	# Lazily created embedding model, shared by every call in this process so
	# the model is not re-loaded on each query.
	_embedding_model = None


	def _get_embedding_model():
	    """Return the shared embedding model, creating it on first use."""
	    global _embedding_model
	    if _embedding_model is None:
	        _embedding_model = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
	    return _embedding_model


	def filter_relevant_docs(docs, query, top_k=3):
	    """Return the ``top_k`` documents most similar to ``query``.

	    Similarity is the cosine between the embedding of ``query`` and the
	    embedding of each document's ``page_content``.

	    Args:
	        docs: Iterable of objects exposing a ``page_content`` string.
	        query: The user question to compare against.
	        top_k: Maximum number of documents to return.

	    Returns:
	        A list of at most ``top_k`` documents, most similar first. An empty
	        input yields an empty list without loading the embedding model.
	    """
	    docs = list(docs)
	    if not docs:
	        return []

	    model = _get_embedding_model()
	    query_vec = model.embed_query(query)
	    # One batched call instead of one model invocation per document.
	    doc_vecs = model.embed_documents([doc.page_content for doc in docs])

	    query_norm = norm(query_vec)
	    scored = []
	    for doc, doc_vec in zip(docs, doc_vecs):
	        denominator = query_norm * norm(doc_vec)
	        # A zero-length vector has no direction; score it 0.0 rather than
	        # dividing by zero and feeding NaN into the sort.
	        similarity = dot(query_vec, doc_vec) / denominator if denominator else 0.0
	        scored.append((doc, similarity))

	    scored.sort(key=lambda pair: pair[1], reverse=True)
	    return [doc for doc, _ in scored[:top_k]]

	def format_sources(docs):
	    """Build a bulleted, de-duplicated, sorted list of document sources.

	    For each document the label is its ``section`` metadata (stripped of
	    surrounding whitespace) when that is non-blank; otherwise the base file
	    name of its ``source`` metadata, or a fixed "unknown source" label when
	    ``source`` is missing or empty.

	    Args:
	        docs: Iterable of objects exposing a ``metadata`` dict.

	    Returns:
	        One ``"- <label>"`` line per distinct label, sorted and joined with
	        newlines; an empty string when ``docs`` is empty.
	    """
	    unknown_source = "Nguồn không xác định"
	    labels = set()
	    for doc in docs:
	        # Strip before testing so a whitespace-only section falls back to
	        # the file name instead of producing an empty bullet.
	        section = str(doc.metadata.get("section") or "").strip()
	        if section:
	            labels.add(section)
	        else:
	            # ``or`` also covers a "source" key that is present but None/empty.
	            source = doc.metadata.get("source") or unknown_source
	            labels.add(os.path.basename(str(source)))
	    return "\n".join(f"- {label}" for label in sorted(labels))

	# Seconds to wait for the Gemini API before abandoning the request, so a
	# stalled connection cannot block the caller forever.
	_GEMINI_TIMEOUT_SECONDS = 60


	def _format_history(history):
	    """Render previous exchanges as the conversation-history part of the prompt."""
	    if not history:
	        return ""
	    turns = "".join(
	        f"Người dùng: {exchange['user']}\nTrợ lý: {exchange['assistant']}\n\n"
	        for exchange in history
	    )
	    return "Lịch sử hội thoại:\n" + turns


	def answer_query(query, model="Gemini Pro", temperature=0.2, history=None):
	    """Answer ``query`` from retrieved documents using the Gemini API.

	    Retrieves documents for the query, keeps the most relevant ones, builds
	    a prompt from them plus the conversation history, and sends it to the
	    ``gemini-2.0-flash`` ``generateContent`` endpoint.

	    Args:
	        query: The user question.
	        model: Accepted for interface compatibility; currently not used,
	            the endpoint model is fixed.
	        temperature: Sampling temperature forwarded in ``generationConfig``.
	        history: Optional list of dicts with ``"user"`` and ``"assistant"``
	            keys describing earlier exchanges.

	    Returns:
	        A ``(answer, docs)`` tuple. ``docs`` is the list of documents used
	        as context (empty when nothing was retrieved). When the API call or
	        the response parsing fails, ``answer`` is an error message instead
	        of raising.
	    """
	    # Retrieve and filter the documents.
	    all_docs = retrieve_docs(query)
	    if not all_docs:
	        return "Không tìm thấy tài liệu liên quan để trả lời.", []

	    docs = filter_relevant_docs(all_docs, query)
	    context = "\n\n".join(doc.page_content for doc in docs)

	    # Conversation history section (empty string when there is none).
	    history_text = _format_history(history)

	    # Build the prompt, including the conversation history.
	    prompt = f"""
	{history_text}Dựa trên tài liệu sau, hãy trả lời câu hỏi theo phong cách trang trọng, lịch sự và chuyên nghiệp:
	{context}
	Câu hỏi: {query}
	Yêu cầu:
	- Luôn sử dụng từ ngữ lịch sự ("Bạn cần...", "Vui lòng...", "Sau khi...").
	- Tránh dùng từ nói miệng như "nhé", "nha", "ok".
	- Câu trả lời cần đầy đủ, rõ ràng, không mơ hồ.
	- Đọc kĩ các tài liệu để đưa ra câu trả lời liên quan và chính xác nhất
	- Chỉ sử dụng thông tin có trong tài liệu. Nếu không có thông tin liên quan, trả lời: "Thông tin không có trong tài liệu được cung cấp."
	- Không được nhắc đến việc đã tham khảo hay nguồn tài liệu.
	- Nếu người dùng yêu cầu, vui lòng cung cấp câu trả lời bằng ngôn ngữ khác hoặc viết code chính xác, đầy đủ theo yêu cầu.
	- Trả lời như một chatbot thông thường, không cần giải thích về quy trình hay tài liệu.
	Trả lời:
	"""

	    # Call the Gemini API. The key is sent in a header rather than the URL
	    # query string so it does not end up in logs or exception messages.
	    url = "https://generativelanguage.googleapis.com/v1/models/gemini-2.0-flash:generateContent"
	    headers = {"Content-Type": "application/json", "x-goog-api-key": API_KEY}
	    payload = {
	        "contents": [{"parts": [{"text": prompt}]}],
	        "generationConfig": {"temperature": temperature},
	    }

	    try:
	        response = requests.post(
	            url, headers=headers, json=payload, timeout=_GEMINI_TIMEOUT_SECONDS
	        )
	        data = response.json()
	    except (requests.RequestException, ValueError) as e:
	        # Network failure, timeout, or a body that is not valid JSON.
	        return "Lỗi khi gọi Gemini API: " + str(e), docs

	    # Extract the generated text from the API response.
	    try:
	        answer = data['candidates'][0]['content']['parts'][0]['text']
	    except (KeyError, IndexError, TypeError) as e:
	        # Unexpected payload shape (e.g. an API error object): log it whole.
	        print("🔴 Phản hồi từ Gemini:", data)
	        answer = "Lỗi khi gọi Gemini API: " + str(e)

	    return answer, docs