# rag-chatbot / endpoint.py (by Ipshitaa — "updated main.py", commit ec2628d)
# endpoint.py
from fastapi import FastAPI, Request
from pydantic import BaseModel
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import os,json
from dotenv import load_dotenv
# Load secrets from a local .env file into the process environment.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # may be None if unset; Groq client will fail at call time
# Configure the global LlamaIndex settings BEFORE loading the index:
# the LLM used for chat and the embedding model used for retrieval.
# NOTE(review): the embed model here must match the one used when the
# index in ./storage was built — confirm against the indexing script.
Settings.llm = Groq(model="llama3-8b-8192", api_key=GROQ_API_KEY)
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Rehydrate the persisted vector index from disk and build a single
# module-level chat engine ("context" mode: retrieved chunks are injected
# as context for each chat turn).
PERSIST_DIR = "./storage"
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
chat_engine = index.as_chat_engine(chat_mode="context", verbose=False)
app = FastAPI()
class QueryRequest(BaseModel):
    """Request body for a plain question-answer endpoint.

    NOTE(review): no visible route uses this model in this chunk — it may
    be consumed by an endpoint elsewhere in the project, or be dead code.
    """

    question: str
class RecommendRequest(BaseModel):
    """Request body for POST /recommend: a free-text assessment query."""

    query: str
@app.get("/health")
def health_check():
    """Liveness probe: confirm the service is up and responding."""
    return dict(status="healthy")
@app.post("/recommend")
async def recommend(request: RecommendRequest):
    """Recommend SHL assessments for a free-text query.

    Prompts the module-level chat engine for a JSON-only answer and returns
    the parsed payload. If the model's output is not valid JSON, returns an
    error envelope containing the raw model text instead of raising.

    Args:
        request: body with the user's search ``query``.

    Returns:
        dict: ``{"recommended_assessments": [...]}`` on success, or
        ``{"error": ..., "raw": ...}`` when parsing fails.
    """
    prompt = f"""
You are an intelligent assistant that recommends SHL assessments based on user queries.
Using the query: "{request.query}", return **all relevant and matching** SHL assessments (at least 1 and up to 10).
Only respond in this exact JSON format:
{{
"recommended_assessments": [
{{
"url": "Valid URL in string",
"adaptive_support": "Yes/No",
"description": "Description in string",
"duration": 60,
"remote_support": "Yes/No",
"test_type": ["List of string"]
}}
]
}}
Do not include any explanations or extra text. Only return pure JSON. Respond with as many matching assessments as possible (up to 10).
"""
    response = chat_engine.chat(prompt)
    raw = response.response
    # LLMs frequently wrap JSON answers in Markdown code fences despite
    # "pure JSON" instructions; strip them so valid payloads still parse.
    text = raw.strip()
    if text.startswith("```"):
        text = text.strip("`").strip()
        if text.lower().startswith("json"):
            text = text[4:].lstrip()
    try:
        return json.loads(text)
    # Catch only parse failures (json.JSONDecodeError is a ValueError):
    # a broad `except Exception` here would mask real bugs as "bad JSON".
    except ValueError:
        return {"error": "Model response was not valid JSON", "raw": raw}