# rag-chatbot / endpoint.py (by Ipshitaa — "updated main.py", commit ec2628d)
# endpoint.py
from fastapi import FastAPI, Request
from pydantic import BaseModel
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import os,json
from dotenv import load_dotenv
# Load secrets from a local .env file into the process environment.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # may be None if unset; Groq client will fail at call time
# Configure the global LlamaIndex settings BEFORE loading the index:
# the LLM used for chat and the embedding model used for retrieval.
# NOTE(review): the embed model here must match the one used when the
# index in ./storage was built — confirm against the indexing script.
Settings.llm = Groq(model="llama3-8b-8192", api_key=GROQ_API_KEY)
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Rehydrate the persisted vector index from disk and build a single
# module-level chat engine ("context" mode: retrieved chunks are injected
# as context for each chat turn).
PERSIST_DIR = "./storage"
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
chat_engine = index.as_chat_engine(chat_mode="context", verbose=False)
app = FastAPI()
class QueryRequest(BaseModel):
    """Request body for a plain question-answer endpoint.

    NOTE(review): no visible route uses this model in this chunk — it may
    be consumed by an endpoint elsewhere in the project, or be dead code.
    """

    question: str
class RecommendRequest(BaseModel):
    """Request body for POST /recommend: a free-text assessment query."""

    query: str
@app.get("/health")
def health_check():
    """Liveness probe: confirm the service is up and responding."""
    return dict(status="healthy")
@app.post("/recommend")
async def recommend(request: RecommendRequest):
    """Recommend SHL assessments for a free-text query.

    Prompts the module-level chat engine for a JSON-only answer and returns
    the parsed payload. If the model's output is not valid JSON, returns an
    error envelope containing the raw model text instead of raising.

    Args:
        request: body with the user's search ``query``.

    Returns:
        dict: ``{"recommended_assessments": [...]}`` on success, or
        ``{"error": ..., "raw": ...}`` when parsing fails.
    """
    prompt = f"""
You are an intelligent assistant that recommends SHL assessments based on user queries.
Using the query: "{request.query}", return **all relevant and matching** SHL assessments (at least 1 and up to 10).
Only respond in this exact JSON format:
{{
"recommended_assessments": [
{{
"url": "Valid URL in string",
"adaptive_support": "Yes/No",
"description": "Description in string",
"duration": 60,
"remote_support": "Yes/No",
"test_type": ["List of string"]
}}
]
}}
Do not include any explanations or extra text. Only return pure JSON. Respond with as many matching assessments as possible (up to 10).
"""
    response = chat_engine.chat(prompt)
    raw = response.response
    # LLMs frequently wrap JSON answers in Markdown code fences despite
    # "pure JSON" instructions; strip them so valid payloads still parse.
    text = raw.strip()
    if text.startswith("```"):
        text = text.strip("`").strip()
        if text.lower().startswith("json"):
            text = text[4:].lstrip()
    try:
        return json.loads(text)
    # Catch only parse failures (json.JSONDecodeError is a ValueError):
    # a broad `except Exception` here would mask real bugs as "bad JSON".
    except ValueError:
        return {"error": "Model response was not valid JSON", "raw": raw}