# endpoint.py
from fastapi import FastAPI
from pydantic import BaseModel
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import os
import json
from dotenv import load_dotenv

# Load secrets
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Init LLM and embedding model
Settings.llm = Groq(model="llama3-8b-8192", api_key=GROQ_API_KEY)
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
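# Note: the embedding model must match the one used when the index under
# PERSIST_DIR was built; a mismatch breaks or silently degrades retrieval.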

# Load the persisted index
PERSIST_DIR = "./storage"
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
chat_engine = index.as_chat_engine(chat_mode="context", verbose=False)
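# "context" chat mode retrieves the most relevant nodes from the index for
# each message and injects them into the system prompt before calling the LLM.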

app = FastAPI()


class QueryRequest(BaseModel):
    question: str


class RecommendRequest(BaseModel):
    query: str


@app.get("/health")  # route path assumed from the handler's purpose
def health_check():
    return {"status": "healthy"}


@app.post("/recommend")  # route path assumed from the handler's purpose
async def recommend(request: RecommendRequest):
prompt = f""" | |
You are an intelligent assistant that recommends SHL assessments based on user queries. | |
Using the query: "{request.query}", return **all relevant and matching** SHL assessments (at least 1 and up to 10). | |
Only respond in this exact JSON format: | |
{{ | |
"recommended_assessments": [ | |
{{ | |
"url": "Valid URL in string", | |
"adaptive_support": "Yes/No", | |
"description": "Description in string", | |
"duration": 60, | |
"remote_support": "Yes/No", | |
"test_type": ["List of string"] | |
}} | |
] | |
}} | |
Do not include any explanations or extra text. Only return pure JSON. Respond with as many matching assessments as possible (up to 10). | |
""" | |
    # achat keeps the event loop free while the LLM call is in flight
    response = await chat_engine.achat(prompt)
    try:
        return json.loads(response.response)
    except json.JSONDecodeError:
        return {"error": "Model response was not valid JSON", "raw": response.response}