# endpoint.py
from fastapi import FastAPI
from pydantic import BaseModel
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import os
import json
from dotenv import load_dotenv
# Load secrets
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
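# Optional guard (an addition, assuming the key is required for every request):
# fail fast at startup rather than surfacing an opaque auth error later.
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; add it to .env or the environment")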
# Init LLM and Embedding model
Settings.llm = Groq(model="llama3-8b-8192", api_key=GROQ_API_KEY)
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
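# Note: queries must be embedded with the same model that built the index;
# a mismatched embedding model makes the stored vectors useless for retrieval.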
# Load index
PERSIST_DIR = "./storage"
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
chat_engine = index.as_chat_engine(chat_mode="context", verbose=False)
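# For reference, ./storage is assumed to have been produced by an offline
# ingest step roughly like this (a sketch, not run here):
#   index = VectorStoreIndex.from_documents(SimpleDirectoryReader("data").load_data())
#   index.storage_context.persist(persist_dir=PERSIST_DIR)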
app = FastAPI()
class QueryRequest(BaseModel):
    question: str

class RecommendRequest(BaseModel):
    query: str
@app.get("/health")
def health_check():
    return {"status": "healthy"}
@app.post("/recommend")
async def recommend(request: RecommendRequest):
    prompt = f"""
    You are an intelligent assistant that recommends SHL assessments based on user queries.
    Using the query: "{request.query}", return **all relevant and matching** SHL assessments (at least 1 and up to 10).
    Only respond in this exact JSON format:
    {{
        "recommended_assessments": [
            {{
                "url": "Valid URL in string",
                "adaptive_support": "Yes/No",
                "description": "Description in string",
                "duration": 60,
                "remote_support": "Yes/No",
                "test_type": ["List of string"]
            }}
        ]
    }}
    Do not include any explanations or extra text. Only return pure JSON. Respond with as many matching assessments as possible (up to 10).
    """
    response = chat_engine.chat(prompt)
    try:
        return json.loads(response.response)
    except Exception:
        return {"error": "Model response was not valid JSON", "raw": response.response}