import os
import threading

import gradio as gr
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# =======================
# Load Secrets
# =======================
# SYSTEM_PROMPT (with the flag) must be added in HF Space secrets
SYSTEM_PROMPT = os.environ.get(
    "prompt",
    "You are a placeholder Sovereign. No secrets found in environment."
)

# MODEL_ID = "tiiuae/Falcon3-3B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)  # needs: from transformers import AutoTokenizer
# # =======================
# # Initialize Falcon-3B
# # =======================
# pipe = pipeline(
#     "text-generation",
#     model="tiiuae/Falcon3-3B-Instruct",
#     tokenizer=tokenizer,
#     device_map="auto",
#     return_full_text=False,
#     max_new_tokens=256,
#     do_sample=True,
#     temperature=0.8,
#     top_p=0.9,
#     eos_token_id=tokenizer.eos_token_id
# )

pipe = pipeline(
    "text-generation",
    model="tiiuae/Falcon3-3B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)

# =======================
# Core Chat Function
# =======================
def chat_fn(user_input: str) -> str:
    """
    Concatenate the system and user messages into a plain-text prompt,
    run the model, and strip the prompt from the output.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]
    # Build a plain-text prompt by joining "Role: content" lines
    prompt_text = "\n".join(
        f"{m['role'].capitalize()}: {m['content']}" for m in messages
    )
    result = pipe(prompt_text, max_new_tokens=256, do_sample=False)
    # The pipeline returns the prompt plus the completion; keep only the completion
    generated_text = result[0]["generated_text"]
    return generated_text[len(prompt_text):].strip()

# =======================
# Gradio UI
# =======================
def gradio_chat(user_input: str) -> str:
    return chat_fn(user_input)

iface = gr.Interface(
    fn=gradio_chat,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt…"),
    outputs="text",
    title="Prompt cracking challenge",
    description="Does he really think he is the king?",
)

# =======================
# FastAPI for API access
# =======================
app = FastAPI(title="Prompt cracking challenge API")

class GenerateRequest(BaseModel):
    prompt: str

@app.post("/generate")
def generate(req: GenerateRequest):
    return {"response": chat_fn(req.prompt)}

# =======================
# Launch Both Servers
# =======================
def run_api():
    # Serve the FastAPI app in the background so the Gradio UI can
    # occupy the main thread
    uvicorn.run(app, host="0.0.0.0", port=8000)

if __name__ == "__main__":
    threading.Thread(target=run_api, daemon=True).start()
    iface.launch(server_name="0.0.0.0", share=True)
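
# =======================
# Example API usage (sketch)
# =======================
# A minimal client sketch for the /generate endpoint defined above,
# assuming the uvicorn server is reachable on localhost:8000 (the port
# used in run_api). The prompt string is a hypothetical example.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/generate",
#       json={"prompt": "Who are you?"},
#   )
#   print(resp.json()["response"])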