Bahaedev committed
Commit 6684f10 · verified · 1 Parent(s): 6945529

Update app.py

Files changed (1)
  1. app.py +18 -37
app.py CHANGED
@@ -1,61 +1,47 @@
  import os
+ from transformers import pipeline
+ import gradio as gr
  from fastapi import FastAPI
  from pydantic import BaseModel
- import gradio as gr
  import threading
  import uvicorn

  # =======================
  # Load Secrets
  # =======================
+ # SYSTEM_PROMPT (with the flag) must be added in HF Space secrets
  SYSTEM_PROMPT = os.environ.get(
      "prompt",
      "You are a placeholder Sovereign. No secrets found in environment."
  )

  # =======================
- # Initialize Unsloth-optimized Falcon-3B
+ # Initialize Falcon-3B
  # =======================
- # Install via: pip install unsloth torch transformers
- from unsloth import FastLanguageModel
- from transformers import AutoTokenizer
-
- MODEL_NAME = "tiiuae/Falcon3-3B-Instruct"
-
- # 1) Load model and tokenizer with 4-bit quantization
- model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name=MODEL_NAME,
-     max_seq_length=2048,
-     load_in_4bit=True,
-     dtype=None,
+ pipe = pipeline(
+     "text-generation",
+     model="tiiuae/Falcon3-3B-Instruct",
+     torch_dtype="auto",
+     device_map="auto",
  )

- # 2) Apply inference optimizations (fused kernels, streaming, etc.)
- FastLanguageModel.for_inference(model)
-
  # =======================
  # Core Chat Function
  # =======================
  def chat_fn(user_input: str) -> str:
+     """
+     Concatenate system and user messages, run the model,
+     and strip the system prompt from the output.
+     """
      messages = [
          {"role": "system", "content": SYSTEM_PROMPT},
          {"role": "user", "content": f"User: {user_input}"}
      ]
+     # Falcon is not chat-native; we just join roles with newlines
      prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
-
-     # Tokenize and run generation
-     inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
-     output_ids = model.generate(
-         **inputs,
-         max_new_tokens=256,
-         do_sample=False,
-         eos_token_id=tokenizer.eos_token_id
-     )
-
-     # Decode only the newly generated tokens
-     gen_tokens = output_ids[0][inputs.input_ids.shape[-1]:]
-     generated_text = tokenizer.decode(gen_tokens, skip_special_tokens=True)
-     return generated_text.strip()
+     result = pipe(prompt_text, max_new_tokens=256, do_sample=False)
+     generated_text = result[0]["generated_text"]
+     return generated_text[len(prompt_text):].strip()

  # =======================
  # Gradio UI
@@ -71,10 +57,6 @@ iface = gr.Interface(
      description="Does he really think he is the king?"
  )

- # Run Gradio in a separate thread so FastAPI can also start
- def run_gradio():
-     iface.launch(server_name="0.0.0.0", share=True)
-
  # =======================
  # FastAPI for API access
  # =======================
@@ -91,5 +73,4 @@ def generate(req: Request):
  # Launch Both Servers
  # =======================
  if __name__ == "__main__":
-     threading.Thread(target=run_gradio, daemon=True).start()
-     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))
+     iface.launch(server_name="0.0.0.0", share=True)
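
A note on the new "# Falcon is not chat-native" comment: instruct-tuned checkpoints such as tiiuae/Falcon3-3B-Instruct usually ship a chat template with their tokenizer, so the role formatting can be delegated to apply_chat_template instead of hand-joined newlines. A minimal sketch, assuming the checkpoint's tokenizer does define a template (the example messages are placeholders, not the Space's actual prompt):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-3B-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who are you?"},
]

# Renders the messages in the model's expected chat format and appends
# the assistant turn marker, ready to feed to the generation pipeline.
prompt_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt_text)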
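On the output handling in the new chat_fn: a text-generation pipeline returns the prompt followed by the completion by default, which is what the generated_text[len(prompt_text):] slice compensates for. Passing return_full_text=False (a standard text-generation pipeline argument) yields only the completion, so the slice becomes unnecessary. A sketch of that variant, with a placeholder prompt:

from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="tiiuae/Falcon3-3B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)

prompt_text = "System: You are a helpful assistant.\nUser: Who are you?"

# return_full_text=False drops the prompt echo from the output, so the
# result is the completion alone and no manual slicing is needed.
result = pipe(prompt_text, max_new_tokens=256, do_sample=False,
              return_full_text=False)
print(result[0]["generated_text"].strip())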
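Also worth flagging: after this change, __main__ launches only Gradio, so the FastAPI routes are no longer served (and the threading/uvicorn imports go unused). If both should stay reachable, Gradio's mount_gradio_app attaches the UI to the FastAPI app so a single uvicorn process serves both. A minimal sketch; the "/ui" path and the placeholder interface are assumptions, not the Space's actual code:

import os
import gradio as gr
import uvicorn
from fastapi import FastAPI

app = FastAPI()
iface = gr.Interface(fn=lambda s: s, inputs="text", outputs="text")

# Mount the Gradio UI onto the FastAPI app; one uvicorn process then
# serves both the API routes and the UI. "/ui" is an arbitrary path.
app = gr.mount_gradio_app(app, iface, path="/ui")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 8000)))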