from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Extra
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import uuid
import json
from typing import Optional, List, Union, Dict, Any
import asyncio
# --- The engineering context (system prompt) ---
SYSTEM_PROMPT = """
You are a senior expert WordPress and WooCommerce developer. Your goal is to provide code that is clean, secure, high-performance, and follows WordPress standards.
### FUNDAMENTAL RULES ###
1. **Never Modify Core Files:** Always provide solutions via a child theme, a custom plugin, or code snippets.
2. **Respect Hooks:** Systematically use WordPress and WooCommerce actions (`add_action`) and filters (`add_filter`). This is the foundation of everything.
3. **Security First:**
   - **Escape Output:** Use `esc_html__()`, `esc_attr__()`, `esc_url()` for any displayed data.
   - **Sanitize Input:** Use `sanitize_text_field()` and `wp_kses_post()` for any data coming from the user.
   - **Use Nonces:** Add nonces (`wp_create_nonce`, `wp_verify_nonce`) to secure forms and AJAX actions.
4. **Performance:** Prefer native WordPress APIs (`WP_Query` instead of direct SQL queries, the Transients API for caching).
5. **Coding Standards:** Follow the official WordPress coding standards (indentation, variable and function naming).
### WOOCOMMERCE CONTEXT ###
- You have a thorough understanding of the product, order, and customer structures.
- You master the WooCommerce-specific hooks (e.g., `woocommerce_before_add_to_cart_button`, `woocommerce_thankyou`).
- You know how to correctly override WooCommerce templates via a child theme.
### RESPONSE FORMAT ###
For each code request, provide:
1. A brief explanation of the solution.
2. The complete and functional PHP code block.
3. A clear instruction on where to place the code (e.g., "Add this code to your child theme's `functions.php` file.").
"""
# --- Configuration ---
MODEL_ID = "deepseek-ai/deepseek-coder-1.3b-instruct"
DEVICE = "cpu"
# --- Model Loading ---
print(f"Loading model: {MODEL_ID}")
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map=DEVICE)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token
print("Model and tokenizer loaded successfully on CPU.")
app = FastAPI()
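# The endpoints below mimic the OpenAI Chat Completions wire format
# (/v1/models and /v1/chat/completions), so any OpenAI-compatible client can
# point its base URL at this Space.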
class ContentPart(BaseModel):
    type: str
    text: str

class ChatMessage(BaseModel):
    role: str
    content: Union[str, List[ContentPart]]

class ChatCompletionRequest(BaseModel):
    model: Optional[str] = None
    messages: List[ChatMessage]
    stream: Optional[bool] = False

    class Config:
        extra = Extra.ignore

class ModelData(BaseModel):
    id: str
    object: str = "model"
    owned_by: str = "user"

class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelData]
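# These Pydantic models cover only the subset of the OpenAI request schema
# this agent needs; extra fields a client may send (temperature, max_tokens,
# ...) are silently dropped via Config.extra = Extra.ignore.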
@app.get("/v1/models")
async def list_models():
    return ModelList(data=[ModelData(id=MODEL_ID)])
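# Chat endpoint: takes the latest user message, prepends SYSTEM_PROMPT,
# generates the full reply, then returns it either as one JSON body or as a
# simulated SSE stream depending on request.stream.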
@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest):
    user_prompt = ""
    last_message = request.messages[-1]
    if isinstance(last_message.content, list):
        for part in last_message.content:
            if part.type == 'text':
                user_prompt += part.text + "\n"
    elif isinstance(last_message.content, str):
        user_prompt = last_message.content
    if not user_prompt:
        return {"error": "Prompt not found."}
    # Inject the system prompt ahead of the user's message.
    messages_for_model = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': user_prompt}
    ]
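    # apply_chat_template renders the system + user messages with the model's
    # own chat template; return_dict=True is required so that both input_ids
    # and attention_mask come back together.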
    inputs = tokenizer.apply_chat_template(messages_for_model, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(DEVICE)
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=500,
        do_sample=True,
        temperature=0.1,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id
    )
    response_text = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
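    # "Streaming" is simulated: the completion is fully generated above, then
    # re-emitted character by character as OpenAI-style SSE chunks.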
    async def stream_generator():
        response_id = f"chatcmpl-{uuid.uuid4()}"
        for char in response_text:
            chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_ID, "choices": [{"index": 0, "delta": {"content": char}, "finish_reason": None}]}
            yield f"data: {json.dumps(chunk)}\n\n"
            await asyncio.sleep(0.005)
        final_chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_ID, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
        yield f"data: {json.dumps(final_chunk)}\n\n"
        yield "data: [DONE]\n\n"
    if request.stream:
        return StreamingResponse(stream_generator(), media_type="text/event-stream")
    else:
        return {"choices": [{"message": {"role": "assistant", "content": response_text}}]}
@app.get("/")
def root():
    return {"status": "Specialized WordPress/WooCommerce Agent is online", "model_id": MODEL_ID}