ReallyFloppyPenguin committed
Commit e7f0eb4 · verified · 1 Parent(s): 899488b

Create app.py

Files changed (1):
  1. app.py +341 -0
app.py ADDED
@@ -0,0 +1,341 @@
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
from typing import List, Tuple
import re


class PolarisModel:
    """
    POLARIS-4B-Preview: a post-training recipe for scaling RL on advanced reasoning models.
    Specialized for mathematical reasoning and problem-solving tasks.
    """

    def __init__(self):
        self.model_name = "POLARIS-Project/Polaris-4B-Preview"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        self.load_model()

    def load_model(self):
        """Load the POLARIS model with optimized settings for reasoning tasks."""
        try:
            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True,
                padding_side="left"
            )

            # Set pad token if it does not exist
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            # Configure for efficient inference
            if self.device == "cuda":
                # Use 4-bit quantization on GPU so the 4B model fits in memory
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.float16,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4"
                )

                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    quantization_config=quantization_config,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.float16
                )
            else:
                # CPU inference
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    device_map="cpu",
                    trust_remote_code=True,
                    torch_dtype=torch.float32
                )

            print(f"✅ POLARIS-4B-Preview loaded successfully on {self.device}")

        except Exception as e:
            print(f"❌ Error loading model: {e}")
            # Fall back to a smaller model if POLARIS fails to load
            try:
                print("🔄 Attempting to load fallback model...")
                self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
                self.model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
                self.tokenizer.pad_token = self.tokenizer.eos_token
                print("✅ Fallback model loaded")
            except Exception as fallback_error:
                print(f"❌ Fallback model also failed: {fallback_error}")
                self.model = None
                self.tokenizer = None

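    # Small diagnostic helper (an addition, not in the original file): reports the
    # loaded model's approximate size via transformers' get_memory_footprint(),
    # handy for confirming that 4-bit quantization took effect.
    def report_memory_footprint(self) -> str:
        if self.model is None:
            return "Model not loaded"
        gb = self.model.get_memory_footprint() / 1e9
        return f"~{gb:.2f} GB on {self.device}"
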
    def generate_reasoning_response(
        self,
        prompt: str,
        max_length: int = 2048,
        temperature: float = 0.7,
        top_p: float = 0.9,
        do_sample: bool = True,
        num_return_sequences: int = 1
    ) -> str:
        """
        Generate a response with chain-of-thought reasoning, optimized for POLARIS.
        """
        if not self.model or not self.tokenizer:
            return "❌ Model not loaded. Please check the model loading status."

        try:
            # Format the prompt for mathematical reasoning
            formatted_prompt = self.format_reasoning_prompt(prompt)

            # Tokenize input
            inputs = self.tokenizer.encode(
                formatted_prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            ).to(self.model.device)

            # Generate with parameters tuned for reasoning; keep the new-token
            # budget positive even when the prompt is long
            # (length_penalty / early_stopping omitted: they only affect beam search)
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_new_tokens=max(max_length - inputs.shape[1], 64),
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=do_sample,
                    num_return_sequences=num_return_sequences,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )

            # Decode only the newly generated tokens; slicing the decoded string by
            # prompt length is unreliable once special tokens are stripped
            response = self.tokenizer.decode(
                outputs[0][inputs.shape[1]:], skip_special_tokens=True
            ).strip()

            return self.format_response(response)

        except Exception as e:
            return f"❌ Error generating response: {str(e)}"

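    # Hedged sketch (an addition): a token-streaming variant of the method above,
    # built on transformers' TextIteratorStreamer. The wiring is illustrative; the
    # original Space returns only complete responses.
    def generate_streaming_response(self, prompt: str, max_new_tokens: int = 512):
        """Yield the response incrementally instead of waiting for a full decode."""
        from threading import Thread
        from transformers import TextIteratorStreamer

        if not self.model or not self.tokenizer:
            yield "❌ Model not loaded."
            return

        inputs = self.tokenizer(
            self.format_reasoning_prompt(prompt), return_tensors="pt"
        ).to(self.model.device)
        streamer = TextIteratorStreamer(
            self.tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        # generate() blocks, so run it in a worker thread while we consume tokens
        thread = Thread(
            target=self.model.generate,
            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
        )
        thread.start()
        partial = ""
        for new_text in streamer:
            partial += new_text
            yield partial
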
    def format_reasoning_prompt(self, user_input: str) -> str:
        """Format the prompt to encourage step-by-step reasoning."""
        if any(keyword in user_input.lower() for keyword in ['solve', 'calculate', 'find', 'prove', 'show']):
            return f"""<|im_start|>system
You are POLARIS, an advanced reasoning model specialized in mathematical problem-solving.
Approach each problem step-by-step with clear reasoning. Show your work and explain each step.
<|im_end|>
<|im_start|>user
{user_input}

Please solve this step-by-step:
<|im_end|>
<|im_start|>assistant
I'll solve this step-by-step:

"""
        else:
            return f"""<|im_start|>system
You are POLARIS, an advanced reasoning model. Provide thoughtful, well-reasoned responses.
<|im_end|>
<|im_start|>user
{user_input}
<|im_end|>
<|im_start|>assistant
"""

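    # Alternative prompt construction (a sketch, assuming the checkpoint ships a
    # chat template): tokenizer.apply_chat_template would build the same
    # <|im_start|> framing without hand-maintained markers.
    def format_with_chat_template(self, user_input: str) -> str:
        messages = [
            {"role": "system", "content": "You are POLARIS, an advanced reasoning model."},
            {"role": "user", "content": user_input},
        ]
        return self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
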
    def format_response(self, response: str) -> str:
        """Clean and format the model response."""
        # Remove any leftover chat-template artifacts
        response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', response, flags=re.DOTALL)
        response = response.strip()

        # Flag responses that contain LaTeX-style math markup
        if '$$' in response or '\\(' in response:
            response = "🧮 **Mathematical Solution:**\n\n" + response

        return response


# Initialize the model
polaris_model = PolarisModel()

def chat_with_polaris(
    message: str,
    history: List[Tuple[str, str]] = None,
    temperature: float = 0.7,
    max_length: int = 1024
) -> Tuple[str, List[Tuple[str, str]]]:
    """Main chat function for the Gradio interface."""
    if history is None:
        history = []

    if not message.strip():
        return "", history

    # Generate response
    response = polaris_model.generate_reasoning_response(
        message,
        temperature=temperature,
        max_length=max_length
    )

    # Update history
    history.append((message, response))

    return "", history

def clear_chat():
    """Clear the chat history and the input box."""
    return [], ""

def get_model_info():
    """Return information about the POLARIS model."""
    return """
## 🌠 POLARIS-4B-Preview

**POLARIS** is a post-training recipe for scaling reinforcement learning on advanced reasoning models.

### Key Features:
- **4B parameters** optimized for mathematical reasoning
- **Advanced chain-of-thought** reasoning capabilities
- **Superior performance** on mathematical benchmarks (AIME, AMC, Olympiad)
- **Outperforms larger models** through specialized RL training

### Benchmark Results:
- **AIME24**: 81.2% (avg@32)
- **AIME25**: 79.4% (avg@32)
- **AMC23**: 94.8% (avg@8)
- **Minerva Math**: 44.0% (avg@4)
- **Olympiad Bench**: 69.1% (avg@4)

### Best Use Cases:
- Mathematical problem solving
- Step-by-step reasoning tasks
- Competition math problems
- Logical reasoning challenges

Try asking mathematical questions or reasoning problems!
"""

# Example problems for the interface
example_problems = [
    "Solve: If x + y = 10 and x - y = 4, find the values of x and y.",
    "Find the derivative of f(x) = 3x² + 2x - 1",
    "Prove that the square root of 2 is irrational.",
    "A rectangle has a perimeter of 24 cm and an area of 35 cm². Find its dimensions.",
    "What is the sum of the first 100 positive integers?",
    "Solve the quadratic equation: 2x² - 7x + 3 = 0"
]

# Create the Gradio interface
with gr.Blocks(
    title="🌠 POLARIS-4B-Preview - Advanced Reasoning Model",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .chat-message {
        font-size: 16px !important;
    }
    """
) as demo:

    gr.Markdown("""
    # 🌠 POLARIS-4B-Preview
    ## Advanced Reasoning Model for Mathematical Problem Solving

    POLARIS uses reinforcement learning to achieve state-of-the-art performance on mathematical reasoning tasks.
    Try asking mathematical questions, logic problems, or step-by-step reasoning challenges!
    """)

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=600,
                show_label=False,
                container=True,
                bubble_full_width=False
            )

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Enter your mathematical problem or reasoning question...",
                    show_label=False,
                    scale=5,
                    container=False
                )
                submit_btn = gr.Button("🚀 Solve", scale=1, variant="primary")
                clear_btn = gr.Button("🗑️ Clear", scale=1)

        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")

            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Higher = more creative"
            )

            max_length = gr.Slider(
                minimum=256,
                maximum=2048,
                value=1024,
                step=128,
                label="Max Response Length",
                info="Maximum tokens to generate"
            )

            gr.Markdown("### 📚 Example Problems")

            for i, example in enumerate(example_problems):
                gr.Button(
                    f"Example {i+1}",
                    size="sm"
                ).click(
                    # Bind the current example as a default argument so each
                    # button fills the textbox with its own problem
                    lambda x=example: x,
                    outputs=[msg]
                )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📊 Model Information")
            model_info = gr.Markdown(get_model_info())

    # Event handlers
    submit_btn.click(
        chat_with_polaris,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot]
    )

    msg.submit(
        chat_with_polaris,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot]
    )

    # clear_chat returns (history, textbox), so wire both outputs
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg]
    )

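# A minimal smoke test (an addition, not part of the original file). Call it from
# a REPL to exercise the full pipeline without launching the UI; it assumes the
# model loaded successfully.
def _smoke_test(question: str = "What is the sum of the first 100 positive integers?") -> str:
    _, history = chat_with_polaris(question, [])
    return history[-1][1]
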
# Launch configuration
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # ignored on Hugging Face Spaces; useful for local runs
        show_error=True
    )