import re
from typing import List, Tuple

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


class PolarisModel:
    """
    POLARIS-4B-Preview: a post-training recipe for scaling RL on advanced
    reasoning models, specialized for mathematical reasoning and
    problem-solving tasks.
    """

    def __init__(self):
        self.model_name = "POLARIS-Project/Polaris-4B-Preview"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        self.load_model()

    def load_model(self):
        """Load the POLARIS model with settings suited to reasoning tasks."""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True,
                padding_side="left",
            )

            # Some checkpoints ship without an explicit pad token; reuse EOS.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            if self.device == "cuda":
                # 4-bit NF4 quantization with double quantization keeps the
                # 4B-parameter weights small enough for a single consumer GPU.
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=torch.float16,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4",
                )
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    quantization_config=quantization_config,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.float16,
                )
            else:
                # CPU fallback: full float32 weights, no quantization.
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    device_map="cpu",
                    trust_remote_code=True,
                    torch_dtype=torch.float32,
                )

            print(f"✅ POLARIS-4B-Preview loaded successfully on {self.device}")

        except Exception as e:
            print(f"❌ Error loading model: {e}")
            # Fall back to a small generic chat model so the UI stays usable.
            try:
                print("🔄 Attempting to load fallback model...")
                self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
                self.model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
                self.tokenizer.pad_token = self.tokenizer.eos_token
                print("✅ Fallback model loaded")
            except Exception as fallback_error:
                print(f"❌ Fallback model also failed: {fallback_error}")
                self.model = None
                self.tokenizer = None
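
    # Optional diagnostic helper, a sketch not present in the original app:
    # transformers models expose get_memory_footprint(), which is handy for
    # confirming that the 4-bit quantized weights actually fit the target GPU.
    def report_memory_footprint(self) -> str:
        if self.model is None:
            return "Model not loaded"
        gb = self.model.get_memory_footprint() / 1024**3
        return f"Approximate model memory footprint: {gb:.2f} GB"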

    def generate_reasoning_response(
        self,
        prompt: str,
        max_length: int = 2048,
        temperature: float = 0.7,
        top_p: float = 0.9,
        do_sample: bool = True,
        num_return_sequences: int = 1,
    ) -> str:
        """
        Generate a response with chain-of-thought reasoning, using sampling
        settings tuned for POLARIS.
        """
        if not self.model or not self.tokenizer:
            return "❌ Model not loaded. Please check the model loading status."

        try:
            formatted_prompt = self.format_reasoning_prompt(prompt)

            # Tokenize with an attention mask; generate() warns (and can
            # misbehave with left padding) when the mask is omitted.
            inputs = self.tokenizer(
                formatted_prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024,
            ).to(self.device)

            prompt_length = inputs["input_ids"].shape[1]

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    # Guard against a prompt longer than the token budget,
                    # which would otherwise make max_new_tokens negative.
                    max_new_tokens=max(max_length - prompt_length, 64),
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=do_sample,
                    num_return_sequences=num_return_sequences,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1,
                )

            # Decode only the newly generated tokens; slicing the decoded
            # string by len(formatted_prompt) is fragile because detokenized
            # text need not match the prompt character-for-character.
            response = self.tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            return self.format_response(response)

        except Exception as e:
            return f"❌ Error generating response: {str(e)}"
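
    # A minimal streaming variant, a sketch not present in the original app:
    # transformers' TextIteratorStreamer yields decoded text chunks while
    # generate() runs in a background thread, so a Gradio callback can
    # surface partial output as it is produced.
    def generate_streaming_response(self, prompt: str, max_length: int = 2048):
        from threading import Thread

        from transformers import TextIteratorStreamer

        formatted_prompt = self.format_reasoning_prompt(prompt)
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.device)
        streamer = TextIteratorStreamer(
            self.tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max(max_length - inputs["input_ids"].shape[1], 64),
        )
        Thread(target=self.model.generate, kwargs=generation_kwargs).start()

        partial = ""
        for chunk in streamer:
            partial += chunk
            yield self.format_response(partial)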

    def format_reasoning_prompt(self, user_input: str) -> str:
        """Format the prompt to encourage step-by-step reasoning."""
        if any(keyword in user_input.lower() for keyword in ['solve', 'calculate', 'find', 'prove', 'show']):
            return f"""<|im_start|>system
You are POLARIS, an advanced reasoning model specialized in mathematical problem-solving.
Approach each problem step-by-step with clear reasoning. Show your work and explain each step.
<|im_end|>
<|im_start|>user
{user_input}

Please solve this step-by-step:
<|im_end|>
<|im_start|>assistant
I'll solve this step-by-step:

"""
        else:
            return f"""<|im_start|>system
You are POLARIS, an advanced reasoning model. Provide thoughtful, well-reasoned responses.
<|im_end|>
<|im_start|>user
{user_input}
<|im_end|>
<|im_start|>assistant
"""
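
    # Alternative sketch, an assumption rather than the project's documented
    # interface: if the checkpoint ships a chat template, the tokenizer's
    # built-in apply_chat_template() builds the same <|im_start|>-style
    # prompt without hand-maintained f-strings.
    def format_prompt_with_template(self, user_input: str) -> str:
        messages = [
            {"role": "system", "content": "You are POLARIS, an advanced reasoning model. Reason step-by-step."},
            {"role": "user", "content": user_input},
        ]
        return self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )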

    def format_response(self, response: str) -> str:
        """Clean and format the model response."""
        # Strip any chat-template markers the model may have echoed back.
        response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', response, flags=re.DOTALL)
        response = response.strip()

        # Flag responses containing LaTeX math delimiters.
        if '$$' in response or '\\(' in response:
            response = "🧮 **Mathematical Solution:**\n\n" + response

        return response


# Load the model once at startup so all Gradio callbacks share the instance.
polaris_model = PolarisModel()


def chat_with_polaris(
    message: str,
    history: List[Tuple[str, str]] = None,
    temperature: float = 0.7,
    max_length: int = 1024,
) -> Tuple[str, List[Tuple[str, str]]]:
    """Main chat function for the Gradio interface."""
    if history is None:
        history = []

    if not message.strip():
        return "", history

    response = polaris_model.generate_reasoning_response(
        message,
        temperature=temperature,
        max_length=max_length,
    )

    history.append((message, response))

    # Return an empty string to clear the input box, plus the updated history.
    return "", history
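

# Hypothetical streaming counterpart (assumes the generate_streaming_response
# sketch above). Gradio treats generator callbacks as streaming functions and
# re-renders the chatbot on every yield.
def chat_with_polaris_streaming(
    message: str,
    history: List[Tuple[str, str]] = None,
    max_length: int = 1024,
):
    history = history or []
    if not message.strip():
        yield "", history
        return

    history.append((message, ""))
    for partial in polaris_model.generate_streaming_response(message, max_length=max_length):
        history[-1] = (message, partial)
        yield "", history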


def clear_chat():
    """Clear the chat history and the input box."""
    return [], ""


def get_model_info():
    """Return information about the POLARIS model."""
    return """
## 🌠 POLARIS-4B-Preview

**POLARIS** is a post-training recipe for scaling reinforcement learning on advanced reasoning models.

### Key Features:
- **4B parameters** optimized for mathematical reasoning
- **Advanced chain-of-thought** reasoning capabilities
- **Superior performance** on mathematical benchmarks (AIME, AMC, Olympiad)
- **Outperforms larger models** through specialized RL training

### Benchmark Results:
- **AIME24**: 81.2% (avg@32)
- **AIME25**: 79.4% (avg@32)
- **AMC23**: 94.8% (avg@8)
- **Minerva Math**: 44.0% (avg@4)
- **Olympiad Bench**: 69.1% (avg@4)

### Best Use Cases:
- Mathematical problem solving
- Step-by-step reasoning tasks
- Competition math problems
- Logical reasoning challenges

Try asking mathematical questions or reasoning problems!
"""


example_problems = [
    "Solve: If x + y = 10 and x - y = 4, find the values of x and y.",
    "Find the derivative of f(x) = 3x² + 2x - 1",
    "Prove that the square root of 2 is irrational.",
    "A rectangle has a perimeter of 24 cm and an area of 35 cm². Find its dimensions.",
    "What is the sum of the first 100 positive integers?",
    "Solve the quadratic equation: 2x² - 7x + 3 = 0",
]


with gr.Blocks(
    title="🌠 POLARIS-4B-Preview - Advanced Reasoning Model",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .chat-message {
        font-size: 16px !important;
    }
    """,
) as demo:

    gr.Markdown("""
    # 🌠 POLARIS-4B-Preview
    ## Advanced Reasoning Model for Mathematical Problem Solving

    POLARIS uses reinforcement learning to achieve state-of-the-art performance on mathematical reasoning tasks.
    Try asking mathematical questions, logic problems, or step-by-step reasoning challenges!
    """)

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=600,
                show_label=False,
                container=True,
                bubble_full_width=False,
            )

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Enter your mathematical problem or reasoning question...",
                    show_label=False,
                    scale=5,
                    container=False,
                )
                submit_btn = gr.Button("🚀 Solve", scale=1, variant="primary")
                clear_btn = gr.Button("🗑️ Clear", scale=1)

        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")

            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Higher = more creative",
            )

            max_length = gr.Slider(
                minimum=256,
                maximum=2048,
                value=1024,
                step=128,
                label="Max Response Length",
                info="Maximum tokens to generate",
            )

            gr.Markdown("### 📚 Example Problems")

            # Bind each example via a lambda default argument; a plain closure
            # would late-bind and leave every button sending the last item.
            for i, example in enumerate(example_problems):
                gr.Button(
                    f"Example {i+1}",
                    size="sm",
                ).click(
                    lambda x=example: x,
                    outputs=[msg],
                )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📊 Model Information")
            gr.Markdown(get_model_info())

    submit_btn.click(
        chat_with_polaris,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot],
    )

    msg.submit(
        chat_with_polaris,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot],
    )

    # clear_chat returns two values: an empty history for the chatbot and an
    # empty string for the input textbox, so both components are wired here.
    clear_btn.click(
        clear_chat,
        outputs=[chatbot, msg],
    )


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
    )