from dataclasses import dataclass, field from typing import Any, Union import gradio as gr Observation = Union[str, dict[str, Any]] Action = Union[str, dict[str, Any]] # e.g., user message, tool call schema @dataclass class StepResult: observation: Observation reward: float done: bool info: dict[str, Any] = field(default_factory=dict) class WordleEnv: """ Demonstration env. Not a full game; 4-letter variant for brevity. Observations are emoji strings; actions are 4-letter lowercase words. Reward is 1.0 on success, else 0.0. Terminal on success or after 6 guesses. """ def __init__(self, *, secret: str = "word", max_guesses: int = 6) -> None: assert len(secret) == 4 and secret.isalpha() self._secret = secret self._max = max_guesses self._n = 0 self._obs = "⬜" * 4 def reset(self) -> Observation: # noqa: ARG002 self._n = 0 self._obs = "⬜" * 4 return self._obs def step(self, action: Action) -> StepResult: guess: str = str(action) guess = guess.strip().lower() if len(guess) != 4 or not guess.isalpha(): return StepResult(self._obs, -0.05, False, {"error": "invalid guess"}) self._n += 1 secret = self._secret feedback: list[str] = [] for i, ch in enumerate(guess): if ch == secret[i]: feedback.append("🟩") elif ch in secret: feedback.append("🟨") else: feedback.append("⬜") self._obs = "".join(feedback) done = guess == secret or self._n >= self._max reward = 1.0 if guess == secret else 0.0 return StepResult(self._obs, reward, done, {"guesses": self._n}) def render(self) -> str: return self._obs # def step_fn(guess: str, wordle) -> tuple[str, float, bool, dict]: # """ # Perform a step in the Wordle environment. # # Args: # guess (str): The guessed word (4-letter lowercase string). # # Returns: # tuple[str, float, bool, dict]: A tuple containing: # - observation: The observation after the step . # - reward: The reward obtained from the step. # - done: Whether the game is done. # - info: Additional info. 
#     """
#     result = wordle.step(guess)
#     return result.observation, result.reward, result.done, result.info, wordle


# Single environment instance shared by every call to ``step_fn``.
wordle = WordleEnv(secret="word")

# True when the previous ``step_fn`` call ended the episode, so the next call
# has to start a new game before scoring the guess.
_episode_over = False


def step_fn(guess: str) -> tuple[str, float, bool, dict]:
    """
    Perform a step in the Wordle environment.

    The environment is shared at module level. When the previous call ended
    the game, the environment is reset first, so this guess becomes the first
    guess of a new game instead of continuing a finished one.

    Args:
        guess (str): The guessed word (4-letter lowercase string).

    Returns:
        tuple[str, float, bool, dict]: A tuple containing:
            - observation: The observation after the step.
            - reward: The reward obtained from the step.
            - done: Whether the game is done.
            - info: Additional info.
    """
    global _episode_over
    if _episode_over:
        # Nothing else ever resets the shared env, so without this the demo
        # could not begin another game after the first one finished.
        wordle.reset()
    result = wordle.step(guess)
    _episode_over = result.done
    return result.observation, result.reward, result.done, result.info


# demo = gr.Interface(
#     fn=step_fn,
#     inputs=["text", gr.State(WordleEnv(secret="word"))],
#     outputs=[
#         gr.Textbox(label="Observation"),
#         gr.Number(label="Reward"),
#         gr.Textbox(label="Done"),
#         gr.Textbox(label="Info"),
#         gr.State(),
#     ],
# )

# One text input (the guess) and one output component per element of the
# tuple returned by ``step_fn``.
demo = gr.Interface(
    fn=step_fn,
    inputs=["text"],
    outputs=[
        gr.Textbox(label="Observation"),
        gr.Number(label="Reward"),
        gr.Textbox(label="Done"),
        gr.Textbox(label="Info"),
    ],
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)