Fzina committed
Commit 90c82b4 · verified · 1 Parent(s): 3021d5b

Update app.py
Files changed (1): app.py (+35 -54)
app.py CHANGED
@@ -66,59 +66,43 @@ class TrafficSimEnv(gym.Env):
     def __init__(self, congestion_level):
         super(TrafficSimEnv, self).__init__()
         self.congestion_level = congestion_level
-        self.action_space = spaces.Discrete(3)  # Actions: 0 (decrease signal), 1 (maintain), 2 (increase signal)
-        self.observation_space = spaces.Box(
-            low=np.array([0, 20]),
-            high=np.array([2, 60]),
-            dtype=np.float32
-        )
-        self.current_signal = 30  # Starting signal duration (in seconds)
+        self.action_space = spaces.Discrete(3)
+        self.observation_space = spaces.Box(low=np.array([0, 20]), high=np.array([2, 60]), dtype=np.float32)
+        self.current_signal = 30
         self.steps = 0
 
-    def reset(self, *, seed=None, options=None):
-        """
-        Reset the environment and return the initial observation.
-        """
+    def reset(self, seed=None, options=None):
         self.steps = 0
-        self.current_signal = 30  # Reset the signal at the beginning
+        self.current_signal = 30
         congestion_map = {"Low": 0, "Medium": 1, "High": 2}
         self.congestion_numeric = congestion_map.get(self.congestion_level, 0)
+
         obs = np.array([self.congestion_numeric, self.current_signal], dtype=np.float32)
-        return obs, {}  # Return as single observation (no batch)
+        info = {}  # Empty dictionary as info
+        return obs, info
 
     def step(self, action):
-        """
-        Execute a step in the environment, taking an action and returning the next state and reward.
-        """
-        # Signal changes based on the action
-        if action == 0:
-            self.current_signal = max(20, self.current_signal - 5)  # Decrease signal
-        elif action == 2:
-            self.current_signal = min(60, self.current_signal + 5)  # Increase signal
-        # Action 1 is to maintain the current signal
-
-        # Reward Calculation based on congestion level and signal
+        signal_change = {0: -5, 1: 0, 2: 5}[action]
+        self.current_signal = max(20, min(60, self.current_signal + signal_change))
+
         reward = self._calculate_reward()
 
-        # Increment the step count
         self.steps += 1
-        done = self.steps >= 10  # End condition: after 10 steps
-        truncated = False  # Default to False; can change based on custom conditions
+        done = self.steps >= 10
+        truncated = False
 
-        # Observation after the action
         obs = np.array([self.congestion_numeric, self.current_signal], dtype=np.float32)
+        info = {}  # Info dictionary (can be populated with useful debugging data)
 
-        info = {}  # Additional info (can remain empty or contain any useful data)
-
-        return obs, reward, done, truncated, info  # Return as individual values
+        return obs, reward, done, truncated, info
 
     def _calculate_reward(self):
         if self.congestion_level == "High":
-            return -abs(40 - self.current_signal)  # Negative reward if signal is far from 40s
+            return -abs(40 - self.current_signal)
         elif self.congestion_level == "Medium":
-            return -abs(30 - self.current_signal)  # Negative reward if signal is far from 30s
+            return -abs(30 - self.current_signal)
         else:
-            return -abs(20 - self.current_signal)  # Negative reward if signal is far from 20s
+            return -abs(20 - self.current_signal)
 
     def render(self, mode="human"):
         print(f"Current Signal: {self.current_signal}s")
@@ -129,40 +113,37 @@ class TrafficSimEnv(gym.Env):
 # Prior to commit had a lot of errors regarding expected output errors
 def optimize_signal_rl(congestion_level):
     try:
-        # Create the environment with DummyVecEnv to wrap TrafficSimEnv
+        # Create the environment wrapped in DummyVecEnv
         env = DummyVecEnv([lambda: TrafficSimEnv(congestion_level)])
 
-        # Initialize PPO model (policy = "MlpPolicy", for multi-layer perceptron model)
+        # Initialize PPO model
         model = PPO("MlpPolicy", env, verbose=0)
 
-        # Train the model on the environment for 1000 timesteps
+        # Train the model
         model.learn(total_timesteps=1000)
 
-        # Reset environment to start the simulation
-        obs, info = env.reset()  # Reset the environment and get initial observation (info is empty by default)
-
-        # Since env.reset() returns a batch, extract the first observation and info
-        obs = obs[0]  # Extract the first observation from the batch
-        info = info[0]  # Extract the first info from the batch
+        # Reset environment
+        obs, info = env.reset()  # For Gymnasium 1.0.0, reset() returns (obs, info)
+        obs = obs[0]  # Extract first observation from batch
 
-        # Loop through to simulate for 10 timesteps
         for _ in range(10):
+            # Predict action
             action, _ = model.predict(obs)
 
-            # Step through the environment with the predicted action
-            obs, reward, done, truncated, info = env.step(action)  # Step returns 5 values
-            obs = obs[0]  # Extract the first element of the batch
-            reward = reward[0]  # Extract the first element of the reward batch
-            done = done[0]  # Extract the first element of the done flag batch
-            truncated = truncated[0]  # Extract the first element of the truncated batch
-            info = info[0]  # Extract the first element of the info batch
+            # Take a step
+            obs, reward, done, truncated, info = env.step(action)
+            obs = obs[0]  # Extract first observation from batch
+            reward = reward[0]  # Extract first reward from batch
+            done = done[0]  # Extract first done flag from batch
+            truncated = truncated[0]  # Extract first truncated flag from batch
+            info = info[0]  # Extract first info from batch
 
-            # Stop when the environment signals that the episode is done or truncated
+            # End simulation if episode is done or truncated
             if done or truncated:
                 break
 
-        # Extract the optimal signal duration (second observation value) from `obs`
-        optimal_duration = int(obs[1]) if len(obs) > 1 else 30  # Ensure the signal value is within range
+        # Extract optimal signal duration
+        optimal_duration = int(obs[1]) if len(obs) > 1 else 30
         return f"Green for {optimal_duration}s, Red for {60 - optimal_duration}s"
     except Exception as e:
         logging.error(f"Error optimizing signal with RL: {e}")
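
A caveat on the rollout loop above: stable-baselines3's DummyVecEnv exposes the older VecEnv interface, in which reset() returns only the batched observations and step() returns four values (episode ends are reported through dones and infos). A minimal sketch of the same rollout written against that interface, assuming stable-baselines3 2.x with Gymnasium and the TrafficSimEnv defined above ("Medium" is an arbitrary example level; this is not the committed code):

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([lambda: TrafficSimEnv("Medium")])
model = PPO("MlpPolicy", vec_env, verbose=0)
model.learn(total_timesteps=1000)

obs = vec_env.reset()                                  # VecEnv.reset() returns only the batched observations
optimal_duration = int(obs[0][1])                      # second observation value = current signal duration
for _ in range(10):
    action, _ = model.predict(obs)                     # batched action, shape (n_envs,)
    obs, rewards, dones, infos = vec_env.step(action)  # VecEnv.step() returns four values
    if dones[0]:
        break                                          # DummyVecEnv auto-resets on episode end, so stop before reusing obs
    optimal_duration = int(obs[0][1])
print(f"Green for {optimal_duration}s, Red for {60 - optimal_duration}s")
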