Spaces:

Fzina
/

Traffic-AI

Sleeping

Fzina committed on Jan 11

Commit

8618a39

verified ·

1 Parent(s): b3e5ddf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -146,28 +146,23 @@ def optimize_signal_rl(congestion_level):
         model.learn(total_timesteps=1000)
         # Reset the environment and get the initial observation
-        obs, _ = env.reset()  # We don't need to unpack infos for now
         logging.debug(f"Reset observation: {obs}, type: {type(obs)}, shape: {np.shape(obs)}")
-        # Flatten obs for the model
-        obs = np.array(obs).flatten()
         # RL Optimization loop
         for _ in range(10):
-            action, _ = model.predict(obs, deterministic=True)
-            obs, reward, done, truncated, _ = env.step(action)  # We don't need to unpack infos here
-            # Debug observation structure
-            logging.debug(f"Step observation: {obs}, type: {type(obs)}, shape: {np.shape(obs)}")
-            # Flatten the observation again for next step
-            obs = np.array(obs).flatten()
-            if done[0]:  # Check if done for the first environment in the batch
                 break
-        # Get optimal duration
-        optimal_duration = int(obs[1]) if len(obs) > 1 else 30
         return f"Green for {optimal_duration}s, Red for {60 - optimal_duration}s"
     except Exception as e:

         model.learn(total_timesteps=1000)
         # Reset the environment and get the initial observation
+        obs = env.reset()  # No need to unpack anything; this gives the observation array
         logging.debug(f"Reset observation: {obs}, type: {type(obs)}, shape: {np.shape(obs)}")
         # RL Optimization loop
         for _ in range(10):
+            action, _ = model.predict(obs, deterministic=True)  # Predict action
+            obs, rewards, dones, infos = env.step(action)  # Step environment
+            # Flatten the observation for the next step
+            obs = obs.flatten()
+            # If any environment is done, stop
+            if dones[0]:
                 break
+        # Get the optimal duration
+        optimal_duration = int(obs[1]) if len(obs) > 1 else 30  # Use default if obs is incorrect
         return f"Green for {optimal_duration}s, Red for {60 - optimal_duration}s"
     except Exception as e: