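"""
Smoke tests for ``GfootballAcademyEnv``, DI-engine's multi-agent wrapper around
the Google Research Football academy scenarios. Each test rolls out two random
episodes and checks the shapes of the observation dict and the terminal reward.
"""
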
import pytest
import numpy as np
import pprint
from easydict import EasyDict

try:
    from dizoo.gfootball.envs.gfootball_academy_env import GfootballAcademyEnv
except ModuleNotFoundError:
    print("[WARNING] gfootball env not found; install gfootball if you want to use it, otherwise ignore this warning.")
cfg_keeper = EasyDict(dict(
    env_name='academy_3_vs_1_with_keeper',
    agent_num=3,
    obs_dim=26,
))

cfg_counter = EasyDict(dict(
    env_name='academy_counterattack_hard',
    agent_num=4,
    obs_dim=34,
))


@pytest.mark.envtest  # assumption: DI-engine's custom `envtest` mark used across dizoo env tests
class TestGfootballAcademyEnv:

    def get_random_action(self, min_value, max_value):
        """Sample one random integer action in [min_value, max_value] (helper, unused by the tests below)."""
        action = np.random.randint(min_value, max_value + 1, (1, ))
        return action

    def test_academy_3_vs_1_with_keeper(self):
        cfg = cfg_keeper
        env = GfootballAcademyEnv(cfg)
        print(env.observation_space, env.action_space, env.reward_space)
        pp = pprint.PrettyPrinter(indent=2)
        for i in range(2):
            eps_len = 0
            # env.enable_save_replay(replay_path='./video')
            obs = env.reset()
            while True:
                eps_len += 1
                # `random_action()[0]` is a dict of per-agent actions; cast each to int for `step`.
                action = env.random_action()[0]
                action = [int(action_agent) for action_agent in action.values()]
                timestep = env.step(action)
                obs = timestep.obs
                reward = timestep.reward
                done = timestep.done
                # print('observation: ')
                # pp.pprint(obs)
                assert obs['agent_state'].shape == (cfg.agent_num, cfg.obs_dim)
                assert obs['global_state'].shape == (cfg.agent_num, cfg.obs_dim * 2)
                assert obs['action_mask'].shape == (cfg.agent_num, 19)  # 19 discrete gfootball actions
                print('step {}, action: {}, reward: {}'.format(eps_len, action, reward))
                if done:
                    break
            # Terminal reward: -1 when the episode fails or times out, 100 when a goal is scored.
            assert reward == -1 or reward == 100
            print(f'Episode {i} done! The episode length is {eps_len}. The last reward is {reward}.')
        env.close()
        print('End')

    def test_academy_counterattack_hard(self):
        cfg = cfg_counter
        env = GfootballAcademyEnv(cfg)
        print(env.observation_space, env.action_space, env.reward_space)
        pp = pprint.PrettyPrinter(indent=2)
        for i in range(2):
            eps_len = 0
            obs = env.reset()
            while True:
                eps_len += 1
                action = env.random_action()[0]
                action = [int(action_agent) for action_agent in action.values()]
                timestep = env.step(action)
                obs = timestep.obs
                reward = timestep.reward
                done = timestep.done
                # print('observation: ')
                # pp.pprint(obs)
                assert obs['agent_state'].shape == (cfg.agent_num, cfg.obs_dim)
                assert obs['global_state'].shape == (cfg.agent_num, cfg.obs_dim * 2)
                assert obs['action_mask'].shape == (cfg.agent_num, 19)
                print('step {}, action: {}, reward: {}'.format(eps_len, action, reward))
                if done:
                    break
            assert reward == -1 or reward == 100
            print(f'Episode {i} done! The episode length is {eps_len}. The last reward is {reward}.')
        env.close()
        print('End')
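
# Run with: pytest -sv <this file>. Requires the `gfootball` package and its
# game engine to be installed; otherwise the guarded import above only prints a
# warning and these tests fail with a NameError at env construction.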