Skip to content

[WIP] Simpler examples #967

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ jobs:
image: ubuntu-2004-cuda-11.4:202110-01
resource_class: gpu.nvidia.medium
environment:
image_name: "pytorch/manylinux-cuda117"
image_name: "nvidia/cudagl:11.4.0-base"
TAR_OPTIONS: --no-same-owner
PYTHON_VERSION: << parameters.python_version >>
CU_VERSION: << parameters.cu_version >>
Expand Down
2 changes: 2 additions & 0 deletions .circleci/unittest/linux_examples/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ unset PYTORCH_VERSION
# For unit tests, nightly PyTorch is used (see the following section),
# so there is no need to set PYTORCH_VERSION.
# In fact, keeping PYTORCH_VERSION set would force us to hardcode the PyTorch version in the config.
apt-get update && apt-get install -y git wget gcc g++
#apt-get update && apt-get install -y git wget freeglut3 freeglut3-dev

set -e

Expand Down
93 changes: 69 additions & 24 deletions .circleci/unittest/linux_examples/scripts/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

set -e

apt-get update && apt-get remove swig -y && apt-get install -y git gcc patchelf libosmesa6-dev libgl1-mesa-glx libglfw3 swig3.0 wget freeglut3 freeglut3-dev

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

Expand All @@ -27,19 +29,75 @@ export MKL_THREADING_LAYER=GNU
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 20
python .circleci/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 20

# ========================================================================================
# DDPG
# ----
#
# Modalities:
# ^^^^^^^^^^^
#
# From pixels: on/off
# Batched environments: on/off
#
# With batched environments
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=4 \
env_per_collector=2 \
collector_devices=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_collectors=4 \
collector.collector_devices=cuda:0 \
env.num_envs=2 \
optim.batch_size=10 \
optim.optim_steps_per_batch=1 \
recorder.video=True \
recorder.frames=4 \
replay_buffer.capacity=120 \
env.from_pixels=False \
logger.backend=csv
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_collectors=4 \
collector.collector_devices=cuda:0 \
env.num_envs=2 \
optim.batch_size=10 \
optim.optim_steps_per_batch=1 \
recorder.video=True \
recorder.frames=4 \
replay_buffer.capacity=120 \
env.from_pixels=True \
logger.backend=csv
# With single envs
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_collectors=4 \
collector.collector_devices=cuda:0 \
env.num_envs=1 \
optim.batch_size=10 \
optim.optim_steps_per_batch=1 \
recorder.video=True \
recorder.frames=4 \
replay_buffer.capacity=120 \
env.from_pixels=False \
logger.backend=csv
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
collector.total_frames=48 \
collector.init_random_frames=10 \
collector.frames_per_batch=16 \
collector.num_collectors=4 \
collector.collector_devices=cuda:0 \
env.num_envs=1 \
optim.batch_size=10 \
optim.optim_steps_per_batch=1 \
recorder.video=True \
recorder.frames=4 \
replay_buffer.capacity=120 \
env.from_pixels=True \
logger.backend=csv

python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
total_frames=48 \
batch_size=10 \
Expand Down Expand Up @@ -112,19 +170,6 @@ python .circleci/unittest/helpers/coverage_run_parallel.py examples/dreamer/drea
buffer_size=120 \
rssm_hidden_dim=17

# With single envs
python .circleci/unittest/helpers/coverage_run_parallel.py examples/ddpg/ddpg.py \
total_frames=48 \
init_random_frames=10 \
batch_size=10 \
frames_per_batch=16 \
num_workers=2 \
env_per_collector=1 \
collector_devices=cuda:0 \
optim_steps_per_batch=1 \
record_video=True \
record_frames=4 \
buffer_size=120
python .circleci/unittest/helpers/coverage_run_parallel.py examples/a2c/a2c.py \
total_frames=48 \
batch_size=10 \
Expand Down
42 changes: 4 additions & 38 deletions .circleci/unittest/linux_examples/scripts/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ set -e

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Avoid error: "fatal: unsafe repository" (handled by the `git config ... safe.directory` call below)
apt-get update && apt-get install -y git wget gcc g++

git config --global --add safe.directory '*'
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
Expand Down Expand Up @@ -71,48 +73,12 @@ conda env config vars set MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
MUJOCO_GL=$PRIVATE_MUJOCO_GL \
PYOPENGL_PLATFORM=$PRIVATE_MUJOCO_GL

# Software rendering requires GLX and OSMesa.
if [ $PRIVATE_MUJOCO_GL == 'egl' ] || [ $PRIVATE_MUJOCO_GL == 'osmesa' ] ; then
yum makecache
yum install -y glfw
yum install -y glew
yum install -y mesa-libGL
yum install -y mesa-libGL-devel
yum install -y mesa-libOSMesa-devel
yum -y install egl-utils
yum -y install freeglut
fi

pip install pip --upgrade

conda env update --file "${this_dir}/environment.yml" --prune

conda deactivate
conda activate "${env_dir}"

if [[ $OSTYPE != 'darwin'* ]]; then
# install ale-py: manylinux names are broken for CentOS so we need to manually download and
# rename them
PY_VERSION=$(python --version)
if [[ $PY_VERSION == *"3.7"* ]]; then
wget https://files.pythonhosted.org/packages/ab/fd/6615982d9460df7f476cad265af1378057eee9daaa8e0026de4cedbaffbd/ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
rm ale_py-0.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
elif [[ $PY_VERSION == *"3.8"* ]]; then
wget https://files.pythonhosted.org/packages/0f/8a/feed20571a697588bc4bfef05d6a487429c84f31406a52f8af295a0346a2/ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
rm ale_py-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
elif [[ $PY_VERSION == *"3.9"* ]]; then
wget https://files.pythonhosted.org/packages/a0/98/4316c1cedd9934f9a91b6e27a9be126043b4445594b40cfa391c8de2e5e8/ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
rm ale_py-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
elif [[ $PY_VERSION == *"3.10"* ]]; then
wget https://files.pythonhosted.org/packages/60/1b/3adde7f44f79fcc50d0a00a0643255e48024c4c3977359747d149dc43500/ale_py-0.8.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
mv ale_py-0.8.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
rm ale_py-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
fi
pip install "gymnasium[atari,accept-rom-license]"
else
pip install "gymnasium[atari,accept-rom-license]"
fi
pip install ale-py
pip install "gymnasium[atari,accept-rom-license]"
101 changes: 65 additions & 36 deletions examples/ddpg/config.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,65 @@
env_name: HalfCheetah-v4
env_task: ""
env_library: gym
async_collection: 1
record_video: 0
normalize_rewards_online: 1
normalize_rewards_online_scale: 5
frame_skip: 1
frames_per_batch: 1024
optim_steps_per_batch: 128
batch_size: 256
total_frames: 1000000
prb: 1
lr: 3e-4
ou_exploration: 1
multi_step: 1
init_random_frames: 25000
activation: elu
gSDE: 0
from_pixels: 0
#collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
collector_devices: [cpu,cpu,cpu,cpu]
env_per_collector: 8
num_workers: 32
lr_scheduler: ""
value_network_update_interval: 200
record_interval: 10
max_frames_per_traj: -1
weight_decay: 0.0
annealing_frames: 1000000
init_env_steps: 10000
record_frames: 10000
loss_function: smooth_l1
batch_transform: 1
buffer_prefetch: 64
norm_stats: 1
# task and env
env:
env_name: HalfCheetah-v4
env_task: ""
env_library: gym
normalize_rewards_online: 1
normalize_rewards_online_scale: 5
frame_skip: 1
norm_stats: 1
num_envs: 4
n_samples_stats: 1000
noop: 1
reward_scaling:
from_pixels: False

# collector
collector:
async_collection: 1
frames_per_batch: 1024
total_frames: 1000000
multi_step: 3 # 0 to disable
init_random_frames: 25000
collector_devices: cpu # [cpu,cpu,cpu,cpu]
num_collectors: 4
max_frames_per_traj: -1

# eval
recorder:
video: True
interval: 10000 # record interval in frames
frames: 10000

# logger
logger:
backend: wandb
exp_name: ddpg_cheetah_gym

# Buffer
replay_buffer:
prb: 1
buffer_prefetch: 64
capacity: 1_000_000

# Optim
optim:
device: cpu
lr: 3e-4
weight_decay: 0.0
batch_size: 256
lr_scheduler: ""
value_network_update_interval: 200
optim_steps_per_batch: 8

# Policy and model
model:
ou_exploration: 1
annealing_frames: 1000000
noisy: False
activation: elu

# loss
loss:
loss_function: smooth_l1
gamma: 0.99
tau: 0.05
Loading