
Commit

added docker, new algo
alafumee committed Nov 11, 2024
1 parent 82ca588 commit 7284329
Showing 16 changed files with 978 additions and 71 deletions.
2 changes: 1 addition & 1 deletion constants.py
@@ -1,7 +1,7 @@
import pathlib

### Task parameters
DATA_DIR = '/localdata/yy/datasets/aloha' # '/home/yunzhezh/adaptiveAC/data'
DATA_DIR = '/home/torchuser/adaptiveAC/data' # '/localdata/yy/datasets/aloha'
SIM_TASK_CONFIGS = {
'sim_transfer_cube_scripted':{
'dataset_dir': DATA_DIR + '/sim_transfer_cube_scripted',
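
Not part of the diff: the root swap above is the only change here; each task entry in SIM_TASK_CONFIGS still derives its dataset_dir from DATA_DIR. A minimal lookup sketch, assuming constants.py is imported from the repo root and using only the task key visible in the hunk:

    # Illustrative only; not part of the commit.
    from constants import SIM_TASK_CONFIGS

    task_config = SIM_TASK_CONFIGS['sim_transfer_cube_scripted']
    dataset_dir = task_config['dataset_dir']
    # With the new DATA_DIR this resolves to
    # '/home/torchuser/adaptiveAC/data/sim_transfer_cube_scripted'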
5 changes: 3 additions & 2 deletions detr/main.py
@@ -73,6 +73,7 @@ def get_args_parser():
# new
parser.add_argument('--state_dim', action='store', type=int, help='state_dim', required=False)
parser.add_argument('--action_dim', action='store', type=int, help='action_dim', required=False)
parser.add_argument('--prediction_ckpt_dir', action='store', type=str, help='prediction_ckpt_dir', required=False)

return parser

@@ -99,14 +100,14 @@ def build_ACT_model_and_optimizer(args_override):

return model, optimizer

def build_ACT2_model_and_optimizer(args_override):
def build_ACT2_model_and_optimizer(args_override, pred_model):
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
args = parser.parse_args()
for k, v in args_override.items():
setattr(args, k, v)
# print(args.backbone, " args.backbone\n")
# exit(0)
model = build_ACT2_model(args)
model = build_ACT2_model(args, pred_model)
model.cuda()

param_dicts = [
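
Not part of the diff: with this change the ACT2 builder no longer constructs its model in isolation; a feature-prediction model is built (and presumably restored from the new --prediction_ckpt_dir) first and threaded through. A hypothetical wiring sketch — the checkpoint file name and the use of prediction_ckpt_dir as an attribute are assumptions:

    # Hypothetical usage sketch, not part of the commit.
    import torch
    from detr.main import build_ACT2_model_and_optimizer
    from detr.models import build_prediction_model

    def build_policy_with_prediction(args, args_override):
        pred_model = build_prediction_model(args)              # feature-prediction model
        ckpt_path = f"{args.prediction_ckpt_dir}/policy_last.ckpt"  # file name is an assumption
        pred_model.load_state_dict(torch.load(ckpt_path))
        return build_ACT2_model_and_optimizer(args_override, pred_model)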
4 changes: 2 additions & 2 deletions detr/models/__init__.py
@@ -12,5 +12,5 @@ def build_CNNMLP_model(args):
def build_prediction_model(args):
return build_P(args)

def build_ACT2_model(args):
return build_act2(args)
def build_ACT2_model(args, pred_model):
return build_act2(args, pred_model)
105 changes: 63 additions & 42 deletions detr/models/detr_vae.py
@@ -159,10 +159,10 @@ def forward(self, qpos, image, env_state, actions=None, is_pad=None):

class DETRVAE_with_model(nn.Module):
""" This is the DETR module that performs object detection """
def __init__(self, backbones, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
def __init__(self, pred_model, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
""" Initializes the model.
Parameters:
backbones: torch module of the backbone to be used. See backbone.py
pred_model: a feature prediction model
transformer: torch module of the transformer architecture. See transformer.py
state_dim: robot state dimension of the environment
num_queries: number of object queries, ie detection slot. This is the maximal number of objects
@@ -178,20 +178,20 @@ def __init__(self, backbones, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
self.action_head = nn.Linear(hidden_dim, action_dim)
self.is_pad_head = nn.Linear(hidden_dim, 1)
self.query_embed = nn.Embedding(num_queries, hidden_dim)
if backbones is not None:
self.input_proj = nn.Conv2d(backbones[0].num_channels, hidden_dim, kernel_size=1)
self.backbones = nn.ModuleList(backbones)
self.input_proj_robot_state = nn.Linear(14, hidden_dim)
# print(self.backbones, backbones,end=" backbones\n")
# for param in self.backbones[0].parameters():
# assert param.requires_grad == False
# print(param.requires_grad,end=" param\n")
else:
# input_dim = 14 + 7 # robot_state + env_state
self.input_proj_robot_state = nn.Linear(14, hidden_dim)
self.input_proj_env_state = nn.Linear(7, hidden_dim)
self.pos = torch.nn.Embedding(2, hidden_dim)
self.backbones = None
# if backbones is not None:
# self.input_proj = nn.Conv2d(backbones[0].num_channels, hidden_dim, kernel_size=1)
# self.backbones = nn.ModuleList(backbones)
# self.input_proj_robot_state = nn.Linear(14, hidden_dim)
# # print(self.backbones, backbones,end=" backbones\n")
# # for param in self.backbones[0].parameters():
# # assert param.requires_grad == False
# # print(param.requires_grad,end=" param\n")
# else:
# input_dim = 14 + 7 # robot_state + env_state
self.input_proj_robot_state = nn.Linear(14, hidden_dim)
self.input_proj_env_state = nn.Linear(7, hidden_dim)
self.pos = torch.nn.Embedding(2, hidden_dim)
self.pred_model = pred_model

# encoder extra parameters
self.latent_dim = 32 # final size of latent z # TODO tune
@@ -205,7 +205,7 @@ def __init__(self, backbones, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
self.latent_out_proj = nn.Linear(self.latent_dim, hidden_dim) # project latent sample to embedding
self.additional_pos_embed = nn.Embedding(2, hidden_dim) # learned position embedding for proprio and latent

def forward(self, qpos, image, env_state, actions=None, is_pad=None, predict_model=None):
def forward(self, qpos, image, env_state, actions=None, is_pad=None):
"""
qpos: batch, qpos_dim
image: batch, num_cam, channel, height, width
@@ -244,32 +244,36 @@ def forward(self, qpos, image, env_state, actions=None, is_pad=None, predict_model=None):
latent_sample = torch.zeros([bs, self.latent_dim], dtype=torch.float32).to(qpos.device)
latent_input = self.latent_out_proj(latent_sample)
print(latent_input.shape, " latent_input\n")
if self.backbones is not None:
if self.pred_model is not None:
src = self.pred_model.get_features(qpos, image)
print("SRC_SHAPE: ", src.shape)
pos = get_sinusoid_encoding_table(1, src.shape[1])
# Image observation features and position embeddings
all_cam_features = []
all_cam_pos = []
for cam_id, cam_name in enumerate(self.camera_names):
features, pos = self.backbones[0](image[:, cam_id]) # HARDCODED
features = features[0] # take the last layer feature
pos = pos[0]
all_cam_features.append(self.input_proj(features))
print(features.shape, " features\n")
print(all_cam_features[-1].shape, " pos\n")
all_cam_pos.append(pos)
# proprioception features
print(self.camera_names,end=" camera_names\n")
# all_cam_features = []
# all_cam_pos = []
# for cam_id, cam_name in enumerate(self.camera_names):
# features, pos = self.backbones[0](image[:, cam_id]) # HARDCODED
# features = features[0] # take the last layer feature
# pos = pos[0]
# all_cam_features.append(self.input_proj(features))
# print(features.shape, " features\n")
# print(all_cam_features[-1].shape, " pos\n")
# all_cam_pos.append(pos)
# # proprioception features
# print(self.camera_names,end=" camera_names\n")
proprio_input = self.input_proj_robot_state(qpos)
# fold camera dimension into width dimension
src = torch.cat(all_cam_features, axis=3)
pos = torch.cat(all_cam_pos, axis=3)
print(src.shape, " src\n")
print(pos.shape, " pos\n")
print(latent_input.shape, " latent_input\n")
print(proprio_input.shape, " proprio_input\n")
print(self.additional_pos_embed.weight.shape, " additional_pos_embed\n")
print(self.query_embed.weight.shape, " query_embed\n")
print(self.transformer, " transformer\n")
# # fold camera dimension into width dimension
# src = torch.cat(all_cam_features, axis=3)
# pos = torch.cat(all_cam_pos, axis=3)
# print(src.shape, " src\n")
# print(pos.shape, " pos\n")
# print(latent_input.shape, " latent_input\n")
# print(proprio_input.shape, " proprio_input\n")
# print(self.additional_pos_embed.weight.shape, " additional_pos_embed\n")
# print(self.query_embed.weight.shape, " query_embed\n")
# print(self.transformer, " transformer\n")
hs = self.transformer(src, None, self.query_embed.weight, pos, latent_input, proprio_input, self.additional_pos_embed.weight)[0]

else:
qpos = self.input_proj_robot_state(qpos)
env_state = self.input_proj_env_state(env_state)
@@ -399,8 +403,25 @@ def build(args):

return model

def build_act2(args):
backbones = []
def build_act2(args, pred_model):
transformer = build_transformer(args)

encoder = build_encoder(args)

model = DETRVAE_with_model(
pred_model,
transformer,
encoder,
state_dim=args.state_dim,
action_dim=args.action_dim,
num_queries=args.num_queries,
camera_names=args.camera_names,
)

n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("number of parameters: %.2fM" % (n_parameters/1e6,))

return model

def build_cnnmlp(args):
state_dim = 14 # TODO hardcode
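
Not part of the diff: DETRVAE_with_model now sources its transformer memory from the injected prediction model instead of per-camera ResNet backbones. A stub of the interface the new forward path relies on — the method name comes from the hunk above, while the returned feature shape is an assumption:

    # Interface sketch only, not part of the commit. forward() calls
    # pred_model.get_features(qpos, image) and derives a sinusoidal positional
    # encoding from src.shape[1]; a (batch, seq_len, hidden_dim) output is assumed.
    import torch
    import torch.nn as nn

    class PredictionModelStub(nn.Module):
        """Stand-in exposing the single method DETRVAE_with_model.forward uses."""
        def __init__(self, hidden_dim=512, seq_len=300):
            super().__init__()
            self.hidden_dim = hidden_dim
            self.seq_len = seq_len

        def get_features(self, qpos, image):
            # qpos: (batch, qpos_dim); image: (batch, num_cam, C, H, W)
            bs = qpos.shape[0]
            return torch.zeros(bs, self.seq_len, self.hidden_dim, device=qpos.device)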
6 changes: 6 additions & 0 deletions docker/.dockerignore
@@ -0,0 +1,6 @@
**/*.ckpt
data
ckpt
wandb_ckpt
wandb
.git
7 changes: 7 additions & 0 deletions docker/10_nvidia.json
@@ -0,0 +1,7 @@
{
"file_format_version" : "1.0.0",
"ICD" : {
"library_path" : "libEGL_nvidia.so.0"
}
}

76 changes: 76 additions & 0 deletions docker/Dockerfile
@@ -0,0 +1,76 @@
FROM nvcr.io/nvidia/pytorch:21.09-py3
ENV DEBIAN_FRONTEND=noninteractive

# dependencies for gym
#
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libxcursor-dev \
libxrandr-dev \
libxinerama-dev \
libxi-dev \
mesa-common-dev \
zip \
unzip \
make \
gcc-8 \
g++-8 \
vulkan-utils \
mesa-vulkan-drivers \
pigz \
git \
libegl1 \
git-lfs

# Force gcc 8 to avoid CUDA 10 build issues on newer base OS
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8

# WAR for eglReleaseThread shutdown crash in libEGL_mesa.so.0 (ensure it's never detected/loaded)
# Can't remove package libegl-mesa0 directly (because of libegl1 which we need)
RUN rm /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0 /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0.0.0 /usr/share/glvnd/egl_vendor.d/50_mesa.json

COPY docker/nvidia_icd.json /usr/share/vulkan/icd.d/nvidia_icd.json
COPY docker/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json

RUN groupadd -g 1027 torchuser
RUN useradd -r -u 1027 -g torchuser --create-home torchuser

ENV PATH="/home/torchuser/.local/bin:$PATH"

# WORKDIR /home/torchuser/adaptiveAC

# COPY --chown=torchuser . .

WORKDIR /home/torchuser

USER torchuser

RUN conda create -n aloha python=3.8.10

USER root

RUN chown 1027:1027 /home/torchuser/.conda/envs/.conda_envs_dir_test

USER torchuser

# RUN source activate aloha \
# && pip install torchvision \
# && pip install torch \
# && pip install pyquaternion \
# && pip install pyyaml \
# && pip install rospkg \
# && pip install pexpect \
# && pip install mujoco==2.3.7 \
# && pip install dm_control==1.0.14 \
# && pip install opencv-python \
# && pip install matplotlib \
# && pip install einops \
# && pip install packaging \
# && pip install h5py \
# && pip install ipython \
# && cd adaptiveAC/detr && pip install -e .
# RUN cd python && pip install -q -e .


ENV NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=all
6 changes: 6 additions & 0 deletions docker/build.sh
@@ -0,0 +1,6 @@
set -e
set -u
SCRIPTROOT="$( cd "$(dirname "$0")" ; pwd -P )"
cd "${SCRIPTROOT}/.."

docker build --network host -t act -f docker/Dockerfile .
8 changes: 8 additions & 0 deletions docker/enter_container.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
su_=$1

if [ "$su_" = "true" ]; then
docker exec -it -u 0 torch_container_gui /bin/bash
else
docker exec -it torch_container_gui /bin/bash
fi
7 changes: 7 additions & 0 deletions docker/nvidia_icd.json
@@ -0,0 +1,7 @@
{
"file_format_version" : "1.0.0",
"ICD": {
"library_path": "libGLX_nvidia.so.0",
"api_version" : "1.1.95"
}
}
52 changes: 52 additions & 0 deletions docker/run.sh
@@ -0,0 +1,52 @@
#!/bin/bash
set -e
set -u

if [ $# -eq 0 ]
then
echo "running docker without display"
docker run -it --network=host --gpus=all -v ~/adaptiveAC:/home/torchuser/adaptiveAC/ \
--name=torch_container act /bin/bash \
-c "source activate aloha && pip install torchvision==0.14.0 \
&& pip install torch==1.13.0 \
&& pip install pyquaternion \
&& pip install pyyaml \
&& pip install rospkg \
&& pip install pexpect \
&& pip install mujoco==2.3.7 \
&& pip install dm_control==1.0.14 \
&& pip install opencv-python \
&& pip install matplotlib \
&& pip install einops \
&& pip install packaging \
&& pip install h5py \
&& pip install ipython \
&& pip install wandb\
&& cd adaptiveAC/detr && pip install -e . \
&& tail -f /dev/null"
else
export DISPLAY=$DISPLAY
echo "setting display to $DISPLAY"
xhost +
docker run -it -v "$HOME/.Xauthority:/home/torchuser/.Xauthority:rw" -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY \
--ipc=host --network=host --privileged=true \
-v ~/adaptiveAC:/home/torchuser/adaptiveAC/ --gpus=all --name=torch_container_gui act /bin/bash \
-c "source activate aloha && pip install torchvision==0.14.0 \
&& pip install torch==1.13.0 \
&& pip install pyquaternion \
&& pip install pyyaml \
&& pip install rospkg \
&& pip install pexpect \
&& pip install mujoco==2.3.7 \
&& pip install dm_control==1.0.14 \
&& pip install opencv-python \
&& pip install matplotlib \
&& pip install einops \
&& pip install packaging \
&& pip install h5py \
&& pip install ipython \
&& pip install wandb \
&& cd adaptiveAC/detr && pip install -e . \
&& tail -f /dev/null"
xhost -
fi
10 changes: 6 additions & 4 deletions il.yaml
@@ -14,11 +14,13 @@ parameters:
chunk_size:
values: [100]
ckpt_dir:
value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_transfer_cube_scripted_run3"
# value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_transfer_cube_scripted_run3"
# value: "./ckpt/sim_insertion_scripted/Testchunk"
# value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_insertion_human_chunk100"
# value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_transfer_cube_scripted_run2"
# value: "/home/yunzhezh/adaptiveAC/wandb_ckpt/vae_transfer_cube"
value: "/home/torchuser/adaptiveAC/wandb_ckpt/vae_transfer_cube"
prediction_ckpt_dir:
value: "/home/torchuser/adaptiveAC/wandb_ckpt/vae_transfer_cube_prediction"
hidden_dim:
value: 512
batch_size:
Expand All @@ -32,11 +34,11 @@ parameters:
num_epochs:
value: 2000
num_epochs_prediction:
value: 1
value: 5
lr:
value: 1e-5
seed:
values: [1]
values: [1, 2, 3, 4, 5]
eval:
value: false
onscreen_render:
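
Not part of the diff: il.yaml has the shape of a wandb sweep configuration (flat parameters with value/values entries), so the new prediction_ckpt_dir and the widened seed list would reach the trainer through wandb.config. A hypothetical sketch of that plumbing; everything except the YAML keys is an assumption:

    # Hypothetical sweep entry point, not part of the commit.
    import wandb

    def train_one_run():
        run = wandb.init()                                   # one parameter combination per run
        ckpt_dir = wandb.config['ckpt_dir']
        prediction_ckpt_dir = wandb.config['prediction_ckpt_dir']
        seed = wandb.config['seed']
        # ... hand these to the usual training loop ...
        run.finish()

    # Launched roughly as:
    #   wandb sweep il.yaml     -> prints a sweep id
    #   wandb agent <sweep_id>  -> runs the configured program per parameter set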
