
Commit

added docker, new algo
alafumee committed Nov 11, 2024
1 parent 82ca588 commit 7284329
Showing 16 changed files with 978 additions and 71 deletions.
2 changes: 1 addition & 1 deletion constants.py
@@ -1,7 +1,7 @@
import pathlib

### Task parameters
DATA_DIR = '/localdata/yy/datasets/aloha' # '/home/yunzhezh/adaptiveAC/data'
DATA_DIR = '/home/torchuser/adaptiveAC/data' # '/localdata/yy/datasets/aloha'
SIM_TASK_CONFIGS = {
'sim_transfer_cube_scripted':{
'dataset_dir': DATA_DIR + '/sim_transfer_cube_scripted',
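
Not part of the diff: the root swap above is the only change here; each task entry in SIM_TASK_CONFIGS still derives its dataset_dir from DATA_DIR. A minimal lookup sketch, assuming constants.py is imported from the repo root and using only the task key visible in the hunk:

    # Illustrative only; not part of the commit.
    from constants import SIM_TASK_CONFIGS

    task_config = SIM_TASK_CONFIGS['sim_transfer_cube_scripted']
    dataset_dir = task_config['dataset_dir']
    # With the new DATA_DIR this resolves to
    # '/home/torchuser/adaptiveAC/data/sim_transfer_cube_scripted'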
5 changes: 3 additions & 2 deletions detr/main.py
@@ -73,6 +73,7 @@ def get_args_parser():
# new
parser.add_argument('--state_dim', action='store', type=int, help='state_dim', required=False)
parser.add_argument('--action_dim', action='store', type=int, help='action_dim', required=False)
parser.add_argument('--prediction_ckpt_dir', action='store', type=str, help='prediction_ckpt_dir', required=False)

return parser

@@ -99,14 +100,14 @@ def build_ACT_model_and_optimizer(args_override):

return model, optimizer

def build_ACT2_model_and_optimizer(args_override):
def build_ACT2_model_and_optimizer(args_override, pred_model):
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
args = parser.parse_args()
for k, v in args_override.items():
setattr(args, k, v)
# print(args.backbone, " args.backbone\n")
# exit(0)
model = build_ACT2_model(args)
model = build_ACT2_model(args, pred_model)
model.cuda()

param_dicts = [
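
Not part of the diff: with this change the ACT2 builder no longer constructs its model in isolation; a feature-prediction model is built (and presumably restored from the new --prediction_ckpt_dir) first and threaded through. A hypothetical wiring sketch — the checkpoint file name and the use of prediction_ckpt_dir as an attribute are assumptions:

    # Hypothetical usage sketch, not part of the commit.
    import torch
    from detr.main import build_ACT2_model_and_optimizer
    from detr.models import build_prediction_model

    def build_policy_with_prediction(args, args_override):
        pred_model = build_prediction_model(args)              # feature-prediction model
        ckpt_path = f"{args.prediction_ckpt_dir}/policy_last.ckpt"  # file name is an assumption
        pred_model.load_state_dict(torch.load(ckpt_path))
        return build_ACT2_model_and_optimizer(args_override, pred_model)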
4 changes: 2 additions & 2 deletions detr/models/__init__.py
@@ -12,5 +12,5 @@ def build_CNNMLP_model(args):
def build_prediction_model(args):
return build_P(args)

def build_ACT2_model(args):
return build_act2(args)
def build_ACT2_model(args, pred_model):
return build_act2(args, pred_model)
105 changes: 63 additions & 42 deletions detr/models/detr_vae.py
@@ -159,10 +159,10 @@ def forward(self, qpos, image, env_state, actions=None, is_pad=None):

class DETRVAE_with_model(nn.Module):
""" This is the DETR module that performs object detection """
def __init__(self, backbones, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
def __init__(self, pred_model, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
""" Initializes the model.
Parameters:
backbones: torch module of the backbone to be used. See backbone.py
pred_model: a feature prediction model
transformer: torch module of the transformer architecture. See transformer.py
state_dim: robot state dimension of the environment
num_queries: number of object queries, ie detection slot. This is the maximal number of objects
@@ -178,20 +178,20 @@ def __init__(self, backbones, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
self.action_head = nn.Linear(hidden_dim, action_dim)
self.is_pad_head = nn.Linear(hidden_dim, 1)
self.query_embed = nn.Embedding(num_queries, hidden_dim)
if backbones is not None:
self.input_proj = nn.Conv2d(backbones[0].num_channels, hidden_dim, kernel_size=1)
self.backbones = nn.ModuleList(backbones)
self.input_proj_robot_state = nn.Linear(14, hidden_dim)
# print(self.backbones, backbones,end=" backbones\n")
# for param in self.backbones[0].parameters():
# assert param.requires_grad == False
# print(param.requires_grad,end=" param\n")
else:
# input_dim = 14 + 7 # robot_state + env_state
self.input_proj_robot_state = nn.Linear(14, hidden_dim)
self.input_proj_env_state = nn.Linear(7, hidden_dim)
self.pos = torch.nn.Embedding(2, hidden_dim)
self.backbones = None
# if backbones is not None:
# self.input_proj = nn.Conv2d(backbones[0].num_channels, hidden_dim, kernel_size=1)
# self.backbones = nn.ModuleList(backbones)
# self.input_proj_robot_state = nn.Linear(14, hidden_dim)
# # print(self.backbones, backbones,end=" backbones\n")
# # for param in self.backbones[0].parameters():
# # assert param.requires_grad == False
# # print(param.requires_grad,end=" param\n")
# else:
# input_dim = 14 + 7 # robot_state + env_state
self.input_proj_robot_state = nn.Linear(14, hidden_dim)
self.input_proj_env_state = nn.Linear(7, hidden_dim)
self.pos = torch.nn.Embedding(2, hidden_dim)
self.pred_model = pred_model

# encoder extra parameters
self.latent_dim = 32 # final size of latent z # TODO tune
@@ -205,7 +205,7 @@ def __init__(self, backbones, transformer, encoder, state_dim, action_dim, num_queries, camera_names):
self.latent_out_proj = nn.Linear(self.latent_dim, hidden_dim) # project latent sample to embedding
self.additional_pos_embed = nn.Embedding(2, hidden_dim) # learned position embedding for proprio and latent

def forward(self, qpos, image, env_state, actions=None, is_pad=None, predict_model=None):
def forward(self, qpos, image, env_state, actions=None, is_pad=None):
"""
qpos: batch, qpos_dim
image: batch, num_cam, channel, height, width
@@ -244,32 +244,36 @@ def forward(self, qpos, image, env_state, actions=None, is_pad=None, predict_model=None):
latent_sample = torch.zeros([bs, self.latent_dim], dtype=torch.float32).to(qpos.device)
latent_input = self.latent_out_proj(latent_sample)
print(latent_input.shape, " latent_input\n")
if self.backbones is not None:
if self.pred_model is not None:
src = self.pred_model.get_features(qpos, image)
print("SRC_SHAPE: ", src.shape)
pos = get_sinusoid_encoding_table(1, src.shape[1])
# Image observation features and position embeddings
all_cam_features = []
all_cam_pos = []
for cam_id, cam_name in enumerate(self.camera_names):
features, pos = self.backbones[0](image[:, cam_id]) # HARDCODED
features = features[0] # take the last layer feature
pos = pos[0]
all_cam_features.append(self.input_proj(features))
print(features.shape, " features\n")
print(all_cam_features[-1].shape, " pos\n")
all_cam_pos.append(pos)
# proprioception features
print(self.camera_names,end=" camera_names\n")
# all_cam_features = []
# all_cam_pos = []
# for cam_id, cam_name in enumerate(self.camera_names):
# features, pos = self.backbones[0](image[:, cam_id]) # HARDCODED
# features = features[0] # take the last layer feature
# pos = pos[0]
# all_cam_features.append(self.input_proj(features))
# print(features.shape, " features\n")
# print(all_cam_features[-1].shape, " pos\n")
# all_cam_pos.append(pos)
# # proprioception features
# print(self.camera_names,end=" camera_names\n")
proprio_input = self.input_proj_robot_state(qpos)
# fold camera dimension into width dimension
src = torch.cat(all_cam_features, axis=3)
pos = torch.cat(all_cam_pos, axis=3)
print(src.shape, " src\n")
print(pos.shape, " pos\n")
print(latent_input.shape, " latent_input\n")
print(proprio_input.shape, " proprio_input\n")
print(self.additional_pos_embed.weight.shape, " additional_pos_embed\n")
print(self.query_embed.weight.shape, " query_embed\n")
print(self.transformer, " transformer\n")
# # fold camera dimension into width dimension
# src = torch.cat(all_cam_features, axis=3)
# pos = torch.cat(all_cam_pos, axis=3)
# print(src.shape, " src\n")
# print(pos.shape, " pos\n")
# print(latent_input.shape, " latent_input\n")
# print(proprio_input.shape, " proprio_input\n")
# print(self.additional_pos_embed.weight.shape, " additional_pos_embed\n")
# print(self.query_embed.weight.shape, " query_embed\n")
# print(self.transformer, " transformer\n")
hs = self.transformer(src, None, self.query_embed.weight, pos, latent_input, proprio_input, self.additional_pos_embed.weight)[0]

else:
qpos = self.input_proj_robot_state(qpos)
env_state = self.input_proj_env_state(env_state)
@@ -399,8 +403,25 @@ def build(args):

return model

def build_act2(args):
backbones = []
def build_act2(args, pred_model):
transformer = build_transformer(args)

encoder = build_encoder(args)

model = DETRVAE_with_model(
pred_model,
transformer,
encoder,
state_dim=args.state_dim,
action_dim=args.action_dim,
num_queries=args.num_queries,
camera_names=args.camera_names,
)

n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("number of parameters: %.2fM" % (n_parameters/1e6,))

return model

def build_cnnmlp(args):
state_dim = 14 # TODO hardcode
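
Not part of the diff: DETRVAE_with_model now sources its transformer memory from the injected prediction model instead of per-camera ResNet backbones. A stub of the interface the new forward path relies on — the method name comes from the hunk above, while the returned feature shape is an assumption:

    # Interface sketch only, not part of the commit. forward() calls
    # pred_model.get_features(qpos, image) and derives a sinusoidal positional
    # encoding from src.shape[1]; a (batch, seq_len, hidden_dim) output is assumed.
    import torch
    import torch.nn as nn

    class PredictionModelStub(nn.Module):
        """Stand-in exposing the single method DETRVAE_with_model.forward uses."""
        def __init__(self, hidden_dim=512, seq_len=300):
            super().__init__()
            self.hidden_dim = hidden_dim
            self.seq_len = seq_len

        def get_features(self, qpos, image):
            # qpos: (batch, qpos_dim); image: (batch, num_cam, C, H, W)
            bs = qpos.shape[0]
            return torch.zeros(bs, self.seq_len, self.hidden_dim, device=qpos.device)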
6 changes: 6 additions & 0 deletions docker/.dockerignore
@@ -0,0 +1,6 @@
**/*.ckpt
data
ckpt
wandb_ckpt
wandb
.git
7 changes: 7 additions & 0 deletions docker/10_nvidia.json
@@ -0,0 +1,7 @@
{
"file_format_version" : "1.0.0",
"ICD" : {
"library_path" : "libEGL_nvidia.so.0"
}
}

76 changes: 76 additions & 0 deletions docker/Dockerfile
@@ -0,0 +1,76 @@
FROM nvcr.io/nvidia/pytorch:21.09-py3
ENV DEBIAN_FRONTEND=noninteractive

# dependencies for gym
#
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libxcursor-dev \
libxrandr-dev \
libxinerama-dev \
libxi-dev \
mesa-common-dev \
zip \
unzip \
make \
gcc-8 \
g++-8 \
vulkan-utils \
mesa-vulkan-drivers \
pigz \
git \
libegl1 \
git-lfs

# Force gcc 8 to avoid CUDA 10 build issues on newer base OS
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 8
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 8

# WAR for eglReleaseThread shutdown crash in libEGL_mesa.so.0 (ensure it's never detected/loaded)
# Can't remove package libegl-mesa0 directly (because of libegl1 which we need)
RUN rm /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0 /usr/lib/x86_64-linux-gnu/libEGL_mesa.so.0.0.0 /usr/share/glvnd/egl_vendor.d/50_mesa.json

COPY docker/nvidia_icd.json /usr/share/vulkan/icd.d/nvidia_icd.json
COPY docker/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json

RUN groupadd -g 1027 torchuser
RUN useradd -r -u 1027 -g torchuser --create-home torchuser

ENV PATH="/home/torchuser/.local/bin:$PATH"

# WORKDIR /home/torchuser/adaptiveAC

# COPY --chown=torchuser . .

WORKDIR /home/torchuser

USER torchuser

RUN conda create -n aloha python=3.8.10

USER root

RUN chown 1027:1027 /home/torchuser/.conda/envs/.conda_envs_dir_test

USER torchuser

# RUN source activate aloha \
# && pip install torchvision \
# && pip install torch \
# && pip install pyquaternion \
# && pip install pyyaml \
# && pip install rospkg \
# && pip install pexpect \
# && pip install mujoco==2.3.7 \
# && pip install dm_control==1.0.14 \
# && pip install opencv-python \
# && pip install matplotlib \
# && pip install einops \
# && pip install packaging \
# && pip install h5py \
# && pip install ipython \
# && cd adaptiveAC/detr && pip install -e .
# RUN cd python && pip install -q -e .


ENV NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=all
6 changes: 6 additions & 0 deletions docker/build.sh
@@ -0,0 +1,6 @@
set -e
set -u
SCRIPTROOT="$( cd "$(dirname "$0")" ; pwd -P )"
cd "${SCRIPTROOT}/.."

docker build --network host -t act -f docker/Dockerfile .
8 changes: 8 additions & 0 deletions docker/enter_container.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
su_=$1

if [ "$su_" = "true" ]; then
docker exec -it -u 0 torch_container_gui /bin/bash
else
docker exec -it torch_container_gui /bin/bash
fi
7 changes: 7 additions & 0 deletions docker/nvidia_icd.json
@@ -0,0 +1,7 @@
{
"file_format_version" : "1.0.0",
"ICD": {
"library_path": "libGLX_nvidia.so.0",
"api_version" : "1.1.95"
}
}
52 changes: 52 additions & 0 deletions docker/run.sh
@@ -0,0 +1,52 @@
#!/bin/bash
set -e
set -u

if [ $# -eq 0 ]
then
echo "running docker without display"
docker run -it --network=host --gpus=all -v ~/adaptiveAC:/home/torchuser/adaptiveAC/ \
--name=torch_container act /bin/bash \
-c "source activate aloha && pip install torchvision==0.14.0 \
&& pip install torch==1.13.0 \
&& pip install pyquaternion \
&& pip install pyyaml \
&& pip install rospkg \
&& pip install pexpect \
&& pip install mujoco==2.3.7 \
&& pip install dm_control==1.0.14 \
&& pip install opencv-python \
&& pip install matplotlib \
&& pip install einops \
&& pip install packaging \
&& pip install h5py \
&& pip install ipython \
&& pip install wandb\
&& cd adaptiveAC/detr && pip install -e . \
&& tail -f /dev/null"
else
export DISPLAY=$DISPLAY
echo "setting display to $DISPLAY"
xhost +
docker run -it -v "$HOME/.Xauthority:/home/torchuser/.Xauthority:rw" -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY \
--ipc=host --network=host --privileged=true \
-v ~/adaptiveAC:/home/torchuser/adaptiveAC/ --gpus=all --name=torch_container_gui act /bin/bash \
-c "source activate aloha && pip install torchvision==0.14.0 \
&& pip install torch==1.13.0 \
&& pip install pyquaternion \
&& pip install pyyaml \
&& pip install rospkg \
&& pip install pexpect \
&& pip install mujoco==2.3.7 \
&& pip install dm_control==1.0.14 \
&& pip install opencv-python \
&& pip install matplotlib \
&& pip install einops \
&& pip install packaging \
&& pip install h5py \
&& pip install ipython \
&& pip install wandb \
&& cd adaptiveAC/detr && pip install -e . \
&& tail -f /dev/null"
xhost -
fi
10 changes: 6 additions & 4 deletions il.yaml
@@ -14,11 +14,13 @@ parameters:
chunk_size:
values: [100]
ckpt_dir:
value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_transfer_cube_scripted_run3"
# value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_transfer_cube_scripted_run3"
# value: "./ckpt/sim_insertion_scripted/Testchunk"
# value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_insertion_human_chunk100"
# value: "/localdata/yy/zzzzworkspace/act/ckpt/sim_transfer_cube_scripted_run2"
# value: "/home/yunzhezh/adaptiveAC/wandb_ckpt/vae_transfer_cube"
value: "/home/torchuser/adaptiveAC/wandb_ckpt/vae_transfer_cube"
prediction_ckpt_dir:
value: "/home/torchuser/adaptiveAC/wandb_ckpt/vae_transfer_cube_prediction"
hidden_dim:
value: 512
batch_size:
Expand All @@ -32,11 +34,11 @@ parameters:
num_epochs:
value: 2000
num_epochs_prediction:
value: 1
value: 5
lr:
value: 1e-5
seed:
values: [1]
values: [1, 2, 3, 4, 5]
eval:
value: false
onscreen_render:
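
Not part of the diff: il.yaml has the shape of a wandb sweep configuration (flat parameters with value/values entries), so the new prediction_ckpt_dir and the widened seed list would reach the trainer through wandb.config. A hypothetical sketch of that plumbing; everything except the YAML keys is an assumption:

    # Hypothetical sweep entry point, not part of the commit.
    import wandb

    def train_one_run():
        run = wandb.init()                                   # one parameter combination per run
        ckpt_dir = wandb.config['ckpt_dir']
        prediction_ckpt_dir = wandb.config['prediction_ckpt_dir']
        seed = wandb.config['seed']
        # ... hand these to the usual training loop ...
        run.finish()

    # Launched roughly as:
    #   wandb sweep il.yaml     -> prints a sweep id
    #   wandb agent <sweep_id>  -> runs the configured program per parameter set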
