From 2c68ecc7385a3211f686dc4de52713bf55dbec28 Mon Sep 17 00:00:00 2001
From: Dan Friedman <16467890+danfriedman0@users.noreply.github.com>
Date: Tue, 30 Jan 2024 15:16:00 -0500
Subject: [PATCH] Add training script for ICL experiment.

Add an example training script to reproduce the in-context learning
experiment from the paper (see issue #4). An important detail is to set
`--unembed_mask 0` (otherwise the model will be prevented from
predicting the `unk` token, which is used for this task). You may need
to run the script with multiple seeds (e.g. 10) to get an
initialization that learns to solve the task.
---
 scripts/induction.sh | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 scripts/induction.sh

diff --git a/scripts/induction.sh b/scripts/induction.sh
new file mode 100644
index 0000000..527dfc7
--- /dev/null
+++ b/scripts/induction.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+VOCAB_SIZE=10
+MIN_LENGTH=9
+MAX_LENGTH=9
+SEED=6
+
+echo "SEED=${SEED}";
+
+python src/run.py \
+    --dataset "induction" \
+    --vocab_size "${VOCAB_SIZE}" \
+    --dataset_size 20000 \
+    --min_length "${MIN_LENGTH}" \
+    --max_length "${MAX_LENGTH}" \
+    --n_epochs 500 \
+    --batch_size 512 \
+    --lr "5e-2" \
+    --gumbel_samples 1 \
+    --sample_fn "gumbel_soft" \
+    --tau_init 3.0 \
+    --tau_end 0.01 \
+    --tau_schedule "geomspace" \
+    --n_vars_cat 1 \
+    --n_vars_num 1 \
+    --n_layers 2 \
+    --n_heads_cat 1 \
+    --n_heads_num 0 \
+    --n_cat_mlps 0 \
+    --n_num_mlps 0 \
+    --attention_type "cat" \
+    --rel_pos_bias "fixed" \
+    --one_hot_embed \
+    --count_only \
+    --selector_width 0 \
+    --seed "${SEED}" \
+    --unique 1 \
+    --unembed_mask 0 \
+    --autoregressive \
+    --save \
+    --save_code \
+    --output_dir "output/induction/s${SEED}";
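
Usage note (not part of the patch): the script hardcodes SEED=6, so trying
multiple seeds means either editing that line per run or wrapping the script.
The snippet below is a minimal sketch of such a wrapper; it assumes you adapt
the script to read the seed from its first argument (e.g. SEED="${1:-6}"),
which is not how the patched script is written.

    # Minimal seed sweep (assumes scripts/induction.sh is modified to set
    # the seed from its first argument, e.g. SEED="${1:-6}").
    for seed in $(seq 0 9); do
        bash scripts/induction.sh "${seed}"
    done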