-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_ubelix_job_epyc2.sh
48 lines (40 loc) · 1.79 KB
/
run_ubelix_job_epyc2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
# Slurm batch job that builds the LawInstruct instruction datasets on the
# epyc2 partition. Resource request: 1 node, 1 task, 1 CPU, 64GB of memory,
# and a time limit of 4 days; a mail is sent when the job ends or fails.
# All #SBATCH directives must stay above the first executable command.
#SBATCH --job-name="LawInstruct"
#SBATCH --mail-user=joel.niklaus@inf.unibe.ch
#SBATCH --mail-type=end,fail
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --time=4-00:00:00
#SBATCH --mem=64GB
#SBATCH --cpus-per-task=1
#SBATCH --qos=job_epyc2
#SBATCH --partition=epyc2
# Put your code below this line

# Work from the project checkout: the Python script is invoked by relative
# path later on, so abort right away if the directory is not reachable.
cd /storage/workspaces/inf_fdn/hpc_nfp77/joel/LawInstruct || {
  echo "error: cannot cd to /storage/workspaces/inf_fdn/hpc_nfp77/joel/LawInstruct" >&2
  exit 1
}

# NOTE(review): `conda activate` needs conda's shell integration to be
# available in the batch shell -- confirm the job environment provides it.
conda activate lawinstruct

# Cache directory for downloaded datasets, kept on the workspace storage.
export HF_DATASETS_CACHE="/storage/workspaces/inf_fdn/hpc_nfp77/joel/.cache"

# Make git-lfs (pinned to version 2.4.2) available through the module system.
module load git-lfs/2.4.2
# Define the language modes and instruction bank sizes to build.
# The trailing comments keep alternative value sets for reference.
language_modes=("multilingual") # ("english" "multilingual")
instruction_bank_sizes=(10) # (1 2 5 10)

# Number of build_instruction_datasets.py invocations that exited non-zero.
failed_builds=0

# Loop over every (language mode, instruction bank size) combination.
for language_mode in "${language_modes[@]}"; do
  for instruction_bank_size in "${instruction_bank_sizes[@]}"; do
    echo "Building datasets for language mode $language_mode and instruction bank size $instruction_bank_size"
    # Invoke the Python script with the desired arguments.
    # Optional extra flag (currently disabled): --build_from_scratch
    if ! python build_instruction_datasets.py \
      --language_mode "$language_mode" \
      --instruction_bank_size "$instruction_bank_size" \
      --datasets legal; then
      # Keep going so the remaining combinations are still attempted, but
      # remember the failure so it is not masked by a later success.
      echo "error: build failed for language mode $language_mode and instruction bank size $instruction_bank_size" >&2
      failed_builds=$((failed_builds + 1))
    fi
  done
done

# Exit non-zero if any build failed; otherwise the job's exit status would
# only reflect the last combination that ran.
if ((failed_builds > 0)); then
  echo "error: $failed_builds dataset build(s) failed" >&2
  exit 1
fi
# xz --list data/*.xz
# python build_num_shards_dict.py
# example: python build_instruction_datasets.py --language_mode english --instruction_bank_size 10 --datasets legal --build_from_scratch
# IMPORTANT:
# Run with sbatch run_ubelix_job_epyc2.sh
# check with squeue --user=jn20t930 --jobs={job_id}
# monitor with scontrol show --detail jobid {job_id}
# cancel with scancel {job_id}
# monitor gpu usage (GPU jobs only; this job requests no GPU) with ssh gnode14 and then nvidia-smi
# run interactive job with srun --partition=epyc2 --qos=job_epyc2 --mem=64G --cpus-per-task=16 --time=02:00:00 --pty /bin/bash