# Spaces: Running on Zero
# Launch tools/<FILE>.py through Slurm's srun, forwarding a config and any
# extra arguments to the Python entry point.
#
# Usage: <this script> FILE CONFIG [PY_ARGS...]
#   FILE     base name of the script under tools/ (".py" is appended)
#   CONFIG   first positional argument handed to the Python script
#   PY_ARGS  remaining arguments, forwarded unchanged to the Python script
#
# Environment overrides (default in parentheses):
#   GPUS           (8)      value for srun --ntasks
#   GPUS_PER_NODE  (8)      value for --gres=gpu: and --ntasks-per-node
#   CPUS_PER_TASK  (5)      value for --cpus-per-task
#   MASTER_PORT    (random in 28500..30499) exported to the launched job
#   PARTITION      (DUMMY)  value for srun -p
#   JOB_NAME       (DUMMY)  value for --job-name
#   QUOTATYPE      (auto)   value for --quotatype
#                           NOTE(review): presumably a site-specific srun
#                           option; confirm against the target cluster.
#   SRUN_ARGS      ("")     extra srun options, split on whitespace
#   DEEPSPEED      (deepspeed_zero2) value passed to --deepspeed
#
# Requires Bash (arrays, ${@:3}, $RANDOM).

# Refuse to launch a job when the two required arguments are missing;
# otherwise srun would be asked to run "tools/.py".
if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  printf 'Usage: %s FILE CONFIG [PY_ARGS...]\n' "$0" >&2
  exit 2
fi

# Trace every command so the fully expanded srun line shows up in the log.
set -x

FILE=$1
CONFIG=$2

GPUS=${GPUS:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
CPUS_PER_TASK=${CPUS_PER_TASK:-5}
# Pick a pseudo-random port so concurrent jobs are unlikely to collide.
MASTER_PORT=${MASTER_PORT:-$((28500 + RANDOM % 2000))}

PARTITION=${PARTITION:-DUMMY}
JOB_NAME=${JOB_NAME:-DUMMY}
QUOTATYPE=${QUOTATYPE:-auto}

SRUN_ARGS=${SRUN_ARGS:-""}
DEEPSPEED=${DEEPSPEED:-deepspeed_zero2}

# Keep the forwarded arguments in an array so that arguments containing
# whitespace or glob characters reach the Python script intact.
PY_ARGS=("${@:3}")

# The leading VAR=value words apply only to the environment of this srun call.
# SRUN_ARGS is intentionally unquoted: it may hold several options that must
# be split into separate words.
# shellcheck disable=SC2086
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
OMP_NUM_THREADS=1 \
MKL_NUM_THREADS=1 \
CUDA_HOME="${CONDA_PREFIX}" \
LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:$(realpath ~/.local/lib)" \
MASTER_PORT="${MASTER_PORT}" \
srun -p "${PARTITION}" \
  --job-name="${JOB_NAME}" \
  --gres=gpu:"${GPUS_PER_NODE}" \
  --ntasks="${GPUS}" \
  --ntasks-per-node="${GPUS_PER_NODE}" \
  --cpus-per-task="${CPUS_PER_TASK}" \
  --kill-on-bad-exit=1 \
  --quotatype="${QUOTATYPE}" \
  ${SRUN_ARGS} \
  python -u "tools/${FILE}.py" "${CONFIG}" --launcher="slurm" --deepspeed "${DEEPSPEED}" "${PY_ARGS[@]}"