diff --git a/tensorflow/viper/run_distributed_1_node_multi_gpu.slurm b/tensorflow/viper/run_distributed_1_node_multi_gpu.slurm
new file mode 100644
index 0000000000000000000000000000000000000000..b91296f65f44980f22d13d9b211e0f59261f55b9
--- /dev/null
+++ b/tensorflow/viper/run_distributed_1_node_multi_gpu.slurm
@@ -0,0 +1,33 @@
+#!/bin/bash -l
+#SBATCH -o logs/%j_multigpu.log
+#SBATCH -e logs/%j_multigpu.log
+#SBATCH -J tf_synth
+#SBATCH --nodes=1            # request a full node
+#SBATCH --ntasks-per-node=1   # start only one task via srun; Python multiprocessing spawns further worker processes internally
+#SBATCH --ntasks-per-socket=1
+#SBATCH --cpus-per-task=48    # give that single task all the cores so Python's multiprocessing workers have room
+#SBATCH --constraint="apu"
+#SBATCH --gres=gpu:2
+#SBATCH --mem=0
+#SBATCH --time=02:15:00
+
+module purge
+module load apptainer/1.4.1
+
+# Recover this script's path from the job info (the "Command=" field of scontrol),
+# drop the lowercased $SLURM_CELL path component and map /u1/ or /u2/ back to /u/
+export SBATCH_SCRIPT_PATH=$(scontrol show job "$SLURM_JOBID" | grep -oP 'Command=\K.*' | sed -E "s|/$(echo "$SLURM_CELL" | tr '[:upper:]' '[:lower:]')||" | sed -E 's|^/u[12]/|/u/|')
+
+sif_file="$(dirname "$SBATCH_SCRIPT_PATH")/tf-2.16.sif"
+code_dir="$(dirname "$(dirname "$SBATCH_SCRIPT_PATH")")/src"
+
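+# allocate GPU memory on demand instead of reserving it all up front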
+export TF_FORCE_GPU_ALLOW_GROWTH=true
+
+export batch_size_per_device="--batch_size_per_device 256"
+
+srun apptainer exec --nv "$sif_file" bash -c "
+  export RANK=\${SLURM_PROCID}
+
+  python $code_dir/train_synthetic.py train $batch_size_per_device
+"
diff --git a/tensorflow/viper/run_distributed_multi_node_multi_gpu.slurm b/tensorflow/viper/run_distributed_multi_node_multi_gpu.slurm
new file mode 100644
index 0000000000000000000000000000000000000000..070f864a79fa624d025c01c995b2dfcf7f0cba17
--- /dev/null
+++ b/tensorflow/viper/run_distributed_multi_node_multi_gpu.slurm
@@ -0,0 +1,35 @@
+#!/bin/bash -l
+#SBATCH -o logs/%j_multinode.log
+#SBATCH -e logs/%j_multinode.log
+#SBATCH -J tf_synth
+#SBATCH --nodes=2           # request multiple nodes
+#SBATCH --ntasks-per-node=1   # start only one task per node via srun; Python multiprocessing spawns further worker processes internally
+#SBATCH --ntasks-per-socket=1
+#SBATCH --cpus-per-task=48    # give that single task all the cores so Python's multiprocessing workers have room
+#SBATCH --constraint="apu"
+#SBATCH --gres=gpu:2
+#SBATCH --mem=0
+#SBATCH --time=02:15:00
+
+module purge
+module load apptainer/1.4.1
+
+# Recover this script's path from the job info (the "Command=" field of scontrol),
+# drop the lowercased $SLURM_CELL path component and map /u1/ or /u2/ back to /u/
+export SBATCH_SCRIPT_PATH=$(scontrol show job "$SLURM_JOBID" | grep -oP 'Command=\K.*' | sed -E "s|/$(echo "$SLURM_CELL" | tr '[:upper:]' '[:lower:]')||" | sed -E 's|^/u[12]/|/u/|')
+
+sif_file="$(dirname "$SBATCH_SCRIPT_PATH")/tf-2.16.sif"
+code_dir="$(dirname "$(dirname "$SBATCH_SCRIPT_PATH")")/src"
+
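+# allocate GPU memory on demand instead of reserving it all up front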
+export TF_FORCE_GPU_ALLOW_GROWTH=true
+
+export batch_size_per_device="--batch_size_per_device 256"
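+# set_tf_config_multiple_nodes.sh exports TF_CONFIG so each task can discover the worker cluster (sketch after this file)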
+PRE_RUN="source ${code_dir}/set_tf_config_multiple_nodes.sh && echo \$TF_CONFIG"
+
+srun bash -c "
+  ${PRE_RUN} &&
+  export RANK=\${SLURM_PROCID} &&
+  apptainer exec --nv $sif_file python $code_dir/train_synthetic.py train $batch_size_per_device
+"
diff --git a/tensorflow/viper/tf-2.16-recipe.def b/tensorflow/viper/tf-2.16-recipe.def
index 924b4e9ea1682e09efdcdc4b934b979941ab10ba..d795e9426bb6c0f5faa546a6e1a3cc77c0255948 100644
--- a/tensorflow/viper/tf-2.16-recipe.def
+++ b/tensorflow/viper/tf-2.16-recipe.def
@@ -4,6 +4,7 @@ From: rocm/tensorflow:rocm6.3.3-py3.12-tf2.16-dev
 %post
 python -m pip install --upgrade pip
 pip install ipython ipykernel
+pip install pandas
 pip install click
 
 %environment
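
The job scripts look for the image as tf-2.16.sif next to themselves; after this recipe change (pandas is now baked into the image) it can be rebuilt with:

    apptainer build tf-2.16.sif tf-2.16-recipe.def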