Commit e30e3fec authored by Marcel Henrik Schubert
Browse files

fixed shell script; added 80 core processing

parent c23834fb
......@@ -843,8 +843,13 @@ def _main(args):
manager = mp.Manager()
#create a two queues to split work into two parts
q = [manager.Queue(), manager.Queue()]
pool = mp.Pool(4, maxtasksperchild=1)
# Pick the worker-pool size: the CPU count reported by mp.cpu_count() for a
# real run, a fixed pool of 4 workers when args['test'] is set.
# NOTE(review): indentation was lost in this view; the `else` is read as
# pairing with `if not args['test']` (otherwise ncpus would be unassigned in
# test mode) - confirm against the original file.
if not args['test']:
    ncpus = mp.cpu_count()
    if ncpus < 80:
        # The '{}' placeholder is required: without it str.format() silently
        # discards its argument and the CPU count is never shown.
        print('failed to get 80 cpus - only got {}'.format(ncpus))
else:
    ncpus = 4
# maxtasksperchild=1 makes the pool replace each worker process after it has
# completed a single task.
pool = mp.Pool(ncpus, maxtasksperchild=1)
#pool = mp.Pool(mp.cpu_count(), maxtasksperchild=1)
print('create listener for saving of data...')
sys.stdout.flush()
......
......@@ -41,7 +41,7 @@ module load scikit-learn/0.24.1
names=(creator performer sports)
for i in ${names[@]}; do
# Run the program:
srun -N 1 -n 1 python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_$i.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c (1,5) -w (1,2) -t (1,3) -d (1,5) -o (1,3) --workset=workset --part=$i --rerun --both --asis --spacy --encase_list emoji emoticon
srun -N 1 -n 1 python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_$i.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=$i --rerun --both --asis --spacy --encase_list emoji emoticon
done
wait
echo "job finished"
#!/bin/bash -l
typ=creator
# Standard output and error:
#SBATCH -o ./../../jobscripts/out/preprocess_${typ}_out.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err_part_2.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}.%j
# Initial working directory:
#SBATCH -D /ptmp/mschuber/PAN/Scripts/Preprocessing
# Job Name:
......@@ -36,10 +36,10 @@ module load anaconda/3/2020.02
module load tensorflow/cpu/2.5.0
module load scikit-learn/0.24.1
typ=creator
python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
srun python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
echo "job finished"
\ No newline at end of file
#!/bin/bash -l
typ=manager
# Standard output and error:
#SBATCH -o ./../../jobscripts/out/preprocess_${typ}_out.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err_part_2.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err.%j
# Initial working directory:
#SBATCH -D /ptmp/mschuber/PAN/Scripts/Preprocessing
# Job Name:
......@@ -38,8 +38,8 @@ module load scikit-learn/0.24.1
typ=manager
python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
srun python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
echo "job finished"
\ No newline at end of file
#!/bin/bash -l
typ=performer
# Standard output and error:
#SBATCH -o ./../../jobscripts/out/preprocess_${typ}_out.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err_part_2.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err.%j
# Initial working directory:
#SBATCH -D /ptmp/mschuber/PAN/Scripts/Preprocessing
# Job Name:
......@@ -20,9 +19,10 @@ typ=performer
#
#SBATCH --mem=180000
#SBATCH --mail-type=none
#SBATCH --mail-user=schubert@coll.mpg.de
#SBATCH --mail-user=<userid>@coll.mpg.de
#
# Wall clock limit:
#SBATCH --time=24:00:00
#SBATCH --time 24:00:00
##export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}
# For pinning threads correctly:
......@@ -36,10 +36,10 @@ module load anaconda/3/2020.02
module load tensorflow/cpu/2.5.0
module load scikit-learn/0.24.1
typ=performer
python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
srun python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
echo "job finished"
\ No newline at end of file
#!/bin/bash -l
typ=sports
# Standard output and error:
#SBATCH -o ./../../jobscripts/out/preprocess_${typ}_out.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err_part_2.%j
#SBATCH -e ./../../jobscripts/out/preprocess_${typ}_err.%j
# Initial working directory:
#SBATCH -D /ptmp/mschuber/PAN/Scripts/Preprocessing
# Job Name:
......@@ -37,9 +37,9 @@ module load tensorflow/cpu/2.5.0
module load scikit-learn/0.24.1
typ=sports
python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
srun python preprocess.py -p ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31 -f workset_${typ}.ndjson -s ../../Data/pan19-celebrity-profiling-training-dataset-2019-01-31/preprocessed -c "(1,5)" -w "(1,2)" -t "(1,3)" -d "(1,5)" -o "(1,3)" --workset=workset --part=${typ} --rerun --both --asis --spacy --encase_list emoji emoticon
echo "job finished"
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment