From c15f9d6fc75483273349bcd48e5ec9072cf5691a Mon Sep 17 00:00:00 2001 From: "Paul P.H. Wilson" Date: Wed, 9 Jul 2014 14:04:34 -0500 Subject: [PATCH 1/8] Initial commit of code from S. Goldstein --- AUTHORS | 1 + README.md | 67 ++++++++++++++++++++++++++++-- indir/pythontest.py | 32 +++++++++++++++ queue/cleanupCHTCPython.pl | 45 ++++++++++++++++++++ queue/process.cmd | 84 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 226 insertions(+), 3 deletions(-) create mode 100644 AUTHORS create mode 100755 indir/pythontest.py create mode 100755 queue/cleanupCHTCPython.pl create mode 100644 queue/process.cmd diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..6aa6e76 --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +Steve Goldstein sgoldstein@wisc.edu \ No newline at end of file diff --git a/README.md b/README.md index 2451e26..0a3aa7b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,65 @@ -scipy-on-htcondor -================= +Running python+numpy+scipy on HTCondor +====================================== + +Author: Steve Goldstein sgoldstein@wisc.edu +July, 2014 + +To run python with numpy or scipy on condor, you have to ensure that +the execute node has a version of python that includes those +libraries. CHTC has solved that problem by building such a version of +python and providing a means to install that version on the execute +node. + +The purpose of pythonSubmit.tgz is to give you an easy way to +implement CHTC's solution that can be widely applied. On the CHTC web +pages, you can read about some of the details and a more general +implementation. + +Step by step instructions: + +1. Log on to the CHTC submit node: + + ssh @submit-3.chtc.wisc.edu + +2. Make a directory for your python work. + + mkdir myProject + cd myProject + +3. Copy the pythonSubmit tar file to your workspace and extract the +archive. + + wget + tar zxvf pythonSubmit.tgz + +4. a. Copy your python program to the indir/ subdirectory. + + scp :bin/myPythonProgram.py indir/ + + b. 
Copy any other input files to the indir/ subdirectory. + +5. Make sure the first line of the python program looks like this: + + #!/usr/bin/env python + +It should match the first line of the pythontest.py program: + + head -1 indir/*py + +6. Edit the condor submit file, following the (albeit, sparse) +directions in that file: + + nano queue/process.cmd + +7. Submit the jobs to condor: + + condor_submit queue/process.cmd + +8. When the jobs have finished, run the cleanup script: + + queue/cleanupCHTCPython.pl + +Your output will be in the outdir directory. Check the files in `chtcOutput/` +for errors. + -Tools to effectively launch SciPy based jobs on HTCondor environments diff --git a/indir/pythontest.py b/indir/pythontest.py new file mode 100755 index 0000000..0e7c210 --- /dev/null +++ b/indir/pythontest.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +import os +import platform +import sys +import time +import numpy +from scipy import sqrt, pi + +print >> sys.stderr, __doc__ +print "Version :", platform.python_version() +print "Program :", sys.executable +print 'Script :', os.path.abspath(__file__) +print 'Args :', sys.argv[1:] +print + +a = numpy.arange(10000000) +b = numpy.arange(10000000) +c = a + b + + +h1 = sqrt(pi/2) + +print a,b +print c +print h1 +print + +f = open('DONE','w') +print >>f,'all','done' +sys.exit(0) + diff --git a/queue/cleanupCHTCPython.pl b/queue/cleanupCHTCPython.pl new file mode 100755 index 0000000..849b553 --- /dev/null +++ b/queue/cleanupCHTCPython.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl -w + + +use strict; +use Carp; +use English; +use Getopt::Long; +use Cwd; + +GetOptions ( + + ); + +my $chtcNoise = "./chtcOutput"; +croak "$chtcNoise directory already exists." 
+ if ( -e $chtcNoise); + +croak "Can't create $chtcNoise directory" + unless (mkdir $chtcNoise); + +my @filelist = + qw + ( + ChtcWrapper*out + AuditLog* + CURLTIME* + outdir/* + queue/error/* + DONE + harvest.log + CODEBLOWUP + ); + +foreach my $fileSpec (@filelist) { + `mv $fileSpec $chtcNoise 2> /dev/null`; +} + +opendir D, "." or croak "Can't read directory"; +my @outputFiles = grep {/^\d+\.\d+\.out$/} readdir D; +closedir D; + +`mv @outputFiles outdir`; + + +__END__ diff --git a/queue/process.cmd b/queue/process.cmd new file mode 100644 index 0000000..e97d59b --- /dev/null +++ b/queue/process.cmd @@ -0,0 +1,84 @@ +############### +universe = vanilla + + +################################################### +# 1. Edit these 2 lines to run your python program. +################################################### + +PythonProgram = pythontest.py +PythonProgramArguments = $(PROCESS) +################################################### + +################################################### +# 2. Edit the last line in this file +# +# queue N +# +# to run N instances of this program. +################################################### + +################################################### +# 3 . Edit these lines if appropriate +################################################### +transfer_input_files = indir/ + +# Tell Condor how many CPUs (cores), how much memory (MB) and how much +# disk space (KB) each job will need: +request_cpus = 1 +request_memory = 1000 +request_disk = 1000000 +################################################### + + +################################################### +# 4. 
Most of the rest of this file does not need editing +################################################### + +executable = /squid/example/ChtcRun/chtcjobwrapper +arguments = --type=Other --version=Python-2.7.3 --cmdtorun $(PythonProgram) --unique=$(CLUSTER).$(PROCESS) -- $(PythonProgramArguments) + + +output = outdir/process.$(CLUSTER).$(PROCESS).out +error = queue/error/process.$(CLUSTER).$(PROCESS).err +log = queue/process.$(CLUSTER).log + +################################################################# + +requirements = (OpSysAndVer =?= "SL6") + +should_transfer_files = YES +when_to_transfer_output = ON_EXIT + +################################################################# +# By default, your job will be submitted to the CHTC's HTCondor +# Pool only, which is good for jobs that are each less than 24 hours. +# +# If your jobs are less than 4 hours long, "flock" them additionally to +# other HTCondor pools on campus by uncommenting the below line: +#+WantFlocking = true + +# +# If your jobs are less than ~2 hours long, "glide" them to the national +# Open Science Grid (OSG) for access to even more computers and the +# fastest overall throughput. Uncomment the below line: +#+WantGlidein = true +################################################### + +# Release a job from being on hold after 5 minutes (300 seconds), up to 4 times, +# as long as the executable could be started, the input files and initial directory +# were accessible and the user log could be created.
This will help your jobs to retry +# if they happen to fail due to a computer issue (not an issue with your job) +periodic_release = (JobStatus == 5) && ((CurrentTime - EnteredCurrentStatus) > 300) && (JobRunCount < 5) && (HoldReasonCode != 6) && (HoldReasonCode != 14) && (HoldReasonCode != 22) + +# If you want your jobs to go on hold because they are +# running longer than expected, uncomment this line and +# change from 24 hours to desired limit: +#periodic_hold = (JobStatus == 2) && ((CurrentTime - EnteredCurrentStatus) > (60 * 60 * 24)) + +# We don't want email about our jobs. +notification = never + +################################################### +## This must be the last line +queue 2 From f49c37459dd563b52342d78f4663412a8f9909c3 Mon Sep 17 00:00:00 2001 From: "Paul P.H. Wilson" Date: Wed, 9 Jul 2014 14:06:51 -0500 Subject: [PATCH 2/8] Add gitignore file to make this directory appear. --- outdir/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 outdir/.gitignore diff --git a/outdir/.gitignore b/outdir/.gitignore new file mode 100644 index 0000000..5e7d273 --- /dev/null +++ b/outdir/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore From d9cd58dfa41b26b08ec12c304e13681c6551f71f Mon Sep 17 00:00:00 2001 From: "Paul P.H. Wilson" Date: Wed, 9 Jul 2014 14:10:25 -0500 Subject: [PATCH 3/8] testing MD syntax... again\! --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0a3aa7b..4c892c1 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,9 @@ Step by step instructions: 1. Log on to the CHTC submit node: +``` ssh @submit-3.chtc.wisc.edu +``` 2. Make a directory for your python work. From 0878a57af26b6471eaac7be2396338fc94fdb508 Mon Sep 17 00:00:00 2001 From: "Paul P.H. Wilson" Date: Wed, 9 Jul 2014 14:12:06 -0500 Subject: [PATCH 4/8] more testing MD syntax... again\!
--- README.md | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 4c892c1..8b9800f 100644 --- a/README.md +++ b/README.md @@ -20,46 +20,62 @@ Step by step instructions: 1. Log on to the CHTC submit node: ``` - ssh @submit-3.chtc.wisc.edu +ssh @submit-3.chtc.wisc.edu ``` 2. Make a directory for your python work. - mkdir myProject - cd myProject +``` +mkdir myProject +cd myProject +``` 3. Copy the pythonSubmit tar file to your workspace and extract the archive. - wget - tar zxvf pythonSubmit.tgz +``` +wget +tar zxvf pythonSubmit.tgz +``` 4. a. Copy your python program to the indir/ subdirectory. - scp :bin/myPythonProgram.py indir/ +``` +scp :bin/myPythonProgram.py indir/ +``` b. Copy any other input files to the indir/ subdirectory. 5. Make sure the first line of the python program looks like this: - #!/usr/bin/env python +``` +#!/usr/bin/env python +``` It should match the first line of the pythontest.py program: - head -1 indir/*py +``` +head -1 indir/*py +``` -6. Edit the condor submit file, following the (albeit, sparse) -directions in that file: +6. Edit the condor submit file, following the (albeit, sparse) directions in + that file: - nano queue/process.cmd +``` +nano queue/process.cmd +``` 7. Submit the jobs to condor: - condor_submit queue/process.cmd +``` +condor_submit queue/process.cmd +``` 8. When the jobs have finished, run the cleanup script: - queue/cleanupCHTCPython.pl +``` +queue/cleanupCHTCPython.pl +``` Your output will be in the outdir directory. Check the files in `chtcOutput/` for errors. From e741733ac00b1835c440ffc995fdc78d11d97f61 Mon Sep 17 00:00:00 2001 From: "Paul P.H. 
Wilson" Date: Wed, 9 Jul 2014 14:19:55 -0500 Subject: [PATCH 5/8] still more formatting testing --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8b9800f..17489f9 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ Running python+numpy+scipy on HTCondor ====================================== -Author: Steve Goldstein sgoldstein@wisc.edu -July, 2014 +Author: Steve Goldstein sgoldstein@wisc.edu July, 2014 To run python with numpy or scipy on condor, you have to ensure that the execute node has a version of python that includes those @@ -10,7 +9,7 @@ libraries. CHTC has solved that problem by building such a version of python and providing a means to install that version on the execute node. -The purpose of pythonSubmit.tgz is to give you an easy way to +The purpose of the scipy-on-htcondor project is to give you an easy way to implement CHTC's solution that can be widely applied. On the CHTC web pages, you can read about some of the details and a more general implementation. @@ -19,9 +18,9 @@ Step by step instructions: 1. Log on to the CHTC submit node: -``` -ssh @submit-3.chtc.wisc.edu -``` + ``` + ssh @submit-3.chtc.wisc.edu + ``` 2. Make a directory for your python work. @@ -35,13 +34,14 @@ archive. ``` wget -tar zxvf pythonSubmit.tgz +tar zxvf scipy-on-htcondor.tgz ``` 4. a. Copy your python program to the indir/ subdirectory. ``` -scp :bin/myPythonProgram.py indir/ +option a:scp :bin/myPythonProgram.py indir/ +option b: scp bin/myPythonProgram.py submit-3.chtc.wisc.edu:myProject/indir ``` b. Copy any other input files to the indir/ subdirectory. From a26b9a410ac8e2d5478b9439ebc788aa5127eb1f Mon Sep 17 00:00:00 2001 From: "Paul P.H. 
Wilson" Date: Wed, 9 Jul 2014 14:21:45 -0500 Subject: [PATCH 6/8] still more formatting testing --- README.md | 73 +++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 17489f9..b72d1ed 100644 --- a/README.md +++ b/README.md @@ -22,60 +22,59 @@ Step by step instructions: ssh @submit-3.chtc.wisc.edu ``` -2. Make a directory for your python work. +1. Make a directory for your python work. -``` -mkdir myProject -cd myProject -``` + ``` + mkdir myProject + cd myProject + ``` -3. Copy the pythonSubmit tar file to your workspace and extract the +1. Copy the pythonSubmit tar file to your workspace and extract the archive. -``` -wget -tar zxvf scipy-on-htcondor.tgz -``` - -4. a. Copy your python program to the indir/ subdirectory. + ``` + wget + tar zxvf scipy-on-htcondor.tgz + ``` -``` -option a:scp :bin/myPythonProgram.py indir/ -option b: scp bin/myPythonProgram.py submit-3.chtc.wisc.edu:myProject/indir -``` +1. Copy your python program to the indir/ subdirectory. - b. Copy any other input files to the indir/ subdirectory. + ``` + option a:scp :bin/myPythonProgram.py indir/ + option b: scp bin/myPythonProgram.py submit-3.chtc.wisc.edu:myProject/indir + ``` +1. Copy any other input files to the indir/ subdirectory. -5. Make sure the first line of the python program looks like this: +1. Make sure the first line of the python program looks like this: -``` -#!/usr/bin/env python -``` + ``` + #!/usr/bin/env python + ``` -It should match the first line of the pythontest.py program: + It should match the first line of the pythontest.py program: -``` -head -1 indir/*py -``` + ``` + head -1 indir/*py + ``` -6. Edit the condor submit file, following the (albeit, sparse) directions in +1. Edit the condor submit file, following the (albeit, sparse) directions in that file: -``` -nano queue/process.cmd -``` + ``` + nano queue/process.cmd + ``` -7. Submit the jobs to condor: +1. 
Submit the jobs to condor: -``` -condor_submit queue/process.cmd -``` + ``` + condor_submit queue/process.cmd + ``` -8. When the jobs have finished, run the cleanup script: +1. When the jobs have finished, run the cleanup script: -``` -queue/cleanupCHTCPython.pl -``` + ``` + queue/cleanupCHTCPython.pl + ``` Your output will be in the outdir directory. Check the files in `chtcOutput/` for errors. From 67c3cf257f0c8494f6f9b6af551518477b0217df Mon Sep 17 00:00:00 2001 From: "Paul P.H. Wilson" Date: Wed, 9 Jul 2014 14:26:18 -0500 Subject: [PATCH 7/8] still more formatting testing --- README.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b72d1ed..672801d 100644 --- a/README.md +++ b/README.md @@ -37,15 +37,21 @@ archive. tar zxvf scipy-on-htcondor.tgz ``` -1. Copy your python program to the indir/ subdirectory. +1. Open another terminal on your own local machine and transfer the program + to the indir/ subdirectory. + + ``` + scp /path/to/my/myPythonProgram.py @submit-3.chtc.wisc.edu:myProject/indir + ``` + +1. Transfer any other input files to the indir/ subdirectory. ``` - option a:scp :bin/myPythonProgram.py indir/ - option b: scp bin/myPythonProgram.py submit-3.chtc.wisc.edu:myProject/indir + scp /path/to/other/necessary_input_files @submit-3.chtc.wisc.edu:myProject/indir ``` -1. Copy any other input files to the indir/ subdirectory. -1. Make sure the first line of the python program looks like this: +1. Now back on the terminal in which you are logged into the CHTC submit node, + make sure the first line of the python program looks like this: ``` #!/usr/bin/env python From 0bf1d8fde744297b0d54f6234f4f0bbdbe9c4342 Mon Sep 17 00:00:00 2001 From: "Paul P.H.
Wilson" Date: Sat, 4 Feb 2017 18:30:53 -0600 Subject: [PATCH 8/8] reword some readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 672801d..9ba691c 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Step by step instructions: cd myProject ``` -1. Copy the pythonSubmit tar file to your workspace and extract the +1. Copy the scipy-on-htcondor.tar.gz tar file to your workspace and extract the archive. ```