From 48ac68453231a1f958dd1eba76ef52e31d5ae667 Mon Sep 17 00:00:00 2001
From: Maria Pena-Guerrero
Date: Tue, 2 Apr 2024 16:09:12 -0400
Subject: [PATCH 01/13] adding multiprocessing to docs

---
 .../running_pipeline_python.rst | 193 ++++++++++++++++++
 1 file changed, 193 insertions(+)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index 1a7ce58128..2243048513 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -529,3 +529,196 @@ individual step parameter must be set when using this method, or else the coded
 defaults will be used, which may be inappropriate for the dataset being processed.
 
 See :ref:`call_examples` for more information.
+
+
+.. _multiprocessing:
+
+Multiprocessing
+===============
+
+Python's multiprocessing module explicitly imports and executes a script's
+`__main__` with each and every worker. If `__main__` is not present the behavior is
+undefined. Hence, Python will crash unless the multiprocess code in enclosed in a
+`__main__` block like this:
+
+
+::
+
+    import sys
+
+    def main():
+        [code used in multiprocessing]
+
+    if __name__ == '__main__':
+        sys.exit(main())
+
+
+There are three scenarios to use multiprocessing with the pipeline:
+
+1. Multiprocessing with a pipeline step. At the moment, these steps are
+:ref:`jump_step `, :ref:`ramp_fitting_step `,
+and :ref:`wfss_contam_step `. To enable multiprocessing the
+optional parameters are `max_cores` for the ``jump`` step, and `maximum_cores`
+for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be
+set to `quarter`, `half`, `all`, or `none`, which is the default value.
+
+The following example turns on the step multiprocessing while setting up a log
+file for each run of the pipeline and a text file with the full traceback in case
+there is a crash. Notice only one of the steps has multiprocessing turned on. We
+do not recommend to simultaneously enable both steps to do multiprocessing, as
+this may likely incur in the system running out of memory.
+
+
+
+::
+
+    # SampleScript1
+
+    import os, sys
+    import traceback
+    import configparser
+    from glob import glob
+    from jwst.pipeline import Detector1Pipeline
+
+    files = glob('*uncal.fits')
+    output_dir = '/my_project'
+
+    def mk_stpipe_log_cfg(output_dir, log_name):
+        """
+        Create a configuration file with the name log_name, where
+        the pipeline will write all output.
+        Args:
+            outpur_dir: str, path of the output directory
+            log_name: str, name of the log to record screen output
+        Returns:
+            nothing
+        """
+        config = configparser.ConfigParser()
+        config.add_section("*")
+        config.set("*", "handler", "file:" + log_name)
+        config.set("*", "level", "INFO")
+        pipe_log_config = os.path.join(output_dir, "pipeline-log.cfg")
+        config.write(open(pipe_log_config, "w"))
+
+    def main():
+        for item in files:
+            fle = os.path.basename(item).replace('.fits', '')
+            log_name = os.path.join(output_dir, fle)
+            mk_stpipe_log_cfg(output_dir, log_name+'.log')
+            det1 = Detector1Pipeline()
+            parameter_dict = {"ramp_fit": {"maximum_cores": 'all'}}
+            pipe_success = False
+            print('Running Detector 1 on file: ', item)
+            try:
+                det1.call(item, save_results=True, steps=parameter_dict, output_dir=output_dir, logcfg="pipeline-log.cfg")
+                pipe_success = True
+                print('\n * Pipeline finished for file: ', item, ' \n')
+            except Exception:
+                print('\n *** OH NO! The detector1 pipeline crashed! *** \n')
+                pipe_crash_msg = traceback.print_exc()
+            if not pipe_success:
+                crashfile = open(log_name+'_pipecrash.txt', 'w')
+                print('printing file with crash message')
+                print(pipe_crash_msg, file=crashfile)
+
+        print('\n * Finished multiprocessing! \n')
+
+    if __name__ == "__main__":
+        main()
+
+
+2. Calling the pipeline using multiprocessing. The following example uses this
+option setting up a log file for each run of the pipeline and a text file with
+the full traceback in case there is a crash. Notice that the ``import`` statement
+of the pipeline is within the multiprocessing block that gets called by every
+worker. This is to avoid a known memory leackage issue.
+
+
+::
+
+    # SampleScript2
+
+    import os
+    import traceback
+    import configparser
+    import multiprocessing
+    from glob import glob
+
+    def mk_stpipe_log_cfg(output_dir, log_name):
+        """
+        Create a configuration file with the name log_name, where
+        the pipeline will write all output.
+        Args:
+            output_dir: str, path of the output directory
+            log_name: str, name of the log to record screen output
+        Returns:
+            nothing
+        """
+        config = configparser.ConfigParser()
+        config.add_section("*")
+        config.set("*", "handler", "file:" + log_name)
+        config.set("*", "level", "INFO")
+        pipe_log_config = os.path.join(output_dir, "pipeline-log.cfg")
+        config.write(open(pipe_log_config, "w"))
+
+    def run_det1(uncal_file, output_dir):
+        """
+        Run the Detector1 pipeline on the given file.
+        Args:
+            uncal_file: str, name of uncalibrated file to run
+            output_dir: str, path of the output directory
+        Returns:
+            nothing
+        """
+        log_name = os.path.basename(uncal_file).replace('.fits', '')
+        mk_stpipe_log_cfg(output_dir, log_name+'.log')
+        from jwst.pipeline.calwebb_detector1 import Detector1Pipeline
+        pipe_success = False
+        try:
+            det1 = Detector1Pipeline()
+            det1.call(uncal_file, output_dir=output_dir, logcfg="pipeline-log.cfg", save_results=True)
+            pipe_success = True
+            print('\n * Pipeline finished for file: ', uncal_file, ' \n')
+        except Exception:
+            print('\n *** OH NO! The detector1 pipeline crashed! *** \n')
+            pipe_crash_msg = traceback.format_exc()
+        if not pipe_success:
+            crashfile = open(log_name+'_pipecrash.txt', 'w')
+            print('Printing file with full traceback')
+            print(pipe_crash_msg, file=crashfile)
+
+    def main():
+        input_data_dir = '/my_project_dir'
+        output_dir = input_data_dir
+
+        # get the files to run
+        files_to_run = glob(os.path.join(input_data_dir, '*_uncal.fits'))
+        print('Will run the pipeline on {} files'.format(len(files_to_run)))
+
+        # the output list should be the same length as the files to run
+        outptd = [output_dir for _ in range(len(files_to_run))]
+
+        # get the cores to use
+        cores2use = int(os.cpu_count()/2)  # half of all available cores
+        print('* Using ', cores2use, ' cores for multiprocessing.')
+
+        # set the pool and run multiprocess
+        p = multiprocessing.Pool(cores2use)
+        p.starmap(run_det1, zip(files_to_run, outptd))
+        p.close()
+        p.join()
+
+    if __name__ == '__main__':
+        multiprocessing.freeze_support()
+        main()
+        print('\n * Finished multiprocessing! \n')
+
+
+3. Using both, calling the pipeline with multiprocessing and turning on a step
+multiprocessing parameter. This scenario is the same as `SampleScript2` except
+with adding and calling the parameter dictionary `parameter_dict` in
+`SampleScript1`. However, this scenario will likely crash if both
+multiprocessing options are set to use all the cores, hence we recommend to
+set both to half or less. Nonetheless, we recommend not enabling step
+multiprocessing for parallel pipeline runs to avoid potentially running
+out of memory.

From d320b87895cf4263d01687f364493a73f2bfd6f9 Mon Sep 17 00:00:00 2001
From: Maria Pena-Guerrero
Date: Tue, 2 Apr 2024 16:15:33 -0400
Subject: [PATCH 02/13] added changes

---
 CHANGES.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index de15c485f4..c15a316d51 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -7,6 +7,8 @@ documentation
 - Added docs for the NIRSpec MSA metadata file to the data products area of
   RTD. [#8399]
 
+- Added docs for multipreocessing. [#8408]
+
 pipeline
 --------

From a2de0945606a2d539346915e9e86548612932e25 Mon Sep 17 00:00:00 2001
From: Maria Pena-Guerrero
Date: Tue, 2 Apr 2024 17:05:27 -0400
Subject: [PATCH 03/13] minor edits

---
 docs/jwst/user_documentation/running_pipeline_python.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index 2243048513..4f2fea88e8 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -566,7 +566,7 @@ The following example turns on the step multiprocessing while setting up a log
 file for each run of the pipeline and a text file with the full traceback in case
 there is a crash. Notice only one of the steps has multiprocessing turned on. We
 do not recommend to simultaneously enable both steps to do multiprocessing, as
-this may likely incur in the system running out of memory.
+this may likely lead to running out of system memory.
 
 
 
@@ -631,7 +631,7 @@ this may likely lead to running out of system memory.
 option setting up a log file for each run of the pipeline and a text file with
 the full traceback in case there is a crash. Notice that the ``import`` statement
 of the pipeline is within the multiprocessing block that gets called by every
-worker. This is to avoid a known memory leackage issue.
+worker. This is to avoid a known memory leak.

From 443056176e2d2e71367a09aa78477479e637eca3 Mon Sep 17 00:00:00 2001
From: Maria
Date: Thu, 4 Apr 2024 09:58:12 -0400
Subject: [PATCH 04/13] Update CHANGES.rst

Co-authored-by: Ned Molter
---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 8134a70318..a5c003ab6a 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -13,7 +13,7 @@ documentation
 - Added docs for the NIRSpec MSA metadata file to the data products area of
   RTD. [#8399]
 
-- Added docs for multipreocessing. [#8408]
+- Added documentation for multiprocessing. [#8408]
 
 pipeline
 --------

From f76dfe227fc9ea3fb537316f46f422e695a2e0f7 Mon Sep 17 00:00:00 2001
From: Maria Pena-Guerrero
Date: Thu, 4 Apr 2024 11:27:42 -0400
Subject: [PATCH 05/13] reducing code to better showcase multiprocessing in
 step

---
 .../running_pipeline_python.rst | 90 +++++++------------
 1 file changed, 30 insertions(+), 60 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index 4f2fea88e8..b4a196d7d0 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -536,6 +536,13 @@ See :ref:`call_examples` for more information.
 Multiprocessing
 ===============
 
+Multiprocessing is supported to speed up certain computationally-intensive steps
+in the pipeline, including :ref:`jump_step `,
+:ref:`ramp_fitting_step `, and
+:ref:`wfss_contam_step `. The examples below show how
+multiprocessing can be enabled for these steps, as well as how to set up
+multiprocessing to simultaneously run the entire pipeline on multiple observations.
+
 Python's multiprocessing module explicitly imports and executes a script's
 `__main__` with each and every worker. If `__main__` is not present the behavior is
 undefined. Hence, Python will crash unless the multiprocess code in enclosed in a
@@ -553,20 +560,20 @@ undefined. Hence, Python will crash unless the multiprocess code in enclosed in
         sys.exit(main())
 
 
-There are three scenarios to use multiprocessing with the pipeline:
+There are a couple of scenarios to use multiprocessing with the pipeline:
 
-1. Multiprocessing with a pipeline step. At the moment, these steps are
-:ref:`jump_step `, :ref:`ramp_fitting_step `,
-and :ref:`wfss_contam_step `. To enable multiprocessing the
+1. Multiprocessing within a pipeline step. At the moment, the steps that
+support this are the :ref:`jump_step `,
+:ref:`ramp_fitting_step `,
+and :ref:`wfss_contam_step `. To enable multiprocessing the
 optional parameters are `max_cores` for the ``jump`` step, and `maximum_cores`
 for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be
 set to `quarter`, `half`, `all`, or `none`, which is the default value.
 
-The following example turns on the step multiprocessing while setting up a log
-file for each run of the pipeline and a text file with the full traceback in case
-there is a crash. Notice only one of the steps has multiprocessing turned on. We
-do not recommend to simultaneously enable both steps to do multiprocessing, as
-this may likely lead to running out of system memory.
+The following example script turns on the step multiprocessing. Notice only
+one of the steps has multiprocessing turned on. We do not recommend to
+simultaneously enable both steps to do multiprocessing, as this may likely
+lead to running out of system memory.
 
 
 
 ::
 
     # SampleScript1
 
     import os, sys
-    import traceback
-    import configparser
-    from glob import glob
     from jwst.pipeline import Detector1Pipeline
 
-    files = glob('*uncal.fits')
+    uncal_file = 'jw0000_0000_uncal.fits'
     output_dir = '/my_project'
 
-    def mk_stpipe_log_cfg(output_dir, log_name):
-        """
-        Create a configuration file with the name log_name, where
-        the pipeline will write all output.
-        Args:
-            outpur_dir: str, path of the output directory
-            log_name: str, name of the log to record screen output
-        Returns:
-            nothing
-        """
-        config = configparser.ConfigParser()
-        config.add_section("*")
-        config.set("*", "handler", "file:" + log_name)
-        config.set("*", "level", "INFO")
-        pipe_log_config = os.path.join(output_dir, "pipeline-log.cfg")
-        config.write(open(pipe_log_config, "w"))
-
     def main():
-        for item in files:
-            fle = os.path.basename(item).replace('.fits', '')
-            log_name = os.path.join(output_dir, fle)
-            mk_stpipe_log_cfg(output_dir, log_name+'.log')
-            det1 = Detector1Pipeline()
-            parameter_dict = {"ramp_fit": {"maximum_cores": 'all'}}
-            pipe_success = False
-            print('Running Detector 1 on file: ', item)
-            try:
-                det1.call(item, save_results=True, steps=parameter_dict, output_dir=output_dir, logcfg="pipeline-log.cfg")
-                pipe_success = True
-                print('\n * Pipeline finished for file: ', item, ' \n')
-            except Exception:
-                print('\n *** OH NO! The detector1 pipeline crashed! *** \n')
-                pipe_crash_msg = traceback.print_exc()
-            if not pipe_success:
-                crashfile = open(log_name+'_pipecrash.txt', 'w')
-                print('printing file with crash message')
-                print(pipe_crash_msg, file=crashfile)
-
-        print('\n * Finished multiprocessing! \n')
+        det1 = Detector1Pipeline()
+        parameter_dict = {"ramp_fit": {"maximum_cores": 'all'}}
+        det1.call(uncal_file, save_results=True, steps=parameter_dict, output_dir=output_dir)
 
-    if __name__ == "__main__":
-        main()
+    if __name__ == '__main__':
+        sys.exit(main())
 
 
 2. Calling the pipeline using multiprocessing. The following example uses this
@@ -714,11 +683,12 @@ worker. This is to avoid a known memory leak.
 
     print('\n * Finished multiprocessing! \n')
 
-3. Using both, calling the pipeline with multiprocessing and turning on a step
-multiprocessing parameter. This scenario is the same as `SampleScript2` except
-with adding and calling the parameter dictionary `parameter_dict` in
-`SampleScript1`. However, this scenario will likely crash if both
-multiprocessing options are set to use all the cores, hence we recommend to
-set both to half or less. Nonetheless, we recommend not enabling step
-multiprocessing for parallel pipeline runs to avoid potentially running
-out of memory.
+.. warning::
+    Allthough it is technically possible to call the pipeline with
+    multiprocessing as well as turning on a step multiprocessing parameter, we
+    strongly recommend not to do this. This scenario would be the same as
+    `SampleScript2` except with adding and calling the parameter dictionary
+    `parameter_dict` in `SampleScript1`. However, this scenario will likely
+    crash if both multiprocessing options are set to use all the cores or less.
+    We recommend not enabling step multiprocessing for parallel pipeline runs
+    to avoid potentially running out of memory.

From 8f6248fa3c787450a8093a32095650698229b247 Mon Sep 17 00:00:00 2001
From: Maria Pena-Guerrero
Date: Thu, 4 Apr 2024 12:02:40 -0400
Subject: [PATCH 06/13] cleaning up wording

---
 .../user_documentation/running_pipeline_python.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index b4a196d7d0..38618dab34 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -570,7 +570,7 @@ optional parameters are `max_cores` for the ``jump`` step, and `maximum_cores`
 for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be
 set to `quarter`, `half`, `all`, or `none`, which is the default value.
 
-The following example script turns on the step multiprocessing. Notice only
+The following example turns on a step's multiprocessing option. Notice only
 one of the steps has multiprocessing turned on. We do not recommend to
 simultaneously enable both steps to do multiprocessing, as this may likely
 lead to running out of system memory.
@@ -684,11 +684,11 @@ worker. This is to avoid a known memory leak.
 
 .. warning::
-    Allthough it is technically possible to call the pipeline with
-    multiprocessing as well as turning on a step multiprocessing parameter, we
+    Although it is technically possible to call the pipeline with
+    multiprocessing while also enabling this option in a step, we
     strongly recommend not to do this. This scenario would be the same as
     `SampleScript2` except with adding and calling the parameter dictionary
-    `parameter_dict` in `SampleScript1`. However, this scenario will likely
-    crash if both multiprocessing options are set to use all the cores or less.
-    We recommend not enabling step multiprocessing for parallel pipeline runs
-    to avoid potentially running out of memory.
+    `parameter_dict` in `SampleScript1`. However, it will likely
+    crash if both multiprocessing options are set to use all the cores or even
+    less. We recommend not enabling step multiprocessing for parallel pipeline
+    runs to avoid potentially running out of memory.

From 109381b62d2107e3637f5e90bccfb33a278a6d8d Mon Sep 17 00:00:00 2001
From: Maria
Date: Fri, 19 Apr 2024 08:46:46 -0400
Subject: [PATCH 07/13] Update docs/jwst/user_documentation/running_pipeline_python.rst

Co-authored-by: Howard Bushouse
---
 docs/jwst/user_documentation/running_pipeline_python.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index 38618dab34..a9f980fbc3 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -537,9 +537,9 @@ Multiprocessing
 ===============
 
 Multiprocessing is supported to speed up certain computationally-intensive steps
-in the pipeline, including :ref:`jump_step `,
-:ref:`ramp_fitting_step `, and
-:ref:`wfss_contam_step `. The examples below show how
+in the pipeline, including the :ref:`jump detection `,
+:ref:`ramp fitting `, and
+:ref:`WFSS contamination correction ` steps. The examples below show how
 multiprocessing can be enabled for these steps, as well as how to set up
 multiprocessing to simultaneously run the entire pipeline on multiple observations.

From 49eb61ab22a171848c6ea18765109810ebfe7cf6 Mon Sep 17 00:00:00 2001
From: Maria
Date: Fri, 19 Apr 2024 08:48:05 -0400
Subject: [PATCH 08/13] Update docs/jwst/user_documentation/running_pipeline_python.rst

Co-authored-by: Howard Bushouse
---
 docs/jwst/user_documentation/running_pipeline_python.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index a9f980fbc3..5fbc0fcdd6 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -563,9 +563,9 @@ undefined. Hence, Python will crash unless the multiprocess code in enclosed in
 There are a couple of scenarios to use multiprocessing with the pipeline:
 
 1. Multiprocessing within a pipeline step. At the moment, the steps that
-support this are the :ref:`jump_step `,
-:ref:`ramp_fitting_step `,
-and :ref:`wfss_contam_step `. To enable multiprocessing the
+support this are the :ref:`jump `,
+:ref:`ramp_fitting `,
+and :ref:`wfss_contam ` steps. To enable multiprocessing, the
 optional parameters are `max_cores` for the ``jump`` step, and `maximum_cores`
 for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be
 set to `quarter`, `half`, `all`, or `none`, which is the default value.

From 79e8a62a055a1082687a9a9d8d65a7a09edfc994 Mon Sep 17 00:00:00 2001
From: Maria
Date: Fri, 19 Apr 2024 08:48:24 -0400
Subject: [PATCH 09/13] Update docs/jwst/user_documentation/running_pipeline_python.rst

Co-authored-by: Howard Bushouse
---
 docs/jwst/user_documentation/running_pipeline_python.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index 5fbc0fcdd6..f9b741cc8e 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -571,8 +571,8 @@ optional parameters are `max_cores` for the ``jump`` step, and `maximum_cores`
 for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be
 set to `quarter`, `half`, `all`, or `none`, which is the default value.
 
From 79e8a62a055a1082687a9a9d8d65a7a09edfc994 Mon Sep 17 00:00:00 2001 From: Maria Date: Fri, 19 Apr 2024 08:48:24 -0400 Subject: [PATCH 09/13] Update docs/jwst/user_documentation/running_pipeline_python.rst Co-authored-by: Howard Bushouse --- docs/jwst/user_documentation/running_pipeline_python.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst index 5fbc0fcdd6..f9b741cc8e 100644 --- a/docs/jwst/user_documentation/running_pipeline_python.rst +++ b/docs/jwst/user_documentation/running_pipeline_python.rst @@ -571,8 +571,8 @@ for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be set to `quarter`, `half`, `all`, or `none`, which is the default value. The following example turns on a step's multiprocessing option. Notice only -one of the steps has multiprocessing turned on. We do not recommend to -simultaneously enable both steps to do multiprocessing, as this may likely +one of the steps has multiprocessing turned on. We do not recommend +simultaneously enabling both steps to do multiprocessing, as this may likely lead to running out of system memory. From 284021d687bf2ca9f9fc50f8a44599fc13868cc8 Mon Sep 17 00:00:00 2001 From: Maria Pena-Guerrero Date: Fri, 19 Apr 2024 09:41:29 -0400 Subject: [PATCH 10/13] added suggested changes --- .../running_pipeline_python.rst | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst index f9b741cc8e..bb84130eb7 100644 --- a/docs/jwst/user_documentation/running_pipeline_python.rst +++ b/docs/jwst/user_documentation/running_pipeline_python.rst @@ -566,9 +566,10 @@ There are a couple of scenarios to use multiprocessing with the pipeline: support this are the :ref:`jump `, :ref:`ramp_fitting `, and :ref:`wfss_contam ` steps. To enable multiprocessing, the -optional parameters are `max_cores` for the ``jump`` step, and `maximum_cores` -for the ``ramp_fitting`` and ``wfss_contam`` steps. These parameters can be -set to `quarter`, `half`, `all`, or `none`, which is the default value. +optional parameter is `maximum_cores` for the ``jump``, ``ramp_fitting``, and +``wfss_contam`` steps. This parameter can be set to a numerical value given +as a string or it can be set to the words `quarter`, `half`, `all`, +or `none`, which is the default value. The following example turns on a step's multiprocessing option. Notice only one of the steps has multiprocessing turned on. We do not recommend @@ -607,7 +608,7 @@ worker. This is to avoid a known memory leak. # SampleScript2 - import os + import os, sys import traceback import configparser import multiprocessing @@ -672,23 +673,22 @@ worker. This is to avoid a known memory leak. print('* Using ', cores2use, ' cores for multiprocessing.') # set the pool and run multiprocess - p = multiprocessing.Pool(cores2use) - p.starmap(run_det1, zip(files_to_run, outptd)) - p.close() - p.join() + with multiprocessing.Pool(cores2use) as pool: + pool.starmap(run_det1, zip(files_to_run, outptd)) - if __name__ == '__main__': - multiprocessing.freeze_support() - main() print('\n * Finished multiprocessing! \n') + if __name__ == '__main__': + sys.exit(main()) + .. warning:: Although it is technically possible to call the pipeline with multiprocessing while also enabling this option in a step, we strongly recommend not to do this. 
This scenario would be the same as `SampleScript2` except with adding and calling the parameter dictionary - `parameter_dict` in `SampleScript1`. However, it will likely - crash if both multiprocessing options are set to use all the cores or even - less. We recommend not enabling step multiprocessing for parallel pipeline + `parameter_dict` in `SampleScript1`. However, Python will crash + if both multiprocessing options are set to use all the cores or even + less, because it is not permitted that a worker has children processes. + We recommend not enabling step multiprocessing for parallel pipeline runs to avoid potentially running out of memory. From ccfa560c19d355cdf375fb4d517f3855867838a5 Mon Sep 17 00:00:00 2001 From: Maria Date: Mon, 22 Apr 2024 09:44:37 -0400 Subject: [PATCH 11/13] Update docs/jwst/user_documentation/running_pipeline_python.rst Co-authored-by: Brett Graham --- .../user_documentation/running_pipeline_python.rst | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst index bb84130eb7..64429ebd92 100644 --- a/docs/jwst/user_documentation/running_pipeline_python.rst +++ b/docs/jwst/user_documentation/running_pipeline_python.rst @@ -543,21 +543,12 @@ in the pipeline, including the :ref:`jump detection `, multiprocessing can be enabled for these steps, as well as how to set up multiprocessing to simultaneously run the entire pipeline on multiple observations. -Python's multiprocessing module explicitly imports and executes a script's -`__main__` with each and every worker. If `__main__` is not present the behavior is -undefined. Hence, Python will crash unless the multiprocess code in enclosed in a -`__main__` block like this: - +Since the pipeline uses multiprocessing it is critical that any code using the pipeline adhere to the guidelines described in the `python multiprocessing documentation `_. The pipeline uses the `forkserver` start method internally and it is recommended that any multiprocessing scripts that use the pipline use the same start. As detailed in the `python documentation `_ this will require that code be "protected" with a ``if __name__ == '__main__':`` check as follows :: - import sys - - def main(): - [code used in multiprocessing] - if __name__ = '__main__': - sys.exit(main()) + [code used in multiprocessing] There are a couple of scenarios to use multiprocessing with the pipeline: From 7e1f8cecbdc2753b3b2f3abf6e62917bdc9dfc4c Mon Sep 17 00:00:00 2001 From: Howard Bushouse Date: Mon, 22 Apr 2024 11:33:58 -0400 Subject: [PATCH 12/13] Update docs/jwst/user_documentation/running_pipeline_python.rst --- docs/jwst/user_documentation/running_pipeline_python.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst index 64429ebd92..d769e91bf8 100644 --- a/docs/jwst/user_documentation/running_pipeline_python.rst +++ b/docs/jwst/user_documentation/running_pipeline_python.rst @@ -543,7 +543,13 @@ in the pipeline, including the :ref:`jump detection `, multiprocessing can be enabled for these steps, as well as how to set up multiprocessing to simultaneously run the entire pipeline on multiple observations. -Since the pipeline uses multiprocessing it is critical that any code using the pipeline adhere to the guidelines described in the `python multiprocessing documentation `_. 
+Since the pipeline uses multiprocessing it is critical that any code using the pipeline adhere
+to the guidelines described in the `python multiprocessing documentation
+`_.
+The pipeline uses the `forkserver` start method internally and it is recommended that any
+multiprocessing scripts that use the pipeline use the same start method. As detailed in the
+`python documentation `_
+this will require that code be "protected" with a ``if __name__ == '__main__':`` check as follows
 
 ::

From 1baa4c886297de3a827f49aa41346e4eed80786f Mon Sep 17 00:00:00 2001
From: Howard Bushouse
Date: Mon, 22 Apr 2024 11:34:55 -0400
Subject: [PATCH 13/13] Update docs/jwst/user_documentation/running_pipeline_python.rst

---
 docs/jwst/user_documentation/running_pipeline_python.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/jwst/user_documentation/running_pipeline_python.rst b/docs/jwst/user_documentation/running_pipeline_python.rst
index d769e91bf8..6d27cd0908 100644
--- a/docs/jwst/user_documentation/running_pipeline_python.rst
+++ b/docs/jwst/user_documentation/running_pipeline_python.rst
@@ -543,9 +543,9 @@ in the pipeline, including the :ref:`jump detection `,
 multiprocessing can be enabled for these steps, as well as how to set up
 multiprocessing to simultaneously run the entire pipeline on multiple observations.
 
-Since the pipeline uses multiprocessing it is critical that any code using the pipeline adhere
-to the guidelines described in the `python multiprocessing documentation
-`_.
+Since the pipeline uses multiprocessing it is critical that any code using the pipeline adhere
+to the guidelines described in the
+`python multiprocessing documentation `_.
 The pipeline uses the `forkserver` start method internally and it is recommended that any
 multiprocessing scripts that use the pipeline use the same start method. As detailed in the
 `python documentation `_
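
Editor's note: to tie together the guidance in the last few patches, here is a minimal
sketch of a driver script that keeps its multiprocessing code behind the
``if __name__ == '__main__':`` check and requests the same `forkserver` start method the
pipeline uses internally. The input file pattern, output directory, and core count are
placeholders and are not taken from the patches above::

    # Minimal illustrative sketch only; the file pattern, output directory,
    # and core count below are placeholders.
    import multiprocessing
    import os
    from glob import glob

    def run_det1(uncal_file, output_dir):
        # Import inside the worker, as SampleScript2 recommends, to avoid the
        # known memory leak mentioned in the documentation.
        from jwst.pipeline.calwebb_detector1 import Detector1Pipeline
        det1 = Detector1Pipeline()
        det1.call(uncal_file, output_dir=output_dir, save_results=True)

    if __name__ == '__main__':
        # Match the start method the pipeline uses internally.
        ctx = multiprocessing.get_context("forkserver")
        files_to_run = glob('*_uncal.fits')   # placeholder input pattern
        output_dir = '.'                      # placeholder output directory
        cores2use = max(1, (os.cpu_count() or 2) // 2)
        with ctx.Pool(cores2use) as pool:
            pool.starmap(run_det1, [(f, output_dir) for f in files_to_run])

Note that the `forkserver` start method is only available on POSIX systems; on other
platforms the default start method is used and the same ``__main__`` guard still applies.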