I don’t see any computations in the Python code. If you just need to execute several external programs in parallel it is sufficient to use subprocess
to run the programs and threading
module to maintain constant number of processes running, but the simplest code is using multiprocessing.Pool
:
#!/usr/bin/env python
import os
import multiprocessing as mp
def run(filename_def_param):
filename, def_param = filename_def_param # unpack arguments
... # call external program on `filename`
def safe_run(*args, **kwargs):
"""Call run(), catch exceptions."""
try: run(*args, **kwargs)
except Exception as e:
print("error: %s run(*%r, **%r)" % (e, args, kwargs))
def main():
# populate files
ws = r'D:\Data\Users\jbellino\Project\stJohnsDeepening\model\xsec_a'
workdir = os.path.join(ws, r'fieldgen\reals')
files = ((os.path.join(workdir, f), ws)
for f in os.listdir(workdir) if f.endswith('.npy'))
# start processes
pool = mp.Pool() # use all available CPUs
pool.map(safe_run, files)
if __name__=="__main__":
mp.freeze_support() # optional if the program is not frozen
main()
If there are many files then pool.map()
could be replaced by for _ in pool.imap_unordered(safe_run, files): pass
.
There is also mutiprocessing.dummy.Pool
that provides the same interface as multiprocessing.Pool
but uses threads instead of processes that might be more appropriate in this case.
You don’t need to keep some CPUs free. Just use a command that starts your executables with a low priority (on Linux it is a nice
program).
ThreadPoolExecutor
example
concurrent.futures.ThreadPoolExecutor
would be both simple and sufficient but it requires 3rd-party dependency on Python 2.x (it is in the stdlib since Python 3.2).
#!/usr/bin/env python
import os
import concurrent.futures
def run(filename, def_param):
... # call external program on `filename`
# populate files
ws = r'D:\Data\Users\jbellino\Project\stJohnsDeepening\model\xsec_a'
wdir = os.path.join(ws, r'fieldgen\reals')
files = (os.path.join(wdir, f) for f in os.listdir(wdir) if f.endswith('.npy'))
# start threads
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
future_to_file = dict((executor.submit(run, f, ws), f) for f in files)
for future in concurrent.futures.as_completed(future_to_file):
f = future_to_file[future]
if future.exception() is not None:
print('%r generated an exception: %s' % (f, future.exception()))
# run() doesn't return anything so `future.result()` is always `None`
Or if we ignore exceptions raised by run()
:
from itertools import repeat
... # the same
# start threads
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
executor.map(run, files, repeat(ws))
# run() doesn't return anything so `map()` results can be ignored
subprocess
+ threading
(manual pool) solution
#!/usr/bin/env python
from __future__ import print_function
import os
import subprocess
import sys
from Queue import Queue
from threading import Thread
def run(filename, def_param):
... # define exe, swt_nam
subprocess.check_call([exe, swt_nam]) # run external program
def worker(queue):
"""Process files from the queue."""
for args in iter(queue.get, None):
try:
run(*args)
except Exception as e: # catch exceptions to avoid exiting the
# thread prematurely
print('%r failed: %s' % (args, e,), file=sys.stderr)
# start threads
q = Queue()
threads = [Thread(target=worker, args=(q,)) for _ in range(8)]
for t in threads:
t.daemon = True # threads die if the program dies
t.start()
# populate files
ws = r'D:\Data\Users\jbellino\Project\stJohnsDeepening\model\xsec_a'
wdir = os.path.join(ws, r'fieldgen\reals')
for f in os.listdir(wdir):
if f.endswith('.npy'):
q.put_nowait((os.path.join(wdir, f), ws))
for _ in threads: q.put_nowait(None) # signal no more files
for t in threads: t.join() # wait for completion