import os
from functools import partial
import ipyparallel as ipp
# Cluster configured for 4 engines using the "mpi" engine launcher; it is
# entered as a context manager further down in this file.
cluster = ipp.Cluster(engines="mpi", n=4)
def custom_error_message(launcher, stop_data):
    """Print a custom message for each exit code found in the launcher output.

    Scans the text returned by ``launcher.get_output()`` line by line and, for
    every line containing ``'EXIT CODE:'``, prints the text that follows the
    first colon on that line.

    Args:
        launcher: Object exposing ``get_output()``, which must return the
            captured launcher output as a single string.
        stop_data: Second argument supplied by the stop callback. It is
            accepted so the function fits the callback signature, but it is
            not used here.
    """
    for line in launcher.get_output().splitlines():
        if 'EXIT CODE:' not in line:
            continue
        # The text after the first colon carries the code, padded with
        # whitespace; strip it so the message does not contain a stray gap.
        exit_code = line.split(":", 1)[1].strip()
        print("!!!!!!!!!!!!!Looks like your engine existed with:", exit_code)
with cluster as rc:
    # Register the reporter on the engine set, pre-binding the engine set
    # itself as the `launcher` argument of custom_error_message.
    engine_set = cluster.engine_set
    report_engine_stop = partial(custom_error_message, engine_set)
    engine_set.on_stop(report_engine_stop)

    # Ask engine 0 to run os._exit(-1) so that the stop callback is exercised.
    first_engine = rc[0]
    first_engine.apply_sync(os._exit, -1)
Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>
0%| | 0/4 [00:00<?, ?engine/s]
Stopping engine(s): 1650530859 mpiexec error output: =================================================================================== = BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES = PID 92567 RUNNING AT heavy.local = EXIT CODE: 9 = CLEANING UP REMAINING PROCESSES = YOU CAN IGNORE THE BELOW CLEANUP MESSAGES =================================================================================== YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Killed: 9 (signal 9) engine set stopped 1650530859: {'exit_code': 9, 'pid': 92564, 'identifier': 'ipengine-1650530858-8qah-1650530859-92105'} !!!!!!!!!!!!!Looks like your engine existed with: 9 Stopping controller Controller stopped: {'exit_code': 0, 'pid': 92546, 'identifier': 'ipcontroller-1650530858-8qah-92105'}
[Engine Exception] Traceback (most recent call last): File "/Users/minrk/conda/lib/python3.9/site-packages/ipyparallel/client/client.py", line 894, in _handle_stranded_msgs raise error.EngineError( ipyparallel.error.EngineError: Engine 0 died while running task '1bcf4fdb-a86d7b6c0537fcd454fea0ea_92105_1'