dask · mrocklin · Jul 16, 2018 · May 18, 2018 · May 18, 2018 · May 18, 2018
diff --git a/dask_jobqueue/core.py b/dask_jobqueue/core.py
@@ -1,4 +1,7 @@
+from __future__ import absolute_import, division, print_function
+
 import logging
+import math
 import os
 import shlex
 import socket
@@ -133,8 +136,11 @@ def __init__(self,
         if memory is not None:
             self._command_template += " --memory-limit %s" % memory
         if name is not None:
-            self._command_template += " --name %s" % name
-            self._command_template += "-%(n)d" # Keep %(n) to be replaced later
+            # worker names follow this template: {NAME}-{JOB_ID}-{WORKER_NUM}
+            self._command_template += " --name %s" % name  # e.g. "dask-worker"
+            # Keep %(n) to be replaced later (worker id on this job)
+            # ${JOB_ID} is an environment variable describing this job
+            self._command_template += "-${JOB_ID}-%(n)d"
         if death_timeout is not None:
             self._command_template += " --death-timeout %s" % death_timeout
         if local_directory is not None:
@@ -161,7 +167,8 @@ def job_file(self):
     def start_workers(self, n=1):
         """ Start workers and point them to our local scheduler """
         workers = []
-        for _ in range(n):
+        num_jobs = int(math.ceil(n / self.worker_processes))
+        for _ in range(num_jobs):
             with self.job_file() as fn:
                 out = self._call(shlex.split(self.submit_command) + [fn])
                 job = self._job_id_from_submit_output(out.decode())
@@ -196,12 +203,12 @@ def _calls(self, cmds):
         Also logs any stderr information
         """
         logger.debug("Submitting the following calls to command line")
+        procs = []
         for cmd in cmds:
             logger.debug(' '.join(cmd))
-        procs = [subprocess.Popen(cmd,
-                                  stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE)
-                 for cmd in cmds]
+            procs.append(subprocess.Popen(cmd,
+                                          stdout=subprocess.PIPE,
+                                          stderr=subprocess.PIPE))
 
         result = []
         for proc in procs:
@@ -232,10 +239,13 @@ def scale_up(self, n, **kwargs):
 
     def scale_down(self, workers):
         ''' Close the workers with the given addresses '''
-        if isinstance(workers, dict):
-            names = {v['name'] for v in workers.values()}
-            job_ids = {name.split('-')[-2] for name in names}
-            self.stop_workers(job_ids)
+        if not isinstance(workers, dict):
+            raise ValueError(
+                'Expected dictionary of workers, got %s' % type(workers))
+        names = {v['name'] for v in workers.values()}
+        # This will close down the full group of workers
+        job_ids = {name.split('-')[-2] for name in names}
+        self.stop_workers(job_ids)
 
     def __enter__(self):
         return self

diff --git a/dask_jobqueue/pbs.py b/dask_jobqueue/pbs.py
@@ -73,7 +73,7 @@ def __init__(self,
         # Try to find a project name from environment variable
         project = project or os.environ.get('PBS_ACCOUNT')
 
-        header_lines = []
+        header_lines = ['#!/usr/bin/env bash']
         # PBS header build
         if self.name is not None:
             header_lines.append('#PBS -N %s' % self.name)
@@ -93,6 +93,7 @@ def __init__(self,
         if walltime is not None:
             header_lines.append('#PBS -l walltime=%s' % walltime)
         header_lines.extend(['#PBS %s' % arg for arg in job_extra])
+        header_lines.append('JOB_ID=${PBS_JOBID%.*}')
 
         # Declare class attribute that shall be overriden
         self.job_header = '\n'.join(header_lines)

diff --git a/dask_jobqueue/sge.py b/dask_jobqueue/sge.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, print_function
+
 import logging
 
 import dask
@@ -52,8 +54,7 @@ def __init__(self,
 
         super(SGECluster, self).__init__(**kwargs)
 
-        header_lines = ['#!/bin/bash']
-
+        header_lines = ['#!/usr/bin/env bash']
         if self.name is not None:
             header_lines.append('#$ -N %(name)s')
         if queue is not None:

diff --git a/dask_jobqueue/slurm.py b/dask_jobqueue/slurm.py
@@ -71,7 +71,7 @@ def __init__(self,
         super(SLURMCluster, self).__init__(**kwargs)
 
         # Always ask for only one task
-        header_lines = []
+        header_lines = ['#!/usr/bin/env bash']
         # SLURM header build
         if self.name is not None:
             header_lines.append('#SBATCH -J %s' % self.name)
@@ -99,6 +99,7 @@ def __init__(self,
 
         if walltime is not None:
             header_lines.append('#SBATCH -t %s' % walltime)
+        header_lines.append('JOB_ID=${SLURM_JOB_ID%;*}')
         header_lines.extend(['#SBATCH %s' % arg for arg in job_extra])
 
         # Declare class attribute that shall be overriden