Commit a49f8b8f authored by aknecht2

More style updates and comment additions.

parent ea63700f
@@ -231,7 +231,30 @@ class WorkflowModule(object):
         """
         valid, msg = self._check_params(master_files, markers, inputs, outputs)
         if valid:
-            self._traverse_jobs(dax, master_jobs, master_files, markers, inputs, outputs)
+            job_list = self.get_job_list(markers)
+            for job_dict in job_list:
+                job_name = job_dict.keys()[0]
+                job_info = job_dict[job_name]
+                # outputs is a dict!  Check the values, not the keys.
+                # Output files get created before a job is added, so check against master_jobs instead.
+                if any([output_file not in master_jobs for output_file in outputs.values()]):
+                    job_inputs = self._setup_job_params(master_files, job_name, job_info, markers, "inputs", inputs, outputs)
+                    job_outputs = self._setup_job_params(master_files, job_name, job_info, markers, "outputs", outputs, outputs)
+                    job = self.workflow_jobs[job_name].create_job(job_inputs, job_outputs)
+                    if job is not None:
+                        dax.addJob(job)
+                    else:
+                        print "JOB ERROR for '%s'.\n" % (job_dict,)
+                        raise SystemExit(1)
+                    # Now add the job to master_jobs and check dependencies.
+                    # master_jobs is a shared dict, so mutations here are
+                    # visible to the caller; the redundancy makes dependency
+                    # checking easier later and is not very costly.
+                    for logical_name, param_dict in job_outputs.iteritems():
+                        master_jobs[param_dict["name"]] = job
+                    for logical_name, param_dict in job_inputs.iteritems():
+                        if param_dict["name"] in master_jobs:
+                            dax.depends(child=job, parent=master_jobs[param_dict["name"]])
         else:
             print msg
         return
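The inlined block above uses a producer/consumer pattern: each output file name is mapped to the job that creates it in master_jobs, so a later job that consumes the same file can find its parent with a plain dict lookup. A minimal sketch of just that wiring, pulled out of the module for clarity (wire_dependencies is a hypothetical helper, not part of this codebase):

def wire_dependencies(dax, master_jobs, job, job_inputs, job_outputs):
    # Record this job as the producer of each of its output files.
    for param_dict in job_outputs.values():
        master_jobs[param_dict["name"]] = job
    # Any input produced by an earlier job becomes a DAG edge.
    for param_dict in job_inputs.values():
        if param_dict["name"] in master_jobs:
            dax.depends(child=job, parent=master_jobs[param_dict["name"]])

Because master_jobs persists across modules, this lookup also resolves dependencies between jobs added by different modules, not just within one.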
@@ -277,7 +300,9 @@ class WorkflowModule(object):
         for input_marker in markers:
             if input_marker in self.markers:
                 if markers[input_marker] not in self.markers[input_marker]:
-                    msg += "Marker '%s' does not have value '%s', should be one of %s.\n" % (input_marker, markers[input_marker], self.markers[marker])
+                    msg += "Marker '%s' does not have value '%s', should be one of %s.\n" % (
+                        input_marker, markers[input_marker], self.markers[input_marker]
+                    )
                     valid = False
             else:
                 msg += "Marker '%s' does not exist.\n" % (input_marker,)
@@ -306,7 +331,9 @@ class WorkflowModule(object):
                 if param_dict["type"] in chipathlon.conf.argument_types["file"]:
                     if arg_params[param_name] not in master_files:
                         valid = False
-                        msg += "Error loading '%s' jobs. File with name: '%s', value: '%s' does not exist in master_files.\n" % (self.name, param_name, arg_params[param_name])
+                        msg += "Error loading '%s' jobs. File with name: '%s', value: '%s' does not exist in master_files.\n" % (
+                            self.name, param_name, arg_params[param_name]
+                        )
             else:
                 valid = False
                 msg += "Error loading '%s' jobs. Required param '%s' for argument type '%s' not defined.\n" % (self.name, param_name, arg_type)
@@ -342,49 +369,6 @@ class WorkflowModule(object):
             })
         return (inputs, outputs)
 
-    def _traverse_jobs(self, dax, master_jobs, master_files, markers, inputs, outputs):
-        """
-        :param dax: The Pegasus dax object to add jobs to.
-        :type dax: Pegasus.DAX3.ADAG
-        :param master_jobs: Master job dictionary mapping job_names -> job objects.
-        :type master_jobs: dict
-        :param master_files: Master file dictionary mapping file_names -> file objects.
-        :type master_files: dict
-        :param markers: Input markers.
-        :type markers: dict
-        :param inputs: All required inputs for the given markers.
-        :type inputs: dict
-        :param outputs: All required outputs for the given markers.
-        :type outputs: dict
-
-        Walk through the jobs in the module and add them to the workflow.
-        """
-        job_list = self.get_job_list(markers)
-        for job_dict in job_list:
-            job_name = job_dict.keys()[0]
-            job_info = job_dict[job_name]
-            # outputs is a dict!  Check the values, not the keys.
-            # Output files get created before a job is added, so check against master_jobs instead.
-            if any([output_file not in master_jobs for output_file in outputs.values()]):
-                job_inputs = self._setup_job_params(master_files, job_name, job_info, markers, "inputs", inputs, outputs)
-                job_outputs = self._setup_job_params(master_files, job_name, job_info, markers, "outputs", outputs, outputs)
-                job = self.workflow_jobs[job_name].create_job(job_inputs, job_outputs)
-                if job is not None:
-                    dax.addJob(job)
-                else:
-                    print "JOB ERROR for '%s'.\n" % (job_dict,)
-                    raise SystemExit(1)
-                # Now add the job to master_jobs and check dependencies.
-                # master_jobs is a shared dict, so mutations here are
-                # visible to the caller; the redundancy makes dependency
-                # checking easier later and is not very costly.
-                for logical_name, param_dict in job_outputs.iteritems():
-                    master_jobs[param_dict["name"]] = job
-                for logical_name, param_dict in job_inputs.iteritems():
-                    if param_dict["name"] in master_jobs:
-                        dax.depends(child=job, parent=master_jobs[param_dict["name"]])
-        return
 
     def get_all_final_results(self, markers):
         """
         :param markers: The splits to take for the current workflow.
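The removed docstring names the Pegasus DAX3 types the module builds on. For orientation, here is a minimal, self-contained sketch of that API; the file and job names are invented, while ADAG, File, Job, Link, addJob, uses, depends, and writeXML are the actual DAX3 calls as I understand them:

from Pegasus.DAX3 import ADAG, File, Job, Link

dax = ADAG("example-workflow")

raw = File("sample.fastq")        # hypothetical input file
aligned = File("sample.bam")      # hypothetical intermediate file

align = Job(name="align")         # hypothetical job names
align.uses(raw, link=Link.INPUT)
align.uses(aligned, link=Link.OUTPUT, transfer=False)
dax.addJob(align)

sort = Job(name="sort")
sort.uses(aligned, link=Link.INPUT)
dax.addJob(sort)

# The same call the module makes: the child job waits for the parent.
dax.depends(child=sort, parent=align)

with open("example-workflow.xml", "w") as f:
    dax.writeXML(f)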
@@ -426,9 +410,33 @@ class WorkflowModule(object):
         :type outputs: list
 
         This function sets up the required params for a job to run
-        successfully. Primarily, it converts the format from the dictionary
-        style passed into the module, to the list style expected for job
-        creation.
+        successfully. It converts all inputs from the format expected by the
+        module into the format expected by the individual jobs. The format
+        expected by jobs is a dictionary keyed by parameter name:
+
+        .. code-block:: python
+
+            {
+                "file_param_name": {
+                    "name": "FULL-FILE-NAME",
+                    "file": PegasusFileObject,
+                    "transfer": True/False
+                },
+                "arg_param_name": {
+                    "name": "arg_param_name",
+                    "value": ArgValue
+                },
+                "list_param_name": {
+                    "name": "list_param_name",
+                    "value": [
+                        {
+                            "name": "FULL-FILE-NAME",
+                            "file": PegasusFileObject
+                        },
+                        ...
+                    ]
+                }
+            }
         """
         param_dict = {}
         if job_info[param_type] is not None:
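To make the documented shape concrete, one hypothetical instance of the dictionary _setup_job_params produces might look like the following; the parameter names and values are invented, and File stands in for the PegasusFileObject the docstring refers to:

from Pegasus.DAX3 import File

example_params = {
    "bam": {                        # a file param
        "name": "run1_sample.bam",
        "file": File("run1_sample.bam"),
        "transfer": False
    },
    "quality": {                    # an argument param
        "name": "quality",
        "value": 30
    },
    "peak_files": {                 # a file_list param
        "name": "peak_files",
        "value": [
            {"name": "run1_peaks.bed", "file": File("run1_peaks.bed")}
        ]
    }
}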
@@ -439,6 +447,8 @@ class WorkflowModule(object):
                 # wj_param is the info contained in the workflow job yaml.
                 if wj_param["type"] in chipathlon.conf.argument_types["file"]:
                     if logical_name in outputs:
+                        # This particular file is an output from a previous step!
+                        # Load its information accordingly.
                         param_dict[param_name] = {
                             "name": outputs[logical_name],
                             "file": master_files[outputs[logical_name]],
@@ -452,7 +462,6 @@ class WorkflowModule(object):
                         }
                 elif wj_param["type"] in chipathlon.conf.argument_types["list"]:
                     if wj_param["type"] == "file_list":
-                        sub_list = []
                         param_dict[param_name] = {
                             "name": param_name,
                             "value": [{
...