Commit 2929cb03 authored by aknecht2's avatar aknecht2
Browse files

Fixed typo in remove_duplicates.yaml. Adjusted get_full_name to handle no...

Fixed typo in remove_duplicates.yaml. Adjusted get_full_name to handle no markers. Updated workflow to track prefixes across modules.
parent 894bffce
......@@ -11,7 +11,7 @@ remove_duplicates:
inputs:
- filtered.bam:
type: file
additional_input: null
additional_inputs: null
outputs:
- sorted.bam:
type: file
......
......@@ -292,6 +292,7 @@ class Workflow(object):
for ext in chipathlon.conf.genomes[run["align"]]["additional_files"]:
additional_files["ref_genome." + ext] = self.run_data["genomes"][run["align"]]["grch38p6_files"]["additional_files"][ext]
for pair in run["input_sets"]:
run["prefix"] = []
for file_tuple in pair:
if len(file_tuple) == 2:
markers["read_end"] = "paired"
......@@ -302,23 +303,25 @@ class Workflow(object):
markers["read_end"] = "single"
prefix = "%s_%s" % (run["experiment"], file_tuple[0]["accession"])
input_files["download_1.fastq"] = "%s.fastq.gz" % (prefix,)
# For future tools, we need to keep track
# of prefixes since all files are tracked
# by markers.
run["prefix"].append(prefix)
self.modules["align"].add_jobs(self.dax, self.jobs, self.files, prefix, markers, input_files, additional_files)
return
def _add_remove_duplicates(self):
# Adds "remove_duplicates" jobs to the workflow: walks every run in
# self.run_data["runs"], every pair in run["input_sets"], and every file
# tuple in each pair, calling self.modules["remove_duplicates"].add_jobs.
#
# NOTE(review): this listing comes from a diff rendered without +/- markers
# or indentation, so two loops over `pair` appear back to back. The first
# (rebuilding the prefix from accessions) looks like the pre-commit version
# and the second (reading run["prefix"]) like its replacement -- confirm
# against the repository before treating both as live code.
markers = {}
for run in self.run_data["runs"]:
input_files = {}
additional_files = {}
for pair in run["input_sets"]:
for file_tuple in pair:
if len(file_tuple) == 2:
# It's a paired-end read: the prefix carries both accessions.
prefix = "%s_%s_%s" % (run["experiment"], file_tuple[0]["accession"], file_tuple[1]["accession"])
else:
# It's a single-end read: the prefix carries the one accession.
prefix = "%s_%s" % (run["experiment"], file_tuple[0]["accession"])
self.modules["remove_duplicates"].add_jobs(self.dax, self.jobs, self.files, prefix, markers, input_files, additional_files)
for i, file_tuple in enumerate(pair):
# Load our prefix from the previously saved
# version. Remove duplicates has no markers, so it won't
# update the prefix.
# NOTE(review): the align step shown earlier in this diff initialises
# run["prefix"] as a list and appends one prefix per file tuple, so
# this assigns the whole list; `i` is otherwise unused. Presumably
# run["prefix"][i] was intended -- TODO confirm.
prefix = run["prefix"]
# Remove duplicates has everything it needs!
# Input files should be in master_files from
# the result of the align step, so empty markers, input files and
# additional files are passed here.
self.modules["remove_duplicates"].add_jobs(self.dax, self.jobs, self.files, prefix, {}, {}, {})
return
def _create_setup(self):
......
......@@ -238,4 +238,7 @@ class WorkflowModule(object):
return (valid_params, msg, data)
def _get_full_name(self, prefix, markers, file_name):
    """
    Build the underscore-joined name used to identify a file.

    With markers, the result is ``<prefix>_<k1>_<v1>_..._<file_name>``,
    where the keys are taken in the order given by ``self.order``.
    Without markers (empty or None), the result is simply
    ``<prefix>_<file_name>``.

    :param prefix: Leading part of the name.
    :type prefix: str
    :param markers: Marker name -> marker value (both strings); may be empty.
    :type markers: dict
    :param file_name: Trailing part of the name.
    :type file_name: str
    :returns: The combined name.
    :raises KeyError: If markers is non-empty but lacks a key in self.order.
    """
    if markers:
        # Follow self.order rather than dict order so the same markers
        # always produce the same name.
        marker_part = "_".join([x + "_" + markers[x] for x in self.order])
        return "%s_%s_%s" % (prefix, marker_part, file_name)
    # No markers: skip the marker segment instead of indexing into an
    # empty dict (which would raise) or leaving a doubled underscore.
    return "%s_%s" % (prefix, file_name)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment