Commit 0c514ed3 authored by aknecht2's avatar aknecht2
Browse files

Adjusted remove_duplicates generator and workflow to enable starting from bam...

Adjusted remove_duplicates generator and workflow to enable starting from bam files instead of fastq files.
parent 6aead68a
......@@ -138,7 +138,7 @@ class ModuleGenerator(object):
outputs = [
{"name": result.full_name, "type": "file", "file": result.pegasus_file}
]
download_job = self.workflow_jobs["download_fastq"].create_job(inputs, [], outputs)
download_job = self.workflow_jobs["download_from_encode"].create_job(inputs, [], outputs)
if download_job is not None:
self.master_files[result.full_name] = result.pegasus_file
self.dax.addJob(download_job)
......
......@@ -25,6 +25,8 @@ class RemoveDuplicatesGenerator(ModuleGenerator):
super(RemoveDuplicatesGenerator, self).__init__(dax, master_jobs, master_files, mdb, workflow_module, workflow_jobs, base_path, debug)
self.module_name = "remove_duplicates"
self.result_dict = {}
if debug:
print "[LOADING GENERATOR] RemoveDuplicatesGenerator"
return
def create_final_results(self, run):
......@@ -33,7 +35,7 @@ class RemoveDuplicatesGenerator(ModuleGenerator):
:type run: :py:class:chipathlon.run.Run
"""
module_jobs = [self.workflow_jobs[job_name] for job_name in self.module.get_job_names({})]
for align_result in run.get_results("align", "align.bam"):
for align_result in self._find_prev_results(run):
markers = dict({"remove_duplicates": {}}, **align_result.all_markers)
result = Result(
"no_dups_chr.bed",
......@@ -46,6 +48,14 @@ class RemoveDuplicatesGenerator(ModuleGenerator):
run.add_result("remove_duplicates", result)
return run.get_results("remove_duplicates", "no_dups_chr.bed")
def _find_prev_results(self, run):
# Pick the results that feed the remove_duplicates module, based on the
# run's input file type.
# NOTE(review): indentation was stripped from this diff view; restore it
# before reusing this code.
if run.file_type == "fastq":
# fastq runs: use the "align.bam" results of the "align" module.
return run.get_results("align", "align.bam")
elif run.file_type == "bam":
# bam runs: use the "encode.bam" results of the "download" module.
return run.get_results("download", "encode.bam")
else:
# Any other file type yields an empty list.
return []
def find_prev_results(self, run, result):
"""
:param run: The target run to generate jobs for.
......@@ -53,7 +63,7 @@ class RemoveDuplicatesGenerator(ModuleGenerator):
:param result: The target result to create jobs for.
:type result: :py:class:chipathlon.result.Result
"""
align_results = run.get_results("align", "align.bam")
align_results = self._find_prev_results(run)
prev_results = []
control_accessions = result.get_accessions("control")
signal_accessions = result.get_accessions("signal")
......
......@@ -220,7 +220,8 @@ class Workflow(object):
def _generate_jobs(self):
# For every run, invoke the align, remove_duplicates and peak_call
# generators in that order.
for run in self.runs:
# NOTE(review): the +/- markers are missing from this diff view; the
# unconditional call on the next line looks like the pre-change line that
# the guarded call below replaces -- confirm against the repository.
self.align_gen.generate(run)
if run.file_type == "fastq":
# Alignment jobs are generated only when the run's file type is fastq.
self.align_gen.generate(run)
self.remove_dup_gen.generate(run)
self.peak_call_gen.generate(run)
return
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment