Commit abbd6622 authored by aknecht2's avatar aknecht2
Browse files

New run file / genomes now successfully loaded into workflow.

parent 442db394
......@@ -52,7 +52,7 @@ class Genome(object):
return self.files.get("base_file")
def get_chrom_sizes(self):
    """Return the registered chrom sizes file entry, or None if absent."""
    # NOTE(review): an unreachable duplicate `return self.files.get("chrom.sizes")`
    # followed this line (diff residue) and has been removed; the executed
    # lookup key is "chrom_sizes" — confirm against _add_file's registration key.
    return self.files.get("chrom_sizes")
def get_additional_files(self):
    """Return the entry stored under "additional_files", or None if absent."""
    additional = self.files.get("additional_files")
    return additional
......@@ -60,15 +60,18 @@ class Genome(object):
def get_chr_fasta_files(self):
    """Return the entry stored under "chr_fasta", or None if absent."""
    chr_fasta = self.files.get("chr_fasta")
    return chr_fasta
def get_all_files(self):
    """Collect every file tracked for this genome into one flat list.

    Order: base file, chrom sizes, then the additional files, then the
    per-chromosome fasta files.
    """
    collected = [self.get_base_file(), self.get_chrom_sizes()]
    collected.extend(self.get_additional_files())
    collected.extend(self.get_chr_fasta_files())
    return collected
def _load_prefixes(self):
self.file_prefix = "genome_%s_%s" % (self.assembly, self.tool)
self.add_file_prefix = self.file_prefix if self.tool == "bowtie2" else self.file_prefix + os.path.splitext(self.base_file)[1]
bowtie2_path_prefix, base_ext = os.path.splitext(self.base_file)
self.base_ext = base_ext
self.base_ext = base_ext[1:]
self.path_prefix = bowtie2_path_prefix if self.tool == "bowtie2" else self.base_file
return
def _add_file(self, name, path, gen_file_type="base_file"):
def _add_file(self, name, path, gen_file_type):
"""
Adds a file to the internal files list. Files are indexed
by type. There should only be a single base_file / chrom.sizes
......@@ -123,4 +126,5 @@ class Genome(object):
for f in files:
if f.startswith("chr"):
self._add_file("%s_%s" % (self.add_file_prefix, f), root + "/" + f, "chr_fasta")
break
return
......@@ -34,7 +34,7 @@ class RunParser(object):
"""
return len(self.errors) == 0 and not self.parse_error
def get_errors(self):
def get_error_string(self):
"""
Returns the errors as a newline separated string.
"""
......@@ -64,6 +64,9 @@ class RunParser(object):
return self.genomes[assembly][tool]
return None
def get_genomes(self):
    """Return a flat list of every genome across all assemblies and tools.

    self.genomes maps assembly -> {tool -> genome}; this flattens the
    inner per-tool dicts into a single list.
    """
    flattened = []
    for tool_map in self.genomes.values():
        flattened.extend(tool_map.values())
    return flattened
def _load_genomes(self):
"""
Validate that the correct genomic information exists, we leave it up
......
......@@ -13,6 +13,7 @@ import chipathlon.workflow_job
import chipathlon.db
import chipathlon.workflow_module
import chipathlon.run_data
from chipathlon.run_parser import RunParser
from chipathlon.download_generator import DownloadGenerator
from chipathlon.align_generator import AlignGenerator
from chipathlon.remove_duplicates_generator import RemoveDuplicatesGenerator
......@@ -21,7 +22,6 @@ import random
from pprint import pprint
from Pegasus.DAX3 import *
class Workflow(object):
def __init__(self, jobhome, run_file, param_file, config_file, host, username, password, rewrite=False, debug=False):
......@@ -97,6 +97,11 @@ class Workflow(object):
self.dax.addFile(f)
return
def _add_genome_file(self, gen_file_obj):
self.files[gen_file_obj["name"]] = gen_file_obj["file"]
self.dax.addFile(gen_file_obj["file"])
return
def _remove_job(self, name):
"""
We don't delete out of master_jobs list so we can keep track of
......@@ -174,9 +179,15 @@ class Workflow(object):
return
def _load_run_data(self):
    """Parse the run file and register all genome files with the workflow.

    On a valid parse, every file of every genome reported by the parser is
    added to the dax via _add_genome_file and the run list is stored in
    self.runs; otherwise the parser's error string is accumulated and the
    workflow aborts via _raise().

    NOTE(review): removed the superseded RunData-based loading lines that
    preceded this body (diff residue from the RunData -> RunParser migration).
    """
    self.run_parser = RunParser(self.run_file, self.mdb)
    if self.run_parser.is_valid():
        for genome in self.run_parser.get_genomes():
            for gen_file_obj in genome.get_all_files():
                self._add_genome_file(gen_file_obj)
        self.runs = self.run_parser.get_runs()
    else:
        self.err += self.run_parser.get_error_string()
        self._raise()
    return
def _download_from_gridfs(self, gridfs_id, output_file):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment