Commit 848c8fba authored by aknecht2's avatar aknecht2
Browse files

Added initial run_parser. Removed run_keys from conf since they weren't being used anyway.

parent 7be2148d
......@@ -117,12 +117,6 @@ param_keys = {
"optional": ["arguments"] + resources.keys()
# run keys
# Keys recognised in a run definition, split into those that must be
# present ("required") and those that may be omitted ("optional").
run_keys = {
    "required": ["experiment", "align", "peak", "genome"],
    "optional": [],
}

# workflow order
# Names of the workflow stages, listed in the order given here.
workflow = ["align", "remove_duplicates", "peak_calling"]
from import Run
#from chipathlon.genome import Genome
class RunParser(object):
    """
    Parses a run yaml file into genome and run objects, collecting any
    errors encountered along the way.
    """

    def __init__(self, run_file, mdb):
        """
        :param run_file: The path to the input run yaml file.
        :type run_file: str
        :param mdb: The MongoDB instance to load sample data from.
        :type mdb: chipathlon.db.MongoDB

        This class parses an input file to return a list of
        properly instantiated Run classes.
        """
        self.run_file = run_file
        self.errors = []
        self.runs = []
        self.genomes = {}
        self.parse_error = False
        # Define yaml_data up front so the loaders never hit a missing
        # attribute when the file fails to parse.
        self.yaml_data = {}
        try:
            with open(self.run_file, "r") as rh:
                # NOTE(review): yaml.load can construct arbitrary Python
                # objects; if run files may come from untrusted sources,
                # switch to yaml.safe_load.
                # An empty file loads as None, so fall back to an empty dict.
                self.yaml_data = yaml.load(rh) or {}
        except yaml.YAMLError as e:
            self.parse_error = True
            self.errors.append(
                "Error parsing run template file [%s]: %s." % (self.run_file, e)
            )
        # NOTE(review): mdb is not used here, and neither _load_genomes nor
        # _load_runs is invoked from this constructor -- confirm where the
        # loaders are expected to be called.

    def is_valid(self):
        """
        Checks if the run is valid.

        :returns: True when no errors were recorded and no parse error
            was flagged, False otherwise.
        """
        return len(self.errors) == 0 and not self.parse_error

    def get_errors(self):
        """
        Returns the errors as a newline separated string.

        The parser's own errors come first, followed by the error strings
        of every loaded genome and then of every loaded run.  Empty error
        strings are skipped so the result has no blank lines.
        """
        messages = list(self.errors)
        messages.extend(
            genome.get_error_string()
            for tool_dict in self.genomes.values()
            for genome in tool_dict.values()
        )
        messages.extend(run.get_error_string() for run in self.runs)
        return "\n".join(msg for msg in messages if msg)

    def get_run(self, index):
        """
        :param index: The ith spot in the yaml file
        :type index: int

        :returns: The run stored at index, or None when index falls
            outside the list of loaded runs (in either direction).
        """
        total = len(self.runs)
        # Negative indices are honoured as usual, but only while in range,
        # so an out-of-range lookup never raises IndexError.
        if -total <= index < total:
            return self.runs[index]
        return None

    def get_runs(self):
        """
        :returns: The list of loaded runs.
        """
        return self.runs

    def yield_runs(self):
        """
        Generator over the loaded runs, in list order.
        """
        for run in self.runs:
            yield run

    def get_genome(self, assembly, tool):
        """
        :param assembly: The assembly name the genome was defined under.
        :param tool: The tool key the genome was defined under.

        :returns: The genome stored for (assembly, tool), or None if no
            such genome has been loaded.
        """
        return self.genomes.get(assembly, {}).get(tool)

    def _load_genomes(self):
        """
        Validate that the correct genomic information exists, we leave it up
        to the genome class to validate the content.
        """
        if "genomes" not in self.yaml_data:
            self.errors.append("Error parsing run file [%s]: No 'genomes' key defined." % (self.run_file,))
            return
        # .items() works on both Python 2 and Python 3 dictionaries.
        for assembly, gen_data in self.yaml_data["genomes"].items():
            if "chrom.sizes" not in gen_data:
                self.errors.append("Error parsing run file[%s]: Genome defined under assembly [%s] has no 'chrom.sizes' key." % (self.run_file, assembly))
                continue
            # Every key other than 'chrom.sizes' is treated as a tool entry.
            tools = [key for key in gen_data if key != "chrom.sizes"]
            if not tools:
                self.errors.append("Error parsing run file[%s]: Genome defined under assembly [%s] has no alignment tools defined." % (self.run_file, assembly))
                continue
            for tool in tools:
                genome = Genome(assembly, tool, gen_data[tool], gen_data["chrom.sizes"])
                if not genome.is_valid():
                    self.parse_error = True
                # Invalid genomes are stored too, so get_errors can report
                # their error strings.
                self.genomes.setdefault(assembly, {})[tool] = genome

    def _load_runs(self, mdb):
        """
        Validate that the correct run information exists, we leave it up to
        the run class to validate the content.

        Does nothing when a parse error has already been flagged.

        :param mdb: The MongoDB instance to load sample data from.
            NOTE(review): currently not forwarded to Run -- confirm.
        """
        if self.parse_error:
            return
        if "runs" not in self.yaml_data:
            self.errors.append("Error parsing run file [%s]: No 'runs' key defined." % (self.run_file,))
            return
        required = ["assembly", "align", "peak", "signal1", "control1", "file_type"]
        for i, yaml_run in enumerate(self.yaml_data["runs"]):
            has_req_params = True
            # Report every missing key, not just the first one.
            for req_key in required:
                if req_key not in yaml_run:
                    self.errors.append("Error parsing run file[%s]: Run #%s is missing required key '%s'." % (self.run_file, i, req_key))
                    self.parse_error = True
                    has_req_params = False
            if has_req_params:
                run = Run(
                    self.get_genome(yaml_run.get("assembly"), yaml_run.get("align"))
                )
                if not run.is_valid():
                    self.parse_error = True
                # Keep the run even when invalid: the accessors and
                # get_errors all read from self.runs.
                self.runs.append(run)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment