Commit dad2ad96 authored by aknecht2
Browse files

Update to conf & file_extensions handling. Updated file type validation.

parent 6d73d6b3
......@@ -9,11 +9,13 @@ job_wrappers = "jobs/wrappers/"
# File extensions
file_extensions = {
"fna": ["fna"],
"genome_index": ["fa", "fna"],
"fastq": ["fastq", "fastq.gz"],
"sam": ["sam"],
"bam": ["bam"],
"bed": ["bed", "narrowPeak", "broadPeak"]
"bed": ["bed", "narrowPeak", "broadPeak"],
"bwa_genome": ["amb", "ann", "bwt", "pac", "sac"],
"bowtie2_genome": [".bt2"]
}
# param keys
......
bwa_align_paired:
inputs:
-
- fna
- fa
- genome_index
- fastq
- fastq
additional_inputs:
- .1.bt2
- .2.bt2
- .3.bt2
- .4.bt2
- .rev.1.bt2
- .rev.2.bt2
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
outputs:
- sam
command: bowtie2
......
bwa_align_single:
inputs:
-
- fna
- fa
- genome_index
- fastq
additional_inputs:
- .1.bt2
- .2.bt2
- .3.bt2
- .4.bt2
- .rev.1.bt2
- .rev.2.bt2
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
- bowtie2_genome
outputs:
- sam
command: bowtie2
......
bwa_align_paired:
inputs:
-
- fna
- fa
- genome_index
- fastq
- fastq
additional_inputs:
- .amb
- .ann
- .bwt
- .pac
- .sa
- bwa_genome
- bwa_genome
- bwa_genome
- bwa_genome
- bwa_genome
outputs:
- sam
command: bwa
......
bwa_align_single:
inputs:
-
- fna
- fa
- genome_index
- fastq
additional_inputs:
- .amb
- .ann
- .bwt
- .pac
- .sa
- bwa_genome
- bwa_genome
- bwa_genome
- bwa_genome
- bwa_genome
outputs:
- sai
command: bwa
......
bwa_sai_to_sam:
inputs:
- fa
- genome_index
- sai
- fastq
outputs:
......
......@@ -76,22 +76,40 @@ class YamlJob(object):
self.err += "Argument '%s' specified in params file does not exist.\n" % (arg,)
return
def check_file_names(self, file_names):
def check_files(self, input_files, additional_inputs, output_files):
valid_files = True
if (len(file_names) == len(self.base[self.jobname]["inputs"])):
for i,f in enumerate(file_names):
if not f.split(".")[1] in chipathlon.conf.file_extensions[self.base[self.jobname]["inputs"][i]]:
self.err += "File '%s' is not of type '%s'. Should match one of '%s' extensions.\n" % (f, self.base[self.jobname]["inputs"][i], chipathlon.conf.file_extensions[self.base[self.jobname]["inputs"][i]])
valid_files = False
else:
self.err += "Number of file_names '%s' must match number of expected input files.\n" % (file_names,)
valid_files = False
for ftype, flist in zip(["inputs", "additional_inputs", "outputs"], [input_files, additional_inputs, output_files]):
if (len(flist) == len(self.base[self.jobname][ftype])):
for i,f in enumerate(flist):
if not f["name"].split(".")[1] in chipathlon.conf.file_extensions[self.base[self.jobname][ftype][i]]:
self.err += "File '%s' is not of type '%s'. Should match one of '%s' extensions.\n" % (f, self.base[self.jobname][ftype][i], chipathlon.conf.file_extensions[self.base[self.jobname]["inputs"][i]])
valid_files = False
else:
self.err += "Number of file_names '%s' must match number of expected %s files.\n" % (file_names, ftype[:-1])
valid_files = False
return valid_files
def create_job(self, executable, input_files, additional_inputs, output_files):
"""
- executable should be the pegasus class
- input_files should be a list of dictionaries:
[{"name": FILE_NAME, "file": FILE_OBJECT},...]
- additional_inputs should be a list of dictionaries:
[{"name": FILE_NAME, "file": FILE_OBJECT},...]
- output_files should be a list of dictionaries:
[{"name": FILE_NAME, "file": FILE_OBJECT},...]
"""
if self.check_files(input_files, additional_inputs, output_files):
self.job = Job(executable)
else:
def create_arg_list(self, file_names, lfns):
"""
Create the necessary argument list for pegasus.
To add arguments to the pegasus job use:
job.addArguments(*arg_list)
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment