diff --git a/chipathlon/conf.py b/chipathlon/conf.py index 372f47a7ff0499bfe4e2bcd95eda6bb3cbd683b1..5c2c0d0ff939721498df0ace6bcf956886b777be 100644 --- a/chipathlon/conf.py +++ b/chipathlon/conf.py @@ -9,11 +9,13 @@ job_wrappers = "jobs/wrappers/" # File extensions file_extensions = { - "fna": ["fna"], + "genome_index": ["fa", "fna"], "fastq": ["fastq", "fastq.gz"], "sam": ["sam"], "bam": ["bam"], - "bed": ["bed", "narrowPeak", "broadPeak"] + "bed": ["bed", "narrowPeak", "broadPeak"], + "bwa_genome": ["amb", "ann", "bwt", "pac", "sac"], + "bowtie2_genome": [".bt2"] } # param keys diff --git a/chipathlon/jobs/params/bowtie2_align_paired.yaml b/chipathlon/jobs/params/bowtie2_align_paired.yaml index 20ec2cfb9121ac0f15b4ee58cad159c2185c381e..876009a4797a7ad52940784d2945072b4aa30b28 100644 --- a/chipathlon/jobs/params/bowtie2_align_paired.yaml +++ b/chipathlon/jobs/params/bowtie2_align_paired.yaml @@ -1,17 +1,15 @@ bwa_align_paired: inputs: - - - - fna - - fa + - genome_index - fastq - fastq additional_inputs: - - .1.bt2 - - .2.bt2 - - .3.bt2 - - .4.bt2 - - .rev.1.bt2 - - .rev.2.bt2 + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome outputs: - sam command: bowtie2 diff --git a/chipathlon/jobs/params/bowtie2_align_single.yaml b/chipathlon/jobs/params/bowtie2_align_single.yaml index f6a13c1aa5d31809081264a46d835b35412dcc7a..eafa3d2fd29483d788772d3561cd9f09a5956a7f 100644 --- a/chipathlon/jobs/params/bowtie2_align_single.yaml +++ b/chipathlon/jobs/params/bowtie2_align_single.yaml @@ -1,16 +1,14 @@ bwa_align_single: inputs: - - - - fna - - fa + - genome_index - fastq additional_inputs: - - .1.bt2 - - .2.bt2 - - .3.bt2 - - .4.bt2 - - .rev.1.bt2 - - .rev.2.bt2 + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome + - bowtie2_genome outputs: - sam command: bowtie2 diff --git a/chipathlon/jobs/params/bwa_align_paired.yaml b/chipathlon/jobs/params/bwa_align_paired.yaml index 65d7ebb5ae1b9c824991adfa947180eb3b5ad3d8..6ff5cf27717cfa2f8b44ac6ffa87134a53704b1b 100644 --- a/chipathlon/jobs/params/bwa_align_paired.yaml +++ b/chipathlon/jobs/params/bwa_align_paired.yaml @@ -1,16 +1,14 @@ bwa_align_paired: inputs: - - - - fna - - fa + - genome_index - fastq - fastq additional_inputs: - - .amb - - .ann - - .bwt - - .pac - - .sa + - bwa_genome + - bwa_genome + - bwa_genome + - bwa_genome + - bwa_genome outputs: - sam command: bwa diff --git a/chipathlon/jobs/params/bwa_align_single.yaml b/chipathlon/jobs/params/bwa_align_single.yaml index fe898c989c9ef8c0dda40500cb4dd06fbf99810d..315f6a7a1493f5711c5147f4cc43ab8459f78676 100644 --- a/chipathlon/jobs/params/bwa_align_single.yaml +++ b/chipathlon/jobs/params/bwa_align_single.yaml @@ -1,15 +1,13 @@ bwa_align_single: inputs: - - - - fna - - fa + - genome_index - fastq additional_inputs: - - .amb - - .ann - - .bwt - - .pac - - .sa + - bwa_genome + - bwa_genome + - bwa_genome + - bwa_genome + - bwa_genome outputs: - sai command: bwa diff --git a/chipathlon/jobs/params/bwa_sai_to_sam.yaml b/chipathlon/jobs/params/bwa_sai_to_sam.yaml index cdae73669a39964bd009f0e8d22fa9588b41e4b3..be7f5b0b88acba26e964bb21ed3ff77385bc9cbb 100644 --- a/chipathlon/jobs/params/bwa_sai_to_sam.yaml +++ b/chipathlon/jobs/params/bwa_sai_to_sam.yaml @@ -1,6 +1,6 @@ bwa_sai_to_sam: inputs: - - fa + - genome_index - sai - fastq outputs: diff --git a/chipathlon/yaml_job.py b/chipathlon/yaml_job.py index 4b0c98dcaf2317d58217b4bebf437cd70975f0ef..22dbe5b62962a077dcc0641ccd1a7ef0e3e6dc03 100644 --- a/chipathlon/yaml_job.py +++ b/chipathlon/yaml_job.py @@ -76,22 +76,40 @@ class YamlJob(object): self.err += "Argument '%s' specified in params file does not exist.\n" % (arg,) return - def check_file_names(self, file_names): + def check_files(self, input_files, additional_inputs, output_files): valid_files = True - if (len(file_names) == len(self.base[self.jobname]["inputs"])): - for i,f in enumerate(file_names): - if not f.split(".")[1] in chipathlon.conf.file_extensions[self.base[self.jobname]["inputs"][i]]: - self.err += "File '%s' is not of type '%s'. Should match one of '%s' extensions.\n" % (f, self.base[self.jobname]["inputs"][i], chipathlon.conf.file_extensions[self.base[self.jobname]["inputs"][i]]) - valid_files = False - else: - self.err += "Number of file_names '%s' must match number of expected input files.\n" % (file_names,) - valid_files = False + for ftype, flist in zip(["inputs", "additional_inputs", "outputs"], [input_files, additional_inputs, output_files]): + if (len(flist) == len(self.base[self.jobname][ftype])): + for i,f in enumerate(flist): + if not f["name"].split(".")[1] in chipathlon.conf.file_extensions[self.base[self.jobname][ftype][i]]: + self.err += "File '%s' is not of type '%s'. Should match one of '%s' extensions.\n" % (f, self.base[self.jobname][ftype][i], chipathlon.conf.file_extensions[self.base[self.jobname]["inputs"][i]]) + valid_files = False + else: + self.err += "Number of file_names '%s' must match number of expected %s files.\n" % (file_names, ftype[:-1]) + valid_files = False return valid_files + def create_job(self, executable, input_files, additional_inputs, output_files): + """ + - executable should be the pegasus class + - input_files should be a list of dictionaries: + [{"name": FILE_NAME, "file": FILE_OBJECT},...] + - additional_files should be a list of dictionaries: + [{"name": FILE_NAME, "file": FILE_OBJECT},...] + - output_files should be a list of dictionaries: + [{"name": FILE_NAME, "file": FILE_OBJECT},...] + """ + if self.check_files(input_files, additional_inputs, output_files): + self.job = Job(executable) + else: + + + def create_arg_list(self, file_names, lfns): """ Create the necessary argument list for pegasus. + To add arguments to the pegasus job use: job.addArguments(*arg_list) """