Skip to content
Snippets Groups Projects
Commit b9b424a3 authored by npavlovikj2
Browse files

Merge branch '4-enhance-type-strictness-and-parsing' into 'master'

Resolve "Enhance type strictness and parsing"

Closes #4 
Most of the modifications are in workflow_job.py.
Many changes were also made across all the YAML files so that they conform to the new standard.

See merge request !14
parents 46bd2574 cb13b3a2
No related branches found
No related tags found
1 merge request: !14 Resolve "Enhance type strictness and parsing"
Showing
with 333 additions and 333 deletions
......@@ -39,7 +39,6 @@ peak_tools = [
# File extensions
file_extensions = {
"any": ["fastq", "fastq.gz", "bam"],
"genome_index": ["fa", "fna"],
"fastq": ["fastq", "fastq.gz"],
"sai": ["sai"],
......@@ -62,6 +61,7 @@ file_extensions = {
"ccat_conf": ["txt", "conf"],
"log": ["log"]
}
# Build the catch-all "any" entry by flattening every extension list already
# present in file_extensions into one list. Nothing is de-duplicated, so an
# extension that appears under several types appears several times here.
file_extensions["any"] = [extension for ext_list in file_extensions.values() for extension in ext_list]
# list of resources that can be specified per job (step) in
# the workflow and corresponding Pegasus profile info
......@@ -103,7 +103,7 @@ argument_types = {
# Defines information about arguments
argument_keys = {
"required": ["type", "changeable", "has_value"],
"optional": ["required", "default"]
"optional": ["required", "default", "file_type"]
}
# workflow_job keys
......@@ -118,6 +118,11 @@ param_keys = {
"optional": ["arguments"] + resources.keys()
}
# Keys recognised on a file-list entry: "name" and "type" are mandatory,
# "file_type" may be omitted.
# NOTE(review): presumably applied to the inputs / additional_inputs / outputs
# entries of the job YAML files -- confirm against workflow_job.py.
file_list_keys = {
"required": ["name", "type"],
"optional": ["file_type"]
}
# workflow order
workflow = ["align", "remove_duplicates", "peak_calling"]
......
......@@ -51,35 +51,6 @@ peak_call:
outputs:
- results_sorted.narrowPeak:
type: file
- jamm[tool]:
- jamm_prep:
inputs:
- exp.bed:
type: file
- control.bed:
type: file
- jamm_dir:
type: string
additional_inputs: null
outputs:
- jamm_exp.bed:
type: file
- jamm_control.bed:
type: file
- jamm_callpeak:
inputs:
- chrom.sizes:
type: file
- jamm_dir:
type: string
additional_inputs:
- jamm_exp.bed:
type: file
- jamm_control.bed:
type: file
outputs:
- results_sorted.narrowPeak:
type: file
- gem[tool]:
- gem_callpeak:
inputs:
......@@ -100,7 +71,7 @@ peak_call:
- results_sorted.narrowPeak:
type: file
- spp[tool]:
- cp_bed_tagalign_exp:
- cp_bed_tagalign:
inputs:
- exp.bed:
type: file
......@@ -108,7 +79,7 @@ peak_call:
outputs:
- exp.tagAlign:
type: file
- cp_bed_tagalign_control:
- cp_bed_tagalign:
inputs:
- control.bed:
type: file
......
bedtools_bam_to_bed:
inputs:
- bam:
- name: input_bam
type: file
file_type: bam
additional_inputs: null
outputs:
- bed:
- name: output_bed
type: stdout
file_type: bed
command: bedtools
arguments:
- "bamtobed":
......
bowtie2_align_paired:
inputs:
- genome_prefix:
- name: genome_prefix
type: string
- fastq:
- name: fastq1
type: file
- fastq:
file_type: fastq
- name: fastq2
type: file
file_type: fastq
additional_inputs:
- genome_index:
- name: base_genome_file
type: file
- bowtie2_genome:
file_type: genome_index
- name: genome.1.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.2.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.3.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.4.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.rev.1.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.rev.2.bt2
type: file
file_type: bowtie2_genome
outputs:
- sam:
- name: output_sam
type: file
- quality:
file_type: sam
- name: fastq_quality
type: stderr
file_type: quality
command: bowtie2
arguments:
- "-x":
......
bowtie2_align_single:
inputs:
- genome_prefix:
- name: genome_prefix
type: string
- fastq:
- name: fastq1
type: file
file_type: fastq
additional_inputs:
- genome_index:
- name: base_genome_file
type: file
- bowtie2_genome:
file_type: genome_index
- name: genome.1.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.2.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.3.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.4.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.rev.1.bt2
type: file
- bowtie2_genome:
file_type: bowtie2_genome
- name: genome.rev.2.bt2
type: file
file_type: bowtie2_genome
outputs:
- sam:
- name: output_sam
type: file
- quality:
file_type: sam
- name: fastq_quality
type: stderr
file_type: quality
command: bowtie2
arguments:
- "-x":
......
bwa_align_paired:
inputs:
- genome_index:
- name: base_genome_file
type: file
- fastq:
file_type: genome_index
- name: fastq1
type: file
- fastq:
file_type: fastq
- name: fastq2
type: file
file_type: fastq
additional_inputs:
- bwa_genome:
- name: genome.fna.amb
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.ann
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.bwt
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.pac
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.sa
type: file
file_type: bwa_genome
outputs:
- sam:
type: stderr
- name: output_sam
type: file
file_type: sam
command: bwa
arguments:
- "mem":
......
bwa_align_single:
inputs:
- genome_index:
- name: base_genome_file
type: file
- fastq:
file_type: genome_index
- name: fastq1
type: file
file_type: fastq
additional_inputs:
- bwa_genome:
- name: genome.fna.amb
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.ann
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.bwt
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.pac
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.sa
type: file
file_type: bwa_genome
outputs:
- sai:
- name: output_sai
type: file
file_type: sai
command: bwa
arguments:
- aln:
......
bwa_sai_to_sam:
inputs:
- genome_index:
- name: base_genome_file
type: file
- sai:
file_type: genome_index
- name: input_sai
type: file
- fastq:
file_type: sai
- name: base_fastq
type: file
file_type: fastq
additional_inputs:
- bwa_genome:
- name: genome.fna.amb
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.ann
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.bwt
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.pac
type: file
- bwa_genome:
file_type: bwa_genome
- name: genome.fna.sa
type: file
file_type: bwa_genome
outputs:
- sam:
- name: output_sam
type: file
file_type: sam
command: bwa
arguments:
- samse:
......
cat_awk_sort_peaks:
inputs:
- bed:
- name: peak_result
type: file
file_type: bed
additional_inputs: null
outputs:
- bed:
- name: sorted_result
type: file
file_type: bed
command: cat_spp.sh
arguments:
- "$inputs.0":
......
ccat_callpeak:
inputs:
- bed:
- name: signal_bed
type: file
- bed:
file_type: bed
- name: control_bed
type: file
- chrom_sizes:
file_type: bed
- name: chrom_sizes
type: file
- ccat_conf:
file_type: chrom_sizes
- name: ccat_configuration
type: file
- prefix:
file_type: ccat_conf
- name: prefix
type: string
additional_inputs: null
outputs:
- bed:
- name: significant_peak
type: file
- bed:
file_type: bed
- name: significant_region
type: file
- bed:
file_type: bed
- name: top_100000_peak
type: file
- log:
file_type: bed
- name: ccat_log
type: stdout
file_type: log
command: ccat
arguments:
- "$inputs.0":
......@@ -39,6 +54,7 @@ ccat_callpeak:
has_value: false
- "$inputs.3":
type: rawfile
file_type: txt
changeable: true
required: true
has_value: false
......
chr_locus_convert:
inputs:
- bed:
- name: aligned_bed
type: file
file_type: bed
additional_inputs: null
outputs:
- bed:
- name: chr_bed
type: file
file_type: bed
command: chr_locus_convert.py
arguments:
- "-b":
......
cp_bed_tagalign_control:
cp_bed_tagalign:
inputs:
- bed:
- name: peak_result
type: file
file_type: bed
additional_inputs: null
outputs:
- bed:
- name: tag_align_result
type: file
file_type: bed
command: cp
arguments:
- "$inputs.0":
......
cp_bed_tagalign_exp:
inputs:
- bed:
type: file
additional_inputs: null
outputs:
- bed:
type: file
command: cp
arguments:
- "$inputs.0":
type: file
changeable: false
required: true
has_value: false
- "$outputs.0":
type: file
changeable: false
required: true
has_value: false
walltime: 2000
memory: 2000
cores: 1
nodes: 1
db_save_result:
inputs:
- username:
type: argument
- password:
type: argument
- host:
type: argument
- result:
- name: username
type: string
- name: password
type: string
- name: host
type: string
- name: result
type: file
- yaml:
file_type: any
- name: meta_data
type: file
file_type: yaml
additional_inputs: null
outputs: null
command: db_save_result.py
......
download_from_encode:
inputs:
- url:
- name: url
type: string
- md5:
- name: md5
type: string
additional_inputs: null
outputs:
- any:
- name: downloaded_file
type: file
file_type: any
command: download_from_encode.py
arguments:
- "-u":
......
download_from_gridfs:
inputs:
- host:
- name: username
type: string
- username:
- name: password
type: string
- password:
- name: host
type: string
- id:
- name: gfs_id
type: string
additional_inputs: null
outputs:
- result:
- name: downloaded_result
type: file
file_type: any
command: download_from_gridfs.py
arguments:
- "-H":
......@@ -19,19 +23,19 @@ download_from_gridfs:
changeable: false
required: true
has_value: true
default: $inputs.0
default: $inputs.2
- "-u":
type: string
changeable: false
required: true
has_value: true
default: $inputs.1
default: $inputs.0
- "-p":
type: string
changeable: false
required: true
has_value: true
default: $inputs.2
default: $inputs.1
- "-i":
type: string
changeable: false
......
gem_callpeak:
inputs:
- chrom_sizes:
- name: chrom_sizes
type: file
- bed:
file_type: chrom_sizes
- name: signal_bed
type: file
- bed:
file_type: bed
- name: control_bed
type: file
- prefix:
type: argument
file_type: bed
- name: prefix
type: string
additional_inputs:
- read_dist:
- name: read_distribution
type: file
- chr_fasta:
file_type: read_dist
- name: chr_fasta
type: list
file_type: chr_fasta
outputs:
- bed:
- name: peak_result
type: file
file_type: bed
command: gem
arguments:
- "--d":
type: rawfile
file_type: txt
changeable: true
required: true
has_value: true
......
jamm_callpeak:
inputs:
- chrom_sizes:
type: file
- dir:
type: string
additional_inputs:
- bed:
type: file
- bed:
type: file
outputs:
- bed:
type: file
command: jamm
arguments:
- "-s":
type: string
changeable: false
required: true
has_value: true
default: "$inputs.1/sample"
- "-c":
type: string
changeable: false
required: true
has_value: true
default: "$inputs.1/control"
- "-p":
type: numeric
changeable: true
required: false
has_value: true
default: 8
- "-g":
type: file
changeable: false
required: true
has_value: true
default: $inputs.0
- "-o":
type: string
changeable: false
required: true
has_value: true
default: $inputs.1
walltime: 1000
memory: 32000
cores: 8
nodes: 1
jamm_prep:
inputs:
- bed:
type: file
- bed:
type: file
- dir:
type: argument
additional_inputs: null
outputs:
- bed:
type: file
- bed:
type: file
command: prep_jamm_inputs.py
arguments:
- "--exp":
type: file
changeable: false
required: true
has_value: true
default: $inputs.0
- "--control":
type: file
changeable: false
required: true
has_value: true
default: $inputs.1
- "--dir":
type: string
changeable: false
required: true
has_value: true
default: $inputs.2
walltime: 60
memory: 4000
cores: 1
nodes: 1
macs2_callpeak:
inputs:
- bed:
- name: signal_bed
type: file
- bed:
file_type: bed
- name: control_bed
type: file
- prefix:
type: argument
file_type: bed
- name: prefix
type: string
additional_inputs: null
outputs:
- bed:
- name: result_peak
type: file
- xls:
file_type: bed
- name: excel_result
type: file
- bed:
file_type: xls
- name: result_summit
type: file
file_type: bed
command: macs2
arguments:
- callpeak:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment