Commit 4b894ba5 authored by Natasha Pavlovikj
Browse files

Add hiddendomains, pepr, zerone to the pipeline

parent 12d2ea5a
......@@ -36,7 +36,10 @@ peak_tools = [
"macs2",
"gem",
"peakranger",
"ccat"
"ccat",
"zerone",
"hiddendomains",
"pepr"
]
# Peak_type validation
......@@ -45,7 +48,10 @@ peak_types = {
"macs2": ["narrow", "broad"],
"gem": ["narrow"],
"peakranger": ["narrow"],
"ccat": ["broad"]
"ccat": ["broad"],
"zerone": ["broad"],
"hiddendomains": ["broad"],
"pepr": ["narrow", "broad"]
}
# File extensions
......
......@@ -30,7 +30,10 @@ class IdrGenerator(ModuleGenerator):
"ccat": ["region_sorted.bed", "peak_sorted.bed"],
"gem": ["results_GEM_sorted.bed", "results_GPS_sorted.bed"],
"spp": ["results_sorted.bed"],
"macs2": ["results_sorted.bed"]
"zerone": ["results_sorted_final.bed"],
"hiddendomains": ["results_final.bed"],
"macs2": ["results_sorted.bed"],
"pepr": ["pepr_result.bed"]
}
if debug:
print "[LOADING GENERATOR] IdrGenerator"
......
......@@ -29,7 +29,10 @@ class PeakCallGenerator(ModuleGenerator):
"spp": self._spp,
"macs2": self._macs2,
"ccat": self._ccat,
"peakranger": self._peakranger
"peakranger": self._peakranger,
"zerone": self._zerone,
"hiddendomains": self._hiddendomains,
"pepr": self._hiddendomains
}
self.call_pairs = {}
if debug:
......@@ -125,6 +128,53 @@ class PeakCallGenerator(ModuleGenerator):
additional_inputs = {}
return (self.get_markers(run), inputs, additional_inputs)
def _zerone(self, run, result):
    """
    Build the markers and input mapping for a zerone peak-calling job.

    The pair stored in ``self.call_pairs`` under ``result.full_name``
    supplies the file names: element 0 fills ``control.bam`` and
    element 1 fills ``exp.bam``.

    :param run: The run to generate jobs for
    :type run: :py:class:chipathlon.run.Run
    :param result: The result to generate jobs for.
    :type result: :py:class:chipathlon.result.Result
    :returns: Tuple of (markers, inputs, additional_inputs); the
        additional inputs are always an empty dict.
    """
    pair = self.call_pairs[result.full_name]
    control_name = pair[0].full_name
    signal_name = pair[1].full_name
    inputs = {"control.bam": control_name, "exp.bam": signal_name}
    # Markers are resolved last, after the input mapping has been built.
    return (self.get_markers(run), inputs, {})
def _hiddendomains(self, run, result):
    """
    Build the markers and input mapping for a hiddendomains peak-calling job.

    Besides the control/signal pair looked up in ``self.call_pairs``,
    the job receives the name of the genome's chrom sizes file and the
    prefix carried by ``result``.

    :param run: The run to generate jobs for
    :type run: :py:class:chipathlon.run.Run
    :param result: The result to generate jobs for.
    :type result: :py:class:chipathlon.result.Result
    :returns: Tuple of (markers, inputs, additional_inputs); the
        additional inputs are always an empty dict.
    """
    pair = self.call_pairs[result.full_name]
    chrom_sizes_name = run.genome.get_chrom_sizes()["name"]
    control_name = pair[0].full_name
    signal_name = pair[1].full_name
    inputs = {
        "chrom.sizes": chrom_sizes_name,
        "control.bed": control_name,
        "exp.bed": signal_name,
        "prefix": result.prefix
    }
    # Markers are resolved last, after the input mapping has been built.
    return (self.get_markers(run), inputs, {})
def _pepr(self, run, result):
    """
    Build the markers and input mapping for a pepr peak-calling job.

    The pair stored in ``self.call_pairs`` under ``result.full_name``
    supplies the file names: element 0 fills ``control.bed`` and
    element 1 fills ``signal.bed``.

    :param run: The run to generate jobs for
    :type run: :py:class:chipathlon.run.Run
    :param result: The result to generate jobs for.
    :type result: :py:class:chipathlon.result.Result
    :returns: Tuple of (markers, inputs, additional_inputs); the
        additional inputs are always an empty dict.
    """
    pair = self.call_pairs[result.full_name]
    control_name = pair[0].full_name
    signal_name = pair[1].full_name
    inputs = {"control.bed": control_name, "signal.bed": signal_name}
    # Markers are resolved last, after the input mapping has been built.
    return (self.get_markers(run), inputs, {})
def _make_call_pairs(self, run, result_list):
"""
:param run: The run currently being processed.
......@@ -164,12 +214,23 @@ class PeakCallGenerator(ModuleGenerator):
:param run: The target run to generate jobs for.
:type run: :py:class:chipathlon.run.Run
"""
remove_duplicates_results = run.get_results("remove_duplicates", "no_dups_chr.bed")
if run.peak == "zerone":
print "zerone"
if run.file_type == "fastq":
results = run.get_results("align", "align.bam")
elif run.file_type == "bam":
results = run.get_results("download", "encode.bam")
# results = run.get_results("align", "align.bam")
print results
else:
results = run.get_results("remove_duplicates", "no_dups_chr.bed")
module_markers = {"peak_call": self.get_markers(run)}
all_result_names = []
final_results = self.module.get_all_final_results(self.get_markers(run))
for paired_result in self._make_call_pairs(run, remove_duplicates_results):
for paired_result in self._make_call_pairs(run, results):
for i, final_result in enumerate(final_results):
final_result_name = final_result["file_name"]
if final_result_name not in all_result_names:
......@@ -198,11 +259,23 @@ class PeakCallGenerator(ModuleGenerator):
:param result: The target result to create jobs for.
:type result: :py:class:chipathlon.result.Result
"""
remove_duplicate_results = run.get_results("remove_duplicates", "no_dups_chr.bed")
if run.peak == "zerone":
print "zerone"
if run.file_type == "fastq":
results = run.get_results("align", "align.bam")
elif run.file_type == "bam":
results = run.get_results("download", "encode.bam")
# results = run.get_results("align", "align.bam")
print results
else:
results = run.get_results("remove_duplicates", "no_dups_chr.bed")
prev_results = []
control_accessions = result.get_accessions("control")
signal_accessions = result.get_accessions("signal")
for prev_result in remove_duplicate_results:
for prev_result in results:
if (set(prev_result.get_accessions("control")).issubset(control_accessions) and
set(prev_result.get_accessions("signal")).issubset(signal_accessions)):
prev_results.append(prev_result)
......
......@@ -245,3 +245,82 @@ peak_call:
- results_sorted.bed:
type: file
final_result: true
# zerone workflow. Only the broad peak type is defined. Three chained jobs:
# zerone_callpeak -> sort_awk_sort_peaks -> zerone_add_columns.
- zerone[tool]:
- broad[peak_type]:
# Job 1: peak calling on the control/experiment BAM pair. The input names
# match the keys produced by PeakCallGenerator._zerone.
- zerone_callpeak:
inputs:
- control.bam:
type: file
- exp.bam:
type: file
additional_inputs: null
outputs:
# Declared with type stdout rather than file.
- zerone_output.bed:
type: stdout
# Job 2: consumes the captured zerone output and writes results_sorted.bed.
- sort_awk_sort_peaks:
inputs:
- zerone_output.bed:
type: file
additional_inputs: null
outputs:
- results_sorted.bed:
type: file
# Job 3: produces the file marked as the final result; this is the file name
# the IDR generator lists for zerone.
- zerone_add_columns:
inputs:
- results_sorted.bed:
type: file
additional_inputs: null
outputs:
- results_sorted_final.bed:
type: file
final_result: true
# hiddendomains workflow. Only the broad peak type is defined. Two chained
# jobs: hiddendomains_callpeak -> hiddendomains_add_columns.
- hiddendomains[tool]:
- broad[peak_type]:
# Job 1: peak calling. The four input names match the keys produced by
# PeakCallGenerator._hiddendomains; prefix is a string, the rest are files.
- hiddendomains_callpeak:
inputs:
- control.bed:
type: file
- exp.bed:
type: file
- chrom.sizes:
type: file
- prefix:
type: string
additional_inputs: null
outputs:
# Five output files are declared; only analysis.bed is consumed by the next job.
- analysis.bed:
type: file
- vis.bed:
type: file
- domains.txt:
type: file
- control_bins.txt:
type: file
- treatment_bins.txt:
type: file
# Job 2: produces the file marked as the final result; this is the file name
# the IDR generator lists for hiddendomains.
- hiddendomains_add_columns:
inputs:
- analysis.bed:
type: file
additional_inputs: null
outputs:
- results_final.bed:
type: file
final_result: true
# pepr workflow: a single pepr_callpeak job.
# NOTE(review): only the broad peak type is defined here, while the peak_types
# validation table in this commit lists both "narrow" and "broad" for pepr --
# confirm whether a narrow[peak_type] section is missing.
- pepr[tool]:
- broad[peak_type]:
# Input names match the keys produced by PeakCallGenerator._pepr.
- pepr_callpeak:
inputs:
- control.bed:
type: file
- signal.bed:
type: file
additional_inputs: null
outputs:
# Only pepr_result.bed is marked as a final result; it is the file name the
# IDR generator lists for pepr.
- pepr_result.bed:
type: file
final_result: true
- pepr_parameters.txt:
type: file
- pepr_log.txt:
type: file
# Job definition for hiddendomains_add_columns.sh: one BED input, one BED
# output, both passed as positional arguments.
hiddendomains_add_columns:
inputs:
- name: peak_result
type: file
file_type: bed
additional_inputs: null
outputs:
- name: full_result
type: file
file_type: bed
command: hiddendomains_add_columns.sh
arguments:
# The argument keys are the placeholders themselves and has_value is false;
# presumably they are replaced by the first input / first output path --
# confirm against the parameter loader.
- "$inputs.0":
type: file
changeable: false
required: true
has_value: false
- "$outputs.0":
type: file
changeable: false
required: true
has_value: false
# Resource requests; units are not stated in this file.
walltime: 2000
memory: 2000
cores: 1
nodes: 1
# Job definition for the hiddendomains command: three file inputs plus a
# string prefix, five declared output files.
hiddendomains_callpeak:
inputs:
# Input order matters: the arguments below refer to inputs by index.
- name: control_bed
type: file
file_type: bed
- name: signal_bed
type: file
file_type: bed
- name: chrom_sizes
type: file
file_type: chrom_sizes
- name: prefix
type: string
additional_inputs: null
outputs:
- name: hiddendomains_result
type: file
file_type: bed
- name: enriched_bins_vis
type: file
file_type: bed
- name: domains
type: file
file_type: txt
- name: control_bins
type: file
file_type: txt
- name: treatment_bins
type: file
file_type: txt
command: hiddendomains
arguments:
# Fixed flag without a value (has_value: false). Presumably tells
# hiddenDomains the inputs are BED files -- confirm against the tool manual.
- "-B":
type: string
changeable: false
required: true
has_value: false
# -c receives input 0 (control_bed).
- "-c":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.0"
# -t receives input 1 (signal_bed).
- "-t":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.1"
# -g receives input 2 (chrom_sizes).
- "-g":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.2"
# -o receives input 3 (prefix).
# NOTE(review): this argument is typed "file" but input 3 is declared as a
# string -- confirm the loader accepts the mismatch.
- "-o":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.3"
# User-changeable numeric options with their defaults. Presumably bin width,
# minimum posterior and minimum mapping quality -- confirm against the tool manual.
- "-b":
type: numeric
changeable: true
required: true
has_value: true
default: 1000
- "-p":
type: numeric
changeable: true
required: true
has_value: true
default: 0
- "-q":
type: numeric
changeable: true
required: true
has_value: true
default: 30
# Resource requests; units are not stated in this file.
walltime: 240
memory: 16000
cores: 1
nodes: 1
# Job definition for the pepr command: control and signal BED inputs, three
# declared outputs (result BED, parameters text, log text).
pepr_callpeak:
inputs:
# Input order matters: the arguments below refer to inputs by index.
- name: control
type: file
file_type: bed
- name: signal
type: file
file_type: bed
additional_inputs: null
outputs:
- name: pepr_result
type: file
file_type: bed
- name: pepr_parameters
type: file
file_type: txt
- name: pepr_log
type: file
file_type: txt
command: pepr
arguments:
# -c lists input 0 (control) twice, comma-separated.
# NOTE(review): presumably this supplies the same file as two replicates
# because PePr expects replicates -- confirm this is intended.
- "-c":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.0,$inputs.0"
# -i lists input 1 (signal) twice, in the same way as -c.
- "-i":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.1,$inputs.1"
# The remaining options are user-changeable (except --verbose) and carry the
# defaults shown; their meaning is defined by PePr, not by this file.
- "-n":
type: string
changeable: true
required: false
has_value: true
default: "NA"
- "-f":
type: string
changeable: true
required: true
has_value: true
default: "bed"
- "-s":
type: numeric
changeable: true
required: false
has_value: true
default: 10
- "-w":
type: numeric
changeable: true
required: false
has_value: true
default: 100
# Boolean flag without a value and without a default.
- "--diff":
type: boolean
changeable: true
required: false
has_value: false
# The threshold is stored as a string, not a numeric.
- "--threshold":
type: string
changeable: true
required: false
has_value: true
default: "1e-5"
# NOTE(review): defaults to "broad" and is changeable, while the peak_call
# workflow in this commit defines only a broad[peak_type] section for pepr.
- "--peaktype":
type: string
changeable: true
required: true
has_value: true
default: "broad"
- "--normalization":
type: string
changeable: true
required: false
has_value: true
default: "intra-group"
- "--keep-max-dup":
type: numeric
changeable: true
required: false
has_value: true
default: 10000000
- "--num-processors":
type: numeric
changeable: true
required: false
has_value: true
default: 1
# Both directory options default to the empty string.
- "--input-directory":
type: string
changeable: true
required: false
has_value: true
default: ""
- "--output-directory":
type: string
changeable: true
required: false
has_value: true
default: ""
# Fixed, required flag without a value.
- "--verbose":
type: string
changeable: false
required: true
has_value: false
# Resource requests; units are not stated in this file.
walltime: 120
memory: 16000
cores: 1
nodes: 1
......@@ -8,8 +8,7 @@ picard_mark_duplicates:
- name: dups_marked
type: file
file_type: bam
- name: quaality
- name: quality
type: file
file_type: qc
command: picard
......
# Job definition for zerone_add_columns.sh: one BED input, one BED output,
# both passed as positional arguments.
zerone_add_columns:
inputs:
- name: peak_result
type: file
file_type: bed
additional_inputs: null
outputs:
- name: full_result
type: file
file_type: bed
command: zerone_add_columns.sh
arguments:
# The argument keys are the placeholders themselves and has_value is false;
# presumably they are replaced by the first input / first output path --
# confirm against the parameter loader.
- "$inputs.0":
type: file
changeable: false
required: true
has_value: false
- "$outputs.0":
type: file
changeable: false
required: true
has_value: false
# Resource requests; units are not stated in this file.
walltime: 2000
memory: 2000
cores: 1
nodes: 1
# Job definition for the zerone command: control and signal BAM inputs, one
# BED result that is declared with type stdout.
zerone_callpeak:
inputs:
# Input order matters: the arguments below refer to inputs by index.
- name: control_bam
type: file
file_type: bam
- name: signal_bam
type: file
file_type: bam
additional_inputs: null
outputs:
- name: zerone_result
type: stdout
file_type: bed
command: zerone
arguments:
# --mock receives input 0 (control_bam).
- "--mock":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.0"
# --chip receives input 1 (signal_bam).
- "--chip":
type: file
changeable: false
required: true
has_value: true
default: "$inputs.1"
# User-changeable numeric options with their defaults.
- "--window":
type: numeric
changeable: true
required: true
has_value: true
default: 300
- "--quality":
type: numeric
changeable: true
required: true
has_value: true
default: 20
# Resource requests; units are not stated in this file.
walltime: 120
memory: 16000
cores: 1
nodes: 1
#!/bin/bash
# Copy every line of the input file to the output file with five extra
# zero-valued fields appended.
# Usage: <script> INPUT_FILE OUTPUT_FILE
# The appended fields are joined with awk's default output separator
# (a single space).
input_file="$1"
output_file="$2"
awk '{ print $0, 0, 0, 0, 0, 0 }' "$input_file" > "$output_file"
#!/bin/bash
# Copy every line of the input file to the output file with three extra
# zero-valued fields appended.
# Usage: <script> INPUT_FILE OUTPUT_FILE
# The appended fields are joined with awk's default output separator
# (a single space).
input_file="$1"
output_file="$2"
awk '{ print $0, 0, 0, 0 }' "$input_file" > "$output_file"
#!/bin/bash
# Wrapper for hiddenDomains: initialise Lmod, load the hiddendomains/2.3
# module, then run hiddenDomains with every argument forwarded unchanged.
source /util/opt/lmod/lmod/init/profile
module load hiddendomains/2.3
hiddenDomains "$@"
#!/bin/bash
# Wrapper for PePr: initialise Lmod, load the pepr/1.1 module, then run
# PePr with every argument forwarded unchanged.
source /util/opt/lmod/lmod/init/profile
module load pepr/1.1
PePr "$@"
......@@ -3,45 +3,17 @@
#set -o pipefail
. /util/opt/lmod/lmod/init/profile
module load picard/1.1
module load picard/2.9
java=java
# Use Java installed with Anaconda to ensure correct version
java="$ENV_PREFIX/bin/java"
if [ -z "${JAVA_HOME:=}" ]; then
# if JAVA_HOME is set (non-empty), use it. Otherwise keep "java"
if [ ! -z "${JAVA_HOME:=}" ]; then
if [ -e "$JAVA_HOME/bin/java" ]; then
java="$JAVA_HOME/bin/java"
fi
fi
# extract memory and system property Java arguments from the list of provided arguments
# http://java.dzone.com/articles/better-java-shell-script
default_jvm_mem_opts="-Xms512m -Xmx1g"
jvm_mem_opts=""
jvm_prop_opts=""
jar_file=""
pass_args=""
for arg in "$@"; do
case $arg in
'-D'*)
jvm_prop_opts="$jvm_prop_opts $arg"
;;
'-XX'*)
jvm_prop_opts="$jvm_prop_opts $arg"
;;
'-Xm'*)