Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Holland Computing Center
chipathlon
Commits
4b894ba5
Commit
4b894ba5
authored
Jun 05, 2017
by
Natasha Pavlovikj
Browse files
Add hiddendomains, pepr, zerone to the pipeline
parent
12d2ea5a
Changes
18
Hide whitespace changes
Inline
Side-by-side
chipathlon/conf.py
View file @
4b894ba5
...
@@ -36,7 +36,10 @@ peak_tools = [
...
@@ -36,7 +36,10 @@ peak_tools = [
"macs2"
,
"macs2"
,
"gem"
,
"gem"
,
"peakranger"
,
"peakranger"
,
"ccat"
"ccat"
,
"zerone"
,
"hiddendomains"
,
"pepr"
]
]
# Peak_type validation
# Peak_type validation
...
@@ -45,7 +48,10 @@ peak_types = {
...
@@ -45,7 +48,10 @@ peak_types = {
"macs2"
:
[
"narrow"
,
"broad"
],
"macs2"
:
[
"narrow"
,
"broad"
],
"gem"
:
[
"narrow"
],
"gem"
:
[
"narrow"
],
"peakranger"
:
[
"narrow"
],
"peakranger"
:
[
"narrow"
],
"ccat"
:
[
"broad"
]
"ccat"
:
[
"broad"
],
"zerone"
:
[
"broad"
],
"hiddendomains"
:
[
"broad"
],
"pepr"
:
[
"narrow"
,
"broad"
]
}
}
# File extensions
# File extensions
...
...
chipathlon/generators/idr_generator.py
View file @
4b894ba5
...
@@ -30,7 +30,10 @@ class IdrGenerator(ModuleGenerator):
...
@@ -30,7 +30,10 @@ class IdrGenerator(ModuleGenerator):
"ccat"
:
[
"region_sorted.bed"
,
"peak_sorted.bed"
],
"ccat"
:
[
"region_sorted.bed"
,
"peak_sorted.bed"
],
"gem"
:
[
"results_GEM_sorted.bed"
,
"results_GPS_sorted.bed"
],
"gem"
:
[
"results_GEM_sorted.bed"
,
"results_GPS_sorted.bed"
],
"spp"
:
[
"results_sorted.bed"
],
"spp"
:
[
"results_sorted.bed"
],
"macs2"
:
[
"results_sorted.bed"
]
"zerone"
:
[
"results_sorted_final.bed"
],
"hiddendomains"
:
[
"results_final.bed"
],
"macs2"
:
[
"results_sorted.bed"
],
"pepr"
:
[
"pepr_result.bed"
]
}
}
if
debug
:
if
debug
:
print
"[LOADING GENERATOR] IdrGenerator"
print
"[LOADING GENERATOR] IdrGenerator"
...
...
chipathlon/generators/peak_call_generator.py
View file @
4b894ba5
...
@@ -29,7 +29,10 @@ class PeakCallGenerator(ModuleGenerator):
...
@@ -29,7 +29,10 @@ class PeakCallGenerator(ModuleGenerator):
"spp"
:
self
.
_spp
,
"spp"
:
self
.
_spp
,
"macs2"
:
self
.
_macs2
,
"macs2"
:
self
.
_macs2
,
"ccat"
:
self
.
_ccat
,
"ccat"
:
self
.
_ccat
,
"peakranger"
:
self
.
_peakranger
"peakranger"
:
self
.
_peakranger
,
"zerone"
:
self
.
_zerone
,
"hiddendomains"
:
self
.
_hiddendomains
,
"pepr"
:
self
.
_pepr
}
}
self
.
call_pairs
=
{}
self
.
call_pairs
=
{}
if
debug
:
if
debug
:
...
@@ -125,6 +128,53 @@ class PeakCallGenerator(ModuleGenerator):
...
@@ -125,6 +128,53 @@ class PeakCallGenerator(ModuleGenerator):
additional_inputs
=
{}
additional_inputs
=
{}
return
(
self
.
get_markers
(
run
),
inputs
,
additional_inputs
)
return
(
self
.
get_markers
(
run
),
inputs
,
additional_inputs
)
def
_zerone
(
self
,
run
,
result
):
"""
:param run: The run to generate jobs for
:type run: :py:class:chipathlon.run.Run
:param result: The result to generate jobs for.
:type result: :py:class:chipathlon.result.Result
"""
call_pair
=
self
.
call_pairs
[
result
.
full_name
]
inputs
=
{
"control.bam"
:
call_pair
[
0
].
full_name
,
"exp.bam"
:
call_pair
[
1
].
full_name
}
additional_inputs
=
{}
return
(
self
.
get_markers
(
run
),
inputs
,
additional_inputs
)
def
_hiddendomains
(
self
,
run
,
result
):
"""
:param run: The run to generate jobs for
:type run: :py:class:chipathlon.run.Run
:param result: The result to generate jobs for.
:type result: :py:class:chipathlon.result.Result
"""
call_pair
=
self
.
call_pairs
[
result
.
full_name
]
inputs
=
{
"chrom.sizes"
:
run
.
genome
.
get_chrom_sizes
()[
"name"
],
"control.bed"
:
call_pair
[
0
].
full_name
,
"exp.bed"
:
call_pair
[
1
].
full_name
,
"prefix"
:
result
.
prefix
}
additional_inputs
=
{}
return
(
self
.
get_markers
(
run
),
inputs
,
additional_inputs
)
def
_pepr
(
self
,
run
,
result
):
"""
:param run: The run to generate jobs for
:type run: :py:class:chipathlon.run.Run
:param result: The result to generate jobs for.
:type result: :py:class:chipathlon.result.Result
"""
call_pair
=
self
.
call_pairs
[
result
.
full_name
]
inputs
=
{
"control.bed"
:
call_pair
[
0
].
full_name
,
"signal.bed"
:
call_pair
[
1
].
full_name
}
additional_inputs
=
{}
return
(
self
.
get_markers
(
run
),
inputs
,
additional_inputs
)
def
_make_call_pairs
(
self
,
run
,
result_list
):
def
_make_call_pairs
(
self
,
run
,
result_list
):
"""
"""
:param run: The run currently being processed.
:param run: The run currently being processed.
...
@@ -164,12 +214,23 @@ class PeakCallGenerator(ModuleGenerator):
...
@@ -164,12 +214,23 @@ class PeakCallGenerator(ModuleGenerator):
:param run: The target run to generate jobs for.
:param run: The target run to generate jobs for.
:type run: :py:class:chipathlon.run.Run
:type run: :py:class:chipathlon.run.Run
"""
"""
remove_duplicates_results
=
run
.
get_results
(
"remove_duplicates"
,
"no_dups_chr.bed"
)
if
run
.
peak
==
"zerone"
:
print
"zerone"
if
run
.
file_type
==
"fastq"
:
results
=
run
.
get_results
(
"align"
,
"align.bam"
)
elif
run
.
file_type
==
"bam"
:
results
=
run
.
get_results
(
"download"
,
"encode.bam"
)
# results = run.get_results("align", "align.bam")
print
results
else
:
results
=
run
.
get_results
(
"remove_duplicates"
,
"no_dups_chr.bed"
)
module_markers
=
{
"peak_call"
:
self
.
get_markers
(
run
)}
module_markers
=
{
"peak_call"
:
self
.
get_markers
(
run
)}
all_result_names
=
[]
all_result_names
=
[]
final_results
=
self
.
module
.
get_all_final_results
(
self
.
get_markers
(
run
))
final_results
=
self
.
module
.
get_all_final_results
(
self
.
get_markers
(
run
))
for
paired_result
in
self
.
_make_call_pairs
(
run
,
remove_duplicates_
results
):
for
paired_result
in
self
.
_make_call_pairs
(
run
,
results
):
for
i
,
final_result
in
enumerate
(
final_results
):
for
i
,
final_result
in
enumerate
(
final_results
):
final_result_name
=
final_result
[
"file_name"
]
final_result_name
=
final_result
[
"file_name"
]
if
final_result_name
not
in
all_result_names
:
if
final_result_name
not
in
all_result_names
:
...
@@ -198,11 +259,23 @@ class PeakCallGenerator(ModuleGenerator):
...
@@ -198,11 +259,23 @@ class PeakCallGenerator(ModuleGenerator):
:param result: The target result to create jobs for.
:param result: The target result to create jobs for.
:type result: :py:class:chipathlon.result.Result
:type result: :py:class:chipathlon.result.Result
"""
"""
remove_duplicate_results
=
run
.
get_results
(
"remove_duplicates"
,
"no_dups_chr.bed"
)
if
run
.
peak
==
"zerone"
:
print
"zerone"
if
run
.
file_type
==
"fastq"
:
results
=
run
.
get_results
(
"align"
,
"align.bam"
)
elif
run
.
file_type
==
"bam"
:
results
=
run
.
get_results
(
"download"
,
"encode.bam"
)
# results = run.get_results("align", "align.bam")
print
results
else
:
results
=
run
.
get_results
(
"remove_duplicates"
,
"no_dups_chr.bed"
)
prev_results
=
[]
prev_results
=
[]
control_accessions
=
result
.
get_accessions
(
"control"
)
control_accessions
=
result
.
get_accessions
(
"control"
)
signal_accessions
=
result
.
get_accessions
(
"signal"
)
signal_accessions
=
result
.
get_accessions
(
"signal"
)
for
prev_result
in
remove_duplicate_
results
:
for
prev_result
in
results
:
if
(
set
(
prev_result
.
get_accessions
(
"control"
)).
issubset
(
control_accessions
)
and
if
(
set
(
prev_result
.
get_accessions
(
"control"
)).
issubset
(
control_accessions
)
and
set
(
prev_result
.
get_accessions
(
"signal"
)).
issubset
(
signal_accessions
)):
set
(
prev_result
.
get_accessions
(
"signal"
)).
issubset
(
signal_accessions
)):
prev_results
.
append
(
prev_result
)
prev_results
.
append
(
prev_result
)
...
...
chipathlon/jobs/modules/peak_call.yaml
View file @
4b894ba5
...
@@ -245,3 +245,82 @@ peak_call:
...
@@ -245,3 +245,82 @@ peak_call:
-
results_sorted.bed
:
-
results_sorted.bed
:
type
:
file
type
:
file
final_result
:
true
final_result
:
true
-
zerone[tool]
:
-
broad[peak_type]
:
-
zerone_callpeak
:
inputs
:
-
control.bam
:
type
:
file
-
exp.bam
:
type
:
file
additional_inputs
:
null
outputs
:
-
zerone_output.bed
:
type
:
stdout
-
sort_awk_sort_peaks
:
inputs
:
-
zerone_output.bed
:
type
:
file
additional_inputs
:
null
outputs
:
-
results_sorted.bed
:
type
:
file
-
zerone_add_columns
:
inputs
:
-
results_sorted.bed
:
type
:
file
additional_inputs
:
null
outputs
:
-
results_sorted_final.bed
:
type
:
file
final_result
:
true
-
hiddendomains[tool]
:
-
broad[peak_type]
:
-
hiddendomains_callpeak
:
inputs
:
-
control.bed
:
type
:
file
-
exp.bed
:
type
:
file
-
chrom.sizes
:
type
:
file
-
prefix
:
type
:
string
additional_inputs
:
null
outputs
:
-
analysis.bed
:
type
:
file
-
vis.bed
:
type
:
file
-
domains.txt
:
type
:
file
-
control_bins.txt
:
type
:
file
-
treatment_bins.txt
:
type
:
file
-
hiddendomains_add_columns
:
inputs
:
-
analysis.bed
:
type
:
file
additional_inputs
:
null
outputs
:
-
results_final.bed
:
type
:
file
final_result
:
true
-
pepr[tool]
:
-
broad[peak_type]
:
-
pepr_callpeak
:
inputs
:
-
control.bed
:
type
:
file
-
signal.bed
:
type
:
file
additional_inputs
:
null
outputs
:
-
pepr_result.bed
:
type
:
file
final_result
:
true
-
pepr_parameters.txt
:
type
:
file
-
pepr_log.txt
:
type
:
file
chipathlon/jobs/params/hiddendomains_add_columns.yaml
0 → 100644
View file @
4b894ba5
hiddendomains_add_columns
:
inputs
:
-
name
:
peak_result
type
:
file
file_type
:
bed
additional_inputs
:
null
outputs
:
-
name
:
full_result
type
:
file
file_type
:
bed
command
:
hiddendomains_add_columns.sh
arguments
:
-
"
$inputs.0"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
false
-
"
$outputs.0"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
false
walltime
:
2000
memory
:
2000
cores
:
1
nodes
:
1
chipathlon/jobs/params/hiddendomains_callpeak.yaml
0 → 100644
View file @
4b894ba5
hiddendomains_callpeak
:
inputs
:
-
name
:
control_bed
type
:
file
file_type
:
bed
-
name
:
signal_bed
type
:
file
file_type
:
bed
-
name
:
chrom_sizes
type
:
file
file_type
:
chrom_sizes
-
name
:
prefix
type
:
string
additional_inputs
:
null
outputs
:
-
name
:
hiddendomains_result
type
:
file
file_type
:
bed
-
name
:
enriched_bins_vis
type
:
file
file_type
:
bed
-
name
:
domains
type
:
file
file_type
:
txt
-
name
:
control_bins
type
:
file
file_type
:
txt
-
name
:
treatment_bins
type
:
file
file_type
:
txt
command
:
hiddendomains
arguments
:
-
"
-B"
:
type
:
string
changeable
:
false
required
:
true
has_value
:
false
-
"
-c"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.0"
-
"
-t"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.1"
-
"
-g"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.2"
-
"
-o"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.3"
-
"
-b"
:
type
:
numeric
changeable
:
true
required
:
true
has_value
:
true
default
:
1000
-
"
-p"
:
type
:
numeric
changeable
:
true
required
:
true
has_value
:
true
default
:
0
-
"
-q"
:
type
:
numeric
changeable
:
true
required
:
true
has_value
:
true
default
:
30
walltime
:
240
memory
:
16000
cores
:
1
nodes
:
1
chipathlon/jobs/params/pepr_callpeak.yaml
0 → 100644
View file @
4b894ba5
pepr_callpeak
:
inputs
:
-
name
:
control
type
:
file
file_type
:
bed
-
name
:
signal
type
:
file
file_type
:
bed
additional_inputs
:
null
outputs
:
-
name
:
pepr_result
type
:
file
file_type
:
bed
-
name
:
pepr_parameters
type
:
file
file_type
:
txt
-
name
:
pepr_log
type
:
file
file_type
:
txt
command
:
pepr
arguments
:
-
"
-c"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.0,$inputs.0"
-
"
-i"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.1,$inputs.1"
-
"
-n"
:
type
:
string
changeable
:
true
required
:
false
has_value
:
true
default
:
"
NA"
-
"
-f"
:
type
:
string
changeable
:
true
required
:
true
has_value
:
true
default
:
"
bed"
-
"
-s"
:
type
:
numeric
changeable
:
true
required
:
false
has_value
:
true
default
:
10
-
"
-w"
:
type
:
numeric
changeable
:
true
required
:
false
has_value
:
true
default
:
100
-
"
--diff"
:
type
:
boolean
changeable
:
true
required
:
false
has_value
:
false
-
"
--threshold"
:
type
:
string
changeable
:
true
required
:
false
has_value
:
true
default
:
"
1e-5"
-
"
--peaktype"
:
type
:
string
changeable
:
true
required
:
true
has_value
:
true
default
:
"
broad"
-
"
--normalization"
:
type
:
string
changeable
:
true
required
:
false
has_value
:
true
default
:
"
intra-group"
-
"
--keep-max-dup"
:
type
:
numeric
changeable
:
true
required
:
false
has_value
:
true
default
:
10000000
-
"
--num-processors"
:
type
:
numeric
changeable
:
true
required
:
false
has_value
:
true
default
:
1
-
"
--input-directory"
:
type
:
string
changeable
:
true
required
:
false
has_value
:
true
default
:
"
"
-
"
--output-directory"
:
type
:
string
changeable
:
true
required
:
false
has_value
:
true
default
:
"
"
-
"
--verbose"
:
type
:
string
changeable
:
false
required
:
true
has_value
:
false
walltime
:
120
memory
:
16000
cores
:
1
nodes
:
1
chipathlon/jobs/params/picard_mark_duplicates.yaml
View file @
4b894ba5
...
@@ -8,8 +8,7 @@ picard_mark_duplicates:
...
@@ -8,8 +8,7 @@ picard_mark_duplicates:
-
name
:
dups_marked
-
name
:
dups_marked
type
:
file
type
:
file
file_type
:
bam
file_type
:
bam
-
name
:
quality
-
name
:
quality
type
:
file
type
:
file
file_type
:
qc
file_type
:
qc
command
:
picard
command
:
picard
...
...
chipathlon/jobs/params/zerone_add_columns.yaml
0 → 100644
View file @
4b894ba5
zerone_add_columns
:
inputs
:
-
name
:
peak_result
type
:
file
file_type
:
bed
additional_inputs
:
null
outputs
:
-
name
:
full_result
type
:
file
file_type
:
bed
command
:
zerone_add_columns.sh
arguments
:
-
"
$inputs.0"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
false
-
"
$outputs.0"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
false
walltime
:
2000
memory
:
2000
cores
:
1
nodes
:
1
chipathlon/jobs/params/zerone_callpeak.yaml
0 → 100644
View file @
4b894ba5
zerone_callpeak
:
inputs
:
-
name
:
control_bam
type
:
file
file_type
:
bam
-
name
:
signal_bam
type
:
file
file_type
:
bam
additional_inputs
:
null
outputs
:
-
name
:
zerone_result
type
:
stdout
file_type
:
bed
command
:
zerone
arguments
:
-
"
--mock"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.0"
-
"
--chip"
:
type
:
file
changeable
:
false
required
:
true
has_value
:
true
default
:
"
$inputs.1"
-
"
--window"
:
type
:
numeric
changeable
:
true
required
:
true
has_value
:
true
default
:
300
-
"
--quality"
:
type
:
numeric
changeable
:
true
required
:
true
has_value
:
true
default
:
20
walltime
:
120
memory
:
16000
cores
:
1
nodes
:
1
chipathlon/jobs/scripts/hiddendomains_add_columns.sh
0 → 100755
View file @
4b894ba5
#!/bin/bash
# Append five zero-valued columns to every line of the input file.
#
# Usage: hiddendomains_add_columns.sh <input> <output>
#   $1  file to read
#   $2  file to write (truncated/overwritten by the redirect)
#
# NOTE: the added fields are joined with awk's default output field
# separator (a single space), not a tab.
if [ "$#" -lt 2 ]; then
    echo "usage: $0 <input> <output>" >&2
    exit 1
fi

awk '{print $0,0,0,0,0,0}' "$1" > "$2"
chipathlon/jobs/scripts/zerone_add_columns.sh
0 → 100755
View file @
4b894ba5
#!/bin/bash
# Append two zero-valued columns to every line of the input file.
#
# Usage: zerone_add_columns.sh <input> <output>
#   $1  file to read
#   $2  file to write (truncated/overwritten by the redirect)
#
# NOTE: the added fields are joined with awk's default output field
# separator (a single space), not a tab.
if [ "$#" -lt 2 ]; then
    echo "usage: $0 <input> <output>" >&2
    exit 1
fi

awk '{print $0,0,0}' "$1" > "$2"
chipathlon/jobs/wrappers/hiddendomains_wrapper.sh
0 → 100755
View file @
4b894ba5
#!/bin/bash
# Wrapper that runs hiddenDomains with whatever arguments it is given.

# Source the Lmod init profile so the `module` command is defined in this
# non-interactive shell (site-specific path).
. /util/opt/lmod/lmod/init/profile

# Pin the tool version loaded into the environment.
module load hiddendomains/2.3

# Forward every argument unchanged; quoting "$@" preserves arguments that
# contain whitespace.
hiddenDomains "$@"
chipathlon/jobs/wrappers/pepr_wrapper.sh
0 → 100755
View file @
4b894ba5
#!/bin/bash