conf.py 3.35 KB
Newer Older
1
2
3
4
5
6
# Locations of the job definition files.  All of them are relative
# paths that end with a trailing slash.
job_modules = "jobs/modules/"    # module directory
job_params = "jobs/params/"      # job params directory
job_wrappers = "jobs/wrappers/"  # job wrappers directory
job_scripts = "jobs/scripts/"    # job scripts directory

13
14
15
16
17
18
19
20
21
22
23
24
# Directory holding the system executables (probably /bin)
system_path = "/bin/"

# Names of the system commands: mv, cp, sort, zcat, awk, etc.
system_commands = ["mv", "cp", "sort", "zcat", "awk"]

25
26
27
28
29
30
# Alignment tools that are currently listed
align_tools = ["bwa", "bowtie2"]

31
# Current peak calling tools
peak_tools = [
    "spp",
    "macs2",
    "gem",
    "jamm",
    "ccat",
]

40
41
# File extensions, keyed by file type name.  Each value is the list of
# extensions (without a leading dot) associated with that type name.
file_extensions = {
    "any": ["fastq", "fastq.gz", "bam"],
    "genome_index": ["fa", "fna"],
    "fastq": ["fastq", "fastq.gz"],
    "sai": ["sai"],
    "sam": ["sam"],
    "bam": ["bam"],
    "bed": ["bed", "peak", "region", "narrowPeak", "broadPeak", "tagAlign", "narrowPeak.gz"],
    "bwa_genome": ["amb", "ann", "bwt", "pac", "sa"],
    "bowtie2_genome": ["1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2"],
    "quality": ["quality"],
    "qc": ["qc"],
    "pdf": ["pdf"],
    "ccscore": ["ccscore"],
    "xls": ["xls"],
    "yaml": ["yaml"],
    "result": ["bed", "narrowPeak", "broadPeak", "tagAlign", "bam"],
    "txt": ["txt"],
    "chrom_sizes": ["sizes"],
    "read_dist": ["txt"],
    "plaintext": ["txt", "md"],
    "ccat_conf": ["txt", "conf"],
    "log": ["log"],
}
65

Adam Caprez's avatar
Adam Caprez committed
66
67
# list of resources that can be specified per job (step) in
# the workflow and corresponding Pegasus profile info.
# Each resource name maps to the profile "namespace" and "key"
# that are associated with it.
resources = {
    "walltime": {
        "namespace": "globus",
        "key": "maxwalltime",
    },
    "memory": {
        "namespace": "condor",
        "key": "request_memory",
    },
    "cores": {
        "namespace": "pegasus",
        "key": "cores",
    },
    "nodes": {
        "namespace": "pegasus",
        "key": "nodes",
    },
}

87
88
# Defines the types of input / output arguments
# argument -> Any non file argument
#   string -> Any string argument
#   numeric -> Numeric arguments.
# file -> Any file argument that isn't redirected
#   file -> Normal file arguments.
#   rawfile -> Listed below but not described in these notes
#              (TODO: confirm its meaning with the consumer code).
#   stdout -> Any file argument that is redirected from stdout
#   stderr -> Any file argument that is redirected from stderr
# list -> For variable argument inputs
#   list -> Currently a list of files.
argument_types = {
    "argument": ["string", "numeric"],
    "file": ["file", "rawfile", "stdout", "stderr"],
    "list": ["list"],
}

# Defines information about arguments: the key names that are
# grouped as "required" and the ones that are grouped as "optional".
argument_keys = {
    "required": ["type", "changeable", "has_value"],
    "optional": ["required", "default", "file_type"],
}

# workflow_job keys
# Every resource name is appended to the required job keys.
# list(resources) is used instead of resources.keys() because on
# Python 3 keys() returns a view object, which cannot be concatenated
# to a list with "+"; list(resources) works on Python 2 and 3 alike.
job_keys = {
    "required": [
        "inputs",
        "additional_inputs",
        "outputs",
        "command",
        "arguments",
    ] + list(resources),
    "optional": [],
}
114

115
116
# param keys
# Every resource name is appended to the optional param keys.
# list(resources) is used instead of resources.keys() because on
# Python 3 keys() returns a view object, which cannot be concatenated
# to a list with "+"; list(resources) works on Python 2 and 3 alike.
param_keys = {
    "required": [],
    "optional": ["arguments"] + list(resources),
}

121
122
123
124
125
# file_list keys
file_list_keys = dict(
    required=["name", "type"],
    optional=["file_type"],
)

126
127
# Workflow steps, listed in workflow order
workflow = [
    "align",
    "remove_duplicates",
    "peak_calling",
]
128
129
130
131
132
133
134
135
136
137
138
139

# genome info
# For each align tool: the extensions of the base genome file and the
# extensions of the additional files, both taken from file_extensions.
genomes = {
    "bwa": dict(
        base_file=file_extensions["genome_index"],
        additional_files=file_extensions["bwa_genome"],
    ),
    "bowtie2": dict(
        base_file=file_extensions["genome_index"],
        additional_files=file_extensions["bowtie2_genome"],
    ),
}
140
141
142
143

# How many lines of a BED / peak result file are included in each
# document of the bed and peak collections
result_lines_per_document = 75 * 1000