Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Holland Computing Center
chipathlon
Commits
3ea96f58
Commit
3ea96f58
authored
Jun 08, 2017
by
aknecht2
Browse files
Initial master merge into optional mongo branch.
parent
220af4bf
Changes
8
Hide whitespace changes
Inline
Side-by-side
chipathlon/db.py
View file @
3ea96f58
...
...
@@ -30,7 +30,7 @@ class MongoDB(object):
operations much easier.
"""
def
__init__
(
self
,
host
,
username
,
password
,
debug
=
False
):
def
__init__
(
self
,
host
=
"localhost"
,
username
=
None
,
password
=
None
,
debug
=
False
):
self
.
debug
=
debug
self
.
host
=
host
self
.
username
=
username
...
...
@@ -38,12 +38,14 @@ class MongoDB(object):
self
.
client
=
MongoClient
(
host
)
self
.
db
=
self
.
client
.
chipseq
self
.
cache
=
collections
.
defaultdict
(
dict
)
try
:
self
.
db
.
authenticate
(
username
,
password
,
mechanism
=
"SCRAM-SHA-1"
)
except
:
print
(
"Could not authenticate to db %s!"
%
(
host
,))
print
traceback
.
format_exc
()
sys
.
exit
(
1
)
self
.
cache
=
{}
if
username
and
password
:
try
:
self
.
db
.
authenticate
(
username
,
password
,
mechanism
=
"SCRAM-SHA-1"
)
except
:
print
(
"Could not authenticate to db %s!"
%
(
host
,))
print
traceback
.
format_exc
()
sys
.
exit
(
1
)
self
.
gfs
=
gridfs
.
GridFS
(
self
.
db
)
return
...
...
chipathlon/workflow.py
View file @
3ea96f58
No preview for this file type
scripts/chip-create-run
View file @
3ea96f58
...
...
@@ -6,9 +6,9 @@ import yaml
import
pprint
parser
=
argparse
.
ArgumentParser
(
description
=
"Create a run file from a list of experiment accessions."
)
parser
.
add_argument
(
"-H"
,
"--host"
,
dest
=
"host"
,
required
=
True
,
help
=
"Database host."
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
required
=
True
,
help
=
"Database user."
)
parser
.
add_argument
(
"-p"
,
"--password"
,
dest
=
"password"
,
required
=
True
,
help
=
"Database
user
password."
)
parser
.
add_argument
(
"-H"
,
"--host"
,
dest
=
"host"
,
default
=
"localhost"
,
help
=
"Database host.
(default: %(default)s)
"
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
help
=
"Database user
name (if required)
."
)
parser
.
add_argument
(
"-p"
,
"--password"
,
dest
=
"password"
,
help
=
"Database password
(if required)
."
)
parser
.
add_argument
(
"-n"
,
"--name"
,
dest
=
"name"
,
required
=
True
,
help
=
"Name of the run file to create."
)
parser
.
add_argument
(
"-f"
,
"--file_type"
,
dest
=
"file_type"
,
default
=
"fastq"
,
help
=
"Type of files to extract (fastq or bam)."
)
...
...
scripts/chip-gen
View file @
3ea96f58
...
...
@@ -3,9 +3,9 @@ from chipathlon.workflow import Workflow
import
argparse
parser
=
argparse
.
ArgumentParser
(
description
=
"Generates a workflow from a run, param & config file."
)
parser
.
add_argument
(
"-H"
,
"--host"
,
dest
=
"host"
,
required
=
True
,
help
=
"Database host."
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
required
=
True
,
help
=
"Database user."
)
parser
.
add_argument
(
"-p"
,
"--password"
,
dest
=
"password"
,
required
=
True
,
help
=
"Database
user
password."
)
parser
.
add_argument
(
"-H"
,
"--host"
,
dest
=
"host"
,
default
=
"localhost"
,
help
=
"Database host.
(default: %(default)s)
"
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
help
=
"Database user
name (if required)
."
)
parser
.
add_argument
(
"-p"
,
"--password"
,
dest
=
"password"
,
help
=
"Database password
(if required)
."
)
parser
.
add_argument
(
"-d"
,
"--dir"
,
dest
=
"dir"
,
required
=
True
,
help
=
"Directory name to generate files in."
)
parser
.
add_argument
(
"--param"
,
dest
=
"param"
,
required
=
True
,
help
=
"Path to param file to load."
)
parser
.
add_argument
(
"--conf"
,
dest
=
"config"
,
required
=
True
,
help
=
"Path to config file to load."
)
...
...
scripts/chip-job-download-gridfs
View file @
3ea96f58
...
...
@@ -3,9 +3,9 @@ import chipathlon.utils
import
argparse
parser
=
argparse
.
ArgumentParser
(
description
=
"Download target file from GridFS."
)
parser
.
add_argument
(
"-
p
"
,
"--
password
"
,
dest
=
"
password"
,
required
=
True
,
help
=
"Database user password.
"
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
required
=
True
,
help
=
"Database user."
)
parser
.
add_argument
(
"-
H
"
,
"--
host
"
,
dest
=
"
host"
,
required
=
True
,
help
=
"Database host
."
)
parser
.
add_argument
(
"-
H
"
,
"--
host
"
,
dest
=
"
host"
,
default
=
"localhost"
,
help
=
"Database host. (default: %(default)s)
"
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
help
=
"Database user
name (if required)
."
)
parser
.
add_argument
(
"-
p
"
,
"--
password
"
,
dest
=
"
password"
,
help
=
"Database password (if required)
."
)
parser
.
add_argument
(
"-i"
,
"--id"
,
dest
=
"gridfs_id"
,
required
=
True
,
help
=
"GridFS ID."
)
parser
.
add_argument
(
"-d"
,
"--destination"
,
dest
=
"destination"
,
required
=
True
,
help
=
"Local path to file destination."
)
parser
.
add_argument
(
"-r"
,
"--retries"
,
dest
=
"retries"
,
default
=
3
,
type
=
int
,
help
=
"Number of retries."
)
...
...
scripts/chip-job-save-result
View file @
3ea96f58
...
...
@@ -5,9 +5,9 @@ import yaml
import
os
parser
=
argparse
.
ArgumentParser
(
description
=
"Insert a bed file into the database."
)
parser
.
add_argument
(
"-
p
"
,
"--
password
"
,
dest
=
"
password"
,
required
=
True
,
help
=
"Database user
password
."
)
parser
.
add_argument
(
"-
u
"
,
"--
username
"
,
dest
=
"
username"
,
required
=
True
,
help
=
"Database user
."
)
parser
.
add_argument
(
"-d"
,
"--host"
,
dest
=
"host"
,
required
=
True
,
help
=
"Database host."
)
parser
.
add_argument
(
"-
u
"
,
"--
username
"
,
dest
=
"
username"
,
help
=
"Database user
name (if required)
."
)
parser
.
add_argument
(
"-
p
"
,
"--
password
"
,
dest
=
"
password"
,
help
=
"Database password (if required)
."
)
parser
.
add_argument
(
"-d"
,
"--host"
,
dest
=
"host"
,
default
=
"localhost"
,
help
=
"Database host.
(default: %(default)s)
"
)
parser
.
add_argument
(
"-f"
,
"--file"
,
dest
=
"file"
,
required
=
True
,
help
=
"Path to result file."
)
parser
.
add_argument
(
"-m"
,
"--meta"
,
dest
=
"meta"
,
required
=
True
,
help
=
"Path to meta yaml file."
)
args
=
parser
.
parse_args
()
...
...
scripts/chip-meta-download
View file @
3ea96f58
...
...
@@ -12,6 +12,7 @@ import datetime
parser
=
argparse
.
ArgumentParser
(
description
=
"Download raw JSON for all experiments."
)
parser
.
add_argument
(
"-o"
,
"--output-dir"
,
dest
=
"outputdir"
,
default
=
os
.
getcwd
(),
help
=
"Output directory. (default: %(default)s)"
)
parser
.
add_argument
(
"-q"
,
"--quiet"
,
action
=
'store_true'
,
help
=
"Quiet mode. Do not print progress information. (default: false)"
)
args
=
parser
.
parse_args
()
encode_baseurl
=
"https://www.encodeproject.org/experiments/"
...
...
@@ -39,7 +40,8 @@ for i, exp_id in enumerate(exp_ids):
r
=
requests
.
get
(
exp_url
,
params
=
json_arg
)
with
open
(
os
.
path
.
join
(
args
.
outputdir
,
"data"
,
"%s.json"
%
(
exp_id
,)),
"w"
)
as
wh
:
wh
.
write
(
r
.
text
)
progress
(
i
,
total
)
progress
(
total
,
total
)
if
not
args
.
quiet
:
progress
(
i
,
total
)
if
not
args
.
quiet
:
progress
(
total
,
total
)
print
scripts/chip-meta-import
View file @
3ea96f58
...
...
@@ -9,18 +9,20 @@ import os
import
os.path
parser
=
argparse
.
ArgumentParser
(
description
=
"Read per-experiment JSON files and create experiment and samples collections."
)
parser
.
add_argument
(
"-H"
,
"--host"
,
dest
=
"host"
,
default
=
"
hcc-anvil-175-9.unl.edu
"
,
help
=
"Database host. (default: %(default)s)"
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
default
=
"aknecht"
,
required
=
True
,
help
=
"Database user. (default: %(default)s)
"
)
parser
.
add_argument
(
"-p"
,
"--password"
,
dest
=
"password"
,
required
=
True
,
help
=
"Database
user
password."
)
parser
.
add_argument
(
"-H"
,
"--host"
,
dest
=
"host"
,
default
=
"
localhost
"
,
help
=
"Database host. (default: %(default)s)"
)
parser
.
add_argument
(
"-u"
,
"--username"
,
dest
=
"username"
,
help
=
"Database username (if required).
"
)
parser
.
add_argument
(
"-p"
,
"--password"
,
dest
=
"password"
,
help
=
"Database password
(if required)
."
)
parser
.
add_argument
(
"-i"
,
"--input-dir"
,
dest
=
"inputdir"
,
default
=
os
.
getcwd
(),
help
=
"Directory containing per-experiment JSON files. (default: %(default)s)"
)
parser
.
add_argument
(
"-d"
,
"--drop"
,
dest
=
"drop"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Drop data if it exists. (default: %(default)s)"
)
parser
.
add_argument
(
"-s"
,
"--samples"
,
dest
=
"samples"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Only recreate the samples collection."
)
parser
.
add_argument
(
"-q"
,
"--quiet"
,
action
=
'store_true'
,
help
=
"Quiet mode. Do not print progress information. (default: false)"
)
args
=
parser
.
parse_args
()
client
=
MongoClient
(
args
.
host
)
db
=
client
.
chipseq
db
.
authenticate
(
args
.
username
,
args
.
password
,
mechanism
=
"SCRAM-SHA-1"
)
if
args
.
username
:
db
.
authenticate
(
args
.
username
,
args
.
password
,
mechanism
=
"SCRAM-SHA-1"
)
if
args
.
drop
:
if
not
args
.
samples
:
...
...
@@ -30,7 +32,8 @@ if args.drop:
if
not
args
.
samples
:
# Insert all experiment JSON files into DB
json_filelist
=
os
.
listdir
(
args
.
inputdir
)
if
not
args
.
quiet
:
print
"Creating experiments collection..."
for
i
,
json_file
in
enumerate
(
json_filelist
):
if
json_file
.
endswith
(
"json"
):
exp_file_contents
=
open
(
os
.
path
.
join
(
args
.
inputdir
,
json_file
)).
read
()
...
...
@@ -42,7 +45,8 @@ if not args.samples:
db
.
experiments
.
insert_one
(
exp_json_data
)
except
Exception
:
pass
progress
(
i
,
len
(
json_filelist
))
if
not
args
.
quiet
:
progress
(
i
,
len
(
json_filelist
))
organism
=
{
"mm"
:
"mouse"
,
...
...
@@ -59,6 +63,8 @@ cursor = db.experiments.find({
total
=
cursor
.
count
()
# Create the samples collection using the 'files' value from each experiment
if
not
args
.
quiet
:
print
"
\n
Creating samples collection..."
for
i
,
document
in
enumerate
(
cursor
):
for
f
in
document
[
"files"
]:
doc
=
{}
...
...
@@ -84,8 +90,8 @@ for i, document in enumerate(cursor):
db
.
samples
.
update_one
({
'_id'
:
result
.
inserted_id
},
{
"$set"
:
f
})
except
Exception
:
pass
progress
(
i
,
total
)
if
not
args
.
quiet
:
progress
(
i
,
total
)
progress
(
total
,
total
)
print
cursor
.
close
()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment