Skip to content
Snippets Groups Projects
Commit 1fa9a9d1 authored by aknecht2's avatar aknecht2
Browse files

Merge branch '34-mongodb-auth-should-be-optional' into 'master'

Resolve "MongoDB auth should be optional"

Closes #34

See merge request !33
parents 220af4bf ffbdf207
Branches docs
No related tags found
1 merge request: !33 Resolve "MongoDB auth should be optional"
......@@ -11,7 +11,20 @@ import chipathlon.conf
from pprint import pprint
import hashlib
from chipathlon.utils import progress
import bson
def download_from_gridfs(host, gridfs_id, local_path, username=None, password=None, retries=3, overwrite=True, checkmd5=False):
    """
    Download a file stored in GridFS to a local path, retrying on failure.

    :param host: Database host passed to chipathlon.db.MongoDB.
    :param gridfs_id: GridFS file id; converted to a bson ObjectId.
    :param local_path: Destination path on the local filesystem.
    :param username: Database username, or None when auth is not required.
    :param password: Database password, or None when auth is not required.
    :param retries: Maximum number of download attempts.
    :param overwrite: When False, an existing file at local_path is kept
        and no download is attempted.
    :param checkmd5: Forwarded unchanged to MongoDB.fetch_from_gridfs.
    :returns: True as soon as one attempt succeeds; False if the download
        was skipped or every attempt failed.
    """
    # Decide whether a download is needed *before* touching the database,
    # so a skipped download never opens (or authenticates) a connection.
    if os.path.isfile(local_path) and not overwrite:
        print("File already exists, skipping download.\n")
        return False

    mdb = chipathlon.db.MongoDB(host, username, password)
    for i in range(retries):
        print("Attempt #%s, downloading file with ID '%s' to '%s'" % (i + 1, gridfs_id, local_path))
        # The destination must be the `local_path` parameter; the previous
        # code referenced an undefined name (`localpath`) and raised
        # NameError on the first attempt.
        if mdb.fetch_from_gridfs(bson.objectid.ObjectId(gridfs_id), local_path, checkmd5):
            return True
        print("Download attempt #%s from GridFS failed, retrying..." % (i + 1,))
    return False
class MongoDB(object):
"""
......@@ -30,7 +43,7 @@ class MongoDB(object):
operations much easier.
"""
def __init__(self, host, username, password, debug=False):
def __init__(self, host="localhost", username=None, password=None, debug=False):
self.debug = debug
self.host = host
self.username = username
......@@ -38,12 +51,13 @@ class MongoDB(object):
self.client = MongoClient(host)
self.db = self.client.chipseq
self.cache = collections.defaultdict(dict)
try:
self.db.authenticate(username, password, mechanism="SCRAM-SHA-1")
except:
print("Could not authenticate to db %s!" % (host,))
print traceback.format_exc()
sys.exit(1)
if username and password:
try:
self.db.authenticate(username, password, mechanism="SCRAM-SHA-1")
except:
print("Could not authenticate to db %s!" % (host,))
print traceback.format_exc()
sys.exit(1)
self.gfs = gridfs.GridFS(self.db)
return
......
......@@ -3,8 +3,6 @@ import hashlib
import urllib2
import os
import traceback
import chipathlon.db
import bson.objectid
def progress(current, end, length=20):
percent = float(current) / end
......@@ -50,21 +48,6 @@ def downloadFile(url, localpath, urltype="http://", retries=3, overwrite=True, c
print "File already exists, skipping download.\n"
return
def downloadFromGridFS(hostname, username, password, gridfs_id, localpath, retries=3, overwrite=True, checkmd5=False):
    """
    Fetch a GridFS file to `localpath`, making up to `retries` attempts.

    A chipathlon.db.MongoDB connection is always created first.  If the
    destination already exists and `overwrite` is False the download is
    skipped.  Returns True on the first successful fetch, otherwise False.
    """
    mdb = chipathlon.db.MongoDB(hostname, username, password)

    # Guard clause: nothing to do when the file is present and must be kept.
    if os.path.isfile(localpath) and not overwrite:
        print("File already exists, skipping download.\n")
        return False

    for attempt in range(1, retries + 1):
        print("Attempt #%s, downloading file with ID '%s' to '%s'" % (attempt, gridfs_id, localpath))
        fetched = mdb.fetch_from_gridfs(bson.objectid.ObjectId(gridfs_id), localpath, checkmd5)
        if fetched:
            return True
        print("Download attempt #%s from GridFS failed, retrying..." % (attempt,))
    return False
# http://pythoncentral.io/how-to-check-if-a-string-is-a-number-in-python-including-unicode/
def is_number(s):
try:
......
No preview for this file type
......@@ -6,9 +6,9 @@ import yaml
import pprint
parser = argparse.ArgumentParser(description="Create a run file from a list of experiment accessions.")
parser.add_argument("-H", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-n", "--name", dest="name", required=True, help="Name of the run file to create.")
parser.add_argument("-f", "--file_type", dest="file_type", default="fastq", help="Type of files to extract (fastq or bam).")
......
......@@ -3,9 +3,9 @@ from chipathlon.workflow import Workflow
import argparse
parser = argparse.ArgumentParser(description="Generates a workflow from a run, param & config file.")
parser.add_argument("-H", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-d", "--dir", dest="dir", required=True, help="Directory name to generate files in.")
parser.add_argument("--param", dest="param", required=True, help="Path to param file to load.")
parser.add_argument("--conf", dest="config", required=True, help="Path to config file to load.")
......@@ -25,9 +25,9 @@ workflow = Workflow(
args.param,
args.config,
args.properties,
args.host,
args.username,
args.password,
host=args.host,
username=args.username,
password=args.password,
execute_site=args.execute_site,
output_site=args.output_site,
save_db=args.save_db,
......
#!/usr/bin/env python
import chipathlon.utils
import chipathlon.db
import argparse
parser = argparse.ArgumentParser(description="Download target file from GridFS.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-H", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-i", "--id", dest="gridfs_id", required=True, help="GridFS ID.")
parser.add_argument("-d", "--destination", dest="destination", required=True, help="Local path to file destination.")
parser.add_argument("-r", "--retries", dest="retries", default=3, type=int, help="Number of retries.")
parser.add_argument("-n", "--overwrite", dest="overwrite", default=True, action="store_false", help="Don't overwrite local file if it exists.")
parser.add_argument("-n", "--no-overwrite", dest="overwrite", default=True, action="store_false", help="Don't overwrite local file if it exists.")
parser.add_argument("-c", "--checkmd5", dest="checkmd5", action="store_true", help="Check md5 value of downloaded file against database value.")
args = parser.parse_args()
chipathlon.utils.downloadFromGridFS(
chipathlon.db.download_from_gridfs(
args.host,
args.username,
args.password,
args.gridfs_id,
args.destination,
args.retries,
args.overwrite,
args.checkmd5
username=args.username,
password=args.password,
retries=args.retries,
overwrite=args.overwrite,
checkmd5=args.checkmd5
)
......@@ -5,9 +5,9 @@ import yaml
import os
parser = argparse.ArgumentParser(description="Insert a bed file into the database.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-d", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-d", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-f", "--file", dest="file", required=True, help="Path to result file.")
parser.add_argument("-m", "--meta", dest="meta", required=True, help="Path to meta yaml file.")
args = parser.parse_args()
......
......@@ -12,6 +12,7 @@ import datetime
parser = argparse.ArgumentParser(description="Download raw JSON for all experiments.")
parser.add_argument("-o", "--output-dir", dest="outputdir", default=os.getcwd(), help="Output directory. (default: %(default)s)")
parser.add_argument("-q", "--quiet", action='store_true', help="Quiet mode. Do not print progress information. (default: false)")
args = parser.parse_args()
encode_baseurl = "https://www.encodeproject.org/experiments/"
......@@ -39,7 +40,8 @@ for i, exp_id in enumerate(exp_ids):
r = requests.get(exp_url, params=json_arg)
with open(os.path.join(args.outputdir, "data", "%s.json" % (exp_id,)), "w") as wh:
wh.write(r.text)
progress(i, total)
progress(total, total)
if not args.quiet:
progress(i, total)
if not args.quiet:
progress(total, total)
print
......@@ -9,18 +9,20 @@ import os
import os.path
parser = argparse.ArgumentParser(description="Read per-experiment JSON files and create experiment and samples collections.")
parser.add_argument("-H", "--host", dest="host", default="hcc-anvil-175-9.unl.edu", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", default="aknecht", required=True, help="Database user. (default: %(default)s)")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-i", "--input-dir", dest="inputdir", default=os.getcwd(), help="Directory containing per-experiment JSON files. (default: %(default)s)")
parser.add_argument("-d", "--drop", dest="drop", default=False, action="store_true", help="Drop data if it exists. (default: %(default)s)")
parser.add_argument("-s", "--samples", dest="samples", default=False, action="store_true", help="Only recreate the samples collection.")
parser.add_argument("-q", "--quiet", action='store_true', help="Quiet mode. Do not print progress information. (default: false)")
args = parser.parse_args()
client = MongoClient(args.host)
db = client.chipseq
db.authenticate(args.username, args.password, mechanism="SCRAM-SHA-1")
if args.username:
db.authenticate(args.username, args.password, mechanism="SCRAM-SHA-1")
if args.drop:
if not args.samples:
......@@ -30,7 +32,8 @@ if args.drop:
if not args.samples:
# Insert all experiment JSON files into DB
json_filelist = os.listdir(args.inputdir)
if not args.quiet:
print "Creating experiments collection..."
for i, json_file in enumerate(json_filelist):
if json_file.endswith("json"):
exp_file_contents = open(os.path.join(args.inputdir, json_file)).read()
......@@ -42,7 +45,8 @@ if not args.samples:
db.experiments.insert_one(exp_json_data)
except Exception:
pass
progress(i, len(json_filelist))
if not args.quiet:
progress(i, len(json_filelist))
organism = {
"mm": "mouse",
......@@ -59,6 +63,8 @@ cursor = db.experiments.find({
total = cursor.count()
# Create the samples collection using the 'files' value from each experiment
if not args.quiet:
print "\nCreating samples collection..."
for i, document in enumerate(cursor):
for f in document["files"]:
doc = {}
......@@ -84,8 +90,8 @@ for i, document in enumerate(cursor):
db.samples.update_one({'_id': result.inserted_id}, {"$set": f})
except Exception:
pass
progress(i, total)
if not args.quiet:
progress(i, total)
progress(total, total)
print
cursor.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment