Commit 3ea96f58 authored by aknecht2
Browse files

Initial master merge into optional mongo branch.

parent 220af4bf
......@@ -30,7 +30,7 @@ class MongoDB(object):
operations much easier.
"""
def __init__(self, host, username, password, debug=False):
def __init__(self, host="localhost", username=None, password=None, debug=False):
self.debug = debug
self.host = host
self.username = username
......@@ -38,12 +38,14 @@ class MongoDB(object):
self.client = MongoClient(host)
self.db = self.client.chipseq
self.cache = collections.defaultdict(dict)
try:
self.db.authenticate(username, password, mechanism="SCRAM-SHA-1")
except:
print("Could not authenticate to db %s!" % (host,))
print traceback.format_exc()
sys.exit(1)
self.cache = {}
if username and password:
try:
self.db.authenticate(username, password, mechanism="SCRAM-SHA-1")
except:
print("Could not authenticate to db %s!" % (host,))
print traceback.format_exc()
sys.exit(1)
self.gfs = gridfs.GridFS(self.db)
return
......
No preview for this file type
......@@ -6,9 +6,9 @@ import yaml
import pprint
parser = argparse.ArgumentParser(description="Create a run file from a list of experiment accessions.")
parser.add_argument("-H", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-n", "--name", dest="name", required=True, help="Name of the run file to create.")
parser.add_argument("-f", "--file_type", dest="file_type", default="fastq", help="Type of files to extract (fastq or bam).")
......
......@@ -3,9 +3,9 @@ from chipathlon.workflow import Workflow
import argparse
parser = argparse.ArgumentParser(description="Generates a workflow from a run, param & config file.")
parser.add_argument("-H", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-d", "--dir", dest="dir", required=True, help="Directory name to generate files in.")
parser.add_argument("--param", dest="param", required=True, help="Path to param file to load.")
parser.add_argument("--conf", dest="config", required=True, help="Path to config file to load.")
......
......@@ -3,9 +3,9 @@ import chipathlon.utils
import argparse
parser = argparse.ArgumentParser(description="Download target file from GridFS.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-H", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-i", "--id", dest="gridfs_id", required=True, help="GridFS ID.")
parser.add_argument("-d", "--destination", dest="destination", required=True, help="Local path to file destination.")
parser.add_argument("-r", "--retries", dest="retries", default=3, type=int, help="Number of retries.")
......
......@@ -5,9 +5,9 @@ import yaml
import os
parser = argparse.ArgumentParser(description="Insert a bed file into the database.")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-u", "--username", dest="username", required=True, help="Database user.")
parser.add_argument("-d", "--host", dest="host", required=True, help="Database host.")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-d", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-f", "--file", dest="file", required=True, help="Path to result file.")
parser.add_argument("-m", "--meta", dest="meta", required=True, help="Path to meta yaml file.")
args = parser.parse_args()
......
......@@ -12,6 +12,7 @@ import datetime
parser = argparse.ArgumentParser(description="Download raw JSON for all experiments.")
parser.add_argument("-o", "--output-dir", dest="outputdir", default=os.getcwd(), help="Output directory. (default: %(default)s)")
parser.add_argument("-q", "--quiet", action='store_true', help="Quiet mode. Do not print progress information. (default: false)")
args = parser.parse_args()
encode_baseurl = "https://www.encodeproject.org/experiments/"
......@@ -39,7 +40,8 @@ for i, exp_id in enumerate(exp_ids):
r = requests.get(exp_url, params=json_arg)
with open(os.path.join(args.outputdir, "data", "%s.json" % (exp_id,)), "w") as wh:
wh.write(r.text)
progress(i, total)
progress(total, total)
if not args.quiet:
progress(i, total)
if not args.quiet:
progress(total, total)
print
......@@ -9,18 +9,20 @@ import os
import os.path
parser = argparse.ArgumentParser(description="Read per-experiment JSON files and create experiment and samples collections.")
parser.add_argument("-H", "--host", dest="host", default="hcc-anvil-175-9.unl.edu", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", default="aknecht", required=True, help="Database user. (default: %(default)s)")
parser.add_argument("-p", "--password", dest="password", required=True, help="Database user password.")
parser.add_argument("-H", "--host", dest="host", default="localhost", help="Database host. (default: %(default)s)")
parser.add_argument("-u", "--username", dest="username", help="Database username (if required).")
parser.add_argument("-p", "--password", dest="password", help="Database password (if required).")
parser.add_argument("-i", "--input-dir", dest="inputdir", default=os.getcwd(), help="Directory containing per-experiment JSON files. (default: %(default)s)")
parser.add_argument("-d", "--drop", dest="drop", default=False, action="store_true", help="Drop data if it exists. (default: %(default)s)")
parser.add_argument("-s", "--samples", dest="samples", default=False, action="store_true", help="Only recreate the samples collection.")
parser.add_argument("-q", "--quiet", action='store_true', help="Quiet mode. Do not print progress information. (default: false)")
args = parser.parse_args()
client = MongoClient(args.host)
db = client.chipseq
db.authenticate(args.username, args.password, mechanism="SCRAM-SHA-1")
if args.username:
db.authenticate(args.username, args.password, mechanism="SCRAM-SHA-1")
if args.drop:
if not args.samples:
......@@ -30,7 +32,8 @@ if args.drop:
if not args.samples:
# Insert all experiment JSON files into DB
json_filelist = os.listdir(args.inputdir)
if not args.quiet:
print "Creating experiments collection..."
for i, json_file in enumerate(json_filelist):
if json_file.endswith("json"):
exp_file_contents = open(os.path.join(args.inputdir, json_file)).read()
......@@ -42,7 +45,8 @@ if not args.samples:
db.experiments.insert_one(exp_json_data)
except Exception:
pass
progress(i, len(json_filelist))
if not args.quiet:
progress(i, len(json_filelist))
organism = {
"mm": "mouse",
......@@ -59,6 +63,8 @@ cursor = db.experiments.find({
total = cursor.count()
# Create the samples collection using the 'files' value from each experiment
if not args.quiet:
print "\nCreating samples collection..."
for i, document in enumerate(cursor):
for f in document["files"]:
doc = {}
......@@ -84,8 +90,8 @@ for i, document in enumerate(cursor):
db.samples.update_one({'_id': result.inserted_id}, {"$set": f})
except Exception:
pass
progress(i, total)
if not args.quiet:
progress(i, total)
progress(total, total)
print
cursor.close()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment