from pymongo import MongoClient
import argparse
import json
import sys
def progress(current, end):
    """Redraw a one-line, 20-character progress bar on stdout.

    The line starts with a carriage return and has no trailing newline, so
    each call overwrites the previous bar instead of scrolling the terminal.

    Args:
        current: Number of entries processed so far.
        end: Total number of entries. When it is 0 there is nothing to
            process, so the bar is drawn as complete.
    """
    bar_width = 20
    # An empty result set gives end == 0; report it as finished rather than
    # raising ZeroDivisionError.
    percent = float(current) / end if end else 1.0
    hashes = "#" * int(round(percent * bar_width))
    spaces = " " * (bar_width - len(hashes))
    sys.stdout.write(
        "\rProcessed %s / %s entries. [%s] %s%%"
        % (current, end, hashes + spaces, int(round(percent * 100)))
    )
    # No newline is written, so flush explicitly; otherwise a buffered stdout
    # may hold the bar back until much later.
    sys.stdout.flush()
# Command-line interface. The three connection options are mandatory, so they
# declare no default: argparse never falls back to `default` for an option
# marked required=True, and listing one would only suggest a fallback that
# cannot happen.
# NOTE(review): a password passed on the command line is visible in the process
# list and shell history; consider getpass or an environment variable.
parser = argparse.ArgumentParser(description="Import all test data into the database.")
parser.add_argument("--password", dest="password", required=True, help="Database user password.")
parser.add_argument("--username", dest="username", required=True, help="Database user.")
parser.add_argument("--host", dest="host", required=True, help="Database host.")
# --drop is a plain flag: absent -> False, present -> True.
parser.add_argument("--drop", dest="drop", default=False, action="store_true", help="Drop data if it exists.")
args = parser.parse_args()
# All data lives in a MongoDB database named "chipseq".
# Set up the connection and authenticate.
# NOTE(review): indentation has been lost in this copy of the file, so the
# nesting of the `with`, `for` and `if` bodies below must be restored by hand.
# NOTE(review): the MongoClient(...) call is unterminated here -- its arguments
# and closing parenthesis are missing. Presumably it connected to args.host;
# TODO confirm and restore.
client = MongoClient(
db = client.chipseq
# Authenticate against the chipseq database with the CLI-supplied credentials.
# NOTE(review): Database.authenticate was removed in PyMongo 4.0; on newer
# drivers the credentials are passed to MongoClient instead.
db.authenticate(args.username, args.password, mechanism="SCRAM-SHA-1")
# NOTE(review): this `if` has no body in this copy. Presumably it dropped the
# existing collections when --drop is given; TODO confirm and restore.
if args.drop:
# Insert all experiment metadata, expected in meta/meta_clean.json
with open("meta/meta_clean.json", "r") as rh:
data = json.load(rh)
# The insert_many result is kept in exp_ids but is not read again in the
# visible code.
exp_ids = db.experiments.insert_many(data)
# Maps the first two characters of an assembly name to an organism name
# (used below as organism[document["assembly"][0][:2]]).
# NOTE(review): the closing brace of this dict literal is missing here.
organism = {
"mm": "mouse",
"hg": "human",
"ce": "celegans"
# Loop through experiments to create samples.
# The filter keeps experiments that have a "target" field, no element at
# revoked_files[0], an element at assembly[0] and none at assembly[1]
# (presumably: exactly one assembly and no revoked files).
# NOTE(review): the closing "})" of this find() call is missing here.
cursor = db.experiments.find({
"target": {"$exists": True},
"revoked_files.0": {"$exists": False},
"assembly.0": {"$exists": True},
"assembly.1": {"$exists": False}
# Total number of matching experiments, used as the progress-bar denominator.
# NOTE(review): Cursor.count was removed in PyMongo 4.0;
# Collection.count_documents(filter) is the replacement.
total = cursor.count()
for i,document in enumerate(cursor):
# One sample document is built per entry in the experiment's "files" list.
for f in document["files"]:
doc = {}
doc["experiment_id"] = document["_id"]
# A KeyError is raised here if the assembly prefix is not in `organism`.
doc["organism"] = organism[document["assembly"][0][:2]]
doc["genome"] = document["assembly"][0]
# For targets containing "/targets/H", the third "/"-separated segment is
# split: its first two characters become the histone modification and the
# rest, up to the first "-", the transcription factor.
if "/targets/H" in document["target"]:
doc["histone_modification"] = document["target"].split("/")[2][:2]
doc["transcription_factor"] = document["target"].split("/")[2][2:].split("-")[0]
# NOTE(review): as written, this assignment overwrites the one above;
# presumably an `else:` belongs before it. TODO confirm.
doc["transcription_factor"] = document["target"].split("/")[2].split("-")[0]
doc["cell_type"] = document["biosample_term_name"]
# NOTE(review): the format string is empty, so `"" % (f,)` raises TypeError
# ("not all arguments converted"); the URL template appears to have been
# removed. Also, `doc` is never stored anywhere in the visible code --
# presumably an insert into a samples collection is missing.
doc["url"] = "" % (f,)
# i is the 0-based index from enumerate(); the call after the loop draws the
# bar at total / total.
progress(i, total)
progress(total, total)
