Commit 73a0f8c1 authored by aknecht2's avatar aknecht2
Browse files

Added valid_sample checking in database. Fixed db_save_results to use sample...

Added valid_sample checking in database.  Fixed db_save_results to use sample ids.  Adjusted memory request usage for spp & db_save.
parent fc76e368
......@@ -39,8 +39,9 @@ class MongoDB(object):
# Make sure output_file exists
if os.path.isfile(output_file):
# Make sure that all control_ids & experiment_ids are valid
valid_controls = [self.is_valid_experiment(cid) for cid in control_ids]
valid_experiments = [self.is_valid_experiment(eid) for eid in experiment_ids]
# REMEMBER, these are ids for control & experiment SAMPLES
valid_controls = [self.is_valid_sample(cid) for cid in control_ids]
valid_experiments = [self.is_valid_sample(eid) for eid in experiment_ids]
if all(valid_controls) and all(valid_experiments):
# First, we load the output file into gfs
with open(output_file, "r") as rh:
......@@ -59,11 +60,9 @@ class MongoDB(object):
result = self.db.results.insert_one(result_entry)
return (True, "Result created successfully.", result.inserted_id)
else:
msg = "Not all input ids are valid. The following are invalid:"
msg = "Not all input ids are valid. The following are invalid: "
for id_list, valid_list in zip([control_ids, experiment_ids], [valid_controls, valid_experiments]):
for i, valid in enumerate(valid_list):
if not valid:
msg += id_list[i] + ", "
msg += ",".join([id_list[i] for i, valid in enumerate(valid_list) if not valid])
else:
msg = "Specified output_file %s does not exist." % (output_file,)
return (False, msg, None)
......@@ -77,6 +76,7 @@ class MongoDB(object):
# chr, start, end, name, score, strand
# Load data using a list comprehension over lines,
# then insert with insert_many()
print "loading bed_data..."
with open(bed_file, "r") as rh:
bed_data = [
{
......@@ -92,6 +92,7 @@ class MongoDB(object):
for line_info in (line.split(),)
]
try:
print "bed data loaded, inserting."
self.db.bed.insert_many(bed_data)
return (True, "Bed file successfully inserted.", result_id)
except pymongo.errors.OperationFailure as e:
......@@ -132,6 +133,17 @@ class MongoDB(object):
msg = "Error inserting peak_file %s: %s" % (peak_file, e)
return (valid, msg, None)
def is_valid_sample(self, sample_accession):
    """Return True when exactly one sample has the given accession.

    Queries the ``samples`` collection for documents whose ``accession``
    field equals ``sample_accession``.

    :param sample_accession: Accession value to look up.
    :returns: True if exactly one matching document exists; False when
        there are zero or several matches, or when the query raises
        ``pymongo.errors.OperationFailure`` (the error is printed).
    """
    try:
        cursor = self.db.samples.find({
            "accession": sample_accession
        })
        # NOTE(review): Cursor.count() is deprecated in newer PyMongo
        # releases in favour of Collection.count_documents(); kept here to
        # match the driver version the rest of this class is written for.
        return cursor.count() == 1
    except pymongo.errors.OperationFailure as e:
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Error with sample_accession %s: %s" % (sample_accession, e))
    return False
def is_valid_experiment(self, experiment_id):
try:
cursor = self.db.experiments.find({
......
......@@ -40,5 +40,5 @@ db_save_result:
has_value: true
default: $inputs.4
walltime: 2000
memory: 2000
memory: 16000
cores: 1
......@@ -65,5 +65,5 @@ r_spp_nodups:
has_value: true
default: 8
walltime: 2000
memory: 8000
memory: 16000
cores: 8
#!/usr/bin/env python
import chipathlon.db
import argparse
import yaml
......@@ -15,7 +16,13 @@ if os.path.isfile(args.file) and os.path.isfile(args.meta):
mdb = chipathlon.db.MongoDB(args.host, args.username, args.password)
with open(args.meta, "r") as rh:
meta = yaml.load(rh)
valid = False
msg = ""
if meta["result_type"] == "bed":
mdb.save_bed(args.file, meta["control_ids"], meta["experiment_ids"], meta)
valid, msg, data = mdb.save_bed(args.file, meta["control_ids"], meta["experiment_ids"], meta)
elif meta["result_type"] == "peak":
mdb.save_peak(args.file, meta["control_ids"], meta["experiment_ids"], meta)
valid, msg, data = mdb.save_peak(args.file, meta["control_ids"], meta["experiment_ids"], meta)
print msg
else:
print "Either input file %s or meta file %s does not exist." % (args.file, args.meta)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment