Skip to content
Snippets Groups Projects
Commit 73a0f8c1 authored by aknecht2's avatar aknecht2
Browse files

Added valid_sample checking in database. Fixed db_save_results to use sample...

Added valid_sample checking in database.  Fixed db_save_results to use sample ids.  Adjusted memory request usage for spp & db_save.
parent fc76e368
No related branches found
No related tags found
No related merge requests found
...@@ -39,8 +39,9 @@ class MongoDB(object): ...@@ -39,8 +39,9 @@ class MongoDB(object):
# Make sure output_file exists # Make sure output_file exists
if os.path.isfile(output_file): if os.path.isfile(output_file):
# Make sure that all control_ids & experiment_ids are valid # Make sure that all control_ids & experiment_ids are valid
valid_controls = [self.is_valid_experiment(cid) for cid in control_ids] # REMEMBER, these are ids for control & experiment SAMPLES
valid_experiments = [self.is_valid_experiment(eid) for eid in experiment_ids] valid_controls = [self.is_valid_sample(cid) for cid in control_ids]
valid_experiments = [self.is_valid_sample(eid) for eid in experiment_ids]
if all(valid_controls) and all(valid_experiments): if all(valid_controls) and all(valid_experiments):
# First, we load the output file into gfs # First, we load the output file into gfs
with open(output_file, "r") as rh: with open(output_file, "r") as rh:
...@@ -61,9 +62,7 @@ class MongoDB(object): ...@@ -61,9 +62,7 @@ class MongoDB(object):
else: else:
msg = "Not all input ids are valid. The following are invalid: " msg = "Not all input ids are valid. The following are invalid: "
for id_list, valid_list in zip([control_ids, experiment_ids], [valid_controls, valid_experiments]): for id_list, valid_list in zip([control_ids, experiment_ids], [valid_controls, valid_experiments]):
for i, valid in enumerate(valid_list): msg += ",".join([id_list[i] for i, valid in enumerate(valid_list) if not valid])
if not valid:
msg += id_list[i] + ", "
else: else:
msg = "Specified output_file %s does not exist." % (output_file,) msg = "Specified output_file %s does not exist." % (output_file,)
return (False, msg, None) return (False, msg, None)
...@@ -77,6 +76,7 @@ class MongoDB(object): ...@@ -77,6 +76,7 @@ class MongoDB(object):
# chr, start, end, name, score, strand # chr, start, end, name, score, strand
# Load data using a list comprehension over lines, # Load data using a list comprehension over lines,
# then insert with insert_many() # then insert with insert_many()
print "loading bed_data..."
with open(bed_file, "r") as rh: with open(bed_file, "r") as rh:
bed_data = [ bed_data = [
{ {
...@@ -92,6 +92,7 @@ class MongoDB(object): ...@@ -92,6 +92,7 @@ class MongoDB(object):
for line_info in (line.split(),) for line_info in (line.split(),)
] ]
try: try:
print "bed data loaded, inserting."
self.db.bed.insert_many(bed_data) self.db.bed.insert_many(bed_data)
return (True, "Bed file successfully inserted.", result_id) return (True, "Bed file successfully inserted.", result_id)
except pymongo.errors.OperationFailure as e: except pymongo.errors.OperationFailure as e:
...@@ -132,6 +133,17 @@ class MongoDB(object): ...@@ -132,6 +133,17 @@ class MongoDB(object):
msg = "Error inserting peak_file %s: %s" % (peak_file, e) msg = "Error inserting peak_file %s: %s" % (peak_file, e)
return (valid, msg, None) return (valid, msg, None)
def is_valid_sample(self, sample_accession):
    """Check whether exactly one sample document has the given accession.

    Queries ``self.db.samples`` for documents whose ``accession`` field
    equals ``sample_accession`` and counts the matches.

    :param sample_accession: Value compared against the ``accession``
        field of documents in the ``samples`` collection.
    :returns: ``True`` only when the query matches exactly one document;
        ``False`` for zero matches, multiple matches, or when the query
        raises ``pymongo.errors.OperationFailure``.
    """
    query = {"accession": sample_accession}
    try:
        match_count = self.db.samples.find(query).count()
    except pymongo.errors.OperationFailure as err:
        # The failure is reported on stdout and treated as "not valid"
        # rather than propagated to the caller.
        # A single parenthesized argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Error with sample_accession %s: %s" % (sample_accession, err))
        return False
    # Zero matches and duplicate matches are both rejected.
    return match_count == 1
def is_valid_experiment(self, experiment_id): def is_valid_experiment(self, experiment_id):
try: try:
cursor = self.db.experiments.find({ cursor = self.db.experiments.find({
......
...@@ -40,5 +40,5 @@ db_save_result: ...@@ -40,5 +40,5 @@ db_save_result:
has_value: true has_value: true
default: $inputs.4 default: $inputs.4
walltime: 2000 walltime: 2000
memory: 2000 memory: 16000
cores: 1 cores: 1
...@@ -65,5 +65,5 @@ r_spp_nodups: ...@@ -65,5 +65,5 @@ r_spp_nodups:
has_value: true has_value: true
default: 8 default: 8
walltime: 2000 walltime: 2000
memory: 8000 memory: 16000
cores: 8 cores: 8
#!/usr/bin/env python
import chipathlon.db import chipathlon.db
import argparse import argparse
import yaml import yaml
...@@ -15,7 +16,13 @@ if os.path.isfile(args.file) and os.path.isfile(args.meta): ...@@ -15,7 +16,13 @@ if os.path.isfile(args.file) and os.path.isfile(args.meta):
mdb = chipathlon.db.MongoDB(args.host, args.username, args.password) mdb = chipathlon.db.MongoDB(args.host, args.username, args.password)
with open(args.meta, "r") as rh: with open(args.meta, "r") as rh:
meta = yaml.load(rh) meta = yaml.load(rh)
valid = False
msg = ""
if meta["result_type"] == "bed": if meta["result_type"] == "bed":
mdb.save_bed(args.file, meta["control_ids"], meta["experiment_ids"], meta) valid, msg, data = mdb.save_bed(args.file, meta["control_ids"], meta["experiment_ids"], meta)
elif meta["result_type"] == "peak": elif meta["result_type"] == "peak":
mdb.save_peak(args.file, meta["control_ids"], meta["experiment_ids"], meta) valid, msg, data = mdb.save_peak(args.file, meta["control_ids"], meta["experiment_ids"], meta)
print msg
else:
print "Either input file %s or meta file %s does not exist." % (args.file, args.meta)
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment