Skip to content
Snippets Groups Projects

Resolve "Create result class"

Merged aknecht2 requested to merge 6-create-result-class into master
3 files
+ 186
99
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 37
23
@@ -14,6 +14,9 @@ import hashlib
@@ -14,6 +14,9 @@ import hashlib
class MongoDB(object):
class MongoDB(object):
def __init__(self, host, username, password):
def __init__(self, host, username, password):
 
self.host = host
 
self.username = username
 
self.password = password
self.client = MongoClient(host)
self.client = MongoClient(host)
self.db = self.client.chipseq
self.db = self.client.chipseq
try:
try:
@@ -39,33 +42,44 @@ class MongoDB(object):
@@ -39,33 +42,44 @@ class MongoDB(object):
print "result_id %s doesn't exist." % (result_id,)
print "result_id %s doesn't exist." % (result_id,)
return
return
def check_result(self, file_name, control_sample_ids, experiment_sample_ids, ref_genome, result_type, params, debug=False):
def _get_result_query(self, result, genome):
    """
    Build the MongoDB filter document used to look up a stored result.

    :param result: Object describing the result; this method reads its
        ``file_type``, ``full_name`` and ``all_jobs`` attributes and calls
        ``get_accessions("control")`` / ``get_accessions("signal")``.
    :param genome: Object whose ``assembly`` attribute is matched against
        the stored ``assembly`` field.
    :returns: A dict suitable for passing to ``collection.find``.
    """
    query = {
        "result_type": result.file_type,
        "assembly": genome.assembly,
        "timestamp": {"$exists": True},
        "file_name": result.full_name
    }
    # In the case that there are 0 samples we just want to check for existence.
    for sample_type in ("control", "signal"):
        accessions = result.get_accessions(sample_type)
        field = sample_type + "_sample_accessions"
        if len(accessions) > 0:
            query[field] = {"$all": accessions}
        else:
            query[field] = {"$exists": True}
    for job in result.all_jobs:
        job_args = job.get_db_arguments()
        if len(job_args) == 0:
            # A job without arguments only has to be present in the record.
            query[job.job_name] = {"$exists": True}
        else:
            # Dotted keys match individual fields of the job's sub-document.
            for arg_name, arg_value in job_args.items():
                query[job.job_name + "." + arg_name] = arg_value
    return query
 
 
def result_exists(self, result, genome):
    """
    Report whether at least one stored record matches the given result.

    :param result: Result object passed through to ``_get_result_query``;
        its ``full_name`` is used in the error message.
    :param genome: Genome object passed through to ``_get_result_query``.
    :returns: True if the query matches one or more documents, False if
        it matches none or the database operation fails.
    """
    try:
        cursor = self.db.results.find(self._get_result_query(result, genome))
        return cursor.count() > 0
    except pymongo.errors.OperationFailure as e:
        # This method has no `file_name` variable; report the result's own
        # name so the handler cannot itself fail with a NameError.
        print("Error checking result [%s]: %s" % (result.full_name, e))
        return False
def get_result(self, result, genome):
    """
    Fetch the most recent stored record matching the given result.

    :param result: Result object passed through to ``_get_result_query``;
        its ``full_name`` is used in the error message.
    :param genome: Genome object passed through to ``_get_result_query``.
    :returns: The matching document with the newest ``timestamp``, or
        None if nothing matches or the database operation fails.
    """
    try:
        cursor = self.db.results.find(self._get_result_query(result, genome))
        if cursor.count() > 0:
            # Use the latest if available
            return cursor.sort("timestamp", pymongo.DESCENDING).next()
    except pymongo.errors.OperationFailure as e:
        # This method has no `file_name` variable; report the result's own
        # name so the handler cannot itself fail with a NameError.
        print("Error checking result [%s]: %s" % (result.full_name, e))
    return None
def create_result(self, output_file, control_sample_ids, experiment_sample_ids, result_type, additional_data = {}, gfs_attributes = {}):
def create_result(self, output_file, control_sample_ids, experiment_sample_ids, result_type, additional_data = {}, gfs_attributes = {}):
Loading