# db.py
from pymongo import MongoClient
2
3
4
import gridfs
import sys
import traceback
5
from pprint import pprint
aknecht2's avatar
aknecht2 committed
6

7

8
9
10
class MongoDB(object):
    """Access layer for the ``chipseq`` MongoDB database.

    The constructor connects and authenticates; the remaining methods query
    the ``experiments`` collection (joined against ``samples``).  The lookup
    helpers report failure through a ``(valid, msg, data)`` tuple rather than
    by raising.
    """

    # ``target`` is present and ``revoked_files`` has no first element.
    _REQUIRED_METADATA = {
        "target": {"$exists": True},
        "revoked_files.0": {"$exists": False},
    }
    # Exactly one assembly: element 0 exists, element 1 does not.
    _SINGLE_ASSEMBLY = {
        "assembly.0": {"$exists": True},
        "assembly.1": {"$exists": False},
    }
    # ``possible_controls`` has at least one element.
    _HAS_CONTROLS = {
        "possible_controls.0": {"$exists": True},
    }

    def __init__(self, host, username, password):
        """Connect to ``host`` and authenticate against the ``chipseq`` db.

        On authentication failure the error and traceback are printed and
        the process exits with status 1.

        :param host: Host (or URI) handed to ``MongoClient``.
        :param username: User to authenticate as.
        :param password: Password for ``username``.
        """
        self.client = MongoClient(host)
        self.db = self.client.chipseq
        try:
            # NOTE(review): Database.authenticate is a PyMongo 3.x API; on
            # PyMongo 4+ credentials must be passed to MongoClient instead.
            self.db.authenticate(username, password, mechanism="SCRAM-SHA-1")
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C and
            # SystemExit are not reported as authentication failures.
            print("Could not authenticate to db %s!" % (host,))
            print(traceback.format_exc())
            sys.exit(1)
        self.gfs = gridfs.GridFS(self.db)

    @staticmethod
    def _id_filter(experiment_id):
        """Return the query fragment selecting one experiment by ``@id``."""
        return {"@id": "/experiments/%s/" % (experiment_id,)}

    @staticmethod
    def _merge_filters(*filters):
        """Return a new dict combining ``filters`` without mutating any."""
        merged = {}
        for part in filters:
            merged.update(part)
        return merged

    @staticmethod
    def _is_fastq(sample):
        """Return True when ``sample`` has ``file_type`` equal to "fastq"."""
        return sample.get("file_type") == "fastq"

    def _find_unique(self, query):
        """Return the single experiment matching ``query``, else ``None``.

        ``None`` is returned both when nothing matches and when more than
        one document matches.  At most two documents are fetched, which is
        enough to tell "exactly one" apart without ``Cursor.count()``.
        """
        matches = list(self.db.experiments.find(query).limit(2))
        if len(matches) == 1:
            return matches[0]
        return None

    def load_bed(self, collection, result_id, bed_file, attributes=None):
        """Placeholder: currently does nothing and returns ``None``.

        None of the arguments are used yet.  ``attributes`` defaults to
        ``None`` instead of a shared mutable ``{}``.
        """
        return

    def check_valid_samples(self):
        """Count experiments passing the metadata filters, and how many of
        those have at least one joined sample.

        :returns: ``(has_samples, total)`` where ``total`` is the number of
            experiments with a target, no revoked files and exactly one
            assembly, and ``has_samples`` is how many of them have a
            non-empty ``samples`` join.
        """
        cursor = self.db.experiments.aggregate([
            {
                "$match": self._merge_filters(
                    self._REQUIRED_METADATA,
                    self._SINGLE_ASSEMBLY,
                )
            },
            {
                "$lookup": {
                    "from": "samples",
                    "localField": "uuid",
                    "foreignField": "experiment_id",
                    "as": "samples",
                }
            },
        ])
        total = 0
        has_samples = 0
        for document in cursor:
            total += 1
            if document["samples"]:
                has_samples += 1
        return (has_samples, total)

    def get_assembly(self, experiment_id):
        """Look up the single assembly of an experiment.

        :param experiment_id: Id interpolated into ``/experiments/<id>/``.
        :returns: ``(valid, msg, data)``; ``data`` is the first (only)
            assembly entry on success and ``""`` otherwise.
        """
        document = self._find_unique(self._merge_filters(
            self._REQUIRED_METADATA,
            self._SINGLE_ASSEMBLY,
            self._id_filter(experiment_id),
        ))
        if document is None:
            # The same message is used when the experiment exists but fails
            # the metadata/assembly filters.
            msg = "Experiment with id '%s' does not exist.\n" % (experiment_id,)
            return (False, msg, "")
        msg = "Succesfully retrieved assembly for experiment with id '%s'.\n" % (experiment_id,)
        return (True, msg, document["assembly"][0])

    def get_samples(self, experiment_id):
        """Collect the fastq inputs of an experiment and of its controls.

        The experiment is validated in three increasingly strict steps so
        that the failure message can say which requirement was not met.

        :param experiment_id: Id interpolated into ``/experiments/<id>/``.
        :returns: ``(valid, msg, data)``.  On success ``data`` is
            ``{"control": [...], "experiment": [...]}`` holding the sample
            documents whose ``file_type`` is ``"fastq"``; on any failure
            ``data`` is ``{}``.
        """
        id_filter = self._id_filter(experiment_id)

        # 1. The experiment must exist (exactly once).
        if self._find_unique(id_filter) is None:
            msg = "Experiment with id '%s' does not exist.\n" % (experiment_id,)
            return (False, msg, {})

        # 2. All required metadata must be defined.
        with_metadata = self._merge_filters(self._REQUIRED_METADATA, id_filter)
        if self._find_unique(with_metadata) is None:
            msg = "Experiment with id '%s' does not have all required metadata (target exists and no revoked_files).\n" % (experiment_id,)
            return (False, msg, {})

        # 3. There must be at least one possible control.  This step also
        #    applies the single-assembly filter, so an assembly mismatch is
        #    reported with the same message.
        full_filter = self._merge_filters(
            with_metadata,
            self._SINGLE_ASSEMBLY,
            self._HAS_CONTROLS,
        )
        if self._find_unique(full_filter) is None:
            msg = "Experiment with id '%s' does not have possible_controls.\n" % (experiment_id,)
            return (False, msg, {})

        # Aggregation pipeline:
        # 1. Find the experiment that matches the given id
        # 2. Join the experiment's own samples by experiment uuid
        # 3. Emit one document per entry of possible_controls
        # 4. Join each control's samples by the control's uuid
        # 5. Re-aggregate everything back into arrays on one document
        pipeline = [
            {
                "$match": full_filter
            },
            {
                "$lookup": {
                    "from": "samples",
                    "localField": "uuid",
                    "foreignField": "experiment_id",
                    "as": "samples",
                }
            },
            {
                "$unwind": "$possible_controls"
            },
            {
                "$lookup": {
                    "from": "samples",
                    "localField": "possible_controls.uuid",
                    "foreignField": "experiment_id",
                    "as": "possible_controls.samples",
                }
            },
            {
                "$group": {
                    "_id": "$_id",
                    "possible_controls": {"$push": "$possible_controls"},
                    "samples": {"$push": "$samples"},
                }
            },
        ]
        # We should have only 1 document
        document = next(self.db.experiments.aggregate(pipeline))

        control_inputs = [
            sample
            for control in document["possible_controls"]
            for sample in control["samples"]
            if self._is_fastq(sample)
        ]
        # ``samples`` was pushed once per unwound control, so every entry is
        # the same list; the first one is enough.
        experiment_inputs = [
            sample for sample in document["samples"][0]
            if self._is_fastq(sample)
        ]

        if not (control_inputs and experiment_inputs):
            msg = "Experiment with id '%s' has %s possible control inputs, and %s possible experiment inputs.\n" % (experiment_id, len(control_inputs), len(experiment_inputs))
            return (False, msg, {})

        msg = "Succesfully retrieved input files for experiment with id '%s'.\n" % (experiment_id,)
        data = {
            "control": control_inputs,
            "experiment": experiment_inputs,
        }
        return (True, msg, data)