# download_generator.py (2.23 KB) -- committed by aknecht2
from module_generator import ModuleGenerator
2
3
4

class DownloadGenerator(ModuleGenerator):
    """
    Module generator whose ``module_name`` is ``"download"``.

    :meth:`parse_run` walks every sample of a run and yields the inputs
    and outputs describing one ``.fastq.gz`` file per sample.
    """

    def __init__(self, master_files, workflow_module, run_data, debug=False):
        """
        :param master_files: The dictionary mapping file name -> file object.
        :type master_files: dict
        :param workflow_module: The actual module being used.
        :type workflow_module: chipathlon.workflow_module.WorkflowModule
        :param run_data: Input sample data.
        :type run_data: chipathlon.run_data.RunData
        :param debug: If true, prints out params for each job & module.
        :type debug: bool
        """
        super(DownloadGenerator, self).__init__(master_files, workflow_module, run_data, debug)
        self.module_name = "download"

    def parse_run(self, run_index):
        """
        Generate necessary params for a single run.

        :param run_index: The index of the run in the yaml file.
        :type run_index: int

        This is a generator.  For every sample listed under the
        "experiment" and "control" treatments of each experiment in the
        run it yields a 4-tuple ``({}, inputs, additional_inputs, outputs)``:

        * the first element is always an empty dict here,
        * ``inputs`` holds the sample's download ``url`` and ``md5``,
        * ``additional_inputs`` is always an empty dict here,
        * ``outputs`` is whatever ``self.construct_outputs`` returns.
        """
        run = self.run_data.runs[run_index]
        for experiment_id in run["experiments"]:
            for treatment in ("experiment", "control"):
                for sample in run["samples"][experiment_id][treatment]:
                    inputs = {
                        # Use the sample's "hcc_url" entry when it has one,
                        # otherwise fall back to its plain "url".
                        "url": sample["hcc_url"] if "hcc_url" in sample else sample["url"],
                        "md5": sample["md5sum"],
                    }
                    additional_inputs = {}

                    accession = sample["accession"]
                    # The same <experiment>_<accession> stem names both the
                    # output file and the prefix handed to construct_outputs.
                    prefix = "%s_%s" % (experiment_id, accession)
                    file_data = [
                        [{
                            "file_name": prefix + ".fastq.gz",
                            "save_result": False,
                        }]
                    ]

                    # A sample belongs to exactly one treatment, so only one
                    # of the two id lists is populated; the other stays empty.
                    is_control = treatment == "control"
                    control_sample_ids = [accession] if is_control else []
                    experiment_sample_ids = [] if is_control else [accession]

                    outputs = self.construct_outputs(
                        file_data, {}, {}, prefix, sample,
                        experiment_sample_ids, control_sample_ids, []
                    )
                    yield ({}, inputs, additional_inputs, outputs)