#!/usr/bin/env python
# chip-create-run: create a YAML run file from a list of experiment accessions.
from chipathlon.db import MongoDB
import chipathlon.conf
import argparse
import yaml
import pprint

# Command-line interface.  Options are registered in the order they should
# appear in --help: database connection, output settings, then accessions.
parser = argparse.ArgumentParser(
    description="Create a run file from a list of experiment accessions."
)

# Database connection options.
parser.add_argument(
    "-H", "--host",
    dest="host",
    default="localhost",
    help="Database host. (default: %(default)s)"
)
parser.add_argument(
    "-u", "--username",
    dest="username",
    help="Database username (if required)."
)
parser.add_argument(
    "-p", "--password",
    dest="password",
    help="Database password (if required)."
)

# Output file name and the kind of sample files to look up.
parser.add_argument(
    "-n", "--name",
    dest="name",
    required=True,
    help="Name of the run file to create."
)
parser.add_argument(
    "-f", "--file_type",
    dest="file_type",
    default="fastq",
    help="Type of files to extract (fastq or bam)."
)

# One or more experiment accessions; each is looked up in the database.
parser.add_argument(
    "-a", "--accessions",
    dest="accessions",
    nargs="+",
    required=True,
    help="List of experiment accessions to load."
)

# Parsed from the process command line; exits with usage on invalid input.
args = parser.parse_args()

# Distinct genome assemblies seen while loading experiments; each one later
# receives its own entry under "genomes".
assemblies = set()

# Structure that is serialised to the run file: per-assembly genome settings
# plus a flat list of run definitions.
write_dict = {"genomes": {}, "runs": []}

# Database handle used to look up the samples of each experiment.
mdb = MongoDB(args.host, args.username, args.password)

# For every requested experiment accession, look up its samples and append
# one run definition per peak tool / peak type / alignment tool combination
# to write_dict["runs"].  Accessions whose lookup fails are reported and
# skipped.
for accession in args.accessions:
    # Single-argument parenthesised print produces the same output on
    # Python 2 and also parses on Python 3.
    print("Loading samples for experiment accession: %s" % (accession,))
    valid, msg, sample_data = mdb.get_samples(accession, args.file_type)
    if not valid:
        # Show the message returned by the lookup and continue with the
        # next accession.
        print(msg)
        continue

    # The genome of the first signal sample is used as the assembly for
    # the whole experiment.
    exp_assembly = sample_data["signal"][0]["genome"]
    assemblies.add(exp_assembly)

    run = {"file_type": args.file_type, "assembly": exp_assembly}
    run["signals"] = [sample["accession"] for sample in sample_data["signal"]]
    run["controls"] = [sample["accession"] for sample in sample_data["control"]]

    # Use the first two signal samples for idr when available.  The count
    # has to be taken with len(): comparing the list itself against 2 is
    # always true on Python 2 (and a TypeError on Python 3), which made
    # single-signal experiments fail with an IndexError.
    if len(run["signals"]) >= 2:
        run["idr"] = [run["signals"][0], run["signals"][1]]

    # For each combination of peak calling tool / alignment tool generate
    # a new run.  `run` is reused as a template; a copy is stored each time
    # so later assignments do not alter entries already appended.
    for peak_tool in chipathlon.conf.peak_tools:
        run["peak"] = peak_tool
        for peak_type in chipathlon.conf.peak_types[peak_tool]:
            run["peak_type"] = peak_type
            if args.file_type == "fastq":
                # fastq input: one run per configured alignment tool.
                for align_tool in chipathlon.conf.align_tools:
                    run["align"] = align_tool
                    write_dict["runs"].append(run.copy())
            else:
                # Any other file type gets a single run with "align" set
                # to "bwa".
                run["align"] = "bwa"
                write_dict["runs"].append(run.copy())

# Give every assembly that was encountered its own placeholder genome entry.
# A fresh dict is built per assembly so the entries are independent objects.
write_dict["genomes"].update({
    name: {
        "bwa": "/path/to/bwa/base",
        "bowtie2": "/path/to/bowtie2/base",
        "chrom.sizes": "/path/to/chrom/sizes"
    }
    for name in assemblies
})

# Serialise the collected genomes and runs to the requested file as
# block-style YAML.
with open(args.name, "w") as out_file:
    yaml.safe_dump(write_dict, stream=out_file, default_flow_style=False)