chip-meta-download 2 KB
Newer Older
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python

import argparse
import json
import sys
from chipathlon.utils import progress
import requests
import urlparse
import os.path
10
11
import os
import datetime
12

13
14
# Command-line interface.
#
# NOTE: resume behaviour (skipping experiment files that already exist on
# disk) is ON by default.  The -r flag uses ``store_false``, so *passing* it
# sets ``args.resume`` to False and forces every file to be fetched again.
# The historical ``-r`` / ``--resume`` spellings are kept so existing
# invocations keep working; ``--no-resume`` is an extra alias whose name
# matches what the flag actually does.
parser = argparse.ArgumentParser(description="Download raw JSON for all experiments.")
parser.add_argument(
    "-o", "--output-dir", dest="outputdir", default=os.getcwd(),
    help="Output directory.  (default: %(default)s)")
parser.add_argument(
    "-q", "--quiet", action='store_true',
    help="Quiet mode.  Do not print progress information. (default: false)")
parser.add_argument(
    "-r", "--resume", "--no-resume", dest="resume", action='store_false',
    help="Disable resume mode and re-fetch every experiment JSON file. "
         "By default, existing non-empty experiment JSON files are skipped "
         "to speed up overall download.")
args = parser.parse_args()

# Base URL of the ENCODE experiment collection.
encode_baseurl = "https://www.encodeproject.org/experiments/"
# Query parameters shared by every request so the server answers with JSON.
json_arg = {'format': 'json'}

# The listing is saved under a name stamped with the current local time
# (second resolution) inside the chosen output directory.
current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
meta_file = os.path.join(args.outputdir, "encode_meta_%s.json" % (current_date,))

# Ask for the complete experiment listing.  All query parameters go through
# ``params`` so that ``format=json`` is sent exactly once.
r = requests.get(encode_baseurl, params=dict(json_arg, limit="all"))
# Stop on an HTTP error instead of saving an error page and failing later
# with a confusing JSON/KeyError.
r.raise_for_status()

# Store the body as the raw bytes the server sent.  Writing the decoded
# ``r.text`` to a text-mode file raises UnicodeEncodeError on Python 2 as
# soon as the payload contains a non-ASCII character.
with open(meta_file, "wb") as wh:
    wh.write(r.content)
data = json.loads(r.text)
30
31

# Pull the accession out of every entry under the listing's "@graph" key.
exp_ids = [exp["accession"] for exp in data["@graph"]]

# Per-experiment files are stored in <output-dir>/data; create the directory
# (including missing parents) when it is not there yet.
data_dir = os.path.join(args.outputdir, "data")
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)
37
38
39

# Loop through the IDs and use the exp ID to download the full JSON file.
total = len(exp_ids)
for i, exp_id in enumerate(exp_ids):
    exp_url = urlparse.urljoin(encode_baseurl, exp_id)
    json_file = os.path.join(args.outputdir, "data", "%s.json" % (exp_id,))

    # In resume mode a file that already exists and is non-empty is kept
    # as-is; a missing or zero-byte file is downloaded (again).  Without
    # resume mode every experiment is always downloaded.
    already_fetched = (
        args.resume
        and os.path.isfile(json_file)
        and os.path.getsize(json_file) > 0
    )
    if not already_fetched:
        r = requests.get(exp_url, params=json_arg)
        if r.ok:
            # Raw bytes: writing the decoded ``r.text`` to a text-mode file
            # raises UnicodeEncodeError on Python 2 for non-ASCII content.
            with open(json_file, "wb") as wh:
                wh.write(r.content)
        else:
            # Do not save an error page as experiment data: a non-empty
            # file would be treated as complete by a later resume run.
            # Leaving the file untouched lets the next run retry it.
            sys.stderr.write(
                "Failed to download %s (HTTP %s); skipping.\n"
                % (exp_url, r.status_code))

    if not args.quiet:
        progress(i, total)

if not args.quiet:
    progress(total, total)
    # Emit a final newline after the last progress update.  ``print("")``
    # behaves the same on Python 2 and 3 (a bare ``print`` is a no-op
    # expression on Python 3); it is skipped in quiet mode so that quiet
    # runs produce no output at all.
    print("")