From b743bb9761eb2500147c9351d6c6ff0bd4d09d60 Mon Sep 17 00:00:00 2001
From: Adam Caprez <acaprez2@unl.edu>
Date: Thu, 6 Jul 2017 16:44:30 -0500
Subject: [PATCH] Add resume option to meta download script. Fixes #36.

---
 scripts/chip-meta-download | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/scripts/chip-meta-download b/scripts/chip-meta-download
index 7c1a674..03657fe 100644
--- a/scripts/chip-meta-download
+++ b/scripts/chip-meta-download
@@ -13,6 +13,8 @@ import datetime
 parser = argparse.ArgumentParser(description="Download raw JSON for all experiments.")
 parser.add_argument("-o", "--output-dir", dest="outputdir", default=os.getcwd(), help="Output directory. (default: %(default)s)")
 parser.add_argument("-q", "--quiet", action='store_true', help="Quiet mode. Do not print progress information. (default: false)")
+parser.add_argument("-r", "--resume", action='store_true',
+                    help="Skip re-fetching existing experiment JSON files to speed up overall download. (default: false)")
 args = parser.parse_args()
 
 encode_baseurl = "https://www.encodeproject.org/experiments/"
@@ -37,9 +39,15 @@ if not os.path.isdir(os.path.join(args.outputdir, "data")):
 total = len(exp_ids)
 for i, exp_id in enumerate(exp_ids):
     exp_url = urlparse.urljoin(encode_baseurl, exp_id)
-    r = requests.get(exp_url, params=json_arg)
-    with open(os.path.join(args.outputdir, "data", "%s.json" % (exp_id,)), "w") as wh:
-        wh.write(r.text)
+    json_file = os.path.join(args.outputdir, "data", "%s.json" % (exp_id,))
+    # With --resume, keep any non-empty file left by a previous run; a
+    # missing or zero-length file is treated as not yet downloaded.
+    skip = args.resume and os.path.isfile(json_file) and os.path.getsize(json_file) > 0
+    if not skip:
+        r = requests.get(exp_url, params=json_arg)
+        with open(json_file, "w") as wh:
+            wh.write(r.text)
+
     if not args.quiet:
         progress(i, total)
 if not args.quiet:
-- 
GitLab