From b743bb9761eb2500147c9351d6c6ff0bd4d09d60 Mon Sep 17 00:00:00 2001
From: Adam Caprez <acaprez2@unl.edu>
Date: Thu, 6 Jul 2017 16:44:30 -0500
Subject: [PATCH] Add resume option to meta download script.

Fixes #36.
---
 scripts/chip-meta-download | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/scripts/chip-meta-download b/scripts/chip-meta-download
index 7c1a674..03657fe 100644
--- a/scripts/chip-meta-download
+++ b/scripts/chip-meta-download
@@ -13,6 +13,8 @@ import datetime
 parser = argparse.ArgumentParser(description="Download raw JSON for all experiments.")
 parser.add_argument("-o", "--output-dir", dest="outputdir", default=os.getcwd(), help="Output directory.  (default: %(default)s)")
 parser.add_argument("-q", "--quiet", action='store_true', help="Quiet mode.  Do not print progress information. (default: false)")
+parser.add_argument("-r", "--resume", action='store_false', \
+    help="Skip re-fetching existing experiment JSON files to speed up overall download. (default: true)")
 args = parser.parse_args()
 
 encode_baseurl = "https://www.encodeproject.org/experiments/"
@@ -37,9 +39,17 @@ if not os.path.isdir(os.path.join(args.outputdir, "data")):
 total = len(exp_ids)
 for i, exp_id in enumerate(exp_ids):
     exp_url = urlparse.urljoin(encode_baseurl, exp_id)
-    r = requests.get(exp_url, params=json_arg)
-    with open(os.path.join(args.outputdir, "data", "%s.json" % (exp_id,)), "w") as wh:
-        wh.write(r.text)
+    json_file = os.path.join(args.outputdir, "data", "%s.json" % (exp_id,))
+    if args.resume:
+        if not os.path.isfile(json_file) or os.path.getsize(json_file) == 0:
+            r = requests.get(exp_url, params=json_arg)
+            with open(json_file, "w") as wh:
+                wh.write(r.text)
+    else:
+        r = requests.get(exp_url, params=json_arg)
+        with open(json_file, "w") as wh:
+                wh.write(r.text)
+
     if not args.quiet:
         progress(i, total)
 if not args.quiet:
-- 
GitLab