Commit b65b02bf authored by Adam Caprez
Browse files

Add result_lines_per_document config variable

Add conf.py entry for the number of result file lines to use for
each document in the bed and peak collections.
parent 67e43c78
......@@ -127,3 +127,7 @@ genomes = {
"additional_files": file_extensions["bowtie2_genome"]
}
}
# number of lines from BED and peak result files to
# include per-document for bed and peak collections
result_lines_per_document = 75000
......@@ -5,6 +5,7 @@ import sys
import traceback
import os
import itertools
import chipathlon.conf
from pprint import pprint
......@@ -82,8 +83,8 @@ class MongoDB(object):
print "loading bed_data..."
with open(bed_file, "r") as rh:
msg = "Bed file successfully inserted."
# Lazy load files in specified line chunk size ~100k lines
n_lines = 100000
# Lazy load files in specified line chunk size
n_lines = chipathlon.conf.result_lines_per_document
line_set = list(itertools.islice(rh, n_lines))
while line_set:
try:
......@@ -119,8 +120,8 @@ class MongoDB(object):
# result peak file.
with open(peak_file, "r") as rh:
msg = "Peak file successfully inserted."
# Lazy load files in specified line chunk size ~100k lines
n_lines = 10000
# Lazy load files in specified line chunk size
n_lines = chipathlon.conf.result_lines_per_document
line_set = list(itertools.islice(rh, n_lines))
while line_set:
try:
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment