Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Holland Computing Center
chipathlon
Commits
3ab6fe71
Commit
3ab6fe71
authored
Jun 06, 2017
by
aknecht2
Browse files
Added a function to remove unused files from gridfs.
parent
3f749a89
Changes
1
Hide whitespace changes
Inline
Side-by-side
chipathlon/db.py
View file @
3ab6fe71
...
...
@@ -10,6 +10,7 @@ import collections
import
chipathlon.conf
from
pprint
import
pprint
import
hashlib
from
chipathlon.utils
import
progress
class
MongoDB
(
object
):
...
...
@@ -350,6 +351,42 @@ class MongoDB(object):
)
return
(
valid
,
msg
,
data
)
def clean_gfs(self):
    """
    Remove every gridfs file that is not referenced by a result document.

    The ``gridfs_id`` of every document in ``results`` is pushed into a
    single aggregated document.  Each ``fs.files`` document whose ``_id``
    is not in that list is deleted: its ``fs.chunks`` documents first,
    then the ``fs.files`` document itself.

    If ``results`` holds no documents, no gridfs file is referenced and
    all of them are removed.

    A clean database is a happy database.

    :returns: None
    """
    cursor = self.db.results.aggregate([
        {
            "$group": {
                "_id": 1,
                "valid_ids": {"$push": "$gridfs_id"}
            }
        }
    ])
    # The single grouped doc contains all our valid ids.  $group yields no
    # document at all for an empty collection, so fall back to an empty
    # list instead of letting cursor.next() raise StopIteration.
    id_doc = next(cursor, None)
    valid_ids = id_doc["valid_ids"] if id_doc is not None else []

    # Find all fs.files documents that nothing in results points at
    gfs_cursor = self.db.fs.files.find({"_id": {"$nin": valid_ids}})
    total_files = gfs_cursor.count()
    # Parenthesised single argument: same output with the Python 2 print
    # statement and with the Python 3 print function.
    print("Found %s unused gridfs files. Preparing to delete...." % (total_files,))

    # Iterate through files, delete fs.chunks then fs.files.  Chunks go
    # first: if the run is interrupted, the fs.files document is still
    # there for the next run to find.
    for i, fs_file in enumerate(gfs_cursor):
        progress(i, total_files)
        self.db.fs.chunks.remove({"files_id": fs_file["_id"]})
        self.db.fs.files.remove({"_id": fs_file["_id"]})
    # Final call reports completion (total out of total)
    progress(total_files, total_files)
def
get_samples
(
self
,
experiment_accession
,
file_type
):
"""
:param experiment_accession: Accession number of the experiment to grab samples from.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment