"""
This file fetches and sanity-checks the dataset so it can be consumed by the algorithm.
- Download the Vimeo90K dataset (get the original test set - not downsampled or downgraded by noise) from:
http://data.csail.mit.edu/tofu/testset/vimeo_test_clean.zip
- Run this code for LR and HR seperately to form a sorted data folder for convenience
- To delete all the .DS_Store files: find . -name '.DS_Store' -type f -delete
Aman Chadha | aman@amanchadha.com
"""
import argparse, os, sys, shutil, urllib.request, logger
from tqdm import tqdm
import zipfile
################################################### DATASETFETCHER KNOBS ###############################################
# URL to get the Vimeo90K dataset (get the original test set - not downsampled or downgraded by noise) from
DATASET_URL = "http://data.csail.mit.edu/tofu/testset/vimeo_test_clean.zip"
# Folder where all the data resides
DATA_FOLDER = "vimeo_septuplet"
# Folder within data where the HR dataset resides
SOURCE_PATH = os.path.join(DATA_FOLDER, "vimeo_test_clean")
# Filename of the dataset
DATASET_FILE = os.path.basename(DATASET_URL)
# Destination folder
DEST_PATH = os.path.join(DATA_FOLDER, "HR")
########################################################################################################################
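# Expected on-disk layout after a successful run (derived from the copy loop at
# the bottom of this script; the HR/LR split assumes two separate runs):
#   vimeo_septuplet/
#   |-- vimeo_test_clean.zip          <- downloaded archive
#   |-- vimeo_test_clean/sequences/   <- extracted Vimeo90K clips
#   |-- HR/1, HR/2, ...               <- one numbered folder per frame septuplet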
parser = argparse.ArgumentParser(description='iSeeBetter Dataset Fetcher.')
parser.add_argument('-d', '--debug', default=False, action='store_true', help='Print debug spew.')
args = parser.parse_args()
# Initialize logger
logger.initLogger(args.debug)
# Create a data folder if it doesn't exist
if not os.path.exists(DATA_FOLDER):
    try:
        os.mkdir(DATA_FOLDER)
    except OSError:
        logger.info("Creation of the directory %s failed", DATA_FOLDER)
    else:
        logger.debug("Successfully created the directory: %s", DATA_FOLDER)
class downloadProgressBar(tqdm):
    """tqdm-based progress bar that can be fed to urlretrieve as a reporthook."""
    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)

def downloadURL(url, output_path):
    """Download url to output_path while displaying a progress bar."""
    with downloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
        urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)
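# Note: urlretrieve calls the reporthook as reporthook(block_number, block_size,
# total_size), which is why update_to() computes the bytes transferred so far as
# b * bsize and subtracts self.n (bytes already counted) to get the increment.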
# If the dataset doesn't exist, download and extract it
if not os.path.exists(SOURCE_PATH):
    # Fetch the dataset if it hasn't been downloaded yet
    if not os.path.exists(os.path.join(DATA_FOLDER, DATASET_FILE)):
        downloadURL(DATASET_URL, os.path.join(DATA_FOLDER, DATASET_FILE))

    # Extract it
    logger.info("Extracting: %s", os.path.join(DATA_FOLDER, DATASET_FILE))
    try:
        with zipfile.ZipFile(os.path.join(DATA_FOLDER, DATASET_FILE), 'r') as zipObj:
            # Extract all the contents of the zip file into the data folder
            zipObj.extractall(DATA_FOLDER)
    except zipfile.BadZipFile:
        # The archive was corrupt or truncated; re-download it and retry the extraction
        downloadURL(DATASET_URL, os.path.join(DATA_FOLDER, DATASET_FILE))
        with zipfile.ZipFile(os.path.join(DATA_FOLDER, DATASET_FILE), 'r') as zipObj:
            zipObj.extractall(DATA_FOLDER)
# Recursively remove all the ".DS_Store" files
for currentPath, _, currentFiles in os.walk(SOURCE_PATH):
    if ".DS_Store" in currentFiles:
        os.remove(os.path.join(currentPath, ".DS_Store"))
# Make a list of video sequences
sequencesPath = os.path.join(SOURCE_PATH, "sequences")
videoList = os.listdir(sequencesPath)
videoList.sort()

# Go through each video sequence and copy it over in the structure we need:
# each septuplet folder becomes a sequentially numbered folder under DEST_PATH
count = 0
for video in videoList:
    videoPath = os.path.join(sequencesPath, video)
    framesList = os.listdir(videoPath)
    framesList.sort()
    for frames in framesList:
        framesPath = os.path.join(videoPath, frames)
        count += 1
        destPath = os.path.join(DEST_PATH, str(count))
        logger.info("Creating: %s", destPath)
        shutil.copytree(framesPath, destPath)
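# To build the matching LR folder, re-run this script with the knobs above
# repointed at the low-resolution data (a sketch; "vimeo_test_lr" is a
# hypothetical folder name - use wherever your LR sequences actually live):
#   SOURCE_PATH = os.path.join(DATA_FOLDER, "vimeo_test_lr")
#   DEST_PATH = os.path.join(DATA_FOLDER, "LR")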