Skip to content

Commit

Permalink
Implement web_download_untar (#96)
Browse files Browse the repository at this point in the history
  • Loading branch information
lovit committed Oct 4, 2020
1 parent a110496 commit f7de35d
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions Korpora/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import requests
import tarfile
import zipfile
from os.path import expanduser
from tqdm import tqdm
Expand Down Expand Up @@ -118,6 +119,18 @@ def web_download_unzip(url, zip_path, corpus_name='', force_download=False):
print(f'unzip {data_path}')


def web_download_untar(url, tar_path, corpus_name='', force_download=False):
    """Download a tar archive and extract it next to the archive file.

    The download itself is delegated to ``web_download``, called with the
    same arguments. The archive is then extracted into the directory that
    contains ``tar_path``, unless the expected extraction result already
    exists and ``force_download`` is false.

    Args:
        url: Remote location of the archive, passed through to
            ``web_download``.
        tar_path: Local path of the archive, e.g. ``path/to/abc.tar``.
            Compressed variants (``.tar.gz``, ``.tgz``, ``.tar.bz2``,
            ``.tar.xz``) are recognized as well.
        corpus_name: Passed through to ``web_download``.
        force_download: When true, extract even if the expected output
            path already exists. Also passed through to ``web_download``.

    Returns:
        None.
    """
    web_download(url, tar_path, corpus_name, force_download)

    # Assume that path/to/abc.tar contains path/to/abc.
    # Fall back to dropping the last four characters (the original rule),
    # but strip the full suffix for compressed archives so that the
    # "already extracted" check looks at path/to/abc, not path/to/abc.tar.
    data_path = tar_path[:-4]
    for suffix in ('.tar.gz', '.tar.bz2', '.tar.xz', '.tgz', '.tar'):
        if tar_path.endswith(suffix):
            data_path = tar_path[:-len(suffix)]
            break

    if (not force_download) and os.path.exists(data_path):
        return None

    data_root = os.path.dirname(tar_path)
    with tarfile.open(tar_path) as tar:
        # The archive comes from the network. Where the interpreter
        # supports extraction filters, use the 'data' filter so members
        # cannot be written outside of data_root.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(data_root, filter='data')
        else:
            tar.extractall(data_root)
    print(f'decompress {tar_path}')


def google_drive_download(file_id, local_path, corpus_name='', force_download=False):
def get_confirm_token(response):
for key, value in response.cookies.items():
Expand Down Expand Up @@ -177,5 +190,7 @@ def fetch(remote_path, local_path, corpus_name=None, force_download=False, metho
google_drive_download(remote_path, destination, corpus_name, force_download)
elif method == "download & unzip":
web_download_unzip(remote_path, destination, corpus_name, force_download)
elif method == "download & untar":
web_download_untar(remote_path, destination, corpus_name, force_download)
else:
print(f'download method is not valid ({method})')

0 comments on commit f7de35d

Please sign in to comment.