diff --git a/README.md b/README.md index e593523..92551ce 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,9 @@ onc.downloadArchivefile("AXISQ6044PTZACCC8E334C53_20161201T000001.000Z.jpg", ove You can use the method `downloadArchivefile()` as above to download individual files or the method `downloadDirectArchivefile()` to download all the files that match your filters. +Alternatively, if you prefer using a download manager like [aria2](https://aria2.github.io/) or [Free Download Manager](https://www.freedownloadmanager.org/), `getArchivefileUrls` and `getArchivefileUrl` can return the download URLs of the archive files +without downloading the files. + Check more on the _[archive file download methods guide](https://oceannetworkscanada.github.io/Oceans3.0-API/API_Guide.html#archive-file-download-methods)_ and _[code examples](https://oceannetworkscanada.github.io/api-python-client/Code_Examples/Download_Archived_Files.html)_. diff --git a/doc/source/Code_Examples/Download_Archived_Files.md b/doc/source/Code_Examples/Download_Archived_Files.md index 6c9218c..c27ffcf 100644 --- a/doc/source/Code_Examples/Download_Archived_Files.md +++ b/doc/source/Code_Examples/Download_Archived_Files.md @@ -110,7 +110,7 @@ onc.getArchivefile(params) ## Download archived files that match the parameters -Download all "mat" files from a hydrophone at Straight of Georgia East (_locationCode_:"**SEVIP**") using the parameter above. +Download all "mat" files from a hydrophone at Strait of Georgia East (_locationCode_:"**SEVIP**"). ```python params = { @@ -127,3 +127,57 @@ onc.downloadDirectArchivefile(params) # onc.getDirectFiles(params) ``` + +## Download archived files using a download manager + +Return the download URLs from a hydrophone at Strait of Georgia East (_locationCode_:"**SEVIP**").
+ +```python +params = { + "deviceCategoryCode": "HYDROPHONE", + "locationCode": "SEVIP", + "extension": "mat", + "dateFrom": "2018-10-05T00:00:00.000Z", + "dateTo": "2018-10-05T00:10:00.000Z", +} + +# print is necessary to render the newline character +print(onc.getArchivefileUrls(params, joinedWithNewline=True)) +``` + +After running the code, a list of URLs will be printed. + +``` +https://data.oceannetworks.ca/api/archivefile/download?filename=ICLISTENHF1560_20181004T235903.000Z-spect.mat&token=Your_TOKEN +https://data.oceannetworks.ca/api/archivefile/download?filename=ICLISTENHF1560_20181005T000403.000Z-spect.mat&token=Your_TOKEN +https://data.oceannetworks.ca/api/archivefile/download?filename=ICLISTENHF1560_20181005T000903.000Z-spect.mat&token=Your_TOKEN +``` + +At this point, you can open your favorite download manager, paste the URLs, and start the download. +Most modern download managers support batch download, typically from a multi-line text input, the clipboard or a file. + +Here is an example of using a popular open source download manager -- aria2. + +### webui-aria2 + +[aria2](https://aria2.github.io/) is a lightweight multi-protocol & multi-source command-line download utility. +[webui-aria2](https://github.com/ziahamza/webui-aria2) is a web interface to interact with aria2. +Refer to the webui-aria2 [README](https://github.com/ziahamza/webui-aria2?tab=readme-ov-file#webui-aria2) file +for more information on how to use the tool. + +1. Install aria2 by downloading it from the [release](https://github.com/aria2/aria2/releases) + (or use your package manager if you are on Linux), extract the zip file, + and start the server by running + +```shell +./aria2c --enable-rpc --rpc-listen-all +``` + +2. Go to <https://ziahamza.github.io/webui-aria2/> (or you can download the webui-aria2 repository and open index.html + from the docs folder), change "Enter the host" field to "localhost" in the settings, and save the settings. + You may also need to refresh the page.
+ ![Aria2c host change.png](../_static/Code_Examples/webui-aria2c-host.png) + +3. Click "Add" -> "By URIs" in the menu. Fill in the URLs and start the download. + You can also customize the download in the "Download settings" like changing the download directory. + ![Aria2c add URLs.png](../_static/Code_Examples/webui-aria2c-add-uri.png) diff --git a/doc/source/_static/Code_Examples/webui-aria2c-add-uri.png b/doc/source/_static/Code_Examples/webui-aria2c-add-uri.png new file mode 100644 index 0000000..af989db Binary files /dev/null and b/doc/source/_static/Code_Examples/webui-aria2c-add-uri.png differ diff --git a/doc/source/_static/Code_Examples/webui-aria2c-host.png b/doc/source/_static/Code_Examples/webui-aria2c-host.png new file mode 100644 index 0000000..2cdc0a7 Binary files /dev/null and b/doc/source/_static/Code_Examples/webui-aria2c-host.png differ diff --git a/src/onc/modules/_OncArchive.py b/src/onc/modules/_OncArchive.py index 567fb4a..ea54b5e 100644 --- a/src/onc/modules/_OncArchive.py +++ b/src/onc/modules/_OncArchive.py @@ -41,6 +41,18 @@ def getArchivefile(self, filters: dict, allPages: bool): allPages=allPages, ) + def getArchivefileUrls(self, filters: dict, allPages: bool) -> list[str]: + file_list: list[str] = self.getArchivefile(filters, allPages)["files"] + return list(map(self.getArchivefileUrl, file_list)) + + def getArchivefileUrl(self, filename: str) -> str: + """ + Return an archivefile absolute download URL for a filename + """ + url = self._serviceUrl("archivefile") + token = self._config("token") + return f"{url}/download?filename={filename}&token={token}" + def downloadArchivefile(self, filename: str = "", overwrite: bool = False): url = self._serviceUrl("archivefiles") @@ -123,7 +135,7 @@ def downloadDirectArchivefile( else: print(f' Skipping "{filename}": File already exists.') downInfo = { - "url": self._getDownloadUrl(filename), + "url": self.getArchivefileUrl(filename), "status": "skipped", "size": 0, "downloadTime": 0, @@ -139,15 
+151,6 @@ def downloadDirectArchivefile( "stats": {"totalSize": size, "downloadTime": time, "fileCount": successes}, } - def _getDownloadUrl(self, filename: str): - """ - Returns an archivefile absolute download URL for a filename - """ - url = self._serviceUrl("archivefiles") - return "{:s}?method=getFile&filename={:s}&token={:s}".format( - url, filename, self._config("token") - ) - def _getList(self, filters: dict, by: str = "location", allPages: bool = False): """ Wraps archivefiles getArchivefileByLocation and getArchivefileByDevice methods diff --git a/src/onc/modules/_OncService.py b/src/onc/modules/_OncService.py index 3953632..d278b02 100644 --- a/src/onc/modules/_OncService.py +++ b/src/onc/modules/_OncService.py @@ -77,6 +77,7 @@ def _serviceUrl(self, service: str): "properties", "dataProducts", "archivefiles", + "archivefile", "scalardata", "rawdata", ]: diff --git a/src/onc/onc.py b/src/onc/onc.py index 2aa8300..e34b74c 100644 --- a/src/onc/onc.py +++ b/src/onc/onc.py @@ -1299,6 +1299,59 @@ def getArchivefile(self, filters: dict = None, allPages: bool = False): """ # noqa: E501 return self.archive.getArchivefile(filters, allPages) + def getArchivefileUrls( + self, + filters: dict = None, + allPages: bool = False, + joinedWithNewline: bool = False, + ) -> list[str] | str: + """ + Return a list of file URLs (or joined with a newline) available in Oceans 3.0 Archiving System by given query parameters. + + A helper method for getting a list of archive files URLs without downloading them. + It can also return a single string that concatenates all the URLs in the list together with a newline, + which can be useful if you are using a download manager and it supports batch downloading multiple URLs + that expects all the URLs on a separate line from either a file or the clipboard. + + Parameters + ---------- + filters : dict, optional + Query string parameters in the API request. 
+ See ``getArchivefileByLocation`` and ``getArchivefileByDevice`` for more information. + allPages : bool, default False + Whether the response concatenates data on all pages if there are more than one page due to rowLimit. + joinedWithNewline: bool, default False + Whether it returns a list of URLs or a single string that concatenates the list with a newline. + + Returns + ------- + list[str] | str + A list of file URLs or a single joined string. + """ # noqa: E501 + file_urls = self.archive.getArchivefileUrls(filters, allPages) + if joinedWithNewline: + return "\n".join(file_urls) + else: + return file_urls + + def getArchivefileUrl(self, filename: str = "") -> str: + """ + Return a file URL from Oceans 3.0 Archiving System by specifying the file name. + + A helper method for obtaining the archive file URL without actually downloading the file. + + Parameters + ---------- + filename : str, default "" + A valid name of a file in DMAS Archiving System. + + Returns + ------- + str: + A download URL for the given archive filename. + """ # noqa: E501 + return self.archive.getArchivefileUrl(filename) + def downloadArchivefile(self, filename: str = "", overwrite: bool = False): """ Download a file from Oceans 3.0 Archiving System by specifying the file name.