diff --git a/README.md b/README.md index 87443b4..b69c02d 100644 --- a/README.md +++ b/README.md @@ -14,31 +14,28 @@ The following datasets are supported: | Dataset Name | Dataset ID | |-|-| -| Landsat 5 TM Collection 1 Level 1 | `landsat_tm_c1` | | Landsat 5 TM Collection 2 Level 1 | `landsat_tm_c2_l1` | | Landsat 5 TM Collection 2 Level 2 | `landsat_tm_c2_l2` | -| Landsat 7 ETM+ Collection 1 Level 1 | `landsat_etm_c1` | | Landsat 7 ETM+ Collection 2 Level 1 | `landsat_etm_c2_l1` | | Landsat 7 ETM+ Collection 2 Level 2 | `landsat_etm_c2_l2` | -| Landsat 8 Collection 1 Level 1 | `landsat_8_c1` | | Landsat 8 Collection 2 Level 1 | `landsat_ot_c2_l1` | | Landsat 8 Collection 2 Level 2 | `landsat_ot_c2_l2` | -| Sentinel 2A | `sentinel_2a` | - +| Landsat 9 Collection 2 Level 1 | `landsat_ot_c2_l1` | +| Landsat 9 Collection 2 Level 2 | `landsat_ot_c2_l2` | # Quick start Searching for Landsat 5 TM scenes that contains the location (12.53, -1.53) acquired during the year 1995. ``` -landsatxplore search --dataset LANDSAT_TM_C1 --location 12.53 -1.53 \ +landsatxplore search --dataset landsat_tm_c2_l1 --location 12.53 -1.53 \ --start 1995-01-01 --end 1995-12-31 ``` Search for Landsat 7 ETM scenes in Brussels with less than 5% of clouds. Save the returned results in a `.csv` file. ``` -landsatxplore search --dataset LANDSAT_ETM_C1 \ +landsatxplore search --dataset landsat_tm_c2_l2 \ --location 50.83 4.38 --clouds 5 > results.csv ``` @@ -181,7 +178,7 @@ api = API(username, password) # Search for Landsat TM scenes scenes = api.search( - dataset='landsat_tm_c1', + dataset='landsat_tm_c2_l1', latitude=50.85, longitude=-4.35, start_date='1995-01-01', diff --git a/landsatxplore/cli.py b/landsatxplore/cli.py index 7b5ba05..c6b8d49 100644 --- a/landsatxplore/cli.py +++ b/landsatxplore/cli.py @@ -151,8 +151,9 @@ def search( "--timeout", "-t", type=click.INT, default=300, help="Download timeout in seconds." ) @click.option("--skip", is_flag=True, default=False) +@click.option("--overwrite", is_flag=True, default=False) @click.argument("scenes", type=click.STRING, nargs=-1) -def download(username, password, dataset, output, timeout, skip, scenes): +def download(username, password, dataset, output, timeout, skip, overwrite, scenes): """Download one or several scenes.""" ee = EarthExplorer(username, password) output_dir = os.path.abspath(output) @@ -162,7 +163,12 @@ def download(username, password, dataset, output, timeout, skip, scenes): if not ee.logged_in(): ee = EarthExplorer(username, password) fname = ee.download( - scene, output_dir, dataset=dataset, timeout=timeout, skip=skip + scene, + output_dir, + dataset=dataset, + timeout=timeout, + skip=skip, + overwrite=overwrite, ) if skip: click.echo(fname) diff --git a/landsatxplore/earthexplorer.py b/landsatxplore/earthexplorer.py index 9b39dbb..c20315d 100644 --- a/landsatxplore/earthexplorer.py +++ b/landsatxplore/earthexplorer.py @@ -20,31 +20,24 @@ # IDs of GeoTIFF data product for each dataset DATA_PRODUCTS = { - "landsat_tm_c1": "5e83d08fd9932768", - "landsat_etm_c1": "5e83a507d6aaa3db", - "landsat_8_c1": "5e83d0b84df8d8c2", - "landsat_tm_c2_l1": "5e83d0a0f94d7d8d", - "landsat_etm_c2_l1": "5e83d0d0d2aaa488", - "landsat_ot_c2_l1": "5e81f14ff4f9941c", - "landsat_tm_c2_l2": "5e83d11933473426", - "landsat_etm_c2_l2": "5e83d12aada2e3c5", - "landsat_ot_c2_l2": "5e83d14f30ea90a9", - "sentinel_2a": "5e83a42c6eba8084", + # Level 1 datasets + "landsat_tm_c2_l1": ["5e81f14f92acf9ef", "5e83d0a0f94d7d8d", "63231219fdd8c4e5"], + "landsat_etm_c2_l1":[ "5e83d0d0d2aaa488", "5e83d0d08fec8a66"], + "landsat_ot_c2_l1": ["5e81f14ff4f9941c", "5e81f14f92acf9ef"], + # Level 2 datasets + "landsat_tm_c2_l2": ["5e83d11933473426", "5e83d11933473426", "632312ba6c0988ef"], + "landsat_etm_c2_l2": ["5e83d12aada2e3c5", "5e83d12aed0efa58", "632311068b0935a8"], + "landsat_ot_c2_l2": ["5e83d14f30ea90a9", "5e83d14fec7cae84", "632210d4770592cf"] } - -def _get_tokens(body): - """Get `csrf_token` and `__ncforminfo`.""" +def _get_token(body): + """Get `csrf_token`.""" csrf = re.findall(r'name="csrf" value="(.+?)"', body)[0] - ncform = re.findall(r'name="__ncforminfo" value="(.+?)"', body)[0] - + if not csrf: raise EarthExplorerError("EE: login failed (csrf token not found).") - if not ncform: - raise EarthExplorerError("EE: login failed (ncforminfo not found).") - - return csrf, ncform + return csrf class EarthExplorer(object): """Access Earth Explorer portal.""" @@ -63,12 +56,11 @@ def logged_in(self): def login(self, username, password): """Login to Earth Explorer.""" rsp = self.session.get(EE_LOGIN_URL) - csrf, ncform = _get_tokens(rsp.text) + csrf = _get_token(rsp.text) payload = { "username": username, "password": password, "csrf": csrf, - "__ncforminfo": ncform, } rsp = self.session.post(EE_LOGIN_URL, data=payload, allow_redirects=True) @@ -78,8 +70,10 @@ def login(self, username, password): def logout(self): """Log out from Earth Explorer.""" self.session.get(EE_LOGOUT_URL) - - def _download(self, url, output_dir, timeout, chunk_size=1024, skip=False): + + def _download( + self, url, output_dir, timeout, chunk_size=1024, skip=False, overwrite=False + ): """Download remote file given its URL.""" # Check availability of the requested product # EarthExplorer should respond with JSON @@ -93,30 +87,77 @@ def _download(self, url, output_dir, timeout, chunk_size=1024, skip=False): download_url = r.json().get("url") try: + local_filename, filesize = self._get_fileinfo( + download_url, timeout=timeout, output_dir=output_dir + ) + + if skip: + return local_filename + + headers = {} + file_mode = "wb" + downloaded_bytes = 0 + file_exists = os.path.exists(local_filename) + + if file_exists and not overwrite: + downloaded_bytes = os.path.getsize(local_filename) + headers = {"Range": f"bytes={downloaded_bytes}-"} + file_mode = "ab" + if file_exists and downloaded_bytes == filesize: + # assert file is already complete + return local_filename + with self.session.get( - download_url, stream=True, allow_redirects=True, timeout=timeout + download_url, + stream=True, + allow_redirects=True, + headers=headers, + timeout=timeout, ) as r: - file_size = int(r.headers.get("Content-Length")) with tqdm( - total=file_size, unit_scale=True, unit="B", unit_divisor=1024 + total=filesize, + unit_scale=True, + unit="B", + unit_divisor=1024, + initial=downloaded_bytes ) as pbar: - local_filename = r.headers["Content-Disposition"].split("=")[-1] - local_filename = local_filename.replace('"', "") - local_filename = os.path.join(output_dir, local_filename) - if skip: - return local_filename - with open(local_filename, "wb") as f: + with open(local_filename, file_mode) as f: for chunk in r.iter_content(chunk_size=chunk_size): if chunk: f.write(chunk) pbar.update(chunk_size) + return local_filename + except requests.exceptions.Timeout: raise EarthExplorerError( "Connection timeout after {} seconds.".format(timeout) ) - return local_filename - def download(self, identifier, output_dir, dataset=None, timeout=300, skip=False): + def _get_fileinfo(self, download_url, timeout, output_dir): + """Get file name and size given its URL.""" + try: + with self.session.get( + download_url, stream=True, allow_redirects=True, timeout=timeout + ) as r: + file_size = int(r.headers.get("Content-Length")) + local_filename = r.headers["Content-Disposition"].split("=")[-1] + local_filename = local_filename.replace('"', "") + local_filename = os.path.join(output_dir, local_filename) + except requests.exceptions.Timeout: + raise EarthExplorerError( + "Connection timeout after {} seconds.".format(timeout) + ) + return local_filename, file_size + + def download( + self, + identifier, + output_dir, + dataset=None, + timeout=300, + skip=False, + overwrite=False, + ): """Download a Landsat scene. Parameters @@ -144,8 +185,22 @@ def download(self, identifier, output_dir, dataset=None, timeout=300, skip=False entity_id = self.api.get_entity_id(identifier, dataset) else: entity_id = identifier - url = EE_DOWNLOAD_URL.format( - data_product_id=DATA_PRODUCTS[dataset], entity_id=entity_id - ) - filename = self._download(url, output_dir, timeout=timeout, skip=skip) + # Cycle through the available dataset ids until one works + dataset_id_list = DATA_PRODUCTS[dataset] + id_num = len(dataset_id_list) + for id_count, dataset_id in enumerate(dataset_id_list): + try: + url = EE_DOWNLOAD_URL.format( + data_product_id=dataset_id, entity_id=entity_id + ) + filename = self._download( + url, output_dir, timeout=timeout, skip=skip, overwrite=overwrite + ) + except EarthExplorerError: + if id_count+1 < id_num: + print('Download failed with dataset id {:d} of {:d}. Re-trying with the next one.'.format(id_count+1, id_num)) + pass + else: + print('None of the archived ids succeeded! Update necessary!') + raise EarthExplorerError() return filename diff --git a/landsatxplore/util.py b/landsatxplore/util.py index 9293924..ce7a198 100644 --- a/landsatxplore/util.py +++ b/landsatxplore/util.py @@ -87,15 +87,16 @@ def parse_scene_id(scene_id): } -def landsat_dataset(satellite, collection="c1", level="l1"): +def landsat_dataset(satellite, collection="c2", level="l1"): """Get landsat dataset name.""" if satellite == 5: sensor = "tm" + collection = "c2" elif satellite == 7: sensor = "etm" - elif satellite == 8 and collection == "c1": - sensor = "8" - elif satellite == 8 and collection == "c2": + elif satellite in (8, 9) and collection == "c1": + raise ValueError('Collection 1 was decommissioned!') + elif satellite in [8, 9] and collection == "c2": sensor = "ot" else: raise LandsatxploreError("Failed to guess dataset from identifier.")