Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cumulative fixes and updates #97

Merged
merged 11 commits into from
Apr 11, 2023
13 changes: 5 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,28 @@ The following datasets are supported:

| Dataset Name | Dataset ID |
|-|-|
| Landsat 5 TM Collection 1 Level 1 | `landsat_tm_c1` |
| Landsat 5 TM Collection 2 Level 1 | `landsat_tm_c2_l1` |
| Landsat 5 TM Collection 2 Level 2 | `landsat_tm_c2_l2` |
| Landsat 7 ETM+ Collection 1 Level 1 | `landsat_etm_c1` |
| Landsat 7 ETM+ Collection 2 Level 1 | `landsat_etm_c2_l1` |
| Landsat 7 ETM+ Collection 2 Level 2 | `landsat_etm_c2_l2` |
| Landsat 8 Collection 1 Level 1 | `landsat_8_c1` |
| Landsat 8 Collection 2 Level 1 | `landsat_ot_c2_l1` |
| Landsat 8 Collection 2 Level 2 | `landsat_ot_c2_l2` |
| Sentinel 2A | `sentinel_2a` |

| Landsat 9 Collection 2 Level 1 | `landsat_ot_c2_l1` |
| Landsat 9 Collection 2 Level 2 | `landsat_ot_c2_l2` |

# Quick start

Searching for Landsat 5 TM scenes that contain the location (12.53, -1.53), acquired during the year 1995.

```
landsatxplore search --dataset LANDSAT_TM_C1 --location 12.53 -1.53 \
landsatxplore search --dataset landsat_tm_c2_l1 --location 12.53 -1.53 \
--start 1995-01-01 --end 1995-12-31
```

Search for Landsat 7 ETM scenes in Brussels with less than 5% cloud cover. Save the returned results in a `.csv` file.

```
landsatxplore search --dataset LANDSAT_ETM_C1 \
landsatxplore search --dataset landsat_etm_c2_l2 \
--location 50.83 4.38 --clouds 5 > results.csv
```

Expand Down Expand Up @@ -181,7 +178,7 @@ api = API(username, password)

# Search for Landsat TM scenes
scenes = api.search(
dataset='landsat_tm_c1',
dataset='landsat_tm_c2_l1',
latitude=50.85,
longitude=-4.35,
start_date='1995-01-01',
Expand Down
10 changes: 8 additions & 2 deletions landsatxplore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,9 @@ def search(
"--timeout", "-t", type=click.INT, default=300, help="Download timeout in seconds."
)
@click.option("--skip", is_flag=True, default=False)
@click.option("--overwrite", is_flag=True, default=False)
@click.argument("scenes", type=click.STRING, nargs=-1)
def download(username, password, dataset, output, timeout, skip, scenes):
def download(username, password, dataset, output, timeout, skip, overwrite, scenes):
"""Download one or several scenes."""
ee = EarthExplorer(username, password)
output_dir = os.path.abspath(output)
Expand All @@ -162,7 +163,12 @@ def download(username, password, dataset, output, timeout, skip, scenes):
if not ee.logged_in():
ee = EarthExplorer(username, password)
fname = ee.download(
scene, output_dir, dataset=dataset, timeout=timeout, skip=skip
scene,
output_dir,
dataset=dataset,
timeout=timeout,
skip=skip,
overwrite=overwrite,
)
if skip:
click.echo(fname)
Expand Down
131 changes: 93 additions & 38 deletions landsatxplore/earthexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,24 @@

# IDs of GeoTIFF data products for each dataset.
# Multiple product IDs have been observed per dataset (they vary with the
# user's API access level), so each dataset maps to a list of candidate IDs
# that are tried in order until one succeeds.
DATA_PRODUCTS = {
    # Collection 2 Level 1 datasets
    "landsat_tm_c2_l1": ["5e81f14f92acf9ef", "5e83d0a0f94d7d8d", "63231219fdd8c4e5"],
    "landsat_etm_c2_l1": ["5e83d0d0d2aaa488", "5e83d0d08fec8a66"],
    "landsat_ot_c2_l1": ["5e81f14ff4f9941c", "5e81f14f92acf9ef"],
    # Collection 2 Level 2 datasets
    # NOTE: the original list for landsat_tm_c2_l2 contained the same ID
    # twice, which caused one guaranteed-redundant retry; the duplicate
    # has been removed.
    "landsat_tm_c2_l2": ["5e83d11933473426", "632312ba6c0988ef"],
    "landsat_etm_c2_l2": ["5e83d12aada2e3c5", "5e83d12aed0efa58", "632311068b0935a8"],
    "landsat_ot_c2_l2": ["5e83d14f30ea90a9", "5e83d14fec7cae84", "632210d4770592cf"],
}


def _get_tokens(body):
"""Get `csrf_token` and `__ncforminfo`."""
def _get_token(body):
"""Get `csrf_token`."""
csrf = re.findall(r'name="csrf" value="(.+?)"', body)[0]
ncform = re.findall(r'name="__ncforminfo" value="(.+?)"', body)[0]


if not csrf:
raise EarthExplorerError("EE: login failed (csrf token not found).")
if not ncform:
raise EarthExplorerError("EE: login failed (ncforminfo not found).")

return csrf, ncform

return csrf

class EarthExplorer(object):
"""Access Earth Explorer portal."""
Expand All @@ -63,12 +56,11 @@ def logged_in(self):
def login(self, username, password):
"""Login to Earth Explorer."""
rsp = self.session.get(EE_LOGIN_URL)
csrf, ncform = _get_tokens(rsp.text)
csrf = _get_token(rsp.text)
payload = {
"username": username,
"password": password,
"csrf": csrf,
"__ncforminfo": ncform,
}
rsp = self.session.post(EE_LOGIN_URL, data=payload, allow_redirects=True)

Expand All @@ -78,8 +70,10 @@ def login(self, username, password):
def logout(self):
    """Terminate the current Earth Explorer session.

    Issues a plain GET on the logout endpoint; the server is expected to
    invalidate the session cookie as a side effect.
    """
    session = self.session
    session.get(EE_LOGOUT_URL)

def _download(self, url, output_dir, timeout, chunk_size=1024, skip=False):

def _download(
self, url, output_dir, timeout, chunk_size=1024, skip=False, overwrite=False
):
"""Download remote file given its URL."""
# Check availability of the requested product
# EarthExplorer should respond with JSON
Expand All @@ -93,30 +87,77 @@ def _download(self, url, output_dir, timeout, chunk_size=1024, skip=False):
download_url = r.json().get("url")

try:
local_filename, filesize = self._get_fileinfo(
download_url, timeout=timeout, output_dir=output_dir
)

if skip:
return local_filename

headers = {}
file_mode = "wb"
downloaded_bytes = 0
file_exists = os.path.exists(local_filename)

if file_exists and not overwrite:
downloaded_bytes = os.path.getsize(local_filename)
headers = {"Range": f"bytes={downloaded_bytes}-"}
file_mode = "ab"
if file_exists and downloaded_bytes == filesize:
# assert file is already complete
return local_filename

with self.session.get(
download_url, stream=True, allow_redirects=True, timeout=timeout
download_url,
stream=True,
allow_redirects=True,
headers=headers,
timeout=timeout,
) as r:
file_size = int(r.headers.get("Content-Length"))
with tqdm(
total=file_size, unit_scale=True, unit="B", unit_divisor=1024
total=filesize,
unit_scale=True,
unit="B",
unit_divisor=1024,
initial=downloaded_bytes
) as pbar:
local_filename = r.headers["Content-Disposition"].split("=")[-1]
local_filename = local_filename.replace('"', "")
local_filename = os.path.join(output_dir, local_filename)
if skip:
return local_filename
with open(local_filename, "wb") as f:
with open(local_filename, file_mode) as f:
for chunk in r.iter_content(chunk_size=chunk_size):
if chunk:
f.write(chunk)
pbar.update(chunk_size)
return local_filename

except requests.exceptions.Timeout:
raise EarthExplorerError(
"Connection timeout after {} seconds.".format(timeout)
)
return local_filename

def download(self, identifier, output_dir, dataset=None, timeout=300, skip=False):
def _get_fileinfo(self, download_url, timeout, output_dir):
    """Get the local file name and expected size for a download URL.

    Issues a streaming GET (the response body is not consumed) and reads
    the `Content-Length` and `Content-Disposition` response headers.

    Parameters
    ----------
    download_url : str
        Direct download URL returned by Earth Explorer.
    timeout : int
        Connection timeout in seconds.
    output_dir : str
        Directory where the file would be stored; joined with the remote
        file name to build the returned path.

    Returns
    -------
    tuple of (str, int)
        Absolute local file path and expected file size in bytes.

    Raises
    ------
    EarthExplorerError
        On connection timeout, or if the server response is missing the
        `Content-Length` or `Content-Disposition` header.
    """
    try:
        with self.session.get(
            download_url, stream=True, allow_redirects=True, timeout=timeout
        ) as r:
            # Previously a missing Content-Length crashed with TypeError
            # (int(None)) and a missing Content-Disposition with KeyError;
            # raise an explicit, actionable error instead.
            content_length = r.headers.get("Content-Length")
            if content_length is None:
                raise EarthExplorerError(
                    "EE: download response is missing the Content-Length header."
                )
            file_size = int(content_length)
            content_disposition = r.headers.get("Content-Disposition")
            if content_disposition is None:
                raise EarthExplorerError(
                    "EE: download response is missing the Content-Disposition header."
                )
            # NOTE(review): split("=")[-1] assumes a simple
            # `attachment; filename="..."` header — it would mishandle a
            # filename containing '='; kept for backward compatibility.
            local_filename = content_disposition.split("=")[-1].replace('"', "")
            local_filename = os.path.join(output_dir, local_filename)
    except requests.exceptions.Timeout:
        raise EarthExplorerError(
            "Connection timeout after {} seconds.".format(timeout)
        )
    return local_filename, file_size

def download(
self,
identifier,
output_dir,
dataset=None,
timeout=300,
skip=False,
overwrite=False,
):
"""Download a Landsat scene.

Parameters
Expand Down Expand Up @@ -144,8 +185,22 @@ def download(self, identifier, output_dir, dataset=None, timeout=300, skip=False
entity_id = self.api.get_entity_id(identifier, dataset)
else:
entity_id = identifier
url = EE_DOWNLOAD_URL.format(
data_product_id=DATA_PRODUCTS[dataset], entity_id=entity_id
)
filename = self._download(url, output_dir, timeout=timeout, skip=skip)
# Cycle through the available dataset ids until one works
dataset_id_list = DATA_PRODUCTS[dataset]
id_num = len(dataset_id_list)
for id_count, dataset_id in enumerate(dataset_id_list):
try:
url = EE_DOWNLOAD_URL.format(
data_product_id=dataset_id, entity_id=entity_id
)
filename = self._download(
url, output_dir, timeout=timeout, skip=skip, overwrite=overwrite
)
except EarthExplorerError:
if id_count+1 < id_num:
print('Download failed with dataset id {:d} of {:d}. Re-trying with the next one.'.format(id_count+1, id_num))
Comment on lines +200 to +201
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only one thing: This warning is somehow confusing (to me):

Suggested change
if id_count+1 < id_num:
print('Download failed with dataset id {:d} of {:d}. Re-trying with the next one.'.format(id_count+1, id_num))
if id_count+1 < id_num:
print('Download failed with dataset id {:d} of {:d}. Re-trying with the next one.'.format(id_count+1, id_num))

In my test (#97 (comment)) it shows up while all scenes seem to be found and downloaded.

It seems to appear with every download, not sure why... Maybe an index issue?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is related to the API policy which restricts access to the download-option function to users who have explicitly asked for API access. The details are in this commit f31685c. If one could access this function without special permissions, one could always use the download-option function and get the correct dataset-id there. Unfortunately this is not the case, and to avoid forcing all users to ask for API access, the compromise solution was to hard-code all known dataset ids into the library and try different ids until one works.

I understand that printing those statements may be confusing, and probably it would be better to just try all dataset ids, and just in case each and every one of them fails, raise an error or a warning to communicate to the user that the dataset id list needs to be updated by the maintainer.

pass
else:
print('None of the archived ids succeeded! Update necessary!')
raise EarthExplorerError()
return filename
9 changes: 5 additions & 4 deletions landsatxplore/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,16 @@ def parse_scene_id(scene_id):
}


def landsat_dataset(satellite, collection="c1", level="l1"):
def landsat_dataset(satellite, collection="c2", level="l1"):
"""Get landsat dataset name."""
if satellite == 5:
sensor = "tm"
collection = "c2"
elif satellite == 7:
sensor = "etm"
elif satellite == 8 and collection == "c1":
sensor = "8"
elif satellite == 8 and collection == "c2":
elif satellite in (8, 9) and collection == "c1":
raise ValueError('Collection 1 was decommissioned!')
elif satellite in [8, 9] and collection == "c2":
sensor = "ot"
else:
raise LandsatxploreError("Failed to guess dataset from identifier.")
Expand Down