# Source code for sat_download.api.usgs
import requests
import json
import re
import os
from tqdm import tqdm
from typing import List
from sat_download.api.base import SatelliteAPI
from sat_download.data_types.search import SearchFilters, SearchResults
from sat_download.factories.search import get_satellite_image
from sat_download.enums import COLLECTIONS
# [docs]
class USGSAPI(SatelliteAPI):
    """
    Implementation of SatelliteAPI for the USGS Earth Explorer API.

    This class provides methods to search and download satellite imagery from the
    USGS Earth Explorer service using their Machine-to-Machine (M2M) API.

    Parameters
    ----------
    username : str
        Username for authentication with the USGS Earth Explorer API
    password : str
        API token for authentication (not the user's password)

    Attributes
    ----------
    API_URL : str
        Base URL for the USGS M2M API
    LOGIN_ENDPOINT : str
        Endpoint for authentication
    SEARCH_ENDPOINT : str
        Endpoint for searching scenes
    DOWNLOAD_REQUEST_ENDPOINT : str
        Endpoint for requesting download URLs
    DOWNLOAD_OPTIONS_ENDPOINT : str
        Endpoint for fetching download options
    MAX_RESULTS : int
        Value sent as ``maxResults`` in every scene search

    Notes
    -----
    Authentication is performed using API tokens which must be generated
    through the USGS Earth Explorer portal. The password parameter should
    actually be the API token, not the user's password.

    See Also
    --------
    sat_download.api.base.SatelliteAPI : Base class defining the API interface
    sat_download.api.odata.ODataAPI : Implementation for Copernicus Data Space API
    """

    API_URL = "https://m2m.cr.usgs.gov/api/api/json/stable/"
    LOGIN_ENDPOINT = "login-token"
    SEARCH_ENDPOINT = "scene-search"
    DOWNLOAD_REQUEST_ENDPOINT = "download-request"
    DOWNLOAD_OPTIONS_ENDPOINT = 'download-options'
    MAX_RESULTS = 20

    def __init__(self, username, password):
        """
        Initialize USGS API client and authenticate with the service.

        Parameters
        ----------
        username : str
            Username for authentication with the USGS Earth Explorer API
        password : str
            API token for authentication (not the user's password)

        Notes
        -----
        Forwards both arguments to the base-class initializer and then calls
        the private login method, so constructing an instance performs a
        network request.
        """
        super().__init__(username, password)
        self.__login()

    def __post(self, endpoint: str, payload: str, *, authenticated: bool = True):
        """
        POST a JSON payload to an M2M endpoint and return its ``data`` field.

        Parameters
        ----------
        endpoint : str
            Endpoint name appended to ``API_URL``
        payload : str
            JSON-encoded request body
        authenticated : bool, optional
            When True (default) the stored ``self.api_key`` header is sent;
            the login call passes False because no token exists yet

        Returns
        -------
        object
            The ``data`` member of the decoded JSON response

        Raises
        ------
        Exception
            Carrying the response's ``errorCode`` when it is not None
        """
        headers = self.api_key if authenticated else None
        response = requests.post(f"{self.API_URL}{endpoint}", payload, headers=headers)
        body = json.loads(response.text)
        if body['errorCode'] is not None:
            raise Exception(body['errorCode'])
        return body['data']

    def __login(self):
        """
        Authenticate with the USGS Earth Explorer API.

        Returns
        -------
        None
            Updates the self.api_key attribute on success

        Raises
        ------
        Exception
            If authentication fails

        Notes
        -----
        Private method that sends ``self.username`` and ``self.password`` (the
        latter as the ``token`` field) and stores the returned token as an
        ``X-Auth-Token`` header dict for use with subsequent API requests.
        """
        payload = json.dumps({'username': self.username, 'token': self.password})
        token = self.__post(self.LOGIN_ENDPOINT, payload, authenticated=False)
        self.api_key = {'X-Auth-Token': token}

    def search(self, filters: SearchFilters) -> SearchResults:
        """
        Search for satellite imagery using specified filters.

        Parameters
        ----------
        filters : SearchFilters
            The search filters to apply to the search

        Returns
        -------
        SearchResults
            Dictionary mapping download URLs to SatelliteImage objects.
            Empty when the search succeeds but matches no scene.

        Raises
        ------
        Exception
            If any of the API requests reports an error code

        Notes
        -----
        Implementation of the abstract search method for the USGS Earth Explorer API.
        The ``processing_level`` and ``tile_id`` filters are applied client-side
        against each scene's ``displayId``. Scenes without an available download
        URL are left out of the result.
        """
        query = self.__prepare_query(filters)
        scenes = self.__post(self.SEARCH_ENDPOINT, query)
        if not scenes["results"]:
            # A successful search with no matches is not an error.
            return {}

        metadata = self.__request_download_metadata(filters.collection, scenes)

        # Index URLs by entity id once; setdefault keeps the first entry for
        # an id, matching a first-match linear scan.
        url_by_entity = {}
        for entry in metadata["availableDownloads"]:
            url_by_entity.setdefault(entry["entityId"], entry["url"])

        results = {}
        for scene in scenes["results"]:
            display_id = scene["displayId"]
            if filters.is_set('processing_level') and filters.processing_level not in display_id:
                continue
            if filters.is_set('tile_id') and f'_{filters.tile_id}_' not in display_id:
                continue
            url = url_by_entity.get(scene["entityId"])
            if url:
                results[url] = get_satellite_image(
                    COLLECTIONS(filters.collection), {'Name': display_id}
                )
        return results

    def __prepare_query(self, filters: SearchFilters) -> str:
        """
        Prepare a USGS API query from search filters.

        Parameters
        ----------
        filters : SearchFilters
            The search filters to convert to USGS API query parameters

        Returns
        -------
        str
            JSON string containing USGS API query parameters

        Notes
        -----
        Private method that converts SearchFilters into the specific
        format required by the USGS Earth Explorer API. Only filters that
        are set are included. A geometry is sent as a minimum bounding
        rectangle whose two corners are the same point.
        """
        payload = {'maxResults': self.MAX_RESULTS, 'startingNumber': 1, 'sceneFilter': {}}
        acquisition_filter = {}
        spatial_filter = {}

        if filters.is_set('collection'):
            payload['datasetName'] = filters.collection
        if filters.is_set('start_date'):
            acquisition_filter['start'] = filters.start_date
        if filters.is_set('end_date'):
            acquisition_filter['end'] = filters.end_date
        if filters.is_set('geometry'):
            # Take the two numbers inside the last parenthesis pair, e.g.
            # "POINT(a b)". split() tolerates repeated whitespace.
            # NOTE(review): the first number is treated as latitude and the
            # second as longitude; standard WKT is "lon lat" - confirm which
            # order callers supply.
            coordinates = filters.geometry.replace(')', '').split('(')[-1].split()
            lat, lon = coordinates
            corner = {'latitude': float(lat), 'longitude': float(lon)}
            spatial_filter['filterType'] = 'mbr'
            spatial_filter['lowerLeft'] = dict(corner)
            spatial_filter['upperRight'] = dict(corner)

        if spatial_filter:
            payload['sceneFilter']['spatialFilter'] = spatial_filter
        if acquisition_filter:
            payload['sceneFilter']['acquisitionFilter'] = acquisition_filter
        return json.dumps(payload)

    def __request_download_metadata(self, dataset, scenes):
        """
        Request metadata needed for downloading images.

        Parameters
        ----------
        dataset : str
            The collection identifier for the search results
        scenes : dict
            Scene search response data; its ``results`` list is read

        Returns
        -------
        dict
            Download metadata including download URLs

        Raises
        ------
        Exception
            If the options request or the download request fails

        Notes
        -----
        Private method that obtains download options and creates download
        requests to generate URLs for the identified scenes.
        """
        options = self.__get_downloads_options(dataset, scenes)
        download_ids = self.__get_download_ids(options)
        payload = json.dumps({'downloads': download_ids, 'label': 'sample'})
        return self.__post(self.DOWNLOAD_REQUEST_ENDPOINT, payload)

    def __get_downloads_options(self, dataset: str, scenes: dict):
        """
        Get available download options for a set of scenes.

        Parameters
        ----------
        dataset : str
            The collection identifier
        scenes : dict
            Scene search response data; the ``entityId`` of every item in
            its ``results`` list is sent to the API

        Returns
        -------
        list
            List of download options for the requested scenes

        Raises
        ------
        Exception
            If the options request fails

        Notes
        -----
        Private method that queries available download formats and options
        for the identified scenes.
        """
        scene_ids = [result['entityId'] for result in scenes['results']]
        payload = json.dumps({'datasetName': dataset, 'entityIds': scene_ids})
        return self.__post(self.DOWNLOAD_OPTIONS_ENDPOINT, payload)

    def __get_download_ids(self, options: List[dict]) -> List[dict]:
        """
        Extract download IDs from options for products that are available.

        Parameters
        ----------
        options : List[dict]
            List of download options from __get_downloads_options

        Returns
        -------
        List[dict]
            List of entity/product ID pairs for available downloads

        Notes
        -----
        Private method that keeps only options flagged as available whose
        product name contains ``'Bundle'``, and extracts the identifiers
        needed for download requests.
        """
        return [
            {'entityId': product['entityId'], 'productId': product['id']}
            for product in options
            # `available` is expected to be a JSON boolean.
            if product['available'] is True and 'Bundle' in product['productName']
        ]

    def download(self, image_id: str, outname: str) -> None:
        """
        Download a satellite image by its ID (URL).

        Parameters
        ----------
        image_id : str
            The download URL for the image (not entity ID)
        outname : str
            The output filename where the image will be saved

        Returns
        -------
        None
            The file is saved to the specified location on success

        Notes
        -----
        Implementation of the abstract download method for USGS API.
        Uses tqdm to display a progress bar during download.
        Unlike other APIs, the image_id parameter is actually the download URL.
        Any failure (including a non-200 status) is reported with ``print``
        rather than raised to the caller.
        """
        MB = 1024 * 1024
        try:
            # The context manager releases the streamed connection even when
            # the transfer is interrupted.
            with requests.get(image_id, stream=True) as response:
                if response.status_code != 200:
                    raise Exception(f"Error en la descarga: {response.status_code}")

                size_bytes = int(response.headers.get('Content-Length', 0))
                # Round up so a trailing partial chunk is counted; None lets
                # tqdm show an open-ended bar when the size is unknown.
                total_chunks = ((size_bytes + MB - 1) // MB) or None

                with open(outname, 'wb') as new_file:
                    progress = tqdm(
                        response.iter_content(chunk_size=MB),
                        total=total_chunks,
                        unit='MB',
                        desc=f"Downloading image at {os.path.basename(outname)}",
                    )
                    for chunk in progress:
                        new_file.write(chunk)
        except Exception as e:
            print(f"Failed to download from {image_id}. {e}.")