# Source code for sentinelhub.aws.batch

"""
Module implementing utilities for collecting data, produced with Sentinel Hub Statistical Batch API, from an S3 bucket.
"""

from typing import List, Optional, Sequence, Union

from ..api.batch.statistical import BatchStatisticalRequest, BatchStatisticalRequestType, SentinelHubBatchStatistical
from ..base import DataRequest
from ..config import SHConfig
from ..constants import MimeType
from ..download.models import DownloadRequest
from .client import AwsDownloadClient


class AwsBatchStatisticalResults(DataRequest):
    """A utility class for downloading results of Batch Statistical API from an S3 bucket."""

    def __init__(
        self,
        batch_request: BatchStatisticalRequestType,
        *,
        feature_ids: Optional[Sequence[Union[str, int]]] = None,
        data_folder: Optional[str] = None,
        config: Optional[SHConfig] = None,
    ):
        """
        :param batch_request: Info about a batch request - either an instance of `BatchStatisticalRequest` or a
            batch ID or a raw payload of the batch response.
        :param feature_ids: A list of feature IDs of saved results on the bucket. If provided it will download only
            these results. If not provided it will collect the names of all JSON files from results folder on the
            bucket and download all of them. Note that it is recommended that you provide this parameter otherwise
            this class will have to make additional requests to the S3 bucket in order to list all features from the
            folder.
        :param data_folder: Directory to which the files should be saved.
        :param config: A config object that contains AWS credentials to access the S3 bucket with results.
        """
        # NOTE(review): these attributes are assigned before the base initializer runs, presumably because
        # `DataRequest.__init__` triggers `create_request`, which reads them - confirm against `DataRequest`.
        self.batch_request = self._parse_batch_request(batch_request, config)
        self.feature_ids = feature_ids

        super().__init__(AwsDownloadClient, data_folder=data_folder, config=config)

    @staticmethod
    def _parse_batch_request(
        batch_request: BatchStatisticalRequestType, config: Optional[SHConfig]
    ) -> BatchStatisticalRequest:
        """In case a batch request is not defined with an instance of `BatchStatisticalRequest` it will make sure
        that such an instance is created.

        :param batch_request: A `BatchStatisticalRequest` instance, a raw payload dictionary or a batch request ID.
        :param config: A config object passed to the batch client when the request has to be fetched by its ID.
        :return: An instance of `BatchStatisticalRequest`.
        """
        if isinstance(batch_request, BatchStatisticalRequest):
            return batch_request

        if isinstance(batch_request, dict):
            return BatchStatisticalRequest.from_dict(batch_request)

        # Anything else is treated as a batch request ID and resolved through the service.
        batch_client = SentinelHubBatchStatistical(config=config)
        return batch_client.get_request(batch_request)

    def create_request(self) -> None:
        """Creates a list of download requests.

        The results folder is built from the output S3 URL stored in the batch request payload and the batch
        request ID. One JSON download request is created for each result filename and the list is stored in
        `self.download_list`.
        """
        # Strip a trailing slash so that joining with the request ID never produces a double slash.
        base_s3_path = self.batch_request.request["output"]["s3"]["url"].rstrip("/")
        s3_path = f"{base_s3_path}/{self.batch_request.request_id}/"

        filenames = self._get_filenames(s3_path)
        self.download_list = [
            DownloadRequest(
                url=f"{s3_path}{filename}", data_folder=self.data_folder, data_type=MimeType.JSON, filename=filename
            )
            for filename in filenames
        ]

    def _get_filenames(self, s3_path: str) -> List[str]:
        """Creates a list of JSON filenames from given feature ids or from given S3 path if feature ids are not
        provided. In case if it has to collect them from S3 path it makes sure not to collect any data from any
        subfolder in the path.

        :param s3_path: A path of form `s3://<bucket>/<key prefix>/` that ends with a slash.
        :return: A list of filenames, relative to `s3_path`. The list is empty if nothing is found on the bucket.
        """
        if self.feature_ids is not None:
            return [f"{feature_id}.json" for feature_id in self.feature_ids]

        filenames: List[str] = []
        s3_client = AwsDownloadClient.get_s3_client(self.config)

        # "s3://bucket/some/prefix/" -> ["s3:", "", "bucket", "some/prefix/"]
        _, _, bucket_name, url_key = s3_path.split("/", 3)

        paginator = s3_client.get_paginator("list_objects")
        for page in paginator.paginate(Bucket=bucket_name, Prefix=url_key):
            # S3 omits the "Contents" key when no object matches the prefix, therefore a missing key has to be
            # treated as an empty page instead of raising a KeyError.
            for item in page.get("Contents", []):
                key_path = item["Key"]
                key_name = key_path.rsplit("/", 1)[-1]

                # The second condition rejects objects that are located in subfolders of the results folder.
                if key_name.endswith(".json") and key_path == f"{url_key}{key_name}":
                    filenames.append(key_name)

        return filenames