Source code for sentinelhub.data_utils

"""
Module with utilities for transforming Statistical API results into dataframes.
"""
from typing import Any, Dict, List, Optional, Tuple, Union

from .time_utils import parse_time
from .types import JsonDict

_PANDAS_IMPORT_MESSAGE = (
    "To use this function you need to install the `pandas` library, which is not a dependency of sentinelhub-py."
)


def _extract_hist(hist_data: List[Dict[str, float]]) -> Tuple[List[float], List[float]]:
    """Transform Statistical API histogram into sequences of bins and counts

    :param hist_data: An input representation of Statistical API histogram data in the form of the low edge,
        the high edge, and the count of each bin.
    :return: Histogram bin edges and counts as sequences.
    """

    nbins = len(hist_data)
    bins, counts = [], []
    for idx in range(nbins):
        bins.append(hist_data[idx]["lowEdge"])
        if idx == nbins - 1:
            bins.append(hist_data[idx]["highEdge"])
        counts.append(hist_data[idx]["count"])

    return bins, counts
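
# A minimal doctest-style sketch of the histogram structure `_extract_hist` consumes; the payload
# below is hand-built from the key names used above, not a real Statistical API response:
#
#     >>> _extract_hist([
#     ...     {"lowEdge": 0.0, "highEdge": 0.5, "count": 10},
#     ...     {"lowEdge": 0.5, "highEdge": 1.0, "count": 5},
#     ... ])
#     ([0.0, 0.5, 1.0], [10, 5])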


def _extract_stats(interval_output: JsonDict, exclude_stats: List[str]) -> Dict[str, Union[List[float], float]]:
    """Transform statistics into pandas.DataFrame entry

    :param interval_output: An input representation of statistics of an aggregation interval.
    :param exclude_stats: Statistics that will be excluded from output.
    :return: Statistics as a pandas.DataFrame entry.
    """
    stat_entry: Dict[str, Union[List[float], float]] = {}
    for output_name, output_data in interval_output.items():  # pylint: disable=too-many-nested-blocks
        for band_name, band_values in output_data["bands"].items():
            band_stats = band_values["stats"]
            # statistics are not valid when the sample count equals the no-data count
            if band_stats["sampleCount"] == band_stats["noDataCount"]:
                break

            for stat_name, value in band_stats.items():
                if stat_name not in exclude_stats:
                    col_name = f"{output_name}_{band_name}_{stat_name}"
                    if stat_name == "percentiles":
                        for percentile_name, percentile_value in value.items():
                            stat_entry[f"{col_name}_{percentile_name}"] = percentile_value
                    else:
                        stat_entry[col_name] = value

            if "histogram" in band_values:
                band_bins = band_values["histogram"]["bins"]
                hist_bins, hist_counts = _extract_hist(band_bins)
                stat_entry[f"{output_name}_{band_name}_bins"] = hist_bins
                stat_entry[f"{output_name}_{band_name}_counts"] = hist_counts

    return stat_entry
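
# A minimal sketch of an `interval_output` dict as `_extract_stats` expects it; the payload is
# hand-built from the key names used above (real responses may carry more bands and statistics):
#
#     >>> _extract_stats(
#     ...     {"ndvi": {"bands": {"B0": {"stats": {
#     ...         "min": 0.1, "max": 0.8, "mean": 0.4, "sampleCount": 100, "noDataCount": 3
#     ...     }}}}},
#     ...     exclude_stats=["sampleCount", "noDataCount"],
#     ... )
#     {'ndvi_B0_min': 0.1, 'ndvi_B0_max': 0.8, 'ndvi_B0_mean': 0.4}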


def _extract_response_data(response_data: List[JsonDict], exclude_stats: List[str]) -> List[Dict[str, Any]]:
    """Transform Statistical API response into a pandas.DataFrame

    :param response_data: An input representation of Statistical API response. The response is a list of JsonDict and
        each contains the statistics of an aggregation interval.
    :param exclude_stats: Statistics that will be excluded from output.
    :return: DataFrame entries of all aggregation intervals of a single geometry.
    """
    df_entries = []
    for interval in response_data:
        if "outputs" in interval:
            df_entry: Dict[str, Any] = _extract_stats(interval["outputs"], exclude_stats)
            if df_entry:
                df_entry["interval_from"] = parse_time(interval["interval"]["from"])
                df_entry["interval_to"] = parse_time(interval["interval"]["to"])
                df_entries.append(df_entry)

    return df_entries
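
# A minimal sketch of the per-interval structure `_extract_response_data` expects; the payload is
# hand-built from the key names used above:
#
#     >>> entries = _extract_response_data(
#     ...     [{
#     ...         "interval": {"from": "2022-01-01", "to": "2022-01-08"},
#     ...         "outputs": {"ndvi": {"bands": {"B0": {"stats": {"mean": 0.4, "sampleCount": 10, "noDataCount": 0}}}}},
#     ...     }],
#     ...     exclude_stats=["sampleCount", "noDataCount"],
#     ... )
#     >>> sorted(entries[0])
#     ['interval_from', 'interval_to', 'ndvi_B0_mean']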


def statistical_to_dataframe(result_data: List[JsonDict], exclude_stats: Optional[List[str]] = None) -> Any:
    """Transform (Batch) Statistical API results into a pandas.DataFrame

    This function depends on the `pandas` library, which is not a requirement of sentinelhub-py and needs to be
    installed before using the function.

    :param result_data: An input representation of (Batch) Statistical API results returned from
        `AwsBatchStatisticalResults.get_data()`. Each JsonDict in the list is a Statistical API response of an input
        geometry.
    :param exclude_stats: The statistic names defined in this parameter will be excluded from the output DataFrame.
    :return: Statistical dataframe.
    """
    try:
        import pandas  # pylint: disable=import-outside-toplevel
    except ImportError as exception:
        raise ImportError(_PANDAS_IMPORT_MESSAGE) from exception

    exclude_stats = exclude_stats or []
    nresults = len(result_data)
    dfs = [None] * nresults
    for idx in range(nresults):
        identifier, response = result_data[idx]["identifier"], result_data[idx]["response"]
        if response:
            result_entries = _extract_response_data(response["data"], exclude_stats)
            result_df = pandas.DataFrame(result_entries)
            result_df["identifier"] = identifier
            dfs[idx] = result_df

    return pandas.concat(dfs)
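
# A hedged usage sketch for `statistical_to_dataframe`; the single-geometry result below is
# hand-built with the minimum of fields needed (an "identifier" and a "response" with per-interval
# "data"), not a real (Batch) Statistical API payload:
#
#     >>> results = [{
#     ...     "identifier": "geometry-0",
#     ...     "response": {"data": [{
#     ...         "interval": {"from": "2022-01-01", "to": "2022-01-08"},
#     ...         "outputs": {"ndvi": {"bands": {"B0": {"stats": {"mean": 0.4, "sampleCount": 10, "noDataCount": 0}}}}},
#     ...     }]},
#     ... }]
#     >>> df = statistical_to_dataframe(results, exclude_stats=["sampleCount", "noDataCount"])
#     >>> list(df.columns)
#     ['ndvi_B0_mean', 'interval_from', 'interval_to', 'identifier']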


def _get_failed_intervals(
    identifier: str, response_data: List[JsonDict]
) -> Optional[Dict[str, Union[str, List[Tuple[str, str]]]]]:
    """Collect failed intervals of a partially failed request

    :param identifier: The identifier of the geometry.
    :param response_data: An input representation of Statistical API response.
    :return: The identifier of a geometry that has a response status of PARTIAL and the failed intervals.
    """
    failed_intervals = []
    for interval in response_data:
        if "error" in interval:
            failed_intervals.append((interval["interval"]["from"], interval["interval"]["to"]))

    return {"identifier": identifier, "failed_intervals": failed_intervals} if failed_intervals else None


def get_failed_statistical_requests(result_data: List[JsonDict]) -> List[Dict[str, Union[str, List[Tuple[str, str]]]]]:
    """Collect failed requests of (Batch) Statistical Results

    :param result_data: An input representation of (Batch) Statistical API result.
    :return: Failed requests of (Batch) Statistical Results.
    """
    failed_requests = []
    for result in result_data:
        identifier, response = result["identifier"], result["response"]
        if not response:
            failed_requests.append({"identifier": identifier})
        else:
            failed_intervals = _get_failed_intervals(identifier, response["data"])
            if failed_intervals:
                failed_requests.append(failed_intervals)

    return failed_requests
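
# A hedged usage sketch for `get_failed_statistical_requests`; the two results below are hand-built
# placeholders: the first geometry has no response at all, the second has one failed interval:
#
#     >>> get_failed_statistical_requests([
#     ...     {"identifier": "geo-0", "response": None},
#     ...     {"identifier": "geo-1", "response": {"data": [
#     ...         {"interval": {"from": "2022-01-01", "to": "2022-01-08"}, "error": {"type": "EXECUTION_ERROR"}},
#     ...     ]}},
#     ... ])
#     [{'identifier': 'geo-0'}, {'identifier': 'geo-1', 'failed_intervals': [('2022-01-01', '2022-01-08')]}]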