gcages.harmonisation.common#

Common tools across different approaches

Classes:

Name	Description
`NotHarmonisedError`	Raised when a pd.DataFrame is not harmonised

Functions:

Name	Description
`align_history_to_data_at_time`	Align history to a given set of data for a given column
`assert_harmonised`	Assert that the input is harmonised

NotHarmonisedError #

Bases: ValueError

Raised when a pd.DataFrame is not harmonised

Methods:

Name	Description
`__init__`	Initialise the error

Source code in src/gcages/harmonisation/common.py

class NotHarmonisedError(ValueError):
    """
    Error for a [pd.DataFrame][pandas.DataFrame] that is not harmonised
    """

    def __init__(
        self,
        comparison: pd.DataFrame,
        harmonisation_time: TIME_POINT,
    ) -> None:
        """
        Build the error message and initialise the underlying `ValueError`

        Parameters
        ----------
        comparison
            Results of comparing the data and history

            Its string representation is appended to the error message.

        harmonisation_time
            Time at which the data was expected to be harmonised
        """
        # Headline first, then the full comparison on its own line(s)
        # so the differing values are visible directly in the traceback.
        headline = f"The DataFrame is not harmonised in {harmonisation_time}. "
        details = f"comparison=\n{comparison}"
        super().__init__(headline + details)

__init__ #

__init__(
    comparison: DataFrame, harmonisation_time: TIME_POINT
) -> None

Initialise the error

Parameters:

Name	Type	Description	Default
`comparison`	`DataFrame`	Results of comparing the data and history	required
`harmonisation_time`	`TIME_POINT`	Expected harmonisation time	required

Source code in src/gcages/harmonisation/common.py

def __init__(
    self,
    comparison: pd.DataFrame,
    harmonisation_time: TIME_POINT,
) -> None:
    """
    Build the error message and initialise the underlying exception

    Parameters
    ----------
    comparison
        Results of comparing the data and history

        Its string representation is appended to the error message.

    harmonisation_time
        Time at which the data was expected to be harmonised
    """
    # Headline first, then the full comparison on its own line(s)
    # so the differing values are visible directly in the traceback.
    headline = f"The DataFrame is not harmonised in {harmonisation_time}. "
    details = f"comparison=\n{comparison}"
    super().__init__(headline + details)

align_history_to_data_at_time #

align_history_to_data_at_time(
    df: TimeseriesDataFrame,
    *,
    history: TimeseriesDataFrame,
    time: Any,
) -> tuple[Series[NUMERIC_DATA], Series[NUMERIC_DATA]]

Align history to a given set of data for a given column

Parameters:

Name	Type	Description	Default
`df`	`TimeseriesDataFrame`	Data to which to align history	required
`history`	`TimeseriesDataFrame`	History data to align	required
`time`	`Any`	Time (i.e. column) for which to align the data	required

Returns:

Type	Description
`tuple[Series[NUMERIC_DATA], Series[NUMERIC_DATA]]`	The given column of `df` and the given column of history, with history aligned to `df`

Raises:

Type	Description
`AssertionError`	`df` and `history` could not be aligned for some reason

Source code in src/gcages/harmonisation/common.py

def align_history_to_data_at_time(
    df: TimeseriesDataFrame, *, history: TimeseriesDataFrame, time: Any
) -> tuple[pd.Series[NUMERIC_DATA], pd.Series[NUMERIC_DATA]]:  # type: ignore # pandas-stubs not up to date
    """
    Align history to a given set of data for a given column

    Parameters
    ----------
    df
        Data to which to align history

    history
        History data to align

    time
        Time (i.e. column) for which to align the data

    Returns
    -------
    :
        The `time` column of `df` and the `time` column of `history`,
        with history aligned to `df` (left join on `df`'s index)

    Raises
    ------
    AssertionError
        `df` and `history` could not be aligned for some reason
        (the aligned history contains at least one null value)
    """
    df_year_aligned, history_year_aligned = df[time].align(history[time], join="left")

    # Implicitly assuming that people have already checked
    # that they have history values for all timeseries in `df`,
    # so any null is an obvious issue.
    if not history_year_aligned.isnull().any():
        return df_year_aligned, history_year_aligned

    # Each entry is a self-contained hint without trailing punctuation;
    # the entries are joined with ". " below, so adding ". " here
    # would produce ". . " in the final message.
    msg_l = ["history did not align properly with df"]

    if df.index.names == history.index.names:
        msg_l.append(
            "history and df have the same index levels "
            f"({list(history.index.names)}). "
            "You probably need to drop some of history's index levels "
            "so alignment can happen along the levels of interest "
            "(usually dropping everything except variable and unit (or similar))"
        )

    # NOTE: reporting index levels that only appear in history
    # might also be useful, but pandas might handle those itself.

    for unit_col_guess in ["unit", "units"]:
        if (
            unit_col_guess not in df.index.names
            or unit_col_guess not in history.index.names
        ):
            continue

        df_units_guess = df.index.get_level_values(unit_col_guess)
        history_units_guess = history.index.get_level_values(unit_col_guess)

        differing_units = (
            df_units_guess.difference(history_units_guess).unique().tolist()
        )
        # Only blame units if there really are units that history lacks,
        # otherwise the hint would point at an empty list.
        if differing_units:
            msg_l.append(
                "The following units only appear in `df`, "
                f"which might be why the data isn't aligned: {differing_units}. "
                f"{df_units_guess=} {history_units_guess=}"
            )

    msg = ". ".join(msg_l)
    raise AssertionError(msg)

assert_harmonised #

assert_harmonised(
    df: TimeseriesDataFrame,
    *,
    history: TimeseriesDataFrame,
    harmonisation_time: TIME_POINT,
    rounding: int = 10,
    df_unit_level: str = "unit",
    history_unit_level: str | None = None,
    ur: UnitRegistry | None = None,
) -> None

Assert that the input is harmonised

Parameters:

Name	Type	Description	Default
`df`	`TimeseriesDataFrame`	Data to check	required
`history`	`TimeseriesDataFrame`	History to which `df` should be harmonised	required
`harmonisation_time`	`TIME_POINT`	Time at which `df` should be harmonised to `history`	required
`rounding`	`int`	Rounding to apply to the data before comparing	`10`
`df_unit_level`	`str`	Level in `df`'s index which has unit information. Only used if unit conversion is required.	`'unit'`
`history_unit_level`	`str \| None`	Level in `history`'s index which has unit information. If not provided, we assume this is the same as `df_unit_level`. Only used if unit conversion is required.	`None`
`ur`	`UnitRegistry \| None`	Unit registry to use for determining unit conversions. Passed to gcages.units_helpers.convert_unit_like. Only used if unit conversion is required.	`None`

Raises:

Type	Description
`NotHarmonisedError`	`df` is not harmonised to `history`

Source code in src/gcages/harmonisation/common.py

def assert_harmonised(  # noqa: PLR0913
    df: TimeseriesDataFrame,
    *,
    history: TimeseriesDataFrame,
    harmonisation_time: TIME_POINT,
    rounding: int = 10,
    df_unit_level: str = "unit",
    history_unit_level: str | None = None,
    ur: pint.UnitRegistry | None = None,
) -> None:
    """
    Check that `df` matches `history` at the harmonisation time

    Parameters
    ----------
    df
        Data to check

    history
        History to which `df` should be harmonised

    harmonisation_time
        Time at which `df` should be harmonised to `history`

    rounding
        Number of decimals to which both `df` and `history`
        are rounded before they are compared

    df_unit_level
        Level in `df`'s index which has unit information

        Passed to [gcages.units_helpers.convert_unit_like][]

    history_unit_level
        Level in `history`'s index which has unit information

        Passed to [gcages.units_helpers.convert_unit_like][]
        as the target's unit level

    ur
        Unit registry to use for determining unit conversions

        Passed to [gcages.units_helpers.convert_unit_like][]

    Raises
    ------
    NotHarmonisedError
        `df` is not harmonised to `history`,
        i.e. the rounded values differ at `harmonisation_time`
    """
    # Express `df` in history's units first so the values are comparable.
    unit_converted = convert_unit_like(
        df,
        target=history,
        df_unit_level=df_unit_level,
        target_unit_level=history_unit_level,
        ur=ur,
    )

    data_at_time, history_at_time = align_history_to_data_at_time(
        unit_converted, history=history, time=harmonisation_time
    )

    # Round both sides identically before looking for differences.
    data_rounded = data_at_time.round(rounding)
    history_rounded = history_at_time.round(rounding)
    comparison = data_rounded.compare(
        history_rounded, result_names=("df", "history")
    )

    # An empty comparison means no values differ: nothing to report.
    if comparison.empty:
        return

    raise NotHarmonisedError(
        comparison=comparison, harmonisation_time=harmonisation_time
    )

gcages.harmonisation.common#

NotHarmonisedError #

__init__ #

align_history_to_data_at_time #

assert_harmonised #

init #