Source code for wolfhece.compare_series

import numpy as np
import logging
from pathlib import Path
import pandas as pd

"""
Reference : https://hess.copernicus.org/articles/27/2397/2023/hess-27-2397-2023.html - "When best is the enemy of good – critical evaluation of performance criteria in hydrological models"

Definitions :
 - beta = mean(simulated) / mean(observed)
 - beta_n = (mean(simulated) - mean(observed)) / standard_deviation(observed)
 - alpha = standard_deviation(simulated) / standard_deviation(observed)
 - CV = standard_deviation / mean
 - gamma = CV(simulated) / CV(observed)
 - B_rel = (FDC_simulated - FDC_observed) / FDC_observed
 - B_rel_mean = mean(B_rel)
 - B_res = B_rel - B_rel_mean
 - B_area = integral(B_res)
 - r = correlation coefficient between observed and simulated data
 - r_s = Spearman rank correlation coefficient between observed and simulated data
"""


[docs]
def Nash_Sutcliffe_efficiency(observed, simulated):
    """
    Calculate the Nash-Sutcliffe efficiency coefficient (NSE) between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Nash-Sutcliffe efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate the numerator and denominator for the NSE
    numerator = np.sum((observed - simulated) ** 2)
    denominator = np.sum((observed - np.mean(observed)) ** 2)

    # Calculate and return the NSE
    nse = 1 - (numerator / denominator) if denominator != 0 else np.nan

    return nse



[docs]
def Kling_Gupta_efficiency(observed, simulated):
    """
    Calculate the Kling-Gupta efficiency coefficient (KGE) between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Kling-Gupta efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate the components for the KGE
    r = np.corrcoef(observed, simulated)[0, 1]
    alpha = np.std(simulated) / np.std(observed)
    beta = np.mean(simulated) / np.mean(observed)

    # Calculate and return the KGE
    kge = 1 - np.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)
    return kge



[docs]
def modified_Kling_Gupta_efficiency(observed, simulated):
    """
    Calculate the modified Kling-Gupta efficiency coefficient (KGE') between observed and simulated data.

    A modified Kling-Gupta efficiency was proposed by Kling et al. (2012).
    The coefficient of variation is used instead of the standard deviation
    to ensure that bias and variability are not cross-correlated

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The modified Kling-Gupta efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate the components for the modified KGE
    r = np.corrcoef(observed, simulated)[0, 1]
    # alpha = np.std(simulated) / np.std(observed)
    beta = np.mean(simulated) / np.mean(observed)
    cv_observed = np.std(observed) / np.mean(observed)
    cv_simulated = np.std(simulated) / np.mean(simulated)

    # Calculate and return the modified KGE
    kge_prime = 1 - np.sqrt((r - 1) ** 2 + (beta - 1) ** 2 + (cv_simulated / cv_observed - 1) ** 2)
    return kge_prime



[docs]
def modified_Kling_Gupta_efficiency_Spearman(observed, simulated):
    """
    Calculate the modified Kling-Gupta efficiency coefficient (KGE') between observed and simulated data.

    A modified Kling-Gupta efficiency was proposed by Kling et al. (2012).
    The coefficient of variation is used instead of the standard deviation
    to ensure that bias and variability are not cross-correlated

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The modified Kling-Gupta efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate the components for the modified KGE
    r = Spearman_Rank_Correlation_Coefficient(observed, simulated)
    # alpha = np.std(simulated) / np.std(observed)
    beta = np.mean(simulated) / np.mean(observed)
    cv_observed = np.std(observed) / np.mean(observed)
    cv_simulated = np.std(simulated) / np.mean(simulated)

    # Calculate and return the modified KGE
    kge_prime_sp = 1 - np.sqrt((r - 1) ** 2 + (beta - 1) ** 2 + (cv_simulated / cv_observed - 1) ** 2)
    return kge_prime_sp



[docs]
def Normalised_Diagnostic_efficiency(observed, simulated):
    """
    Calculate the Diagnostic efficiency coefficient (DE) between observed and simulated data.

    Schwemmle et al. (2021) used Flow Duration Curve (FDC)-based parameters to account
    for variability and bias in another KGE variant: the diagnostic efficiency.
    This criterion is based on constant, dynamic, and timing errors and aims to provide
    a stronger link to hydrological processes (Schwemmle et al., 2021)

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Diagnostic efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    FDC_observed = np.sort(observed)
    FDC_simulated = np.sort(simulated)

    r = np.corrcoef(FDC_observed, FDC_simulated)[0, 1]

    B_rel = (FDC_simulated - FDC_observed) / FDC_observed
    B_rel_mean = np.mean(B_rel)
    B_res = B_rel - B_rel_mean
    B_area = np.trapz(B_res, dx=1)  # Assuming uniform spacing for simplicity

    de = 1 - np.sqrt(B_rel_mean ** 2 + (r - 1) ** 2 + B_area ** 2)

    return de



[docs]
def Liu_mean_efficiency(observed, simulated):
    """
    Calculate the Liu mean efficiency coefficient (LME) between observed and simulated data.

    The Liu mean efficiency is a variant of the Nash-Sutcliffe efficiency that
    accounts for the mean of the observed data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Liu mean efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    r = np.corrcoef(observed, simulated)[0, 1]
    alpha = np.std(simulated) / np.std(observed)
    beta = np.mean(simulated) / np.mean(observed)

    lme = 1 - np.sqrt((r*alpha - 1) ** 2 + (beta - 1) ** 2)

    return lme



[docs]
def Lee_Choi_mean_efficiency(observed, simulated):
    """
    Calculate the Lee-Choi mean efficiency coefficient (LCME) between observed and simulated data.

    The Lee-Choi mean efficiency is a variant of the Nash-Sutcliffe efficiency that
    accounts for the mean of the observed data and is less sensitive to outliers.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Lee-Choi mean efficiency coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    r = np.corrcoef(observed, simulated)[0, 1]
    alpha = np.std(simulated) / np.std(observed)
    beta = np.mean(simulated) / np.mean(observed)

    lcme = 1 - np.sqrt((r*alpha - 1) ** 2 + (r / alpha -1) ** 2 + (beta - 1) ** 2)

    return lcme



[docs]
def Dynamic_Time_Warping_distance(series1, series2):
    """
    Calculate the Dynamic Time Warping (DTW) distance between two time series.

    The time series can be of different lengths, and DTW finds the optimal alignment
    between them by minimizing the distance.

    :param series1: First time series as a list or numpy array.
    :param series2: Second time series as a list or numpy array.
    :return: The DTW distance between the two time series.
    """
    from dtaidistance import dtw

    if isinstance(series1, pd.Series):
        series1 = series1.values

    if isinstance(series2, pd.Series):
        series2 = series2.values

    return dtw.distance_fast(series1, series2)



[docs]
def Dynamic_Time_Warping_distance_normalized(series1, series2):
    """
    Calculate the normalized Dynamic Time Warping (DTW) distance between two time series.

    The DTW distance is normalized by the length of the path to provide a relative measure.

    :param series1: First time series as a list or numpy array.
    :param series2: Second time series as a list or numpy array.
    :return: The normalized DTW distance between the two time series.
    """
    from dtaidistance import dtw

    if isinstance(series1, pd.Series):
        series1 = series1.values

    if isinstance(series2, pd.Series):
        series2 = series2.values

    path, distance = dtw.warping_path_fast(series1, series2, include_distance=True)

    if len(path) == 0:
        return 0.0

    return distance / len(path)




[docs]
def Root_Mean_Square_Error(observed, simulated):
    """
    Calculate the Root Mean Square Error (RMSE) between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The RMSE value.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate RMSE
    rmse = np.sqrt(np.mean((observed - simulated) ** 2))

    return rmse



[docs]
def Mean_Absolute_Error(observed, simulated):
    """
    Calculate the Mean Absolute Error (MAE) between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The MAE value.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate MAE
    mae = np.mean(np.abs(observed - simulated))

    return mae



[docs]
def Mean_Absolute_Percentage_Error(observed, simulated):
    """
    Calculate the Mean Absolute Percentage Error (MAPE) between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The MAPE value.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate MAPE
    mape = np.mean(np.abs((observed - simulated) / observed)) * 100

    return mape



[docs]
def Pearson_Correlation_Coefficient(observed, simulated):
    """
    Calculate the Pearson correlation coefficient between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Pearson correlation coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate Pearson correlation coefficient
    r = np.corrcoef(observed, simulated)[0, 1]

    return r



[docs]
def Spearman_Rank_Correlation_Coefficient(observed, simulated):
    """
    Calculate the Spearman rank correlation coefficient between observed and simulated data.

    :param observed: Array-like structure containing observed data values.
    :param simulated: Array-like structure containing simulated data values.
    :raises ValueError: If the lengths of observed and simulated data do not match.
    :return: The Spearman rank correlation coefficient.
    """

    if isinstance(observed, pd.Series):
        observed = observed.values

    if isinstance(simulated, pd.Series):
        simulated = simulated.values

    if len(observed) != len(simulated):
        raise ValueError("Observed and simulated data must have the same length.")

    # Convert to numpy arrays for calculations
    observed = np.array(observed)
    simulated = np.array(simulated)

    # Calculate Spearman rank correlation coefficient
    from scipy.stats import spearmanr
    r, _ = spearmanr(observed, simulated)

    return r



[docs]
def normalize_series(series):
    """
    Normalize a time series to the range [0, 1].

    :param series: Array-like structure containing the time series data.
    :return: Normalized time series as a numpy array.
    """
    if isinstance(series, pd.Series):
        series = series.values

    series = np.array(series)
    min_val = np.min(series)
    max_val = np.max(series)

    if max_val - min_val == 0:
        return np.zeros_like(series)  # Avoid division by zero

    normalized_series = (series - min_val) / (max_val - min_val)

    return normalized_series