Source code for bluemath_tk.core.decorators

import functools
from typing import Any, Dict, List

import pandas as pd
import xarray as xr


def validate_data_lhs(func):
    """
    Decorator to validate data in LHS class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally and unchanged.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    TypeError
        If the dimension names or either bounds argument is not a list.
    ValueError
        If the three lists differ in length, if any lower bound exceeds
        its upper bound, or if num_samples is not a positive integer.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        dimensions_names: List[str],
        lower_bounds: List[float],
        upper_bounds: List[float],
        num_samples: int,
    ):
        # Container checks run in signature order so the first offending
        # argument is the one reported.
        list_arguments = (
            ("Dimensions names", dimensions_names),
            ("Lower bounds", lower_bounds),
            ("Upper bounds", upper_bounds),
        )
        for label, candidate in list_arguments:
            if not isinstance(candidate, list):
                raise TypeError(f"{label} must be a list")

        # One bound pair is expected per named dimension.
        if not (len(dimensions_names) == len(lower_bounds) == len(upper_bounds)):
            raise ValueError(
                "Dimensions names, lower bounds and upper bounds must have the same length"
            )

        # Compare every pair up front, then look for any that is out of order.
        pair_is_ordered = [
            low <= high for low, high in zip(lower_bounds, upper_bounds)
        ]
        if not all(pair_is_ordered):
            raise ValueError("Lower bounds must be less than or equal to upper bounds")

        if not (isinstance(num_samples, int) and num_samples > 0):
            raise ValueError("Variable num_samples must be integer and > 0")

        return func(self, dimensions_names, lower_bounds, upper_bounds, num_samples)

    return wrapper
def validate_data_mda(func):
    """
    Decorator to validate data in MDA class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    ValueError
        If data is None, or if first_centroid_seed is given and is not an
        integer in the range ``0 <= seed < number of rows in data``.
    TypeError
        If data is not a pandas DataFrame, directional_variables is not a
        list, or custom_scale_factor is not a dict.

    Notes
    -----
    Omitting ``directional_variables`` / ``custom_scale_factor`` (or passing
    ``None``) hands the wrapped method a fresh empty list / dict on every
    call, so state can never leak between calls through a shared default.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        data: pd.DataFrame,
        directional_variables: List[str] = None,
        custom_scale_factor: dict = None,
        first_centroid_seed: int = None,
    ):
        # Build the containers per call: a literal [] / {} in the signature
        # would be a single object shared by every invocation.
        if directional_variables is None:
            directional_variables = []
        if custom_scale_factor is None:
            custom_scale_factor = {}

        if data is None:
            raise ValueError("Data cannot be None")
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        if not isinstance(directional_variables, list):
            raise TypeError("Directional variables must be a list")
        if not isinstance(custom_scale_factor, dict):
            raise TypeError("Custom scale factor must be a dict")
        if first_centroid_seed is not None:
            # The upper limit is exclusive, as the error message states:
            # a seed equal to the number of rows is already out of range.
            if (
                not isinstance(first_centroid_seed, int)
                or first_centroid_seed < 0
                or first_centroid_seed >= data.shape[0]
            ):
                raise ValueError(
                    "First centroid seed must be an integer >= 0 and < num of data points"
                )
        return func(
            self, data, directional_variables, custom_scale_factor, first_centroid_seed
        )

    return wrapper
def validate_data_kma(func):
    """
    Decorator to validate data in KMA class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    ValueError
        If data is None, if min_number_of_points is given and is not a
        positive integer, or if max_number_of_iterations is not a positive
        integer.
    TypeError
        If data is not a pandas DataFrame, directional_variables is not a
        list, custom_scale_factor is not a dict, or normalize_data is not a
        boolean.

    Notes
    -----
    Omitting ``directional_variables`` / ``custom_scale_factor`` (or passing
    ``None``) hands the wrapped method a fresh empty list / dict on every
    call, so state can never leak between calls through a shared default.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        data: pd.DataFrame,
        directional_variables: List[str] = None,
        custom_scale_factor: dict = None,
        min_number_of_points: int = None,
        max_number_of_iterations: int = 10,
        normalize_data: bool = True,
    ):
        # Build the containers per call: a literal [] / {} in the signature
        # would be a single object shared by every invocation.
        if directional_variables is None:
            directional_variables = []
        if custom_scale_factor is None:
            custom_scale_factor = {}

        if data is None:
            raise ValueError("Data cannot be None")
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        if not isinstance(directional_variables, list):
            raise TypeError("Directional variables must be a list")
        if not isinstance(custom_scale_factor, dict):
            raise TypeError("Custom scale factor must be a dict")
        # min_number_of_points is optional; only validate it when supplied.
        if min_number_of_points is not None:
            if not isinstance(min_number_of_points, int) or min_number_of_points <= 0:
                raise ValueError("Minimum number of points must be integer and > 0")
        if (
            not isinstance(max_number_of_iterations, int)
            or max_number_of_iterations <= 0
        ):
            raise ValueError("Maximum number of iterations must be integer and > 0")
        if not isinstance(normalize_data, bool):
            raise TypeError("Normalize data must be a boolean")
        return func(
            self,
            data,
            directional_variables,
            custom_scale_factor,
            min_number_of_points,
            max_number_of_iterations,
            normalize_data,
        )

    return wrapper
def validate_data_som(func):
    """
    Decorator to validate data in SOM class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    ValueError
        If data is None or num_iteration is not a positive integer.
    TypeError
        If data is not a pandas DataFrame or directional_variables is not
        a list.

    Notes
    -----
    Omitting ``directional_variables`` (or passing ``None``) hands the
    wrapped method a fresh empty list on every call, so state can never leak
    between calls through a shared default.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        data: pd.DataFrame,
        directional_variables: List[str] = None,
        num_iteration: int = 1000,
    ):
        # Build the list per call: a literal [] in the signature would be a
        # single object shared by every invocation.
        if directional_variables is None:
            directional_variables = []

        if data is None:
            raise ValueError("Data cannot be None")
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        if not isinstance(directional_variables, list):
            raise TypeError("Directional variables must be a list")
        if not isinstance(num_iteration, int) or num_iteration <= 0:
            raise ValueError("Number of iterations must be integer and > 0")
        return func(self, data, directional_variables, num_iteration)

    return wrapper
def validate_data_pca(func):
    """
    Decorator to validate data in PCA class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    TypeError
        If data is not an xarray Dataset, a windows entry is not a list, or
        scale_data is not a boolean.
    ValueError
        If vars_to_stack / coords_to_stack is not a non-empty list or names
        something missing from data, if the stacked variables do not share
        dimensions and coordinates, if pca_dim_for_rows is not a string
        naming a dimension of data, if a window is not a positive integer,
        or if a NaN threshold is not a float between 0 and 1.

    Notes
    -----
    Omitting any of the dict arguments (or passing ``None``) hands the
    wrapped method a fresh empty dict on every call, so state can never leak
    between calls through a shared default. ``value_to_replace_nans`` is
    forwarded without further validation.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        data: xr.Dataset,
        vars_to_stack: List[str],
        coords_to_stack: List[str],
        pca_dim_for_rows: str,
        windows_in_pca_dim_for_rows: dict = None,
        value_to_replace_nans: dict = None,
        nan_threshold_to_drop: dict = None,
        scale_data: bool = True,
    ):
        # Build the dicts per call: a literal {} in the signature would be a
        # single object shared by every invocation.
        if windows_in_pca_dim_for_rows is None:
            windows_in_pca_dim_for_rows = {}
        if value_to_replace_nans is None:
            value_to_replace_nans = {}
        if nan_threshold_to_drop is None:
            nan_threshold_to_drop = {}

        if not isinstance(data, xr.Dataset):
            raise TypeError("Data must be an xarray Dataset")
        # Check that all vars_to_stack are in the data
        if not isinstance(vars_to_stack, list) or len(vars_to_stack) == 0:
            raise ValueError("Variables to stack must be a non-empty list")
        for var in vars_to_stack:
            if var not in data.data_vars:
                raise ValueError(f"Variable {var} not found in data")
        # Check that all variables in vars_to_stack have the same coordinates
        # and dimensions, using the first variable as the reference
        first_var = vars_to_stack[0]
        first_var_dims = list(data[first_var].dims)
        first_var_coords = list(data[first_var].coords)
        for var in vars_to_stack:
            if list(data[var].dims) != first_var_dims:
                raise ValueError(
                    f"All variables must have the same dimensions. Variable {var} does not match."
                )
            if list(data[var].coords) != first_var_coords:
                raise ValueError(
                    f"All variables must have the same coordinates. Variable {var} does not match."
                )
        # Check that all coords_to_stack are in the data
        if not isinstance(coords_to_stack, list) or len(coords_to_stack) == 0:
            raise ValueError("Coordinates to stack must be a non-empty list")
        for coord in coords_to_stack:
            if coord not in data.coords:
                raise ValueError(f"Coordinate {coord} not found in data.")
        # Check that pca_dim_for_rows is in the data, and window > 0 if provided
        if not isinstance(pca_dim_for_rows, str) or pca_dim_for_rows not in data.dims:
            raise ValueError(
                "PCA dimension for rows must be a string and found in the data dimensions"
            )
        # Only the values are validated; the keys are not inspected here.
        for windows in windows_in_pca_dim_for_rows.values():
            if not isinstance(windows, list):
                raise TypeError("Windows must be a list")
            if not all(isinstance(window, int) and window > 0 for window in windows):
                raise ValueError("Windows must be a list of integers > 0")
        for threshold in nan_threshold_to_drop.values():
            if not isinstance(threshold, float) or threshold < 0 or threshold > 1:
                raise ValueError("Threshold must be a float between 0 and 1")
        if not isinstance(scale_data, bool):
            raise TypeError("Scale data must be a boolean, either True or False")
        return func(
            self,
            data,
            vars_to_stack,
            coords_to_stack,
            pca_dim_for_rows,
            windows_in_pca_dim_for_rows,
            value_to_replace_nans,
            nan_threshold_to_drop,
            scale_data,
        )

    return wrapper
def validate_data_rbf(func):
    """
    Decorator to validate data in RBF class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    ValueError
        If subset_data or target_data is None, if a directional variable is
        not a column of its DataFrame, or if num_workers is given and is not
        a positive integer.
    TypeError
        If subset_data or target_data is not a pandas DataFrame, if either
        directional-variables argument is not a list, if either custom scale
        factor is not a dict, or if normalize_target_data or
        iteratively_update_sigma is not a boolean.

    Notes
    -----
    Omitting any of the list / dict arguments (or passing ``None``) hands the
    wrapped method a fresh empty container on every call, so state can never
    leak between calls through a shared default.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        subset_data: pd.DataFrame,
        target_data: pd.DataFrame,
        subset_directional_variables: List[str] = None,
        target_directional_variables: List[str] = None,
        subset_custom_scale_factor: dict = None,
        normalize_target_data: bool = True,
        target_custom_scale_factor: dict = None,
        num_workers: int = None,
        iteratively_update_sigma: bool = False,
    ):
        # Build the containers per call: a literal [] / {} in the signature
        # would be a single object shared by every invocation.
        if subset_directional_variables is None:
            subset_directional_variables = []
        if target_directional_variables is None:
            target_directional_variables = []
        if subset_custom_scale_factor is None:
            subset_custom_scale_factor = {}
        if target_custom_scale_factor is None:
            target_custom_scale_factor = {}

        if subset_data is None:
            raise ValueError("Subset data cannot be None")
        if not isinstance(subset_data, pd.DataFrame):
            raise TypeError("Subset data must be a pandas DataFrame")
        if target_data is None:
            raise ValueError("Target data cannot be None")
        if not isinstance(target_data, pd.DataFrame):
            raise TypeError("Target data must be a pandas DataFrame")
        if not isinstance(subset_directional_variables, list):
            raise TypeError("Subset directional variables must be a list")
        for directional_variable in subset_directional_variables:
            if directional_variable not in subset_data.columns:
                raise ValueError(
                    f"Directional variable {directional_variable} not found in subset data"
                )
        if not isinstance(target_directional_variables, list):
            raise TypeError("Target directional variables must be a list")
        for directional_variable in target_directional_variables:
            if directional_variable not in target_data.columns:
                raise ValueError(
                    f"Directional variable {directional_variable} not found in target data"
                )
        if not isinstance(subset_custom_scale_factor, dict):
            raise TypeError("Subset custom scale factor must be a dict")
        if not isinstance(normalize_target_data, bool):
            raise TypeError("Normalize target data must be a bool")
        if not isinstance(target_custom_scale_factor, dict):
            raise TypeError("Target custom scale factor must be a dict")
        # num_workers is optional; only validate it when supplied.
        if num_workers is not None:
            if not isinstance(num_workers, int) or num_workers <= 0:
                raise ValueError("Number of workers must be integer and > 0")
        if not isinstance(iteratively_update_sigma, bool):
            raise TypeError("Iteratively update sigma must be a boolean")
        return func(
            self,
            subset_data,
            target_data,
            subset_directional_variables,
            target_directional_variables,
            subset_custom_scale_factor,
            normalize_target_data,
            target_custom_scale_factor,
            num_workers,
            iteratively_update_sigma,
        )

    return wrapper
def validate_data_xwt(func):
    """
    Decorator to validate data in XWT class fit method.

    Every argument is checked before the wrapped method is invoked; on
    success the arguments are forwarded positionally and unchanged.

    Parameters
    ----------
    func : callable
        The function to be decorated

    Returns
    -------
    callable
        The decorated function

    Raises
    ------
    TypeError
        If data is not an xarray Dataset, if fit_params is not a dict, or if
        variable_to_sort_bmus is given and is not a string naming one of the
        data variables.
    ValueError
        If data has no dimension called "time", or if fit_params has no
        "pca" entry.
    """

    @functools.wraps(func)
    def wrapper(
        self,
        data: xr.Dataset,
        fit_params: Dict[str, Dict[str, Any]] = {},
        variable_to_sort_bmus: str = None,
    ):
        dataset_ok = isinstance(data, xr.Dataset)
        if not dataset_ok:
            raise TypeError("Data must be an xarray Dataset")

        has_time_dim = "time" in data.dims
        if not has_time_dim:
            raise ValueError(
                'Time dimension with name "time" not found in data, rename and re-fit'
            )
        # TODO: check time is actually datetime

        if not isinstance(fit_params, dict):
            raise TypeError("Fit params must be a dict")
        # The PCA entry is mandatory, so the empty default is always rejected.
        if "pca" not in fit_params:
            raise ValueError("Fit params must contain PCA parameters")

        # The sort variable is optional; when supplied it must be a string
        # that names an existing data variable.
        sort_name = variable_to_sort_bmus
        if sort_name is not None and not (
            isinstance(sort_name, str) and sort_name in data.data_vars
        ):
            raise TypeError(
                "variable_to_sort_bmus must be a string and must exist in data variables"
            )

        return func(self, data, fit_params, variable_to_sort_bmus)

    return wrapper