Source code for pyIntensityFeatures.utils.checks

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# DOI: 10.5281/zenodo.15102100
# Full license can be found in License.md
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# -----------------------------------------------------------------------------
"""Functions for testing the outcome of various functions."""

import numpy as np
import pandas as pds

from pyIntensityFeatures.utils import grids
from pyIntensityFeatures.utils.distributions import calc_quadratic
from pyIntensityFeatures import logger


[docs] fwhm_const = 2.0 * np.sqrt(2.0 * np.log(2.0))
[docs] def evaluate_gauss_quad(npeaks, fit_coeff, rvalue, pvalue, po_bound, un_bound_po, eq_bound, un_bound_eq, min_mlat, max_mlat, un_threshold=1.25): """Test the robustness of the quadratic Gaussian fitting results. Parameters ---------- npeaks : int Number of Gaussian peaks in the fit fit_coeff : array-like List or array containing Guassian constant, quadratic multiplier for x, quadratic multiplier for x^2, and Gaussian amplitudes, x offsets, and exponential scalers in that order. rvalue : float Pearson correlation coefficient pvalue : float Pearson p-value po_bound : float Polar auroral boundary location in degrees latitude un_bound_po : float Uncertainty of the polar auroral boundary location in degrees latitude eq_bound : float Equatorward auroral boundary location in degrees latitude un_bound_eq : float Uncertainty of the equatorward auroral boundary location in degrees lat min_mlat : float Minimum latitude in intensity slice in degrees max_mlat : float Maximum latitude in intensity slice in degrees un_threshold : float Maximum acceptable uncertainty value in degrees (default=1.25) Returns ------- bool True if all quality criteria are met, False if any fails References ---------- Longden, N. S., et al. (2010) Estimating the location of the open-closed magnetic field line boundary from auroral images, 28 (9), p 1659-1678, doi:10.5194/angeo-28-1659-2010. Notes ----- Differs from Longden, et al. (2010) by using Pearson correlation coefficients. """ # All coefficients are finite if not np.all(np.isfinite(fit_coeff)): logger.info('Fit coefficients are not all finite.') return False # Cycle through each peak for individual tests num_narrow = 0 for i in range(npeaks): icoeff = i * 3 # The Gaussian coefficients have the expected sign if (np.array([fit_coeff[icoeff + 3], fit_coeff[icoeff + 5]]) < 0.0).any(): logger.info(''.join(['Gaussian amplitude and/or spread are ', 'negative for peak {:d} of {:d}'.format( i, npeaks)])) return False if np.sign(fit_coeff[icoeff + 4]) != np.sign(max_mlat): logger.info(''.join(['Gaussian peak location in the wrong ', 'hemisphere for peak {:d} of {:d}'.format( i, npeaks)])) return False # The center of the Gaussian peak is within the latitude range covered # by the intensity profile if abs(fit_coeff[icoeff + 4]) <= abs(min_mlat) or abs(fit_coeff[ icoeff + 4]) >= abs(max_mlat): logger.info(''.join(['Gaussian peak is outside of the intensity ', 'profile for peak {:d} of {:d}'.format( i, npeaks)])) return False # Calculate the background at the peak location using the quadradic fit bg = calc_quadratic(fit_coeff[icoeff + 4], fit_coeff[0], fit_coeff[1], fit_coeff[2]) # The mangitude of the Gaussian peak is at least 10% of the background if fit_coeff[icoeff + 3] < 0.1 * bg: logger.info(''.join(['Gaussian amplitude is less than 10% of the', ' background for peak {:d} of {:d}'.format( i, npeaks)])) return False # At least one Gaussian peak must exceed a minimum width if fit_coeff[icoeff + 5] < 1.0: num_narrow += 1 # The Gaussian peak width should not be larger than the data range if fwhm_const * fit_coeff[icoeff + 5] > abs(max_mlat - min_mlat): logger.info(''.join(['Gaussian peak width is larger than the ', 'data range for peak {:d} of {:d}'.format( i, npeaks)])) return False # Ensure at least one Gaussian peak exceeds the minimum width if num_narrow == npeaks: logger.info('All Gaussian peaks are too narrow.') return False # The uncertainty is below a given threshold for the polar boundary if not np.isfinite(un_bound_po) or un_bound_po >= un_threshold: logger.info(''.join(['The polar boundary uncertainty is unrealistic:', ' {:.2f} >= {:.2f}.'.format( un_bound_po, un_threshold)])) return False # The uncertainty is below a given threshold for the equatorward boundary if not np.isfinite(un_bound_eq) or un_bound_eq >= un_threshold: logger.info(''.join(['The equatorward boundary uncertainty is ', 'unrealistic: {:.2f} >= {:.2f}.'.format( un_bound_eq, un_threshold)])) return False # The range of the PALB falls within the EALB and 90 degrees, while the # EALB falls withn the minimum latitude and the PALB if abs(po_bound) <= abs(eq_bound) or abs(po_bound) >= abs(max_mlat) or abs( eq_bound) <= abs(min_mlat): logger.info('The polar/equatorward boundary locations are mixed up.') return False # Ensure reasonable Pearson R and P values. return evaluate_pearson(rvalue, pvalue)
[docs] def evaluate_pearson(rvalue, pvalue, rthresh=0.9, pmax=1.0e-4): """Evaluate the Pearson correlation coefficient and p-value. Parameters ---------- rvalue : float Pearson correlation coefficient pvalue : float Pearson p-value for testing non-correlation rthresh : float Minimum acceptable correlation coefficient (default=0.8) pmax : float Maximum acceptable p-value (default=1.0e-4) Returns ------- bool True if thresholds pass, False if they fail """ if rvalue is None or pvalue is None: logger.info('Did not supply all Pearson values for evaluation.') return False if np.isfinite(rvalue) and np.isfinite(pvalue): return rvalue >= rthresh and pvalue <= pmax else: logger.info('Not all Pearson values are finite.') return False
[docs] def evaluate_dayglow(fit_coeff, locations, thresh=300.0): """Evaluate the dayglow level across a range of locations. Parameters ---------- fit_coeff : array-like Coefficients, the first three of which are the quadratic constant, x-term, and x-squared-term. locations : array-like Locations at which the dayglow level will be evaluated. thresh : float Maximum allowable background intensity value in Rayleighs (default=300) Returns ------- good : bool True if the background level is low across all desired locations, False if any location is too high. """ # Ensure the locations are array-like locations = np.asarray(locations) if locations.shape == (): locations = np.array([locations]) # Calculate the background at all locations bg = calc_quadratic(locations, fit_coeff[0], fit_coeff[1], fit_coeff[2]) # Evaluate the background good = True if np.all(bg <= thresh) else False return good
[docs] def compare_boundaries(rvalue, pvalue, eq_bounds, eq_uncert, po_bounds, po_uncert, min_mlat, max_mlat, max_uncert=3.0): """Compare different boundaries and choose the best values. Parameters ---------- rvalue : list-like Pearson correlation coefficient pvalue : list-like Pearson p-value for testing non-correlation eq_bounds : float Equatorward auroral boundary location in degrees latitude eq_uncert : float Equatorward auroral boundary uncertainty in degrees latitude po_bounds : float Polar auroral boundary location in degrees latitude po_uncert : float Polar auroral boundary uncertainty in degrees latitude min_mlat : float Minimum latitude in intensity slice in degrees max_mlat : float Maximum latitude in intensity slice in degrees max_uncert : float Maximum allowable boundary uncertainty in degrees (default=3.0) Returns ------- igood_eq : int or NoneType Index corresponding to the best equatorial index, or None igood_po : int or NoneType Index corresponding to the best polar index, or None """ # Initialize the output igood_eq = None igood_po = None # Determine which fit is the best ifit = compare_pearson(rvalue, pvalue) if ifit is not None: # Test the equatorward boundaries for the best value if not np.isfinite(eq_bounds[ifit]) or abs(eq_bounds[ifit]) <= abs( min_mlat) or abs(eq_bounds[ifit]) >= abs(max_mlat) or eq_uncert[ ifit] > max_uncert: ieqs = [i for i, eq in enumerate(eq_bounds) if i != ifit and eq is not None and np.isfinite(eq) and abs(eq) > abs(min_mlat) and abs(eq) < abs(max_mlat) and np.isfinite(eq_uncert[i]) and eq_uncert[i] <= max_uncert] if len(ieqs) == 1: igood_eq = ieqs[0] elif len(ieqs) > 0: isel = compare_pearson(np.asarray(rvalue)[ieqs], np.asarray(pvalue)[ieqs]) if isel is not None: igood_eq = ieqs[isel] else: igood_eq = ifit # Test the poleward boundaries for the best value if not np.isfinite(po_bounds[ifit]) or abs(po_bounds[ifit]) >= abs( max_mlat) or abs(po_bounds[ifit]) <= abs(min_mlat) or po_uncert[ ifit] > max_uncert: ipos = [i for i, po in enumerate(po_bounds) if i != ifit and po is not None and np.isfinite(po) and abs(po) < abs(max_mlat) and np.isfinite(po_uncert[i]) and po_uncert[i] <= max_uncert and abs(po) > abs(min_mlat)] if len(ipos) == 1: igood_po = ipos[0] elif len(ipos) > 0: isel = compare_pearson(np.asarray(rvalue)[ipos], np.asarray(pvalue)[ipos]) if isel is not None: igood_po = ipos[isel] else: igood_po = ifit return igood_eq, igood_po
[docs] def compare_pearson(rvalue, pvalue): """Evaluate different Pearson correlation coefficients and choose the best. Parameters ---------- rvalue : list-like Pearson correlation coefficient pvalue : list-like Pearson p-value for testing non-correlation Returns ------- igood : int or NoneType Index correspeonding to the best fit, None if no fit is good """ # Set the default output for no good fits igood = None # Replace NoneType with NaN p_value = np.asarray([np.nan if pval is None else pval for pval in pvalue]) r_value = np.asarray([np.nan if rval is None else rval for rval in rvalue]) # Identify which fits pass the threshold for correlation good_pval = np.where(np.isfinite(p_value) & (p_value <= 1.0e-4))[0] if len(good_pval) == 1: # There is only one value that passes the non-correlation test igood = good_pval[0] elif len(good_pval) > 0: # The rvalue ranges from -1 (anticorrelation) to 1 (correlation) igood = good_pval[r_value[good_pval].argmax()] return igood
[docs] def evaluate_boundary_in_mlt(bound_data, eq_key, po_key, lt_key, ut_key, lt_bin=5.0, max_iqr=1.5): """Evaluate boundary consistency with local time. Parameters ---------- bound_data : xr.Dataset Boundary data stored in an xarray Dataset eq_key : str Data key for the equatorward boundary data po_key : str Data key for the poleward boundary data lt_key : str Coordinate key for the MLT data ut_key : str Coordinate key for the UT data lt_bin : float Size of local time bin in hours over which the data will be evaluated (default=5.0) max_iqr : float Maximum multiplier for the interquartile range (IQR) used to identify outliers above or below the upper or lower quartile (default=1.5) Returns ------- bdata : xr.Dataset Dataset with only good boundaries """ # Test the input if np.any([key not in bound_data.data_vars for key in [eq_key, po_key]]): raise ValueError('bad boundary key(s)') if np.any([key not in bound_data.coords for key in [lt_key, ut_key]]): raise ValueError('unknown time coordinate(s)') # Initialize the output bdata = bound_data.copy() # Get the padded local time lt = list(bound_data[lt_key].values - 24.0) lt.extend(list(bound_data[lt_key].values)) lt.extend(list(bound_data[lt_key].values + 24.0)) # Determine the number of lt values in each window delta_lt = grids.unique(np.array(lt)[1:] - np.array(lt)[:-1]) if len(delta_lt) != 1: raise ValueError('local time does not have a fixed frequency') nbin = int(lt_bin / delta_lt[0]) for i, utime in enumerate(bound_data[ut_key]): # Evaluate the boundaries for bkey in [eq_key, po_key]: if np.isfinite(bound_data[bkey].values).any(): # Pad the boundaries bound = list(bound_data[bkey][i].values) bound.extend(list(bound_data[bkey][i].values)) bound.extend(list(bound_data[bkey][i].values)) # Save the data as a Pandas series bounds = pds.Series(bound, index=lt) # Get the running mean and standard deviation broll = bounds.rolling(nbin, min_periods=1, center=True, closed='both') bplus = broll.quantile(0.75) bminus = broll.quantile(0.25) iqr = bplus - bminus # Determine which values should be removed bbad = (bounds > bplus + max_iqr * iqr) | ( bound < bminus - max_iqr * iqr) if bbad.any(): # If there is data to remove, update the output bmask = bbad[bdata[lt_key].values].values bdata[bkey][i][bmask] = np.nan return bdata