Source code for utils.stats

"""
.. module: stats

Created on 03/07/18

:author: Lukas Malina

Provides statistical methods to compute:
    various weighted averages along specified axis and their errors
    unbiased error estimator of infinite normal distribution from finite-sized sample

TODO use weighted average and its error in circular calculations
TODO write tests
TODO LOGGER or Raising error and warnings?
TODO if zeros or nans occur in errors, fallback to uniform weights only in affected cases

"""
import numpy as np
from scipy.special import erf
from scipy.stats import t

PI2 = 2 * np.pi
PI2I = PI2 * 1j
CONFIDENCE_LEVEL = (1 + erf(1 / np.sqrt(2))) / 2


[docs]def circular_mean(data, period=PI2, errors=None, axis=None): """ Computes weighted circular average along the specified axis. Parameters: data: array-like Contains the data to be averaged period: scalar, optional, default (2 * np.pi) Periodicity period of data errors: array-like, optional Contains errors associated with the values in data, it is used to calculated weights axis: int or tuple of ints, optional Axis or axes along which to average data Returns: Returns the weighted circular average along the specified axis. """ phases = data * PI2I / period weights = weights_from_errors(errors, period=period) return np.angle(np.average(np.exp(phases), axis=axis, weights=weights)) * period / PI2
[docs]def circular_error(data, period=PI2, errors=None, axis=None, t_value_corr=True): """ Computes error of weighted circular average along the specified axis. Parameters: data: array-like Contains the data to be averaged period: scalar, optional Periodicity period of data, default is (2 * np.pi) errors: array-like, optional Contains errors associated with the values in data, it is used to calculated weights axis: int or tuple of ints, optional Axis or axes along which to average data t_value_corr: bool, optional Species if the error is corrected for small sample size, default True Returns: Returns the error of weighted circular average along the specified axis. """ phases = data * PI2I / period weights = weights_from_errors(errors, period=period) complex_phases = np.exp(phases) complex_average = np.average(complex_phases, axis=axis, weights=weights) (sample_variance, sum_of_weights) = np.average(np.square(np.abs(complex_phases - complex_average.reshape(_get_shape(complex_phases.shape, axis)))), weights=weights, axis=axis, returned=True) if weights is not None: sample_variance = sample_variance + 1. / sum_of_weights error_of_complex_average = np.sqrt(sample_variance * unbias_variance(data, weights, axis=axis)) phase_error = error_of_complex_average / np.abs(complex_average) if t_value_corr: phase_error = phase_error * t_value_correction(effective_sample_size(data, weights, axis=axis)) return np.where(phase_error > 0.25 * PI2, 0.3 * period, phase_error * period / PI2)
[docs]def weighted_mean(data, errors=None, axis=None): """ Computes weighted average along the specified axis. Parameters: data: array-like Contains the data to be averaged errors: array-like, optional Contains errors associated with the values in data, it is used to calculated weights axis: int or tuple of ints, optional Axis or axes along which to average data Returns: Returns the weighted average along the specified axis. """ weights = weights_from_errors(errors) return np.average(data, axis=axis, weights=weights)
def _get_shape(orig_shape, axis): new_shape = np.array(orig_shape) if axis is None: new_shape[:] = 1 else: new_shape[np.array(axis)] = 1 return tuple(new_shape)
[docs]def weighted_error(data, errors=None, axis=None, t_value_corr=True): """ Computes error of weighted average along the specified axis. Parameters: data: array-like Contains the data to be averaged errors: array-like, optional Contains errors associated with the values in data, it is used to calculated weights axis: int or tuple of ints, optional Axis or axes along which to average data t_value_corr: bool, optional Species if the error is corrected for small sample size, default True Returns: Returns the error of weighted average along the specified axis. """ weights = weights_from_errors(errors) weighted_average = np.average(data, axis=axis, weights=weights) (sample_variance, sum_of_weights) = np.average(np.square(np.abs(data - weighted_average.reshape(_get_shape(data.shape, axis)))), weights=weights, axis=axis, returned=True) if weights is not None: sample_variance = sample_variance + 1 / sum_of_weights error = np.sqrt(sample_variance * unbias_variance(data, weights, axis=axis)) if t_value_corr: error = error * t_value_correction(effective_sample_size(data, weights, axis=axis)) return error
[docs]def weighted_rms(data, errors=None, axis=None): """ Computes weighted root mean square along the specified axis. Parameters: data: array-like Contains the data to be averaged errors: array-like, optional Contains errors associated with the values in data, it is used to calculated weights axis: int or tuple of ints, optional Axis or axes along which to average data Returns: Returns weighted root mean square along the specified axis. """ weights = weights_from_errors(errors) return np.sqrt(np.average(np.square(data), weights=weights, axis=axis))
[docs]def weights_from_errors(errors, period=PI2): """ Computes weights from measurement errors, weights are not output if errors contain zeros or nans Parameters: errors: array-like Contains errors which are used to calculated weights period: scalar, optional Periodicity period of data, default is (2 * np.pi) Returns: Returns the error of weighted circular average along the specified axis. """ if errors is None: return None if np.any(np.isnan(errors)): # LOGGER.warning("Nans found, weights are not used.") return None if np.any(np.logical_not(errors)): # LOGGER.warning("Zeros found, weights are not used.") return None return 1 / np.square(errors * PI2 / period)
[docs]def effective_sample_size(data, weights, axis=None): """ Computes effective sample size of weighted data along specifies axis Parameters: data: array-like weights: array-like Contains weights associated with the values in data axis: int or tuple of ints, optional Axis or axes along which the effective sample size is computed Returns: Returns the error of weighted circular average along the specified axis. """ if weights is None: return np.sum(np.ones(data.shape), axis=axis) return np.square(np.sum(weights, axis=axis)) / np.sum(np.square(weights), axis=axis)
[docs]def unbias_variance(data, weights, axis=None): """ Computes a correction factor to unbias variance of weighted average of data along specified axis Parameters: data: array-like weights: array-like Contains weights associated with the values in data axis: int or tuple of ints, optional Axis or axes along which the effective sample size is computed Returns: Returns the error of weighted circular average along the specified axis. """ sample_size = effective_sample_size(data, weights, axis=axis) try: return sample_size / (sample_size - 1) except ZeroDivisionError: return np.zeros(sample_size.shape)
[docs]def t_value_correction(sample_size): """ Calculates the multiplicative correction factor to determine standard deviation of normally distributed quantity from standard deviation of its finite-sized sample Args: sample_size: array-like Returns: multiplicative correction factor(s) of same shape as sample_size can contain nans """ return t.ppf(CONFIDENCE_LEVEL, sample_size - 1)