Source code for scedar.eda.stats

import numpy as np


[docs]def gc1d(x): """ Compute Gini Index for 1D array. References ---------- [1] http://mathworld.wolfram.com/GiniCoefficient.html [2] Damgaard, C. and Weiner, J. "Describing Inequality in Plant Size or Fecundity." Ecology 81, 1139-1142, 2000. [3] Dixon, P. M.; Weiner, J.; Mitchell-Olds, T.; and Woodley, R. "Bootstrapping the Gini Coefficient of Inequality." Ecology 68, 1548-1551, 1987. [4] Dixon, P. M.; Weiner, J.; Mitchell-Olds, T.; and Woodley, R. "Erratum to 'Bootstrapping the Gini Coefficient of Inequality.' " Ecology 69, 1307, 1988. [5] https://en.wikipedia.org/wiki/Gini_coefficient [6] https://github.com/oliviaguest/gini/blob/master/gini.py """ x = np.array(x, dtype="float64") if x.ndim != 1: raise ValueError("Only support 1D array.") if x.shape[0] == 0: raise ValueError("Array size cannot be 0.") if x.shape[0] == 1: return np.nan # sort into ascending order xs = np.sort(x) if xs[0] == xs[-1]: return 0 # Wrap with np.int64 to prevent division by 0 n = np.int64(x.shape[0]) xs_ranks = np.arange(1, n+1) xmean = x.mean() gc = ((2 * xs_ranks - n - 1) * xs).sum() / (n * (n-1) * xmean) return gc
# Multiple testing correction adapted from # https://github.com/CoBiG2/cobig_misc_scripts/blob/master/FDR.py # # Following is the lincense info: # Copyright 2017 Francisco Pina Martins <f.pinamartins@gmail.com> # This file is part of structure_threader. # structure_threader is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # structure_threader is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with structure_threader. If not, see <http://www.gnu.org/licenses/>. # # Taken from https://stackoverflow.com/a/21739593/3091595, ported to python 3 # and improved readability.
[docs]def multiple_testing_correction(pvalues, correction_type="FDR"): """ Consistent with R. correct_pvalues_for_multiple_testing([0.0, 0.01, 0.029, 0.03, 0.031, 0.05, 0.069, 0.07, 0.071, 0.09, 0.1]) """ pvalues = np.array(pvalues) sample_size = pvalues.shape[0] qvalues = np.empty(sample_size) if correction_type == "Bonferroni": # Bonferroni correction qvalues = sample_size * pvalues elif correction_type == "FDR": # Benjamini-Hochberg, AKA - FDR test values = [(pvalue, i) for i, pvalue in enumerate(pvalues)] values.sort() values.reverse() new_values = [] for i, vals in enumerate(values): rank = sample_size - i pvalue, index = vals new_values.append((sample_size/rank) * pvalue) for i in range(0, int(sample_size)-1): if new_values[i] < new_values[i+1]: new_values[i+1] = new_values[i] for i, vals in enumerate(values): pvalue, index = vals qvalues[index] = new_values[i] else: raise ValueError("Not supported correction type " "{}".format(correction_type)) return np.clip(qvalues, 0, 1)
# lower upper bound # start, end, lb, ub should all be scalar
[docs]def bidir_ReLU(x, start, end, lb=0, ub=1): if start > end: raise ValueError("start should <= end" "start: {}. end: {}.".format(start, end)) if lb > ub: raise ValueError("lb should <= ub" "lower bound: {}. " "upper bound: {}. ".format(start, end)) if start < end: width = end - start height = ub - lb return np.clip(a=height * (x - start) / width + lb, a_min=lb, a_max=ub) else: # start == end return np.where(x >= start, ub, lb)