Source code for scedar.eda.mtype

import numpy as np


def is_uniq_np1darr(x):
    """Check that x is a one-dimensional numpy array without repeated values.

    Returns False for anything that is not an np.ndarray, for arrays whose
    number of dimensions is not 1, and for 1D arrays holding duplicates.
    Returns True otherwise (an empty 1D array counts as unique).
    """
    if not (isinstance(x, np.ndarray) and x.ndim == 1):
        return False
    # A 1D array is duplicate-free exactly when deduplication keeps its length.
    n_distinct = np.unique(x).shape[0]
    return n_distinct == x.shape[0]
def is_valid_full_cut_tree_mat(cmat):
    """
    Validate scipy hierarchical clustering cut tree.

    Walking the columns of cmat in order, the number of distinct values per
    column must count down from cmat.shape[0] to 1.
    """
    # np.unique always returns a flattened 1D array, so .size is its length.
    observed_counts = [np.unique(col).size for col in cmat.T]
    n_rows = cmat.shape[0]
    expected_counts = list(range(n_rows, 0, -1))
    return observed_counts == expected_counts
def is_valid_lab(lab):
    """Tell whether lab is exactly a str or exactly an int.

    The check is on the exact type, so instances of subclasses (bool, for
    example) are rejected.
    """
    return type(lab) in (str, int)
def check_is_valid_labs(labs):
    """Validate that labs is a homogeneous list of int or str labels.

    An empty list is accepted. Returns None when labs is valid.

    Raises
    ------
    ValueError
        If labs is None, is not exactly a list, holds elements of more than
        one type, or holds elements rejected by is_valid_lab.
    """
    if labs is None:
        raise ValueError("labs cannot be None")

    invalid_msg = "labs must be a homogenous list of int or str"
    if type(labs) is not list:
        raise ValueError(invalid_msg)

    lab_types = set(map(type, labs))
    if len(lab_types) > 1:
        raise ValueError(invalid_msg)
    # All elements share one type here, so checking the first covers them all.
    # An empty list has no element types and is accepted as-is.
    if lab_types and not is_valid_lab(labs[0]):
        raise ValueError(invalid_msg)
def is_valid_sfid(sfid):
    """Tell whether sfid is exactly a str or exactly an int.

    The check is on the exact type, so instances of subclasses (bool, for
    example) are rejected.
    """
    sfid_type = type(sfid)
    return sfid_type in (str, int)
def check_is_valid_sfids(sfids):
    """Validate that sfids is a duplicate-free homogeneous list of int or str.

    An empty list passes. Returns None on success.

    Raises
    ------
    ValueError
        If sfids is None, is not exactly a list, mixes element types, holds
        elements rejected by is_valid_sfid, or contains duplicated values.
    """
    if sfids is None:
        raise ValueError("[sf]ids cannot be None")

    type_err_msg = "[sf]ids must be a homogenous list of int or str"
    if type(sfids) is not list:
        raise ValueError(type_err_msg)

    elem_types = {type(sfid) for sfid in sfids}
    if len(elem_types) > 1:
        raise ValueError(type_err_msg)
    if len(elem_types) == 1 and not is_valid_sfid(sfids[0]):
        raise ValueError(type_err_msg)

    # Only an empty list or a flat list of ints/strs gets this far, so the
    # converted array can only be one-dimensional.
    sfid_arr = np.array(sfids)
    if not is_uniq_np1darr(sfid_arr):
        raise ValueError("[sf]ids must not contain duplicated values")