import functools import operator import sys import warnings import numbers from collections import namedtuple from multiprocessing import Pool import inspect import math import numpy as np try: from numpy.random import Generator as Generator except ImportError: class Generator(): # type: ignore[no-redef] pass def _valarray(shape, value=np.nan, typecode=None): """Return an array of all values. """ out = np.ones(shape, dtype=bool) * value if typecode is not None: out = out.astype(typecode) if not isinstance(out, np.ndarray): out = np.asarray(out) return out def _lazywhere(cond, arrays, f, fillvalue=None, f2=None): """ np.where(cond, x, fillvalue) always evaluates x even where cond is False. This one only evaluates f(arr1[cond], arr2[cond], ...). For example, >>> a, b = np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8]) >>> def f(a, b): return a*b >>> _lazywhere(a > 2, (a, b), f, np.nan) array([ nan, nan, 21., 32.]) Notice, it assumes that all `arrays` are of the same shape, or can be broadcasted together. """ if fillvalue is None: if f2 is None: raise ValueError("One of (fillvalue, f2) must be given.") else: fillvalue = np.nan else: if f2 is not None: raise ValueError("Only one of (fillvalue, f2) can be given.") arrays = np.broadcast_arrays(*arrays) temp = tuple(np.extract(cond, arr) for arr in arrays) tcode = np.mintypecode([a.dtype.char for a in arrays]) out = _valarray(np.shape(arrays[0]), value=fillvalue, typecode=tcode) np.place(out, cond, f(*temp)) if f2 is not None: temp = tuple(np.extract(~cond, arr) for arr in arrays) np.place(out, ~cond, f2(*temp)) return out def _lazyselect(condlist, choicelist, arrays, default=0): """ Mimic `np.select(condlist, choicelist)`. Notice, it assumes that all `arrays` are of the same shape or can be broadcasted together. All functions in `choicelist` must accept array arguments in the order given in `arrays` and must return an array of the same shape as broadcasted `arrays`. Examples -------- >>> x = np.arange(6) >>> np.select([x <3, x > 3], [x**2, x**3], default=0) array([ 0, 1, 4, 0, 64, 125]) >>> _lazyselect([x < 3, x > 3], [lambda x: x**2, lambda x: x**3], (x,)) array([ 0., 1., 4., 0., 64., 125.]) >>> a = -np.ones_like(x) >>> _lazyselect([x < 3, x > 3], ... [lambda x, a: x**2, lambda x, a: a * x**3], ... (x, a), default=np.nan) array([ 0., 1., 4., nan, -64., -125.]) """ arrays = np.broadcast_arrays(*arrays) tcode = np.mintypecode([a.dtype.char for a in arrays]) out = _valarray(np.shape(arrays[0]), value=default, typecode=tcode) for index in range(len(condlist)): func, cond = choicelist[index], condlist[index] if np.all(cond is False): continue cond, _ = np.broadcast_arrays(cond, arrays[0]) temp = tuple(np.extract(cond, arr) for arr in arrays) np.place(out, cond, func(*temp)) return out def _aligned_zeros(shape, dtype=float, order="C", align=None): """Allocate a new ndarray with aligned memory. Primary use case for this currently is working around a f2py issue in NumPy 1.9.1, where dtype.alignment is such that np.zeros() does not necessarily create arrays aligned up to it. """ dtype = np.dtype(dtype) if align is None: align = dtype.alignment if not hasattr(shape, '__len__'): shape = (shape,) size = functools.reduce(operator.mul, shape) * dtype.itemsize buf = np.empty(size + align + 1, np.uint8) offset = buf.__array_interface__['data'][0] % align if offset != 0: offset = align - offset # Note: slices producing 0-size arrays do not necessarily change # data pointer --- so we use and allocate size+1 buf = buf[offset:offset+size+1][:-1] data = np.ndarray(shape, dtype, buf, order=order) data.fill(0) return data def _prune_array(array): """Return an array equivalent to the input array. If the input array is a view of a much larger array, copy its contents to a newly allocated array. Otherwise, return the input unchanged. """ if array.base is not None and array.size < array.base.size // 2: return array.copy() return array def prod(iterable): """ Product of a sequence of numbers. Faster than np.prod for short lists like array shapes, and does not overflow if using Python integers. """ product = 1 for x in iterable: product *= x return product def float_factorial(n: int) -> float: """Compute the factorial and return as a float Returns infinity when result is too large for a double """ return float(math.factorial(n)) if n < 171 else np.inf class DeprecatedImport(object): """ Deprecated import with redirection and warning. Examples -------- Suppose you previously had in some module:: from foo import spam If this has to be deprecated, do:: spam = DeprecatedImport("foo.spam", "baz") to redirect users to use "baz" module instead. """ def __init__(self, old_module_name, new_module_name): self._old_name = old_module_name self._new_name = new_module_name __import__(self._new_name) self._mod = sys.modules[self._new_name] def __dir__(self): return dir(self._mod) def __getattr__(self, name): warnings.warn("Module %s is deprecated, use %s instead" % (self._old_name, self._new_name), DeprecationWarning) return getattr(self._mod, name) # copy-pasted from scikit-learn utils/validation.py def check_random_state(seed): """Turn seed into a np.random.RandomState instance If seed is None (or np.random), return the RandomState singleton used by np.random. If seed is an int, return a new RandomState instance seeded with seed. If seed is already a RandomState instance, return it. If seed is a new-style np.random.Generator, return it. Otherwise, raise ValueError. """ if seed is None or seed is np.random: return np.random.mtrand._rand if isinstance(seed, (numbers.Integral, np.integer)): return np.random.RandomState(seed) if isinstance(seed, np.random.RandomState): return seed try: # Generator is only available in numpy >= 1.17 if isinstance(seed, np.random.Generator): return seed except AttributeError: pass raise ValueError('%r cannot be used to seed a numpy.random.RandomState' ' instance' % seed) def _asarray_validated(a, check_finite=True, sparse_ok=False, objects_ok=False, mask_ok=False, as_inexact=False): """ Helper function for SciPy argument validation. Many SciPy linear algebra functions do support arbitrary array-like input arguments. Examples of commonly unsupported inputs include matrices containing inf/nan, sparse matrix representations, and matrices with complicated elements. Parameters ---------- a : array_like The array-like input. check_finite : bool, optional Whether to check that the input matrices contain only finite numbers. Disabling may give a performance gain, but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs. Default: True sparse_ok : bool, optional True if scipy sparse matrices are allowed. objects_ok : bool, optional True if arrays with dype('O') are allowed. mask_ok : bool, optional True if masked arrays are allowed. as_inexact : bool, optional True to convert the input array to a np.inexact dtype. Returns ------- ret : ndarray The converted validated array. """ if not sparse_ok: import scipy.sparse if scipy.sparse.issparse(a): msg = ('Sparse matrices are not supported by this function. ' 'Perhaps one of the scipy.sparse.linalg functions ' 'would work instead.') raise ValueError(msg) if not mask_ok: if np.ma.isMaskedArray(a): raise ValueError('masked arrays are not supported') toarray = np.asarray_chkfinite if check_finite else np.asarray a = toarray(a) if not objects_ok: if a.dtype is np.dtype('O'): raise ValueError('object arrays are not supported') if as_inexact: if not np.issubdtype(a.dtype, np.inexact): a = toarray(a, dtype=np.float_) return a # Add a replacement for inspect.getfullargspec()/ # The version below is borrowed from Django, # https://github.com/django/django/pull/4846. # Note an inconsistency between inspect.getfullargspec(func) and # inspect.signature(func). If `func` is a bound method, the latter does *not* # list `self` as a first argument, while the former *does*. # Hence, cook up a common ground replacement: `getfullargspec_no_self` which # mimics `inspect.getfullargspec` but does not list `self`. # # This way, the caller code does not need to know whether it uses a legacy # .getfullargspec or a bright and shiny .signature. FullArgSpec = namedtuple('FullArgSpec', ['args', 'varargs', 'varkw', 'defaults', 'kwonlyargs', 'kwonlydefaults', 'annotations']) def getfullargspec_no_self(func): """inspect.getfullargspec replacement using inspect.signature. If func is a bound method, do not list the 'self' parameter. Parameters ---------- func : callable A callable to inspect Returns ------- fullargspec : FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations) NOTE: if the first argument of `func` is self, it is *not*, I repeat *not*, included in fullargspec.args. This is done for consistency between inspect.getargspec() under Python 2.x, and inspect.signature() under Python 3.x. """ sig = inspect.signature(func) args = [ p.name for p in sig.parameters.values() if p.kind in [inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.POSITIONAL_ONLY] ] varargs = [ p.name for p in sig.parameters.values() if p.kind == inspect.Parameter.VAR_POSITIONAL ] varargs = varargs[0] if varargs else None varkw = [ p.name for p in sig.parameters.values() if p.kind == inspect.Parameter.VAR_KEYWORD ] varkw = varkw[0] if varkw else None defaults = tuple( p.default for p in sig.parameters.values() if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and p.default is not p.empty) ) or None kwonlyargs = [ p.name for p in sig.parameters.values() if p.kind == inspect.Parameter.KEYWORD_ONLY ] kwdefaults = {p.name: p.default for p in sig.parameters.values() if p.kind == inspect.Parameter.KEYWORD_ONLY and p.default is not p.empty} annotations = {p.name: p.annotation for p in sig.parameters.values() if p.annotation is not p.empty} return FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwdefaults or None, annotations) class MapWrapper(object): """ Parallelisation wrapper for working with map-like callables, such as `multiprocessing.Pool.map`. Parameters ---------- pool : int or map-like callable If `pool` is an integer, then it specifies the number of threads to use for parallelization. If ``int(pool) == 1``, then no parallel processing is used and the map builtin is used. If ``pool == -1``, then the pool will utilize all available CPUs. If `pool` is a map-like callable that follows the same calling sequence as the built-in map function, then this callable is used for parallelization. """ def __init__(self, pool=1): self.pool = None self._mapfunc = map self._own_pool = False if callable(pool): self.pool = pool self._mapfunc = self.pool else: # user supplies a number if int(pool) == -1: # use as many processors as possible self.pool = Pool() self._mapfunc = self.pool.map self._own_pool = True elif int(pool) == 1: pass elif int(pool) > 1: # use the number of processors requested self.pool = Pool(processes=int(pool)) self._mapfunc = self.pool.map self._own_pool = True else: raise RuntimeError("Number of workers specified must be -1," " an int >= 1, or an object with a 'map' method") def __enter__(self): return self def __del__(self): self.close() self.terminate() def terminate(self): if self._own_pool: self.pool.terminate() def join(self): if self._own_pool: self.pool.join() def close(self): if self._own_pool: self.pool.close() def __exit__(self, exc_type, exc_value, traceback): if self._own_pool: self.pool.close() self.pool.terminate() def __call__(self, func, iterable): # only accept one iterable because that's all Pool.map accepts try: return self._mapfunc(func, iterable) except TypeError: # wrong number of arguments raise TypeError("The map-like callable must be of the" " form f(func, iterable)") def rng_integers(gen, low, high=None, size=None, dtype='int64', endpoint=False): """ Return random integers from low (inclusive) to high (exclusive), or if endpoint=True, low (inclusive) to high (inclusive). Replaces `RandomState.randint` (with endpoint=False) and `RandomState.random_integers` (with endpoint=True). Return random integers from the "discrete uniform" distribution of the specified dtype. If high is None (the default), then results are from 0 to low. Parameters ---------- gen: {None, np.random.RandomState, np.random.Generator} Random number generator. If None, then the np.random.RandomState singleton is used. low: int or array-like of ints Lowest (signed) integers to be drawn from the distribution (unless high=None, in which case this parameter is 0 and this value is used for high). high: int or array-like of ints If provided, one above the largest (signed) integer to be drawn from the distribution (see above for behavior if high=None). If array-like, must contain integer values. size: None Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. Default is None, in which case a single value is returned. dtype: {str, dtype}, optional Desired dtype of the result. All dtypes are determined by their name, i.e., 'int64', 'int', etc, so byteorder is not available and a specific precision may have different C types depending on the platform. The default value is np.int_. endpoint: bool, optional If True, sample from the interval [low, high] instead of the default [low, high) Defaults to False. Returns ------- out: int or ndarray of ints size-shaped array of random integers from the appropriate distribution, or a single such random int if size not provided. """ if isinstance(gen, Generator): return gen.integers(low, high=high, size=size, dtype=dtype, endpoint=endpoint) else: if gen is None: # default is RandomState singleton used by np.random. gen = np.random.mtrand._rand if endpoint: # inclusive of endpoint # remember that low and high can be arrays, so don't modify in # place if high is None: return gen.randint(low + 1, size=size, dtype=dtype) if high is not None: return gen.randint(low, high=high + 1, size=size, dtype=dtype) # exclusive return gen.randint(low, high=high, size=size, dtype=dtype)