Skip to content

Commit

Permalink
Type annotations (#5)
Browse files Browse the repository at this point in the history
* Type annotations
* mypy CI integration
* case-insensitive rng
* tests for Python 3.5.0
  • Loading branch information
crusaderky authored Apr 29, 2019
1 parent 22f4e45 commit 51e6bcb
Show file tree
Hide file tree
Showing 15 changed files with 150 additions and 85 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pip-log.txt
.tox
nosetests.xml
.cache
.mypy_cache
.ropeproject/
.tags*
.testmon*
Expand Down
11 changes: 6 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ branches:
matrix:
fast_finish: true
include:
- env: CONDA_ENV=flake8
- env: CONDA_ENV=minimal
- env: CONDA_ENV=py35
- env: CONDA_ENV=py36
Expand All @@ -35,13 +34,15 @@ install:
- pip install --no-deps -e .

script:
- if [[ "$CONDA_ENV" != "flake8" ]]; then
python -c "import pyscenarios";
- python -c "import pyscenarios"
- if [[ "$CONDA_ENV" == "py37" ]]; then
flake8;
fi
- if [[ "$CONDA_ENV" == "py37" ]]; then
mypy .;
fi
- if [[ "$CONDA_ENV" == "docs" ]]; then
sphinx-build -n -j auto -b html -d _build/doctrees doc _build/html;
elif [[ "$CONDA_ENV" == "flake8" ]]; then
flake8;
else
py.test pyscenarios --cov=pyscenarios --cov-config ci/.coveragerc --cov-report term-missing --verbose $EXTRA_FLAGS;
fi
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ channels:
# random timeouts
- conda-forge
dependencies:
- python=3.6
- python=3.7
- dask
- numba
- numpy
Expand Down
12 changes: 0 additions & 12 deletions ci/requirements-flake8.yml

This file was deleted.

2 changes: 1 addition & 1 deletion ci/requirements-minimal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: test_env
channels:
- anaconda
dependencies:
- python=3.5
- python=3.5.0
- coveralls
- dask=0.17.3
- numba=0.34
Expand Down
3 changes: 3 additions & 0 deletions ci/requirements-py37.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ dependencies:
- pytest
- pytest-cov
- scipy
# linter tools (only in the latest version of Python)
- flake8
- mypy
7 changes: 4 additions & 3 deletions doc/installing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Installation
Required dependencies
---------------------

- Python 3.5 or 3.6
- Python 3.5 or later
- `dask <https://dask.org>`__
- `numba <http://numba.pydata.org>`__
- `numpy <http://www.numpy.org>`__
Expand All @@ -15,10 +15,11 @@ Required dependencies
Testing
-------

To run the test suite after installing pyscenarios, first install (via pypi or conda)
To run the test suite after installing pyscenarios, first install
(via pypi or conda)

- `py.test <https://pytest.org>`__: Simple unit testing library

and run
``py.test --pyargs pyscenarios``.
``py.test``.

6 changes: 4 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ What's New
v0.2.0 (unreleased)
-------------------

- Type annotations
- 'rng' parameter in copula functions is now case insensitive
- Work around regression in IT copula with dask >= 1.1
(`dask#4739 <https://github.com/dask/dask/issues/4739>`__)
- Explicit CI tests for Windows and Python 3.7
- Mandatory flake8 in CI
- Explicit CI tests for Windows, Python 3.5.0, and Python 3.7
- Mandatory flake8 and mypy in CI
- Changed license to Apache 2.0


Expand Down
75 changes: 44 additions & 31 deletions pyscenarios/copula.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
"""High performance copula generators
"""
import numpy
from typing import List, Optional, Union, cast

import numpy as np
import numpy.random
import numpy.linalg
import dask.array
import dask.base
import dask.array as da
from dask.array.core import normalize_chunks
from .sobol import sobol

from . import duck
from .sobol import sobol
from .typing import Chunks2D, NormalizedChunks2D


def gaussian_copula(cov, samples, seed=0, chunks=None,
rng='Mersenne Twister'):
def gaussian_copula(cov: Union[List[List[float]], np.ndarray],
samples: int, seed: int = 0,
chunks: Chunks2D = None,
rng: str = 'Mersenne Twister'
) -> Union[np.ndarray, da.Array]:
"""Gaussian Copula scenario generator.
Simplified algorithm::
Expand All @@ -29,7 +35,7 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
Number of random samples to generate
.. note::
When using SOBOL, to obtain a uniform distribution one must use
When using Sobol, to obtain a uniform distribution one must use
:math:`2^{n} - 1` samples (for any n > 0).
:param chunks:
Expand All @@ -48,7 +54,7 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
:param int seed:
Random seed.
With ``rng='SOBOL'``, this is the initial dimension; when generating
With ``rng='Sobol'``, this is the initial dimension; when generating
multiple copulas with different seeds, one should never use seeds that
are less than ``cov.shape[0]`` apart from each other.
Expand All @@ -57,7 +63,7 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
pyscenarios.sobol.max_dimensions() - cov.shape[0] - 1
:param str rng:
Either ``Mersenne Twister`` or ``SOBOL``
Either ``Mersenne Twister`` or ``Sobol``
:returns:
array of shape (samples, dimensions), with all series
Expand All @@ -67,24 +73,25 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
:class:`numpy.ndarray`
"""
assert samples > 0
cov = numpy.array(cov)
cov = np.asarray(cov)
assert cov.ndim == 2
assert cov.shape[0] == cov.shape[1]

L = numpy.linalg.cholesky(cov)

L = numpy.linalg.cholesky(cov) # type: Union[np.ndarray, da.Array]
if chunks:
chunks = normalize_chunks(chunks, shape=(samples, cov.shape[0]))
L = dask.array.from_array(L, chunks=(chunks[1], chunks[1]))
chunks = cast(NormalizedChunks2D,
normalize_chunks(chunks, shape=(samples, cov.shape[0])))
L = da.from_array(L, chunks=(chunks[1], chunks[1]))

if rng == 'Mersenne Twister':
rng = rng.lower()
if rng == 'mersenne twister':
rnd_state = duck.RandomState(seed)
# When pulling samples from the Mersenne Twister generator, we have
# the samples on the rows. This guarantees that if we draw more
# samples, the original samples won't change.
y = rnd_state.standard_normal(size=(samples, cov.shape[0]),
chunks=chunks)
elif rng == 'SOBOL':
elif rng == 'sobol':
# Generate uniform (0, 1) distributions
samples = sobol(size=(samples, cov.shape[0]),
d0=seed, chunks=chunks)
Expand All @@ -96,7 +103,12 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
return duck.dot(L, y.T).T


def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
def t_copula(cov: Union[List[List[float]], np.ndarray],
df: Union[int, List[int], np.ndarray],
samples: int, seed: int = 0,
chunks: Chunks2D = None,
rng: str = 'Mersenne Twister'
) -> Union[np.ndarray, da.Array]:
"""Student T Copula / IT Copula scenario generator.
Simplified algorithm::
Expand All @@ -117,7 +129,7 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
:param int seed:
Random seed.
With ``rng='SOBOL'``, this is the initial dimension; when generating
With ``rng='Sobol'``, this is the initial dimension; when generating
multiple copulas with different seeds, one should never use seeds that
are less than ``cov.shape[0] + 1`` apart from each other.
Expand All @@ -129,33 +141,34 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
:func:`gaussian_copula`.
"""
assert samples > 0
cov = numpy.array(cov)
cov = np.asarray(cov)
assert cov.ndim == 2
assert cov.shape[0] == cov.shape[1]
dimensions = cov.shape[0]

L = numpy.linalg.cholesky(cov)
if chunks:
chunks = normalize_chunks(chunks, shape=(samples, dimensions))
L = dask.array.from_array(L, chunks=(chunks[1], chunks[1]))
if chunks is not None:
chunks = cast(NormalizedChunks2D,
normalize_chunks(chunks, shape=(samples, dimensions)))
L = da.from_array(L, chunks=(chunks[1], chunks[1]))

# Pre-process df into a 1D dask array
df = numpy.array(df)
df = np.asarray(df)
if (df <= 0).any():
raise ValueError("df must always be greater than zero")
if df.shape not in ((), (dimensions, )):
raise ValueError("df must be either a scalar or a 1D vector with as "
"many points as the width of the correlation matrix")
if df.ndim == 1 and chunks:
df = dask.array.from_array(df, chunks=(chunks[1], ))
if df.ndim == 1 and chunks is not None:
df = da.from_array(df, chunks=(chunks[1], ))

# Define chunks for the S chi-square matrix
if chunks:
chunks_r = None # type: Optional[NormalizedChunks2D]
if chunks is not None:
chunks_r = (chunks[0], (1, ))
else:
chunks_r = None

if rng == 'Mersenne Twister':
rng = rng.lower()
if rng == 'mersenne twister':
# Use two separate random states for the normal and the chi2
# distributions. This is NOT the same as just extracting two series
# from the same RandomState, as we must guarantee that, if you extract
Expand All @@ -172,7 +185,7 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
chunks=chunks)
r = rnd_state_r.uniform(size=(samples, 1), chunks=chunks_r)

elif rng == 'SOBOL':
elif rng == 'sobol':
seed_r = seed + dimensions

y = sobol(size=(samples, dimensions), d0=seed, chunks=chunks)
Expand All @@ -182,7 +195,7 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
else:
raise ValueError("Unknown rng: %s" % rng)

s = duck.chi2_ppf(r, df=df)
s = duck.chi2_ppf(r, df)
z = duck.sqrt(df / s) * duck.dot(L, y.T).T
# Convert t distribution to normal (0, 1)
u = duck.t_cdf(z, df)
Expand Down
36 changes: 22 additions & 14 deletions pyscenarios/duck.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
"""Duck-typed functions that call numpy or dask depending on the inputs
"""
from functools import wraps
from typing import Any, Callable, Optional, Tuple, Union

import dask.array as da
import numpy as np
import scipy.stats
from functools import wraps
from .typing import Chunks2D


def array(x):
def array(x: Any) -> Union[np.ndarray, da.Array]:
"""Convert x to numpy array, unless it's a da.array
"""
if isinstance(x, (np.ndarray, da.Array)):
return x
return np.array(x)


def _map_blocks(func):
def _map_blocks(func: Callable[..., np.ndarray]
) -> Callable[..., Union[np.ndarray, da.Array]]:
"""Wrap an arbitrary function that takes one or more arrays in input.
If any is a Dask Array, invoke :func:`dask.array.map_blocks`, otherwise
apply the function directly.
Expand All @@ -27,7 +31,8 @@ def wrapper(*args, **kwargs):
return wrapper


def _map_blocks_df(func):
def _map_blocks_df(func: Callable[[Any, Any], np.ndarray]
) -> Callable[[Any, Any], Union[np.ndarray, da.Array]]:
"""Specialized variant for functions with degrees of freedom - adds
auto-chunking in case of mismatched arguments
"""
Expand All @@ -44,7 +49,7 @@ def wrapper(x, df):
return wrapper


def _toplevel(func_name):
def _toplevel(func_name: str) -> Callable[..., Union[np.ndarray, da.Array]]:
"""If any of the args is a Dask Array, invoke da.func_name; else invoke
np.func_name
"""
Expand Down Expand Up @@ -75,26 +80,29 @@ class RandomState:
For each method, if chunks=None invoke the numpy version, otherwise invoke
the dask version.
"""
def __init__(self, seed=None):
def __init__(self, seed: Optional[int] = None):
self._dask_state = da.random.RandomState(seed)

@property
def _numpy_state(self):
def _numpy_state(self) -> np.random.RandomState:
return self._dask_state._numpy_state

def seed(self, seed=None):
def seed(self, seed: Optional[int] = None) -> None:
self._dask_state.seed(seed)

def _apply(self, func_name, *args, chunks=None, **kwargs):
if chunks:
def _apply(self, func_name: str, size: Optional[Tuple[int, int]] = None,
chunks: Chunks2D = None):
if chunks is not None:
func = getattr(self._dask_state, func_name)
return func(*args, **kwargs, chunks=chunks)
return func(size=size, chunks=chunks)
else:
func = getattr(self._numpy_state, func_name)
return func(*args, **kwargs)
return func(size=size)

def uniform(self, size=None, chunks=None):
def uniform(self, size: Optional[Tuple[int, int]] = None,
chunks: Chunks2D = None):
return self._apply('uniform', size=size, chunks=chunks)

def standard_normal(self, size=None, chunks=None):
def standard_normal(self, size: Optional[Tuple[int, int]] = None,
chunks: Chunks2D = None):
return self._apply('standard_normal', size=size, chunks=chunks)
Loading

0 comments on commit 51e6bcb

Please sign in to comment.