Type annotations (#5)

* Type annotations * mypy CI integration * case-insensitive rng * tests for Python 3.5.0
crusaderky · Apr 29, 2019 · 51e6bcb · 51e6bcb
1 parent 22f4e45
commit 51e6bcb
Show file tree

Hide file tree

Showing 15 changed files with 150 additions and 85 deletions.
diff --git a/.gitignore b/.gitignore
@@ -26,6 +26,7 @@ pip-log.txt
 .tox
 nosetests.xml
 .cache
+.mypy_cache
 .ropeproject/
 .tags*
 .testmon*

diff --git a/.travis.yml b/.travis.yml
@@ -10,7 +10,6 @@ branches:
 matrix:
   fast_finish: true
   include:
-  - env: CONDA_ENV=flake8
   - env: CONDA_ENV=minimal
   - env: CONDA_ENV=py35
   - env: CONDA_ENV=py36
@@ -35,13 +34,15 @@ install:
   - pip install --no-deps -e .
 
 script:
-  - if [[ "$CONDA_ENV" != "flake8" ]]; then
-      python -c "import pyscenarios";
+  - python -c "import pyscenarios"
+  - if [[ "$CONDA_ENV" == "py37" ]]; then
+      flake8;
+    fi
+  - if [[ "$CONDA_ENV" == "py37" ]]; then
+      mypy .;
     fi
   - if [[ "$CONDA_ENV" == "docs" ]]; then
       sphinx-build -n -j auto -b html -d _build/doctrees doc _build/html;
-    elif [[ "$CONDA_ENV" == "flake8" ]]; then
-      flake8;
     else
       py.test pyscenarios --cov=pyscenarios --cov-config ci/.coveragerc --cov-report term-missing --verbose $EXTRA_FLAGS;
     fi

diff --git a/ci/requirements-docs.yml b/ci/requirements-docs.yml
@@ -5,7 +5,7 @@ channels:
   # random timeouts
   - conda-forge
 dependencies:
-  - python=3.6
+  - python=3.7
   - dask
   - numba
   - numpy

diff --git a/ci/requirements-flake8.yml b/ci/requirements-flake8.yml
diff --git a/ci/requirements-minimal.yml b/ci/requirements-minimal.yml
@@ -2,7 +2,7 @@ name: test_env
 channels:
   - anaconda
 dependencies:
-  - python=3.5
+  - python=3.5.0
   - coveralls
   - dask=0.17.3
   - numba=0.34

diff --git a/ci/requirements-py37.yml b/ci/requirements-py37.yml
@@ -11,3 +11,6 @@ dependencies:
   - pytest
   - pytest-cov
   - scipy
+  # linter tools (only in the latest version of Python)
+  - flake8
+  - mypy
diff --git a/doc/installing.rst b/doc/installing.rst
@@ -6,7 +6,7 @@ Installation
 Required dependencies
 ---------------------
 
-- Python 3.5 or 3.6
+- Python 3.5 or later
 - `dask <https://dask.org>`__
 - `numba <http://numba.pydata.org>`__
 - `numpy <http://www.numpy.org>`__
@@ -15,10 +15,11 @@ Required dependencies
 Testing
 -------
 
-To run the test suite after installing pyscenarios, first install (via pypi or conda)
+To run the test suite after installing pyscenarios, first install
+(via pypi or conda)
 
 - `py.test <https://pytest.org>`__: Simple unit testing library
 
 and run
-``py.test --pyargs pyscenarios``.
+``py.test``.
 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -8,10 +8,12 @@ What's New
 v0.2.0 (unreleased)
 -------------------
 
+- Type annotations
+- 'rng' parameter in copula functions is now case insensitive
 - Work around regression in IT copula with dask >= 1.1
   (`dask#4739 <https://github.com/dask/dask/issues/4739>`__)
-- Explicit CI tests for Windows and Python 3.7
-- Mandatory flake8 in CI
+- Explicit CI tests for Windows, Python 3.5.0, and Python 3.7
+- Mandatory flake8 and mypy in CI
 - Changed license to Apache 2.0
 
 

diff --git a/pyscenarios/copula.py b/pyscenarios/copula.py
@@ -1,17 +1,23 @@
 """High performance copula generators
 """
-import numpy
+from typing import List, Optional, Union, cast
+
+import numpy as np
 import numpy.random
 import numpy.linalg
-import dask.array
-import dask.base
+import dask.array as da
 from dask.array.core import normalize_chunks
-from .sobol import sobol
+
 from . import duck
+from .sobol import sobol
+from .typing import Chunks2D, NormalizedChunks2D
 
 
-def gaussian_copula(cov, samples, seed=0, chunks=None,
-                    rng='Mersenne Twister'):
+def gaussian_copula(cov: Union[List[List[float]], np.ndarray],
+                    samples: int, seed: int = 0,
+                    chunks: Chunks2D = None,
+                    rng: str = 'Mersenne Twister'
+                    ) -> Union[np.ndarray, da.Array]:
     """Gaussian Copula scenario generator.
 
     Simplified algorithm::
@@ -29,7 +35,7 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
         Number of random samples to generate
 
         .. note::
-           When using SOBOL, to obtain a uniform distribution one must use
+           When using Sobol, to obtain a uniform distribution one must use
            :math:`2^{n} - 1` samples (for any n > 0).
 
     :param chunks:
@@ -48,7 +54,7 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
     :param int seed:
         Random seed.
 
-        With ``rng='SOBOL'``, this is the initial dimension; when generating
+        With ``rng='Sobol'``, this is the initial dimension; when generating
         multiple copulas with different seeds, one should never use seeds that
         are less than ``cov.shape[0]`` apart from each other.
 
@@ -57,7 +63,7 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
             pyscenarios.sobol.max_dimensions() - cov.shape[0] - 1
 
     :param str rng:
-        Either ``Mersenne Twister`` or ``SOBOL``
+        Either ``Mersenne Twister`` or ``Sobol``
 
     :returns:
         array of shape (samples, dimensions), with all series
@@ -67,24 +73,25 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
         :class:`numpy.ndarray`
     """
     assert samples > 0
-    cov = numpy.array(cov)
+    cov = np.asarray(cov)
     assert cov.ndim == 2
     assert cov.shape[0] == cov.shape[1]
 
-    L = numpy.linalg.cholesky(cov)
-
+    L = numpy.linalg.cholesky(cov)  # type: Union[np.ndarray, da.Array]
     if chunks:
-        chunks = normalize_chunks(chunks, shape=(samples, cov.shape[0]))
-        L = dask.array.from_array(L, chunks=(chunks[1], chunks[1]))
+        chunks = cast(NormalizedChunks2D,
+                      normalize_chunks(chunks, shape=(samples, cov.shape[0])))
+        L = da.from_array(L, chunks=(chunks[1], chunks[1]))
 
-    if rng == 'Mersenne Twister':
+    rng = rng.lower()
+    if rng == 'mersenne twister':
         rnd_state = duck.RandomState(seed)
         # When pulling samples from the Mersenne Twister generator, we have
         # the samples on the rows. This guarantees that if we draw more
         # samples, the original samples won't change.
         y = rnd_state.standard_normal(size=(samples, cov.shape[0]),
                                       chunks=chunks)
-    elif rng == 'SOBOL':
+    elif rng == 'sobol':
         # Generate uniform (0, 1) distributions
         samples = sobol(size=(samples, cov.shape[0]),
                         d0=seed, chunks=chunks)
@@ -96,7 +103,12 @@ def gaussian_copula(cov, samples, seed=0, chunks=None,
     return duck.dot(L, y.T).T
 
 
-def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
+def t_copula(cov: Union[List[List[float]], np.ndarray],
+             df: Union[int, List[int], np.ndarray],
+             samples: int, seed: int = 0,
+             chunks: Chunks2D = None,
+             rng: str = 'Mersenne Twister'
+             ) -> Union[np.ndarray, da.Array]:
     """Student T Copula / IT Copula scenario generator.
 
     Simplified algorithm::
@@ -117,7 +129,7 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
     :param int seed:
         Random seed.
 
-        With ``rng='SOBOL'``, this is the initial dimension; when generating
+        With ``rng='Sobol'``, this is the initial dimension; when generating
         multiple copulas with different seeds, one should never use seeds that
         are less than ``cov.shape[0] + 1`` apart from each other.
 
@@ -129,33 +141,34 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
     :func:`gaussian_copula`.
     """
     assert samples > 0
-    cov = numpy.array(cov)
+    cov = np.asarray(cov)
     assert cov.ndim == 2
     assert cov.shape[0] == cov.shape[1]
     dimensions = cov.shape[0]
 
     L = numpy.linalg.cholesky(cov)
-    if chunks:
-        chunks = normalize_chunks(chunks, shape=(samples, dimensions))
-        L = dask.array.from_array(L, chunks=(chunks[1], chunks[1]))
+    if chunks is not None:
+        chunks = cast(NormalizedChunks2D,
+                      normalize_chunks(chunks, shape=(samples, dimensions)))
+        L = da.from_array(L, chunks=(chunks[1], chunks[1]))
 
     # Pre-process df into a 1D dask array
-    df = numpy.array(df)
+    df = np.asarray(df)
     if (df <= 0).any():
         raise ValueError("df must always be greater than zero")
     if df.shape not in ((), (dimensions, )):
         raise ValueError("df must be either a scalar or a 1D vector with as "
                          "many points as the width of the correlation matrix")
-    if df.ndim == 1 and chunks:
-        df = dask.array.from_array(df, chunks=(chunks[1], ))
+    if df.ndim == 1 and chunks is not None:
+        df = da.from_array(df, chunks=(chunks[1], ))
 
     # Define chunks for the S chi-square matrix
-    if chunks:
+    chunks_r = None  # type: Optional[NormalizedChunks2D]
+    if chunks is not None:
         chunks_r = (chunks[0], (1, ))
-    else:
-        chunks_r = None
 
-    if rng == 'Mersenne Twister':
+    rng = rng.lower()
+    if rng == 'mersenne twister':
         # Use two separate random states for the normal and the chi2
         # distributions. This is NOT the same as just extracting two series
         # from the same RandomState, as we must guarantee that, if you extract
@@ -172,7 +185,7 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
                                         chunks=chunks)
         r = rnd_state_r.uniform(size=(samples, 1), chunks=chunks_r)
 
-    elif rng == 'SOBOL':
+    elif rng == 'sobol':
         seed_r = seed + dimensions
 
         y = sobol(size=(samples, dimensions), d0=seed, chunks=chunks)
@@ -182,7 +195,7 @@ def t_copula(cov, df, samples, seed=0, chunks=None, rng='Mersenne Twister'):
     else:
         raise ValueError("Unknown rng: %s" % rng)
 
-    s = duck.chi2_ppf(r, df=df)
+    s = duck.chi2_ppf(r, df)
     z = duck.sqrt(df / s) * duck.dot(L, y.T).T
     # Convert t distribution to normal (0, 1)
     u = duck.t_cdf(z, df)

diff --git a/pyscenarios/duck.py b/pyscenarios/duck.py
@@ -1,20 +1,24 @@
 """Duck-typed functions that call numpy or dask depending on the inputs
 """
+from functools import wraps
+from typing import Any, Callable, Optional, Tuple, Union
+
 import dask.array as da
 import numpy as np
 import scipy.stats
-from functools import wraps
+from .typing import Chunks2D
 
 
-def array(x):
+def array(x: Any) -> Union[np.ndarray, da.Array]:
     """Convert x to numpy array, unless it's a da.array
     """
     if isinstance(x, (np.ndarray, da.Array)):
         return x
     return np.array(x)
 
 
-def _map_blocks(func):
+def _map_blocks(func: Callable[..., np.ndarray]
+                ) -> Callable[..., Union[np.ndarray, da.Array]]:
     """Wrap an arbitrary function that takes one or more arrays in input.
     If any is a Dask Array, invoke :func:`dask.array.map_blocks`, otherwise
     apply the function directly.
@@ -27,7 +31,8 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-def _map_blocks_df(func):
+def _map_blocks_df(func: Callable[[Any, Any], np.ndarray]
+                   ) -> Callable[[Any, Any], Union[np.ndarray, da.Array]]:
     """Specialized variant for functions with degrees of freedom - adds
     auto-chunking in case of mismatched arguments
     """
@@ -44,7 +49,7 @@ def wrapper(x, df):
     return wrapper
 
 
-def _toplevel(func_name):
+def _toplevel(func_name: str) -> Callable[..., Union[np.ndarray, da.Array]]:
     """If any of the args is a Dask Array, invoke da.func_name; else invoke
     np.func_name
     """
@@ -75,26 +80,29 @@ class RandomState:
     For each method, if chunks=None invoke the numpy version, otherwise invoke
     the dask version.
     """
-    def __init__(self, seed=None):
+    def __init__(self, seed: Optional[int] = None):
         self._dask_state = da.random.RandomState(seed)
 
     @property
-    def _numpy_state(self):
+    def _numpy_state(self) -> np.random.RandomState:
         return self._dask_state._numpy_state
 
-    def seed(self, seed=None):
+    def seed(self, seed: Optional[int] = None) -> None:
         self._dask_state.seed(seed)
 
-    def _apply(self, func_name, *args, chunks=None, **kwargs):
-        if chunks:
+    def _apply(self, func_name: str, size: Optional[Tuple[int, int]] = None,
+               chunks: Chunks2D = None):
+        if chunks is not None:
             func = getattr(self._dask_state, func_name)
-            return func(*args, **kwargs, chunks=chunks)
+            return func(size=size, chunks=chunks)
         else:
             func = getattr(self._numpy_state, func_name)
-            return func(*args, **kwargs)
+            return func(size=size)
 
-    def uniform(self, size=None, chunks=None):
+    def uniform(self, size: Optional[Tuple[int, int]] = None,
+                chunks: Chunks2D = None):
         return self._apply('uniform', size=size, chunks=chunks)
 
-    def standard_normal(self, size=None, chunks=None):
+    def standard_normal(self, size: Optional[Tuple[int, int]] = None,
+                        chunks: Chunks2D = None):
         return self._apply('standard_normal', size=size, chunks=chunks)