Merge branch 'main' into bounds_derived
HGWright authored Oct 25, 2024
2 parents b756210 + bd6f306 commit 78cb7dd
Showing 71 changed files with 7,446 additions and 1,688 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks_run.yml
@@ -60,7 +60,7 @@ jobs:
     env:
       IRIS_TEST_DATA_LOC_PATH: benchmarks
       IRIS_TEST_DATA_PATH: benchmarks/iris-test-data
-      IRIS_TEST_DATA_VERSION: "2.22"
+      IRIS_TEST_DATA_VERSION: "2.28"
       # Lets us manually bump the cache to rebuild
       ENV_CACHE_BUILD: "0"
       TEST_DATA_CACHE_BUILD: "2"
2 changes: 1 addition & 1 deletion .github/workflows/ci-manifest.yml
@@ -23,4 +23,4 @@ concurrency:
 jobs:
   manifest:
     name: "check-manifest"
-    uses: scitools/workflows/.github/workflows/ci-manifest.yml@2024.09.9
+    uses: scitools/workflows/.github/workflows/ci-manifest.yml@2024.10.2
3 changes: 2 additions & 1 deletion .github/workflows/ci-tests.yml
@@ -50,7 +50,8 @@ jobs:
       session: "tests"
 
     env:
-      IRIS_TEST_DATA_VERSION: "2.25"
+      # NOTE: IRIS_TEST_DATA_VERSION is also set in benchmarks_run.yml
+      IRIS_TEST_DATA_VERSION: "2.28"
       ENV_NAME: "ci-tests"
 
     steps:
2 changes: 1 addition & 1 deletion .github/workflows/refresh-lockfiles.yml
@@ -14,5 +14,5 @@ on:
 
 jobs:
   refresh_lockfiles:
-    uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2024.09.9
+    uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2024.10.2
     secrets: inherit
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -13,7 +13,7 @@ minimum_pre_commit_version: 1.21.0
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       # Prevent giant files from being committed.
       - id: check-added-large-files
@@ -29,7 +29,7 @@ repos:
       - id: no-commit-to-branch
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.6.8"
+    rev: "v0.7.0"
     hooks:
      - id: ruff
        types: [file, python]
@@ -51,7 +51,7 @@ repos:
        types: [file, python]
 
   - repo: https://github.com/asottile/blacken-docs
-    rev: 1.18.0
+    rev: 1.19.0
     hooks:
      - id: blacken-docs
        types: [file, rst]
@@ -63,7 +63,7 @@ repos:
        types: [file, python]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v1.11.2'
+    rev: 'v1.12.1'
     hooks:
      - id: mypy
        additional_dependencies:
1 change: 1 addition & 0 deletions docs/src/common_links.inc
@@ -79,5 +79,6 @@
 .. _@stephenworsley: https://github.com/stephenworsley
 .. _@tkknight: https://github.com/tkknight
 .. _@trexfeathers: https://github.com/trexfeathers
+.. _@ukmo-ccbunney: https://github.com/ukmo-ccbunney
 .. _@wjbenfold: https://github.com/wjbenfold
 .. _@zklaus: https://github.com/zklaus
222 changes: 222 additions & 0 deletions docs/src/further_topics/controlling_merge.rst
@@ -0,0 +1,222 @@
.. _controlling_merge:

=================================
Controlling Merge and Concatenate
=================================

Preliminaries
-------------

Prior to version 3.11.0, the manual steps demonstrated below would have been necessary to combine cubes
like these. For the sake of demonstration, we revert to this legacy loading behaviour as follows:

>>> import iris
>>> iris.LOAD_POLICY.set("legacy")

.. note::
    The default settings of :data:`iris.LOAD_POLICY` effectively apply a version of the following
    demonstration automatically on load. It is still worth knowing how to perform these steps manually,
    in case an even finer degree of control is required.

How to Merge Cubes When Coordinates Differ
------------------------------------------

Sometimes it is not possible to combine a CubeList appropriately using merge and concatenate on their own.
In such cases, the :func:`~iris.util.new_axis` utility offers much finer control over how cubes are combined.
Consider the following set of cubes:

>>> file_1 = iris.sample_data_path("time_varying_hybrid_height", "*_2160-12.pp")
>>> file_2 = iris.sample_data_path("time_varying_hybrid_height", "*_2161-01.pp")
>>> cubes = iris.load([file_1, file_2], "x_wind")
>>> print(cubes[0])
x_wind / (m s-1) (model_level_number: 5; latitude: 144; longitude: 192)
Dimension coordinates:
model_level_number x - -
latitude - x -
longitude - - x
Auxiliary coordinates:
level_height x - -
sigma x - -
surface_altitude - x x
Derived coordinates:
altitude x x x
Scalar coordinates:
forecast_period 1338840.0 hours, bound=(1338480.0, 1339200.0) hours
forecast_reference_time 2006-01-01 00:00:00
time 2160-12-16 00:00:00, bound=(2160-12-01 00:00:00, 2161-01-01 00:00:00)
Cell methods:
0 time: mean (interval: 1 hour)
Attributes:
STASH m01s00i002
source 'Data from Met Office Unified Model'
um_version '12.1'
>>> print(cubes[1])
x_wind / (m s-1) (model_level_number: 5; latitude: 144; longitude: 192)
Dimension coordinates:
model_level_number x - -
latitude - x -
longitude - - x
Auxiliary coordinates:
level_height x - -
sigma x - -
surface_altitude - x x
Derived coordinates:
altitude x x x
Scalar coordinates:
forecast_period 1339560.0 hours, bound=(1339200.0, 1339920.0) hours
forecast_reference_time 2006-01-01 00:00:00
time 2161-01-16 00:00:00, bound=(2161-01-01 00:00:00, 2161-02-01 00:00:00)
Cell methods:
0 time: mean (interval: 1 hour)
Attributes:
STASH m01s00i002
source 'Data from Met Office Unified Model'
um_version '12.1'

These two cubes have different scalar ``time`` points, so we would normally be able to merge them, creating
a time dimension. In this case, however, we cannot combine them with :meth:`~iris.cube.CubeList.merge`,
because their ``surface_altitude`` coordinate also varies over time:

>>> cubes.merge_cube()
Traceback (most recent call last):
...
iris.exceptions.MergeError: failed to merge into a single cube.
Coordinates in cube.aux_coords (non-scalar) differ: surface_altitude.

Since ``surface_altitude`` is preventing the merge, we need a way of combining these cubes that *explicitly*
combines the ``surface_altitude`` coordinate as well, so that it too varies along the time dimension. We can
do this by first adding a new dimension to each cube *and* to its ``surface_altitude`` coordinate using
:func:`~iris.util.new_axis`, and then concatenating the resulting cubes. We can attempt this as follows:

>>> from iris.util import new_axis
>>> from iris.cube import CubeList
>>> processed_cubes = CubeList([new_axis(cube, scalar_coord="time", expand_extras=["surface_altitude"]) for cube in cubes])
>>> processed_cubes.concatenate_cube()
Traceback (most recent call last):
...
iris.exceptions.ConcatenateError: failed to concatenate into a single cube.
Scalar coordinates values or metadata differ: forecast_period != forecast_period

This error alerts us to the fact that the ``forecast_period`` coordinate also varies over time. To get
concatenation to work, we must expand the dimensions of this coordinate to include ``time``, by passing it
to the ``expand_extras`` keyword as well.

>>> processed_cubes = CubeList(
... [new_axis(cube, scalar_coord="time", expand_extras=["surface_altitude", "forecast_period"]) for cube in cubes]
... )
>>> result = processed_cubes.concatenate_cube()
>>> print(result)
x_wind / (m s-1) (time: 2; model_level_number: 5; latitude: 144; longitude: 192)
Dimension coordinates:
time x - - -
model_level_number - x - -
latitude - - x -
longitude - - - x
Auxiliary coordinates:
forecast_period x - - -
surface_altitude x - x x
level_height - x - -
sigma - x - -
Derived coordinates:
altitude x x x x
Scalar coordinates:
forecast_reference_time 2006-01-01 00:00:00
Cell methods:
0 time: mean (interval: 1 hour)
Attributes:
STASH m01s00i002
source 'Data from Met Office Unified Model'
um_version '12.1'

.. note::
    Since the derived coordinate ``altitude`` is computed from ``surface_altitude``, adding ``time`` to the
    dimensions of ``surface_altitude`` also adds it to the dimensions of ``altitude``. So in the combined
    cube, both of these coordinates vary along the ``time`` dimension.
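
We can verify this by checking which data dimensions of the combined cube each coordinate spans; the
expected output below is read directly off the cube printout above:

>>> result.coord_dims("surface_altitude")
(0, 2, 3)
>>> result.coord_dims("altitude")
(0, 1, 2, 3)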

Combining Over Multiple Dimensions
----------------------------------

We now consider a more complex case. Instead of loading two files from different time steps, we now load 15
such files. Each file covers a one-month time step; however, the ``surface_altitude`` coordinate changes only
once per year. The files span three years, so there are three different ``surface_altitude`` coordinates.

>>> filename = iris.sample_data_path('time_varying_hybrid_height', '*.pp')
>>> cubes = iris.load(filename, constraints="x_wind")
>>> print(cubes)
0: x_wind / (m s-1) (time: 2; model_level_number: 5; latitude: 144; longitude: 192)
1: x_wind / (m s-1) (time: 12; model_level_number: 5; latitude: 144; longitude: 192)
2: x_wind / (m s-1) (model_level_number: 5; latitude: 144; longitude: 192)

When :func:`iris.load` attempts to merge these cubes, it creates a cube for every unique ``surface_altitude``
coordinate. Note that since only one time point is associated with the last cube, its ``time`` coordinate has
not been promoted to a dimension. The ``surface_altitude`` in each of the above cubes is 2D; however, since
some of these cubes already have a ``time`` dimension, we cannot use :func:`~iris.util.new_axis` to promote
``surface_altitude`` as we did above.
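
For instance, here is a sketch of what goes wrong if we try: the first cube's ``time`` coordinate already
spans a dimension, so it is not a scalar coordinate that :func:`~iris.util.new_axis` can promote (the exact
error raised may vary):

.. code-block:: python

    from iris.util import new_axis

    # Fails: "time" on cubes[0] is a dimension coordinate of length 2,
    # not a scalar coordinate, so there is nothing for new_axis to promote.
    new_axis(cubes[0], scalar_coord="time", expand_extras=["surface_altitude"])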

In order to fully control the merge process we instead use :func:`iris.load_raw`:

>>> raw_cubes = iris.load_raw(filename, constraints="x_wind")
>>> print(raw_cubes)
0: x_wind / (m s-1) (latitude: 144; longitude: 192)
1: x_wind / (m s-1) (latitude: 144; longitude: 192)
...
73: x_wind / (m s-1) (latitude: 144; longitude: 192)
74: x_wind / (m s-1) (latitude: 144; longitude: 192)

The raw cubes are also split along the ``model_level_number`` dimension, so we will need to merge/concatenate
along two different dimensions. Specifically, we can merge by promoting ``model_level_number`` to a dimension,
since ``surface_altitude`` does not vary along it, and we can concatenate along the ``time`` dimension as
before. First, we expand the ``time`` dimension, as before:

>>> processed_raw_cubes = CubeList(
... [new_axis(cube, scalar_coord="time", expand_extras=["surface_altitude", "forecast_period"]) for cube in raw_cubes]
... )
>>> print(processed_raw_cubes)
0: x_wind / (m s-1) (time: 1; latitude: 144; longitude: 192)
1: x_wind / (m s-1) (time: 1; latitude: 144; longitude: 192)
...
73: x_wind / (m s-1) (time: 1; latitude: 144; longitude: 192)
74: x_wind / (m s-1) (time: 1; latitude: 144; longitude: 192)

Then we merge, promoting the different ``model_level_number`` scalar coordinates to a dimension coordinate.
Note, however, that merging these cubes does *not* affect the ``time`` dimension, since merging applies only
to scalar coordinates, not to dimension coordinates of length 1.

>>> merged_cubes = processed_raw_cubes.merge()
>>> print(merged_cubes)
0: x_wind / (m s-1) (model_level_number: 5; time: 1; latitude: 144; longitude: 192)
1: x_wind / (m s-1) (model_level_number: 5; time: 1; latitude: 144; longitude: 192)
...
13: x_wind / (m s-1) (model_level_number: 5; time: 1; latitude: 144; longitude: 192)
14: x_wind / (m s-1) (model_level_number: 5; time: 1; latitude: 144; longitude: 192)

Once merged, we can concatenate all these cubes into the single result cube we wanted:

>>> result = merged_cubes.concatenate_cube()
>>> print(result)
x_wind / (m s-1) (model_level_number: 5; time: 15; latitude: 144; longitude: 192)
Dimension coordinates:
model_level_number x - - -
time - x - -
latitude - - x -
longitude - - - x
Auxiliary coordinates:
level_height x - - -
sigma x - - -
forecast_period - x - -
surface_altitude - x x x
Derived coordinates:
altitude x x x x
Scalar coordinates:
forecast_reference_time 2006-01-01 00:00:00
Cell methods:
0 time: mean (interval: 1 hour)
Attributes:
STASH m01s00i002
source 'Data from Met Office Unified Model'
um_version '12.1'

See Also
--------
* :data:`iris.LOAD_POLICY` can be controlled to apply similar operations
  within the load functions, i.e. :func:`~iris.load`, :func:`~iris.load_cube` and
  :func:`~iris.load_cubes`, as sketched below.
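
For example, a sketch of achieving a similar combination directly at load time, assuming the
``"comprehensive"`` settings name described in the :data:`iris.LOAD_POLICY` documentation:

.. code-block:: python

    # Temporarily apply the most aggressive merge/concatenate settings
    # while loading, instead of combining the raw cubes by hand.
    with iris.LOAD_POLICY.context("comprehensive"):
        result = iris.load_cube(filename, "x_wind")
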
3 changes: 2 additions & 1 deletion docs/src/further_topics/index.rst
@@ -18,4 +18,5 @@ Extra information on specific technical issues.
    netcdf_io
    dask_best_practices/index
    ugrid/index
-   which_regridder_to_use
+   which_regridder_to_use
+   controlling_merge
12 changes: 6 additions & 6 deletions docs/src/further_topics/metadata.rst
@@ -403,10 +403,10 @@ instances. Normally, this would cause issues. For example,
 
 >>> simply = {"one": np.int32(1), "two": np.array([1.0, 2.0])}
 >>> simply
-{'one': 1, 'two': array([1., 2.])}
+{'one': np.int32(1), 'two': array([1., 2.])}
 >>> fruity = {"one": np.int32(1), "two": np.array([1.0, 2.0])}
 >>> fruity
-{'one': 1, 'two': array([1., 2.])}
+{'one': np.int32(1), 'two': array([1., 2.])}
 >>> simply == fruity
 Traceback (most recent call last):
 ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
@@ -418,9 +418,9 @@ However, metadata class equality is rich enough to handle this eventuality,
 >>> metadata1 = cube.metadata._replace(attributes=simply)
 >>> metadata2 = cube.metadata._replace(attributes=fruity)
 >>> metadata1
-CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
 >>> metadata2
-CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
 
 .. doctest:: richer-metadata
 
@@ -430,10 +430,10 @@
 .. doctest:: richer-metadata
 
 >>> metadata1
-CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
 >>> metadata2 = cube.metadata._replace(attributes={"one": np.int32(1), "two": np.array([1000.0, 2000.0])})
 >>> metadata2
-CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1000., 2000.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1000., 2000.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
 >>> metadata1 == metadata2
 False
 