# Source code for sympl._core.util

from datetime import datetime

import numpy as np
from six import string_types

from .array import DataArray
from .exceptions import (
    SharedKeyError, InvalidStateError, InvalidPropertyDictError)

try:
    from numba import jit
except ImportError:
    def jit(signature_or_function=None, **kwargs):
        """
        Do-nothing stand-in for numba.jit, used when numba cannot be imported.

        It accepts the same call signature as numba.jit. When given a
        function (or signature) it hands that object straight back; when
        called with keyword options only, it returns a decorator that hands
        its argument straight back.
        """
        if signature_or_function is not None:
            return signature_or_function
        return lambda x: x

# Module-level registry mapping each direction wildcard ('x', 'y', 'z') to the
# list of dimension names it matches. It is modified by set_direction_names
# and add_direction_names, and copied by the functions that resolve wildcards.
dim_names = {'x': ['x'], 'y': ['y'], 'z': ['z']}

# internal exceptions used only within this module


class NoMatchForDirectionError(Exception):
    """
    Raised by get_input_array_dim_names when an explicitly named dimension
    requested in out_dims has no matching dimension on the DataArray.
    """


class DimensionNotInOutDimsError(ValueError):
    """
    Raised by get_numpy_array when the DataArray has a dimension that is not
    covered by any entry of the requested out_dims.
    """


class ShapeMismatchError(Exception):
    """
    Raised by restore_dimensions when the numpy array does not hold the same
    number of elements as the shape derived from the reference DataArray.
    """


def ensure_consistent_dimension_lengths(state):
    """
    Check that each dimension name has a single length across the state.

    Only values that are DataArray instances are inspected; any other value
    in the state is skipped.

    Parameters
    ----------
    state : dict
        A state dictionary.

    Raises
    ------
    InvalidStateError
        If two DataArrays in the state use the same dimension name with
        different lengths.
    """
    known_lengths = {}
    for quantity in state.values():
        if not isinstance(quantity, DataArray):
            continue
        for dim, length in zip(quantity.dims, quantity.shape):
            # the first occurrence records the length, later ones compare
            expected = known_lengths.setdefault(dim, length)
            if length != expected:
                raise InvalidStateError(
                    'dimension {} has multiple lengths (at least {} and {})'.format(
                        dim, length, expected))


def ensure_properties_have_dims_and_units(properties, quantity_name):
    """
    Check that a property dictionary defines both 'dims' and 'units'.

    Parameters
    ----------
    properties : dict
        The properties given for one quantity.
    quantity_name : str
        Name of the quantity, used only in the error message.

    Raises
    ------
    InvalidPropertyDictError
        If 'dims' is missing, or otherwise if 'units' is missing.
    """
    required = (
        ('dims', 'dims not specified for quantity {}'),
        ('units', 'units not specified for quantity {}'),
    )
    for key, message in required:
        if key not in properties:
            raise InvalidPropertyDictError(message.format(quantity_name))


def ensure_quantity_has_units(quantity, quantity_name):
    """
    Check that a quantity carries a 'units' entry in its attrs.

    Parameters
    ----------
    quantity : object with an ``attrs`` mapping
        The quantity to check.
    quantity_name : str
        Name of the quantity, used only in the error message.

    Raises
    ------
    InvalidStateError
        If 'units' is not present in ``quantity.attrs``.
    """
    if 'units' in quantity.attrs:
        return
    raise InvalidStateError(
        'quantity {} is missing units attribute'.format(quantity_name))


def independent_wildcards_first(item_list):
    """
    Reorder (quantity_name, properties) items so that a quantity which is the
    target of another quantity's 'match_dims_like' comes *before* the
    quantities that refer to it, whenever those were already placed.

    This lets the wildcard matches found for the target be passed on when
    the referring quantities are processed, so wildcard-matched dimensions
    end up ordered the same way.

    item_list is iterated twice, so it must be re-iterable (for example a
    list or a dict items view).

    Returns
    -------
    An iterable (the result of ``zip``) of (quantity_name, properties) pairs
    in the new order.
    """
    # target name -> names of the quantities that want to match its dims
    referrers = {}
    for quantity_name, properties in item_list:
        if 'match_dims_like' in properties:
            referrers.setdefault(
                properties['match_dims_like'], []).append(quantity_name)

    ordered_names = []
    ordered_properties = []
    for quantity_name, properties in item_list:
        placed_positions = [
            ordered_names.index(referrer)
            for referrer in referrers.get(quantity_name, ())
            if referrer in ordered_names]
        if placed_positions:
            # go in front of the earliest quantity that refers to this one
            position = min(placed_positions)
        else:
            position = len(ordered_names)
        ordered_names.insert(position, quantity_name)
        ordered_properties.insert(position, properties)
    return zip(ordered_names, ordered_properties)


def get_numpy_arrays_with_properties(state, property_dictionary):
    """
    Extract numpy arrays from a state as described by a property dictionary.

    Parameters
    ----------
    state : dict
        A state dictionary.
    property_dictionary : dict
        A dictionary whose keys are quantity names and values are dictionaries
        with properties for those quantities. The property "dims" must be
        present, indicating the dimensions that the quantity must have when it
        is returned as a numpy array. The property "units" must be present,
        and will be used to check the units on the input state and perform
        a conversion if necessary. If the optional property "match_dims_like"
        is present, its value should be a quantity also present in
        property_dictionary, and it will be ensured that any shared wildcard
        dimensions ('x', 'y', 'z', '*') for this quantity match the same
        dimensions as the specified quantity. If the optional property
        "alias" is present, its value is used as the key in the returned
        dictionary instead of the quantity name.

    Returns
    -------
    out_dict : dict
        A dictionary whose keys are quantity names (or aliases) and values are
        numpy arrays containing the data for those quantities, as specified by
        property_dictionary.

    Raises
    ------
    InvalidStateError
        If a DataArray in the state is missing an explicitly-specified
        dimension defined in its properties (dimension names other than
        'x', 'y', 'z', or '*'), if it has a dimension its "dims" property
        does not allow for, if it has no units attribute, or if the state
        is missing a required quantity.
    InvalidPropertyDictError
        If a quantity in property_dictionary is missing values for "dims" or
        "units", or if two quantities would be stored under the same output
        name.
    ValueError
        If converting a quantity to the requested units raises ValueError.
    """
    ensure_consistent_dimension_lengths(state)
    out_dict = {}
    matches = {}
    for quantity_name, properties in independent_wildcards_first(
            property_dictionary.items()):
        ensure_properties_have_dims_and_units(properties, quantity_name)
        if quantity_name not in state:
            raise InvalidStateError(
                'state is missing quantity {}'.format(quantity_name))
        ensure_quantity_has_units(state[quantity_name], quantity_name)
        out_name = properties.get('alias', quantity_name)
        if out_name in out_dict:
            raise InvalidPropertyDictError(
                'Multiple arrays with output name {}'.format(out_name))
        try:
            quantity_array = state[quantity_name].to_units(properties['units'])
        except ValueError:
            raise ValueError(
                'Invalid target units {} for quantity {} '
                'with units {}'.format(
                    properties['units'], quantity_name,
                    state[quantity_name].attrs['units']))
        # Wildcard matches of the referenced quantity, if it has already been
        # processed; None (the default) places no requirement on the matches.
        required_matches = matches.get(properties.get('match_dims_like'))
        try:
            out_dict[out_name], matches[quantity_name] = get_numpy_array(
                quantity_array, out_dims=properties['dims'],
                return_wildcard_matches=True,
                require_wildcard_matches=required_matches)
        except NoMatchForDirectionError as err:
            raise InvalidStateError(
                'dimension {} is missing from quantity {}'.format(
                    err, quantity_name)
            )
        except DimensionNotInOutDimsError as err:
            raise InvalidStateError(
                'dims property {} on quantity {} does not allow for state '
                'quantity to have dimension {} (but it does)'.format(
                    properties['dims'], quantity_name, err)
            )
    ensure_dims_like_are_satisfied(matches, property_dictionary)
    return out_dict
def get_numpy_array(
        data_array, out_dims, return_wildcard_matches=False,
        require_wildcard_matches=None):
    """
    Retrieve a numpy array with the desired dimensions and dimension order
    from the given DataArray, using transpose and creating length 1
    dimensions as necessary.

    Args
    ----
    data_array : DataArray
        The object from which to retrieve data.
    out_dims : list of str
        The desired dimensions of the output and their order.
        Length 1 dimensions will be created if the dimension is 'x', 'y', 'z',
        or '*' and does not exist in data_array. 'x', 'y', and 'z' indicate any
        axes registered to those directions with
        :py:func:`~sympl.set_direction_names`. '*' indicates an axis
        which is the flattened collection of all dimensions not explicitly
        listed in out_dims, including any dimensions with unknown direction.
    return_wildcard_matches : bool, optional
        If True, will additionally return a dictionary whose keys are direction
        wildcards ('x', 'y', 'z', or '*') and values are lists of matched
        dimensions in the order they appear.
    require_wildcard_matches : dict, optional
        A dictionary mapping wildcards to matches. If the wildcard is used in
        out_dims and matches the same set of dimensions, the dimensions are
        taken in the order given in this dictionary.

    Returns
    -------
    numpy_array : ndarray
        The desired array, with dimensions in the correct order and length 1
        dimensions created as needed.
    wildcard_matches : dict
        Only returned (as the second item of a tuple) when
        return_wildcard_matches is True.

    Raises
    ------
    ValueError
        If out_dims has values that are incompatible with the dimensions
        in data_array, or data_array's dimensions are invalid in some way.
        This includes DimensionNotInOutDimsError, raised when data_array has
        a dimension that out_dims does not cover.
    NoMatchForDirectionError
        If an explicitly named dimension in out_dims is not present in
        data_array.
    """
    if (len(data_array.values.shape) == 0) and (len(out_dims) == 0):
        # special case, 0-dimensional scalar array
        direction_to_names = {}  # required in case we need wildcard_matches
        return_array = data_array.values
    else:
        current_dim_names = dim_names.copy()
        for dim in out_dims:
            if dim not in ('x', 'y', 'z', '*'):
                # explicitly named dimensions only match themselves
                current_dim_names[dim] = [dim]
        direction_to_names = get_input_array_dim_names(
            data_array, out_dims, current_dim_names)
        if require_wildcard_matches is not None:
            for direction in out_dims:
                if (direction in require_wildcard_matches and
                        same_list(direction_to_names[direction],
                                  require_wildcard_matches[direction])):
                    # same dimensions: adopt the required ordering
                    direction_to_names[direction] = (
                        require_wildcard_matches[direction])
                # Otherwise the matches are inconsistent. No exception is
                # raised here because one is raised elsewhere, when
                # ensure_dims_like_are_satisfied is called.
        target_dimension_order = get_target_dimension_order(
            out_dims, direction_to_names)
        for dim in data_array.dims:
            if dim not in target_dimension_order:
                raise DimensionNotInOutDimsError(dim)
        slices_or_none = get_slices_and_placeholder_nones(
            data_array, out_dims, direction_to_names)
        final_shape = get_final_shape(data_array, out_dims, direction_to_names)
        transposed = data_array.transpose(*target_dimension_order).values
        # The index must be a tuple: numpy treats a list of slices/None as an
        # array index rather than as one index per axis.
        return_array = np.reshape(
            transposed[tuple(slices_or_none)], final_shape)
    if return_wildcard_matches:
        wildcard_matches = {
            key: value for key, value in direction_to_names.items()
            if key in ('x', 'y', 'z', '*')}
        return return_array, wildcard_matches
    else:
        return return_array


def ensure_dims_like_are_satisfied(matches, property_dictionary):
    """
    Check that quantities with a 'match_dims_like' property matched the same
    dimensions as the quantity they refer to.

    Parameters
    ----------
    matches : dict
        Maps quantity names to their wildcard matches, themselves dictionaries
        mapping a wildcard to the list of dimension names it matched.
    property_dictionary : dict
        Maps quantity names to property dictionaries.

    Raises
    ------
    InvalidPropertyDictError
        If 'match_dims_like' refers to a quantity that is not a key of
        property_dictionary.
    InvalidStateError
        If, for a wildcard used by both quantities, the two quantities
        matched different dimensions.
    """
    for quantity_name, properties in property_dictionary.items():
        if 'match_dims_like' in properties:
            if properties['match_dims_like'] not in property_dictionary.keys():
                raise InvalidPropertyDictError(
                    'quantity {} is not specified in property dictionary, '
                    'but is referred to by {} in match_dims_like'.format(
                        properties['match_dims_like'], quantity_name
                    ))
            like_name = properties['match_dims_like']
            shared_wildcards = set(matches[quantity_name].keys()).intersection(
                matches[like_name].keys())
            for wildcard_dim in shared_wildcards:
                # same_list compares the matched names without regard to
                # order; get_numpy_array adopts the required ordering when
                # require_wildcard_matches is supplied.
                if not same_list(
                        matches[quantity_name][wildcard_dim],
                        matches[like_name][wildcard_dim]):
                    raise InvalidStateError(
                        'quantity {} matches dimensions {} for direction {}, but '
                        'is referred to in match_dims_like by quantity {} with matches '
                        '{}'.format(
                            like_name, matches[like_name][wildcard_dim],
                            wildcard_dim, quantity_name,
                            matches[quantity_name][wildcard_dim]))
def restore_data_arrays_with_properties(
        raw_arrays, output_properties, input_state, input_properties):
    """
    Wrap raw numpy output arrays back into DataArrays.

    Parameters
    ----------
    raw_arrays : dict
        A dictionary whose keys are quantity names and values are numpy arrays
        containing the data for those quantities.
    output_properties : dict
        A dictionary whose keys are quantity names and values are dictionaries
        with properties for those quantities. The property "dims_like"
        names the input quantity whose dimensions the output quantity
        should have; when it is absent, the output quantity's own name is
        used. The remaining properties are set as attributes on the output
        DataArray for that quantity.
    input_state : dict
        A state dictionary that was used as input to a component for which
        DataArrays are being restored.
    input_properties : dict
        A dictionary whose keys are quantity names and values are dictionaries
        with input properties for those quantities. The property "dims" must be
        present, indicating the dimensions that the quantity was transformed to
        when taken as input to a component.

    Returns
    -------
    out_dict : dict
        A dictionary whose keys are quantities and values are DataArrays
        corresponding to those quantities, with data, shapes and attributes
        determined from the inputs to this function.

    Raises
    ------
    ValueError
        When the array for a requested output is not present in raw_arrays.
    InvalidPropertyDictError
        When an output property is specified to have dims_like an input
        property, but the arrays for the two properties have incompatible
        shapes.
    """
    ensure_consistent_dimension_lengths(input_state)
    restored = {}
    for quantity_name, properties in output_properties.items():
        attrs = properties.copy()
        dims_like = attrs.pop('dims_like', quantity_name)
        # Work out under which key the raw array was stored.
        if quantity_name not in raw_arrays and 'alias' in properties:
            source_name = attrs.pop('alias')
        elif 'alias' in input_properties.get(quantity_name, ()):
            source_name = input_properties[quantity_name]['alias']
        else:
            source_name = quantity_name
        if source_name not in raw_arrays:
            raise ValueError(
                'requested output {} is not present in raw_arrays'.format(
                    source_name))
        raw = raw_arrays[source_name]
        reference_dims = input_properties[dims_like]['dims']
        reference = input_state[dims_like]
        try:
            restored[quantity_name] = restore_dimensions(
                raw,
                from_dims=reference_dims,
                result_like=reference,
                result_attrs=attrs)
        except ShapeMismatchError:
            raise InvalidPropertyDictError(
                'output quantity {} has dims_like input {}, but the '
                'provided output array for {} has '
                'a shape {} incompatible with the input shape {} of {}. '
                'Do they really have the same dimensions?'.format(
                    quantity_name, dims_like, quantity_name,
                    raw.shape, reference.shape, dims_like
                )
            )
    return restored
def restore_dimensions(array, from_dims, result_like, result_attrs=None):
    """
    Restores a numpy array to a DataArray with similar dimensions to a reference
    Data Array. This is meant to be the reverse of get_numpy_array.

    Parameters
    ----------
    array : ndarray
        The numpy array from which to create a DataArray
    from_dims : list of str
        The directions describing the numpy array. If being used to reverse
        a call to get_numpy_array, this should be the same as the out_dims
        argument used in the call to get_numpy_array.
        'x', 'y', and 'z' indicate any axes
        registered to those directions with
        :py:func:`~sympl.set_direction_names`. '*' indicates an axis
        which is the flattened collection of all dimensions not explicitly
        listed in out_dims, including any dimensions with unknown direction.
    result_like : DataArray
        A reference array with the desired output dimensions of the DataArray.
        If being used to reverse a call to get_numpy_array, this should be
        the same as the data_array argument used in the call to get_numpy_array.
    result_attrs : dict, optional
        A dictionary with the desired attributes of the output DataArray. If
        not given, no attributes will be set.

    Returns
    -------
    data_array : DataArray
        The output DataArray with the same dimensions as the reference
        DataArray.

    Raises
    ------
    ShapeMismatchError
        If array does not have the same number of elements as the shape
        derived from result_like.

    See Also
    --------
    :py:func:`~sympl.get_numpy_array` : Retrieves a numpy array with desired
        dimensions from a given DataArray.
    """
    current_dim_names = dim_names.copy()
    for dim in from_dims:
        if dim not in ('x', 'y', 'z', '*'):
            # explicitly named dimensions only match themselves
            current_dim_names[dim] = [dim]
    direction_to_names = get_input_array_dim_names(
        result_like, from_dims, current_dim_names)
    original_shape = []
    original_dims = []
    original_coords = []
    for direction in from_dims:
        if direction in direction_to_names.keys():
            for name in direction_to_names[direction]:
                original_shape.append(len(result_like.coords[name]))
                original_dims.append(name)
                original_coords.append(result_like.coords[name])
    # np.prod is used because the np.product alias no longer exists in NumPy 2
    if np.prod(array.shape) != np.prod(original_shape):
        raise ShapeMismatchError
    data_array = DataArray(
        np.reshape(array, original_shape),
        dims=original_dims,
        coords=original_coords).transpose(
            *list(result_like.dims))
    if result_attrs is not None:
        data_array.attrs = result_attrs
    return data_array


def datetime64_to_datetime(dt64):
    """
    Convert a numpy datetime64 into a timezone-naive datetime.datetime, by
    measuring the number of seconds elapsed since 1970-01-01T00:00:00.
    """
    from datetime import timedelta

    # numpy datetime64 values carry no timezone, so the epoch is written
    # without a 'Z' suffix (parsing a timezone suffix is deprecated in numpy).
    seconds = (
        dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
    # Adding a timedelta to the epoch avoids datetime.utcfromtimestamp, which
    # is deprecated in recent Python versions.
    return datetime(1970, 1, 1) + timedelta(seconds=float(seconds))


def same_list(list1, list2):
    """Returns a boolean indicating whether the items in list1 are the same
    items present in list2 (ignoring order).

    The lists must have equal length and every item of each must be contained
    in the other; how many times an item is repeated is not compared."""
    return (
        len(list1) == len(list2) and
        all(item in list2 for item in list1) and
        all(item in list1 for item in list2))


def set_direction_names(x=None, y=None, z=None):
    """
    Sets the directional wildcards 'x', 'y', and 'z' to match the provided
    dimension names only.

    Each argument may be a single name (a string) or an iterable of names.
    The wildcard always keeps matching its own name. Directions whose
    argument is None are left unchanged.
    """
    for key, value in [('x', x), ('y', y), ('z', z)]:
        if isinstance(value, string_types):
            dim_names[key] = [key, value]
        elif value is not None:
            dim_names[key] = [key] + list(value)


def add_direction_names(x=None, y=None, z=None):
    """
    Sets the directional wildcards 'x', 'y', and 'z' to match the provided
    dimension names, in addition to any names they are already matching.

    Each argument may be a single name (a string) or an iterable of names.
    Directions whose argument is None are left unchanged.
    """
    for key, value in [('x', x), ('y', y), ('z', z)]:
        if isinstance(value, string_types):
            dim_names[key].append(value)
        elif value is not None:
            dim_names[key].extend(value)


def combine_dimensions(arrays, out_dims):
    """
    Returns a list of dimension names corresponding to
    dimension names from the DataArray objects given by arrays when present.
    The names returned correspond to the directions in out_dims.

    Args
    ----
    arrays : iterable of DataArray
        Objects from which to deduce dimension names.
    out_dims : {'x', 'y', 'z'}
        The desired output directions. Should contain only 'x', 'y', or 'z'.
        For example, ('y', 'x') is valid.

    Raises
    ------
    ValueError
        If there are multiple names for a single direction, if an array has
        a dimension along a direction not present in out_dims, or if out_dims
        contains anything other than 'x', 'y', or 'z'.

    Returns
    -------
    dimensions : list of str
        The deduced dimension names, in the order given by out_dims. A
        direction with no matching dimension is represented by the direction
        itself.
    """
    _ensure_no_invalid_directions(out_dims)
    out_names = [None] * len(out_dims)
    all_names = set()
    for value in arrays:
        all_names.update(value.dims)
    for direction, dir_names in dim_names.items():
        if direction in out_dims:
            names = all_names.intersection(dir_names)
            if len(names) > 1:
                raise ValueError(
                    'Multiple dimensions along {} direction'.format(direction))
            elif len(names) == 1:
                out_names[out_dims.index(direction)] = names.pop()
            else:
                out_names[out_dims.index(direction)] = direction
        elif len(all_names.intersection(dir_names)) > 0:
            raise ValueError(
                'Arrays have dimensions along {} direction, which is '
                'not included in output'.format(direction))
    return out_names


def _ensure_no_invalid_directions(out_dims):
    """Raise ValueError if out_dims has entries other than 'x', 'y' or 'z'."""
    invalid_dims = set(out_dims).difference(['x', 'y', 'z'])
    if len(invalid_dims) != 0:
        raise ValueError(
            'Invalid direction(s) in out_dims: {}'.format(invalid_dims))


def update_dict_by_adding_another(dict1, dict2):
    """
    Takes two dictionaries. Add values in dict2 to the values in dict1, if
    present. If not present, create a new value in dict1 equal to the value in
    dict2. Addition is done in-place if the values are
    array-like, to avoid data copying. Units are handled if the values are
    DataArrays with a 'units' attribute.
    """
    for key in dict2.keys():
        if key not in dict1:
            dict1[key] = dict2[key]
        else:
            if (isinstance(dict1[key], DataArray) and
                    isinstance(dict2[key], DataArray) and
                    ('units' in dict1[key].attrs) and
                    ('units' in dict2[key].attrs)):
                # convert to the units already used in dict1 before adding
                dict1[key] += dict2[key].to_units(dict1[key].attrs['units'])
            else:
                dict1[key] += dict2[key]  # += is in-place addition operator
    return  # not returning anything emphasizes that this is in-place


def ensure_no_shared_keys(dict1, dict2):
    """
    Raises SharedKeyError if there exists a key present in both
    dictionaries.
    """
    shared_keys = set(dict1.keys()).intersection(dict2.keys())
    if len(shared_keys) > 0:
        raise SharedKeyError(
            'unexpected shared keys: {}'.format(shared_keys))


def get_input_array_dim_names(data_array, out_dims, dim_names):
    """
    Parameters
    ----------
    data_array : DataArray
    out_dims : iterable
        directions in dim_names that should be included in the output,
        in the order they should be included
    dim_names : dict
        a mapping from directions to dimension names that fall under that
        direction wildcard.

    Returns
    -------
    input_array_dim_names : dict
        A mapping from directions included in out_dims to the dimension names
        present in data_array that correspond to those directions, in the
        order they appear in data_array.

    Raises
    ------
    NoMatchForDirectionError
        If an entry of out_dims other than 'x', 'y', 'z' or '*' matches no
        dimension of data_array.
    """
    input_array_dim_names = {}
    for direction in out_dims:
        if direction != '*':
            matching_dims = set(
                data_array.dims).intersection(dim_names[direction])
            # iterate data_array.dims so matches keep the array's own order
            input_array_dim_names[direction] = [
                dim for dim in data_array.dims if dim in matching_dims]
            if (direction not in ('x', 'y', 'z', '*') and
                    len(input_array_dim_names[direction]) == 0):
                raise NoMatchForDirectionError(direction)
    if '*' in out_dims:
        # '*' collects every dimension not claimed by another direction
        claimed_dims = set().union(*input_array_dim_names.values())
        matching_dims = set(data_array.dims).difference(claimed_dims)
        input_array_dim_names['*'] = [
            dim for dim in data_array.dims if dim in matching_dims]
    return input_array_dim_names


def get_target_dimension_order(out_dims, direction_to_names):
    """
    Takes in an iterable of directions ('x', 'y', 'z', or '*') and a dictionary
    mapping those directions to a list of names corresponding to those
    directions. Returns a list of names in the same order as in out_dims,
    preserving the order within direction_to_names for each direction.
    """
    target_dimension_order = []
    for direction in out_dims:
        target_dimension_order.extend(direction_to_names[direction])
    return target_dimension_order


def get_slices_and_placeholder_nones(data_array, out_dims, direction_to_names):
    """
    Takes in a DataArray, a desired ordering of output directions, and
    a dictionary mapping those directions to a list of names corresponding to
    those directions. Returns a list with the same ordering as out_dims that
    contains slices for out_dims that have corresponding names (as many slices
    as names, and spanning the entire dimension named), and None for out_dims
    without corresponding names.

    This returned list can be used to create length-1 axes for the dimensions
    that currently have no corresponding names in data_array.

    Raises
    ------
    ValueError
        If a direction other than '*' corresponds to more than one name.
    """
    slices_or_none = []
    for direction in out_dims:
        if len(direction_to_names[direction]) == 0:
            slices_or_none.append(None)
        # compare by value: identity of string literals is not guaranteed
        elif (direction != '*') and (len(direction_to_names[direction]) > 1):
            raise ValueError(
                'DataArray has multiple dimensions for a single direction')
        else:
            for name in direction_to_names[direction]:
                slices_or_none.append(slice(0, len(data_array.coords[name])))
    return slices_or_none


def get_final_shape(data_array, out_dims, direction_to_names):
    """
    Determine the final shape that data_array must be reshaped to in order to
    have one axis for each of the out_dims (for instance, combining all
    axes collected by the '*' direction).
    """
    final_shape = []
    for direction in out_dims:
        if len(direction_to_names[direction]) == 0:
            final_shape.append(1)
        else:
            # determine shape once dimensions for direction (usually '*')
            # are combined
            final_shape.append(
                np.prod([len(data_array.coords[name])
                         for name in direction_to_names[direction]]))
    return final_shape


def get_component_aliases(*args):
    """
    Returns aliases for variables in the properties of Components (e.g.,
    Prognostics).

    Notes
    -----
    - If a variable shows up in the input_properties or diagnostic_properties
      of two or more different Components, make sure they have the same
      'alias' keyword in all Components. Otherwise the alias found last
      replaces the earlier ones.

    Args
    ----
    *args : Component
        Components from which to fetch variable aliases from the
        input_properties, output_properties, diagnostic_properties, and
        tendency_properties dictionaries

    Returns
    -------
    aliases : dict
        A dictionary with keys containing old variable names and values
        containing new variable names
    """
    aliases = {}
    # Update the aliases dict with the properties in each provided Component
    for component in args:
        # look through the input, output, diagnostic, and tendency variables
        for prop_type in ['input_properties', 'output_properties',
                          'diagnostic_properties', 'tendency_properties']:
            if hasattr(component, prop_type):
                component_properties = getattr(component, prop_type)
                # save the alias (if there is one) for each variable
                for varname, properties in component_properties.items():
                    if 'alias' in properties.keys():
                        aliases[varname] = properties['alias']
    return aliases