import matplotlib.pyplot as plt
import numpy as np

import iris
import iris.analysis
import iris.coords
import iris.cube
import iris.quickplot as qplt

print('Iris version: {}'.format(iris.__version__))

obs_filepath = './example_data/EN3_v2a_Profiles_195001.nc'


def ensure_full_mask(array):
    """Return a MaskedArray version of the data with a full mask array."""
    full_mask_array = np.ma.getmaskarray(array)
    return np.ma.MaskedArray(array, mask=full_mask_array)


def load_en3_variable(var_name):
    """
    Load a named data variable from the EN3 file as an Iris cube with a
    masked data array.

    """
    # Load cube from nc variable.
    cube = iris.load_cube(obs_filepath, var_name)
    # Force data to be a masked array, with a fully expanded mask array.
    cube.data = ensure_full_mask(cube.data)
    # Also infer a data mask if there is a '_fillvalue' attribute.  This
    # _ought_ to be automatic with netCDF4-python, but some EN3 files seem to
    # have a mis-spelling here (it should be '_FillValue', with capitals).
    if '_fillvalue' in cube.attributes:
        mdi = cube.attributes['_fillvalue']
        cube.data.mask |= cube.data == mdi
        cube.data.set_fill_value(mdi)
    return cube


def load_en3_with_quality_mask(main_var_name, qc_var_name, qc_max_valid=9):
    """
    Load a named data variable from the EN3 file as an Iris cube.

    Generates a masked data cube, in which data is also masked where the
    related QC value exceeds a given threshold.

    Args:

    * main_var_name (string):
        the data variable long_name within the netCDF file.

    * qc_var_name (string):
        name of the related QC variable.

    Kwargs:

    * qc_max_valid (int):
        Threshold value for QC data.  QC values larger than this result in
        the datapoint being masked.

    """
    # Load main and QC data values.
    cube = load_en3_variable(main_var_name)
    qc_data = load_en3_variable(qc_var_name).data
    # Missing QC data equates to okay.
    qc_data.set_fill_value(qc_max_valid)
    qc_data = qc_data.filled()
    # Convert to numbers.
    qc_data = np.array(qc_data, dtype=int)
    # A QC figure that is *too large* means the datapoint is invalid.
    cube.data.mask |= qc_data > qc_max_valid
    return cube

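# --- Minimal illustration (not part of the original workflow): the masking
# pattern used by load_en3_variable(), applied to a toy array.  The fill
# value of 99999.0 here is purely hypothetical; real EN3 files supply theirs
# via the '_fillvalue' attribute.
demo_mdi = 99999.0
demo = ensure_full_mask(np.array([5.0, demo_mdi, 7.0]))
demo.mask |= demo == demo_mdi
demo.set_fill_value(demo_mdi)
print('demo masked values: {}'.format(demo))  # -> [5.0 -- 7.0]
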
# Get the main data (potential temperatures), applying quality levels.
potm = load_en3_with_quality_mask('corrected pot. temp',
                                  'quality on pot. temperature',
                                  qc_max_valid=2)

# Get depth and location info.
depth = load_en3_with_quality_mask('corrected depth', 'quality on depth')
longitude = load_en3_with_quality_mask(
    'longitude of the station, best estimated value',
    'quality on position (latitude and longitude)',
    qc_max_valid=1)
latitude = load_en3_with_quality_mask(
    'latitude of the station, best estimated value',
    'quality on position (latitude and longitude)',
    qc_max_valid=1)

# Get the time reference + convert it to a units string.
reftime = iris.load_cube(obs_filepath, 'date of reference for julian days')
reftime_str = ''.join(reftime.data)
assert len(reftime_str) == 14
ref_unit_str = 'days since {:4s}-{:2s}-{:2s} {:2s}:{:2s}:{:2s}'.format(
    reftime_str[:4], reftime_str[4:6], reftime_str[6:8],
    reftime_str[8:10], reftime_str[10:12], reftime_str[12:14])

# Get the time data.
time = load_en3_with_quality_mask(
    'julian day (utc) of the location relative to reference_date_time',
    'quality on date and time')
time.units = ref_unit_str

# Show some results.
print('\npotm cube:\n', potm)
print('\ndepth cube:\n', depth)
print('\nlongitude cube:\n', longitude)
print('\ntime cube:\n', time)

# Define the target grid.
z_min, z_max, dz = 0.0, 500.0, 50.0
y_min, y_max, dy = -90.0, 90.0, 2.0
x_min, x_max, dx = -180.0, 180.0, 3.0
nz = int((z_max - z_min) / dz)
ny = int((y_max - y_min) / dy)
nx = int((x_max - x_min) / dx)


def aggregate_to_grid(phenom, aggregator=iris.analysis.MEAN, **agg_kwargs):
    # Regrid a data cube onto the grid previously specified (by 'nx', 'dx',
    # 'x_min', 'x_max' and the equivalents in y and z).
    #
    # For this, each datapoint is assigned to a gridcell 'box' according to
    # its corresponding coordinate values (depth, longitude, latitude).
    #
    # N.B. agg_kwargs is passed to the aggregator operation.

    # Make a 1-D 'flattened' version of the source data cube
    # (as 1-D coordinates are required by the 'aggregated_by' operation).
    cube_flat = iris.cube.Cube(phenom.data.flat[:])
    cube_flat.metadata = phenom.metadata

    # Make coordinates containing gridcell indices for each grid coord value.
    def add_index_coord(coord_points, coord_name, start, step):
        # Calculate gridcell indices of all the points.
        cell_indices = np.floor((coord_points - start) / step)
        # Make these all integers, for eventual use as array indices.
        cell_indices = np.array(cell_indices, dtype=int)
        # Add these as a "categorical coord" to aggregate by.
        cube_flat.add_aux_coord(
            iris.coords.AuxCoord(cell_indices, long_name=coord_name), 0)

    # Add a coordinate containing gridcell indices in the z-dimension (depth).
    add_index_coord(depth.data.flat[:], 'i_depth', z_min, dz)

    # Repeat for lats and lons -- except these need broadcasting to 2-D first.
    lons_2d, _ = np.broadcast_arrays(longitude.data[:, None], phenom.data)
    lats_2d, _ = np.broadcast_arrays(latitude.data[:, None], phenom.data)
    add_index_coord(lons_2d.flat[:], 'i_lon', x_min, dx)
    add_index_coord(lats_2d.flat[:], 'i_lat', y_min, dy)

    # Aggregate the data to get a statistical result for each 'inhabited'
    # gridcell.
    result_cells = cube_flat.aggregated_by(('i_depth', 'i_lat', 'i_lon'),
                                           aggregator=aggregator,
                                           **agg_kwargs)

    # Make a full-grid result cube.
    # N.B. metadata comes from the aggregation (includes units + cell_method).
    full_data_empty = np.ma.MaskedArray(
        np.zeros((nz, ny, nx), dtype=result_cells.data.dtype),
        mask=True)
    result_cube = iris.cube.Cube(full_data_empty)
    result_cube.metadata = result_cells.metadata

    # Assign aggregation results to the appropriate gridcells ...
    i_z, i_y, i_x = [result_cells.coord(coord_name).points
                     for coord_name in ('i_depth', 'i_lat', 'i_lon')]
    # ... but discarding results that lie outside the target grid.
    i_ok = np.where((i_z >= 0) & (i_z < nz) &
                    (i_y >= 0) & (i_y < ny) &
                    (i_x >= 0) & (i_x < nx))
    i_z, i_y, i_x = i_z[i_ok], i_y[i_ok], i_x[i_ok]
    result_cube.data[i_z, i_y, i_x] = result_cells.data[i_ok]

    # Add DimCoords defining the grid.
    result_cube.add_dim_coord(iris.coords.DimCoord.from_regular(
        z_min - 0.5 * dz, dz, nz, with_bounds=True,
        standard_name='depth', units='metres'), 0)
    result_cube.add_dim_coord(iris.coords.DimCoord.from_regular(
        y_min - 0.5 * dy, dy, ny, with_bounds=True,
        standard_name='latitude', units='degrees'), 1)
    result_cube.add_dim_coord(iris.coords.DimCoord.from_regular(
        x_min - 0.5 * dx, dx, nx, with_bounds=True,
        standard_name='longitude', units='degrees'), 2)

    return result_cube


# Regrid the main data to get a gridded mean, std-dev and count.
potm_mean = aggregate_to_grid(potm)
potm_std_dev = aggregate_to_grid(potm, iris.analysis.STD_DEV)
potm_counts = aggregate_to_grid(
    potm, iris.analysis.COUNT,
    function=lambda points: np.ones(points.shape, dtype=bool))
# NOTE: the 'function' arg looks a bit odd, as it must return an *array* of
# bools.

# Add occupancy 'count > 3' as an extra condition for valid data on the others.
invalid_counts = potm_counts.data <= 3
potm_mean.data.mask |= invalid_counts
potm_std_dev.data.mask |= invalid_counts

# Expand the times to 2-D so we can use the same code to get min+max times.
time_data2d, _ = np.broadcast_arrays(time.data[:, None], potm.data)
time2d = iris.cube.Cube(time_data2d)
time2d.metadata = time.metadata

# Add a bounded time coordinate to all the output cubes.
time_min = aggregate_to_grid(time2d, iris.analysis.MIN)
time_max = aggregate_to_grid(time2d, iris.analysis.MAX)
time_centres = 0.5 * (time_min.data + time_max.data)
time_bounds = np.concatenate((time_min.data[..., None],
                              time_max.data[..., None]), axis=-1)
time_coord = iris.coords.AuxCoord(time_centres, bounds=time_bounds,
                                  units=time.units, standard_name='time')
potm_mean.add_aux_coord(time_coord, (0, 1, 2))
potm_std_dev.add_aux_coord(time_coord, (0, 1, 2))
potm_counts.add_aux_coord(time_coord, (0, 1, 2))

# Save the result cubes to netCDF.
iris.save((potm_mean, potm_std_dev, potm_counts), 'temp_sparse_regrid.nc')

# Plot the mean result at the top level.
plt.figure(figsize=(12, 8))
qplt.pcolormesh(potm_mean[0])
plt.gca().coastlines()
plt.show()

print(potm_mean[0])
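
# --- Minimal sketch (not part of the original workflow): the core trick used
# by aggregate_to_grid(), i.e. collapsing points that share integer 'gridcell
# index' coordinates with Cube.aggregated_by().  The values and cell indices
# below are made up purely for illustration.
demo_cube = iris.cube.Cube(np.array([1.0, 2.0, 3.0, 10.0, 20.0]),
                           long_name='demo_values')
demo_cube.add_aux_coord(
    iris.coords.AuxCoord(np.array([0, 0, 0, 1, 1]), long_name='i_cell'), 0)
demo_means = demo_cube.aggregated_by('i_cell', iris.analysis.MEAN)
print(demo_means.data)  # -> [ 2. 15.]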