Skip to content

Commit

Permalink
Fix bugs in make_state_vector.py (#182)
Browse files Browse the repository at this point in the history
addresses the following bugs in make_state_vector.py:

- an offset was mistakenly being applied to grids with resolutions other than 0.25 degrees
- a faulty if statement caused the land threshold check to always evaluate to false
- In some cases labels would not be applied to buffer cells, causing gaps in the statevector labels resulting in perturbation simulations that would not be perturbed at all. This seems to only occur when using custom state vectors from a shapefile
To fix the 3rd bug I updated the buffer cell creation code and aligned it between src/utilities/make_state_vector_file.py and src/notebooks/statevector_from_shapefile.ipynb
  • Loading branch information
laestrada authored Jan 8, 2024
1 parent c6e86a8 commit 0d0b57f
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 52 deletions.
30 changes: 5 additions & 25 deletions src/notebooks/statevector_from_shapefile.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
"import matplotlib.pyplot as plt\n",
"import yaml\n",
"import warnings; warnings.filterwarnings(action='ignore')\n",
"sys.path.append('/home/ubuntu/integrated_methane_inversion/')\n",
"sys.path.append('../../')\n",
"from src.utilities.make_state_vector_file import cluster_buffer_elements\n",
"from src.utilities.utils import download_landcover_files\n",
"%matplotlib inline"
]
Expand Down Expand Up @@ -449,29 +450,8 @@
}
],
"source": [
"# Add buffer elements\n",
"buffer_elements = np.abs((mask > 0) - 1).values\n",
"\n",
"# Get image coordinates of buffer elements\n",
"irows = np.arange(buffer_elements.shape[0])\n",
"icols = np.arange(buffer_elements.shape[1])\n",
"irows = np.transpose(np.tile(irows,(len(icols),1)))\n",
"icols = np.tile(icols,(len(irows),1)) * (buffer_elements > 0)\n",
"\n",
"# Select image coordinates of buffer elements\n",
"irows_buffer = irows[buffer_elements > 0]\n",
"icols_buffer = icols[buffer_elements > 0]\n",
"coords = [[icols_buffer[j], irows_buffer[j]] for j in range(len(irows_buffer))]\n",
"\n",
"# Kmeans based on image coordinates\n",
"X = np.array(coords)\n",
"kmeans = KMeans(n_clusters=config['nBufferClusters'], random_state=0).fit(X)\n",
"\n",
"# Assign labels\n",
"for r in range(n_lat):\n",
" for c in range(n_lon):\n",
" if state_vector[r,c].values == 0:\n",
" state_vector[r,c] = kmeans.predict([[c,r]])[0] + count\n",
"# cluster the buffer elements with the above function\n",
"state_vector = cluster_buffer_elements(state_vector, config['nBufferClusters'], state_vector.max().item())\n",
"\n",
"# Add units attribute\n",
"state_vector.attrs['units'] = 'none'\n",
Expand Down Expand Up @@ -995,7 +975,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.11.4"
}
},
"nbformat": 4,
Expand Down
81 changes: 54 additions & 27 deletions src/utilities/make_state_vector_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,52 @@ def check_nested_grid_compatibility(lat_min, lat_max, lon_min, lon_max, land_cov

return compatible

def cluster_buffer_elements(data, num_clusters, offset):
    """
    Description:
        Cluster all 0-valued elements in the dataarray into the desired
        number of clusters, labeling them sequentially after `offset`.
    arguments:
        data   [][]dataarray : xarray sensitivity data
        num_clusters     int : number of cluster labels to assign
        offset           int : offset labels by this integer value
                               (typically the current state vector maximum)
    Returns:   [][]dataarray : labeled data
    """
    # Get the latitude and longitude coordinates as separate arrays
    latitudes = data.coords["lat"].values
    longitudes = data.coords["lon"].values

    # Work on a copy so the caller's array is not mutated
    data_copy = data.copy()

    # Flatten the sensitivity values; buffer cells are exactly the
    # elements still equal to 0
    Z = data.values.flatten()
    valid_indices = np.where(Z == 0)[0]

    # No buffer cells to label -- nothing to cluster, return unchanged
    if valid_indices.size == 0:
        return data_copy

    # Build the 2D coordinate grid and keep only the buffer-cell positions
    X, Y = np.meshgrid(longitudes, latitudes)
    X = X.flatten()[valid_indices]
    Y = Y.flatten()[valid_indices]

    # Stack into a (n_samples, n_features) array of (lon, lat) pairs
    features = np.column_stack((X, Y))

    # Cluster on spatial position so each buffer label covers a
    # contiguous region.
    # NOTE: Mini-Batch k-means would be much faster, but less accurate.
    kmeans = KMeans(n_clusters=num_clusters, random_state=0)
    cluster_labels = kmeans.fit_predict(features)

    # Write labels back into the buffer cells, shifted past the existing
    # state vector labels (offset) and made 1-based (+1) so no buffer
    # cell keeps the 0 placeholder
    Z[valid_indices] = cluster_labels + offset + 1

    # Reconstruct the 2D grid with the labeled buffer cells filled in
    data_copy.values = Z.reshape(data.shape)
    return data_copy


def make_state_vector_file(
config_path,
Expand Down Expand Up @@ -85,7 +131,12 @@ def make_state_vector_file(
hd = xr.load_dataset(hemco_diag_pth)

# Require hemco diags on same global grid as land cover map
hd["lon"] = hd["lon"] - 0.03125 # initially offset by 0.03125 degrees
# TODO remove this offset once the HEMCO standalone files
# are regenerated with recent bugfix that corrects the offset
if config["Res"] == "0.25x0.3125":
hd["lon"] = hd["lon"] - 0.03125
elif config["Res"] == "0.5x0.625":
hd["lon"] = hd["lon"] - 0.0625

# Select / group fields together
lc = (lc["FRLAKE"] + lc["FRLAND"] + lc["FRLANDIC"]).drop("time").squeeze()
Expand Down Expand Up @@ -126,7 +177,7 @@ def make_state_vector_file(
statevector[(statevector.lat < lat_min) | (statevector.lat > lat_max), :] = 0

# Also set pixels over water to 0, unless there are offshore emissions
if land_threshold:
if land_threshold > 0:
# Where there is neither land nor emissions, replace with 0
land = lc.where((lc > land_threshold) | (hd > emis_threshold))
statevector.values[land.isnull().values] = -9999
Expand All @@ -138,31 +189,7 @@ def make_state_vector_file(

# Assign buffer pixels (the remaining 0's) to state vector
# -------------------------------------------------------------------------
buffer_area = statevector.values == 0

# Get image coordinates of all pixels in buffer area
irows = np.arange(buffer_area.shape[0])
icols = np.arange(buffer_area.shape[1])
irows = np.transpose(np.tile(irows, (len(icols), 1)))
icols = np.tile(icols, (len(irows), 1))
irows_good = irows[buffer_area > 0]
icols_good = icols[buffer_area > 0]
coords = [[icols_good[j], irows_good[j]] for j in range(len(irows_good))]

# K-means
X = np.array(coords)
kmeans = KMeans(n_clusters=k_buffer_clust, random_state=0).fit(X)

# Assign pixels to state vector
highres_statevector_max = np.nanmax(statevector.values)
n_rows = statevector.shape[0]
n_cols = statevector.shape[1]
for r in range(n_rows):
for c in range(n_cols):
if statevector[r, c].values == 0:
statevector[r, c] = (
kmeans.predict([[c, r]])[0] + 1 + highres_statevector_max
)
statevector = cluster_buffer_elements(statevector, k_buffer_clust, statevector.max().item())
# -------------------------------------------------------------------------

# Make dataset
Expand Down

0 comments on commit 0d0b57f

Please sign in to comment.