import matplotlib.pyplot as plt
import numpy as np
from matplotlib.typing import ColorType
from scipy.ndimage import binary_erosion
from scipy.stats import gaussian_kde
from cdiutils.plot.formatting import save_fig
from cdiutils.utils import normalise
[docs]
def kde_from_histogram(
counts: np.ndarray,
bin_edges: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
"""
Compute the Kernel Density Estimate (KDE) from histogram counts and
bin edges provided by numpy.histogram function.
Args:
counts (np.ndarray): the number of elements in each bin.
bin_edges (np.ndarray): the limits of each bin.
Returns:
tuple[np.ndarray, np.ndarray]: x values used to compute the KDE
estimate, the y value (KDE estimate)
"""
# Check if the histogram is density or not by checking the sum of
# the counts
bin_widths = np.diff(bin_edges)
is_density = np.isclose(np.sum(counts * bin_widths), 1.0)
if is_density:
# When density=True, use the bin edges to reconstruct the data
# for KDE
data = []
for count, left_edge, right_edge in zip(
counts, bin_edges[:-1], bin_edges[1:]
):
data.extend(
np.random.uniform(
left_edge,
right_edge,
int(count * len(counts) * (right_edge - left_edge)),
)
)
data = np.array(data)
kde = gaussian_kde(data)
x = np.linspace(min(bin_edges), max(bin_edges))
y = kde(x)
else:
# Reconstruct the data from histogram counts and bin edges
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = bin_edges[1] - bin_edges[0]
reconstructed_data = np.repeat(bin_centers, counts)
# Calculate KDE using the reconstructed data
kde = gaussian_kde(reconstructed_data)
# Evaluate KDE
x = np.linspace(bin_edges.min(), bin_edges.max())
y = kde.pdf(x)
# Scale the KDE values to match the original counts
y *= len(reconstructed_data) * bin_width
return x, y
[docs]
def find_isosurface(
amplitude: np.ndarray,
nbins: int = 100,
sigma_criterion: float = 3,
plot: bool = False,
show: bool = False,
save: str = None,
) -> tuple[float, plt.Axes]:
"""
Estimate the isosurface value from the amplitude distribution.
This function computes the isosurface value based on the amplitude
distribution of a 3D volume. The isosurface is calculated as:
`mu - sigma_criterion * sigma`, where `mu` is the mean and `sigma`
is the standard deviation of the distribution.
Args:
amplitude (np.ndarray): The 3D amplitude volume.
nbins (int, optional): The number of bins to use for the histogram.
Defaults to 100.
sigma_criterion (float, optional): The factor used to compute
the isosurface. Defaults to 3.
plot (bool, optional): Whether to generate a plot of the
histogram and density estimate. Defaults to False.
show (bool, optional): Whether to display the plot. Defaults to
False.
save (str, optional): File path to save the plot if generated.
Defaults to None.
Returns:
tuple[float, plt.Axes] | float: The isosurface value. If `plot`
or `show` is True, also returns the matplotlib figure object.
"""
# normalise and flatten the amplitude
flattened_amplitude = normalise(amplitude).ravel()
counts, bins = np.histogram(flattened_amplitude, bins=nbins)
# remove the background
background_value = bins[
np.where(counts == counts.max())[0] + 1 + nbins // 20
]
filtered_amplitude = flattened_amplitude[
flattened_amplitude > background_value
]
# redo the histogram with the filtered amplitude
counts, bins = np.histogram(filtered_amplitude, bins=nbins, density=True)
bin_centres = (bins[:-1] + bins[1:]) / 2
bin_size = bin_centres[1] - bin_centres[0]
# fit the amplitude distribution
kernel = gaussian_kde(filtered_amplitude)
x = np.linspace(0, 1, 1000)
fitted_counts = kernel(x)
max_index = np.argmax(fitted_counts)
right_gaussian_part = np.where(x >= x[max_index], fitted_counts, 0)
# find the closest indexes
right_HM_index = np.argmin(
np.abs(right_gaussian_part - fitted_counts.max() / 2)
)
left_HM_index = max_index - (right_HM_index - max_index)
fwhm = x[right_HM_index] - x[left_HM_index]
sigma_estimate = fwhm / (2 * np.sqrt(2 * np.log(2)))
isosurface = x[max_index] - sigma_criterion * sigma_estimate
# Ensure isosurface is non-negative
if isosurface < 0:
import warnings
warnings.warn(
f"Isosurface estimate ({isosurface:.3f}) is negative. "
"Returning 0. This may indicate poor signal-to-noise ratio "
"or unsuitable data for isosurface detection.",
UserWarning,
)
isosurface = 0
if plot or show:
figsize = (6, 4) # (5.812, 3.592) # golden ratio
fig, ax = plt.subplots(1, 1, layout="tight", figsize=figsize)
ax.bar(
bin_centres,
counts,
width=bin_size,
color="dodgerblue",
alpha=0.9,
edgecolor=(0, 0, 0, 0.25),
label=r"amplitude distribution",
)
kde_x, kde_y = kde_from_histogram(counts, bins)
ax.fill_between(
kde_x,
kde_y,
alpha=0.3,
color="navy",
label=r"density estimate",
)
ax.axvspan(
x[left_HM_index],
x[right_HM_index],
edgecolor="k",
facecolor="green",
alpha=0.2,
label="FWHM",
)
ax.plot(
[isosurface, isosurface],
[0, fitted_counts[(np.abs(x - isosurface)).argmin()]],
solid_capstyle="round",
color="lightcoral",
lw=5,
label=rf"isosurface estimated at {isosurface:0.3f}",
)
ax.set_xlabel(r"normalised amplitude")
ax.set_ylabel("counts")
ax.legend(frameon=False)
fig.suptitle(r"Reconstructed amplitude distribution")
fig.tight_layout()
if save is not None:
save_fig(fig, save, transparent=False)
if show:
plt.show()
return float(isosurface), fig
return float(isosurface), None
[docs]
def get_histogram(
data: np.ndarray,
support: np.ndarray = None,
bins: int = 50,
density: bool = False,
region: str = "overall",
) -> dict:
"""
Calculate histogram and optionally kernel density estimate (KDE) of
the data.
Optionally applies a support mask to the data before and calculates
the surface and bulk histograms.
Args:
data (np.ndarray): the data to be analysed
support (np.ndarray): the support mask to be applied to the data
before histogram calculation. If None, no mask is applied.
Defaults to None.
bins (int, optional): number of bins for the histogram.
Defaults to 50.
density (bool, optional): whether to normalise the histogram
to form a probability density function. Defaults to False.
region (str, optional): region of the data to be analysed. Can
be "overall", "surface", "bulk" or "all". Defaults to
"overall".
Returns:
dict: a dictionary containing the histograms for the specified
region(s). If kde is True, also includes the KDEs.
"""
if support is None and region != "overall":
raise ValueError(
"Support mask is required for surface or bulk region analysis."
)
if region not in ["overall", "surface", "bulk", "all"]:
raise ValueError(
"Invalid region specified. Choose from 'overall', "
"'surface', 'bulk', or 'all'."
)
histograms = {}
means = {}
stds = {}
if support is not None:
overall_data = data[support > 0]
# to handle any remaining NaN values, we need to specify the range
histograms["overall"] = np.histogram(
overall_data,
bins=bins,
density=density,
range=(np.nanmin(overall_data), np.nanmax(overall_data)),
)
means["overall"] = np.nanmean(overall_data)
stds["overall"] = np.nanstd(overall_data)
if region != "overall":
bulk = binary_erosion(support.astype(bool))
bulk_data = data[bulk > 0]
if region == "bulk" or region == "all":
histograms["bulk"] = np.histogram(
bulk_data,
bins=bins,
density=density,
range=(np.nanmin(bulk_data), np.nanmax(bulk_data)),
)
means["bulk"] = np.nanmean(bulk_data)
stds["bulk"] = np.nanstd(bulk_data)
if region == "surface" or region == "all":
surface = (support.astype(bool) & ~bulk).astype(int)
surface_data = data[surface > 0]
histograms["surface"] = np.histogram(
surface_data,
bins=bins,
density=density,
range=(np.nanmin(surface_data), np.nanmax(surface_data)),
)
means["surface"] = np.nanmean(surface_data)
stds["surface"] = np.nanstd(surface_data)
kdes = {k: kde_from_histogram(*v) for k, v in histograms.items()}
return histograms, kdes, means, stds
[docs]
def plot_histogram(
ax: plt.Axes,
counts: np.ndarray,
bin_edges: np.ndarray,
kde_x: np.ndarray = None,
kde_y: np.ndarray = None,
color: ColorType = "lightcoral",
fwhm: bool = True,
bar_args: dict = None,
kde_args: dict = None,
) -> float:
"""
Plot the bars of a histogram as well as the kernel density
estimate.
Args:
ax (plt.Axes): the matplotlib ax to plot the histogram on.
counts (np.ndarray): the count in each bin from
np.histogram().
bin_edges (np.ndarray): the bin edge values from
np.histogram().
kde_x (np.ndarray, optional): the x values used to
calculate the kernel density estimate values.
kde_y (np.ndarray, optional): the (y) values of the kernel
density estimate.
color (ColorType, optional): the colour of the bar and line.
Defaults to "lightcoral".
fwhm (bool, optional): whether to calculate and plot the full
width at half maximum (FWHM) of the kernel density estimate.
Defaults to True.
bar_args (dict, optional): additional arguments for the
matptlotlib bar function.
kde_args (dict, optional): additional arguments for the
matplotlib fill_between function. Can include boolean "fill"
and float "fill_alpha" to control whether to fill the kde
area and the alpha value of the fill. Defaults to None.
Returns:
float: the fwhm if required else None.
"""
_bar_args = {
"color": color,
"alpha": 0.4,
"edgecolor": color,
"linewidth": 0.5,
"label": "",
}
_bar_args.update(bar_args or {})
_kde_args = {"color": color, "label": "Kernel density estimate"}
fill_kde, fill_alpha = False, False
if kde_args is not None:
if "fill" in kde_args:
fill_kde = kde_args.pop("fill")
if "fill_alpha" in kde_args:
fill_alpha = kde_args.pop("fill_alpha")
_kde_args.update(kde_args)
# Resample the histogram to calculate the kernel density estimate
bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = bin_edges[1] - bin_edges[0]
# Plot the histogram bars
ax.bar(bin_centres, counts, bin_width, **_bar_args)
# Find the x-axis limits
xmax = np.max(np.abs(bin_centres))
xmin = -xmax
ax.set_xlim(xmin, xmax)
if kde_x is not None and kde_y is not None:
# Plot the kernel density estimate
ax.plot(kde_x, kde_y, **_kde_args)
if fill_kde:
ax.fill_between(kde_x, kde_y, 0, color=color, alpha=fill_alpha)
# Calculate the FWHM
if fwhm:
halfmax = kde_y.max() / 2
maxpos = kde_y.argmax()
leftpos = (np.abs(kde_y[:maxpos] - halfmax)).argmin()
rightpos = (np.abs(kde_y[maxpos:] - halfmax)).argmin() + maxpos
fwhm_value = kde_x[rightpos] - kde_x[leftpos]
(fwhm_line,) = ax.plot(
[],
[],
label=f"FWHM = {fwhm_value:.4f}%",
color=color,
ls="--",
linewidth=1,
)
def update_fwhm_line(event_ax):
xmin, xmax = event_ax.get_xlim()
fwhm_line.set_data(
[kde_x[leftpos], kde_x[rightpos]], [halfmax, halfmax]
)
fwhm_line.set_transform(event_ax.transData)
update_fwhm_line(ax)
ax.callbacks.connect("xlim_changed", update_fwhm_line)
ax.callbacks.connect("ylim_changed", update_fwhm_line)
return fwhm_value
return None