# Source code for cdiutils.io.cxi_explorer

"""
Module for exploring and visualising CXI files with interactive
browser functionality.
"""

from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np

try:
    import ipywidgets
    from IPython.display import display

    HAS_IPYWIDGETS = True
except ImportError:
    HAS_IPYWIDGETS = False

from cdiutils.io.cxi import CXIFile
from cdiutils.plot.formatting import add_colorbar
from cdiutils.plot.slice import plot_volume_slices


class CXIExplorer:
    """
    Interactive explorer for CXI file inspection and visualisation.

    Provides tools for navigating CXI file hierarchies, viewing
    metadata, and visualising array data. Requires Jupyter environment
    with ipywidgets for interactive features.

    Attributes:
        cxi: CXIFile instance being explored.
        paths: List of all dataset/group paths in file.
        soft_links: Dictionary mapping soft link names to targets.
    """

    max_number_of_values_printed = 100
    tree_max_string_length = 15
    tree_max_array_size = 5

    def __init__(self, cxi_file: "str | Path | CXIFile") -> None:
        """
        Initialise the CXI explorer with either a path to a CXI file or
        a CXIFile object.

        Args:
            cxi_file (str | Path | CXIFile): Either a path to a CXI file
                or a CXIFile object.

        Raises:
            ImportError: If ipywidgets is not installed (required for
                interactive features).
            TypeError: If cxi_file is neither a path nor a CXIFile.
        """
        # Set first so that close()/__del__ stay safe even when one of
        # the checks below raises before the file is opened.
        self._owner = False

        if not HAS_IPYWIDGETS:
            raise ImportError(
                "CXIExplorer requires ipywidgets for interactive features. "
                "Install with: pip install ipywidgets\n"
                "Or install all interactive dependencies: "
                "pip install cdiutils[interactive]"
            )

        # handle both paths and CXIFile objects
        if isinstance(cxi_file, (str, Path)):
            self.file_path = str(cxi_file)
            self.cxi = CXIFile(self.file_path, mode="r")
            self.cxi.open()
            self._owner = True  # we opened it, so we should close it
        elif isinstance(cxi_file, CXIFile):
            self.file_path = cxi_file.file_path
            self.cxi = cxi_file
            # we didn't create it, so we shouldn't close it
        else:
            raise TypeError(
                "cxi_file must be a path (str or Path) or a CXIFile object"
            )

        # check if file is open
        if self.cxi.file is None:
            self.cxi.open()

        self.tab = 0

        # build path lists including regular paths and soft links
        self.paths, self.soft_links = self._build_path_lists()

    def close(self) -> None:
        """
        Close the CXI file if it was opened by this explorer.

        Safe to call several times and on partially initialised
        instances: missing attributes are treated as "nothing to close".
        """
        if not getattr(self, "_owner", False):
            return
        cxi = getattr(self, "cxi", None)
        if cxi is not None and cxi.file is not None:
            cxi.close()
        self._owner = False

    def __del__(self):
        """Clean up resources when the explorer is deleted."""
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Delegate to close() so ownership is handled in a single place.
        self.close()

    def _build_path_lists(self) -> tuple[list, dict]:
        """
        Build lists of all paths in the file, including regular items
        and soft links.

        Returns:
            tuple: (paths, soft_links) where:
                - paths is a sorted list of all paths
                - soft_links is a dict mapping link names to their
                  target paths
        """
        paths = []
        soft_links = {}

        def collect_paths(name, obj):
            paths.append(name)

        def collect_links(name, obj):
            if isinstance(obj, h5py.SoftLink):
                soft_links[name] = obj.path

        # collect all regular items
        self.cxi.file.visititems(collect_paths)

        # collect all soft links
        self.cxi.file.visititems_links(collect_links)

        # add soft links to the paths list if not already there
        # (set lookup avoids rescanning the list for every link)
        known = set(paths)
        paths.extend(name for name in soft_links if name not in known)

        paths.sort()
        return paths, soft_links

    def _is_softlink(self, path: str) -> "tuple[bool, str | None]":
        """
        Check if a path is a soft link.

        Args:
            path (str): Path to check.

        Returns:
            tuple: (bool, str | None): whether path is a soft link and,
                if so, the path it points to (None otherwise).
        """
        try:
            link_info = self.cxi.file.get(path, getlink=True)
        except (KeyError, AttributeError, RuntimeError):
            return False, None
        if isinstance(link_info, h5py.SoftLink):
            return True, link_info.path
        return False, None

    def tree(
        self, max_depth: "int | None" = None, show_attributes: bool = False
    ) -> None:
        """
        Print a tree view of the CXI file structure similar to the
        Linux 'tree' command.

        Args:
            max_depth (int | None): Maximum depth to traverse (None for
                unlimited). Defaults to None.
            show_attributes (bool): whether to show group attributes.
                Defaults to False.
        """
        print(f"CXI File: {Path(self.file_path).name}")

        def print_group(name, node, prefix="", is_last=True, depth=0):
            # Determine the base name (last part of the path)
            basename = name.split("/")[-1] if name != "/" else "/"
            branch = "└── " if is_last else "├── "

            # Soft links are reported and never descended into; `node`
            # is not touched on this path (it may be None, see below).
            is_link, target = self._is_softlink(name)
            if is_link:
                print(f"{prefix}{branch}{basename} -> {target}")
                return

            if isinstance(node, h5py.Group):
                # Add attributes if present
                attr_str = ""
                if show_attributes and len(node.attrs) > 0:
                    attr_keys = list(node.attrs.keys())
                    if len(attr_keys) <= 2:  # Show all if few
                        attrs = [f"{k}={node.attrs[k]}" for k in attr_keys]
                        attr_str = f" ({', '.join(attrs)})"
                    else:  # Otherwise just show count
                        attr_str = f" ({len(node.attrs)} attributes)"

                print(f"{prefix}{branch}{basename}{attr_str}")

                # Prepare for children. The continuation prefix must be
                # as wide as the 4-character branch marker, otherwise
                # nested levels do not line up.
                keys = list(node.keys())
                child_prefix = prefix + ("    " if is_last else "│   ")

                # Check if we've reached the max depth for child nodes
                if max_depth is not None and depth == max_depth:
                    if keys:  # Only show if there are actually children
                        print(
                            f"{child_prefix}└── {len(keys)} more entrie(s)..."
                        )
                    return

                # Process all children
                last_index = len(keys) - 1
                for i, key in enumerate(keys):
                    child_name = f"{name}/{key}" if name != "/" else f"/{key}"
                    child_last = i == last_index
                    try:
                        # Do not dereference soft links: a dangling
                        # link cannot be opened but can still be shown.
                        child_is_link, _ = self._is_softlink(child_name)
                        child_node = None if child_is_link else node[key]
                        print_group(
                            child_name,
                            child_node,
                            child_prefix,
                            child_last,
                            depth + 1,
                        )
                    except Exception as e:
                        # Best effort: report the problem and carry on
                        # with the remaining children.
                        child_branch = "└── " if child_last else "├── "
                        print(
                            f"{child_prefix}{child_branch}{key} "
                            f"(Error: {e})"
                        )

            elif isinstance(node, h5py.Dataset):
                # Format dataset info
                shape_str = f"{node.shape}" if node.shape else "(scalar)"
                type_str = f"{node.dtype}"

                # Truncate type string if it's too long
                if len(type_str) > self.tree_max_string_length:
                    type_str = (
                        type_str[: self.tree_max_string_length - 3] + "..."
                    )

                # Add short data preview for small datasets. The size
                # is guarded against None (presumably reported for
                # datasets with no elements - TODO confirm).
                data_preview = ""
                if (
                    node.size is not None
                    and node.size <= self.tree_max_array_size
                ):
                    value = self.cxi[name]
                    if (
                        isinstance(value, np.ndarray)
                        and value.size <= self.tree_max_array_size
                    ):
                        data_preview = f" = {value}"
                    elif np.isscalar(value) or isinstance(value, (str, bytes)):
                        if isinstance(value, (bytes, np.bytes_)):
                            try:
                                data_preview = f" = '{value.decode('utf-8')}'"
                            except UnicodeDecodeError:
                                data_preview = f" = {value}"
                        else:
                            data_preview = f" = {value}"

                # Add attributes indicator
                attr_str = ""
                if len(node.attrs) > 0:
                    attr_str = f" ({len(node.attrs)} attributes)"

                print(
                    f"{prefix}{branch}{basename} {shape_str} "
                    f"{type_str}{data_preview}{attr_str}"
                )

        # Start recursion from root
        print_group("/", self.cxi.file, "")

    def explore(self):
        """
        Create an interactive widget to explore the CXI file.

        A dropdown lists every known path; selecting one renders the
        output of `show` for that path below the dropdown.
        """
        # create dropdown for selecting paths
        path_dropdown = ipywidgets.Dropdown(
            options=self.paths,
            description="Path:",
            style={"description_width": "initial"},
            layout=ipywidgets.Layout(width="80%"),
        )

        # create output widget to display content
        output = ipywidgets.Output()

        def show_item(change):
            path = change["new"]
            with output:
                output.clear_output()
                self.show(path)

        # Connect the callback to the dropdown widget
        path_dropdown.observe(show_item, names="value")

        # Set initial selection if possible (show_item already routes
        # its prints to the output widget)
        if self.paths:
            show_item({"new": self.paths[0]})

        # Display the ipywidgets
        display(ipywidgets.VBox([path_dropdown, output]))

    def tabbed_print(self, text: str, **kwargs) -> None:
        """
        Print text prefixed with one tab per current indentation level.

        Args:
            text (str): Text to print.
            **kwargs: Forwarded unchanged to the builtin print.
        """
        print("\t" * self.tab + text, **kwargs)

    def _show_h5_dataset(
        self,
        dataset: "h5py.Dataset",
        show_attributes: bool = False,
        plot: bool = True,
    ) -> None:
        """
        Show the content of a dataset in the CXI file.

        Args:
            dataset (h5py.Dataset): the h5 dataset to show.
            show_attributes (bool, optional): whether to show
                attributes. Defaults to False.
            plot (bool, optional): whether to print a min/max/mean
                summary and plot numeric arrays with more than 3
                values. Defaults to True.
        """
        self.tabbed_print(f"Path: {dataset.name}")
        self.tabbed_print("Type: Dataset")
        data = dataset[()]

        if len(dataset.attrs) > 0 and show_attributes:
            self.tabbed_print(f"Attributes: {dict(dataset.attrs)}")

        # Handle different data types for visualisation
        # For string data
        if isinstance(data, (str, bytes, np.bytes_)):
            if isinstance(data, (bytes, np.bytes_)):
                try:
                    self.tabbed_print(f"Value: {data.decode('utf-8')}")
                except UnicodeDecodeError:
                    self.tabbed_print(f"Value: {data} (binary data)")
            else:
                self.tabbed_print(f"Value: {data}")

        # For scalar data
        elif np.isscalar(data) or data.size == 1:
            self.tabbed_print(f"Value: {data}")

        # For array data
        elif isinstance(data, np.ndarray):
            self.tabbed_print(f"Shape: {data.shape}")
            self.tabbed_print(f"Dtype: {data.dtype}")

            # For small arrays, show the values
            if data.size <= self.max_number_of_values_printed:
                self.tabbed_print(f"Values: {data}")

            # Statistics and plots only make sense for boolean/numeric
            # dtypes; string or object arrays would make the nan*
            # reductions raise.
            is_numeric = data.dtype.kind in "biufc"
            if plot and data.size > 3 and is_numeric:
                self.tabbed_print(
                    f"Data summary: min={np.nanmin(data):.3f}, "
                    f"max={np.nanmax(data):.3f}, "
                    f"mean={np.nanmean(data):.3f}"
                )

                # For 1D arrays, plot them
                if data.ndim == 1:
                    fig, ax = plt.subplots(1, 1, layout="tight")
                    ax.plot(data)
                    fig.suptitle(f"Plot of {dataset.name}")

                # For 2D arrays, display as images
                elif data.ndim == 2:
                    fig, ax = plt.subplots(1, 1, layout="tight")
                    ax.imshow(data)
                    add_colorbar(ax, label="Value")
                    fig.suptitle(f"Image of {dataset.name}")

                # For 3D arrays, show slices through the volume
                elif data.ndim == 3:
                    plot_volume_slices(
                        data,
                        title=f"3D Volume of {dataset.name}",
                        origin="lower",
                    )
                plt.show()

    def _show_h5_softlink(
        self,
        path: str,
        target_path: str,
    ) -> None:
        """
        Show the content of a soft link in the CXI file.

        Args:
            path (str): Path to the soft link.
            target_path (str): Path to the linked item.
        """
        self.tabbed_print(f"Path: {path}")
        self.tabbed_print(f"Type: Soft Link → {target_path}")

    def _show_h5_group(
        self,
        group: "h5py.Group",
        show_attributes: bool = False,
    ) -> None:
        """
        Show the content of a group in the CXI file.

        Direct children are listed one level deep: soft links are shown
        as links, sub-groups by name only, datasets in full.

        Args:
            group (h5py.Group): the h5 group to show.
            show_attributes (bool, optional): whether to show
                attributes. Defaults to False.
        """
        path = group.name
        self.tabbed_print(f"Path: {path}")
        if "title" in group:
            self.tabbed_print(f"Title: {self.cxi[path + '/title']}")
        if "description" in group:
            self.tabbed_print(
                f"Description: {self.cxi[path + '/description']}"
            )
        self.tabbed_print(f"Type: Group with {len(group)} items.")

        if len(group.attrs) > 0 and show_attributes:
            self.tabbed_print(f"Attributes: {dict(group.attrs)}")

        self.tabbed_print("Content:")
        self.tab += 1
        try:
            # Print content
            base = path.rstrip("/")
            for key in group:
                print()
                # Query the link through the child's own path inside
                # this group, before opening the child: a dangling link
                # cannot be dereferenced but can still be reported.
                child_path = f"{base}/{key}"
                is_softlink, target_path = self._is_softlink(child_path)
                if is_softlink:
                    self._show_h5_softlink(child_path, target_path)
                    continue

                item = group[key]
                if isinstance(item, h5py.Group):
                    # Two calls so both lines follow the current tab
                    # level instead of a hard-coded single tab.
                    self.tabbed_print(f"Path: {item.name}")
                    self.tabbed_print("Type: Group")
                elif isinstance(item, h5py.Dataset):
                    self._show_h5_dataset(item, show_attributes, plot=True)
        finally:
            # Restore the indentation level even if a child failed.
            self.tab -= 1

    def show(
        self, path: str, show_attributes: bool = False, tab: int = 0
    ) -> None:
        """
        Visualise a specific dataset from the CXI file.

        Notes:
            This function only prints the content of the subset without
            going any deeper into the tree, except for soft links, for
            which it will print the target content and its content. If
            the target content of a group or dataset is a soft link, it
            will not print the target content.

        Args:
            path: Path to the dataset within the CXI file.
            show_attributes: Whether to show attributes.
            tab: Initial tab level.
        """
        self.tab = tab

        # Check if this is a soft link first
        is_link, target_path = self._is_softlink(path)
        if is_link:
            self._show_h5_softlink(path, target_path)
            print("Target Content:\n")
            self.tab += 1
            self.show(target_path, show_attributes, tab=self.tab)
            return

        # Only dereference the path once we know it is not a link.
        node = self.cxi.get_node(path)
        if isinstance(node, h5py.Group):
            self._show_h5_group(node, show_attributes)
        elif isinstance(node, h5py.Dataset):
            self._show_h5_dataset(node, show_attributes)

    def search(self, pattern: str, search_attrs: bool = True) -> None:
        """
        Search for datasets, groups, or soft links that match the given
        pattern (case-insensitive substring match) and print the hits.

        Args:
            pattern (str): pattern to search for in names.
            search_attrs (bool): whether to also search in attribute
                values. Defaults to True.
        """
        needle = pattern.lower()
        results = []

        # First check for matches in path names (including soft links)
        for path in self.paths:
            if needle not in path.lower():
                continue
            if path in self.soft_links:
                results.append(
                    (
                        path,
                        f"soft link name match (→ {self.soft_links[path]})",
                    )
                )
            else:
                results.append((path, "name match"))

        # Check soft link targets
        for link_name, target_path in self.soft_links.items():
            if needle in target_path.lower():
                results.append(
                    (link_name, f"soft link target match (→ {target_path})")
                )

        # Check attributes if requested
        if search_attrs:

            def attr_visitor(name, obj):
                if not hasattr(obj, "attrs"):
                    return
                for attr_name, attr_value in obj.attrs.items():
                    if needle in str(attr_value).lower():
                        results.append(
                            (
                                name,
                                f"attribute match: {attr_name}={attr_value}",
                            )
                        )

            self.cxi.file.visititems(attr_visitor)

        # Display results
        if not results:
            print(f"No matches found for '{pattern}'")
            return

        print(f"Found {len(results)} matches for '{pattern}':")
        for path, match_type in results:
            # Determine node type (Group, Dataset, or Soft Link)
            if path in self.soft_links:
                node_type = "Soft Link"
            else:
                try:
                    node_type = (
                        "Group"
                        if isinstance(self.cxi.file[path], h5py.Group)
                        else "Dataset"
                    )
                except KeyError:
                    # This should not happen, but just in case
                    node_type = "Unknown"

            print(f"- {node_type}: {path} ({match_type})")

    def summarise(self):
        """Provide a summary of the CXI file content."""
        # Get file info
        file_size = Path(self.file_path).stat().st_size / (1024 * 1024)  # MB

        # Count groups, datasets, and total data size
        group_count = 0
        dataset_count = 0
        total_data_size = 0

        def count_visitor(name, obj):
            nonlocal group_count, dataset_count, total_data_size
            if isinstance(obj, h5py.Group):
                group_count += 1
            elif isinstance(obj, h5py.Dataset):
                dataset_count += 1
                # `or 0` guards against a size of None (presumably
                # reported for datasets with no elements - TODO confirm)
                total_data_size += (obj.size or 0) * obj.dtype.itemsize

        self.cxi.file.visititems(count_visitor)

        # Display summary
        print(f"CXI File Summary: {Path(self.file_path).name}")
        print(f"File size: {file_size:.2f} MB")
        print(f"Groups: {group_count}")
        print(f"Datasets: {dataset_count}")
        print(f"Total data size: {total_data_size / (1024 * 1024):.2f} MB")

        # Check for entry groups
        entries = [key for key in self.cxi.file if key.startswith("entry_")]
        print(f"\nEntries: {len(entries)}")

        for entry in entries:
            print(f"\n{entry}:")
            group = self.cxi.file[entry]

            # Print default if available
            if "default" in group.attrs:
                print(f" Default: {group.attrs['default']}")

            # List main group types and counts; the type is the part of
            # the key before the first underscore (e.g. "image_1").
            group_types = {}
            for key in group:
                prefix = key.split("_")[0] if "_" in key else key
                group_types[prefix] = group_types.get(prefix, 0) + 1

            for prefix, count in group_types.items():
                print(f" {prefix}: {count}")
# Convenience hook: lets a CXIFile instance build its own explorer
def get_explorer(self):
    """
    Build a CXIExplorer bound to this CXI file.

    Meant to be attached to CXIFile as a method, so `self` is the
    CXIFile instance to explore.

    Returns:
        CXIExplorer: An explorer instance for this file
    """
    explorer = CXIExplorer(self)
    return explorer


# Attach the helper to CXIFile so it is available as a method
CXIFile.get_explorer = get_explorer