Source code for arpes.endstations.plugin.igor_export

"""Implements loading exported HDF files from Igor."""

from __future__ import annotations

from logging import DEBUG, INFO
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar, cast

import h5py
import numpy as np
import xarray as xr

from arpes.configuration.interface import get_data_path
from arpes.debug import setup_logger
from arpes.endstations import SESEndstation
from arpes.load_pxt import read_single_pxt
from arpes.provenance import Provenance, provenance_from_file

if TYPE_CHECKING:
    from arpes._typing.attrs_property import ScanDesc

__all__ = ("IgorExportEndstation",)

LOGLEVELS = (DEBUG, INFO)
LOGLEVEL = LOGLEVELS[1]
logger = setup_logger(__name__, LOGLEVEL)


[docs] class IgorExportEndstation(SESEndstation): """Implements loading exported HDF files for ARPES data from Igor.""" PRINCIPAL_NAME = "Igor" ALIASES: ClassVar[list[str]] = [ "igor", "igor-export", ] RENAME_KEYS: ClassVar[dict[str, str]] = {} def load_single_frame( self, frame_path: str | Path = "", scan_desc: ScanDesc | None = None, **kwargs: bool, ) -> xr.Dataset: """HDF files are all inclusive, so we just need to load one file per scan. Args: frame_path (str | Path): frame path scan_desc (ScanDesc): scan description kwargs: pass to load_SES_h5, thus, only "robust_dimension_labels" can be accepted. Returns: xr.Dataset ARPES data """ if scan_desc is None: scan_desc = {} ext = Path(frame_path).suffix if "nc" in ext or "h5" in ext: # was converted to hdf5/NetCDF format with Conrad's Igor scripts scan_desc["path"] = frame_path return self.load_SES_h5(scan_desc=scan_desc, robust_dimension_labels=True, **kwargs) # it's given by SES PXT files pxt_data = read_single_pxt(frame_path).assign_coords( {"eV": -read_single_pxt(frame_path).eV.values}, ) # negate energy return xr.Dataset({"spectrum": pxt_data}, attrs=pxt_data.attrs) def load_SES_h5( self, scan_desc: ScanDesc | None = None, *, robust_dimension_labels: bool = False, ) -> xr.Dataset: """Imports an hdf5 dataset exported from Igor. In particular, this handles data that was originally generated by a Scienta spectrometer in the SES format. In order to understand the structure of these files have a look at Conrad's saveSESDataset in Igor Pro. Args: scan_desc: Dictionary with extra information to attach to the xr.Dataset, must contain the location of the file robust_dimension_labels (bool): set True when dimension is missing ang to override it. Returns: The loaded data. """ scan_desc = scan_desc or {} data_loc = scan_desc.get("path", scan_desc.get("file")) assert data_loc is not None if not Path(data_loc).exists(): data_path = get_data_path() if data_path is not None: data_loc = Path(data_path) / data_loc else: msg = "File not found." raise RuntimeError(msg) f = h5py.File(data_loc, "r") primary_dataset_name = next(iter(f)) # This is bugged for the moment in h5py due to an inability to read fixed length unicode # strings dimension_labels = list(f["/" + primary_dataset_name].attrs["IGORWaveDimensionLabels"][0]) if any(not x for x in dimension_labels): if not robust_dimension_labels: msg = "Missing dimension labels. Use robust_dimension_labels=True to override" raise ValueError( msg, ) used_blanks = 0 for i in range(len(dimension_labels)): if not dimension_labels[i]: dimension_labels[i] = f"missing{used_blanks}" used_blanks += 1 scaling = f["/" + primary_dataset_name].attrs["IGORWaveScaling"][-len(dimension_labels) :] raw_data = f["/" + primary_dataset_name][:] scaling = [ np.linspace(scale[1], scale[1] + scale[0] * raw_data.shape[i], raw_data.shape[i]) for i, scale in enumerate(scaling) ] dataset_contents = {} attrs = scan_desc.pop("note", {}) built_coords = dict(zip(dimension_labels, scaling, strict=True)) deg_to_rad_coords = {"theta", "beta", "phi"} # the hemisphere axis is handled below built_coords = { k: np.deg2rad(c) if k in deg_to_rad_coords else c for k, c in built_coords.items() } deg_to_rad_attrs = {"theta", "beta", "alpha", "chi"} for angle_attr in deg_to_rad_attrs: if angle_attr in attrs: attrs[angle_attr] = np.deg2rad(float(attrs[angle_attr])) dataset_contents["spectrum"] = xr.DataArray( raw_data, coords=built_coords, dims=dimension_labels, attrs=attrs, ) provenance_context: Provenance = cast( "Provenance", { "what": "Loaded SES dataset from HDF5.", "by": "load_SES", }, ) provenance_from_file(dataset_contents["spectrum"], str(data_loc), provenance_context) return xr.Dataset( dataset_contents, attrs={**scan_desc, "dataset_name": primary_dataset_name}, )