"""Implements loading exported HDF files from Igor."""
from __future__ import annotations
from logging import DEBUG, INFO
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar, cast
import h5py
import numpy as np
import xarray as xr
from arpes.configuration.interface import get_data_path
from arpes.debug import setup_logger
from arpes.endstations import SESEndstation
from arpes.load_pxt import read_single_pxt
from arpes.provenance import Provenance, provenance_from_file
if TYPE_CHECKING:
from arpes._typing.attrs_property import ScanDesc
# Names exported by ``from <module> import *``: only the endstation class.
__all__ = ("IgorExportEndstation",)
# Log levels this module can be switched between; change the index below to pick one.
LOGLEVELS = (DEBUG, INFO)
LOGLEVEL = LOGLEVELS[1]  # index 1 selects INFO
# Module-level logger obtained from the project's setup_logger helper,
# called with this module's name and the level chosen above.
logger = setup_logger(__name__, LOGLEVEL)
[docs]
class IgorExportEndstation(SESEndstation):
    """Implements loading exported HDF files for ARPES data from Igor.

    Two input kinds are dispatched on the file suffix by ``load_single_frame``:
    files whose suffix contains ``nc`` or ``h5`` are read as Igor HDF5 exports via
    ``load_SES_h5``; anything else is read with ``read_single_pxt``.
    """

    PRINCIPAL_NAME = "Igor"
    ALIASES: ClassVar[list[str]] = [
        "igor",
        "igor-export",
    ]
    # Empty mapping: this class declares no key renames of its own.
    RENAME_KEYS: ClassVar[dict[str, str]] = {}

    def load_single_frame(
        self,
        frame_path: str | Path = "",
        scan_desc: ScanDesc | None = None,
        **kwargs: bool,
    ) -> xr.Dataset:
        """HDF files are all inclusive, so we just need to load one file per scan.

        Args:
            frame_path (str | Path): frame path
            scan_desc (ScanDesc): scan description. It is not modified; a copy with
                ``"path"`` set to ``frame_path`` is forwarded to ``load_SES_h5``.
            kwargs: pass to load_SES_h5, thus, only "robust_dimension_labels" can be accepted.
                When omitted, ``robust_dimension_labels`` defaults to True here.

        Returns: xr.Dataset
            ARPES data
        """
        ext = Path(frame_path).suffix
        if "nc" in ext or "h5" in ext:
            # was converted to hdf5/NetCDF format with Conrad's Igor scripts
            # Build a fresh descriptor instead of writing "path" into the caller's dict.
            desc = cast("ScanDesc", {**(scan_desc or {}), "path": frame_path})
            # Default to the robust behaviour but let an explicit caller value win;
            # passing the keyword twice would raise TypeError.
            kwargs.setdefault("robust_dimension_labels", True)
            return self.load_SES_h5(scan_desc=desc, **kwargs)

        # it's given by SES PXT files
        # Read the file once and reuse the result to build the negated energy axis.
        pxt_data = read_single_pxt(frame_path)
        pxt_data = pxt_data.assign_coords({"eV": -pxt_data.eV.values})  # negate energy
        return xr.Dataset({"spectrum": pxt_data}, attrs=pxt_data.attrs)

    def load_SES_h5(
        self,
        scan_desc: ScanDesc | None = None,
        *,
        robust_dimension_labels: bool = False,
    ) -> xr.Dataset:
        """Imports an hdf5 dataset exported from Igor.

        In particular, this handles data that was originally generated by a Scienta spectrometer
        in the SES format. In order to understand the structure of these files have a look at
        Conrad's saveSESDataset in Igor Pro.

        Args:
            scan_desc: Dictionary with extra information to attach to the xr.Dataset, must contain
                the location of the file under ``"path"`` or ``"file"``. An optional ``"note"``
                mapping becomes the attrs of the spectrum. The dictionary passed in is left
                untouched.
            robust_dimension_labels (bool): If True, blank dimension labels are replaced by
                placeholder names ``missing0``, ``missing1``, ...; if False, a blank label
                raises ValueError.

        Returns:
            The loaded data: a Dataset with a single ``"spectrum"`` variable whose attrs are
            the remaining ``scan_desc`` entries plus ``"dataset_name"``.

        Raises:
            ValueError: If ``scan_desc`` has neither ``"path"`` nor ``"file"``, or if a
                dimension label is blank while ``robust_dimension_labels`` is False.
            RuntimeError: If the file does not exist as given and no data path is configured.
        """
        # Shallow copy: "note" is popped below and must not disappear from the caller's dict.
        scan_desc = cast("ScanDesc", dict(scan_desc or {}))
        data_loc = scan_desc.get("path", scan_desc.get("file"))
        if data_loc is None:
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            msg = 'scan_desc must provide the file location under "path" or "file".'
            raise ValueError(msg)

        if not Path(data_loc).exists():
            # Fall back to resolving the location relative to the configured data path.
            data_path = get_data_path()
            if data_path is None:
                msg = "File not found."
                raise RuntimeError(msg)
            data_loc = Path(data_path) / data_loc

        # Everything needed is copied into memory inside the ``with`` block so the
        # HDF5 handle is always released, including on the error path.
        with h5py.File(data_loc, "r") as f:
            primary_dataset_name = next(iter(f))
            primary = f["/" + primary_dataset_name]

            # This is bugged for the moment in h5py due to an inability to read fixed length
            # unicode strings
            dimension_labels = list(primary.attrs["IGORWaveDimensionLabels"][0])
            if any(not label for label in dimension_labels):
                if not robust_dimension_labels:
                    msg = "Missing dimension labels. Use robust_dimension_labels=True to override"
                    raise ValueError(
                        msg,
                    )
                used_blanks = 0
                for i, label in enumerate(dimension_labels):
                    if not label:
                        dimension_labels[i] = f"missing{used_blanks}"
                        used_blanks += 1

            # Keep only the trailing rows, one per dimension; each row is (delta, offset).
            scaling = primary.attrs["IGORWaveScaling"][-len(dimension_labels) :]
            raw_data = primary[:]

        # Igor wave scaling defines the coordinate of index ``p`` as ``offset + delta * p``.
        # ``linspace`` up to ``offset + delta * n`` with ``n`` points would stretch every
        # step by n / (n - 1), so the axis is built from the index directly.
        axes = [
            scale[1] + scale[0] * np.arange(raw_data.shape[i]) for i, scale in enumerate(scaling)
        ]

        dataset_contents = {}
        # Copy the note so the unit conversion below does not alter the caller's mapping.
        attrs = dict(scan_desc.pop("note", {}))

        built_coords = dict(zip(dimension_labels, axes, strict=True))

        # Angular axes are converted from degrees to radians.
        # NOTE(review): an earlier comment said the hemisphere axis is handled separately,
        # but no such handling is visible in this method -- confirm.
        deg_to_rad_coords = {"theta", "beta", "phi"}
        built_coords = {
            k: np.deg2rad(c) if k in deg_to_rad_coords else c for k, c in built_coords.items()
        }

        # Angular attributes are converted from degrees to radians as well.
        deg_to_rad_attrs = {"theta", "beta", "alpha", "chi"}
        for angle_attr in deg_to_rad_attrs:
            if angle_attr in attrs:
                attrs[angle_attr] = np.deg2rad(float(attrs[angle_attr]))

        dataset_contents["spectrum"] = xr.DataArray(
            raw_data,
            coords=built_coords,
            dims=dimension_labels,
            attrs=attrs,
        )

        provenance_context: Provenance = cast(
            "Provenance",
            {
                "what": "Loaded SES dataset from HDF5.",
                "by": "load_SES",
            },
        )
        provenance_from_file(dataset_contents["spectrum"], str(data_loc), provenance_context)

        return xr.Dataset(
            dataset_contents,
            attrs={**scan_desc, "dataset_name": primary_dataset_name},
        )