Source code for arpes.endstations.plugin.igor_export

"""Implements loading exported HDF files from Igor."""

from __future__ import annotations

from logging import DEBUG, INFO
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar, cast

import h5py
import numpy as np
import xarray as xr

from arpes.configuration.interface import get_data_path
from arpes.debug import setup_logger
from arpes.endstations import SESEndstation
from arpes.load_pxt import read_single_pxt
from arpes.provenance import Provenance, provenance_from_file

if TYPE_CHECKING:
    from arpes._typing.attrs_property import ScanDesc

__all__ = ("IgorExportEndstation",)

LOGLEVELS = (DEBUG, INFO)
LOGLEVEL = LOGLEVELS[1]
logger = setup_logger(__name__, LOGLEVEL)



[docs]
class IgorExportEndstation(SESEndstation):
    """Implements loading exported HDF files for ARPES data from Igor."""

    PRINCIPAL_NAME = "Igor"
    ALIASES: ClassVar[list[str]] = [
        "igor",
        "igor-export",
    ]

    RENAME_KEYS: ClassVar[dict[str, str]] = {}

    def load_single_frame(
        self,
        frame_path: str | Path = "",
        scan_desc: ScanDesc | None = None,
        **kwargs: bool,
    ) -> xr.Dataset:
        """HDF files are all inclusive, so we just need to load one file per scan.

        Args:
            frame_path (str | Path): frame path
            scan_desc (ScanDesc): scan description
            kwargs: pass to load_SES_h5, thus, only "robust_dimension_labels" can be accepted.

        Returns: xr.Dataset
            ARPES data
        """
        if scan_desc is None:
            scan_desc = {}
        ext = Path(frame_path).suffix

        if "nc" in ext or "h5" in ext:
            # was converted to hdf5/NetCDF format with Conrad's Igor scripts
            scan_desc["path"] = frame_path
            return self.load_SES_h5(scan_desc=scan_desc, robust_dimension_labels=True, **kwargs)

        # it's given by SES PXT files
        pxt_data = read_single_pxt(frame_path).assign_coords(
            {"eV": -read_single_pxt(frame_path).eV.values},
        )  # negate energy
        return xr.Dataset({"spectrum": pxt_data}, attrs=pxt_data.attrs)

    def load_SES_h5(
        self,
        scan_desc: ScanDesc | None = None,
        *,
        robust_dimension_labels: bool = False,
    ) -> xr.Dataset:
        """Imports an hdf5 dataset exported from Igor.

        In particular, this handles data that was originally generated by a Scienta spectrometer
        in the SES format. In order to understand the structure of these files have a look at
        Conrad's saveSESDataset in Igor Pro.

        Args:
            scan_desc: Dictionary with extra information to attach to the xr.Dataset, must contain
                the location of the file
            robust_dimension_labels (bool): set True when dimension is missing ang to override it.

        Returns:
            The loaded data.
        """
        scan_desc = scan_desc or {}

        data_loc = scan_desc.get("path", scan_desc.get("file"))
        assert data_loc is not None
        if not Path(data_loc).exists():
            data_path = get_data_path()
            if data_path is not None:
                data_loc = Path(data_path) / data_loc
            else:
                msg = "File not found."
                raise RuntimeError(msg)

        f = h5py.File(data_loc, "r")

        primary_dataset_name = next(iter(f))
        # This is bugged for the moment in h5py due to an inability to read fixed length unicode
        # strings
        dimension_labels = list(f["/" + primary_dataset_name].attrs["IGORWaveDimensionLabels"][0])

        if any(not x for x in dimension_labels):
            if not robust_dimension_labels:
                msg = "Missing dimension labels. Use robust_dimension_labels=True to override"
                raise ValueError(
                    msg,
                )
            used_blanks = 0
            for i in range(len(dimension_labels)):
                if not dimension_labels[i]:
                    dimension_labels[i] = f"missing{used_blanks}"
                    used_blanks += 1

        scaling = f["/" + primary_dataset_name].attrs["IGORWaveScaling"][-len(dimension_labels) :]
        raw_data = f["/" + primary_dataset_name][:]

        scaling = [
            np.linspace(scale[1], scale[1] + scale[0] * raw_data.shape[i], raw_data.shape[i])
            for i, scale in enumerate(scaling)
        ]

        dataset_contents = {}
        attrs = scan_desc.pop("note", {})

        built_coords = dict(zip(dimension_labels, scaling, strict=True))

        deg_to_rad_coords = {"theta", "beta", "phi"}

        # the hemisphere axis is handled below
        built_coords = {
            k: np.deg2rad(c) if k in deg_to_rad_coords else c for k, c in built_coords.items()
        }

        deg_to_rad_attrs = {"theta", "beta", "alpha", "chi"}
        for angle_attr in deg_to_rad_attrs:
            if angle_attr in attrs:
                attrs[angle_attr] = np.deg2rad(float(attrs[angle_attr]))

        dataset_contents["spectrum"] = xr.DataArray(
            raw_data,
            coords=built_coords,
            dims=dimension_labels,
            attrs=attrs,
        )
        provenance_context: Provenance = cast(
            "Provenance",
            {
                "what": "Loaded SES dataset from HDF5.",
                "by": "load_SES",
            },
        )
        provenance_from_file(dataset_contents["spectrum"], str(data_loc), provenance_context)

        return xr.Dataset(
            dataset_contents,
            attrs={**scan_desc, "dataset_name": primary_dataset_name},
        )