# Source code for aiida_cp2k.utils.datatype_helpers

# -*- coding: utf-8 -*-
###############################################################################
# Copyright (c), The AiiDA-CP2K authors.                                      #
# SPDX-License-Identifier: MIT                                                #
# AiiDA-CP2K is hosted on GitHub at https://github.com/aiidateam/aiida-cp2k   #
# For further information on the license, see the LICENSE.txt file.           #
###############################################################################
"""AiiDA-CP2K Gaussian Datatype Helpers."""

import io
from collections.abc import Sequence

from aiida.common import InputValidationError
from aiida.plugins import DataFactory


def _unpack(adict):
    """Unpack any lists as values into single elements for the key.

    Values which are sequences contribute one ``(key, item)`` pair per
    contained item (an empty sequence contributes nothing); any other
    value is yielded unchanged as a single ``(key, value)`` pair.

    :param adict: mapping to flatten, iterated via ``.items()``
    :returns: generator of ``(key, item)`` tuples
    """

    for key, value in adict.items():
        if isinstance(value, Sequence):
            # note: a plain string is a Sequence as well and is split into its characters
            yield from ((key, item) for item in value)
        else:
            yield (key, value)


def _identifier(gdt):
    """Our unique identifier for gaussian datatypes.

    :param gdt: object exposing the attributes ``element`` and ``name``
    :returns: the tuple ``(gdt.element, gdt.name)``
    """
    return gdt.element, gdt.name


def _validate_gdt_namespace(entries, gdt_cls, attr):
    """Common namespace validator for both basissets and pseudos.

    :param entries: mapping of a kind label to one instance or a sequence of instances
    :param gdt_cls: class every entry has to be an instance of
    :param attr: human readable name of the datatype, only used in the returned messages
    :returns: a message describing the first problem found, or ``None`` if all entries are fine
    """

    # identifiers are (element, name) tuples, a set gives us cheap duplicate detection
    seen = set()

    for kind, gdt_instance in _unpack(entries):
        if not isinstance(gdt_instance, gdt_cls):
            return "invalid {attr} for '{kind}' specified".format(attr=attr, kind=kind)

        identifier = _identifier(gdt_instance)

        if identifier in seen:
            # note: this should be possible for basissets with different versions
            #       but at this point we should require some format for the key to match it
            return "{attr} for kind {gdt_instance.element} ({gdt_instance.name}) specified multiple times".format(
                attr=attr, gdt_instance=gdt_instance)

        seen.add(identifier)

    return None


def _write_gdt(inp, entries, folder, key, fname):
    """Inject <key>=<fname> into all DFT sections and write the entries to a file.

    Every section yielded by ``inp.param_iter(sections=True)`` whose last path
    component equals ``DFT`` (compared case-insensitively) gets ``key`` set to
    ``fname``. Afterwards all entries are serialized, one after the other, into
    the file ``fname`` inside ``folder``.

    :param inp: input object providing ``param_iter(sections=True)``
    :param entries: mapping of a label to one entry or a sequence of entries, each providing ``to_cp2k(fhandle)``
    :param folder: object providing ``get_abs_path(fname)``
    :param key: keyword to set in the DFT sections
    :param fname: name of the file to reference and to write
    """

    for secpath, section in inp.param_iter(sections=True):
        if secpath[-1].upper() == "DFT":
            section[key] = fname

    target = folder.get_abs_path(fname)

    with io.open(target, mode="w", encoding="utf-8") as fhandle:
        for _, entry in _unpack(entries):
            entry.to_cp2k(fhandle)


def validate_basissets_namespace(basissets, _):
    """An input_namespace validator to ensure passed down basis sets have the correct type.

    :param basissets: mapping of a kind label to one basis set or a sequence of basis sets
    :param _: unused second validator argument
    :returns: an error message if the validation failed, ``None`` otherwise
    """
    basisset_cls = DataFactory("gaussian.basisset")
    return _validate_gdt_namespace(basissets, basisset_cls, "basis set")


def validate_basissets(inp, basissets, structure):
    """
    Verify that all referenced basissets are present in the input.
    Currently supports 2 modes: either all of the basissets are explicitly
    listed in a KIND section, or none of them are, at which point they're
    verified against the symbols in the structure.

    In the second mode the KIND sections in ``inp["FORCE_EVAL"]["SUBSYS"]`` are
    created/extended in place with the matching BASIS_SET (and ELEMENT) keywords.

    :param inp: input object providing ``param_iter(sections=True)`` and item access
    :param basissets: mapping of a label to one basis set or a sequence of basis sets
    :param structure: structure providing ``get_kind_names()`` and ``get_symbols_set()``, may be empty/``None``
    :raises InputValidationError: if a referenced basis set is missing, a given one is unused,
        a label can not be matched or the automated mode is not applicable
    """
    # pylint: disable=too-many-branches

    basisset_used = {_identifier(bset): 0 for _, bset in _unpack(basissets)}
    basisset_kw_used = False

    for secpath, section in inp.param_iter(sections=True):
        # ignore non-kind sections
        if secpath[-1].upper() != "KIND":
            continue

        if "BASIS_SET" not in section:
            # ignore kind sections without a BASIS_SET keyword
            continue

        basisset_kw_used = True

        kind = section["_"]
        element = section.get("ELEMENT", kind)

        # the keyword BASIS_SET can occur multiple times (even for the same type)
        # in which case the specified basis sets are merged
        bsspecs = section["BASIS_SET"]

        if isinstance(bsspecs, str):
            bsspecs = [bsspecs]

        for bsspec in bsspecs:
            # test for new-style basis set specification: "<type> <name>"
            try:
                bstype, bsname = bsspec.split(maxsplit=1)
            except ValueError:
                bstype, bsname = "ORB", bsspec

            try:
                basisset_used[(element, bsname)] += 1
            except KeyError:
                # the KeyError itself carries no additional information for the user
                raise InputValidationError(("'BASIS_SET {bstype} {bsname}' for element {element} (from kind {kind})"
                                            " not found in basissets input namespace").format(
                                                bsname=bsname, bstype=bstype, element=element, kind=kind)) from None

    if basisset_kw_used:
        for (sym, name), used in basisset_used.items():
            if not used:
                raise InputValidationError("Basis sets provided in calculation for kind {sym} ({name}),"
                                           " but not used in input".format(sym=sym, name=name))
        # if all basissets are referenced in the input, we're done
        return

    if not structure:  # no support for COORD section (yet)
        raise InputValidationError("No explicit structure given and basis sets not referenced in input")

    force_eval = inp["FORCE_EVAL"]

    if isinstance(force_eval, Sequence):
        raise InputValidationError(
            "Automated BASIS_SET keyword creation is not yet supported with multiple FORCE_EVALs."
            " Please explicitly reference a BASIS_SET for each KIND.")

    allowed_labels = structure.get_kind_names() + list(structure.get_symbols_set())

    for label, bset in _unpack(basissets):
        # a label of the form "<kind>_<type>" selects a basis set type other than ORB
        try:
            label, bstype = label.split("_", maxsplit=1)
        except ValueError:
            bstype = "ORB"

        if label not in allowed_labels:
            raise InputValidationError("Basis sets provided in calculation for kind {bset.element} ({bset.name}),"
                                       " with label {label} could not be matched to a kind in the structure".format(
                                           bset=bset, label=label))

        subsys = force_eval.setdefault("SUBSYS", {})
        kind_sections = subsys.setdefault("KIND", [])

        if isinstance(kind_sections, dict):
            # a single KIND section given as a plain dict: wrap it in a list
            # such that it can be searched and further sections can be appended
            kind_sections = subsys["KIND"] = [kind_sections]

        kind_sec = next((s for s in kind_sections if s.get("_", "") == label), None)

        if kind_sec is None:
            kind_sec = {"_": label}
            kind_sections.append(kind_sec)

        bsref = "{bstype} {bset.name}".format(bstype=bstype, bset=bset)

        # No KIND section had a BASIS_SET keyword when we got here, hence anything found
        # now was added for a previous basis set of the same kind: keep it instead of
        # overwriting it (the keyword may be repeated, which is represented by a list).
        if "BASIS_SET" not in kind_sec:
            kind_sec["BASIS_SET"] = bsref
        elif isinstance(kind_sec["BASIS_SET"], str):
            kind_sec["BASIS_SET"] = [kind_sec["BASIS_SET"], bsref]
        else:
            kind_sec["BASIS_SET"].append(bsref)

        if "ELEMENT" not in kind_sec:
            kind_sec["ELEMENT"] = bset.element


def write_basissets(inp, basissets, folder):
    """Writes the unified BASIS_SETS file with the used basissets.

    Sets ``BASIS_SET_FILE_NAME`` to ``BASIS_SETS`` in the DFT sections of the
    input and writes all given basis sets to the file ``BASIS_SETS`` in ``folder``.

    :param inp: input object whose DFT sections get updated in place
    :param basissets: mapping of a label to one basis set or a sequence of basis sets
    :param folder: folder object the file is written to
    """
    _write_gdt(inp, basissets, folder, "BASIS_SET_FILE_NAME", "BASIS_SETS")


def validate_pseudos_namespace(pseudos, _):
    """An input_namespace validator to ensure passed down pseudopotentials have the correct type.

    :param pseudos: mapping of a kind label to a pseudopotential
    :param _: unused second validator argument
    :returns: an error message if the validation failed, ``None`` otherwise
    """
    pseudo_cls = DataFactory("gaussian.pseudo")
    return _validate_gdt_namespace(pseudos, pseudo_cls, "pseudo")


def validate_pseudos(inp, pseudos, structure):
    """Verify that all referenced pseudos are present in the input.

    Either the pseudos are explicitly referenced by a POTENTIAL (or POT) keyword
    in the KIND sections, in which case every reference must be matched by a given
    pseudo and every given pseudo must be referenced, or none of them is, in which
    case the KIND sections in ``inp["FORCE_EVAL"]["SUBSYS"]`` are created/extended
    in place with the matching POTENTIAL (and ELEMENT) keywords.

    :param inp: input object providing ``param_iter(sections=True)`` and item access
    :param pseudos: mapping of a label to a pseudopotential
    :param structure: structure providing ``get_kind_names()`` and ``get_symbols_set()``, may be empty/``None``
    :raises InputValidationError: if a referenced pseudo is missing, a given one is unused,
        a label can not be matched or the automated mode is not applicable
    """

    # pylint: disable=too-many-branches

    # there can be only one pseudo per kind, thus, no _unpack
    pseudo_used = {_identifier(pseudo): 0 for pseudo in pseudos.values()}
    pseudo_kw_used = False

    for secpath, section in inp.param_iter(sections=True):
        # ignore non-kind sections
        if secpath[-1].upper() != "KIND":
            continue

        kind = section["_"]
        element = section.get("ELEMENT", kind)

        pspec = section.get("POTENTIAL", section.get("POT"))

        if pspec is None:
            # ignore kind sections without a POTENTIAL keyword (or POT alias)
            continue

        # the potential may be given as "<type> <name>" or just "<name>"
        try:
            ptype, pname = pspec.split(maxsplit=1)
        except ValueError:
            ptype, pname = "GTH", pspec

        pseudo_kw_used = True

        try:
            pseudo_used[(element, pname)] += 1
        except KeyError:
            # the KeyError itself carries no additional information for the user
            raise InputValidationError(("'POTENTIAL {ptype} {pname}' for element {element} (from kind {kind})"
                                        " not found in pseudos input namespace").format(
                                            pname=pname, ptype=ptype, element=element, kind=kind)) from None

    if pseudo_kw_used:
        for (sym, name), used in pseudo_used.items():
            if not used:
                raise InputValidationError("Pseudos provided in calculation for kind {sym} ({name}),"
                                           " but not used in input".format(sym=sym, name=name))
        # if all pseudos are referenced in the input, we're done
        return

    if not structure:  # no support for COORD section (yet)
        raise InputValidationError("No explicit structure given and pseudos not referenced in input")

    force_eval = inp["FORCE_EVAL"]

    if isinstance(force_eval, Sequence):
        raise InputValidationError(
            "Automated POTENTIAL keyword creation is not yet supported with multiple FORCE_EVALs."
            " Please explicitly reference a POTENTIAL for each KIND.")

    allowed_labels = structure.get_kind_names() + list(structure.get_symbols_set())

    for label, pseudo in pseudos.items():
        if label not in allowed_labels:
            raise InputValidationError("Pseudo provided in calculation for kind {pseudo.element} ({pseudo.name}),"
                                       " with label {label} could not be matched to a kind in the structure".format(
                                           pseudo=pseudo, label=label))

        subsys = force_eval.setdefault("SUBSYS", {})
        kind_sections = subsys.setdefault("KIND", [])

        if isinstance(kind_sections, dict):
            # a single KIND section given as a plain dict: wrap it in a list
            # such that it can be searched and further sections can be appended
            kind_sections = subsys["KIND"] = [kind_sections]

        kind_sec = next((s for s in kind_sections if s.get("_", "") == label), None)

        if kind_sec is None:
            kind_sec = {"_": label}
            kind_sections.append(kind_sec)

        kind_sec["POTENTIAL"] = "GTH {pseudo.name}".format(pseudo=pseudo)

        if "ELEMENT" not in kind_sec:
            kind_sec["ELEMENT"] = pseudo.element


def write_pseudos(inp, pseudos, folder):
    """Writes the unified POTENTIAL file with the used pseudos.

    Sets ``POTENTIAL_FILE_NAME`` to ``POTENTIAL`` in the DFT sections of the
    input and writes all given pseudos to the file ``POTENTIAL`` in ``folder``.

    :param inp: input object whose DFT sections get updated in place
    :param pseudos: mapping of a label to a pseudopotential
    :param folder: folder object the file is written to
    """
    _write_gdt(inp, pseudos, folder, "POTENTIAL_FILE_NAME", "POTENTIAL")