# Source code for ahbicht.json_serialization.tree_schema

"""
Schemata for the JSON serialization of expressions.
"""

import sys
from typing import TYPE_CHECKING, Annotated, Any, Literal, TypeAlias, Union

from lark import Token, Tree
from pydantic import ConfigDict, PlainSerializer, TypeAdapter

if TYPE_CHECKING or sys.version_info >= (3, 12):
    from typing import TypedDict
else:
    # fixes pydantic.errors.PydanticUserError:
    # Please use `typing_extensions.TypedDict` instead of `typing.TypedDict` on Python < 3.12.
    from typing_extensions import TypedDict


# For both of the serialization behaviours: I don't know anymore WHY we chose to do it that way,
# but at this point we're just maintaining backward compatibility in the pydantic world with the marshmallow past.

# Shape of the inner dict of a serialized token; _serialize_token fills the keys "value" and "type".
_TokenDict: TypeAlias = dict[Literal["value", "type"], Any]
# Shape of the inner dict of a serialized tree node; _serialize_tree fills the keys "type" and "children".
_TreeDict: TypeAlias = dict[Literal["children", "type"], Any]


class _TreeOrTokenDictWithToken(TypedDict):
    """
    Wrapper dict for a serialized token: ``token`` carries the data, ``tree`` is always ``None``.
    """

    token: _TokenDict
    tree: None


class _TreeOrTokenDictWithTree(TypedDict):
    """
    Wrapper dict for a serialized tree node: ``tree`` carries the data, ``token`` is always ``None``.
    """

    token: None
    tree: _TreeDict


# A serialized node is one of the two wrapper variants above: either "token" or "tree" is set, the other is None.
_TreeOrTokenDict: TypeAlias = Union[_TreeOrTokenDictWithToken, _TreeOrTokenDictWithTree]


def _serialize_children(t: Union[Tree[Token], Token]) -> _TreeOrTokenDict:
    """
    Serialize a single child of a tree node by dispatching to the matching type adapter.

    The adapters run ``_serialize_tree`` / ``_serialize_token``, so the result is the wrapper dict that has both
    a ``"token"`` and a ``"tree"`` key (one of them ``None``) and not the bare inner token/tree dict.

    :param t: a child node; either a nested tree or a token
    :return: the JSON-mode dump of ``t`` as produced by ``TREE_ADAPTER`` or ``TOKEN_ADAPTER``
    :raises ValueError: if ``t`` is neither a ``Tree`` nor a ``Token``
    """
    # The adapters are module-level names defined further down in the file; they are only looked up at call time.
    if isinstance(t, Tree):
        return TREE_ADAPTER.dump_python(t, mode="json")  # type: ignore[no-any-return]
    if isinstance(t, Token):
        return TOKEN_ADAPTER.dump_python(t, mode="json")  # type: ignore[no-any-return]
    raise ValueError(f"Unsupported type {t.__class__.__name__}")


def _serialize_tree(tree: Tree[Token]) -> _TreeOrTokenDictWithTree:
    """
    Build the wrapper dict for a tree node.

    :param tree: the tree node to serialize
    :return: a dict whose ``"token"`` is ``None`` and whose ``"tree"`` holds the node's ``data`` as ``"type"``
        and the serialized children (in their original order) as ``"children"``
    """
    node_type = tree.data
    serialized_children = list(map(_serialize_children, tree.children))
    tree_dict: _TreeDict = {"type": node_type, "children": serialized_children}
    return {"token": None, "tree": tree_dict}


def _serialize_token(token: Token) -> _TreeOrTokenDictWithToken:
    """
    Build the wrapper dict for a token.

    :param token: the token to serialize
    :return: a dict whose ``"tree"`` is ``None`` and whose ``"token"`` holds the token's ``value`` and ``type``
    """
    token_dict: _TokenDict = {"value": token.value, "type": token.type}
    return {"tree": None, "token": token_dict}


# Adapter whose serialization of a Token is delegated entirely to _serialize_token.
# NOTE(review): arbitrary_types_allowed is presumably required because lark's Token is not a pydantic-aware type.
TOKEN_ADAPTER: TypeAdapter[Token] = TypeAdapter(
    Annotated[Token, PlainSerializer(_serialize_token)], config=ConfigDict(arbitrary_types_allowed=True)
)

# Adapter whose serialization of a Tree is delegated entirely to _serialize_tree.
# _serialize_tree recurses into the children through _serialize_children, which uses both adapters.
TREE_ADAPTER: TypeAdapter[Tree[Token]] = TypeAdapter(
    Annotated[Tree[Token], PlainSerializer(_serialize_tree)], config=ConfigDict(arbitrary_types_allowed=True)
)



# [docs]
def model_dump_tree(
    tree: Tree[Token], mode: Literal["json", "concise", "compress-conditions-only"] = "json"
) -> dict[str, Any]:
    """
    ahbicht v1 replacement for the removed TreeSchema.

    The tree is always dumped with ``TREE_ADAPTER`` first; ``mode`` selects the post-processing:

    * ``"json"``: return the ``"tree"`` part of the dump unchanged
    * ``"concise"``: pass the ``"tree"`` part through ``_compress``
    * ``"compress-conditions-only"``: pass the ``"tree"`` part through ``_compress_condition_keys_only``

    :param tree: the tree to serialize
    :param mode: the output flavour, see above
    :return: the serialized (and possibly compressed) tree
    :raises ValueError: if ``mode`` is none of the supported values
    """
    dumped: dict[str, Any] = TREE_ADAPTER.dump_python(tree, mode="json")
    tree_dict: dict[str, Any] = dumped["tree"]
    if mode == "compress-conditions-only":
        return _compress_condition_keys_only(tree_dict)
    if mode == "concise":
        return _compress(tree_dict)  # type: ignore[no-any-return]
    if mode != "json":
        raise ValueError(f"Unsupported mode {mode}")
    return tree_dict



def _compress_condition_keys_only(data: dict[str, Any]) -> dict[str, Any]:
    """
    Merge every ``condition`` node with its only child token into a single ``condition_key`` token.

    Additionally a ``single_requirement_indicator_expression`` whose first child is a ``MODAL_MARK`` or
    ``PREFIX_OPERATOR`` token loses that child; the token's value becomes the node's new ``type``.
    Apart from the freshly built ``condition_key`` dicts, ``data`` is modified in place and returned.

    :param data: either a wrapper dict (keys ``"token"``/``"tree"``) or a bare tree dict (keys ``"type"``/``"children"``)
    :return: the compressed structure
    """
    # This has been found heuristically. There is no real explanation, the test cases are the specification.
    # There is probably a much easier way, e.g. by using a separate token schema.
    subtree = data["tree"] if "tree" in data else None
    if subtree is not None:
        if "type" in subtree and subtree["type"] == "single_requirement_indicator_expression":
            leading_token = subtree["children"][0]["token"]
            if leading_token["type"] in ("MODAL_MARK", "PREFIX_OPERATOR"):
                # the leading token is dropped and its value is promoted to the node type
                promoted_type = leading_token["value"]
                del subtree["children"][0]
                subtree["type"] = promoted_type
        if subtree["type"] == "condition":
            condition_key = subtree["children"][0]["token"]["value"]
            return {"token": {"value": condition_key, "type": "condition_key"}, "tree": None}
        if "token" in data and data["token"] is None and "children" in subtree:
            subtree["children"] = list(map(_compress_condition_keys_only, subtree["children"]))
    # a bare tree dict (without the token/tree wrapper) only needs its children processed
    is_bare_tree = all(key in data and data[key] is not None for key in ("type", "children"))
    if is_bare_tree:
        data["children"] = list(map(_compress_condition_keys_only, data["children"]))
    return data


def _compress(data: Any) -> Any:
    """
    Recursively reduce a serialized tree to a compact representation by "throwing away" unnecessary data.

    The price we pay is that we lose the ability to easily deserialize the result.
    But if we're only interested in a simple tree that's fine.

    :param data: a wrapper dict (``"token"``/``"tree"``), a bare tree or token dict, or a leaf value
    :return: ``{type: [compressed children]}`` for ``*_composition`` / ``*_expression`` nodes, the value of the
        first child token for any other node with children, the plain value for ``MODAL_MARK`` tokens and the
        input itself for everything else
    """
    if not isinstance(data, dict):
        # Leaf values (the token value this function recurses into) are returned untouched. Without this guard
        # the membership checks below run as substring tests on strings and raise a TypeError on e.g. ints.
        return data
    has_type = "type" in data
    has_children = "children" in data
    if has_type and has_children and data["type"].endswith(("_composition", "_expression")):
        return {data["type"]: [_compress(child) for child in data["children"]]}
    if "tree" in data and "token" in data:
        # wrapper dict: continue with whichever of the two parts is set
        if data["token"] is None:
            return _compress(data["tree"])
        if data["tree"] is None:
            return _compress(data["token"])
    if has_type and has_children:
        # any other node with children is collapsed to the value of its first child token
        return _compress(data["children"][0]["token"]["value"])
    if has_type and data["type"] == "MODAL_MARK":
        return data["value"]
    return data


# Only model_dump_tree is exported; the adapters and the underscore-prefixed helpers are not listed here.
__all__ = ["model_dump_tree"]