Source code for ahbicht.json_serialization.tree_schema

"""
Schemata for the JSON serialization of expressions.
"""

import sys
from typing import TYPE_CHECKING, Annotated, Any, Literal, TypeAlias, Union

from lark import Token, Tree
from pydantic import ConfigDict, PlainSerializer, TypeAdapter

if TYPE_CHECKING or sys.version_info >= (3, 12):
    from typing import TypedDict
else:
    # fixes pydantic.errors.PydanticUserError:
    # Please use `typing_extensions.TypedDict` instead of `typing.TypedDict` on Python < 3.12.
    from typing_extensions import TypedDict


# For both of the serialization behaviours: I don't know anymore WHY we chose to do it that way,
# but at this point we're just maintaining backward compatability in the pydantic world with the marshmallow past.

_TokenDict: TypeAlias = dict[Literal["value", "type"], Any]
_TreeDict: TypeAlias = dict[Literal["children", "type"], Any]


class _TreeOrTokenDictWithToken(TypedDict):
    token: _TokenDict
    tree: None


class _TreeOrTokenDictWithTree(TypedDict):
    token: None
    tree: _TreeDict


_TreeOrTokenDict: TypeAlias = Union[_TreeOrTokenDictWithToken, _TreeOrTokenDictWithTree]


def _serialize_children(t: Union[Tree[Token], Token]) -> Union[_TokenDict, _TreeDict]:
    if isinstance(t, Tree):
        return TREE_ADAPTER.dump_python(t, mode="json")  # type: ignore[no-any-return]
    if isinstance(t, Token):
        return TOKEN_ADAPTER.dump_python(t, mode="json")  # type: ignore[no-any-return]
    raise ValueError(f"Unsupported type {t.__class__.__name__}")


def _serialize_tree(tree: Tree[Token]) -> _TreeOrTokenDictWithTree:
    return {"token": None, "tree": {"type": tree.data, "children": [_serialize_children(c) for c in tree.children]}}


def _serialize_token(token: Token) -> _TreeOrTokenDictWithToken:
    return {"tree": None, "token": {"value": token.value, "type": token.type}}


TOKEN_ADAPTER: TypeAdapter[Token] = TypeAdapter(
    Annotated[Token, PlainSerializer(_serialize_token)], config=ConfigDict(arbitrary_types_allowed=True)
)

TREE_ADAPTER: TypeAdapter[Tree[Token]] = TypeAdapter(
    Annotated[Tree[Token], PlainSerializer(_serialize_tree)], config=ConfigDict(arbitrary_types_allowed=True)
)


[docs] def model_dump_tree( tree: Tree[Token], mode: Literal["json", "concise", "compress-conditions-only"] = "json" ) -> dict[str, Any]: """ahbicht v1 replacement for the removed TreeSchema""" result: dict[str, Any] = TREE_ADAPTER.dump_python(tree, mode="json") if mode == "json": return result["tree"] # type: ignore[no-any-return] if mode == "concise": return _compress(result["tree"]) # type: ignore[no-any-return] if mode == "compress-conditions-only": return _compress_condition_keys_only(result["tree"]) raise ValueError(f"Unsupported mode {mode}")
def _compress_condition_keys_only(data: dict[str, Any]) -> dict[str, Any]: """ a function that merges a condition key node with its only child (a token that has an int value) """ # this has been found heuristically. There's no way to explain it, just follow the test cases. # there's probably a much easier way, e.g. by using a separate token schema. if "tree" in data and data["tree"] is not None: if "type" in data["tree"]: if data["tree"]["type"] == "single_requirement_indicator_expression": if data["tree"]["children"][0]["token"]["type"] == "MODAL_MARK": modal_mark = data["tree"]["children"][0]["token"]["value"] del data["tree"]["children"][0] data["tree"]["type"] = modal_mark elif data["tree"]["children"][0]["token"]["type"] == "PREFIX_OPERATOR": prefix_operator = data["tree"]["children"][0]["token"]["value"] del data["tree"]["children"][0] data["tree"]["type"] = prefix_operator if data["tree"]["type"] == "condition": return { "token": {"value": data["tree"]["children"][0]["token"]["value"], "type": "condition_key"}, "tree": None, } if "token" in data and data["token"] is None and "children" in data["tree"]: data["tree"]["children"] = [_compress_condition_keys_only(child) for child in data["tree"]["children"]] if "type" in data and data["type"] is not None and "children" in data and data["children"] is not None: data["children"] = [_compress_condition_keys_only(child) for child in data["children"]] return data def _compress(data: Any) -> Any: """ a function that "throws away" unnecessary data. The price we pay is that we loose the ability to easily deserialize the result. But if we're only interested in a simple tree that's fine. """ if ( "children" in data and "type" in data and (data["type"].endswith("_composition") or data["type"].endswith("_expression")) ): return {data["type"]: [_compress(child) for child in data["children"]]} if "tree" in data and "token" in data and data["token"] is None: return _compress(data["tree"]) if "tree" in data and "token" in data and data["tree"] is None: return _compress(data["token"]) if "type" in data and "children" in data: # and data["type"] in {"MODAL_MARK", "condition_key"}: return _compress(data["children"][0]["token"]["value"]) if "type" in data and data["type"] in {"MODAL_MARK"}: return data["value"] return data __all__ = ["model_dump_tree"]