# Source code for ahbicht.expressions.condition_expression_parser
"""
This module parses a condition expression like "[59] U ([123] O [456])" into a tree structure using
the parsing library lark: https://lark-parser.readthedocs.io/en/latest/
The used terms are defined in the README_conditions.md.
"""
# pylint:disable=cyclic-import
from __future__ import annotations
from functools import lru_cache
from typing import TYPE_CHECKING, Optional, Union
from lark import Lark, Token, Tree
from lark.exceptions import UnexpectedCharacters, UnexpectedEOF
from ahbicht.condition_node_distinction import derive_condition_node_type
from ahbicht.expressions import parsing_logger
from ahbicht.expressions.sanitizer import sanitize_expression
from ahbicht.models.categorized_key_extract import CategorizedKeyExtract
from ahbicht.models.condition_node_type import ConditionNodeType
from ahbicht.utility_functions import tree_copy
if TYPE_CHECKING:
from ahbicht.content_evaluation.ahb_context import AhbContext
# Lark grammar for condition expressions such as "[59] U ([123] O [456])".
# * The alternatives of `expression` map operators to tree nodes:
#   O / ∨ / V -> or_composition, X / ⊻ -> xor_composition, U / ∧ -> and_composition,
#   and two adjacent expressions without any operator -> then_also_composition.
# * The letter operators (O, V, X, U) carry Lark's "i" flag, i.e. they are matched case-insensitively.
# * `expression` and `brackets` are prefixed with "?", so Lark inlines them when they have a single child.
# * The leaves are `condition` ("[" INT "]"), `package` ("[" INT "P" plus an optional REPEATABILITY "]")
#   and `time_condition` ("[UB1]", "[UB2]" or "[UB3]").
# * Whitespace (common.WS) is ignored everywhere.
GRAMMAR = r"""
?expression: expression "O"i expression -> or_composition
| expression "∨" expression -> or_composition // the logical or
| expression "V"i expression -> or_composition // a 'v' for those who first chose to introduce logical symbols like ∨ but now can't find them on their keyboard
| expression "X"i expression -> xor_composition
| expression "⊻" expression -> xor_composition
| expression "U"i expression -> and_composition
| expression "∧" expression -> and_composition
| expression expression -> then_also_composition
| brackets
| package
| condition
| time_condition
?brackets: "(" expression ")"
time_condition: "[" TIME_CONDITION_KEY "]" // a rule for point in time-conditions
package: "[" PACKAGE_KEY REPEATABILITY? "]" // a rule for packages
condition: "[" CONDITION_KEY "]" // a rule for condition keys
TIME_CONDITION_KEY: /UB(1|2|3)/ // a terminal for "übergreifende Bedingungen für Zeitpunktangaben"
CONDITION_KEY: INT // a TERMINAL for all the remaining ints (lower priority)
REPEATABILITY: /\d+\.{2}(?:([1-9]\d*)|n)/ // a terminal for repetitions n..m with n>=0 and m>n or m=="n"
PACKAGE_KEY: INT "P" // a TERMINAL for all INTs followed by "P" (high priority)
%import common.INT
%import common.WS
%ignore WS // WS = whitespace
"""
# Module-level parser instance: built once at import time from GRAMMAR, with "expression" as the start rule.
# No `parser=` argument is passed, so Lark's default parsing algorithm is used.
_parser = Lark(GRAMMAR, start="expression")
@tree_copy
@lru_cache(maxsize=1024)
def parse_condition_expression_to_tree(condition_expression: str) -> Tree[Token]:
    """
    Build a lark tree from a condition expression using the grammar defined in this module.

    The expression is sanitized first and then handed to the module-level parser.
    Condition keys such as [45] may be combined with the operators U (and), O (or), X (xor) or simply be
    written next to each other (then_also_composition).
    The documented operator precedence is
    'brackets before `then_also` before `and` before `xor` before `or`'. Whitespace is ignored.

    Results are memoized by ``lru_cache`` (up to 1024 distinct expressions). The ``tree_copy`` decorator
    wraps the cached function; presumably it returns a copy of the cached tree to the caller - confirm in
    ahbicht.utility_functions.

    :param condition_expression: the expression to parse, e.g. '[45]U[502]O[1][906]'
    :return: the parsed lark tree
    :raises SyntaxError: if sanitizing/parsing fails with UnexpectedEOF, UnexpectedCharacters or TypeError
    """
    try:
        # the parameter is rebound on purpose: if sanitizing itself fails, the messages below show the raw input
        condition_expression = sanitize_expression(condition_expression)
        tree = _parser.parse(condition_expression)
        parsing_logger.debug("Successfully parsed '%s' as condition expression", condition_expression)
    except (UnexpectedEOF, UnexpectedCharacters, TypeError) as parsing_error:
        parsing_logger.warning(
            "The condition expression is syntactically incorrect: '%s'",
            condition_expression,
            exc_info=parsing_error,
        )
        # the lines of the message intentionally start at column 0; they are part of the emitted text
        error_message = f"""
condition expression: {condition_expression}
Please make sure that:
* all conditions have the form [INT]
* all packages have the form [INTPn..m]
* no conditions are empty
* all compositions are combined by operators 'U'/'O'/'X' or without an operator
* all open brackets are closed again and vice versa
"""
        raise SyntaxError(error_message) from parsing_error
    return tree
# [docs] (link marker left over from the rendered documentation page)
def extract_categorized_keys_from_tree(
    tree_or_list: Union[Tree[Token], list[str]], sanitize: bool = False
) -> CategorizedKeyExtract:
    """
    Sort the condition keys found in a tree (or given as a plain list) into categories.

    For a tree, the values of all CONDITION_KEY, PACKAGE_KEY and TIME_CONDITION_KEY tokens are collected;
    package and time condition keys are stored as they are. For a list, the entries are treated as
    condition keys and the package/time condition keys stay empty.
    Each condition key is then categorized by ``derive_condition_node_type``
    (see 'Allgemeine Festlegungen' from EDI@Energy for the number ranges).

    :param tree_or_list: a parsed lark tree or a list of condition keys
    :param sanitize: if True, ``sanitize()`` is called on the result before it is returned
    :return: the keys grouped by category
    :raises ValueError: if ``tree_or_list`` is neither a list nor a Tree
    :raises NotImplementedError: if a condition key has a node type that is not handled here
    """
    result = CategorizedKeyExtract(
        format_constraint_keys=[],
        requirement_constraint_keys=[],
        hint_keys=[],
        package_keys=[],
        time_condition_keys=[],
    )

    def _token_values(tree: Tree[Token], token_type: str) -> list[str]:
        # collect the values of all leaf tokens of the requested type
        matching_tokens = tree.scan_values(
            lambda token: token.type == token_type  # type: ignore[union-attr]
        )
        return [token.value for token in matching_tokens]

    keys_to_categorize: list[str]
    if isinstance(tree_or_list, list):
        keys_to_categorize = tree_or_list
    elif isinstance(tree_or_list, Tree):
        keys_to_categorize = _token_values(tree_or_list, "CONDITION_KEY")
        result.package_keys = _token_values(tree_or_list, "PACKAGE_KEY")
        result.time_condition_keys = _token_values(tree_or_list, "TIME_CONDITION_KEY")
    else:
        raise ValueError(f"{tree_or_list} is neither a list nor a {Tree.__name__}")

    for key in keys_to_categorize:
        node_type = derive_condition_node_type(key)
        # repeatability constraints and package repeatabilities are filed under the requirement constraints
        if (
            node_type is ConditionNodeType.REQUIREMENT_CONSTRAINT
            or node_type is ConditionNodeType.REPEATABILITY_CONSTRAINT
            or node_type is ConditionNodeType.PACKAGE_REPEATABILITY
        ):
            target_keys = result.requirement_constraint_keys
        elif node_type is ConditionNodeType.HINT:
            target_keys = result.hint_keys
        elif node_type is ConditionNodeType.FORMAT_CONSTRAINT:
            target_keys = result.format_constraint_keys
        else:
            # if you run into the ConditionNodeType.PACKAGE case, you probably forgot to enable the package
            # resolving in the parsing step (although the error occurs during evaluation)
            raise NotImplementedError(f"The type '{node_type}' is not implemented yet.")
        target_keys.append(key)

    if sanitize:
        result.sanitize()
    return result
# [docs] (link marker left over from the rendered documentation page)
async def extract_categorized_keys(
    condition_expression: str,
    resolve_packages: bool = False,
    resolve_time_conditions: bool = False,
    replace_time_conditions: bool = False,
    ahb_context: Optional[AhbContext] = None,
) -> CategorizedKeyExtract:
    """
    Parse a condition expression and return its keys grouped by category.

    The expression is parsed with ``parse_expression_including_unresolved_subexpressions``; all options are
    forwarded to it unchanged. The resulting tree is passed to ``extract_categorized_keys_from_tree`` with
    ``sanitize=True``. The returned CategorizedKeyExtract serves as a template for content evaluation.

    :param condition_expression: the expression to analyse
    :param resolve_packages: forwarded to the expression resolver
    :param resolve_time_conditions: forwarded to the expression resolver
    :param replace_time_conditions: forwarded to the expression resolver
    :param ahb_context: required when resolve_packages=True; provides the package resolver
    :return: the categorized keys of the expression
    """
    # The resolver is imported here and not at module level because of
    # ImportError: cannot import name 'parse_condition_expression_to_tree' from partially initialized module
    # 'ahbicht.expressions.condition_expression_parser' (most likely due to a circular import)
    # pylint: disable=import-outside-toplevel
    from ahbicht.expressions.expression_resolver import (
        parse_expression_including_unresolved_subexpressions as parse_with_unresolved_subexpressions,
    )

    resolver_options = {
        "resolve_packages": resolve_packages,
        "resolve_time_conditions": resolve_time_conditions,
        "replace_time_conditions": replace_time_conditions,
        "ahb_context": ahb_context,
    }
    parsed_tree = await parse_with_unresolved_subexpressions(condition_expression, **resolver_options)
    return extract_categorized_keys_from_tree(parsed_tree, sanitize=True)