diff --git a/uncoder-core/app/translator/core/custom_types/values.py b/uncoder-core/app/translator/core/custom_types/values.py
index 440123dd..b97a922e 100644
--- a/uncoder-core/app/translator/core/custom_types/values.py
+++ b/uncoder-core/app/translator/core/custom_types/values.py
@@ -10,7 +10,7 @@ class ValueType(CustomEnum):
     no_quotes_value = "no_q_value"
     bool_value = "bool_value"
     regex_value = "re_value"
-    greater_than_or_equal = "gte_value"
-    less_than_or_equal = "lte_value"
+    gte_value = "gte_value"
+    lte_value = "lte_value"
     multi_value = "multi_value"
-    ip = "ip"
+    ip_value = "ip_value"
diff --git a/uncoder-core/app/translator/core/render.py b/uncoder-core/app/translator/core/render.py
index 97709dd0..72b185cf 100644
--- a/uncoder-core/app/translator/core/render.py
+++ b/uncoder-core/app/translator/core/render.py
@@ -76,7 +76,7 @@ def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_typ
         return value_type or ValueType.value

     @staticmethod
-    def _wrap_str_value(value: str) -> str:
+    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:  # noqa: ARG004
         return value

     @staticmethod
@@ -98,10 +98,10 @@ def _pre_process_value(
         value_type = self._get_value_type(field, value, value_type)
         if isinstance(value, StrValue):
             value = self.str_value_manager.from_container_to_str(value, value_type)
-            return self._wrap_str_value(value) if wrap_str else value
+            return self._wrap_str_value(value, value_type) if wrap_str else value
         if isinstance(value, str):
             value = self.str_value_manager.escape_manager.escape(value, value_type)
-            return self._wrap_str_value(value) if wrap_str else value
+            return self._wrap_str_value(value, value_type) if wrap_str else value
         if isinstance(value, bool):
             return self._map_bool_value(value)
         if isinstance(value, int):
@@ -428,14 +428,18 @@ def _generate_from_tokenized_query_container_by_source_mapping(
         self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
     ) -> str:
         unmapped_fields = self.mappings.check_fields_mapping_existence(
-            query_container.meta_info.query_fields, source_mapping
+            query_container.meta_info.query_fields,
+            query_container.meta_info.function_fields_map,
+            self.platform_functions.manager.supported_render_names,
+            source_mapping,
         )
         rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
         prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix)

         if source_mapping.raw_log_fields:
             defined_raw_log_fields = self.generate_raw_log_fields(
-                fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+                fields=query_container.meta_info.query_fields + query_container.meta_info.function_fields,
+                source_mapping=source_mapping,
             )
             prefix += f"\n{defined_raw_log_fields}"
         query = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
diff --git a/uncoder-core/app/translator/core/str_value_manager.py b/uncoder-core/app/translator/core/str_value_manager.py
index 1151d7a3..517ee71d 100644
--- a/uncoder-core/app/translator/core/str_value_manager.py
+++ b/uncoder-core/app/translator/core/str_value_manager.py
@@ -130,6 +130,14 @@ def has_spec_symbols(self) -> bool:
         return any(isinstance(el, BaseSpecSymbol) for el in self.split_value)


+RE_STR_ALPHA_NUM_SYMBOLS_MAP = {
+    "b": ReWordBoundarySymbol,
+    "w": ReWordSymbol,
+    "d": ReDigitalSymbol,
+    "s": ReWhiteSpaceSymbol,
+}
+
+
 RE_STR_SPEC_SYMBOLS_MAP = {
     "?": ReZeroOrOneQuantifier,
     "*": ReZeroOrMoreQuantifier,
@@ -189,7 +197,7 @@ def
from_str_to_container( ) -> StrValue: return StrValue(value=value, split_value=[value]) - def from_re_str_to_container(self, value: str) -> StrValue: + def from_re_str_to_container(self, value: str, value_type: str = ValueType.regex_value) -> StrValue: # noqa: ARG002 split = [] prev_char = None inside_curly_brackets = False diff --git a/uncoder-core/app/translator/platforms/anomali/renders/anomali.py b/uncoder-core/app/translator/platforms/anomali/renders/anomali.py index 6f9e89f6..4c8a76f5 100644 --- a/uncoder-core/app/translator/platforms/anomali/renders/anomali.py +++ b/uncoder-core/app/translator/platforms/anomali/renders/anomali.py @@ -22,10 +22,10 @@ from app.translator.managers import render_manager from app.translator.platforms.anomali.const import anomali_query_details from app.translator.platforms.anomali.mapping import AnomaliMappings, anomali_query_mappings -from app.translator.platforms.base.sql.renders.sql import SqlFieldValueRender +from app.translator.platforms.base.sql.renders.sql import SQLFieldValueRender -class AnomaliFieldValueRender(SqlFieldValueRender): +class AnomaliFieldValueRender(SQLFieldValueRender): details: PlatformDetails = anomali_query_details def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: diff --git a/uncoder-core/app/translator/platforms/athena/parsers/athena.py b/uncoder-core/app/translator/platforms/athena/parsers/athena.py index 9e2bd555..2055e362 100644 --- a/uncoder-core/app/translator/platforms/athena/parsers/athena.py +++ b/uncoder-core/app/translator/platforms/athena/parsers/athena.py @@ -20,11 +20,11 @@ from app.translator.managers import parser_manager from app.translator.platforms.athena.const import athena_query_details from app.translator.platforms.athena.mapping import AthenaMappings, athena_query_mappings -from app.translator.platforms.base.sql.parsers.sql import SqlQueryParser +from app.translator.platforms.base.sql.parsers.sql import SQLQueryParser @parser_manager.register_supported_by_roota -class AthenaQueryParser(SqlQueryParser): +class AthenaQueryParser(SQLQueryParser): details: PlatformDetails = athena_query_details mappings: AthenaMappings = athena_query_mappings query_delimiter_pattern = r"\sFROM\s\S*\sWHERE\s" diff --git a/uncoder-core/app/translator/platforms/athena/renders/athena.py b/uncoder-core/app/translator/platforms/athena/renders/athena.py index 2b431af2..aa606742 100644 --- a/uncoder-core/app/translator/platforms/athena/renders/athena.py +++ b/uncoder-core/app/translator/platforms/athena/renders/athena.py @@ -21,15 +21,15 @@ from app.translator.managers import render_manager from app.translator.platforms.athena.const import athena_query_details from app.translator.platforms.athena.mapping import AthenaMappings, athena_query_mappings -from app.translator.platforms.base.sql.renders.sql import SqlFieldValueRender, SqlQueryRender +from app.translator.platforms.base.sql.renders.sql import SQLFieldValueRender, SQLQueryRender -class AthenaFieldValueRender(SqlFieldValueRender): +class AthenaFieldValueRender(SQLFieldValueRender): details: PlatformDetails = athena_query_details @render_manager.register -class AthenaQueryRender(SqlQueryRender): +class AthenaQueryRender(SQLQueryRender): details: PlatformDetails = athena_query_details mappings: AthenaMappings = athena_query_mappings diff --git a/uncoder-core/app/translator/platforms/base/aql/renders/aql.py b/uncoder-core/app/translator/platforms/base/aql/renders/aql.py index 58fbc3ff..6964b2db 100644 --- 
a/uncoder-core/app/translator/platforms/base/aql/renders/aql.py +++ b/uncoder-core/app/translator/platforms/base/aql/renders/aql.py @@ -31,7 +31,7 @@ class AQLFieldValueRender(BaseFieldValueRender): str_value_manager = aql_str_value_manager @staticmethod - def _wrap_str_value(value: str) -> str: + def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str: # noqa: ARG004 return f"'{value}'" def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: diff --git a/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py b/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py index 2f13931b..847be3bd 100644 --- a/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py +++ b/uncoder-core/app/translator/platforms/base/aql/str_value_manager.py @@ -23,12 +23,9 @@ from app.translator.core.custom_types.values import ValueType from app.translator.core.str_value_manager import ( CONTAINER_SPEC_SYMBOLS_MAP, + RE_STR_ALPHA_NUM_SYMBOLS_MAP, RE_STR_SPEC_SYMBOLS_MAP, BaseSpecSymbol, - ReDigitalSymbol, - ReWhiteSpaceSymbol, - ReWordBoundarySymbol, - ReWordSymbol, SingleSymbolWildCard, StrValue, StrValueManager, @@ -43,12 +40,7 @@ class AQLStrValueManager(StrValueManager): escape_manager = aql_escape_manager container_spec_symbols_map: ClassVar[dict[type[BaseSpecSymbol], str]] = AQL_CONTAINER_SPEC_SYMBOLS_MAP - re_str_alpha_num_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = { - "b": ReWordBoundarySymbol, - "w": ReWordSymbol, - "d": ReDigitalSymbol, - "s": ReWhiteSpaceSymbol, - } + re_str_alpha_num_symbols_map = RE_STR_ALPHA_NUM_SYMBOLS_MAP re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = { "_": SingleSymbolWildCard, @@ -78,7 +70,7 @@ def from_str_to_container( return StrValue(value, self._concat(split)) - def from_re_str_to_container(self, value: str) -> StrValue: + def from_re_str_to_container(self, value: str, value_type: str = ValueType.regex_value) -> StrValue: # noqa: ARG002 value = value.replace("''", "'") return super().from_re_str_to_container(value) diff --git a/uncoder-core/app/translator/platforms/base/aql/tokenizer.py b/uncoder-core/app/translator/platforms/base/aql/tokenizer.py index ff04be20..83c6e30f 100644 --- a/uncoder-core/app/translator/platforms/base/aql/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/aql/tokenizer.py @@ -17,7 +17,7 @@ """ import re -from typing import ClassVar, Optional, Union +from typing import Any, ClassVar, Optional, Union from app.translator.core.custom_types.tokens import OperatorType from app.translator.core.custom_types.values import ValueType @@ -27,7 +27,6 @@ from app.translator.core.models.query_tokens.function_value import FunctionValue from app.translator.core.models.query_tokens.identifier import Identifier from app.translator.core.models.query_tokens.keyword import Keyword -from app.translator.core.str_value_manager import StrValue from app.translator.core.tokenizer import QueryTokenizer from app.translator.platforms.base.aql.const import ( DOUBLE_QUOTES_FIELD_NAME_PATTERN, @@ -75,12 +74,13 @@ def should_process_value_wildcards(operator: Optional[str]) -> bool: def get_operator_and_value( self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None - ) -> tuple[str, StrValue]: + ) -> tuple[str, Any]: if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None: - return mapped_operator, StrValue(num_value, split_value=[num_value]) + return 
mapped_operator, num_value if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: - return mapped_operator, StrValue(bool_value, split_value=[bool_value]) + mapped_bool_value = bool_value == "true" + return mapped_operator, mapped_bool_value if (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: if mapped_operator == OperatorType.REGEX: diff --git a/uncoder-core/app/translator/platforms/base/lucene/escape_manager.py b/uncoder-core/app/translator/platforms/base/lucene/escape_manager.py index 8c9f0164..215f335c 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/escape_manager.py +++ b/uncoder-core/app/translator/platforms/base/lucene/escape_manager.py @@ -10,7 +10,7 @@ class LuceneEscapeManager(EscapeManager): ValueType.value: [ EscapeDetails(pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])', escape_symbols=r"\\\1") ], - ValueType.ip: [EscapeDetails(pattern=r"([/])", escape_symbols=r"\\\1")], + ValueType.ip_value: [EscapeDetails(pattern=r"([/])", escape_symbols=r"\\\1")], } diff --git a/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py b/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py index f8511d82..05d07ad2 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py +++ b/uncoder-core/app/translator/platforms/base/lucene/renders/lucene.py @@ -34,7 +34,7 @@ class LuceneFieldValueRender(BaseFieldValueRender): def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_type: Optional[str] = None) -> str: # noqa: ARG004 is_ip_field = field_name and (field_name.endswith(".ip") or field_name.endswith(".address")) if is_ip_field and value_type != ValueType.regex_value: - return ValueType.ip + return ValueType.ip_value return ValueType.value diff --git a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py index 8be19ffe..84a5de4f 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py @@ -52,8 +52,8 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin): rf"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*\?0-9=%#№!;_/,\'\.$@|]|\\[*?\"-_=%#№!;,\.$@/\s\\])+)\s*" ) re_value_pattern = rf"/(?P<{ValueType.regex_value}>(?:[:a-zA-Z\*\?0-9=+%#№;\\\-_\,\"\'\.$&^@!\(\)\{{\}}\[\]\s?<>|]|\\\/)+)/(?=\s+|\)|$)" # noqa: E501 - gte_value_pattern = rf"\[\s*(?P<{ValueType.greater_than_or_equal}>{_num_value_pattern})\s+TO\s+\*\s*\]" - lte_value_pattern = rf"\[\s*\*\s+TO\s+(?P<{ValueType.less_than_or_equal}>{_num_value_pattern})\s*\]" + gte_value_pattern = rf"\[\s*(?P<{ValueType.gte_value}>{_num_value_pattern})\s+TO\s+\*\s*\]" + lte_value_pattern = rf"\[\s*\*\s+TO\s+(?P<{ValueType.lte_value}>{_num_value_pattern})\s*\]" range_value_pattern = rf"{gte_value_pattern}|{lte_value_pattern}" _value_pattern = rf"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{range_value_pattern}" # noqa: E501 keyword_pattern = ( @@ -97,10 +97,10 @@ def get_operator_and_value( # noqa: PLR0911 if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: return mapped_operator, lucene_str_value_manager.from_str_to_container(d_q_value) - if (gte_value := get_match_group(match, group_name=ValueType.greater_than_or_equal)) is not None: + if (gte_value := get_match_group(match, group_name=ValueType.gte_value)) is not None: return 
OperatorType.GTE, StrValue(gte_value, split_value=[gte_value]) - if (lte_value := get_match_group(match, group_name=ValueType.less_than_or_equal)) is not None: + if (lte_value := get_match_group(match, group_name=ValueType.lte_value)) is not None: return OperatorType.LTE, StrValue(lte_value, split_value=[lte_value]) return super().get_operator_and_value(match, mapped_operator, operator) diff --git a/uncoder-core/app/translator/platforms/base/spl/escape_manager.py b/uncoder-core/app/translator/platforms/base/spl/escape_manager.py index 9b7e0154..315d57f2 100644 --- a/uncoder-core/app/translator/platforms/base/spl/escape_manager.py +++ b/uncoder-core/app/translator/platforms/base/spl/escape_manager.py @@ -5,8 +5,8 @@ from app.translator.core.models.escape_details import EscapeDetails -class SplEscapeManager(EscapeManager): +class SPLEscapeManager(EscapeManager): escape_map: ClassVar[dict[str, list[EscapeDetails]]] = {ValueType.value: [EscapeDetails(pattern=r"([<>=\"'\|\\])")]} -spl_escape_manager = SplEscapeManager() +spl_escape_manager = SPLEscapeManager() diff --git a/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py b/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py index 7818b4ac..fc34b551 100644 --- a/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py +++ b/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py @@ -21,19 +21,19 @@ from app.translator.core.models.functions.base import ParsedFunctions from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer from app.translator.core.parser import PlatformQueryParser -from app.translator.platforms.base.spl.functions import SplFunctions -from app.translator.platforms.base.spl.tokenizer import SplTokenizer +from app.translator.platforms.base.spl.functions import SPLFunctions +from app.translator.platforms.base.spl.tokenizer import SPLTokenizer TSTATS_FUNC = "tstats" -class SplQueryParser(PlatformQueryParser): +class SPLQueryParser(PlatformQueryParser): log_source_pattern = r"^___source_type___\s*=\s*(?:\"(?P[%a-zA-Z_*:0-9\-/]+)\"|(?P[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" 
# noqa: E501 - rule_name_pattern = r"`(?P(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`" + rule_name_pattern = r"`(?P(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`" # noqa: RUF001 log_source_key_types = ("index", "source", "sourcetype", "sourcecategory") - platform_functions: SplFunctions = None - tokenizer = SplTokenizer() + platform_functions: SPLFunctions = None + tokenizer = SPLTokenizer() wrapped_with_comment_pattern = r"^\s*```(?:|\n|.)*```" @@ -56,7 +56,7 @@ def _parse_log_sources(self, query: str) -> tuple[dict[str, list[str]], str]: def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFunctions]: if re.match(self.rule_name_pattern, query): search = re.search(self.rule_name_pattern, query, flags=re.IGNORECASE) - query = query[:search.start()] + query[search.end():] + query = query[: search.start()] + query[search.end() :] query = query.strip() log_sources, query = self._parse_log_sources(query) query, functions = self.platform_functions.parse(query) @@ -72,9 +72,13 @@ def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContain query, log_sources, functions = self._parse_query(raw_query_container.query) query_tokens = self.get_query_tokens(query) - field_tokens = self.get_field_tokens(query_tokens, functions.functions) - source_mappings = self.get_source_mappings(field_tokens, log_sources) + query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens( + query_tokens, functions.functions + ) + source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources) meta_info = raw_query_container.meta_info - meta_info.query_fields = field_tokens + meta_info.query_fields = query_field_tokens + meta_info.function_fields = function_field_tokens + meta_info.function_fields_map = function_field_tokens_map meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings] return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions) diff --git a/uncoder-core/app/translator/platforms/base/spl/renders/spl.py b/uncoder-core/app/translator/platforms/base/spl/renders/spl.py index c3c36675..2246fcbe 100644 --- a/uncoder-core/app/translator/platforms/base/spl/renders/spl.py +++ b/uncoder-core/app/translator/platforms/base/spl/renders/spl.py @@ -26,11 +26,11 @@ from app.translator.platforms.base.spl.str_value_manager import spl_str_value_manager -class SplFieldValueRender(BaseFieldValueRender): +class SPLFieldValueRender(BaseFieldValueRender): str_value_manager = spl_str_value_manager @staticmethod - def _wrap_str_value(value: str) -> str: + def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str: # noqa: ARG004 return f'"{value}"' def _pre_process_value( @@ -42,7 +42,7 @@ def _pre_process_value( wrap_int: bool = False, # noqa: ARG002 ) -> Union[int, str]: value = super()._pre_process_value(field, value, value_type=value_type, wrap_str=wrap_str) - return self._wrap_str_value(str(value)) if not isinstance(value, str) else value + return self._wrap_str_value(str(value), value_type) if not isinstance(value, str) else value def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): @@ -87,7 +87,7 @@ def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: return f"{self._pre_process_value(field, value, wrap_str=True)}" -class SplQueryRender(PlatformQueryRender): +class SPLQueryRender(PlatformQueryRender): or_token = "OR" and_token = "AND" not_token = "NOT" 
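Note on the `_wrap_str_value` change threaded through the renders above: the wrapper now receives the value type, so a platform render can choose its quoting per value kind instead of hard-coding one style. The standalone sketch below only illustrates that pattern with simplified stand-ins for `BaseFieldValueRender` and `ValueType`; it is not code from this diff, and `ExampleFieldValueRender` is a hypothetical platform render.

```python
class ValueType:
    value = "value"
    regex_value = "re_value"


class BaseFieldValueRender:
    @staticmethod
    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:
        # base behaviour, as in core/render.py: no quoting by default
        return value

    def _pre_process_value(self, field: str, value: str, value_type: str = ValueType.value) -> str:
        # the value type is now forwarded to the wrapper instead of being dropped
        return self._wrap_str_value(value, value_type)


class ExampleFieldValueRender(BaseFieldValueRender):
    @staticmethod
    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:
        # hypothetical platform: regex values keep slash delimiters, plain strings get single quotes
        return f"/{value}/" if value_type == ValueType.regex_value else f"'{value}'"


render = ExampleFieldValueRender()
print(render._pre_process_value("process.name", "cmd.exe"))                           # 'cmd.exe'
print(render._pre_process_value("process.name", r"cmd\.exe", ValueType.regex_value))  # /cmd\.exe/
```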
diff --git a/uncoder-core/app/translator/platforms/base/spl/str_value_manager.py b/uncoder-core/app/translator/platforms/base/spl/str_value_manager.py
index ef638d6c..3ee78fe3 100644
--- a/uncoder-core/app/translator/platforms/base/spl/str_value_manager.py
+++ b/uncoder-core/app/translator/platforms/base/spl/str_value_manager.py
@@ -23,7 +23,7 @@ from app.translator.platforms.base.spl.escape_manager import spl_escape_manager


-class SplStrValueManager(StrValueManager):
+class SPLStrValueManager(StrValueManager):
     escape_manager = spl_escape_manager
     str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {"*": UnboundLenWildCard}

@@ -58,4 +58,4 @@ def from_str_to_container(
         return StrValue(self.escape_manager.remove_escape(value), self._concat(split))


-spl_str_value_manager = SplStrValueManager()
+spl_str_value_manager = SPLStrValueManager()
diff --git a/uncoder-core/app/translator/platforms/base/spl/tokenizer.py b/uncoder-core/app/translator/platforms/base/spl/tokenizer.py
index 20133239..0434f121 100644
--- a/uncoder-core/app/translator/platforms/base/spl/tokenizer.py
+++ b/uncoder-core/app/translator/platforms/base/spl/tokenizer.py
@@ -33,7 +33,7 @@ from app.translator.tools.utils import get_match_group


-class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
+class SPLTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     single_value_operators_map: ClassVar[dict[str, str]] = {
         "=": OperatorType.EQ,
         "<=": OperatorType.LTE,
diff --git a/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py b/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py
index 735f95c6..b33fc8e9 100644
--- a/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py
+++ b/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py
@@ -17,35 +17,37 @@
 """
 import re
+from typing import Optional, Union

+from app.translator.core.models.functions.base import ParsedFunctions
 from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
 from app.translator.core.parser import PlatformQueryParser
-from app.translator.platforms.base.sql.tokenizer import SqlTokenizer
+from app.translator.platforms.base.sql.tokenizer import SQLTokenizer


-class SqlQueryParser(PlatformQueryParser):
-    tokenizer = SqlTokenizer()
+class SQLQueryParser(PlatformQueryParser):
+    tokenizer = SQLTokenizer()
     query_delimiter_pattern = r"\sFROM\s\S*\sWHERE\s"
     table_pattern = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"
     wrapped_with_comment_pattern = r"^\s*--.*(?:\n|$)"

-    def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
+    def _parse_query(self, query: str) -> tuple[str, dict[str, Union[list[str], list[int]]], Optional[ParsedFunctions]]:
         log_source = {"table": []}
         if re.search(self.query_delimiter_pattern, query, flags=re.IGNORECASE):
             table_search = re.search(self.table_pattern, query)
             table = table_search.group("table")
             log_source["table"] = [table]
-            return re.split(self.query_delimiter_pattern, query, flags=re.IGNORECASE)[1], log_source
+            return re.split(self.query_delimiter_pattern, query, flags=re.IGNORECASE)[1], log_source, None

-        return query, log_source
+        return query, log_source, None

     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
-        query, log_sources = self._parse_query(raw_query_container.query)
+        query, log_sources, _ = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+ query_field_tokens, _, _ = self.get_field_tokens(query_tokens) + source_mappings = self.get_source_mappings(query_field_tokens, log_sources) meta_info = raw_query_container.meta_info - meta_info.query_fields = field_tokens + meta_info.query_fields = query_field_tokens meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings] return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info) diff --git a/uncoder-core/app/translator/platforms/base/sql/renders/sql.py b/uncoder-core/app/translator/platforms/base/sql/renders/sql.py index e7178922..8da8f780 100644 --- a/uncoder-core/app/translator/platforms/base/sql/renders/sql.py +++ b/uncoder-core/app/translator/platforms/base/sql/renders/sql.py @@ -25,11 +25,11 @@ from app.translator.platforms.base.sql.str_value_manager import sql_str_value_manager -class SqlFieldValueRender(BaseFieldValueRender): +class SQLFieldValueRender(BaseFieldValueRender): str_value_manager = sql_str_value_manager @staticmethod - def _wrap_str_value(value: str) -> str: + def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str: # noqa: ARG004 return f"'{value}'" def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: @@ -82,7 +82,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: return f"regexp_like({field}, {regex_str})" -class SqlQueryRender(PlatformQueryRender): +class SQLQueryRender(PlatformQueryRender): or_token = "OR" and_token = "AND" not_token = "NOT" diff --git a/uncoder-core/app/translator/platforms/base/sql/str_value_manager.py b/uncoder-core/app/translator/platforms/base/sql/str_value_manager.py index 5f47b8be..ade205e1 100644 --- a/uncoder-core/app/translator/platforms/base/sql/str_value_manager.py +++ b/uncoder-core/app/translator/platforms/base/sql/str_value_manager.py @@ -22,12 +22,9 @@ from app.translator.core.str_value_manager import ( CONTAINER_SPEC_SYMBOLS_MAP, + RE_STR_ALPHA_NUM_SYMBOLS_MAP, RE_STR_SPEC_SYMBOLS_MAP, BaseSpecSymbol, - ReDigitalSymbol, - ReWhiteSpaceSymbol, - ReWordBoundarySymbol, - ReWordSymbol, SingleSymbolWildCard, StrValue, StrValueManager, @@ -43,12 +40,7 @@ class SQLStrValueManager(StrValueManager): escape_manager = sql_escape_manager container_spec_symbols_map: ClassVar[dict[type[BaseSpecSymbol], str]] = SQL_CONTAINER_SPEC_SYMBOLS_MAP - re_str_alpha_num_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = { - "b": ReWordBoundarySymbol, - "w": ReWordSymbol, - "d": ReDigitalSymbol, - "s": ReWhiteSpaceSymbol, - } + re_str_alpha_num_symbols_map = RE_STR_ALPHA_NUM_SYMBOLS_MAP re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = { "_": SingleSymbolWildCard, @@ -85,7 +77,7 @@ def from_str_to_container( return StrValue(value, self._concat(split)) - def from_re_str_to_container(self, value: str) -> StrValue: + def from_re_str_to_container(self, value: str, value_type: str = SQLValueType.regex_value) -> StrValue: # noqa: ARG002 value = value.replace("''", "'") return super().from_re_str_to_container(value) diff --git a/uncoder-core/app/translator/platforms/base/sql/tokenizer.py b/uncoder-core/app/translator/platforms/base/sql/tokenizer.py index fe92c8f6..69456ea2 100644 --- a/uncoder-core/app/translator/platforms/base/sql/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/sql/tokenizer.py @@ -33,7 +33,7 @@ _ESCAPE_SYMBOL_GROUP_NAME = "escape_symbol" -class SqlTokenizer(QueryTokenizer): +class SQLTokenizer(QueryTokenizer): single_value_operators_map: 
ClassVar[dict[str, str]] = { "=": OperatorType.EQ, "<=": OperatorType.LTE, @@ -69,7 +69,8 @@ def get_operator_and_value( return mapped_operator, num_value if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: - return mapped_operator, bool_value + mapped_bool_value = bool_value == "true" + return mapped_operator, mapped_bool_value if (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None: escape_symbol = get_match_group(match, group_name=_ESCAPE_SYMBOL_GROUP_NAME) diff --git a/uncoder-core/app/translator/platforms/carbonblack/renders/carbonblack.py b/uncoder-core/app/translator/platforms/carbonblack/renders/carbonblack.py new file mode 100644 index 00000000..2f52229d --- /dev/null +++ b/uncoder-core/app/translator/platforms/carbonblack/renders/carbonblack.py @@ -0,0 +1,103 @@ +from app.translator.const import DEFAULT_VALUE_TYPE +from app.translator.core.custom_types.values import ValueType +from app.translator.core.models.platform_details import PlatformDetails +from app.translator.core.render import BaseFieldValueRender, PlatformQueryRender +from app.translator.managers import render_manager +from app.translator.platforms.carbonblack.const import carbonblack_query_details +from app.translator.platforms.carbonblack.mapping import CarbonBlackMappings, carbonblack_query_mappings +from app.translator.platforms.carbonblack.str_value_manager import ( + CarbonBlackStrValueManager, + carbon_black_str_value_manager, +) + + +class CarbonBlackFieldValueRender(BaseFieldValueRender): + details: PlatformDetails = carbonblack_query_details + str_value_manager: CarbonBlackStrValueManager = carbon_black_str_value_manager + + @staticmethod + def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str: # noqa: ARG004 + return f'"{value}"' + + @staticmethod + def _wrap_int_value(value: int) -> str: + return f'"{value}"' + + def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.equal_modifier(field=field, value=v) for v in value)})" + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{field}:{value}" + + def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = [ + self._pre_process_value(field, val, value_type=ValueType.value, wrap_str=True, wrap_int=True) + for val in value + ] + return f"(NOT {field}:({self.or_token.join(values)})" + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"(NOT {field}:{self.apply_value(value)})" + + def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.or_token.join( + [f"*{self._pre_process_value(field, val, value_type=ValueType.value)}*" for val in value] + ) + return f"{field}:({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f"{field}:*{value}*" + + def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.or_token.join( + [f"*{self._pre_process_value(field, val, value_type=ValueType.value)}" for val in value] + ) + return f"{field}:({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f"{field}:*{value}" + + def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, 
list): + values = self.or_token.join( + [f"{self._pre_process_value(field, val, value_type=ValueType.value)}*" for val in value] + ) + return f"{field}:({values}" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f"{field}:{value}*" + + def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" + value = self._pre_process_value(field, value, value_type=ValueType.regex_value) + return f"{field}:/{value}/" + + def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f"(*{value}*)" + + def is_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.is_none(field=field, value=v) for v in value)})" + return f"NOT _exists_:{field}" + + def is_not_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.is_not_none(field=field, value=v) for v in value)})" + return f"_exists_:{field}" + + +@render_manager.register +class CarbonBlackQueryRender(PlatformQueryRender): + details: PlatformDetails = carbonblack_query_details + mappings: CarbonBlackMappings = carbonblack_query_mappings + + or_token = "OR" + and_token = "AND" + not_token = "NOT" + + comment_symbol = "//" + + field_value_render = CarbonBlackFieldValueRender(or_token=or_token) diff --git a/uncoder-core/app/translator/platforms/chronicle/tokenizer.py b/uncoder-core/app/translator/platforms/chronicle/tokenizer.py index a0943952..b1779909 100644 --- a/uncoder-core/app/translator/platforms/chronicle/tokenizer.py +++ b/uncoder-core/app/translator/platforms/chronicle/tokenizer.py @@ -57,7 +57,8 @@ def get_operator_and_value( return mapped_operator, num_value if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None: - return mapped_operator, bool_value + mapped_bool_value = bool_value == "true" + return mapped_operator, mapped_bool_value if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None: return mapped_operator, self.escape_manager.remove_escape(d_q_value) diff --git a/uncoder-core/app/translator/platforms/crowdstrike/parsers/crowdstrike.py b/uncoder-core/app/translator/platforms/crowdstrike/parsers/crowdstrike.py index 08ec0b7f..b48bc4b0 100644 --- a/uncoder-core/app/translator/platforms/crowdstrike/parsers/crowdstrike.py +++ b/uncoder-core/app/translator/platforms/crowdstrike/parsers/crowdstrike.py @@ -18,14 +18,14 @@ from app.translator.core.models.platform_details import PlatformDetails from app.translator.managers import parser_manager -from app.translator.platforms.base.spl.parsers.spl import SplQueryParser +from app.translator.platforms.base.spl.parsers.spl import SPLQueryParser from app.translator.platforms.crowdstrike.const import crowdstrike_query_details from app.translator.platforms.crowdstrike.functions import CrowdStrikeFunctions, crowd_strike_functions from app.translator.platforms.crowdstrike.mapping import CrowdstrikeMappings, crowdstrike_query_mappings @parser_manager.register_supported_by_roota -class CrowdStrikeQueryParser(SplQueryParser): +class CrowdStrikeQueryParser(SPLQueryParser): details: PlatformDetails = crowdstrike_query_details 
log_source_pattern = r"___source_type___\s*=\s*(?:\"(?P[%a-zA-Z_*:0-9\-/]+)\"|(?P[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" # noqa: E501 diff --git a/uncoder-core/app/translator/platforms/crowdstrike/renders/crowdstrike.py b/uncoder-core/app/translator/platforms/crowdstrike/renders/crowdstrike.py index 40911708..3c77bedb 100644 --- a/uncoder-core/app/translator/platforms/crowdstrike/renders/crowdstrike.py +++ b/uncoder-core/app/translator/platforms/crowdstrike/renders/crowdstrike.py @@ -19,18 +19,18 @@ from app.translator.core.models.platform_details import PlatformDetails from app.translator.managers import render_manager -from app.translator.platforms.base.spl.renders.spl import SplFieldValueRender, SplQueryRender +from app.translator.platforms.base.spl.renders.spl import SPLFieldValueRender, SPLQueryRender from app.translator.platforms.crowdstrike.const import crowdstrike_query_details from app.translator.platforms.crowdstrike.functions import CrowdStrikeFunctions, crowd_strike_functions from app.translator.platforms.crowdstrike.mapping import CrowdstrikeMappings, crowdstrike_query_mappings -class CrowdStrikeFieldValueRender(SplFieldValueRender): +class CrowdStrikeFieldValueRender(SPLFieldValueRender): details = crowdstrike_query_details @render_manager.register -class CrowdStrikeQueryRender(SplQueryRender): +class CrowdStrikeQueryRender(SPLQueryRender): details: PlatformDetails = crowdstrike_query_details mappings: CrowdstrikeMappings = crowdstrike_query_mappings platform_functions: CrowdStrikeFunctions = None diff --git a/uncoder-core/app/translator/platforms/elasticsearch/renders/elasticsearch_eql.py b/uncoder-core/app/translator/platforms/elasticsearch/renders/elasticsearch_eql.py new file mode 100644 index 00000000..0580a4fe --- /dev/null +++ b/uncoder-core/app/translator/platforms/elasticsearch/renders/elasticsearch_eql.py @@ -0,0 +1,181 @@ +from typing import Optional, Union + +from app.translator.const import DEFAULT_VALUE_TYPE +from app.translator.core.const import QUERY_TOKEN_TYPE +from app.translator.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType +from app.translator.core.custom_types.values import ValueType +from app.translator.core.mapping import LogSourceSignature, SourceMapping +from app.translator.core.models.platform_details import PlatformDetails +from app.translator.core.models.query_container import TokenizedQueryContainer +from app.translator.core.models.query_tokens.field_value import FieldValue +from app.translator.core.models.query_tokens.identifier import Identifier +from app.translator.core.render import BaseFieldValueRender, PlatformQueryRender +from app.translator.core.str_value_manager import StrValueManager +from app.translator.managers import render_manager +from app.translator.platforms.base.lucene.mapping import LuceneMappings +from app.translator.platforms.elasticsearch.const import elastic_eql_query_details +from app.translator.platforms.elasticsearch.mapping import elastic_eql_query_mappings +from app.translator.platforms.elasticsearch.str_value_manager import eql_str_value_manager + + +class ElasticSearchEQLFieldValue(BaseFieldValueRender): + details: PlatformDetails = elastic_eql_query_details + str_value_manager: StrValueManager = eql_str_value_manager + list_token = ", " + + @staticmethod + def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str: # noqa: ARG004 + return f'"{value}"' + + @staticmethod + def _wrap_int_value(value: int) -> str: + return f'"{value}"' + + @staticmethod + def 
apply_field(field: str) -> str: + if field.count("-") > 0 or field.count(" ") > 0 or field[0].isdigit(): + return f"`{field}`" + if field.endswith(".text"): + return field[:-5] + return field + + def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.list_token.join( + self._pre_process_value(field, v, value_type=ValueType.value, wrap_str=True, wrap_int=True) + for v in value + ) + return f"{self.apply_field(field)} : ({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{self.apply_field(field)} : {value}" + + def less_modifier(self, field: str, value: Union[int, str]) -> str: + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{self.apply_field(field)} < {value}" + + def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{self.apply_field(field)} <= {value}" + + def greater_modifier(self, field: str, value: Union[int, str]) -> str: + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{self.apply_field(field)} > {value}" + + def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{self.apply_field(field)} >= {value}" + + def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.list_token.join( + self._pre_process_value(field, v, value_type=ValueType.value, wrap_str=True, wrap_int=True) + for v in value + ) + return f"{self.apply_field(field)} != ({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value, wrap_str=True, wrap_int=True) + return f"{self.apply_field(field)} != {value}" + + def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.list_token.join( + f'"*{self._pre_process_value(field, v, value_type=ValueType.value)}*"' for v in value + ) + return f"{self.apply_field(field)} : ({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f'{self.apply_field(field)} : "*{value}*"' + + def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.list_token.join( + f'"*{self._pre_process_value(field, v, value_type=ValueType.value)}"' for v in value + ) + return f"{self.apply_field(field)} : ({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f'{self.apply_field(field)} : "*{value}"' + + def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + values = self.list_token.join( + f'"{self._pre_process_value(field, v, value_type=ValueType.value)}*"' for v in value + ) + return f"{self.apply_field(field)} : ({values})" + value = self._pre_process_value(field, value, value_type=ValueType.value) + return f'{self.apply_field(field)} : "{value}*"' + + def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})" + value = self._pre_process_value(field, value, 
value_type=ValueType.regex_value, wrap_int=True) + return f'{self.apply_field(field)} regex~ "{value}.?"' + + def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" + return self._pre_process_value(field, value, wrap_str=True) + + def is_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.is_none(field=field, value=v) for v in value)})" + + return f"{self.apply_field(field)} == null" + + def is_not_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.is_not_none(field=field, value=v) for v in value)})" + + return f"{self.apply_field(field)} != null" + + +@render_manager.register +class ElasticSearchEQLQueryRender(PlatformQueryRender): + details: PlatformDetails = elastic_eql_query_details + mappings: LuceneMappings = elastic_eql_query_mappings + or_token = "or" + and_token = "and" + not_token = "not" + comment_symbol = "//" + field_value_render = ElasticSearchEQLFieldValue(or_token=or_token) + + def generate_prefix(self, log_source_signature: Optional[LogSourceSignature], functions_prefix: str = "") -> str: # noqa: ARG002 + return "any where " + + def in_brackets(self, raw_list: list[QUERY_TOKEN_TYPE]) -> list[QUERY_TOKEN_TYPE]: + return [Identifier(token_type=GroupType.L_PAREN), *raw_list, Identifier(token_type=GroupType.R_PAREN)] + + def _generate_from_tokenized_query_container_by_source_mapping( + self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping + ) -> str: + unmapped_fields = self.mappings.check_fields_mapping_existence( + query_container.meta_info.query_fields, + query_container.meta_info.function_fields_map, + self.platform_functions.manager.supported_render_names, + source_mapping, + ) + rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping) + prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix) + + if source_mapping.raw_log_fields: + defined_raw_log_fields = self.generate_raw_log_fields( + fields=query_container.meta_info.query_fields + query_container.meta_info.function_fields, + source_mapping=source_mapping, + ) + prefix += f"\n{defined_raw_log_fields}" + if source_mapping.conditions: + for field, value in source_mapping.conditions.items(): + tokens = self.in_brackets(query_container.tokens) + extra_tokens = [ + FieldValue(source_name=field, operator=Identifier(token_type=OperatorType.EQ), value=value), + Identifier(token_type=LogicalOperatorType.AND), + ] + query_container.tokens = self.in_brackets([*extra_tokens, *tokens]) + query = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping) + not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported + return self.finalize_query( + prefix=prefix, + query=query, + functions=rendered_functions.rendered, + not_supported_functions=not_supported_functions, + unmapped_fields=unmapped_fields, + meta_info=query_container.meta_info, + source_mapping=source_mapping, + ) diff --git a/uncoder-core/app/translator/platforms/elasticsearch/renders/esql.py b/uncoder-core/app/translator/platforms/elasticsearch/renders/esql.py index ebeba491..7ca84cba 100644 --- a/uncoder-core/app/translator/platforms/elasticsearch/renders/esql.py +++ 
b/uncoder-core/app/translator/platforms/elasticsearch/renders/esql.py @@ -27,10 +27,7 @@ from app.translator.managers import render_manager from app.translator.platforms.elasticsearch.const import elasticsearch_esql_query_details from app.translator.platforms.elasticsearch.mapping import ElasticESQLMappings, esql_query_mappings -from app.translator.platforms.elasticsearch.str_value_manager import ( - ESQLStrValueManager, - esql_str_value_manager -) +from app.translator.platforms.elasticsearch.str_value_manager import ESQLStrValueManager, esql_str_value_manager class ESQLFieldValueRender(BaseFieldValueRender): @@ -48,7 +45,7 @@ def _make_case_insensitive(value: str) -> str: return "".join(container) @staticmethod - def _wrap_str_value(value: str) -> str: + def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str: # noqa: ARG004 return f'"{value}"' @staticmethod diff --git a/uncoder-core/app/translator/platforms/hunters/renders/hunters.py b/uncoder-core/app/translator/platforms/hunters/renders/hunters.py index 4e977a16..b7599b83 100644 --- a/uncoder-core/app/translator/platforms/hunters/renders/hunters.py +++ b/uncoder-core/app/translator/platforms/hunters/renders/hunters.py @@ -19,17 +19,17 @@ from app.translator.core.models.platform_details import PlatformDetails from app.translator.managers import render_manager -from app.translator.platforms.base.sql.renders.sql import SqlFieldValueRender, SqlQueryRender +from app.translator.platforms.base.sql.renders.sql import SQLFieldValueRender, SQLQueryRender from app.translator.platforms.hunters.const import hunters_query_details from app.translator.platforms.hunters.mapping import HuntersMappings, hunters_query_mappings -class HuntersFieldValueRender(SqlFieldValueRender): +class HuntersFieldValueRender(SQLFieldValueRender): details: PlatformDetails = hunters_query_details @render_manager.register -class HuntersQueryRender(SqlQueryRender): +class HuntersQueryRender(SQLQueryRender): details: PlatformDetails = hunters_query_details mappings: HuntersMappings = hunters_query_mappings diff --git a/uncoder-core/app/translator/platforms/microsoft/custom_types/values.py b/uncoder-core/app/translator/platforms/microsoft/custom_types/values.py index c51e6a63..2d46ebac 100644 --- a/uncoder-core/app/translator/platforms/microsoft/custom_types/values.py +++ b/uncoder-core/app/translator/platforms/microsoft/custom_types/values.py @@ -1,6 +1,11 @@ from app.translator.core.custom_types.values import ValueType -class MicrosoftValueType(ValueType): +class KQLValueType(ValueType): verbatim_double_quotes_value = "v_d_q_value" verbatim_single_quotes_value = "v_s_q_value" + + double_quotes_regex_value = "double_quotes_re_value" + single_quotes_regex_value = "single_quotes_re_value" + verbatim_double_quotes_regex_value = "verbatim_double_quotes_re_value" + verbatim_single_quotes_regex_value = "verbatim_single_quotes_re_value" diff --git a/uncoder-core/app/translator/platforms/microsoft/escape_manager.py b/uncoder-core/app/translator/platforms/microsoft/escape_manager.py index cd562cb9..ad4be071 100644 --- a/uncoder-core/app/translator/platforms/microsoft/escape_manager.py +++ b/uncoder-core/app/translator/platforms/microsoft/escape_manager.py @@ -1,12 +1,25 @@ from typing import ClassVar -from app.translator.core.custom_types.values import ValueType from app.translator.core.escape_manager import EscapeManager from app.translator.core.models.escape_details import EscapeDetails +from app.translator.platforms.microsoft.custom_types.values import 
KQLValueType -class MicrosoftEscapeManager(EscapeManager): - escape_map: ClassVar[dict[str, list[EscapeDetails]]] = {ValueType.value: [EscapeDetails(pattern='(?:\\\\)?(")')]} +class MicrosoftKQLEscapeManager(EscapeManager): + escape_map: ClassVar[dict[str, list[EscapeDetails]]] = { + KQLValueType.verbatim_single_quotes_value: [EscapeDetails(pattern=r"(')", escape_symbols=r"'\1")], + KQLValueType.verbatim_double_quotes_regex_value: [ + EscapeDetails(pattern=r"([$^*+()\[\]{}|.?\-\\])", escape_symbols=r"\\\1") + ], + KQLValueType.verbatim_single_quotes_regex_value: [ + EscapeDetails(pattern=r"([$^*+()\[\]{}|.?\-\\])", escape_symbols=r"\\\1") + ], + KQLValueType.single_quotes_regex_value: [ + EscapeDetails(pattern=r"([$^*+()\[\]{}|.?\-])", escape_symbols=r"\\\\\1"), + EscapeDetails(pattern=r"(\\(?![$^*+()\[\]{}|.?\-\\]))", escape_symbols=r"[\\\\\\\1]"), + EscapeDetails(pattern=r"(')", escape_symbols=r"[\\\1]"), + ], + } -microsoft_escape_manager = MicrosoftEscapeManager() +microsoft_kql_escape_manager = MicrosoftKQLEscapeManager() diff --git a/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py b/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py index 961fe98a..9f104942 100644 --- a/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py +++ b/uncoder-core/app/translator/platforms/microsoft/renders/microsoft_sentinel.py @@ -25,77 +25,107 @@ from app.translator.core.render import BaseFieldValueRender, PlatformQueryRender from app.translator.managers import render_manager from app.translator.platforms.microsoft.const import microsoft_sentinel_query_details -from app.translator.platforms.microsoft.escape_manager import microsoft_escape_manager +from app.translator.platforms.microsoft.custom_types.values import KQLValueType from app.translator.platforms.microsoft.functions import MicrosoftFunctions, microsoft_sentinel_functions from app.translator.platforms.microsoft.mapping import MicrosoftSentinelMappings, microsoft_sentinel_query_mappings +from app.translator.platforms.microsoft.str_value_manager import microsoft_kql_str_value_manager class MicrosoftSentinelFieldValueRender(BaseFieldValueRender): details: PlatformDetails = microsoft_sentinel_query_details - escape_manager = microsoft_escape_manager + str_value_manager = microsoft_kql_str_value_manager @staticmethod - def __escape_value(value: Union[int, str]) -> Union[int, str]: - return value.replace("'", "''") if isinstance(value, str) else value + def _wrap_str_value(value: str, value_type: str = KQLValueType.value) -> str: + if value_type == KQLValueType.verbatim_single_quotes_regex_value: + return f"@'(i?){value}'" + + if value_type == KQLValueType.verbatim_double_quotes_regex_value: + return f'@"(i?){value}"' + + if value_type == KQLValueType.single_quotes_regex_value: + return f"'{value}'" + + return f"@'{value}'" def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: - if isinstance(value, str): - return f"{field} =~ @'{self.__escape_value(value)}'" if isinstance(value, list): - prepared_values = ", ".join(f"@'{self.__escape_value(v)}'" for v in value) operator = "in~" if all(isinstance(v, str) for v in value) else "in" - return f"{field} {operator} ({prepared_values})" - return f"{field} == {self.apply_value(value)}" + values = ", ".join( + self._pre_process_value(field, v, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + for v in value + ) + return f"{field} {operator} ({values})" + + operator = "=~" if isinstance(value, str) else 
"==" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} {operator} {value}" def less_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f"{field} < {value}" - return f"{field} < '{self.apply_value(value)}'" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} < {value}" def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f"{field} <= {value}" - return f"{field} <= '{self.apply_value(value)}'" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} <= {value}" def greater_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f"{field} > {value}" - return f"{field} > '{self.apply_value(value)}'" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} > {value}" def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str: - if isinstance(value, int): - return f"{field} >= {value}" - return f"{field} >= '{self.apply_value(value)}'" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} >= {value}" def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join([self.not_equal_modifier(field=field, value=v) for v in value])})" - if isinstance(value, int): - return f"{field} !~ {value}" - return f"{field} !~ '{self.apply_value(value)}'" + + operator = "!~" if isinstance(value, str) else "!=" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} {operator} {value}" def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})" - return f"{field} contains @'{self.__escape_value(value)}'" + + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} contains {value}" def not_contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.not_contains_modifier(field=field, value=v) for v in value)})" - return f"{field} !contains @'{self.__escape_value(value)}'" + + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} !contains {value}" def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})" - return f"{field} endswith @'{self.__escape_value(value)}'" + + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} endswith {value}" def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})" - return f"{field} startswith @'{self.__escape_value(value)}'" + + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"{field} 
startswith {value}" + + @staticmethod + def __get_regex_value_type(value: DEFAULT_VALUE_TYPE) -> str: + has_single_quote = "'" in value + has_double_quote = '"' in value + if has_single_quote: + if has_double_quote: + return KQLValueType.single_quotes_regex_value + return KQLValueType.verbatim_double_quotes_regex_value + return KQLValueType.verbatim_single_quotes_regex_value def __regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: - return f"{field} matches regex @'(?i){self.__escape_value(value)}'" + value_type = self.__get_regex_value_type(value) + return f"{field} matches regex {self._pre_process_value(field, value, value_type, wrap_str=True)}" def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): @@ -110,13 +140,21 @@ def not_regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: if isinstance(value, list): return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})" - return f"* contains @'{self.__escape_value(value)}'" - def is_none(self, field: str, value: Union[str, int]) -> str: # noqa: ARG002 - return f"isempty({self.apply_value(value)})" + value = self._pre_process_value(field, value, KQLValueType.verbatim_single_quotes_value, wrap_str=True) + return f"* contains {value}" + + def is_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.is_none(field=field, value=v) for v in value)})" + + return f"isempty({field})" + + def is_not_none(self, field: str, value: DEFAULT_VALUE_TYPE) -> str: + if isinstance(value, list): + return f"({self.or_token.join(self.is_not_none(field=field, value=v) for v in value)})" - def is_not_none(self, field: str, value: Union[str, int]) -> str: # noqa: ARG002 - return f"isnotempty({self.apply_value(value)})" + return f"isnotempty({field})" @render_manager.register diff --git a/uncoder-core/app/translator/platforms/microsoft/str_value_manager.py b/uncoder-core/app/translator/platforms/microsoft/str_value_manager.py new file mode 100644 index 00000000..55dfa9db --- /dev/null +++ b/uncoder-core/app/translator/platforms/microsoft/str_value_manager.py @@ -0,0 +1,118 @@ +""" +Uncoder IO Community Edition License +----------------------------------------------------------------- +Copyright (c) 2024 SOC Prime, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/uncoder-core/app/translator/platforms/microsoft/str_value_manager.py b/uncoder-core/app/translator/platforms/microsoft/str_value_manager.py
new file mode 100644
index 00000000..55dfa9db
--- /dev/null
+++ b/uncoder-core/app/translator/platforms/microsoft/str_value_manager.py
@@ -0,0 +1,118 @@
+"""
+Uncoder IO Community Edition License
+-----------------------------------------------------------------
+Copyright (c) 2024 SOC Prime, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-----------------------------------------------------------------
+"""
+import copy
+import re
+from typing import Optional
+
+from app.translator.core.str_value_manager import (
+    CONTAINER_SPEC_SYMBOLS_MAP,
+    RE_STR_ALPHA_NUM_SYMBOLS_MAP,
+    RE_STR_SPEC_SYMBOLS_MAP,
+    BaseSpecSymbol,
+    SingleSymbolWildCard,
+    StrValue,
+    StrValueManager,
+    UnboundLenWildCard,
+)
+from app.translator.platforms.microsoft.custom_types.values import KQLValueType
+from app.translator.platforms.microsoft.escape_manager import microsoft_kql_escape_manager
+
+KQL_CONTAINER_SPEC_SYMBOLS_MAP = copy.copy(CONTAINER_SPEC_SYMBOLS_MAP)
+KQL_CONTAINER_SPEC_SYMBOLS_MAP.update({SingleSymbolWildCard: ".?", UnboundLenWildCard: ".*"})
+
+
+class MicrosoftKQLStrValueManager(StrValueManager):
+    escape_manager = microsoft_kql_escape_manager
+    container_spec_symbols_map = KQL_CONTAINER_SPEC_SYMBOLS_MAP
+    re_str_alpha_num_symbols_map = RE_STR_ALPHA_NUM_SYMBOLS_MAP
+    re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP
+
+    def from_str_to_container(
+        self,
+        value: str,
+        value_type: str = KQLValueType.value,
+        escape_symbol: Optional[str] = None,  # noqa: ARG002
+    ) -> StrValue:
+        if value_type == KQLValueType.verbatim_single_quotes_value:
+            return self.__from_verbatim_str_to_container(value, quote_char="'")
+
+        if value_type == KQLValueType.verbatim_double_quotes_value:
+            return self.__from_verbatim_str_to_container(value, quote_char='"')
+
+        if value_type == KQLValueType.single_quotes_value:
+            return self.__from_str_to_container(value, quote_char="'")
+
+        return self.__from_str_to_container(value, quote_char='"')
+
+    def __from_str_to_container(self, value: str, quote_char: str) -> StrValue:
+        split = []
+        prev_char = None
+
+        for char in value:
+            if char in ("\\", quote_char):
+                if prev_char == "\\":
+                    split.append(char)
+                    prev_char = None
+                    continue
+            else:
+                split.append(char)
+
+            prev_char = char
+
+        return StrValue(value, self._concat(split))
+
+    def __from_verbatim_str_to_container(self, value: str, quote_char: str) -> StrValue:
+        split = []
+        prev_char = None
+
+        for char in value:
+            if char != quote_char:
+                split.append(char)
+            elif char == prev_char:
+                split.append(char)
+                prev_char = None
+                continue
+
+            prev_char = char
+
+        return StrValue(value, self._concat(split))
+
+    def from_re_str_to_container(self, value: str, value_type: str = KQLValueType.regex_value) -> StrValue:
+        if value_type in (KQLValueType.single_quotes_regex_value, KQLValueType.double_quotes_regex_value):
+            value = re.sub(r"\[\\\"]", r'"', value)
+            value = re.sub(r"\[\\\']", r"'", value)
+            value = re.sub(r"\\\\", r"\\", value)
+            value = re.sub(r"\[\\\\]", r"\\\\", value)
+
+        return super().from_re_str_to_container(value, value_type)
+
+    def from_container_to_str(self, container: StrValue, value_type: str = KQLValueType.value) -> str:
+        result = ""
+        for el in container.split_value:
+            if isinstance(el, str):
+                result += self.escape_manager.escape(el, value_type)
+            elif isinstance(el, BaseSpecSymbol) and (pattern := self.container_spec_symbols_map.get(type(el))):
+                if value_type == KQLValueType.single_quotes_regex_value and "\\" in pattern:
+                    pattern = rf"\{pattern}"
+                result += pattern
+
+        return result
+
+
+microsoft_kql_str_value_manager = MicrosoftKQLStrValueManager()
diff --git a/uncoder-core/app/translator/platforms/microsoft/tokenizer.py b/uncoder-core/app/translator/platforms/microsoft/tokenizer.py
index 85cf3316..e9ae5932 100644
--- a/uncoder-core/app/translator/platforms/microsoft/tokenizer.py
+++ b/uncoder-core/app/translator/platforms/microsoft/tokenizer.py
@@ -21,9 +21,10 @@
 from app.translator.core.custom_types.tokens import OperatorType
 from app.translator.core.mixins.operator import OperatorBasedMixin
+from app.translator.core.str_value_manager import StrValue
 from app.translator.core.tokenizer import QueryTokenizer
-from app.translator.platforms.microsoft.custom_types.values import MicrosoftValueType
-from app.translator.platforms.microsoft.escape_manager import microsoft_escape_manager
+from app.translator.platforms.microsoft.custom_types.values import KQLValueType
+from app.translator.platforms.microsoft.str_value_manager import microsoft_kql_str_value_manager
 from app.translator.tools.utils import get_match_group
@@ -40,50 +41,61 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin):
         "contains": OperatorType.CONTAINS,
         "startswith": OperatorType.STARTSWITH,
         "endswith": OperatorType.ENDSWITH,
+        "matches regex": OperatorType.REGEX,
     }
     multi_value_operators_map: ClassVar[dict[str, str]] = {"in~": OperatorType.EQ, "in": OperatorType.EQ}
 
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
-    bool_value_pattern = rf"(?P<{MicrosoftValueType.bool_value}>true|false)\s*"
-    num_value_pattern = rf"(?P<{MicrosoftValueType.number_value}>\d+(?:\.\d+)*)\s*"
-    double_quotes_value_pattern = rf'"(?P<{MicrosoftValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\[\];<>?`~\s]|\\\"|\\\\)*)"\s*'  # noqa: E501
-    single_quotes_value_pattern = rf"'(?P<{MicrosoftValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\[\];<>?`~\s]|\\\'|\\\\)*)'\s*"  # noqa: E501
-    verbatim_double_quotes_value_pattern = rf'@"(?P<{MicrosoftValueType.verbatim_double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\[\];<>?`~\s\\]|"")*)"\s*'  # noqa: E501
-    verbatim_single_quotes_value_pattern = rf"@'(?P<{MicrosoftValueType.verbatim_single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\[\];<>?`~\s\\]|'')*)'\s*"  # noqa: E501
+    bool_value_pattern = rf"(?P<{KQLValueType.bool_value}>true|false)\s*"
+    num_value_pattern = rf"(?P<{KQLValueType.number_value}>\d+(?:\.\d+)*)\s*"
+    double_quotes_value_pattern = rf'"(?P<{KQLValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\[\];<>?`~\s]|\\\"|\\\\)*)"\s*'  # noqa: E501
+    single_quotes_value_pattern = rf"'(?P<{KQLValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\[\];<>?`~\s]|\\\'|\\\\)*)'\s*"  # noqa: E501
+    verbatim_double_quotes_value_pattern = rf'@"(?:\(i\?\))?(?P<{KQLValueType.verbatim_double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\[\];<>?`~\s\\]|"")*)"\s*'  # noqa: E501
+    verbatim_single_quotes_value_pattern = rf"@'(?:\(i\?\))?(?P<{KQLValueType.verbatim_single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\[\];<>?`~\s\\]|'')*)'\s*"  # noqa: E501
     str_value_pattern = rf"""{double_quotes_value_pattern}|{single_quotes_value_pattern}|{verbatim_double_quotes_value_pattern}|{verbatim_single_quotes_value_pattern}"""  # noqa: E501
     _value_pattern = rf"""{bool_value_pattern}|{num_value_pattern}|{str_value_pattern}"""
-    multi_value_pattern = (
-        rf"""\((?P<{MicrosoftValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\];<>?`~\s]+)\)"""
-    )
+    multi_value_pattern = rf"""\((?P<{KQLValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\];<>?`~\s]+)\)"""
     keyword_pattern = rf"\*\s+contains\s+(?:{str_value_pattern})"
 
-    escape_manager = microsoft_escape_manager
+    str_value_manager = microsoft_kql_str_value_manager
 
     def get_operator_and_value(  # noqa: PLR0911
        self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None
    ) -> tuple[str, Any]:
-        if (num_value := get_match_group(match, group_name=MicrosoftValueType.number_value)) is not None:
+        if (num_value := get_match_group(match, group_name=KQLValueType.number_value)) is not None:
             return mapped_operator, num_value
 
-        if (bool_value := get_match_group(match, group_name=MicrosoftValueType.bool_value)) is not None:
-            return mapped_operator, bool_value
+        if (bool_value := get_match_group(match, group_name=KQLValueType.bool_value)) is not None:
+            mapped_bool_value = bool_value == "true"
+            return mapped_operator, mapped_bool_value
 
-        if (d_q_value := get_match_group(match, group_name=MicrosoftValueType.double_quotes_value)) is not None:
-            return mapped_operator, self.escape_manager.remove_escape(d_q_value)
+        if (d_q_value := get_match_group(match, group_name=KQLValueType.double_quotes_value)) is not None:
+            if mapped_operator == OperatorType.REGEX:
+                value_type = KQLValueType.double_quotes_regex_value
+                return mapped_operator, self.str_value_manager.from_re_str_to_container(d_q_value, value_type)
+            return mapped_operator, self._str_to_container(d_q_value, KQLValueType.double_quotes_value)
 
-        if (s_q_value := get_match_group(match, group_name=MicrosoftValueType.single_quotes_value)) is not None:
-            return mapped_operator, self.escape_manager.remove_escape(s_q_value)
+        if (s_q_value := get_match_group(match, group_name=KQLValueType.single_quotes_value)) is not None:
+            if mapped_operator == OperatorType.REGEX:
+                value_type = KQLValueType.single_quotes_regex_value
+                return mapped_operator, self.str_value_manager.from_re_str_to_container(s_q_value, value_type)
+            return mapped_operator, self._str_to_container(s_q_value, KQLValueType.single_quotes_value)
 
-        group_name = MicrosoftValueType.verbatim_double_quotes_value
-        if (v_d_q_value := get_match_group(match, group_name=group_name)) is not None:
-            return mapped_operator, v_d_q_value
+        if (v_d_q_value := get_match_group(match, group_name=KQLValueType.verbatim_double_quotes_value)) is not None:
+            if mapped_operator == OperatorType.REGEX:
+                return mapped_operator, self.str_value_manager.from_re_str_to_container(v_d_q_value)
+            return mapped_operator, self._str_to_container(v_d_q_value, KQLValueType.verbatim_double_quotes_value)
 
-        group_name = MicrosoftValueType.verbatim_single_quotes_value
-        if (v_s_q_value := get_match_group(match, group_name=group_name)) is not None:
-            return mapped_operator, v_s_q_value
+        if (v_s_q_value := get_match_group(match, group_name=KQLValueType.verbatim_single_quotes_value)) is not None:
+            if mapped_operator == OperatorType.REGEX:
+                return mapped_operator, self.str_value_manager.from_re_str_to_container(v_s_q_value)
+            return mapped_operator, self._str_to_container(v_s_q_value, KQLValueType.verbatim_single_quotes_value)
 
         return super().get_operator_and_value(match, mapped_operator, operator)
 
+    def _str_to_container(self, value: str, value_type: str) -> StrValue:
+        return self.str_value_manager.from_str_to_container(value, value_type)
+
     def clean_multi_value(self, value: str) -> str:
         value = value.strip(" ")
         value = value.lstrip("@")
diff --git a/uncoder-core/app/translator/platforms/palo_alto/renders/base.py b/uncoder-core/app/translator/platforms/palo_alto/renders/base.py
index 6983d0f3..8d411f37 100644
--- a/uncoder-core/app/translator/platforms/palo_alto/renders/base.py
+++ b/uncoder-core/app/translator/platforms/palo_alto/renders/base.py
@@ -57,7 +57,7 @@ def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_typ
         return ValueType.value
 
     @staticmethod
-    def _wrap_str_value(value: str) -> str:
+    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:  # noqa: ARG004
         return f'"{value}"'
 
     def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
diff --git a/uncoder-core/app/translator/platforms/sigma/str_value_manager.py b/uncoder-core/app/translator/platforms/sigma/str_value_manager.py
index 6d3abe56..10f35d29 100644
--- a/uncoder-core/app/translator/platforms/sigma/str_value_manager.py
+++ b/uncoder-core/app/translator/platforms/sigma/str_value_manager.py
@@ -20,15 +20,12 @@
 from app.translator.core.custom_types.values import ValueType
 from app.translator.core.str_value_manager import (
+    RE_STR_ALPHA_NUM_SYMBOLS_MAP,
     RE_STR_SPEC_SYMBOLS_MAP,
-    ReDigitalSymbol,
-    ReWhiteSpaceSymbol,
-    ReWordBoundarySymbol,
-    ReWordSymbol,
     SingleSymbolWildCard,
     StrValue,
     StrValueManager,
-    UnboundLenWildCard
+    UnboundLenWildCard,
 )
 from app.translator.platforms.sigma.escape_manager import sigma_escape_manager
@@ -36,12 +33,7 @@ class SigmaStrValueManager(StrValueManager):
     escape_manager = sigma_escape_manager
     str_spec_symbols_map = {"?": SingleSymbolWildCard, "*": UnboundLenWildCard}
-    re_str_alpha_num_symbols_map = {
-        "b": ReWordBoundarySymbol,
-        "w": ReWordSymbol,
-        "d": ReDigitalSymbol,
-        "s": ReWhiteSpaceSymbol
-    }
+    re_str_alpha_num_symbols_map = RE_STR_ALPHA_NUM_SYMBOLS_MAP
     re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP
 
     def from_str_to_container(
diff --git a/uncoder-core/app/translator/platforms/splunk/parsers/splunk.py b/uncoder-core/app/translator/platforms/splunk/parsers/splunk.py
index 2370717a..31885596 100644
--- a/uncoder-core/app/translator/platforms/splunk/parsers/splunk.py
+++ b/uncoder-core/app/translator/platforms/splunk/parsers/splunk.py
@@ -18,14 +18,14 @@
 from app.translator.core.models.platform_details import PlatformDetails
 from app.translator.managers import parser_manager
-from app.translator.platforms.base.spl.parsers.spl import SplQueryParser
+from app.translator.platforms.base.spl.parsers.spl import SPLQueryParser
 from app.translator.platforms.splunk.const import splunk_query_details
 from app.translator.platforms.splunk.functions import SplunkFunctions, splunk_functions
 from app.translator.platforms.splunk.mapping import SplunkMappings, splunk_query_mappings
 
 
 @parser_manager.register_supported_by_roota
-class SplunkQueryParser(SplQueryParser):
+class SplunkQueryParser(SPLQueryParser):
     details: PlatformDetails = splunk_query_details
     mappings: SplunkMappings = splunk_query_mappings
     platform_functions: SplunkFunctions = splunk_functions
diff --git a/uncoder-core/app/translator/platforms/splunk/renders/splunk.py b/uncoder-core/app/translator/platforms/splunk/renders/splunk.py
index 7a50d3d1..4cb8cfbb 100644
--- a/uncoder-core/app/translator/platforms/splunk/renders/splunk.py
+++ b/uncoder-core/app/translator/platforms/splunk/renders/splunk.py
@@ -19,18 +19,18 @@
 from app.translator.core.models.platform_details import PlatformDetails
 from app.translator.managers import render_manager
-from app.translator.platforms.base.spl.renders.spl import SplFieldValueRender, SplQueryRender
+from app.translator.platforms.base.spl.renders.spl import SPLFieldValueRender, SPLQueryRender
 from app.translator.platforms.splunk.const import splunk_query_details
 from app.translator.platforms.splunk.functions import SplunkFunctions, splunk_functions
 from app.translator.platforms.splunk.mapping import SplunkMappings, splunk_query_mappings
 
 
-class SplunkFieldValueRender(SplFieldValueRender):
+class SplunkFieldValueRender(SPLFieldValueRender):
     details: PlatformDetails = splunk_query_details
 
 
 @render_manager.register
-class SplunkQueryRender(SplQueryRender):
+class SplunkQueryRender(SPLQueryRender):
     details: PlatformDetails = splunk_query_details
     mappings: SplunkMappings = splunk_query_mappings
     platform_functions: SplunkFunctions = None
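Reviewer note (illustrative only, not part of the patch): the new MicrosoftKQLStrValueManager relies on the KQL rule that verbatim string literals (@'...' and @"...") escape the quote character by doubling it and treat backslashes literally. A minimal standalone sketch of that collapse step, mirroring the shape of __from_verbatim_str_to_container:

def unescape_kql_verbatim(value: str, quote_char: str = "'") -> str:
    # Collapse doubled quote characters ('' -> ') and keep backslashes as-is,
    # the way KQL verbatim string literals are unescaped.
    out: list[str] = []
    prev_char = None
    for char in value:
        if char != quote_char:
            out.append(char)
        elif char == prev_char:  # second quote of an escaped pair
            out.append(char)
            prev_char = None
            continue
        prev_char = char
    return "".join(out)


assert unescape_kql_verbatim(r"C:\Users\it''s me") == r"C:\Users\it's me"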