Skip to content

Commit c868b92

Browse files
authored
Merge pull request #208 from UncoderIO/field-tokens-separation
separate field tokens
2 parents 7ec3852 + bd9db47 commit c868b92

File tree

17 files changed

+99
-44
lines changed

17 files changed

+99
-44
lines changed

uncoder-core/app/translator/core/functions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ def order_to_render(self) -> dict[str, int]:
164164

165165
return {}
166166

167+
@property
168+
def supported_render_names(self) -> set[str]:
169+
return set(self._renders_map)
170+
167171

168172
class PlatformFunctions:
169173
dir_path: str = None

uncoder-core/app/translator/core/mapping.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,22 @@ def get_source_mapping(self, source_id: str) -> Optional[SourceMapping]:
188188
def default_mapping(self) -> SourceMapping:
189189
return self._source_mappings[DEFAULT_MAPPING_NAME]
190190

191-
def check_fields_mapping_existence(self, field_tokens: list[Field], source_mapping: SourceMapping) -> list[str]:
191+
def check_fields_mapping_existence(
192+
self,
193+
query_field_tokens: list[Field],
194+
function_field_tokens_map: dict[str, list[Field]],
195+
supported_func_render_names: set[str],
196+
source_mapping: SourceMapping,
197+
) -> list[str]:
192198
unmapped = []
193-
for field in field_tokens:
194-
generic_field_name = field.get_generic_field_name(source_mapping.source_id)
195-
mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
196-
if not mapped_field and field.source_name not in unmapped:
197-
unmapped.append(field.source_name)
199+
200+
for field in query_field_tokens:
201+
self._check_field_mapping_existence(field, source_mapping, unmapped)
202+
203+
for func_name, function_field_tokens in function_field_tokens_map.items():
204+
if func_name in supported_func_render_names:
205+
for field in function_field_tokens:
206+
self._check_field_mapping_existence(field, source_mapping, unmapped)
198207

199208
if self.is_strict_mapping and unmapped:
200209
raise StrictPlatformException(
@@ -203,6 +212,13 @@ def check_fields_mapping_existence(self, field_tokens: list[Field], source_mappi
203212

204213
return unmapped
205214

215+
@staticmethod
216+
def _check_field_mapping_existence(field: Field, source_mapping: SourceMapping, unmapped: list[str]) -> None:
217+
generic_field_name = field.get_generic_field_name(source_mapping.source_id)
218+
mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
219+
if not mapped_field and field.source_name not in unmapped:
220+
unmapped.append(field.source_name)
221+
206222
@staticmethod
207223
def map_field(field: Field, source_mapping: SourceMapping) -> list[str]:
208224
generic_field_name = field.get_generic_field_name(source_mapping.source_id)

uncoder-core/app/translator/core/models/query_container.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def __init__(
6565
date: Optional[str] = None,
6666
output_table_fields: Optional[list[Field]] = None,
6767
query_fields: Optional[list[Field]] = None,
68+
function_fields: Optional[list[Field]] = None,
69+
function_fields_map: Optional[dict[str, list[Field]]] = None,
6870
license_: Optional[str] = None,
6971
severity: Optional[str] = None,
7072
references: Optional[list[str]] = None,
@@ -90,6 +92,8 @@ def __init__(
9092
self.date = date or datetime.now().date().strftime("%Y-%m-%d")
9193
self.output_table_fields = output_table_fields or []
9294
self.query_fields = query_fields or []
95+
self.function_fields = function_fields or []
96+
self.function_fields_map = function_fields_map or {}
9397
self.license = license_ or "DRL 1.1"
9498
self.severity = severity or SeverityType.low
9599
self.references = references or []

uncoder-core/app/translator/core/parser.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,19 @@ def get_query_tokens(self, query: str) -> list[QUERY_TOKEN_TYPE]:
6565
@staticmethod
6666
def get_field_tokens(
6767
query_tokens: list[QUERY_TOKEN_TYPE], functions: Optional[list[Function]] = None
68-
) -> list[Field]:
69-
field_tokens = []
68+
) -> tuple[list[Field], list[Field], dict[str, list[Field]]]:
69+
query_field_tokens = []
70+
function_field_tokens = []
71+
function_field_tokens_map = {}
7072
for token in query_tokens:
7173
if isinstance(token, (FieldField, FieldValue, FunctionValue)):
72-
field_tokens.extend(token.fields)
74+
query_field_tokens.extend(token.fields)
7375

74-
if functions:
75-
field_tokens.extend([field for func in functions for field in func.fields])
76+
for func in functions or []:
77+
function_field_tokens.extend(func.fields)
78+
function_field_tokens_map[func.name] = func.fields
7679

77-
return field_tokens
80+
return query_field_tokens, function_field_tokens, function_field_tokens_map
7881

7982
def get_source_mappings(
8083
self, field_tokens: list[Field], log_sources: dict[str, list[Union[int, str]]]

uncoder-core/app/translator/core/render.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,14 +428,18 @@ def _generate_from_tokenized_query_container_by_source_mapping(
428428
self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
429429
) -> str:
430430
unmapped_fields = self.mappings.check_fields_mapping_existence(
431-
query_container.meta_info.query_fields, source_mapping
431+
query_container.meta_info.query_fields,
432+
query_container.meta_info.function_fields_map,
433+
self.platform_functions.manager.supported_render_names,
434+
source_mapping,
432435
)
433436
rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
434437
prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix)
435438

436439
if source_mapping.raw_log_fields:
437440
defined_raw_log_fields = self.generate_raw_log_fields(
438-
fields=query_container.meta_info.query_fields, source_mapping=source_mapping
441+
fields=query_container.meta_info.query_fields + query_container.meta_info.function_fields,
442+
source_mapping=source_mapping,
439443
)
440444
prefix += f"\n{defined_raw_log_fields}"
441445
query = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)

uncoder-core/app/translator/platforms/base/aql/parsers/aql.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,13 @@ def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[
115115
def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
116116
query, log_sources, functions = self._parse_query(raw_query_container.query)
117117
query_tokens = self.get_query_tokens(query)
118-
field_tokens = self.get_field_tokens(query_tokens, functions.functions)
119-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
118+
query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
119+
query_tokens, functions.functions
120+
)
121+
source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
120122
meta_info = raw_query_container.meta_info
121-
meta_info.query_fields = field_tokens
123+
meta_info.query_fields = query_field_tokens
124+
meta_info.function_fields = function_field_tokens
125+
meta_info.function_fields_map = function_field_tokens_map
122126
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
123127
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)

uncoder-core/app/translator/platforms/base/lucene/parsers/lucene.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
4848
def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
4949
query, log_sources = self._parse_query(raw_query_container.query)
5050
query_tokens = self.get_query_tokens(query)
51-
field_tokens = self.get_field_tokens(query_tokens)
52-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
51+
query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
52+
source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
5353
meta_info = raw_query_container.meta_info
54-
meta_info.query_fields = field_tokens
54+
meta_info.query_fields = query_field_tokens
5555
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
5656
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)

uncoder-core/app/translator/platforms/base/spl/functions/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ def parse(self, query: str) -> tuple[str, ParsedFunctions]:
2626
functions = query.split(self.function_delimiter)
2727
result_query = self.prepare_query(functions[0])
2828
for func in functions[1:]:
29-
split_func = func.strip().split(" ")
29+
func = func.strip()
30+
split_func = func.split(" ")
3031
func_name, func_body = split_func[0], " ".join(split_func[1:])
3132
try:
3233
func_parser = self.manager.get_hof_parser(func_name)

uncoder-core/app/translator/platforms/base/spl/parsers/spl.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
class SplQueryParser(PlatformQueryParser):
3131
log_source_pattern = r"^___source_type___\s*=\s*(?:\"(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" # noqa: E501
32-
rule_name_pattern = r"`(?P<name>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`"
32+
rule_name_pattern = r"`(?P<name>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`" # noqa: RUF001
3333
log_source_key_types = ("index", "source", "sourcetype", "sourcecategory")
3434

3535
platform_functions: SplFunctions = None
@@ -56,7 +56,7 @@ def _parse_log_sources(self, query: str) -> tuple[dict[str, list[str]], str]:
5656
def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFunctions]:
5757
if re.match(self.rule_name_pattern, query):
5858
search = re.search(self.rule_name_pattern, query, flags=re.IGNORECASE)
59-
query = query[:search.start()] + query[search.end():]
59+
query = query[: search.start()] + query[search.end() :]
6060
query = query.strip()
6161
log_sources, query = self._parse_log_sources(query)
6262
query, functions = self.platform_functions.parse(query)
@@ -72,9 +72,13 @@ def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContain
7272

7373
query, log_sources, functions = self._parse_query(raw_query_container.query)
7474
query_tokens = self.get_query_tokens(query)
75-
field_tokens = self.get_field_tokens(query_tokens, functions.functions)
76-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
75+
query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
76+
query_tokens, functions.functions
77+
)
78+
source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
7779
meta_info = raw_query_container.meta_info
78-
meta_info.query_fields = field_tokens
80+
meta_info.query_fields = query_field_tokens
81+
meta_info.function_fields = function_field_tokens
82+
meta_info.function_fields_map = function_field_tokens_map
7983
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
8084
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)

uncoder-core/app/translator/platforms/base/sql/parsers/sql.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
4343
def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
4444
query, log_sources = self._parse_query(raw_query_container.query)
4545
query_tokens = self.get_query_tokens(query)
46-
field_tokens = self.get_field_tokens(query_tokens)
47-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
46+
query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
47+
source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
4848
meta_info = raw_query_container.meta_info
49-
meta_info.query_fields = field_tokens
49+
meta_info.query_fields = query_field_tokens
5050
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
5151
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)

0 commit comments

Comments (0)