Skip to content

Commit 9e62837

Browse files
committed
added priority
1 parent a7af589 commit 9e62837

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

archive_query_log/cli/parsers.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -399,13 +399,13 @@ def warc_direct_answers() -> None:
399399
type=Choice(CHOICES_WARC_DIRECT_ANSWERS_PARSER_TYPE), required=True)
400400
@option("--xpath", type=str)
401401
@option("--url-xpath", type=str)
402-
@option("--title-xpath", type=str)
403402
@option("--text-xpath", type=str)
404403
@pass_config
405404
def warc_direct_answers_add(
406405
config: Config,
407406
provider_id: str | None,
408407
url_pattern_regex: str | None,
408+
priority: float | None,
409409
parser_type: str,
410410
xpath: str | None,
411411
url_xpath: str | None,
@@ -425,6 +425,7 @@ def warc_direct_answers_add(
425425
config=config,
426426
provider_id=provider_id,
427427
url_pattern_regex=url_pattern_regex,
428+
priority=priority,
428429
parser_type=parser_type_strict,
429430
xpath=xpath,
430431
url_xpath=url_xpath,

archive_query_log/imports/yaml.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -500,6 +500,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N
500500
continue
501501

502502
results_parsers = service["results_parsers"]
503+
num_results_parsers = len(results_parsers)
503504

504505
providers = (
505506
Provider.search(using=config.es.client)
@@ -508,7 +509,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N
508509
)
509510
providers = safe_iter_scan(providers)
510511
for provider in providers:
511-
for results_parser in enumerate(results_parsers):
512+
for k, results_parser in enumerate(results_parsers):
512513
if results_parser["type"] != "html_selector":
513514
continue
514515
results_selector = results_parser["results_selector"]
@@ -546,6 +547,7 @@ def import_warc_direct_answers_parsers(config: Config, services_path: Path) -> N
546547
config=config,
547548
provider_id=provider.meta.id,
548549
url_pattern_regex=results_parser.get("url_pattern"),
550+
priority=num_results_parsers - k,
549551
parser_type="xpath",
550552
xpath=results_xpath,
551553
url_xpath=url_xpath,

0 commit comments

Comments
 (0)