Skip to content

Commit

Permalink
fix: Scheduler not compatible with BaseDupeFilter (#294)
Browse files: browse the repository at this point in the history
* fix: Scheduler not compatible with BaseDupeFilter


Co-authored-by: R Max Espinoza <[email protected]>
  • Loading branch information
HairlessVillager and rmax committed Jul 6, 2024
1 parent ea646cb commit 50b8c6f
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/scrapy_redis/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
flush_on_start=False,
queue_key=defaults.SCHEDULER_QUEUE_KEY,
queue_cls=defaults.SCHEDULER_QUEUE_CLASS,
dupefilter=None,
dupefilter_key=defaults.SCHEDULER_DUPEFILTER_KEY,
dupefilter_cls=defaults.SCHEDULER_DUPEFILTER_CLASS,
idle_before_close=0,
Expand All @@ -56,6 +57,8 @@ def __init__(
Requests queue key.
queue_cls : str
Importable path to the queue class.
dupefilter : Dupefilter
Custom dupefilter instance.
dupefilter_key : str
Duplicates filter key.
dupefilter_cls : str
Expand All @@ -72,6 +75,7 @@ def __init__(
self.flush_on_start = flush_on_start
self.queue_key = queue_key
self.queue_cls = queue_cls
self.df = dupefilter
self.dupefilter_cls = dupefilter_cls
self.dupefilter_key = dupefilter_key
self.idle_before_close = idle_before_close
Expand Down Expand Up @@ -105,6 +109,10 @@ def from_settings(cls, settings):
if val:
kwargs[name] = val

dupefilter_cls = load_object(kwargs["dupefilter_cls"])
if not hasattr(dupefilter_cls, "from_spider"):
kwargs["dupefilter"] = dupefilter_cls.from_settings(settings)

# Support serializer as a path to a module.
if isinstance(kwargs.get("serializer"), str):
kwargs["serializer"] = importlib.import_module(kwargs["serializer"])
Expand Down Expand Up @@ -137,7 +145,8 @@ def open(self, spider):
f"Failed to instantiate queue class '{self.queue_cls}': {e}"
)

self.df = load_object(self.dupefilter_cls).from_spider(spider)
if not self.df:
self.df = load_object(self.dupefilter_cls).from_spider(spider)

if self.flush_on_start:
self.flush()
Expand Down

0 comments on commit 50b8c6f

Please sign in to comment.