Commit 5907f4c

Authored Jan 27, 2025
Merge pull request #159 from openstates/import-no-dupe-checks-in-import
Stop checking for duplicates in imports
2 parents 7504d87 + e7168a0 commit 5907f4c

File tree: 4 files changed, +24 −10 lines
 

CHANGELOG.md (+4)

@@ -1,5 +1,9 @@
 # Changelog
 
+## 6.20.14 - Jan 27, 2025
+
+* Allow duplicate items to be imported in import via new runtime flag --allow_duplicates
+
 ## 6.20.13 - Dec 27, 2024
 
 * Sanitize phone number for US people scrape.

openstates/cli/update.py (+9 −3)

@@ -264,11 +264,11 @@ def do_import(juris: State, args: argparse.Namespace) -> dict[str, typing.Any]:
         logger.info("import jurisdictions...")
         report.update(juris_importer.import_directory(datadir))
         logger.info("import bills...")
-        report.update(bill_importer.import_directory(datadir))
+        report.update(bill_importer.import_directory(datadir, allow_duplicates=args.allow_duplicates))
         logger.info("import vote events...")
-        report.update(vote_event_importer.import_directory(datadir))
+        report.update(vote_event_importer.import_directory(datadir, allow_duplicates=args.allow_duplicates))
         logger.info("import events...")
-        report.update(event_importer.import_directory(datadir))
+        report.update(event_importer.import_directory(datadir, allow_duplicates=args.allow_duplicates))
         DatabaseJurisdiction.objects.filter(id=juris.jurisdiction_id).update(
             latest_bill_update=datetime.datetime.utcnow()
         )
@@ -520,6 +520,12 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
         dest="strict",
         help="skip validation on save",
     )
+    parser.add_argument(
+        "--allow_duplicates",
+        action="store_true",
+        dest="allow_duplicates",
+        help="Skip throwing a DuplicateItemError, instead all import of duplicate items",
+    )
     parser.add_argument(
         "--fastmode", action="store_true", help="use cache and turn off throttling"
     )
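
The new flag's behavior at the command line follows from argparse's store_true action: it defaults to False, so existing invocations keep the strict duplicate check, and do_import() threads args.allow_duplicates into each import_directory() call as shown above. A minimal stand-alone sketch (this parser is illustrative only, not the project's full parse_args()):

```python
import argparse

# Illustrative parser containing only the option added in parse_args() above;
# every other option of the real CLI is omitted.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--allow_duplicates",
    action="store_true",
    dest="allow_duplicates",
    help="skip raising DuplicateItemError and allow import of duplicate items",
)

print(parser.parse_args([]).allow_duplicates)                      # False (default)
print(parser.parse_args(["--allow_duplicates"]).allow_duplicates)  # True
```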

openstates/importers/base.py (+10 −6)

@@ -273,7 +273,7 @@ def resolve_json_id(
         except KeyError:
             raise UnresolvedIdError("cannot resolve id: {}".format(json_id))
 
-    def import_directory(self, datadir: str) -> typing.Dict[str, typing.Dict]:
+    def import_directory(self, datadir: str, allow_duplicates=False) -> typing.Dict[str, typing.Dict]:
         """import a JSON directory into the database"""
 
         def json_stream() -> typing.Iterator[_JsonDict]:
@@ -282,7 +282,7 @@ def json_stream() -> typing.Iterator[_JsonDict]:
                 with open(fname) as f:
                     yield json.load(f)
 
-        return self.import_data(json_stream())
+        return self.import_data(json_stream(), allow_duplicates)
 
     def _prepare_imports(
         self, dicts: typing.Iterable[_JsonDict]
@@ -309,7 +309,7 @@ def _prepare_imports(
                     self.duplicates[json_id] = seen_hashes[objhash]
 
     def import_data(
-        self, data_items: typing.Iterable[_JsonDict]
+        self, data_items: typing.Iterable[_JsonDict], allow_duplicates=False
     ) -> typing.Dict[str, typing.Dict]:
         """import a bunch of dicts together"""
         # keep counts of all actions
@@ -322,7 +322,7 @@ def import_data(
         }
 
         for json_id, data in self._prepare_imports(data_items):
-            obj_id, what = self.import_item(data)
+            obj_id, what = self.import_item(data, allow_duplicates)
             if not obj_id or not what:
                 "Skipping data because it did not have an associated ID or type"
                 continue
@@ -341,7 +341,7 @@ def import_data(
 
         return {self._type: record}
 
-    def import_item(self, data: _JsonDict) -> typing.Tuple[_ID, str]:
+    def import_item(self, data: _JsonDict, allow_duplicates=False) -> typing.Tuple[_ID, str]:
         """function used by import_data"""
         what = "noop"
 
@@ -369,8 +369,12 @@ def import_item(self, data: _JsonDict) -> typing.Tuple[_ID, str]:
 
         # obj existed, check if we need to do an update
         if obj:
-            if obj.id in self.json_to_db_id.values():
+            # If --allow_duplicates flag is set on client CLI command
+            # then we ignore duplicates instead of raising an exception
+            if not allow_duplicates and obj.id in self.json_to_db_id.values():
                 raise DuplicateItemError(data, obj, related.get("sources", []))
+            elif allow_duplicates and obj.id in self.json_to_db_id.values():
+                self.logger.warning(f"Ignored a DuplicateItemError for {obj.id}")
             # check base object for changes
             for key, value in data.items():
                 if getattr(obj, key) != value:
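
The behavioral change in import_item() reduces to the duplicate check in the last hunk: a repeated database id raises DuplicateItemError unless allow_duplicates is set, in which case it is only logged. Below is a simplified, self-contained sketch of that branch; DuplicateItemError is a stand-in class here, json_to_db_id is passed as a plain dict, and the real method continues on to diff fields and update related objects rather than just returning.

```python
import logging

logger = logging.getLogger("openstates.importers")


class DuplicateItemError(Exception):
    """Stand-in for the importer's real exception type."""


def handle_possible_duplicate(
    obj_id: str, json_to_db_id: dict, allow_duplicates: bool = False
) -> None:
    """Sketch of the check added to import_item().

    Before this commit a repeated database id always raised DuplicateItemError;
    with allow_duplicates=True the duplicate is logged and the import continues
    to the normal "check for changes and update" path.
    """
    if not allow_duplicates and obj_id in json_to_db_id.values():
        raise DuplicateItemError(f"duplicate item: {obj_id}")
    elif allow_duplicates and obj_id in json_to_db_id.values():
        logger.warning("Ignored a DuplicateItemError for %s", obj_id)


# Example: the same duplicate id either raises or is merely warned about.
seen = {"ocd-bill/json-1": "ocd-bill/abc123"}
handle_possible_duplicate("ocd-bill/abc123", seen, allow_duplicates=True)  # warns
# handle_possible_duplicate("ocd-bill/abc123", seen)  # would raise DuplicateItemError
```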

pyproject.toml (+1 −1)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openstates"
-version = "6.20.13"
+version = "6.20.14"
 description = "core infrastructure for the openstates project"
 authors = ["James Turk <dev@jamesturk.net>"]
 license = "MIT"
