Skip to content

Commit 5bd0d8a

Browse files
committed
passed the test
1 parent de9b12e commit 5bd0d8a

File tree

3 files changed

+33
-1
lines changed

3 files changed

+33
-1
lines changed

pii_recognizer/data/config.csv

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
input_file,output_file
2+
data/pii.txt,data/pii_out.txt
3+
data/letter.txt,data/letter_out.txt

pii_recognizer/pii_recognizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import pathlib
1919
import tempfile
2020
import warnings
21+
import pandas as pd
2122
from collections.abc import Iterable
2223
from multiprocessing import Pool, cpu_count
2324
from typing import Any, Dict, List, Optional, Set, Tuple, Union
@@ -1037,7 +1038,6 @@ def recognize_pii_parallel(
10371038
)
10381039
for _, row in config_df.iterrows()
10391040
]
1040-
10411041
# Create a pool of processes and distribute the tasks
10421042
with Pool(processes=num_processes) as pool:
10431043
pool.starmap(recognize_pii_one_file, tasks)

pii_recognizer/test_pii_recognizer.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
_get_analyzer_engine,
2323
_anonymize,
2424
_annotate,
25+
recognize_pii_parallel
2526
)
2627

2728

@@ -211,3 +212,31 @@ def test_only_entities(fake_data):
211212
analyzer = _get_analyzer_engine(entities=list(ENTITIES.keys())[:5])
212213
res, results = _process(text, analyzer, score_threshold=0.5)
213214
assert any(entity in res for entity in ENTITIES.keys())
215+
216+
217+
218+
def test_parallel():
219+
ENTITIES = {
220+
"LOCATION": "location",
221+
"PERSON": "name",
222+
"ORGANIZATION": "organization",
223+
"MAC_ADDRESS": "mac_address",
224+
"US_BANK_NUMBER": "us_bank_number",
225+
"IMEI": "imei",
226+
"TITLE": "title",
227+
"LICENSE_PLATE": "license_plate",
228+
"US_PASSPORT": "us_passport",
229+
"CURRENCY": "currency",
230+
"ROUTING_NUMBER": "routing_number",
231+
"US_ITIN": "us_itin",
232+
"US_BANK_NUMBER": "us_bank_number",
233+
"AGE": "age",
234+
"CREDIT_CARD": "credit_card",
235+
"SSN": "ssn",
236+
"PHONE": "phone",
237+
"EMAIL": "email",
238+
"PASSWORD": "password",
239+
"SWIFT_CODE": "swift_code",
240+
}
241+
recognize_pii_parallel(config_input_output = "data/config.csv", score_threshold = 0.5, entities = list(ENTITIES.keys()), model = "whole")
242+

0 commit comments

Comments
 (0)