Skip to content

Commit 4853af6

Browse files
authored
3 ant 1 (#9)
* #3 - V4 code snippets * #3 - Updates * #3 - Updates
1 parent 56cc344 commit 4853af6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+369642
-65
lines changed

.pylintrc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
[pylint]
22
disable=
33
line-too-long,
4-
good-names=
5-
code-snippets-v4
4+
missing-function-docstring,
5+
missing-module-docstring,
6+
protected-access
67
ignore=
78
__init__.py,
89
notes=

.vscode/settings.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"python.testing.unittestEnabled": false,
3+
"python.testing.pytestEnabled": true,
4+
"terminal.integrated.env.linux": {
5+
"PYTHONPATH": ".:/home/ant/Work/Senzing/git/sz-sdk-python/src:/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing:/home/ant/Work/Senzing/git/sz-sdk-python-core/src:/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing",
6+
// "PYTHONPATH": ".:/home/ant/Work/Senzing/git/sz-sdk-python-core/src:/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing:./sz_tools",
7+
"LD_LIBRARY_PATH": "/opt/senzing/er/lib/",
8+
// "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"RESOURCEPATH\":\"/opt/senzing/er/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}}"
9+
"SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"RESOURCEPATH\":\"/opt/senzing/er/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}, \"LOGGING\":{\"CONFIG\":\"console://stdout/?style=jsonl *.TRCE;*.CRIT;*.ERR\"}}"
10+
// "SENZING_ENGINE_CONFIGURATION_JSON": "{\"PIPELINE\":{\"CONFIGPATH\":\"/etc/opt/senzing\",\"LICENSESTRINGBASE64\": \"AQAAADgCAAAAAAAAU2VuemluZyBJbnRlcm5hbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAU2VuemluZyBJbnRlcm5hbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADIwMjQtMDUtMDIAAAAAAAAAAAAARVZBTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFNUQU5EQVJEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC4mH8AAEBCDwAAAAAAMjAyNS0wNS0wMgAAAAAAAAAAAABZRUFSTFkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFXwDOdVd1TL+0dJRXnE9ykJJyJYnGhUN1QqoS8ASfNaDioankisRviWuB3I5uQ20EEh9tjNzzOszGf1+khWl5cb+XqE+GoMMW0rrSi6ScZmgrfh2oHrRpEbnfb4uejMrl3XGdTPdHUGNSkTKDgEQrlimVt04W5gsFVcBHBiUbKoZCghI+qaYGocsNZLh1yWOklu8Fh02CWkXXQZSKvq/PsXpkHtbsxPbActcMmZRYPZNiRXq0BK3ChyCRM0zbl4mZCPBfNL9zAx6v2HLUmDp4lNEVIyS86T9/enSrsK1udnJq09jnP8gBzY6kBxpoYyxr5o2u1VX3DC9ySHiwtio6NQMo0ckGultNqYpSBejXm10YCYH6eCsnnC5z49Gp+2NYIRcgRz/N93uLd7PrkLyLreayF8HCQOg7CBZeUGcFsufdf0304eJHCsoRy1w2dUT8N2auYJxuzjwzAMvZIYrYamjiG6Mc4Wdcpuktlcht+pjhqk9vwqQI0AzjMq2oXDGYL6KlFcOAojAIZu8bl30pZGGkq2n9NFuuO4gMiRjIwYkBpwHNmBq3QT21owPb4urlidmQelmXtzk9+BNMZL34bUK7R509Rt3GTmjb2c5TDqyIatGfnBsh3658ce8ohnBJ/ZmUgJifcorgLDawDqr8spClKfwLtcwzbkNPDKHJ/e\",\"RESOURCEPATH\":\"/opt/senzing/g2/resources\",\"SUPPORTPATH\":\"/opt/senzing/data\"},\"SQL\":{\"CONNECTION\":\"sqlite3://na:na@/tmp/sqlite/G2C.db\"}}"
11+
},
12+
"python.testing.pytestArgs": [],
13+
"python.autoComplete.extraPaths": [
14+
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src",
15+
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing",
16+
"/home/ant/Work/Senzing/git/sz-sdk-python/src",
17+
"/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing"
18+
],
19+
"python.analysis.extraPaths": [
20+
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src",
21+
"/home/ant/Work/Senzing/git/sz-sdk-python-core/src/senzing",
22+
"/home/ant/Work/Senzing/git/sz-sdk-python/src",
23+
"/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing"
24+
],
25+
"pylint.importStrategy": "useBundled",
26+
}

CHANGELOG.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,9 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog], [markdownlint],
66
and this project adheres to [Semantic Versioning].
77

8-
## [1.0.0] - 2024-11-28
8+
### Added to 0.0.1
99

10-
### Added to 1.0.0
11-
12-
- Initial
10+
- Initial for V4
1311

1412
[Keep a Changelog]: https://keepachangelog.com/en/1.0.0/
1513
[markdownlint]: https://dlaa.me/markdownlint/

README.md

Lines changed: 13 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ To find the Senzing API V3 version of this repository, visit [code-snippets-v3].
99

1010
## Overview
1111

12-
Succinct examples of how you might use the Senzing APIs for operational tasks.
12+
Succinct examples of how you might use the Senzing SDK for operational tasks.
1313

1414
## Contents
1515

@@ -63,15 +63,15 @@ The JSON configuration string is set via the environment variable `SENZING_ENGIN
6363

6464
## Senzing APIs Bare Metal Usage
6565

66-
You may already have installed the Senzing APIs and created a Senzing project by following the [Quickstart Guide]. If not, and you would like to install the Senzing APIs directly on a machine, follow the steps in the [Quickstart Guide]. Be sure to review the API [Quickstart Roadmap], especially the [System Requirements].
66+
You may already have installed Senzing and created a Senzing project by following the [Quickstart Guide]. If not, and you would like to install Senzing directly on a machine, follow the steps in the [Quickstart Guide]. Be sure to review the [Quickstart Roadmap], especially the [System Requirements].
6767

6868
### Configuration
6969

7070
When using a bare metal install, the initialization parameters used by the Senzing Python utilities are maintained within `<project_path>/etc/G2Module.ini`.
7171

7272
🤔To convert an existing Senzing project G2Module.ini file to a JSON string use one of the following methods:
7373

74-
- [G2ModuleIniToJson.py]
74+
- [g2_module_ini_to_json.py]
7575

7676
- Modify the path to your project's G2Module.ini file.
7777

@@ -87,12 +87,6 @@ When using a bare metal install, the initialization parameters used by the Senzi
8787
python3 -c $'import configparser; ini_file_name = "<project_path>/etc/G2Module.ini";engine_config_json = {};cfgp = configparser.ConfigParser();cfgp.optionxform = str;cfgp.read(ini_file_name)\nfor section in cfgp.sections(): engine_config_json[section] = dict(cfgp.items(section))\nprint(engine_config_json)'
8888
```
8989

90-
- [SenzingGo.py]
91-
92-
- ```console
93-
<project_path>/python/SenzingGo.py --iniToJson
94-
```
95-
9690
:pencil2: `<project_path>` in the above example should point to your project.
9791

9892
### Usage
@@ -120,57 +114,17 @@ The included Dockerfile leverages the [Senzing API runtime] image to provide an
120114

121115
### Configuration for Docker usage
122116

123-
When used with a container, the JSON configuration is relative to the paths within the container. The JSON configuration should look like:
124-
125-
```json
126-
{
127-
"PIPELINE": {
128-
"CONFIGPATH": "/etc/opt/senzing",
129-
"RESOURCEPATH": "/opt/senzing/g2/resources",
130-
"SUPPORTPATH": "/opt/senzing/data"
131-
},
132-
"SQL": {
133-
"CONNECTION": "postgresql://senzing:password@myhost:5432:g2"
134-
}
135-
}
136-
```
137-
138-
✏️You only need to modify the `CONNECTION` string to point to your Senzing database.
117+
Coming soon...
139118

140-
### Usage for Dccker usage
141-
142-
1. Clone this repository
143-
1. Export the engine configuration environment variable
144-
145-
```console
146-
export SENZING_ENGINE_CONFIGURATION_JSON='{"PIPELINE": {"CONFIGPATH": "/etc/opt/senzing", "RESOURCEPATH": "/opt/senzing/g2/resources", "SUPPORTPATH": "/opt/senzing/data"}, "SQL": {"CONNECTION": "postgresql://user:password@host:5432:g2"}}'
147-
```
148-
149-
1. Build the Docker image
150-
151-
```console
152-
cd <repository_dir>
153-
docker build --tag senzing/code-snippets-v4 .
154-
```
155-
156-
1. Run a container
157-
158-
```console
159-
docker run \
160-
--env SENZING_ENGINE_CONFIGURATION_JSON \
161-
--interactive \
162-
--tty \
163-
--rm \
164-
senzing/code-snippets-v4
165-
```
119+
### Usage for Docker
166120

167-
✏️You only need to modify the `CONNECTION` string to point to your Senzing database.
121+
Coming soon...
168122

169123
## Items of Note
170124

171-
### With Info
125+
### SZ_WITH_INFO flag
172126

173-
A feature of Senzing is the capability to pass changes from data manipulation API calls to downstream systems for analysis, consolidation and replication. Any API that can change the outcome of entity resolution have a "WithInfo" version of the API. For example, addRecord and addRecordWithInfo. The "WithInfo" version of the API returns a response message detailing any entities that were affected by the API. In the following example (from addRecordWithInfo) a single entity with the ID 7903 was affected.
127+
A feature of Senzing is the capability to pass changes from data manipulation SDK calls to downstream systems for analysis, consolidation and replication. SDK methods `add_record()`, `delete_record()` and `process_redo_record()` accept a `flags=` argument that, when set to `SzEngineFlags.SZ_WITH_INFO`, returns a response message detailing any entities affected by the method. In the following example (from `add_record("TEST", "10945", flags=SzEngineFlags.SZ_WITH_INFO)`) a single entity with the ID 7903 was affected.
174128

175129
```json
176130
{
@@ -190,7 +144,7 @@ The AFFECTED_ENTITIES object contains a list of all entity IDs affected. Separat
190144

191145
### Parallel Processing
192146

193-
Many of the example tasks demonstrate concurrent execution with threads. The entity resolution process involves IO operations, the use of concurrent processes and threads when calling the Senzing APIs provides scalability and performance. If using multiple processes, each process should have its own instance of a Senzing engine, for example G2Engine. Each engine object can support multiple threads.
147+
Many of the example tasks demonstrate concurrent execution with threads. Because the entity resolution process involves IO operations, using concurrent processes and threads when calling the Senzing APIs provides scalability and performance.
194148

195149
### Scalability
196150

@@ -212,19 +166,19 @@ To run the same example again and see representative performance, first [purge]
212166

213167
### Input Load File Sizes
214168

215-
There are different sized load files within the [Data] path that can be used to decrease or increase the volume of data loaded depending on the specification of your hardware. The files are named loadx.json, where the x specifies the number of records in the file.
169+
There are different sized load files within the [data] path that can be used to increase the volume of data loaded depending on the specification of your hardware. Note: Senzing V4 comes with a default license that allows up to 500 source records to be loaded; without a larger license you will not be able to load these larger files.
216170

217171
[code-snippets-v3]: https://github.com/Senzing/code-snippets-v3
218172
[Configuration]: #configuration
219-
[Data]: Resources/Data/
173+
[data]: resources/data/
220174
[Docker Usage]: #docker-usage
221-
[G2ModuleIniToJson.py]: Python/Tasks/Initialization/
175+
[g2_module_ini_to_json.py]: python/initialization/g2_module_ini_to_json.py
222176
[Input Load File Sizes]: #input-load-file-sizes
223177
[Items of Note]: #items-of-note
224178
[jc]: https://github.com/kellyjonbrazil/jc
225179
[Legend]: #legend
226180
[Parallel Processing]: #parallel-processing
227-
[purge]: Python/Tasks/Initialization/PurgeRepository.py
181+
[purge]: python/initialization/purge_repository.py
228182
[Purging Senzing Repository Between Examples]: #purging-senzing-repository-between-examples
229183
[Quickstart Guide]: https://senzing.zendesk.com/hc/en-us/articles/115002408867-Quickstart-Guide
230184
[Quickstart Roadmap]: https://senzing.zendesk.com/hc/en-us/articles/115001579954-API-Quickstart-Roadmap

pyproject.toml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
[build-system]
2+
requires = [
3+
"setuptools>=42",
4+
"wheel"
5+
]
6+
build-backend = "setuptools.build_meta"
7+
8+
[dependency-groups]
9+
test = ["pytest", "coverage"]
10+
11+
[tool.bandit]
12+
skips = ["B101"]
13+
14+
[tool.black]
15+
line-length = 120
16+
17+
[tool.flake8]
18+
extend-ignore = ["E203", "E501", "E704", "W503"]
19+
max-line-length = 120
20+
21+
[tool.isort]
22+
profile = "black"
23+
src_paths = ["examples", "src", "tests"]
24+
25+
[[tool.mypy.overrides]]
26+
module = "senzing_abstract.szengineflags.*"
27+
ignore_missing_imports = "true"
28+
warn_unused_ignores = "false"
29+
30+
[[tool.mypy.overrides]]
31+
module = "pytest_schema.*"
32+
ignore_missing_imports = "true"
33+
34+
[tool.pylint]
35+
ignored-argument-names = "args|kwargs"
36+
disable = [
37+
"broad-except",
38+
"consider-using-f-string",
39+
"line-too-long",
40+
"missing-function-docstring",
41+
"missing-module-docstring",
42+
"protected-access",
43+
"too-many-branches",
44+
"too-many-locals",
45+
]
46+
good-names = [
47+
"template-python"
48+
]
49+
ignore = [
50+
"__init__.py",
51+
"docs/source/conf.py"
52+
]
53+
notes = [
54+
"FIXME"
55+
]

python/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Python Snippets

python/configuration/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Deleting Data
2+
The deletion snippets outline deleting previously added source records. Deleting source records removes the previously added source record from the system, completes the entity resolution process and persists outcomes in the Senzing repository.
3+
4+
Deleting a record only requires the data source code and record ID for the record to be deleted.
5+
6+
## Snippets
7+
* **DeleteFutures.py**
8+
* Read and delete source records from a file using multiple threads
9+
* **DeleteLoop.py**
10+
* Basic read and delete source records from a file
11+
* **DeleteWithInfoFutures.py**
12+
* Read and delete source records from a file using multiple threads
13+
* Collect the response from the [with info](../../README.md#sz_with_info-flag) version of the API and write it to a file
14+
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#! /usr/bin/env python3
2+
3+
import os
4+
import sys
5+
from pathlib import Path
6+
7+
from senzing_core import SzAbstractFactory, SzError
8+
9+
# Engine settings are taken from the environment; "{}" is used when the variable is unset.
ENGINE_CONFIG_JSON = os.environ.get("SENZING_ENGINE_CONFIGURATION_JSON", "{}")
# This script's file name without its extension.
INSTANCE_NAME = Path(__file__).stem
11+
12+
13+
# Load the current default configuration, register three extra data sources in
# an in-memory copy, store that copy as a new configuration, make it the
# default, then print the resulting data source list.
try:
    # Identify this instance by the script name instead of a copy-pasted literal.
    sz_factory = SzAbstractFactory(INSTANCE_NAME, ENGINE_CONFIG_JSON, verbose_logging=False)
    sz_config = sz_factory.create_config()
    sz_configmanager = sz_factory.create_configmanager()

    config_id = sz_configmanager.get_default_config_id()
    config_definition = sz_configmanager.get_config(config_id)
    config_handle = sz_config.import_config(config_definition)

    # From here on the handle is open; close it even when a later call fails.
    try:
        for data_source in ("CUSTOMERS", "REFERENCE", "WATCHLIST"):
            sz_config.add_data_source(config_handle, data_source)

        config_definition = sz_config.export_config(config_handle)
        # The script name doubles as the comment stored with the new configuration.
        config_id = sz_configmanager.add_config(config_definition, INSTANCE_NAME)
        sz_configmanager.set_default_config_id(config_id)

        response2 = sz_config.get_data_sources(config_handle)
    finally:
        sz_config.close_config(config_handle)

    print(response2)
except SzError as err:
    print(f"{err.__class__.__name__} - {err}", file=sys.stderr)

python/deleting/README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Deleting Data
2+
3+
The deletion snippets outline deleting previously added source records. Deleting source records removes the previously added source record from the system, completes the entity resolution process and persists outcomes in the Senzing repository.
4+
5+
Deleting a record only requires the data source code and record ID for the record to be deleted.
6+
7+
## Snippets
8+
9+
- **delete_futures.py**
10+
- Read and delete source records from a file using multiple threads
11+
- **delete_loop.py**
12+
- Basic read and delete source records from a file
13+
- **delete_with_info_futures.py**
14+
- Read and delete source records from a file using multiple threads
15+
- Collect the response using the [SZ_WITH_INFO flag](../../README.md#sz_with_info-flag) on the `delete_record()` method and write it to a file

python/deleting/delete_futures.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#! /usr/bin/env python3
2+
3+
import concurrent.futures
4+
import itertools
5+
import json
6+
import os
7+
import sys
8+
from pathlib import Path
9+
10+
from senzing_core import (
11+
SzAbstractFactory,
12+
SzBadInputError,
13+
SzError,
14+
SzRetryableError,
15+
SzUnrecoverableError,
16+
)
17+
18+
# Engine settings are taken from the environment; "{}" is used when the variable is unset.
ENGINE_CONFIG_JSON = os.environ.get("SENZING_ENGINE_CONFIGURATION_JSON", "{}")
# The relative path is resolved against the current working directory.
INPUT_FILE = Path("../../resources/data/del-500.jsonl").resolve()
# This script's file name without its extension.
INSTANCE_NAME = Path(__file__).stem
21+
22+
23+
def mock_logger(level, error, error_record=None):
    """Print a one-line report about ``error`` to stderr.

    Args:
        level: Label printed in front of the message (e.g. "ERROR").
        error: The exception being reported; its class name and text are shown.
        error_record: Optional record printed on a following line; skipped
            when falsy.
    """
    error_name = error.__class__.__name__
    print(f"\n{level}: {error_name} - {error}", file=sys.stderr)
    if not error_record:
        return
    print(f"{error_record}", file=sys.stderr)
27+
28+
29+
def delete_record(engine, record_to_delete):
    """Parse one JSON record and ask ``engine`` to delete it.

    Args:
        engine: Object exposing ``delete_record(data_source, record_id)``.
        record_to_delete: JSON text of the record; its "DATA_SOURCE" and
            "RECORD_ID" values are used, each defaulting to "" when absent.

    Raises:
        json.JSONDecodeError: If ``record_to_delete`` is not valid JSON.
    """
    parsed = json.loads(record_to_delete)
    engine.delete_record(parsed.get("DATA_SOURCE", ""), parsed.get("RECORD_ID", ""))
34+
35+
36+
def futures_del(engine, input_file):
    """Delete every record listed in ``input_file`` using a thread pool.

    The file is read one line at a time and each line is handed to
    ``delete_record`` on the executor. The pool is primed with one line per
    worker; every time a task finishes, whether it succeeded or failed with a
    non-fatal error, the next line is submitted so the number of in-flight
    tasks stays constant until the file is exhausted.

    Args:
        engine: Passed through unchanged to ``delete_record``.
        input_file: Path of the UTF-8 text file to read, one record per line.

    Raises:
        SzError: Re-raised after being logged as CRITICAL; processing stops.
    """
    success_recs = 0
    error_recs = 0

    with open(input_file, "r", encoding="utf-8") as in_file:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Prime the pool with one record per worker thread.
            futures = {
                executor.submit(delete_record, engine, record): record
                for record in itertools.islice(in_file, executor._max_workers)
            }

            while futures:
                done, _ = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
                for f in done:
                    # Drop the finished future from the pending map up front so it
                    # is removed on every path, including the re-raise below.
                    finished_record = futures.pop(f)
                    try:
                        f.result()
                    except (SzBadInputError, json.JSONDecodeError) as err:
                        mock_logger("ERROR", err, finished_record)
                        error_recs += 1
                    except SzRetryableError as err:
                        mock_logger("WARN", err, finished_record)
                        error_recs += 1
                    except (SzUnrecoverableError, SzError) as err:
                        mock_logger("CRITICAL", err, finished_record)
                        raise
                    else:
                        success_recs += 1
                        if success_recs % 100 == 0:
                            print(f"Processed {success_recs:,} deletes, with {error_recs:,} errors", flush=True)

                    # Refill the slot after successes AND non-fatal failures.
                    # Refilling only on success would shrink the window with every
                    # bad record and could end the run before the file was read.
                    next_record = in_file.readline()
                    if next_record:
                        futures[executor.submit(delete_record, engine, next_record)] = next_record

            print(f"\nSuccessfully deleted {success_recs:,} records, with {error_recs:,} errors")
73+
74+
75+
# Script entry: build the engine and run the threaded delete over INPUT_FILE.
# An SzError raised by any of these steps is reported as CRITICAL.
try:
    sz_factory = SzAbstractFactory(
        INSTANCE_NAME,
        ENGINE_CONFIG_JSON,
        verbose_logging=False,
    )
    sz_engine = sz_factory.create_engine()
    futures_del(sz_engine, INPUT_FILE)
except SzError as err:
    mock_logger("CRITICAL", err)

0 commit comments

Comments
 (0)