diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..35b644d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +## Describe the bug + + +## How To Reproduce + + +## Expected behavior + + +## Additional context + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..15e8923 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,24 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +## Problem + + +## Requested feature + + +### Alternatives considered + +- + +## Additional context +- diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..dfae554 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,7 @@ +Closes + +## Background + +## Design (high-level) + +## Other notes diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a00f29f --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,17 @@ +name: tests +on: [pull_request] +jobs: + run-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install uv + uses: yezz123/setup-uv@v4 + - name: Install dependencies + run: uv sync --dev + - name: Run tests + run: uv run pytest diff --git a/.gitignore b/.gitignore index 4d318ca..22b6bab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,3 @@ -# Generated by Cargo -# will have compiled files and executables -debug/ -target/ - -# These are backup files generated by rustfmt -**/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - -# RustRover -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - # MacOS Stuff .DS_Store @@ -40,3 +22,78 @@ temp.txt # SpecStory explanation file .specstory/.what-is-this.md + + +/target +.hypothesis/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +# lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 88b3fe1..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,201 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
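The `tests` workflow above can be mirrored locally, assuming `uv` is installed: `uv sync --dev` installs the dev dependencies, then `uv run pytest` runs the suite, matching the CI steps exactly.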
-version = 4 - -[[package]] -name = "autocfg" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chidian" -version = "0.1.0" -dependencies = [ - "chidian-core", - "pyo3", -] - -[[package]] -name = "chidian-core" -version = "0.1.0" -dependencies = [ - "nom", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "indoc" -version = "2.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" - -[[package]] -name = "libc" -version = "0.2.172" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "portable-atomic" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "memoffset", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" -dependencies = 
[ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "syn" -version = "2.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" diff --git a/Cargo.toml b/Cargo.toml deleted file mode 100644 index a8aed1c..0000000 --- a/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[workspace] -members = [ - "chidian-core", - "chidian-py" -] -resolver = "2" - -# Workspace-level settings -[workspace.dependencies] -nom = "7.1" - -# Optional: shared workspace metadata -[workspace.package] -version = "0.1.0" -edition = "2024" diff --git a/README.md b/README.md index 86862fb..0b2c564 100644 --- a/README.md +++ b/README.md @@ -2,20 +2,17 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -> Declarative, type‑safe data mapping for humans. Backed by Rust speed and the Pydantic ecosystem. +> Declarative, type-safe data mapping for humans. -chidian is a cross-language framework for composable, readable, and sharable data mappings built on top of Pydantic. - -> [!NOTE] -> chidian is pre-release -- v0.1 will be on PyPI soon! Feel free to build the code locally (using [maturin](https://github.com/PyO3/maturin)) if you want to try it out now. +chidian is a pure Python framework for composable, readable, and sharable data mappings built on top of **Pydantic v2**. ## 30-second tour ```python from pydantic import BaseModel -from chidian import DataMapping, Piper, template +from chidian import Mapper, DataMapping import chidian.partials as p -# 🎙️ 1. Describe your schemas +# 1️⃣ Define your source & target schemas class Source(BaseModel): name: dict address: dict @@ -24,55 +21,52 @@ class Target(BaseModel): full_name: str address: str -# 🔎 2. 
Define mapping logic with helpful partial functions +# 2️⃣ Write pure dict→dict transformation logic with `Mapper` fmt = p.template("{} {} {}", skip_none=True) -person_mapping = DataMapping( - Source, - Target, - mapping=lambda src: { + +person_mapper = Mapper( + lambda src: { "full_name": fmt( p.get("name.first")(src), - p.get("name.given[*]") >> p.join(" ")(src), + (p.get("name.given[*]") | p.join(" "))(src), p.get("name.suffix")(src), ), - "address": p.get("address") >> p.flatten_paths( - ["street[0]", "street[1]", "city", "postal_code", "country"], - delimiter="\n" + "address": (p.get("address") | p.flatten_paths( + [ + "street[0]", + "street[1]", + "city", + "postal_code", + "country", + ], + delimiter="\n", ))(src), } ) -# 🌱 3. Create runtime and execute transformation -piper = Piper(person_mapping) -target_record = piper(source_data) - -# For bidirectional mappings, use simple path mappings: -bidirectional_mapping = DataMapping( - Source, - Target, - mapping={ - "name.first": "full_name", - "address": "address" - }, - bidirectional=True, +# 3️⃣ Wrap it with `DataMapping` for schema validation +person_mapping = DataMapping( + mapper=person_mapper, + input_schema=Source, + output_schema=Target, ) -# ⏪ Reverse transform (B → A) – zero extra code! -bidirectional_piper = Piper(bidirectional_mapping) -target, spillover = bidirectional_piper(source_data) -source_roundtrip = bidirectional_piper.reverse(target, spillover) +# 4️⃣ Execute! (source_data is any plain dict shaped like Source) +source_obj = Source.model_validate(source_data) +result = person_mapping.forward(source_obj) +print(result) ``` -See the [tests](/chidian-py/tests) for some use-cases. +See the [tests](/chidian/tests) for example use cases. ## Feature highlights | Feature | In one line | | ---------------- | ---------------------------------------------------------------------------- | -| **Piper** | Runtime engine for executing DataMapping transformations between Pydantic models. | -| **DataMapping** | Unidirectional or bidirectional mappings between Pydantic models with callable logic support. | -| **Partials API** | `>>` operator chains (`split >> last >> upper`) keep lambdas away. | -| **RecordSet** | Lightweight collection class: `select`, `filter`, `to_json`, arrow export. | +| **Mapper** | Pure dict→dict runtime transformations – no schema required. | +| **DataMapping** | Adds Pydantic validation around a `Mapper` for safe, forward-only transforms. | +| **Partials API** | `\|` operator chains (`split \| last \| upper`) keep lambdas away. | +| **DictGroup** | Lightweight collection class: `select`, `filter`, `to_json`, Arrow export. | | **Lexicon** | Bidirectional code look‑ups *(LOINC ↔ SNOMED)* with defaults + metadata. | @@ -81,7 +75,7 @@ See the [tests](/chidian-py/tests) for some use-cases. chidian treats **Pydantic v2 models as first‑class citizens**: * Validate inputs & outputs automatically with Pydantic v2 -* `Piper` attaches models to your pipeline for IDE completion & mypy. +* `DataMapping` wraps your `Mapper` for IDE completion & mypy. * You can drop down to plain dicts when prototyping with `strict=False`. diff --git a/chidian-core/Cargo.lock b/chidian-core/Cargo.lock deleted file mode 100644 index 084c19c..0000000 --- a/chidian-core/Cargo.lock +++ /dev/null @@ -1,32 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
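A minimal sketch of the `|` chaining named in the README's Partials API row, assuming `p.get` and `p.join` behave as in the 30-second tour and that `|` composes callables into a chain; the `record` value here is illustrative:

```python
import chidian.partials as p

# Illustrative input; shape borrowed from the tour's Source schema.
record = {"name": {"given": ["Ada", "Marie"]}}

# p.get(...) and p.join(...) each return a callable; `|` is assumed to
# compose them left-to-right into a single callable chain.
given_names = p.get("name.given[*]") | p.join(" ")

# Build the chain first, then apply it to the data. This is why the tour
# parenthesizes the chain before calling it with (src).
print(given_names(record))  # "Ada Marie"
```

Note the call order: without the parentheses, `p.join(" ")(src)` would be evaluated first (a function call binds tighter than `|`), which is not the intended composition.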
-version = 4 - -[[package]] -name = "chidian-core" -version = "0.1.0" -dependencies = [ - "nom", -] - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] diff --git a/chidian-core/Cargo.toml b/chidian-core/Cargo.toml deleted file mode 100644 index 9e2cac3..0000000 --- a/chidian-core/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "chidian-core" -version.workspace = true -edition.workspace = true - -[dependencies] -nom.workspace = true diff --git a/chidian-core/src/lib.rs b/chidian-core/src/lib.rs deleted file mode 100644 index dba3808..0000000 --- a/chidian-core/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub mod parser; -pub mod types; - -// Re-export main types for easy access -pub use parser::{Path, PathSegment, parse_path}; -pub use types::*; diff --git a/chidian-core/src/parser.rs b/chidian-core/src/parser.rs deleted file mode 100644 index 931845c..0000000 --- a/chidian-core/src/parser.rs +++ /dev/null @@ -1,208 +0,0 @@ -use nom::{ - IResult, - branch::alt, - bytes::complete::{tag, take_while1}, - character::complete::{char, digit1}, - combinator::{map, opt, recognize}, - multi::separated_list1, - sequence::{delimited, tuple}, -}; -use std::str::FromStr; - -#[derive(Debug, Clone, PartialEq)] -pub enum PathSegment { - Key(String), - Index(i32), - Slice(Option, Option), - Wildcard, - Tuple(Vec), -} - -#[derive(Debug, Clone, PartialEq)] -pub struct Path { - pub segments: Vec, -} - -// Parser for valid identifier characters -fn is_identifier_char(c: char) -> bool { - c.is_alphanumeric() || c == '_' || c == '-' -} - -// Parse a key name (alphanumeric + underscore + hyphen) -fn parse_key(input: &str) -> IResult<&str, PathSegment> { - map(take_while1(is_identifier_char), |s: &str| { - PathSegment::Key(s.to_string()) - })(input) -} - -// Parse a signed integer -fn parse_integer(input: &str) -> IResult<&str, i32> { - map(recognize(tuple((opt(char('-')), digit1))), |s: &str| { - i32::from_str(s).unwrap() - })(input) -} - -// Parse array index like [0] or [-1] -fn parse_index(input: &str) -> IResult<&str, PathSegment> { - map( - delimited(char('['), parse_integer, char(']')), - PathSegment::Index, - )(input) -} - -// Parse wildcard [*] -fn parse_wildcard(input: &str) -> IResult<&str, PathSegment> { - map(tag("[*]"), |_| PathSegment::Wildcard)(input) -} - -// Parse slice like [1:3] or [:3] or [1:] -fn parse_slice(input: &str) -> IResult<&str, PathSegment> { - delimited( - char('['), - map( - tuple((opt(parse_integer), char(':'), opt(parse_integer))), - |(start, _, end)| PathSegment::Slice(start, end), - ), - char(']'), - )(input) -} - -// Parse whitespace -fn ws(input: &str) -> IResult<&str, &str> { - use nom::error::Error; - take_while1::<_, _, Error<&str>>(|c: char| c.is_whitespace())(input).or(Ok((input, ""))) -} - -// Parse key followed by optional brackets -fn parse_key_with_brackets(input: &str) -> IResult<&str, Vec> { - let (input, key) = parse_key(input)?; - let mut segments = vec![key]; - - // 
Parse any following brackets - let mut remaining = input; - loop { - if let Ok((new_remaining, bracket)) = - alt((parse_wildcard, parse_slice, parse_index))(remaining) - { - segments.push(bracket); - remaining = new_remaining; - } else { - break; - } - } - - Ok((remaining, segments)) -} - -// Parse a complete path (handles paths starting with brackets) -pub fn parse_path(input: &str) -> IResult<&str, Path> { - // Check if path starts with a bracket - if input.starts_with('[') { - let (remaining, first_segments) = parse_path_segment_or_key_with_brackets(input)?; - - if remaining.is_empty() { - return Ok(( - remaining, - Path { - segments: first_segments, - }, - )); - } - - // If there's more path after the initial bracket - if remaining.starts_with('.') { - let (remaining, _) = char('.')(remaining)?; - let (remaining, rest) = - separated_list1(char('.'), parse_path_segment_or_key_with_brackets)(remaining)?; - - let mut all_segments = first_segments; - for segment_group in rest { - all_segments.extend(segment_group); - } - - return Ok(( - remaining, - Path { - segments: all_segments, - }, - )); - } - - Ok(( - remaining, - Path { - segments: first_segments, - }, - )) - } else { - // Normal path parsing - map( - separated_list1(char('.'), parse_path_segment_or_key_with_brackets), - |segment_groups| Path { - segments: segment_groups.into_iter().flatten().collect(), - }, - )(input) - } -} - -// Parse a single path (for use in tuples) -fn parse_single_path(input: &str) -> IResult<&str, Path> { - parse_path(input) -} - -// Parse tuple like (id,name) or (id,inner.msg) with optional whitespace -fn parse_tuple(input: &str) -> IResult<&str, PathSegment> { - map( - delimited( - tuple((char('('), ws)), - separated_list1( - tuple((ws, char(','), ws)), - delimited(ws, parse_single_path, ws), - ), - tuple((ws, char(')'))), - ), - PathSegment::Tuple, - )(input) -} - -// Parse any path segment or key with brackets -fn parse_path_segment_or_key_with_brackets(input: &str) -> IResult<&str, Vec> { - alt(( - map(parse_wildcard, |seg| vec![seg]), - map(parse_slice, |seg| vec![seg]), - map(parse_index, |seg| vec![seg]), - map(parse_tuple, |seg| vec![seg]), - parse_key_with_brackets, - ))(input) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_simple_path() { - let (_, path) = parse_path("patient.name").unwrap(); - assert_eq!(path.segments.len(), 2); - assert_eq!(path.segments[0], PathSegment::Key("patient".to_string())); - assert_eq!(path.segments[1], PathSegment::Key("name".to_string())); - } - - #[test] - fn test_parse_path_with_index() { - let (_, path) = parse_path("items[0].name").unwrap(); - assert_eq!(path.segments.len(), 3); - assert_eq!(path.segments[0], PathSegment::Key("items".to_string())); - assert_eq!(path.segments[1], PathSegment::Index(0)); - assert_eq!(path.segments[2], PathSegment::Key("name".to_string())); - } - - #[test] - fn test_parse_wildcard() { - let (_, path) = parse_path("items[*].id").unwrap(); - assert_eq!(path.segments.len(), 3); - assert_eq!(path.segments[0], PathSegment::Key("items".to_string())); - assert_eq!(path.segments[1], PathSegment::Wildcard); - assert_eq!(path.segments[2], PathSegment::Key("id".to_string())); - } -} diff --git a/chidian-core/src/types.rs b/chidian-core/src/types.rs deleted file mode 100644 index 9e0f474..0000000 --- a/chidian-core/src/types.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::fmt; - -/// Error type for traversal operations -#[derive(Debug, Clone)] -pub enum TraversalError { - KeyNotFound(String), - IndexOutOfRange(i32), - 
TypeMismatch(String), - InvalidPath(String), - Custom(String), -} - -impl fmt::Display for TraversalError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - TraversalError::KeyNotFound(key) => write!(f, "Key '{}' not found", key), - TraversalError::IndexOutOfRange(idx) => write!(f, "Index {} out of range", idx), - TraversalError::TypeMismatch(msg) => write!(f, "Type mismatch: {}", msg), - TraversalError::InvalidPath(path) => write!(f, "Invalid path: {}", path), - TraversalError::Custom(msg) => write!(f, "{}", msg), - } - } -} - -impl std::error::Error for TraversalError {} - -/// Result type for traversal operations -pub type TraversalResult = Result; diff --git a/chidian-py/.github/workflows/CI.yml b/chidian-py/.github/workflows/CI.yml deleted file mode 100644 index e437415..0000000 --- a/chidian-py/.github/workflows/CI.yml +++ /dev/null @@ -1,181 +0,0 @@ -# This file is autogenerated by maturin v1.8.6 -# To update, run -# -# maturin generate-ci github -# -name: CI - -on: - push: - branches: - - main - - master - tags: - - '*' - pull_request: - workflow_dispatch: - -permissions: - contents: read - -jobs: - linux: - runs-on: ${{ matrix.platform.runner }} - strategy: - matrix: - platform: - - runner: ubuntu-22.04 - target: x86_64 - - runner: ubuntu-22.04 - target: x86 - - runner: ubuntu-22.04 - target: aarch64 - - runner: ubuntu-22.04 - target: armv7 - - runner: ubuntu-22.04 - target: s390x - - runner: ubuntu-22.04 - target: ppc64le - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.x - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter - sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - manylinux: auto - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-linux-${{ matrix.platform.target }} - path: dist - - musllinux: - runs-on: ${{ matrix.platform.runner }} - strategy: - matrix: - platform: - - runner: ubuntu-22.04 - target: x86_64 - - runner: ubuntu-22.04 - target: x86 - - runner: ubuntu-22.04 - target: aarch64 - - runner: ubuntu-22.04 - target: armv7 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.x - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter - sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - manylinux: musllinux_1_2 - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-musllinux-${{ matrix.platform.target }} - path: dist - - windows: - runs-on: ${{ matrix.platform.runner }} - strategy: - matrix: - platform: - - runner: windows-latest - target: x64 - - runner: windows-latest - target: x86 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.x - architecture: ${{ matrix.platform.target }} - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter - sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-windows-${{ matrix.platform.target }} - path: dist - - macos: - runs-on: ${{ matrix.platform.runner }} - strategy: - matrix: - platform: - - runner: macos-13 - target: x86_64 - - runner: macos-14 - target: aarch64 - steps: - - uses: actions/checkout@v4 - - uses: 
actions/setup-python@v5 - with: - python-version: 3.x - - name: Build wheels - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter - sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-macos-${{ matrix.platform.target }} - path: dist - - sdist: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Build sdist - uses: PyO3/maturin-action@v1 - with: - command: sdist - args: --out dist - - name: Upload sdist - uses: actions/upload-artifact@v4 - with: - name: wheels-sdist - path: dist - - release: - name: Release - runs-on: ubuntu-latest - if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} - needs: [linux, musllinux, windows, macos, sdist] - permissions: - # Use to sign the release artifacts - id-token: write - # Used to upload release artifacts - contents: write - # Used to generate artifact attestation - attestations: write - steps: - - uses: actions/download-artifact@v4 - - name: Generate artifact attestation - uses: actions/attest-build-provenance@v2 - with: - subject-path: 'wheels-*/*' - - name: Publish to PyPI - if: ${{ startsWith(github.ref, 'refs/tags/') }} - uses: PyO3/maturin-action@v1 - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} - with: - command: upload - args: --non-interactive --skip-existing wheels-*/* diff --git a/chidian-py/.gitignore b/chidian-py/.gitignore deleted file mode 100644 index 44649fb..0000000 --- a/chidian-py/.gitignore +++ /dev/null @@ -1,73 +0,0 @@ -/target -.hypothesis/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -.pytest_cache/ -*.py[cod] - -# C extensions -*.so - -# Distribution / packaging -.Python -.venv/ -env/ -bin/ -build/ -develop-eggs/ -dist/ -eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -include/ -man/ -venv/ -*.egg-info/ -.installed.cfg -*.egg - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt -pip-selfcheck.json - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.cache -nosetests.xml -coverage.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject - -# Rope -.ropeproject - -# Django stuff: -*.log -*.pot - -.DS_Store - -# Sphinx documentation -docs/_build/ - -# PyCharm -.idea/ - -# VSCode -.vscode/ - -# Pyenv -.python-version diff --git a/chidian-py/Cargo.lock b/chidian-py/Cargo.lock deleted file mode 100644 index 88b3fe1..0000000 --- a/chidian-py/Cargo.lock +++ /dev/null @@ -1,201 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 4 - -[[package]] -name = "autocfg" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chidian" -version = "0.1.0" -dependencies = [ - "chidian-core", - "pyo3", -] - -[[package]] -name = "chidian-core" -version = "0.1.0" -dependencies = [ - "nom", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "indoc" -version = "2.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" - -[[package]] -name = "libc" -version = "0.2.172" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "portable-atomic" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "memoffset", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" -dependencies = 
[ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "syn" -version = "2.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" diff --git a/chidian-py/Cargo.toml b/chidian-py/Cargo.toml deleted file mode 100644 index c8ec5aa..0000000 --- a/chidian-py/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "chidian" -version.workspace = true -edition.workspace = true - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[lib] -name = "chidian_rs" -crate-type = ["cdylib"] - -[dependencies] -pyo3 = "0.24.0" -chidian-core = { path = "../chidian-core" } diff --git a/chidian-py/chidian/__init__.py b/chidian-py/chidian/__init__.py deleted file mode 100644 index 18bd9e5..0000000 --- a/chidian-py/chidian/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .chidian_rs import get -from .data_mapping import DataMapping -from .lib import put -from .partials import ChainableFn, FunctionChain -from .piper import Piper -from .recordset import RecordSet -from .seeds import DROP, KEEP - -__all__ = [ - "get", - "put", - "RecordSet", - "Piper", - "DataMapping", - "DROP", - "KEEP", - "FunctionChain", - "ChainableFn", -] diff --git a/chidian-py/chidian/data_mapping.py b/chidian-py/chidian/data_mapping.py deleted file mode 100644 index 43b0cfd..0000000 --- a/chidian-py/chidian/data_mapping.py +++ /dev/null @@ -1,440 +0,0 @@ -""" -Unified data mapping interface that supports both unidirectional (View) and bidirectional (Lens) transformations. -""" - -from typing import Any, Callable, Optional, Tuple, Type, TypeVar - -from pydantic import BaseModel - -from .chidian_rs import get -from .lib import put -from .recordset import RecordSet - -# Type variables for generic models -SourceT = TypeVar("SourceT", bound=BaseModel) -TargetT = TypeVar("TargetT", bound=BaseModel) - - -class DataMapping: - """ - A unified data mapping interface for transformations between Pydantic models. 
- - Supports two modes: - - View (unidirectional): Complex mappings with functions, conditions, etc. - - Lens (bidirectional): Simple string-to-string path mappings that can be reversed - """ - - def __init__( - self, - source_model: Type[SourceT], - target_model: Type[TargetT], - mapping: dict[str, str] | dict[str, str | Callable] | Callable[[dict], dict], - bidirectional: bool = False, - strict: bool = True, - metadata: Optional[dict] = None, - ): - """ - Initialize a data mapping between Pydantic models. - - Args: - source_model: Source Pydantic BaseModel class - target_model: Target Pydantic BaseModel class - mapping: For bidirectional mode: dict of string-to-string path mappings. - For unidirectional mode: dict of field mappings (strings or callables) - or a callable that transforms source dict to target dict - bidirectional: If True, enables bidirectional mode with reversible mappings - strict: If True, validate against models and fail on errors - metadata: Optional metadata about the mapping - """ - self._validate_pydantic_models(source_model, target_model) - - self.source_model = source_model - self.target_model = target_model - self.mapping = mapping - self.bidirectional = bidirectional - self.strict = strict - self.metadata = metadata or {} - - if self.bidirectional: - self._setup_bidirectional_mapping(mapping, strict) - else: - self._setup_unidirectional_mapping(mapping) - - def forward(self, source: SourceT | dict) -> TargetT | Tuple[TargetT, RecordSet]: - """ - Transform source model to target model. - - Args: - source: Instance of source_model or dict - - Returns: - - Unidirectional mode: Instance of target_model - - Bidirectional mode: Tuple of (target_model, spillover RecordSet) - """ - validated_source: SourceT = self._validate_and_convert_source(source) - source_dict = self._convert_to_dict(validated_source) - - if self.bidirectional: - return self._forward_bidirectional(source_dict) - else: - return self._forward_unidirectional(source_dict) - - def reverse(self, target: TargetT, spillover: Optional[RecordSet] = None) -> Any: - """ - Reverse transformation (target to source). Only available in bidirectional mode. 
- - Args: - target: Instance of target_model - spillover: Optional spillover data from forward transformation - - Returns: - Instance of source_model - """ - if not self.bidirectional: - raise RuntimeError("reverse() is only available in bidirectional mode") - - if not self.is_reversible(): - raise ValueError( - "This mapping cannot reverse - mappings are not bidirectional" - ) - - # Convert target to dict - target_dict = target.model_dump() - - # Apply reverse mappings - source_data: dict[str, Any] = {} - for target_path, source_path in self._reverse_mappings.items(): - value = get(target_dict, target_path) - if value is not None: - source_data = put(source_data, source_path, value, strict=False) - - # Merge spillover if provided - if spillover and len(spillover) > 0: - spillover_data = spillover._items[0] - source_data = self._merge_dicts(source_data, spillover_data) - - # Create source model - return self.source_model.model_validate(source_data) - - def is_reversible(self) -> bool: - """Check if this mapping can be reversed (bidirectional mode only).""" - if not self.bidirectional: - return False - - # Check for duplicate target paths (many-to-one mappings) - # Type guard to ensure mapping is dict for bidirectional mode - if not isinstance(self.mapping, dict): - return False - target_paths = list(self.mapping.values()) - return len(target_paths) == len(set(target_paths)) - - def can_reverse(self) -> bool: - """Alias for is_reversible() for backward compatibility.""" - return self.is_reversible() - - def _validate_pydantic_models(self, source_model: Type, target_model: Type) -> None: - """Validate that models are Pydantic v2 BaseModels.""" - if not hasattr(source_model, "model_fields"): - raise TypeError( - f"source_model must be a Pydantic v2 BaseModel, got {type(source_model)}" - ) - if not hasattr(target_model, "model_fields"): - raise TypeError( - f"target_model must be a Pydantic v2 BaseModel, got {type(target_model)}" - ) - - def _setup_bidirectional_mapping(self, mapping: Any, strict: bool) -> None: - """Setup and validate bidirectional mapping configuration.""" - self._validate_bidirectional_mapping_type(mapping) - self._validate_bidirectional_mapping_paths(mapping) - self._reverse_mappings = {v: k for k, v in mapping.items()} - - if strict: - self._validate_mapping_reversibility(mapping) - - def _validate_bidirectional_mapping_type(self, mapping: Any) -> None: - """Validate that bidirectional mapping is a dictionary.""" - if not isinstance(mapping, dict): - raise TypeError( - "Bidirectional mappings must be dict of string-to-string paths" - ) - - def _validate_bidirectional_mapping_paths(self, mapping: dict) -> None: - """Validate that all bidirectional mappings are string-to-string.""" - for source_path, target_path in mapping.items(): - if not isinstance(source_path, str) or not isinstance(target_path, str): - raise TypeError( - "Bidirectional mappings must be string-to-string paths. 
" - f"Got {type(source_path).__name__} -> {type(target_path).__name__}" - ) - - def _validate_mapping_reversibility(self, mapping: dict) -> None: - """Validate that mapping can be reversed without conflicts.""" - if not self.is_reversible(): - duplicates = [ - v for v in mapping.values() if list(mapping.values()).count(v) > 1 - ] - raise ValueError( - f"Mapping is not reversible - duplicate target paths: {duplicates}" - ) - - def _setup_unidirectional_mapping(self, mapping: Any) -> None: - """Setup and validate unidirectional mapping configuration.""" - if callable(mapping): - self.mapping_fn = mapping - elif isinstance(mapping, dict): - self._validate_unidirectional_dict_mapping(mapping) - else: - raise TypeError("Mapping must be callable or dict for unidirectional mode") - - def _validate_unidirectional_dict_mapping(self, mapping: dict) -> None: - """Validate unidirectional dictionary mapping in strict mode.""" - # Validate mapping value types - for target_field, mapping_spec in mapping.items(): - if not isinstance(target_field, str): - raise TypeError( - f"Target field names must be strings, got {type(target_field).__name__}" - ) - - if not (isinstance(mapping_spec, str) or callable(mapping_spec)): - raise TypeError( - f"Mapping values must be strings or callables. " - f"Got {type(mapping_spec).__name__} for field '{target_field}'" - ) - - if self.strict: - validation_issues = self._validate_mapping() - if validation_issues["missing_required_fields"]: - raise ValueError( - f"Missing required target fields in mapping: {validation_issues['missing_required_fields']}" - ) - - def _validate_and_convert_source(self, source: SourceT | dict) -> SourceT: - """Validate and convert source input to proper model instance.""" - if not isinstance(source, self.source_model): - if self.strict: - raise TypeError( - f"Expected {self.source_model.__name__}, got {type(source).__name__}" - ) - return self.source_model.model_validate(source) - return source - - def _convert_to_dict(self, source: SourceT) -> dict: - """Convert source model to dictionary for processing.""" - return source.model_dump() if hasattr(source, "model_dump") else source - - def _forward_bidirectional(self, source_dict: dict) -> Tuple[TargetT, RecordSet]: - """Handle forward transformation in bidirectional mode.""" - target_data, mapped_paths = self._apply_bidirectional_mappings(source_dict) - target = self.target_model.model_validate(target_data) - spillover = self._create_spillover(source_dict, mapped_paths) - return target, spillover - - def _apply_bidirectional_mappings(self, source_dict: dict) -> Tuple[dict, set[str]]: - """Apply bidirectional path mappings to source data.""" - target_data: dict[str, Any] = {} - mapped_paths = set() - - # Type guard to ensure mapping is a dict for bidirectional mode - if not isinstance(self.mapping, dict): - raise RuntimeError("Bidirectional mapping must be a dictionary") - - for source_path, target_path in self.mapping.items(): - if not isinstance(target_path, str): - raise RuntimeError( - "Bidirectional mappings must have string target paths" - ) - value = get(source_dict, source_path) - if value is not None: - target_data = put(target_data, target_path, value, strict=False) - mapped_paths.add(source_path) - - return target_data, mapped_paths - - def _create_spillover(self, source_dict: dict, mapped_paths: set[str]) -> RecordSet: - """Create spillover RecordSet from unmapped data.""" - spillover_data = self._collect_spillover(source_dict, mapped_paths) - return RecordSet([spillover_data]) if 
spillover_data else RecordSet() - - def _forward_unidirectional(self, source_dict: dict) -> Any: - """Handle forward transformation in unidirectional mode.""" - if hasattr(self, "mapping_fn"): - return self._apply_function_mapping(source_dict) - else: - return self._apply_dict_mapping(source_dict) - - def _apply_function_mapping(self, source_dict: dict) -> Any: - """Apply callable function mapping to source data.""" - try: - result = self.mapping_fn(source_dict) - return self.target_model.model_validate(result) - except Exception as e: - if self.strict: - raise ValueError(f"Error in mapping function: {e}") - return result - - def _apply_dict_mapping(self, source_dict: dict) -> Any: - """Apply dictionary field mappings to source data.""" - result = self._process_field_mappings(source_dict) - return self._validate_and_construct_target(result) - - def _process_field_mappings(self, source_dict: dict) -> dict: - """Process individual field mappings from the mapping dictionary.""" - result = {} - - # Type guard to ensure mapping is a dict for field processing - if not isinstance(self.mapping, dict): - raise RuntimeError("Field mapping processing requires a dictionary mapping") - - for target_field, mapping_spec in self.mapping.items(): - try: - result[target_field] = self._process_mapping(source_dict, mapping_spec) - except Exception as e: - if self.strict: - raise ValueError(f"Error mapping field '{target_field}': {e}") - return result - - def _validate_and_construct_target(self, result: dict) -> Any: - """Validate and construct target model from processed data.""" - try: - return self.target_model.model_validate(result) - except Exception as e: - if self.strict: - raise ValueError( - f"Failed to construct {self.target_model.__name__}: {e}" - ) - # In non-strict mode, we still need to return TargetT, so attempt validation anyway - return self.target_model.model_validate(result) - - # Core transformation helper methods - - def _process_mapping(self, source: dict, mapping_spec: Any) -> Any: - """Process a single mapping specification (unidirectional mode).""" - # String path - use get - if isinstance(mapping_spec, str): - return get(source, mapping_spec) - - # Callable (FunctionChain, ChainableFn, or other callables) - elif hasattr(mapping_spec, "__call__"): - # Check if it's a chainable function - if hasattr(mapping_spec, "func"): - return mapping_spec(source) - # Regular callable - else: - return mapping_spec(source) - - # Dict with conditional logic (legacy support) - elif isinstance(mapping_spec, dict): - if "source" in mapping_spec: - # Check condition if present - if "condition" in mapping_spec: - if not mapping_spec["condition"](source): - return None - - # Get the value - value = self._process_mapping(source, mapping_spec["source"]) - - # Apply transform if present - if "transform" in mapping_spec: - value = mapping_spec["transform"](value) - - return value - else: - # Nested mapping - return { - k: self._process_mapping(source, v) for k, v in mapping_spec.items() - } - - # SEED objects (they should have an evaluate method) - elif hasattr(mapping_spec, "evaluate"): - return mapping_spec.evaluate(source) - - # Direct value - else: - return mapping_spec - - def _collect_spillover(self, source_dict: dict, mapped_paths: set[str]) -> dict: - """Collect unmapped fields for spillover (bidirectional mode).""" - spillover: dict[str, Any] = {} - - def collect_unmapped(data: dict, path: str = "", target_dict=None): - if target_dict is None: - target_dict = spillover - - for key, value in 
data.items(): - current_path = f"{path}.{key}" if path else key - - # Check if this exact path was mapped - path_mapped = current_path in mapped_paths - - if not path_mapped: - if isinstance(value, dict): - # Add nested dict to spillover - target_dict[key] = {} - collect_unmapped(value, current_path, target_dict[key]) - # Remove empty dicts - if not target_dict[key]: - del target_dict[key] - else: - target_dict[key] = value - - collect_unmapped(source_dict) - return spillover - - def _merge_dicts(self, target: dict, source: dict) -> dict: - """Deep merge two dictionaries.""" - result = target.copy() - - for key, value in source.items(): - if ( - key in result - and isinstance(result[key], dict) - and isinstance(value, dict) - ): - result[key] = self._merge_dicts(result[key], value) - else: - result[key] = value - - return result - - def _validate_mapping(self) -> dict[str, list[str]]: - """Validate the mapping against source and target models (unidirectional mode with dict mapping).""" - issues: dict[str, list[str]] = { - "missing_required_fields": [], - "unknown_target_fields": [], - "invalid_source_fields": [], - } - - # Skip validation for callable mappings - if hasattr(self, "mapping_fn"): - return issues - - # Type guard for dict mapping - if not isinstance(self.mapping, dict): - return issues - - # Get target model fields - target_fields = self._get_model_fields(self.target_model) - - # Check for required fields - required_fields = { - name - for name, field_info in target_fields.items() - if self._is_field_required(field_info) - } - mapped_fields = set(self.mapping.keys()) - issues["missing_required_fields"] = list(required_fields - mapped_fields) - - # Check for unknown target fields - all_target_fields = set(target_fields.keys()) - issues["unknown_target_fields"] = list(mapped_fields - all_target_fields) - - return issues - - def _get_model_fields(self, model: type) -> dict: - """Get fields from Pydantic v2 model.""" - return getattr(model, "model_fields", {}) - - def _is_field_required(self, field_info) -> bool: - """Check if field is required in Pydantic v2.""" - return field_info.is_required() diff --git a/chidian-py/chidian/lexicon.py b/chidian-py/chidian/lexicon.py deleted file mode 100644 index 6e5ba79..0000000 --- a/chidian-py/chidian/lexicon.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Any, Optional - -""" -Bidirectional string mapper for code/terminology translations. - -Primary use case: Medical code system mappings (e.g., LOINC ↔ SNOMED). -Supports both one-to-one and many-to-one relationships with automatic -reverse lookup generation. - -Examples: - Simple code mapping: - >>> loinc_to_snomed = Lexicon({'8480-6': '271649006'}) - >>> loinc_to_snomed['8480-6'] # Forward lookup - '271649006' - >>> loinc_to_snomed['271649006'] # Reverse lookup - '8480-6' - - Many-to-one mapping (first value is default): - >>> mapper = Lexicon({('LA6699-8', 'LA6700-4'): 'absent'}) - >>> mapper['absent'] # Returns first key as default - 'LA6699-8' -""" - - -class Lexicon(dict): - def __init__( - self, mappings: dict, default: Any = None, metadata: Optional[dict] = None - ): - """ - Initialize a bidirectional string mapper. - - Args: - mappings: Dict of string mappings. Keys can be strings or tuples (for many-to-one). - default: Default value to return for missing keys - metadata: Optional metadata about the mapping (version, source, etc.) 
- """ - super().__init__() - self._forward: dict[str, Any] = {} - self._reverse: dict[str, Any] = {} - self._default = default - self.metadata = metadata or {} - - # Build forward and reverse mappings - for key, value in mappings.items(): - if isinstance(key, tuple): - # Many-to-one mapping - for k in key: - self._forward[k] = value - # First element is default for reverse - if value not in self._reverse: - self._reverse[value] = key[0] - else: - # One-to-one mapping - self._forward[key] = value - if value not in self._reverse: - self._reverse[value] = key - - # Store in parent dict for dict-like access - self.update(self._forward) - - def forward(self, key: str) -> Optional[str]: - """Transform from source to target format.""" - return self._forward.get(key, self._default) - - def reverse(self, key: str) -> Optional[str]: - """Transform from target back to source format.""" - return self._reverse.get(key, self._default) - - def __getitem__(self, key: str) -> str: - """Support bidirectional lookup with dict syntax.""" - # Try forward first, then reverse - if key in self._forward: - return self._forward[key] - elif key in self._reverse: - return self._reverse[key] - else: - if self._default is not None: - return self._default - raise KeyError(f"Key '{key}' not found in forward or reverse mappings") - - def get(self, key: str, default: Any = None) -> Any: - """Safe bidirectional lookup with default.""" - if key in self._forward: - return self._forward[key] - elif key in self._reverse: - return self._reverse[key] - else: - # Use provided default if given, otherwise instance default - return default if default is not None else self._default - - def __contains__(self, key: object) -> bool: - """Check if key exists in either direction.""" - return key in self._forward or key in self._reverse - - def __len__(self) -> int: - """Return total number of unique mappings.""" - return len(self._forward) + len(self._reverse) - - def can_reverse(self) -> bool: - """Lexicon always supports reverse transformation.""" - return True diff --git a/chidian-py/chidian/lib.py b/chidian-py/chidian/lib.py deleted file mode 100644 index 810a9ef..0000000 --- a/chidian-py/chidian/lib.py +++ /dev/null @@ -1,345 +0,0 @@ -import re -from copy import deepcopy -from typing import Any - - -def put( - target: dict[str, Any], path: str, value: Any, strict: bool = False -) -> dict[str, Any]: - """ - Set a value at a specific path in a nested dictionary structure. - - This is the complement to the `get` function, allowing you to set values - using dot notation paths. 
- - Args: - target: The dictionary to modify (will not be mutated) - path: The path where to set the value (e.g., "patient.name.given") - value: The value to set at the path - strict: If True, raise errors when path cannot be created - - Returns: - A new dictionary with the value set at the specified path - - Examples: - >>> put({}, "patient.id", "123") - {'patient': {'id': '123'}} - - >>> put({}, "items[0].value", 42) - {'items': [{'value': 42}]} - - >>> put({"patient": {"name": "John"}}, "patient.id", "123") - {'patient': {'name': 'John', 'id': '123'}} - """ - # Create a deep copy to avoid mutating the original - result = deepcopy(target) - - # Parse the path into segments - segments = _parse_put_path(path) - - # Check if path starts with index - we don't support arrays at root - if segments and segments[0]["type"] == "index": - if strict: - raise ValueError( - "Cannot create array at root level - path must start with a key" - ) - else: - return result # Return unchanged - - # Navigate to the target location, creating structure as needed - current = result - - # Navigate to the target location, creating structure as needed - for i, segment in enumerate(segments[:-1]): - current = _navigate_path_segment(current, segment, segments, i, target, strict) - - # Set the value at the final segment - _set_final_value(current, segments[-1], value, strict) - - return result - - -def _parse_put_path(path: str) -> list[dict[str, str | int]]: - """ - Parse a path string into segments for the put operation. - - Returns a list of dicts with 'type' and 'value' keys. - Type can be 'key' or 'index'. - """ - segments = [] - remaining = path - - while remaining: - # Try to match a key at the start - key_match = re.match(r"^([a-zA-Z_][\w-]*)", remaining) - if key_match: - key = key_match.group(1) - segments.append({"type": "key", "value": key}) - remaining = remaining[len(key) :] - - # Check for any following brackets - while remaining and remaining[0] == "[": - bracket_match = re.match(r"^\[(-?\d+)\]", remaining) - if bracket_match: - idx = int(bracket_match.group(1)) - segments.append({"type": "index", "value": idx}) - remaining = remaining[bracket_match.end() :] - else: - break - - # Try to match a bracket at the start (for paths starting with [) - elif remaining and remaining[0] == "[": - bracket_match = re.match(r"^\[(-?\d+)\]", remaining) - if bracket_match: - idx = int(bracket_match.group(1)) - segments.append({"type": "index", "value": idx}) - remaining = remaining[bracket_match.end() :] - else: - raise ValueError(f"Invalid bracket syntax in path: '{path}'") - - # Skip dots - if remaining and remaining[0] == ".": - remaining = remaining[1:] - elif remaining: - # We have remaining content but can't parse it - raise ValueError(f"Invalid path syntax at: '{remaining}' in path: '{path}'") - - if not segments: - raise ValueError(f"Invalid path: '{path}'") - - return segments - - -def _navigate_path_segment( - current: Any, - segment: dict[str, str | int], - segments: list[dict[str, str | int]], - index: int, - target: dict[str, Any], - strict: bool, -) -> Any: - """Navigate through a single path segment, creating containers as needed.""" - if segment["type"] == "key": - return _navigate_key_segment(current, segment, segments, index, target, strict) - elif segment["type"] == "index": - return _navigate_index_segment( - current, segment, segments, index, target, strict - ) - else: - raise ValueError(f"Unknown segment type: {segment['type']}") - - -def _navigate_key_segment( - current: Any, - segment: dict[str, 
str | int], - segments: list[dict[str, str | int]], - index: int, - target: dict[str, Any], - strict: bool, -) -> Any: - """Navigate through a key segment in the path.""" - key = segment["value"] - - # Type guard: key should be str for dict access - if not isinstance(key, str): - if strict: - raise ValueError(f"Dictionary key must be string, got {type(key)}") - else: - return target - - # Ensure current is a dict - if not isinstance(current, dict): - if strict: - raise ValueError(f"Cannot traverse into non-dict at '{key}'") - else: - return target - - # Create or validate the key - _ensure_key_exists(current, key, segments, index, strict) - - return current[key] - - -def _navigate_index_segment( - current: Any, - segment: dict[str, str | int], - segments: list[dict[str, str | int]], - index: int, - target: dict[str, Any], - strict: bool, -) -> Any: - """Navigate through an index segment in the path.""" - idx = segment["value"] - - # Type guard: idx should be int for list access - if not isinstance(idx, int): - if strict: - raise ValueError(f"List index must be integer, got {type(idx)}") - else: - return target - - # Ensure current is a list - if not isinstance(current, list): - if strict: - raise ValueError("Cannot index into non-list") - else: - return target - - # Handle list expansion and negative indexing - expanded_idx = _expand_list_for_index(current, idx, strict) - if expanded_idx is None: # Error case in non-strict mode - return target - - # Create container at index if needed - _ensure_index_container(current, expanded_idx, segments, index, strict) - - return current[expanded_idx] - - -def _ensure_key_exists( - current: dict, - key: str, - segments: list[dict[str, str | int]], - index: int, - strict: bool, -) -> None: - """Ensure a key exists in the dictionary with the correct container type.""" - next_segment = segments[index + 1] - needs_list = next_segment["type"] == "index" - - if key not in current: - # Create new container of the correct type - current[key] = [] if needs_list else {} - elif not isinstance(current[key], (dict, list)): - # Replace non-container value - if strict: - raise ValueError(f"Cannot traverse into non-dict at '{key}'") - else: - current[key] = [] if needs_list else {} - elif isinstance(current[key], dict) and needs_list: - # Have dict but need list - if strict: - raise ValueError(f"Cannot index into dict at '{key}' - expected list") - else: - current[key] = [] - elif isinstance(current[key], list) and not needs_list: - # Have list but need dict - if strict: - raise ValueError(f"Cannot access key in list at '{key}' - expected dict") - else: - current[key] = {} - - -def _expand_list_for_index(current: list, idx: int, strict: bool) -> int | None: - """Expand list if necessary and handle negative indexing.""" - if idx >= 0: - # Positive index - expand list if needed - while len(current) <= idx: - current.append(None) - return idx - else: - # Negative indexing - only works on existing items - actual_idx = len(current) + idx - if actual_idx < 0: - if strict: - raise ValueError(f"Index {idx} out of range") - else: - return None # Signal error in non-strict mode - return actual_idx - - -def _ensure_index_container( - current: list, - idx: int, - segments: list[dict[str, str | int]], - index: int, - strict: bool, -) -> None: - """Ensure the container at the given index has the correct type.""" - if current[idx] is None: - # Determine what type of container we need - needs_list = _determine_next_container_type(segments, index) - current[idx] = [] if needs_list 
else {}
-    elif not isinstance(current[idx], (dict, list)):
-        # There's a non-container value here
-        if index + 1 < len(segments):
-            # We need to traverse further but can't
-            if strict:
-                next_seg = segments[index + 1]
-                if next_seg["type"] == "key":
-                    raise ValueError(f"Cannot traverse into non-dict at index {idx}")
-                else:
-                    raise ValueError(f"Cannot traverse into non-list at index {idx}")
-
-
-def _determine_next_container_type(
-    segments: list[dict[str, str | int]], index: int
-) -> bool:
-    """Determine if the next container should be a list (True) or dict (False)."""
-    if index + 1 < len(segments) - 1:
-        # Look at the next segment
-        next_segment = segments[index + 1]
-        return next_segment["type"] == "index"
-    else:
-        # This is the penultimate segment - look at the final segment
-        final_segment = segments[-1]
-        return final_segment["type"] == "index"
-
-
-def _set_final_value(
-    current: Any, final_segment: dict[str, str | int], value: Any, strict: bool
-) -> None:
-    """Set the value at the final segment of the path."""
-    if final_segment["type"] == "key":
-        _set_key_value(current, final_segment, value, strict)
-    elif final_segment["type"] == "index":
-        _set_index_value(current, final_segment, value, strict)
-
-
-def _set_key_value(
-    current: Any, segment: dict[str, str | int], value: Any, strict: bool
-) -> None:
-    """Set a value at a key in a dictionary."""
-    key = segment["value"]
-
-    # Type guard: key should be str for dict access
-    if not isinstance(key, str):
-        if strict:
-            raise ValueError(f"Dictionary key must be string, got {type(key)}")
-        else:
-            return
-
-    if not isinstance(current, dict):
-        if strict:
-            raise ValueError(f"Cannot set key '{key}' on non-dict")
-        else:
-            return
-
-    current[key] = value
-
-
-def _set_index_value(
-    current: Any, segment: dict[str, str | int], value: Any, strict: bool
-) -> None:
-    """Set a value at an index in a list."""
-    idx = segment["value"]
-
-    # Type guard: idx should be int for list access
-    if not isinstance(idx, int):
-        if strict:
-            raise ValueError(f"List index must be integer, got {type(idx)}")
-        else:
-            return
-
-    if not isinstance(current, list):
-        if strict:
-            raise ValueError(f"Cannot set index {idx} on non-list")
-        else:
-            return
-
-    # Handle list expansion and negative indexing
-    expanded_idx = _expand_list_for_index(current, idx, strict)
-    if expanded_idx is None:  # Error case in non-strict mode
-        return
-
-    current[expanded_idx] = value
diff --git a/chidian-py/chidian/partials.py b/chidian-py/chidian/partials.py deleted file mode 100644 index 8846637..0000000 --- a/chidian-py/chidian/partials.py +++ /dev/null @@ -1,411 +0,0 @@
-"""
-The `partials` module provides a set of standardized partials and chainable helpers so you don't have to write a bunch of one-off lambda functions.
-
-This makes it easier to standardize code and preserves structure when exported to pure JSON.
-""" - -import operator -from functools import partial, reduce -from typing import Any, Callable, Iterable, Sequence, TypeVar - -from .chidian_rs import get as _get - -T = TypeVar("T") - - -class FunctionChain: - """Composable function chain that consolidates operations.""" - - def __init__(self, *operations: Callable): - self.operations = list(operations) - - def __rshift__( - self, other: Callable | "FunctionChain" | "ChainableFn" - ) -> "FunctionChain": - """Chain operations with >> operator.""" - if isinstance(other, FunctionChain): - return FunctionChain(*self.operations, *other.operations) - elif isinstance(other, ChainableFn): - return FunctionChain(*self.operations, other.func) - else: - return FunctionChain(*self.operations, other) - - def __call__(self, value: Any) -> Any: - """Apply all operations in sequence.""" - return reduce(lambda v, f: f(v), self.operations, value) - - def __repr__(self) -> str: - ops = " >> ".join( - f.__name__ if hasattr(f, "__name__") else str(f) for f in self.operations - ) - return f"FunctionChain({ops})" - - def __len__(self) -> int: - """Number of operations in the chain.""" - return len(self.operations) - - -class ChainableFn: - """Wrapper to make any function/partial chainable with >>.""" - - def __init__(self, func: Callable): - self.func = func - # Preserve function metadata - self.__name__ = getattr(func, "__name__", repr(func)) - self.__doc__ = getattr(func, "__doc__", None) - - def __rshift__( - self, other: Callable | FunctionChain | "ChainableFn" - ) -> FunctionChain: - """Start or extend a chain with >> operator.""" - if isinstance(other, FunctionChain): - return FunctionChain(self.func, *other.operations) - elif isinstance(other, ChainableFn): - return FunctionChain(self.func, other.func) - else: - return FunctionChain(self.func, other) - - def __rrshift__(self, other: Callable | FunctionChain) -> FunctionChain: - """Allow chaining when ChainableFn is on the right side.""" - if isinstance(other, FunctionChain): - return FunctionChain(*other.operations, self.func) - else: - return FunctionChain(other, self.func) - - def __call__(self, *args, **kwargs): - """Call the wrapped function.""" - return self.func(*args, **kwargs) - - def __repr__(self) -> str: - return f"ChainableFn({self.__name__})" - - -def get( - key: str, default: Any = None, apply: Any = None, strict: bool = False -) -> Callable[[Any], Any]: - """Create a partial function for get operations.""" - - def get_partial(source): - return _get(source, key, default=default, apply=apply, strict=strict) - - return get_partial - - -# Arithmetic operations using operator module -def add(value: Any, before: bool = False) -> Callable[[Any], Any]: - """Add a value to the input.""" - if before: - return partial(operator.add, value) - else: - return partial(lambda x, v: operator.add(x, v), v=value) - - -def subtract(value: Any, before: bool = False) -> Callable[[Any], Any]: - """Subtract a value from the input.""" - if before: - return partial(operator.sub, value) - else: - return partial(lambda x, v: operator.sub(x, v), v=value) - - -def multiply(value: Any, before: bool = False) -> Callable[[Any], Any]: - """Multiply the input by a value.""" - if before: - return partial(operator.mul, value) - else: - return partial(lambda x, v: operator.mul(x, v), v=value) - - -def divide(value: Any, before: bool = False) -> Callable[[Any], Any]: - """Divide the input by a value.""" - if before: - return partial(operator.truediv, value) - else: - return partial(lambda x, v: operator.truediv(x, v), v=value) 
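
To make the chaining contract concrete, a minimal hedged sketch using only the helpers defined so far (the sample record and field name are made up):

```python
# get(...) returns a plain callable; wrapping each arithmetic partial in
# ChainableFn lets >> assemble a FunctionChain that runs left to right.
to_fahrenheit = get("temp_c") >> ChainableFn(multiply(9 / 5)) >> ChainableFn(add(32))
assert to_fahrenheit({"temp_c": 100}) == 212.0
```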
- - -# Comparison operations using operator module -def equals(value: Any) -> Callable[[Any], bool]: - """Check if input equals the given value.""" - return partial(operator.eq, value) - - -def not_equal(value: Any) -> Callable[[Any], bool]: - """Check if input does not equal the given value.""" - return partial(operator.ne, value) - - -def equivalent(value: Any) -> Callable[[Any], bool]: - """Check if input is the same object as the given value.""" - return partial(operator.is_, value) - - -def not_equivalent(value: Any) -> Callable[[Any], bool]: - """Check if input is not the same object as the given value.""" - return partial(operator.is_not, value) - - -def contains(value: Any) -> Callable[[Any], bool]: - """Check if input contains the given value.""" - return partial(lambda x, v: operator.contains(x, v), v=value) - - -def not_contains(value: Any) -> Callable[[Any], bool]: - """Check if input does not contain the given value.""" - return partial(lambda x, v: not operator.contains(x, v), v=value) - - -def contained_in(container: Any) -> Callable[[Any], bool]: - """Check if input is contained in the given container.""" - return partial(lambda c, x: operator.contains(c, x), container) - - -def not_contained_in(container: Any) -> Callable[[Any], bool]: - """Check if input is not contained in the given container.""" - return partial(lambda c, x: not operator.contains(c, x), container) - - -def isinstance_of(type_or_types: type) -> Callable[[Any], bool]: - """Check if input is an instance of the given type(s).""" - return partial(lambda x, types: isinstance(x, types), types=type_or_types) - - -# Iterable operations using operator module -def keep(n: int) -> Callable[[Sequence[T]], Sequence[T]]: - """Keep only the first n items from an iterable.""" - return partial(lambda x, n: x[:n], n=n) - - -def index(i: int) -> Callable[[Sequence[T]], Any]: - """Get the item at index i from an iterable.""" - return partial(lambda x, idx: operator.getitem(x, idx), idx=i) - - -# Standard library wrappers -def map_to_list(func: Callable[[T], Any]) -> Callable[[Iterable[T]], list]: - """Apply a function to each item in an iterable and return a list.""" - return partial(lambda f, iterable: list(map(f, iterable)), func) - - -def filter_to_list(predicate: Callable[[T], bool]) -> Callable[[Iterable[T]], list]: - """Filter an iterable using a predicate and return a list.""" - return partial(lambda p, iterable: list(filter(p, iterable)), predicate) - - -# String manipulation functions as ChainableFn -upper = ChainableFn(str.upper) -lower = ChainableFn(str.lower) -strip = ChainableFn(str.strip) -capitalize = ChainableFn(str.capitalize) - - -def split(sep: str | None = None) -> ChainableFn: - """Create a chainable split function.""" - return ChainableFn(partial(str.split, sep=sep)) - - -def replace(old: str, new: str) -> ChainableFn: - """Create a chainable replace function.""" - return ChainableFn( - partial( - lambda s, old_val, new_val: s.replace(old_val, new_val), - old_val=old, - new_val=new, - ) - ) - - -def join(sep: str) -> ChainableFn: - """Create a chainable join function.""" - return ChainableFn(partial(lambda separator, items: separator.join(items), sep)) - - -# Array/List operations as ChainableFn -first = ChainableFn(lambda x: x[0] if x else None) -last = ChainableFn(lambda x: x[-1] if x else None) -length = ChainableFn(len) - - -def at_index(i: int) -> ChainableFn: - """Get element at index.""" - return ChainableFn(partial(lambda x, idx: x[idx] if len(x) > idx else None, idx=i)) - - -def 
slice_range(start: int | None = None, end: int | None = None) -> ChainableFn: - """Slice a sequence.""" - return ChainableFn(partial(lambda x, s, e: x[s:e], s=start, e=end)) - - -# Type conversions as ChainableFn -to_int = ChainableFn(int) -to_float = ChainableFn(float) -to_str = ChainableFn(str) -to_bool = ChainableFn(bool) - - -# Common data transformations -def round_to(decimals: int) -> ChainableFn: - """Round to specified decimals.""" - return ChainableFn(partial(round, ndigits=decimals)) - - -def lookup(mapping: Any, default: Any = None) -> ChainableFn: - """ - Create a chainable lookup function for objects supporting __getitem__. - - Works with dict, Lexicon, or any object that implements __getitem__. - Uses the get() method if available for safe lookups with default values. - - Args: - mapping: Object supporting __getitem__ (dict, Lexicon, etc.) - default: Default value to return if key is not found - - Returns: - ChainableFn that performs lookups on the mapping - - Examples: - >>> codes = {'A': 'Alpha', 'B': 'Beta'} - >>> get('code') >> lookup(codes, 'Unknown') - - >>> lexicon = Lexicon({'01': 'One', '02': 'Two'}) - >>> get('id') >> lookup(lexicon) - """ - # Create the lookup function once, not per invocation - if hasattr(mapping, "get"): - # Use get method if available (dict, Lexicon, etc.) - def lookup_fn(key): - return mapping.get(key, default) - else: - # Fallback to __getitem__ with try/except - def lookup_fn(key): - try: - return mapping[key] - except (KeyError, IndexError, TypeError): - return default - - return ChainableFn(lookup_fn) - - -def default_to(default_value: Any) -> ChainableFn: - """Replace None with default value.""" - return ChainableFn( - partial(lambda x, default: default if x is None else x, default=default_value) - ) - - -def extract_id() -> ChainableFn: - """Extract ID from FHIR reference (e.g., 'Patient/123' -> '123').""" - return ChainableFn(lambda ref: ref.split("/")[-1] if "/" in str(ref) else ref) - - -def format_string(template: str) -> ChainableFn: - """Format value into a string template.""" - return ChainableFn(partial(lambda x, tmpl: tmpl.format(x), tmpl=template)) - - -# New partials replacing former SEED classes -def case( - cases: dict[Any, Any] | list[tuple[Any, Any]], default: Any = None -) -> ChainableFn: - """Switch-like pattern matching for values with ordered evaluation. - - Args: - cases: Dictionary or list of (condition, value) tuples - default: Default value if no cases match - - Returns: - ChainableFn that applies case matching to input value - """ - - def case_matcher(value): - # Support both dict and list for ordered evaluation - case_items = list(cases.items()) if isinstance(cases, dict) else cases - - for case_key, case_value in case_items: - # Exact match - if not callable(case_key) and value == case_key: - return case_value - - # Function match - if callable(case_key): - try: - if case_key(value): - return case_value - except (TypeError, AttributeError): - continue - - return default - - return ChainableFn(case_matcher) - - -def coalesce(*paths: str, default: Any = None) -> Callable[[Any], Any]: - """Grab first non-empty value from multiple paths. 
- - Args: - *paths: Paths to check in order - default: Default value if all paths are empty/None - - Returns: - Function that takes data and returns first non-empty value - """ - - def coalesce_func(data): - for path in paths: - value = _get(data, path) - if value is not None and value != "": - return value - return default - - return coalesce_func - - -def template(template_str: str, skip_none: bool = False) -> Callable[..., str]: - """Combine multiple values using a template string. - - Args: - template_str: Template string with {} placeholders - skip_none: If True, skip None values and adjust template - - Returns: - Function that takes values and formats them into template - """ - - def template_formatter(*values): - if skip_none: - # Filter out None values - filtered_values = [v for v in values if v is not None] - # Create template with correct number of placeholders - if filtered_values: - adjusted_template = " ".join("{}" for _ in filtered_values) - return adjusted_template.format(*filtered_values) - else: - return "" - else: - return template_str.format(*values) - - return template_formatter - - -def flatten(paths: list[str], delimiter: str = ", ") -> Callable[[Any], str]: - """Flatten values from multiple paths into a single delimited string. - - Args: - paths: List of paths to extract values from - delimiter: String to join values with - - Returns: - Function that takes data and returns flattened string - """ - - def flatten_func(data): - all_values = [] - for path in paths: - values = _get(data, path) - if isinstance(values, list): - all_values.extend(str(v) for v in values if v is not None) - elif values is not None: - all_values.append(str(values)) - return delimiter.join(all_values) - - return flatten_func diff --git a/chidian-py/chidian/piper.py b/chidian-py/chidian/piper.py deleted file mode 100644 index 797434b..0000000 --- a/chidian-py/chidian/piper.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Generic, Optional, Tuple, TypeVar - -from .data_mapping import DataMapping -from .recordset import RecordSet - -""" -A `Piper` class that executes DataMapping transformations. - -The Piper class is a runtime that executes DataMapping instances. -DataMapping defines WHAT to map, Piper defines HOW to execute it. - -As a Piper processes data, it will consume SEEDs and apply them to the data accordingly. -Uses a two-pass approach: first mapping, then cleanup of SEED directives. -""" - -# Type variables for generic typing -InputT = TypeVar("InputT") -OutputT = TypeVar("OutputT") - - -class Piper(Generic[InputT, OutputT]): - def __init__(self, data_mapping: "DataMapping"): - """ - Initialize a Piper for executing DataMapping transformations. 
-
-        Args:
-            data_mapping: A DataMapping instance that defines the transformation
-        """
-        self.data_mapping = data_mapping
-
-        # Set up type and mode information
-        self.source_type = data_mapping.source_model
-        self.target_type = data_mapping.target_model
-        # Compatibility aliases
-        self.input_type = self.source_type
-        self.output_type = self.target_type
-        self.strict = data_mapping.strict
-
-        self._mode = "lens" if data_mapping.bidirectional else "view"
-
-    def forward(self, data: InputT) -> OutputT | Tuple[OutputT, RecordSet]:
-        """Apply the forward transformation."""
-        # Type validation in strict mode
-        if self.strict and not isinstance(data, self.source_type):
-            raise TypeError(
-                f"Expected {self.source_type.__name__}, got {type(data).__name__}"
-            )
-
-        return self.data_mapping.forward(data)
-
-    def reverse(
-        self, output_data: OutputT, spillover: Optional[RecordSet] = None
-    ) -> InputT:
-        """Apply reverse transformation (only available for bidirectional DataMapping)."""
-        if not self.data_mapping.bidirectional:
-            raise ValueError(
-                "Reverse transformation only available for bidirectional mappings"
-            )
-
-        return self.data_mapping.reverse(output_data, spillover or RecordSet())
-
-    def can_reverse(self) -> bool:
-        """Check if this piper supports reverse transformation."""
-        return self.data_mapping.can_reverse()
-
-    def __call__(self, data: InputT) -> OutputT | Tuple[OutputT, RecordSet]:
-        """Make Piper callable."""
-        return self.forward(data)
diff --git a/chidian-py/chidian/recordset.py b/chidian-py/chidian/recordset.py deleted file mode 100644 index eb75085..0000000 --- a/chidian-py/chidian/recordset.py +++ /dev/null @@ -1,337 +0,0 @@
-import json
-from typing import Any, Callable, Iterator, Optional, Union
-
-from .chidian_rs import get
-
-"""
-A `RecordSet` is a convenient wrapper around dict[str, dict] for managing collections of dictionary data.
-
-Think of it as a group of dictionaries where you can `get` (with inter-dictionary references) and `select` from them as a group!
-
-Provides a middle ground between the strictness of DataFrames and raw list[dict]/dict[str, dict],
-allowing users to work with collections semantically without worrying about keys and structure.
-
-Supports path-based queries, filtering, mapping, and other functional operations.
-"""
-
-
-class RecordSet(dict):
-    def __init__(
-        self,
-        items: Union[list[dict[str, Any]], dict[str, dict[str, Any]], None] = None,
-        **kwargs,
-    ):
-        """
-        Initialize a RecordSet from a list or dict of dictionaries.
-
-        Args:
-            items: Either a list of dicts (auto-keyed by index) or a dict of dicts
-            **kwargs: Additional dict initialization parameters
-        """
-        # TODO: does the `self._items` field need to exist -- i.e. could this just be referenced later as `self.values()`?
-        # So don't need the middle abstraction then
-        super().__init__(**kwargs)
-        self._items: list[dict[str, Any]] = []
-
-        # Initialize items based on input type
-        if items is not None:
-            if isinstance(items, list):
-                self._items = items
-                # Store items by index using $-syntax
-                for i, item in enumerate(items):
-                    self[f"${i}"] = item
-            elif isinstance(items, dict):
-                self._items = list(items.values())
-                # Store items by their original keys
-                for key, item in items.items():
-                    self[key] = item
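
A quick sketch of the keying behavior above (the records are made up):

```python
# List input is auto-keyed as "$0", "$1", ...; dict input keeps the caller's keys.
rs = RecordSet([{"id": "a1"}, {"id": "b2"}])
assert rs["$0"] == {"id": "a1"}

named = RecordSet({"first": {"id": "a1"}})
assert named["first"] == {"id": "a1"}
```

-
-    def get_all(
-        self,
-        path: str,
-        default: Any = None,
-        apply: Optional[Callable] = None,
-        strict: bool = False,
-    ) -> list:
-        """
-        Apply get to extract a path from all items in the collection. 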
- - Examples: - collection.get_all("patient.id") # Get patient.id from all items - collection.get_all("name") # Get name from all items - collection.get_all("status", default="unknown") # With default - - Args: - path: Path to extract from each item - default: Default value for items missing this path - apply: Optional transform function to apply to each result - strict: If True, raise errors instead of returning default - - Returns: - List of extracted values (one per item) - """ - results = [] - for item in self._items: - value = get(item, path, default=default, apply=apply, strict=strict) - results.append(value) - return results - - def select( - self, - fields: str = "*", - where: Optional[Callable[[dict], bool]] = None, - flat: bool = False, - sparse: str = "preserve", - ): - """ - # TODO: Use the PEG grammar instead of custom parsing (refer to old `pydian` impl which is in separate repo) - - Select fields from the collection with optional filtering and sparse data handling. - - Examples: - collection.select("name, age") # Select specific fields, None for missing - collection.select("patient.name, patient.id") # Nested paths - collection.select("patient.*") # All from nested object - collection.select("*", where=lambda x: x.get("active") is not None) - collection.select("name", flat=True) # Return flat list - collection.select("patient.id", sparse="filter") # Filter out items with None values - - Args: - fields: Field specification ("*", "field1, field2", "nested.*") - where: Optional filter predicate - flat: If True and single field, return list instead of RecordSet - sparse: How to handle missing values ("preserve", "filter") - - "preserve": Keep items with None values (default for structure preservation) - - "filter": Remove items/fields with None values (default for aggregations) - - Returns: - RecordSet with query results, or list if flat=True - """ - # TODO: This function is way too big (though understandably so)... 
split this up into parsing step + execution functions - # Parse field specification - field_list: tuple[str, str] | list[str] | None - if fields == "*": - field_list = None # Keep all fields - elif ".*" in fields: - # Handle nested wildcard (e.g., "patient.*") - nested_path = fields.replace(".*", "") - field_list = ("wildcard", nested_path) - else: - # Parse comma-separated fields - field_list = [f.strip() for f in fields.split(",")] - - # Process each item - result_items = [] - result_keys = [] - - for i, item in enumerate(self._items): - # Apply filter if provided - if where is not None and not where(item): - continue - - # Extract fields based on specification - if field_list is None: - # Keep all fields (*) - result_item = item.copy() - elif isinstance(field_list, tuple) and field_list[0] == "wildcard": - # Handle "nested.*" syntax - nested_path = field_list[1] - nested_obj = get(item, nested_path, default=None) - - if sparse == "preserve": - # Always include, even if None or empty - if isinstance(nested_obj, dict): - result_item = nested_obj.copy() - else: - result_item = ( - {} if nested_obj is None else {"value": nested_obj} - ) - elif sparse == "filter": - # Only include if non-empty dict - if isinstance(nested_obj, dict) and nested_obj: - result_item = nested_obj.copy() - else: - continue - elif len(field_list) == 1 and flat: - # Single field with flat=True - collect for flat list - value = get(item, field_list[0], default=None) - if sparse == "preserve": - # Include even if None - result_items.append(value) - elif sparse == "filter" and value is not None: - # Only include non-None values - result_items.append(value) - continue - else: - # Multiple specific fields - result_item = {} - for field in field_list: - value = get(item, field, default=None) - key_name = field.split(".")[-1] if "." in field else field - - if sparse == "preserve": - # Always include the field, even if None - result_item[key_name] = value - elif sparse == "filter" and value is not None: - # Only include non-None values - result_item[key_name] = value - - # For "filter" mode, skip items with no valid fields - if sparse == "filter" and not result_item: - continue - - result_items.append(result_item) - - # Preserve the original key for this item - original_key = None - for key, val in self.items(): - if val is item: - original_key = key - break - result_keys.append(original_key) - - # Return based on flat parameter - if flat and isinstance(field_list, list) and len(field_list) == 1: - return result_items - - # Create new RecordSet preserving structure - result = RecordSet() - result._items = result_items - - # Preserve original keys - for i, (item, key) in enumerate(zip(result_items, result_keys)): - if key is not None: - if key.startswith("$") and key[1:].isdigit(): - # Reindex numeric keys based on new position - result[f"${i}"] = item - else: - # Preserve custom keys - result[key] = item - else: - # Fallback to numeric key - result[f"${i}"] = item - - return result - - def to_json(self, as_list: bool = False, indent: Optional[int] = None) -> str: - """ - Export collection as JSON string. - - Args: - as_list: Return as array (True) or dict (False) - indent: Pretty-print indentation - """ - if as_list: - return json.dumps(self._items, indent=indent, default=str) - else: - # Return as dict with current keys - return json.dumps(dict(self), indent=indent, default=str) - - def append(self, item: dict[str, Any], key: Optional[str] = None) -> None: - """ - Append an item to the collection (list-like behavior). 
-
-        Args:
-            item: Dictionary to add
-            key: Optional key for named access (defaults to "$n" by index; custom keys are normalized to start with "$")
-        """
-        self._items.append(item)
-
-        if key is None:
-            # Use $-prefixed index as key
-            key = f"${len(self._items) - 1}"
-        else:
-            # Ensure custom keys start with $
-            if not key.startswith("$"):
-                key = f"${key}"
-
-        self[key] = item
-
-    def filter(self, predicate: Callable[[dict], bool]) -> "RecordSet":
-        """
-        Filter items based on a predicate function.
-
-        Args:
-            predicate: Function returning True for items to keep
-
-        Returns:
-            New filtered RecordSet
-        """
-        filtered_items = [item for item in self._items if predicate(item)]
-
-        # Create new collection with filtered items
-        result = RecordSet()
-        result._items = filtered_items
-
-        # First pass: add all items with numeric keys
-        for i, item in enumerate(filtered_items):
-            result[f"${i}"] = item
-
-        # Second pass: preserve custom keys (identity check, mirroring `map` below,
-        # so an excluded item that merely compares equal is not re-added)
-        kept_ids = {id(item) for item in filtered_items}
-        for key, value in self.items():
-            if id(value) in kept_ids and not (
-                key.startswith("$") and key[1:].isdigit()
-            ):
-                # This is a custom key, preserve it
-                result[key] = value
-
-        return result
-
-    def map(self, transform: Callable[[dict], dict]) -> "RecordSet":
-        """
-        Transform each item in the collection.
-
-        Args:
-            transform: Function to apply to each item
-
-        Returns:
-            New RecordSet with transformed items
-        """
-        transformed = [transform(item) for item in self._items]
-
-        # Create new collection
-        result = RecordSet()
-        result._items = transformed
-
-        # Map old items to their indices for lookup
-        item_to_index = {id(item): i for i, item in enumerate(self._items)}
-
-        # First pass: add all items with numeric keys
-        for i, item in enumerate(transformed):
-            result[f"${i}"] = item
-
-        # Second pass: preserve custom keys
-        for key, value in self.items():
-            if id(value) in item_to_index and not (
-                key.startswith("$") and key[1:].isdigit()
-            ):
-                # This is a custom key, preserve it with the transformed item
-                result[key] = transformed[item_to_index[id(value)]]
-
-        return result
-
-    def __iter__(self) -> Iterator[dict[str, Any]]:
-        """Iterate over items in the collection."""
-        return iter(self._items)
-
-    def __len__(self) -> int:
-        """Return number of items in collection."""
-        return len(self._items)
-
-    def _extract_paths(self, obj: Any, prefix: str = "") -> set[str]:
-        """Extract all paths from a nested dict."""
-        paths = set()
-
-        if isinstance(obj, dict):
-            for key, value in obj.items():
-                new_prefix = f"{prefix}.{key}" if prefix else key
-                paths.add(new_prefix)
-
-                if isinstance(value, (dict, list)):
-                    paths.update(self._extract_paths(value, new_prefix))
-        elif isinstance(obj, list) and obj:
-            # Just handle first item for schema
-            if isinstance(obj[0], dict):
-                paths.update(self._extract_paths(obj[0], prefix))
-
-        return paths
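
Before the SEED directives below, a short sketch of how `filter` and `map` behave (the records are made up):

```python
# Hypothetical records; filter/map return new RecordSets, re-keyed as "$0", "$1", ...
rs = RecordSet([{"name": "Ann", "active": True}, {"name": "Bo", "active": False}])
active = rs.filter(lambda r: r["active"])
assert len(active) == 1
shouted = active.map(lambda r: {"name": r["name"].upper()})
assert shouted["$0"] == {"name": "ANN"}
```

diff --git a/chidian-py/chidian/seeds.py b/chidian-py/chidian/seeds.py deleted file mode 100644 index 5d5c35c..0000000 --- a/chidian-py/chidian/seeds.py +++ /dev/null @@ -1,71 +0,0 @@
-"""
-SEED classes provide data transformation directives for use with Piper.
-
-Contains DROP (enum for indicating data removal) and KEEP (class for preserving values).
-All SEED objects implement a process() method for a consistent interface.
-"""
-
-from enum import Enum
-from typing import Any
-
-
-class DROP(Enum):
-    """
-    A DROP placeholder object indicates the object relative to the current value should be dropped.
-    An "object" in this context is a dict or a list.
-
-    This enum implements the SEED protocol without inheritance to avoid metaclass conflicts.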
-
-    Examples:
-    ```
-    { <-- Grandparent (rel to _value)
-        'A': { <-- Parent (rel to _value)
-            'B': { <-- This Object (rel to _value)
-                'C': _value
-            }
-        }
-    }
-    ```
-
-    ```
-    { <-- Grandparent (rel to _value1 and _value2)
-        'A': [ <-- Parent (rel to _value1 and _value2)
-            { <-- This Object (rel to _value1)
-                'B': _value1
-            },
-            { <-- This Object (rel to _value2)
-                'B': _value2
-            }
-        ]
-    }
-    ```
-    """
-
-    THIS_OBJECT = -1
-    PARENT = -2
-    GRANDPARENT = -3
-    GREATGRANDPARENT = -4
-
-    def process(self, _data: Any, _context: dict[str, Any] | None = None) -> Any:
-        """DROP seeds are processed by Piper, not directly."""
-        return self
-
-    @property
-    def level(self) -> int:
-        """Get the drop level value for compatibility."""
-        return self.value
-
-
-class KEEP:
-    """
-    A value wrapped in a KEEP object should be ignored by the Mapper class when removing values.
-
-    Partial keeping is _not_ supported (i.e. a KEEP object within an object to be DROP-ed).
-    """
-
-    def __init__(self, value: Any):
-        self.value = value
-
-    def process(self, _data: Any, _context: dict[str, Any] | None = None) -> Any:
-        """KEEP seeds preserve their value during processing."""
-        return self.value
diff --git a/chidian-py/pyproject.toml b/chidian-py/pyproject.toml deleted file mode 100644 index c2cdb7f..0000000 --- a/chidian-py/pyproject.toml +++ /dev/null @@ -1,26 +0,0 @@
-[build-system]
-requires = ["maturin>=1.8,<2.0"]
-build-backend = "maturin"
-
-[project]
-name = "chidian"
-requires-python = ">=3.8"
-classifiers = [
-    "Programming Language :: Rust",
-    "Programming Language :: Python :: Implementation :: CPython",
-    "Programming Language :: Python :: Implementation :: PyPy",
-]
-dynamic = ["version"]
-dependencies = [
-    "pydantic>=2.10.6,<3.0.0",  # Only Pydantic v2 is supported
-]
-
-[tool.maturin]
-features = ["pyo3/extension-module"]
-python-source = "."
-
-[dependency-groups]
-dev = [
-    "pytest>=8.3.5",
-    "hypothesis>=6.0.0",
-    "ruff>=0.11.12",
-]
diff --git a/chidian-py/src/lib.rs b/chidian-py/src/lib.rs deleted file mode 100644 index 54c384d..0000000 --- a/chidian-py/src/lib.rs +++ /dev/null @@ -1,62 +0,0 @@
-use pyo3::prelude::*;
-
-mod py_traversal;
-
-use chidian_core::parser::parse_path;
-use py_traversal::{apply_functions, traverse_path, traverse_path_strict};
-
-#[pyfunction]
-#[pyo3(signature = (source, key, default=None, apply=None, strict=false))]
-fn get(
-    py: Python<'_>,
-    source: &Bound<'_, PyAny>,
-    key: &str,
-    default: Option<&Bound<'_, PyAny>>,
-    apply: Option<&Bound<'_, PyAny>>,
-    strict: Option<bool>,
-) -> PyResult<PyObject> {
-    let strict = strict.unwrap_or(false);
-
-    // Parse the path using chidian-core
-    let path = match parse_path(key) {
-        Ok((remaining, path)) if remaining.is_empty() => path,
-        _ => {
-            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
-                "Invalid path syntax: {}",
-                key
-            )));
-        }
-    };
-
-    // Traverse the path (always use flatten=false since it's removed)
-    let result = if strict {
-        match traverse_path_strict(py, source, &path, false) {
-            Ok(val) => val,
-            Err(e) => return Err(e),
-        }
-    } else {
-        traverse_path(py, source, &path, false)?
-    };
-
-    // Handle default value first
-    let mut final_result = result;
-    if final_result.bind(py).is_none() {
-        if let Some(default_val) = default {
-            final_result = default_val.to_object(py);
-        }
-    }
-
-    // Apply functions if provided (to the final result, including defaults)
-    if let Some(functions) = apply {
-        final_result = apply_functions(py, final_result, functions)?;
-    }
-
-    Ok(final_result)
-}
-
-/// A Python module implemented in Rust.
-#[pymodule]
-fn chidian_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_function(wrap_pyfunction!(get, m)?)?;
-    Ok(())
-}
diff --git a/chidian-py/src/py_traversal.rs b/chidian-py/src/py_traversal.rs deleted file mode 100644 index 7733716..0000000 --- a/chidian-py/src/py_traversal.rs +++ /dev/null @@ -1,329 +0,0 @@
-use chidian_core::parser::{Path, PathSegment};
-use pyo3::prelude::*;
-use pyo3::types::{PyDict, PyList, PyTuple};
-
-// Apply a chain of functions to a value
-pub fn apply_functions(
-    py: Python<'_>,
-    value: PyObject,
-    functions: &Bound<'_, PyAny>,
-) -> PyResult<PyObject> {
-    let mut current = value;
-
-    // Check if it's a single function or a list
-    if functions.downcast::<PyList>().is_ok() {
-        let func_list = functions.downcast::<PyList>()?;
-        for func in func_list.iter() {
-            match func.call1((current,)) {
-                Ok(result) => current = result.to_object(py),
-                Err(_) => return Ok(py.None()),
-            }
-        }
-    } else {
-        // Single function
-        match functions.call1((current,)) {
-            Ok(result) => current = result.to_object(py),
-            Err(_) => return Ok(py.None()),
-        }
-    }
-
-    Ok(current)
-}
-
-// Traverse the data structure according to the path (strict version)
-pub fn traverse_path_strict(
-    py: Python<'_>,
-    data: &Bound<'_, PyAny>,
-    path: &Path,
-    flatten: bool,
-) -> PyResult<PyObject> {
-    let mut current = vec![data.to_object(py)];
-
-    for segment in &path.segments {
-        let mut next = Vec::new();
-
-        for item in current {
-            let item_ref = item.bind(py);
-
-            match segment {
-                PathSegment::Key(key) => {
-                    if let Ok(dict) = item_ref.downcast::<PyDict>() {
-                        if let Some(value) = dict.get_item(key)? {
-                            next.push(value.to_object(py));
-                        } else {
-                            return Err(PyErr::new::<pyo3::exceptions::PyKeyError, _>(format!(
-                                "Key '{}' not found",
-                                key
-                            )));
-                        }
-                    } else if let Ok(list) = item_ref.downcast::<PyList>() {
-                        // If we have a list and trying to access a key, apply to each element
-                        for list_item in list {
-                            if let Ok(dict) = list_item.downcast::<PyDict>() {
-                                if let Some(value) = dict.get_item(key)? {
-                                    next.push(value.to_object(py));
-                                } else {
-                                    return Err(PyErr::new::<pyo3::exceptions::PyKeyError, _>(
-                                        format!("Key '{}' not found in list element", key),
-                                    ));
-                                }
-                            } else {
-                                return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
-                                    "Expected dict in list but got different type",
-                                ));
-                            }
-                        }
-                    } else {
-                        return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
-                            "Expected dict but got different type",
-                        ));
-                    }
-                }
-                PathSegment::Index(idx) => {
-                    if let Ok(list) = item_ref.downcast::<PyList>() {
-                        let len = list.len() as i32;
-                        let actual_idx = if *idx < 0 { len + idx } else { *idx };
-
-                        if actual_idx >= 0 && actual_idx < len {
-                            next.push(list.get_item(actual_idx as usize)?.to_object(py));
-                        } else {
-                            return Err(PyErr::new::<pyo3::exceptions::PyIndexError, _>(format!(
-                                "Index {} out of range",
-                                idx
-                            )));
-                        }
-                    } else {
-                        return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
-                            "Expected list but got different type",
-                        ));
-                    }
-                }
-                PathSegment::Slice(start, end) => {
-                    if let Ok(list) = item_ref.downcast::<PyList>() {
-                        let len = list.len() as i32;
-
-                        // Handle negative indices Python-style
-                        let start_idx = match start {
-                            Some(s) if *s < 0 => (len + s).max(0) as usize,
-                            Some(s) => (*s).min(len).max(0) as usize,
-                            None => 0,
-                        };
-
-                        let end_idx = match end {
-                            Some(e) if *e < 0 => (len + e).max(0) as usize,
-                            Some(e) => (*e).min(len).max(0) as usize,
-                            None => len as usize,
-                        };
-
-                        let slice_items: Vec<PyObject> = if start_idx <= end_idx {
-                            (start_idx..end_idx)
-                                .filter_map(|i| list.get_item(i).ok())
-                                .map(|item| item.to_object(py))
-                                .collect()
-                        } else {
-                            Vec::new()
-                        };
-
-                        next.push(PyList::new(py, slice_items)?.to_object(py));
-                    } else {
-                        return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
-                            "Expected list but got different type",
-                        ));
-                    }
-                }
-                PathSegment::Wildcard => {
-                    if let Ok(list) = item_ref.downcast::<PyList>() {
-                        for list_item in list {
-                            next.push(list_item.to_object(py));
-                        }
-                    } else {
-                        return Err(PyErr::new::<pyo3::exceptions::PyTypeError, _>(
-                            "Expected list but got different type",
-                        ));
-                    }
-                }
-                PathSegment::Tuple(paths) => {
-                    let mut tuple_items = Vec::new();
-
-                    for tuple_path in paths {
-                        let result = traverse_path_strict(py, item_ref, tuple_path, false)?;
-                        tuple_items.push(result);
-                    }
-
-                    next.push(PyTuple::new(py, tuple_items)?.to_object(py));
-                }
-            }
-        }
-
-        current = next;
-    }
-
-    // Handle flattening if needed
-    if flatten {
-        let mut flattened = Vec::new();
-        for item in &current {
-            let item_ref = item.bind(py);
-            if let Ok(list) = item_ref.downcast::<PyList>() {
-                for subitem in list {
-                    if let Ok(sublist) = subitem.downcast::<PyList>() {
-                        for subsubitem in sublist {
-                            flattened.push(subsubitem.to_object(py));
-                        }
-                    } else {
-                        flattened.push(subitem.to_object(py));
-                    }
-                }
-            } else {
-                flattened.push(item.clone_ref(py));
-            }
-        }
-        return Ok(PyList::new(py, flattened)?.to_object(py));
-    }
-
-    // Return the result
-    if current.len() == 1 {
-        Ok(current[0].clone_ref(py))
-    } else {
-        Ok(PyList::new(py, current)?.to_object(py))
-    }
-}
-
-// Traverse the data structure according to the path
-pub fn traverse_path(
-    py: Python<'_>,
-    data: &Bound<'_, PyAny>,
-    path: &Path,
-    flatten: bool,
-) -> PyResult<PyObject> {
-    let mut current = vec![data.to_object(py)];
-
-    for segment in &path.segments {
-        let mut next = Vec::new();
-
-        for item in current {
-            let item_ref = item.bind(py);
-
-            match segment {
-                PathSegment::Key(key) => {
-                    if let Ok(dict) = item_ref.downcast::<PyDict>() {
-                        if let Some(value) = dict.get_item(key)? {
-                            next.push(value.to_object(py));
-                        } else {
-                            next.push(py.None());
-                        }
-                    } else if let Ok(list) = item_ref.downcast::<PyList>() {
-                        // If we have a list and trying to access a key, apply to each element
-                        for list_item in list {
-                            if let Ok(dict) = list_item.downcast::<PyDict>() {
-                                if let Some(value) = dict.get_item(key)? {
-                                    next.push(value.to_object(py));
-                                } else {
-                                    next.push(py.None());
-                                }
-                            } else {
-                                next.push(py.None());
-                            }
-                        }
-                    } else {
-                        next.push(py.None());
-                    }
-                }
-                PathSegment::Index(idx) => {
-                    if let Ok(list) = item_ref.downcast::<PyList>() {
-                        let len = list.len() as i32;
-                        let actual_idx = if *idx < 0 { len + idx } else { *idx };
-
-                        if actual_idx >= 0 && actual_idx < len {
-                            next.push(list.get_item(actual_idx as usize)?.to_object(py));
-                        } else {
-                            next.push(py.None());
-                        }
-                    } else {
-                        next.push(py.None());
-                    }
-                }
-                PathSegment::Slice(start, end) => {
-                    if let Ok(list) = item_ref.downcast::<PyList>() {
-                        let len = list.len() as i32;
-
-                        // Handle negative indices Python-style
-                        let start_idx = match start {
-                            Some(s) if *s < 0 => (len + s).max(0) as usize,
-                            Some(s) => (*s).min(len).max(0) as usize,
-                            None => 0,
-                        };
-
-                        let end_idx = match end {
-                            Some(e) if *e < 0 => (len + e).max(0) as usize,
-                            Some(e) => (*e).min(len).max(0) as usize,
-                            None => len as usize,
-                        };
-
-                        let slice_items: Vec<PyObject> = if start_idx <= end_idx {
-                            (start_idx..end_idx)
-                                .filter_map(|i| list.get_item(i).ok())
-                                .map(|item| item.to_object(py))
-                                .collect()
-                        } else {
-                            Vec::new()
-                        };
-
-                        next.push(PyList::new(py, slice_items)?.to_object(py));
-                    } else {
-                        next.push(py.None());
-                    }
-                }
-                PathSegment::Wildcard => {
-                    if let Ok(list) = item_ref.downcast::<PyList>() {
-                        for list_item in list {
-                            next.push(list_item.to_object(py));
-                        }
-                    } else {
-                        next.push(py.None());
-                    }
-                }
-                PathSegment::Tuple(paths) => {
-                    let mut tuple_items = Vec::new();
-
-                    for tuple_path in paths {
-                        let result = traverse_path(py, item_ref, tuple_path, false)?;
-                        tuple_items.push(result);
-                    }
-
-                    next.push(PyTuple::new(py, tuple_items)?.to_object(py));
-                }
-            }
-        }
-
-        current = next;
-    }
-
-    // Handle flattening if needed
-    if flatten {
-        let mut flattened = Vec::new();
-        for item in &current {
-            let item_ref = item.bind(py);
-            if let Ok(list) = item_ref.downcast::<PyList>() {
-                for subitem in list {
-                    if let Ok(sublist) = subitem.downcast::<PyList>() {
-                        for subsubitem in sublist {
-                            flattened.push(subsubitem.to_object(py));
-                        }
-                    } else {
-                        flattened.push(subitem.to_object(py));
-                    }
-                }
-            } else {
-                flattened.push(item.clone_ref(py));
-            }
-        }
-        return Ok(PyList::new(py, flattened)?.to_object(py));
-    }
-
-    // Return the result
-    if current.len() == 1 {
-        Ok(current[0].clone_ref(py))
-    } else {
-        Ok(PyList::new(py, current)?.to_object(py))
-    }
-}
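
To close out the native layer, a minimal sketch of the Python-facing `get` these functions back (the sample data is made up; the import path follows the `from .chidian_rs import get` used elsewhere in this diff):

```python
from chidian.chidian_rs import get

data = {"subject": {"reference": "Patient/456"}, "coding": [{"code": "8480-6"}]}
assert get(data, "subject.reference") == "Patient/456"
assert get(data, "coding[0].code") == "8480-6"            # list indexing
assert get(data, "missing.path", default="n/a") == "n/a"  # default instead of None
```

diff --git a/chidian-py/tests/test_data_mapping.py b/chidian-py/tests/test_data_mapping.py deleted file mode 100644 index 1c9d70e..0000000 --- a/chidian-py/tests/test_data_mapping.py +++ /dev/null @@ -1,268 +0,0 @@
-"""Test the unified DataMapping class."""
-
-from typing import Any, Optional
-
-import chidian.partials as p
-import pytest
-from chidian import DataMapping, Piper
-from pydantic import BaseModel
-
-
-# Test models
-class Patient(BaseModel):
-    id: str
-    name: str
-    active: bool
-    internal_notes: Optional[str] = None
-    age: Optional[int] = None
-
-
-class Observation(BaseModel):
-    subject_ref: str
-    performer: str
-    status: Optional[str] = None
-
-
-class TestDataMappingUnidirectional:
-    """Test DataMapping in unidirectional mode (without spillover)."""
-
-    def test_simple_mapping(self) -> None:
-        """Test basic field 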
mapping.""" - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={"subject_ref": "id", "performer": "name"}, - bidirectional=False, - ) - - patient = Patient(id="123", name="John", active=True) - obs: Any = mapping.forward(patient) - - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - assert obs.performer == "John" - - def test_complex_mapping(self) -> None: - """Test mapping with transformations.""" - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={ # type: ignore - "subject_ref": p.get("id") >> p.format_string("Patient/{}"), - "performer": p.get("name") >> p.upper, - "status": p.get("active") - >> p.case({True: "active", False: "inactive"}, default="unknown"), - }, - bidirectional=False, - ) - - patient = Patient(id="123", name="john", active=True) - obs: Any = mapping.forward(patient) - - assert obs.subject_ref == "Patient/123" - assert obs.performer == "JOHN" - assert obs.status == "active" - - def test_reverse_not_available(self) -> None: - """Test that reverse is not available in unidirectional mode.""" - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={ - "subject_ref": "id", - "performer": "name", # Add required field - }, - bidirectional=False, - ) - - obs = Observation(subject_ref="123", performer="John") - - with pytest.raises( - RuntimeError, match="reverse.*only available in bidirectional mode" - ): - mapping.reverse(obs) - - assert not mapping.is_reversible() - assert not mapping.can_reverse() - - -class TestDataMappingBidirectional: - """Test DataMapping in bidirectional mode (Lens).""" - - def test_simple_bidirectional(self) -> None: - """Test basic bidirectional mapping.""" - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - # Forward - patient = Patient(id="123", name="John", active=True) - obs: Any - obs, spillover = mapping.forward(patient) - - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - assert obs.performer == "John" - - # Check spillover - assert len(spillover) == 1 - assert spillover._items[0]["active"] is True - - # Reverse - recovered = mapping.reverse(obs, spillover) - assert isinstance(recovered, Patient) - assert recovered.id == "123" - assert recovered.name == "John" - assert recovered.active is True - - def test_invalid_bidirectional_mapping(self) -> None: - """Test that non-string mappings are rejected in bidirectional mode.""" - with pytest.raises( - TypeError, match="Bidirectional mappings must be string-to-string" - ): - DataMapping( - source_model=Patient, - target_model=Observation, - mapping={ # type: ignore - "id": lambda x: x[ - "subject_ref" - ] # Function not allowed # type: ignore - }, - bidirectional=True, - ) - - def test_non_reversible_mapping(self) -> None: - """Test detection of non-reversible mappings.""" - # Many-to-one mapping - with pytest.raises(ValueError, match="not reversible.*duplicate target paths"): - DataMapping( - source_model=Patient, - target_model=Observation, - mapping={ - "id": "subject_ref", - "name": "subject_ref", # Duplicate target - }, - bidirectional=True, - strict=True, - ) - - def test_roundtrip(self) -> None: - """Test lossless roundtrip transformation.""" - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - original = Patient( - id="456", 
name="Jane", active=False, internal_notes="Important", age=30 - ) - - # Forward and reverse - target: Any - target, spillover = mapping.forward(original) - recovered = mapping.reverse(target, spillover) - - # Should be identical - assert recovered.model_dump() == original.model_dump() - - -class TestDataMappingWithPiper: - """Test DataMapping integration with Piper.""" - - def test_piper_unidirectional(self) -> None: - """Test Piper with unidirectional DataMapping.""" - - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={"subject_ref": "id", "performer": "name"}, - bidirectional=False, - ) - - piper: Piper = Piper(mapping) - - patient = Patient(id="123", name="John", active=True) - obs = piper(patient) - - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - assert obs.performer == "John" - - # Should not be reversible - assert not piper.can_reverse() - - def test_piper_bidirectional(self) -> None: - """Test Piper with bidirectional DataMapping.""" - - mapping = DataMapping( - source_model=Patient, - target_model=Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - piper: Piper = Piper(mapping) - - # Forward - patient = Patient(id="123", name="John", active=True) - obs, spillover = piper(patient) - - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - - # Should be reversible - assert piper.can_reverse() - - # Reverse - recovered = piper.reverse(obs, spillover) - assert recovered.id == "123" - assert recovered.name == "John" - - -class TestDataMappingValidation: - """Test validation features.""" - - def test_strict_mode_validation(self) -> None: - """Test strict mode enforces required fields.""" - - class Source(BaseModel): - id: str - - class Target(BaseModel): - id: str - required_field: str # Required but not mapped - - # Should raise in strict mode - with pytest.raises(ValueError, match="Missing required target fields"): - DataMapping( - source_model=Source, - target_model=Target, - mapping={"id": "id"}, - bidirectional=False, - strict=True, - ) - - # Should work in non-strict mode - mapping = DataMapping( - source_model=Source, - target_model=Target, - mapping={"id": "id"}, - bidirectional=False, - strict=False, - ) - assert mapping.strict is False - - def test_type_validation(self) -> None: - """Test that non-Pydantic models are rejected.""" - with pytest.raises(TypeError, match="must be a Pydantic v2 BaseModel"): - DataMapping( - source_model=dict, # Not a Pydantic model - target_model=BaseModel, - mapping={}, - bidirectional=False, - ) diff --git a/chidian-py/tests/test_data_mapping_bidirectional.py b/chidian-py/tests/test_data_mapping_bidirectional.py deleted file mode 100644 index ea226ee..0000000 --- a/chidian-py/tests/test_data_mapping_bidirectional.py +++ /dev/null @@ -1,394 +0,0 @@ -"""Tests for DataMapping in bidirectional mode (formerly Lens).""" - -from typing import Optional - -import pytest -from chidian import DataMapping -from chidian.recordset import RecordSet -from pydantic import BaseModel - - -class Patient(BaseModel): - """Sample Patient model for testing.""" - - id: str - name: str - active: bool - internal_notes: Optional[str] = None - age: Optional[int] = None - - -class Observation(BaseModel): - """Sample Observation model for testing.""" - - subject_ref: str - performer: str - status: Optional[str] = None - - -class NestedSource(BaseModel): - """Model with nested structure.""" - - patient: dict - metadata: dict - extra_field: Optional[str] = 
None - - -class NestedTarget(BaseModel): - """Target with different nesting.""" - - subject_id: str - subject_name: str - created_by: Optional[str] = None - - -class TestDataMappingBidirectionalBasic: - """Test basic Lens functionality.""" - - def test_mapping_creation(self): - """Test mapping can be created with valid models and mappings.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - assert mapping.source_model == Patient - assert mapping.target_model == Observation - assert mapping.mapping == {"id": "subject_ref", "name": "performer"} - assert mapping.can_reverse() is True - - def test_mapping_invalid_models(self): - """Test mapping rejects non-Pydantic models.""" - with pytest.raises( - TypeError, match="source_model must be a Pydantic v2 BaseModel" - ): - DataMapping( - dict, Observation, mapping={"id": "subject_ref"}, bidirectional=True - ) - - with pytest.raises( - TypeError, match="target_model must be a Pydantic v2 BaseModel" - ): - DataMapping( - Patient, dict, mapping={"id": "subject_ref"}, bidirectional=True - ) - - def test_mapping_invalid_mappings(self): - """Test mapping rejects non-string mappings.""" - with pytest.raises( - TypeError, match="Bidirectional mappings must be string-to-string paths" - ): - DataMapping( - Patient, Observation, mapping={123: "subject_ref"}, bidirectional=True - ) - - with pytest.raises( - TypeError, match="Bidirectional mappings must be string-to-string paths" - ): - DataMapping(Patient, Observation, mapping={"id": 456}, bidirectional=True) - - -class TestDataMappingBidirectionalForward: - """Test forward transformations.""" - - def test_simple_forward(self): - """Test basic forward transformation.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - patient = Patient(id="123", name="John", active=True) - obs, spillover = mapping.forward(patient) - - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - assert obs.performer == "John" - assert obs.status is None - - # Check spillover contains unmapped fields - assert len(spillover) == 1 - spillover_data = spillover._items[0] - assert spillover_data["active"] is True - - def test_forward_with_spillover(self): - """Test forward transformation with multiple spillover fields.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - patient = Patient( - id="123", name="John", active=True, internal_notes="sensitive", age=45 - ) - obs, spillover = mapping.forward(patient) - - assert obs.subject_ref == "123" - assert obs.performer == "John" - - # All unmapped fields should be in spillover - spillover_data = spillover._items[0] - assert spillover_data["active"] is True - assert spillover_data["internal_notes"] == "sensitive" - assert spillover_data["age"] == 45 - - def test_forward_nested_mappings(self): - """Test forward transformation with nested path mappings.""" - mapping = DataMapping( - NestedSource, - NestedTarget, - mapping={ - "patient.id": "subject_id", - "patient.name": "subject_name", - "metadata.created_by": "created_by", - }, - bidirectional=True, - ) - - source = NestedSource( - patient={"id": "123", "name": "John", "age": 45}, - metadata={"created_by": "system", "version": "1.0"}, - extra_field="extra", - ) - - target, spillover = mapping.forward(source) - - assert target.subject_id == "123" - assert target.subject_name 
== "John" - assert target.created_by == "system" - - # Check spillover - spillover_data = spillover._items[0] - # The unmapped patient.age should be in spillover - assert "patient" in spillover_data - assert spillover_data["patient"]["age"] == 45 - # The unmapped metadata.version should be in spillover - assert "metadata" in spillover_data - assert spillover_data["metadata"]["version"] == "1.0" - # The completely unmapped field should be in spillover - assert spillover_data["extra_field"] == "extra" - - def test_forward_missing_fields(self): - """Test forward transformation with missing source fields.""" - mapping = DataMapping( - Patient, - Observation, - mapping={ - "id": "subject_ref", - "name": "performer", - "nonexistent": "status", # This field doesn't exist - }, - bidirectional=True, - ) - - patient = Patient(id="123", name="John", active=True) - obs, spillover = mapping.forward(patient) - - assert obs.subject_ref == "123" - assert obs.performer == "John" - assert obs.status is None # Missing field maps to None - - def test_forward_strict_mode(self): - """Test forward transformation.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - # Should work with correct type - patient = Patient(id="123", name="John", active=True) - obs, spillover = mapping.forward(patient) - assert obs.subject_ref == "123" - - -class TestDataMappingBidirectionalReverse: - """Test reverse transformations.""" - - def test_simple_reverse(self): - """Test basic reverse transformation.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - obs = Observation(subject_ref="123", performer="John") - spillover = RecordSet([{"active": True, "age": 45}]) - - patient = mapping.reverse(obs, spillover) - - assert isinstance(patient, Patient) - assert patient.id == "123" - assert patient.name == "John" - assert patient.active is True - assert patient.age == 45 - - def test_reverse_no_spillover(self): - """Test reverse transformation with minimal spillover.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - obs = Observation(subject_ref="123", performer="John") - spillover = RecordSet([{"active": True}]) # Provide required field - - patient = mapping.reverse(obs, spillover) - - assert patient.id == "123" - assert patient.name == "John" - assert patient.active is True - - def test_reverse_nested(self): - """Test reverse transformation with nested mappings.""" - mapping = DataMapping( - NestedSource, - NestedTarget, - mapping={"patient.id": "subject_id", "patient.name": "subject_name"}, - bidirectional=True, - ) - - target = NestedTarget( - subject_id="123", subject_name="John", created_by="system" - ) - spillover = RecordSet( - [ - { - "patient": {"age": 45}, - "metadata": {"created_by": "system", "version": "1.0"}, - "extra_field": "extra", - } - ] - ) - - source = mapping.reverse(target, spillover) - - assert source.patient["id"] == "123" - assert source.patient["name"] == "John" - assert source.extra_field == "extra" - - -class TestDataMappingBidirectionalRoundtrip: - """Test roundtrip transformations (forward + reverse).""" - - def test_lossless_roundtrip(self): - """Test that forward + reverse is lossless.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - original = Patient( - 
id="123", name="John", active=True, internal_notes="sensitive", age=45 - ) - - # Forward transformation - obs, spillover = mapping.forward(original) - - # Reverse transformation - recovered = mapping.reverse(obs, spillover) - - # Should be identical - assert recovered == original - - def test_roundtrip_nested(self): - """Test roundtrip with nested data (simplified).""" - mapping = DataMapping( - NestedSource, - NestedTarget, - mapping={"patient.id": "subject_id", "patient.name": "subject_name"}, - bidirectional=True, - ) - - original = NestedSource( - patient={"id": "123", "name": "John"}, - metadata={"created_by": "system", "version": "1.0"}, - extra_field="extra", - ) - - # Roundtrip - target, spillover = mapping.forward(original) - recovered = mapping.reverse(target, spillover) - - # Check key fields are preserved - assert recovered.patient["id"] == original.patient["id"] - assert recovered.patient["name"] == original.patient["name"] - assert recovered.extra_field == original.extra_field - - -class TestDataMappingBidirectionalReversibility: - """Test mapping reversibility validation.""" - - def test_reversible_mappings(self): - """Test that 1:1 mappings are reversible.""" - mapping = DataMapping( - Patient, - Observation, - mapping={"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - - assert mapping.can_reverse() is True - - def test_non_reversible_mappings(self): - """Test that many-to-one mappings are not reversible.""" - with pytest.raises(ValueError, match="not reversible.*duplicate target paths"): - DataMapping( - Patient, - Observation, - mapping={ - "id": "subject_ref", - "name": "subject_ref", # Duplicate target! - "active": "performer", - }, - strict=True, - bidirectional=True, - ) - - def test_non_reversible_can_reverse(self): - """Test that can_reverse correctly identifies non-reversible mappings.""" - # Create in non-strict mode to avoid validation error - mapping = DataMapping( - Patient, - Observation, - mapping={ - "id": "subject_ref", - "name": "subject_ref", # Duplicate target - }, - strict=False, - bidirectional=True, - ) - - assert mapping.can_reverse() is False - - def test_reverse_non_reversible_fails(self): - """Test that reverse fails on non-reversible mapping.""" - mapping = DataMapping( - Patient, - Observation, - mapping={ - "id": "subject_ref", - "name": "subject_ref", # Duplicate target - }, - strict=False, - bidirectional=True, - ) - - obs = Observation(subject_ref="123", performer="John") - spillover = RecordSet() - - with pytest.raises(ValueError, match="cannot reverse"): - mapping.reverse(obs, spillover) diff --git a/chidian-py/tests/test_data_mapping_unidirectional.py b/chidian-py/tests/test_data_mapping_unidirectional.py deleted file mode 100644 index 8d33b87..0000000 --- a/chidian-py/tests/test_data_mapping_unidirectional.py +++ /dev/null @@ -1,203 +0,0 @@ -"""Tests for DataMapping in unidirectional mode (formerly View).""" - -from typing import Any, Optional - -import chidian.partials as p -import pytest -from chidian import DataMapping -from pydantic import BaseModel - - -class TestDataMappingUnidirectionalBasic: - """Test basic DataMapping functionality in unidirectional mode.""" - - def test_simple_mapping(self) -> None: - """Test basic field mapping with Pydantic models.""" - - class Source(BaseModel): - id: str - name: str - - class Target(BaseModel): - person_id: str - display_name: str - - mapping = DataMapping( - source_model=Source, - target_model=Target, - mapping={"person_id": "id", "display_name": "name"}, - 
bidirectional=False, - ) - - source = Source(id="123", name="John Doe") - result: Any = mapping.forward(source) - - assert isinstance(result, Target) - assert result.person_id == "123" - assert result.display_name == "John Doe" - - def test_nested_paths(self) -> None: - """Test mapping with nested paths.""" - - class Source(BaseModel): - subject: dict - valueQuantity: dict - - class Target(BaseModel): - patient_id: str - value: float - - mapping = DataMapping( - source_model=Source, - target_model=Target, - mapping={"patient_id": "subject.reference", "value": "valueQuantity.value"}, - bidirectional=False, - ) - - source = Source( - subject={"reference": "Patient/123"}, - valueQuantity={"value": 140.0, "unit": "mmHg"}, - ) - result: Any = mapping.forward(source) - - assert result.patient_id == "Patient/123" - assert result.value == 140.0 - - def test_with_transformations(self) -> None: - """Test mapping with chainable transformations.""" - - class Source(BaseModel): - name: str - reference: str - - class Target(BaseModel): - name_upper: str - id: int - - mapping = DataMapping( - source_model=Source, - target_model=Target, - mapping={ # type: ignore - "name_upper": p.get("name") >> p.upper, - "id": p.get("reference") >> p.split("/") >> p.last >> p.to_int, - }, - bidirectional=False, - ) - - source = Source(name="john doe", reference="Patient/456") - result: Any = mapping.forward(source) - - assert result.name_upper == "JOHN DOE" - assert result.id == 456 - - -class TestDataMappingUnidirectionalValidation: - """Test DataMapping validation and error handling.""" - - def test_strict_mode_validation(self) -> None: - """Test strict mode enforces required fields.""" - - class Source(BaseModel): - id: str - - class Target(BaseModel): - id: str - required_field: str # Required but not mapped - - # Should raise in strict mode - with pytest.raises(ValueError, match="Missing required target fields"): - DataMapping( - source_model=Source, - target_model=Target, - mapping={"id": "id"}, - strict=True, - ) - - # Should work in non-strict mode - mapping = DataMapping( - source_model=Source, - target_model=Target, - mapping={"id": "id"}, - strict=False, - bidirectional=False, - ) - assert mapping.strict is False - - def test_type_validation(self) -> None: - """Test that non-Pydantic models are rejected.""" - with pytest.raises(TypeError, match="must be a Pydantic v2 BaseModel"): - DataMapping( - source_model=dict, # Not a Pydantic model - target_model=BaseModel, - mapping={}, - ) - - def test_error_handling(self) -> None: - """Test error handling in mappings.""" - - class Source(BaseModel): - data: dict - - class Target(BaseModel): - safe: Optional[str] = None - error: Optional[str] = None - - # Non-strict mode handles errors gracefully - mapping = DataMapping( - source_model=Source, - target_model=Target, - mapping={ # type: ignore - "safe": "data.value", - "error": p.ChainableFn(lambda x: 1 / 0), # Will raise - }, - strict=False, - bidirectional=False, - ) - - source = Source(data={"value": "test"}) - result: Any = mapping.forward(source) - - assert result.safe == "test" - assert result.error is None # Error was caught - - -class TestDataMappingUnidirectionalRealWorld: - """Test real-world transformation scenarios.""" - - def test_fhir_to_flat_structure(self, fhir_observation: Any) -> None: - """Test transforming nested FHIR to flat structure.""" - - class FHIRObservation(BaseModel): - id: str - subject: dict - code: dict - valueQuantity: Optional[dict] = None - - class FlatObservation(BaseModel): - 
observation_id: str - patient_id: str - loinc_code: str - value: Optional[float] = None - unit: Optional[str] = None - - mapping = DataMapping( - source_model=FHIRObservation, - target_model=FlatObservation, - mapping={ # type: ignore - "observation_id": "id", - "patient_id": p.get("subject.reference") >> p.split("/") >> p.last, - "loinc_code": "code.coding[0].code", - "value": "valueQuantity.value", - "unit": "valueQuantity.unit", - }, - bidirectional=False, - ) - - source = FHIRObservation(**fhir_observation) - result: Any = mapping.forward(source) - - assert result.observation_id == "obs-123" - assert result.patient_id == "456" - assert result.loinc_code == "8480-6" - assert result.value == 140.0 - assert result.unit == "mmHg" diff --git a/chidian-py/tests/test_lexicon.py b/chidian-py/tests/test_lexicon.py deleted file mode 100644 index 64c30bb..0000000 --- a/chidian-py/tests/test_lexicon.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Consolidated tests for the Lexicon class.""" - -import pytest -from chidian.lexicon import Lexicon - - -class TestLexiconBasic: - """Test basic Lexicon functionality.""" - - @pytest.mark.parametrize( - "mappings,forward_tests,reverse_tests", - [ - # One-to-one mappings - ( - {"8480-6": "271649006", "8462-4": "271650006"}, - [("8480-6", "271649006"), ("8462-4", "271650006")], - [("271649006", "8480-6"), ("271650006", "8462-4")], - ), - # Many-to-one mappings - ( - {("active", "current"): "A", ("inactive", "stopped"): "I"}, - [("active", "A"), ("current", "A"), ("inactive", "I")], - [("A", "active"), ("I", "inactive")], # First in tuple - ), - ], - ) - def test_bidirectional_mappings(self, mappings, forward_tests, reverse_tests): - """Test forward and reverse mappings.""" - lexicon = Lexicon(mappings) - - for key, expected in forward_tests: - assert lexicon.forward(key) == expected - assert lexicon[key] == expected - - for key, expected in reverse_tests: - assert lexicon.reverse(key) == expected - assert lexicon[key] == expected - - def test_default_handling(self): - """Test default value behavior.""" - lexicon = Lexicon({"yes": "Y"}, default="UNKNOWN") - - assert lexicon["yes"] == "Y" - assert lexicon["missing"] == "UNKNOWN" - assert lexicon.get("missing", "CUSTOM") == "CUSTOM" - - def test_dict_interface(self): - """Test that Lexicon works as a dict.""" - lexicon = Lexicon({"a": "1", "b": "2"}) - - assert dict(lexicon) == {"a": "1", "b": "2"} - assert list(lexicon.keys()) == ["a", "b"] - assert "a" in lexicon - assert "1" in lexicon # Reverse lookup - - def test_empty_lexicon(self): - """Test empty lexicon behavior.""" - lexicon = Lexicon({}) - - assert len(lexicon) == 0 - assert lexicon.forward("any") is None - with pytest.raises(KeyError): - _ = lexicon["any"] - - -class TestLexiconRealWorld: - """Test real-world healthcare code mapping scenarios.""" - - def test_medical_code_mapping(self): - """Test LOINC to SNOMED mapping example.""" - lab_codes = Lexicon( - { - "8480-6": "271649006", # Systolic BP - ("2160-0", "38483-4"): "113075003", # Creatinine variants - }, - metadata={"version": "2023-Q4"}, - ) - - # Forward mapping - assert lab_codes["8480-6"] == "271649006" - assert lab_codes["2160-0"] == "113075003" - assert lab_codes["38483-4"] == "113075003" - - # Reverse mapping - assert lab_codes["271649006"] == "8480-6" - assert lab_codes["113075003"] == "2160-0" # First in tuple - - # Metadata - assert lab_codes.metadata["version"] == "2023-Q4" - assert lab_codes.can_reverse() is True diff --git a/chidian-py/tests/test_partials.py 
b/chidian-py/tests/test_partials.py deleted file mode 100644 index a809e78..0000000 --- a/chidian-py/tests/test_partials.py +++ /dev/null @@ -1,464 +0,0 @@ -from copy import deepcopy - -import chidian.partials as p -import pytest - - -def test_generic_apply_wrappers() -> None: - n = 100 - assert p.add(1)(n) == n + 1 - assert p.subtract(1)(n) == n - 1 - assert p.subtract(1, before=True)(n) == 1 - n - assert p.multiply(10)(n) == n * 10 - assert p.divide(10)(n) == n / 10 - assert p.divide(10, before=True)(n) == 10 / n - - lst = [1, 2, 3] - assert p.add([4])(lst) == lst + [4] - assert p.add([4], before=True)(lst) == [4] + lst - - f = 4.2 - assert p.multiply(3)(f) == 3 * f - assert p.multiply(3, before=True)(f * f) == (f * f) * 3 - - -def test_generic_conditional_wrappers() -> None: - value = {"a": "b", "c": "d"} - copied_value = deepcopy(value) - example_key = "a" - - assert p.equals(copied_value)(value) == (value == copied_value) - assert p.not_equal(copied_value)(value) == (value != copied_value) - assert p.equivalent(copied_value)(value) == (value is copied_value) - assert p.not_equivalent(copied_value)(value) == (value is not copied_value) - assert p.contains(example_key)(copied_value) == (example_key in value) - assert p.not_contains(example_key)(copied_value) == (example_key not in value) - assert p.contained_in(copied_value)(example_key) == (example_key in value) - assert p.not_contained_in(copied_value)(example_key) == (example_key not in value) - assert p.isinstance_of(dict)(value) == isinstance(value, dict) - assert p.isinstance_of(str)(example_key) == isinstance(example_key, str) - - -def test_iterable_wrappers() -> None: - supported_iterables = ([1, 2, 3, 4, 5], (1, 2, 3, 4, 5)) - for value in supported_iterables: - assert p.keep(1)(value) == value[:1] - assert p.keep(50)(value) == value[:50] - assert p.index(0)(value) == value[0] - assert p.index(1)(value) == value[1] - assert p.index(-1)(value) == value[-1] - assert p.index(-3)(value) == value[-3] - - -def test_stdlib_wrappers() -> None: - EXAMPLE_LIST = ["a", "b", "c"] - assert p.map_to_list(str.upper)(EXAMPLE_LIST) == ["A", "B", "C"] - assert p.filter_to_list(p.equals("a"))(EXAMPLE_LIST) == ["a"] - - -def test_basic_chainable_fn(): - """Test basic ChainableFn functionality.""" - # Single operation - assert p.upper("hello") == "HELLO" - assert p.lower("WORLD") == "world" - - # Check it preserves function behavior - assert p.strip(" test ") == "test" - assert p.capitalize("hello world") == "Hello world" - - -def test_function_chain_creation(): - """Test creating FunctionChain with >> operator.""" - # ChainableFn >> ChainableFn - chain = p.upper >> p.replace(" ", "_") - assert isinstance(chain, p.FunctionChain) - assert len(chain) == 2 - assert chain("hello world") == "HELLO_WORLD" - - # Regular function >> ChainableFn - chain2 = str.strip >> p.upper - assert chain2(" test ") == "TEST" - - # ChainableFn >> regular function - chain3 = p.lower >> str.title - assert chain3("HELLO WORLD") == "Hello World" - - -def test_complex_chains(): - """Test complex function chains.""" - # Multi-step string transformation - normalize = p.strip >> p.lower >> p.replace(" ", "_") >> p.replace("-", "_") - assert normalize(" Hello-World ") == "hello_world" - - # Array operations - get_last_word = p.split() >> p.last >> p.upper - assert get_last_word("hello beautiful world") == "WORLD" - - # Mixed operations - extract_number = p.split("-") >> p.last >> p.to_int >> p.multiply(10) - assert extract_number("item-42") == 420 - - -def 
test_parameterized_chainable_fns(): - """Test ChainableFn factories with parameters.""" - # Split with custom separator - split_comma = p.split(",") - assert split_comma("a,b,c") == ["a", "b", "c"] - - # Replace with parameters - sanitize = p.replace("&", "and") >> p.replace("@", "at") - assert sanitize("tom & jerry @ home") == "tom and jerry at home" - - # Round to decimals - round_2 = p.round_to(2) - assert round_2(3.14159) == 3.14 - - # Chain with parameters - process = p.to_float >> p.round_to(1) >> p.to_str - assert process("3.456") == "3.5" - - -def test_array_operations(): - """Test array/list operations.""" - data = ["first", "second", "third", "fourth"] - - assert p.first(data) == "first" - assert p.last(data) == "fourth" - assert p.length(data) == 4 - assert p.at_index(2)(data) == "third" - assert p.slice_range(1, 3)(data) == ["second", "third"] - - # Empty list handling - assert p.first([]) is None - assert p.last([]) is None - assert p.at_index(10)([1, 2, 3]) is None - - -def test_type_conversions(): - """Test type conversion chains.""" - # String to number - parse_int = p.strip >> p.to_int - assert parse_int(" 42 ") == 42 - - # Number to formatted string - format_price = p.to_float >> p.round_to(2) >> p.format_string("${}") - assert format_price("19.999") == "$20.0" - - # Boolean conversion - truthiness = p.lower >> p.equals("yes") - assert truthiness("YES") - assert not truthiness("no") - - -def test_fhir_specific_operations(): - """Test FHIR-specific transformations.""" - # Extract ID from reference - assert p.extract_id()("Patient/123") == "123" - assert p.extract_id()("Observation/obs-456") == "obs-456" - assert p.extract_id()("789") == "789" # No slash - - # Complex FHIR reference processing - get_patient_id = p.extract_id() >> p.to_int >> p.format_string("PAT-{:04d}") - assert get_patient_id("Patient/42") == "PAT-0042" - - -def test_default_handling(): - """Test default value handling.""" - # Replace None with default - safe_upper = p.default_to("") >> p.upper - assert safe_upper(None) == "" - assert safe_upper("hello") == "HELLO" - - # Chain with null safety - safe_process = p.default_to("0") >> p.to_int >> p.add(10) - assert safe_process(None) == 10 - assert safe_process("5") == 15 - - -def test_chain_composition(): - """Test composing multiple chains.""" - # Create reusable chains - normalize_name = p.strip >> p.lower >> p.capitalize - - # Compose chains - process_title = normalize_name >> p.format_string("Title: {}") - assert process_title(" john DOE ") == "Title: John doe" - - # Chain of chains - chain1 = p.upper >> p.replace("A", "X") - chain2 = p.replace("E", "Y") >> p.lower - combined = chain1 >> chain2 - assert combined("apple") == "xpply" - - -def test_with_existing_partials(): - """Test integration with existing partial functions.""" - # Use existing arithmetic partials - calculate = p.to_int >> p.add(10) >> p.multiply(2) - assert calculate("5") == 30 - - # Mix with new chainable functions - process = p.strip >> p.to_int >> p.ChainableFn(lambda x: x > 10) - assert process(" 15 ") - assert not process(" 5 ") - - -def test_error_propagation(): - """Test that errors propagate through chains.""" - chain = p.to_int >> p.multiply(2) - - with pytest.raises(ValueError): - chain("not a number") - - # But we can add error handling - safe_chain = p.ChainableFn(lambda x: int(x) if x.isdigit() else 0) >> p.multiply(2) - assert safe_chain("42") == 84 - assert safe_chain("abc") == 0 - - -def test_function_chain_repr(): - """Test string representation of chains.""" - chain = 
p.upper >> p.strip >> p.replace(" ", "_") - repr_str = repr(chain) - assert "upper" in repr_str - assert "strip" in repr_str - assert ">>" in repr_str - - -# Tests for new partials that replaced SEEDs -def test_case_partial(): - """Test case partial function.""" - # Test with dict cases - status_mapper = p.case( - {"active": "✓ Active", "inactive": "✗ Inactive"}, default="Unknown" - ) - - assert status_mapper("active") == "✓ Active" - assert status_mapper("inactive") == "✗ Inactive" - assert status_mapper("pending") == "Unknown" - - # Test with function cases - range_mapper = p.case( - [ - (lambda x: x > 100, "HIGH"), - (lambda x: x > 50, "MEDIUM"), - (lambda x: x >= 0, "LOW"), - ], - default="INVALID", - ) - - assert range_mapper(150) == "HIGH" - assert range_mapper(75) == "MEDIUM" - assert range_mapper(25) == "LOW" - assert range_mapper(-10) == "INVALID" - - -def test_coalesce_partial(): - """Test coalesce partial function.""" - - data = {"missing": None, "empty": "", "value": "found", "backup": "backup_value"} - - # Test with multiple paths - coalesce_found_key = p.coalesce("missing", "empty", "value", default="DEFAULT") - assert coalesce_found_key(data) == "found" - - # Test with all None/empty - coalesce_empty = p.coalesce("missing", "empty", default="DEFAULT") - assert coalesce_empty(data) == "DEFAULT" - - # Test without default - coalesce_no_default = p.coalesce("value", "backup") - assert coalesce_no_default(data) == "found" - - -def test_template_partial(): - """Test template partial function.""" - # Basic template - name_template = p.template("{} {}") - assert name_template("John", "Doe") == "John Doe" - - # Template with skip_none - full_template = p.template("{} {} {}", skip_none=True) - assert full_template("John", None, "Doe") == "John Doe" - assert full_template("John", "Middle", "Doe") == "John Middle Doe" - assert full_template(None, None, None) == "" - - -def test_flatten_partial(): - """Test flatten partial function.""" - data = { - "names": ["John", "Jane"], - "ids": ["123", "456"], - "empty": [], - "single": "solo", - } - - # Test basic flatten - flatten_func = p.flatten(["names", "ids"]) - result = flatten_func(data) - assert result == "John, Jane, 123, 456" - - # Test custom delimiter - flatten_pipe = p.flatten(["names"], delimiter=" | ") - assert flatten_pipe(data) == "John | Jane" - - # Test with empty and single values - flatten_mixed = p.flatten(["names", "empty", "single"]) - assert flatten_mixed(data) == "John, Jane, solo" - - -def test_partials_integration_with_chains(): - """Test that new partials work with function chains.""" - # Chain case with other operations - status_chain = ( - p.get("status") - >> p.case({"1": "active", "0": "inactive"}, default="unknown") - >> p.upper - ) - data = {"status": "1"} - assert status_chain(data) == "ACTIVE" - - # Use template in a complex chain - format_name = p.template("{} {}") - name_chain = ( - p.ChainableFn( - lambda data: format_name(p.get("first")(data), p.get("last")(data)) - ) - >> p.upper - ) - - name_data = {"first": "john", "last": "doe"} - assert name_chain(name_data) == "JOHN DOE" - - -def test_lookup_with_dict(): - """Test lookup function with dictionaries.""" - codes = {"A": "Alpha", "B": "Beta", "C": "Charlie"} - - # Basic lookup - lookup_codes = p.lookup(codes) - assert lookup_codes("A") == "Alpha" - assert lookup_codes("B") == "Beta" - assert lookup_codes("Z") is None # Missing key returns None - - # Lookup with custom default - lookup_with_default = p.lookup(codes, default="Unknown") - assert 
lookup_with_default("A") == "Alpha" - assert lookup_with_default("Z") == "Unknown" - - # Chain with get - chain = p.get("code") >> p.lookup(codes, "N/A") - assert chain({"code": "B"}) == "Beta" - assert chain({"code": "X"}) == "N/A" - assert chain({"other": "Y"}) == "N/A" - - -def test_lookup_with_lexicon(): - """Test lookup function with Lexicon.""" - from chidian.lexicon import Lexicon - - # Create a lexicon with default - lexicon = Lexicon({"01": "One", "02": "Two"}, default="Unknown") - - # Basic lookup - lookup_lex = p.lookup(lexicon) - assert lookup_lex("01") == "One" - assert lookup_lex("02") == "Two" - assert lookup_lex("99") == "Unknown" # Uses Lexicon's default - - # Lookup with override default - lookup_override = p.lookup(lexicon, default="Not Found") - assert lookup_override("01") == "One" - assert lookup_override("99") == "Not Found" # Override default - - # Test bidirectional lookup (Lexicon feature) - assert lookup_lex("One") == "01" # Reverse lookup - assert lookup_lex("Two") == "02" - - -def test_lookup_with_list(): - """Test lookup function with lists (using index).""" - values = ["zero", "one", "two", "three"] - - lookup_list = p.lookup(values, default="out of range") - assert lookup_list(0) == "zero" - assert lookup_list(2) == "two" - assert lookup_list(10) == "out of range" - assert lookup_list(-1) == "three" # Negative index - - # Chain with other operations - chain = p.to_int >> p.lookup(values, "invalid") - assert chain("1") == "one" - assert chain("99") == "invalid" - - -def test_lookup_with_custom_getitem(): - """Test lookup with custom object implementing __getitem__.""" - - class CustomMapping: - def __getitem__(self, key): - if key == "special": - return "✨ Special Value ✨" - raise KeyError(f"Key {key} not found") - - custom = CustomMapping() - lookup_custom = p.lookup(custom, default="default") - - assert lookup_custom("special") == "✨ Special Value ✨" - assert lookup_custom("other") == "default" - - -def test_lookup_complex_chains(): - """Test lookup in complex transformation chains.""" - # Medical code transformation - loinc_to_display = { - "8480-6": "Systolic blood pressure", - "8462-4": "Diastolic blood pressure", - "8867-4": "Heart rate", - } - - # Extract and transform - transform = ( - p.get("measurements[0].code") - >> p.lookup(loinc_to_display, "Unknown measurement") - >> p.upper - >> p.replace(" ", "_") - ) - - data = {"measurements": [{"code": "8480-6", "value": 120}]} - assert transform(data) == "SYSTOLIC_BLOOD_PRESSURE" - - # Multiple lookups in sequence - status_codes = {"A": "active", "I": "inactive", "P": "pending"} - status_display = { - "active": "✓ Active", - "inactive": "✗ Inactive", - "pending": "⏳ Pending", - } - - status_chain = ( - p.get("status_code") - >> p.lookup(status_codes, "unknown") - >> p.lookup(status_display, "? Unknown") - ) - - assert status_chain({"status_code": "A"}) == "✓ Active" - assert status_chain({"status_code": "X"}) == "? 
Unknown" - - -def test_lookup_caching(): - """Test that lookup function is created only once.""" - mapping = {"a": 1, "b": 2} - - # Create two lookup functions with same mapping - lookup1 = p.lookup(mapping) - lookup2 = p.lookup(mapping) - - # They should be different ChainableFn instances - assert lookup1 is not lookup2 - - # But the underlying function should work the same - assert lookup1("a") == lookup2("a") == 1 - - # The function is created once per call to lookup() - # not per invocation of the returned ChainableFn diff --git a/chidian-py/tests/test_piper.py b/chidian-py/tests/test_piper.py deleted file mode 100644 index 8bc4058..0000000 --- a/chidian-py/tests/test_piper.py +++ /dev/null @@ -1,435 +0,0 @@ -"""Comprehensive tests for Piper mapping scenarios.""" - -from typing import Any - -import chidian.partials as p -import pytest -from chidian import DataMapping, Piper, RecordSet, get - -from tests.structstest import ( - Observation, - Patient, - PersonSource, - PersonTarget, - ProcessedData, - SourceData, - SourceModel, - TargetModel, -) - - -class TestPiperBasic: - """Test basic Piper functionality with DataMapping.""" - - def test_simple_mapping(self, simple_data: dict[str, Any]) -> None: - """Test basic Piper functionality with callable mapping.""" - - def mapping(data: dict) -> dict: - return { - "patient_id": get(data, "data.patient.id"), - "is_active": get(data, "data.patient.active"), - "status": "processed", - } - - data_mapping = DataMapping(SourceData, ProcessedData, mapping) - piper: Piper = Piper(data_mapping) - result = piper(SourceData.model_validate(simple_data)) - - assert isinstance(result, ProcessedData) - assert result.patient_id == "abc123" - assert result.is_active - assert result.status == "processed" - - def test_callable_mapping_with_partials(self) -> None: - """Test DataMapping with callable mapping using partials API.""" - - data = { - "firstName": "John", - "lastName": "Doe", - "status": "active", - "codes": ["A", "B", "C"], - "address": "123 Main St|Boston|02101", - } - - def mapper(data: dict) -> dict: - # Use new partials API - name_template = p.template("{} {}") - status_classifier = p.get("status") >> p.case( - {"active": "✓ Active", "inactive": "✗ Inactive"}, default="Unknown" - ) - city_extractor = p.get("address") >> p.split("|") >> p.at_index(1) - - return { - "name": name_template(get(data, "firstName"), get(data, "lastName")), - "status_display": status_classifier(data), - "all_codes": p.flatten(["codes"], delimiter=", ")(data), - "city": city_extractor(data), - "backup_name": p.coalesce("nickname", "firstName", default="Guest")( - data - ), - } - - data_mapping = DataMapping(PersonSource, PersonTarget, mapper) - piper: Piper = Piper(data_mapping) - result = piper(PersonSource.model_validate(data)) - - assert isinstance(result, PersonTarget) - assert result.name == "John Doe" - assert result.status_display == "✓ Active" - assert result.all_codes == "A, B, C" - assert result.city == "Boston" - assert result.backup_name == "John" - - -class TestPiperUnidirectional: - """Test Piper in unidirectional mode (View).""" - - def test_typed_piper_view_creation(self) -> None: - """Test creating a typed Piper in View mode.""" - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - assert isinstance(piper, Piper) - assert piper._mode == "view" - assert piper.input_type == Patient - assert piper.output_type == Observation - assert 
piper.can_reverse() is False - - def test_typed_piper_view_forward(self) -> None: - """Test forward transformation with View.""" - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - patient = Patient(id="123", name="John", active=True, age=45) - - # Forward should return just the target object (no spillover) - obs: Any = piper.forward(patient) - - # In non-strict mode, may return dict instead of typed object - if isinstance(obs, dict): - assert obs.get("subject_ref") == "123" - assert obs.get("performer") == "John" - else: - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - assert obs.performer == "John" - - def test_typed_piper_view_call_syntax(self) -> None: - """Test that View can be called directly without .forward().""" - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - patient = Patient(id="123", name="John", active=True) - - # Should work same as .forward() - obs1: Any = piper(patient) - obs2: Any = piper.forward(patient) - - # Both should return the same type and content - if isinstance(obs1, dict) and isinstance(obs2, dict): - assert obs1.get("subject_ref") == obs2.get("subject_ref") == "123" - elif hasattr(obs1, "subject_ref") and hasattr(obs2, "subject_ref"): - assert obs1.subject_ref == obs2.subject_ref == "123" - - def test_typed_piper_view_reverse_fails(self) -> None: - """Test that reverse transformation fails with View.""" - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - obs = Observation(subject_ref="123", performer="John") - - # Reverse should fail - with pytest.raises(ValueError, match="Reverse transformation only available"): - piper.reverse(obs, RecordSet()) - - assert piper.can_reverse() is False - - def test_typed_piper_view_type_validation(self) -> None: - """Test type validation with typed Piper.""" - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=True, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - # Correct type works - patient = Patient(id="123", name="John", active=True) - obs = piper.forward(patient) - assert isinstance(obs, Observation) - - # Wrong type should fail - with pytest.raises(TypeError): - piper.forward("not a patient") - - def test_create_unidirectional_piper(self) -> None: - """Test creating unidirectional piper.""" - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - assert isinstance(piper, Piper) - assert piper._mode == "view" - assert piper.can_reverse() is False - - -class TestPiperBidirectional: - """Test Piper in bidirectional mode (Lens).""" - - def test_typed_piper_lens_creation(self) -> None: - """Test creating a typed Piper in Lens mode.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - assert isinstance(piper, Piper) - assert piper._mode == "lens" - assert piper.input_type == Patient - assert piper.output_type == Observation - assert piper.can_reverse() is True - - def test_typed_piper_lens_forward(self) -> None: - """Test forward 
transformation with Lens.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - patient = Patient(id="123", name="John", active=True, age=45) - - # Forward should return target object AND spillover - obs, spillover = piper.forward(patient) - - assert isinstance(obs, Observation) - assert obs.subject_ref == "123" - assert obs.performer == "John" - - assert isinstance(spillover, RecordSet) - spillover_data = spillover._items[0] - assert spillover_data["active"] is True - assert spillover_data["age"] == 45 - - def test_typed_piper_lens_reverse(self) -> None: - """Test reverse transformation with Lens.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - obs = Observation(subject_ref="123", performer="John") - spillover = RecordSet([{"active": True, "age": 45}]) - - patient = piper.reverse(obs, spillover) - - assert isinstance(patient, Patient) - assert patient.id == "123" - assert patient.name == "John" - assert patient.active is True - assert patient.age == 45 - - def test_typed_piper_lens_roundtrip(self) -> None: - """Test lossless roundtrip with Lens.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - original = Patient(id="123", name="John", active=True, age=45) - - # Forward - obs, spillover = piper.forward(original) - - # Reverse - recovered = piper.reverse(obs, spillover) - - assert recovered == original - - def test_typed_piper_lens_call_syntax(self) -> None: - """Test Piper with Lens can be called directly.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - patient = Patient(id="123", name="John", active=True) - obs, spillover = piper(patient) # Should work same as forward() - - assert obs.subject_ref == "123" - assert spillover is not None - - def test_create_bidirectional_piper(self) -> None: - """Test creating bidirectional piper.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - assert isinstance(piper, Piper) - assert piper._mode == "lens" - assert piper.can_reverse() is True - - -class TestPiperStrictMode: - """Test Piper strict mode behavior.""" - - def test_piper_inherits_data_mapping_properties(self) -> None: - """Test that Piper inherits properties from DataMapping.""" - - def mapper(data: dict) -> dict: - return {"result": data.get("value")} - - data_mapping = DataMapping(SourceModel, TargetModel, mapper, strict=False) - piper: Piper = Piper(data_mapping) - - assert piper.input_type is SourceModel - assert piper.output_type is TargetModel - assert piper.strict is False - - def test_lens_inherits_strict_mode(self) -> None: - """Test that Piper inherits strict mode from mapping.""" - mapping_strict = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - strict=True, - bidirectional=True, - ) - mapping_nonstrict = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - strict=False, - bidirectional=False, - ) - - piper_strict: Piper = Piper(mapping_strict) - piper_nonstrict: Piper = Piper(mapping_nonstrict) - - assert piper_strict.strict is True - assert 
piper_nonstrict.strict is False - - def test_strict_input_validation(self) -> None: - """Test strict input type validation with mapping.""" - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - strict=True, - bidirectional=True, - ) - piper: Piper = Piper(mapping) - - # Correct type works - patient = Patient(id="123", name="John", active=True) - obs, spillover = piper.forward(patient) - assert obs.subject_ref == "123" - - # Wrong type should fail in strict mode - with pytest.raises(TypeError): - piper.forward("not a patient") - - -class TestPiperIntegration: - """Test Piper integration scenarios.""" - - def test_type_safety_prevents_chaining_errors(self) -> None: - """Test that type safety prevents incompatible chaining.""" - # Create two pipers with incompatible types - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - piper: Piper = Piper(mapping) - - # This would be a type error if we tried to chain with a different input type - # (In real usage, mypy/type checker would catch this) - assert piper.input_type == Patient - assert piper.output_type == Observation - - def test_mixed_mode_workflow(self) -> None: - """Test workflow mixing View and Lens based pipers.""" - # Step 1: Use View for one-way transformation - mapping = DataMapping( - Patient, - Observation, - {"subject_ref": "id", "performer": "name"}, - strict=False, - bidirectional=False, - ) - unidirectional_piper: Piper = Piper(mapping) - - # Step 2: Use Lens for bidirectional transformation - mapping = DataMapping( - Patient, - Observation, - {"id": "subject_ref", "name": "performer"}, - bidirectional=True, - ) - bidirectional_piper: Piper = Piper(mapping) - - patient = Patient(id="123", name="John", active=True, age=45) - - # View transformation (one-way) - may return dict in non-strict mode - obs_unidirectional: Any = unidirectional_piper.forward(patient) - if isinstance(obs_unidirectional, dict): - assert obs_unidirectional.get("subject_ref") == "123" - elif hasattr(obs_unidirectional, "subject_ref"): - assert obs_unidirectional.subject_ref == "123" - - # Lens transformation (bidirectional) - obs_bidirectional, spillover = bidirectional_piper.forward(patient) - recovered = bidirectional_piper.reverse(obs_bidirectional, spillover) - assert recovered == patient diff --git a/chidian-py/tests/test_recordset.py b/chidian-py/tests/test_recordset.py deleted file mode 100644 index 1f348bd..0000000 --- a/chidian-py/tests/test_recordset.py +++ /dev/null @@ -1,320 +0,0 @@ -from chidian.recordset import RecordSet - - -def test_basic_collection(): - """Test basic RecordSet functionality.""" - # Create from list - items = [ - {"id": "p1", "name": "John", "age": 30}, - {"id": "p2", "name": "Jane", "age": 25}, - {"id": "p3", "name": "Bob", "age": 35}, - ] - - collection = RecordSet(items) - - # Test length - assert len(collection) == 3 - - # Test iteration - assert list(collection) == items - - # Test dict-like access with new $ syntax - assert collection["$0"]["name"] == "John" - assert collection["$1"]["name"] == "Jane" - assert collection["$2"]["name"] == "Bob" - - -def test_dict_access_and_get_all(): - """Test built-in dict access and get_all method.""" - collection = RecordSet( - [ - {"patient": {"id": "123", "name": "John"}, "status": "active"}, - {"patient": {"id": "456", "name": "Jane"}, "status": "inactive"}, - {"patient": {"id": "789", "name": "Bob"}, "status": "active"}, - ] - ) - - # Test built-in 
dict access (should work as normal dict) - assert collection["$0"]["patient"]["id"] == "123" - assert collection["$1"]["patient"]["id"] == "456" - assert collection["$2"]["patient"]["id"] == "789" - - # Test dict.get() method (inherited) - assert collection.get("$0")["patient"]["name"] == "John" - assert collection.get("$nonexistent") is None - assert collection.get("$nonexistent", "default") == "default" - - # Test get_all method for extracting from all items - all_ids = collection.get_all("patient.id") - assert all_ids == ["123", "456", "789"] - - all_names = collection.get_all("patient.name") - assert all_names == ["John", "Jane", "Bob"] - - all_statuses = collection.get_all("status") - assert all_statuses == ["active", "inactive", "active"] - - # Test get_all with missing paths and defaults - missing_field = collection.get_all("missing_field", default="N/A") - assert missing_field == ["N/A", "N/A", "N/A"] - - # Test get_all with apply function - upper_names = collection.get_all("patient.name", apply=str.upper) - assert upper_names == ["JOHN", "JANE", "BOB"] - - -def test_select_method(): - """Test the enhanced select method with field selection.""" - collection = RecordSet( - [ - {"name": "John", "age": 30, "patient": {"id": "p1", "status": "active"}}, - {"name": "Jane", "age": 25, "patient": {"id": "p2", "status": "inactive"}}, - {"name": "Bob", "age": 35, "patient": {"id": "p3", "status": "active"}}, - {"name": "Alice", "age": 28, "encounter": {"id": "e1", "patient": "p1"}}, - ] - ) - collection.append( - {"name": "Charlie", "age": 40, "patient": {"id": "p4", "status": "active"}}, - key="special", - ) - - # Select all items (*) - all_items = collection.select("*") - assert len(all_items) == 5 - assert all_items["$0"]["name"] == "John" - assert all_items["$special"]["name"] == "Charlie" # Preserves custom key - - # Select specific fields - names_ages = collection.select("name, age") - assert len(names_ages) == 5 - assert names_ages["$0"] == {"name": "John", "age": 30} - assert names_ages["$1"] == {"name": "Jane", "age": 25} - assert "patient" not in names_ages["$0"] # Only selected fields - - # Select nested fields (now includes None for missing with preserve mode) - patient_data = collection.select("patient.id, patient.status") - assert len(patient_data) == 5 # All items included, Alice gets None values - assert patient_data["$0"] == {"id": "p1", "status": "active"} - assert patient_data["$1"] == {"id": "p2", "status": "inactive"} - assert patient_data["$3"] == {"id": None, "status": None} # Alice has no patient - - # Select with wildcard from nested object - patient_all = collection.select("patient.*") - assert len(patient_all) == 5 - assert patient_all["$0"] == {"id": "p1", "status": "active"} - assert patient_all["$3"] == {} # Alice has no patient, gets empty dict - assert patient_all["$special"] == {"id": "p4", "status": "active"} - - # Test the old filtering behavior explicitly - patient_data_filtered = collection.select( - "patient.id, patient.status", sparse="filter" - ) - assert len(patient_data_filtered) == 4 # Alice filtered out - assert patient_data_filtered["$0"] == {"id": "p1", "status": "active"} - assert patient_data_filtered["$1"] == {"id": "p2", "status": "inactive"} - - # Select with filter - active_patients = collection.select( - "name, patient.status", - where=lambda x: x.get("patient", {}).get("status") == "active", - ) - assert len(active_patients) == 3 - assert active_patients["$0"] == {"name": "John", "status": "active"} - assert active_patients["$special"] == 
{"name": "Charlie", "status": "active"} - - # Flat return for single field - names_flat = collection.select("name", flat=True) - assert names_flat == ["John", "Jane", "Bob", "Alice", "Charlie"] - - # Flat with filter - active_names = collection.select( - "name", - where=lambda x: x.get("patient", {}).get("status") == "active", - flat=True, - ) - assert active_names == ["John", "Bob", "Charlie"] - - -def test_sparse_data_handling(): - """Test sparse data handling with missing fields.""" - collection = RecordSet( - [ - {"name": "John", "age": 30, "patient": {"id": "p1", "status": "active"}}, - {"name": "Jane", "age": 25}, # Missing patient field - {"name": "Bob", "patient": {"id": "p3"}}, # Missing age and patient.status - {"age": 35, "patient": {"status": "active"}}, # Missing name and patient.id - ] - ) - - # Test preserve mode (default) - keeps structure with None for missing values - names_ages = collection.select("name, age") - assert len(names_ages) == 4 - assert names_ages["$0"] == {"name": "John", "age": 30} - assert names_ages["$1"] == {"name": "Jane", "age": 25} - assert names_ages["$2"] == {"name": "Bob", "age": None} # age is None - assert names_ages["$3"] == {"name": None, "age": 35} # name is None - - # Test nested field extraction with preserve - patient_ids = collection.select("patient.id") - assert len(patient_ids) == 4 - assert patient_ids["$0"] == {"id": "p1"} - assert patient_ids["$1"] == {"id": None} # Jane has no patient - assert patient_ids["$2"] == {"id": "p3"} - assert patient_ids["$3"] == {"id": None} # No patient.id - - # Test filter mode - removes items/fields with None values - patient_ids_filtered = collection.select("patient.id", sparse="filter") - assert len(patient_ids_filtered) == 2 # Only John and Bob have patient.id - assert patient_ids_filtered["$0"] == {"id": "p1"} - assert patient_ids_filtered["$1"] == {"id": "p3"} - - # Test multiple fields with filter (includes items with ANY requested field) - partial_data = collection.select("name, patient.id", sparse="filter") - assert len(partial_data) == 3 # John (both), Jane (name only), Bob (both) - assert partial_data["$0"] == {"name": "John", "id": "p1"} - assert partial_data["$1"] == {"name": "Jane"} # Only has name - assert partial_data["$2"] == {"name": "Bob", "id": "p3"} - - # Test flat mode with preserve (includes None) - names_flat = collection.select("name", flat=True) - assert names_flat == ["John", "Jane", "Bob", None] - - # Test flat mode with filter (excludes None) - names_flat_filtered = collection.select("name", flat=True, sparse="filter") - assert names_flat_filtered == ["John", "Jane", "Bob"] - - # Test wildcard with sparse handling - patient_all = collection.select("patient.*") - assert len(patient_all) == 4 - assert patient_all["$0"] == {"id": "p1", "status": "active"} - assert patient_all["$1"] == {} # Jane has no patient, so empty dict - assert patient_all["$2"] == {"id": "p3"} # Bob has patient but no status - assert patient_all["$3"] == {"status": "active"} # Has status but no id - - # Test wildcard with filter - patient_all_filtered = collection.select("patient.*", sparse="filter") - assert len(patient_all_filtered) == 3 # Excludes Jane (no patient field) - assert patient_all_filtered["$0"] == {"id": "p1", "status": "active"} - assert patient_all_filtered["$1"] == {"id": "p3"} - assert patient_all_filtered["$2"] == {"status": "active"} - - -def test_filter_method(): - """Test the filter method.""" - collection = RecordSet( - [ - {"name": "John", "age": 30, "active": True}, - {"name": 
"Jane", "age": 25, "active": False}, - {"name": "Bob", "age": 35, "active": True}, - ] - ) - collection.append({"name": "Alice", "age": 28, "active": True}, key="alice") - - # Filter by active status - active_collection = collection.filter(lambda x: x.get("active", False)) - assert len(active_collection) == 3 - - # Check that numeric keys are reindexed - assert "$0" in active_collection - assert "$1" in active_collection - assert "$2" in active_collection - assert active_collection["$0"]["name"] == "John" - assert active_collection["$1"]["name"] == "Bob" - assert active_collection["$2"]["name"] == "Alice" - - # Check that custom key is preserved - assert "$alice" in active_collection - assert active_collection["$alice"]["name"] == "Alice" - - # Filter by age - young_collection = collection.filter(lambda x: x.get("age", 0) < 30) - assert len(young_collection) == 2 - assert list(young_collection)[0]["name"] == "Jane" - assert list(young_collection)[1]["name"] == "Alice" - - -def test_map_method(): - """Test the map method.""" - collection = RecordSet([{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]) - - # Transform to add computed field - enhanced = collection.map(lambda x: {**x, "adult": x.get("age", 0) >= 18}) - - assert all("adult" in item for item in enhanced) - assert all(item["adult"] is True for item in enhanced) - - -def test_to_json(): - """Test JSON serialization.""" - collection = RecordSet([{"id": 1, "name": "Test"}, {"id": 2, "name": "Another"}]) - - # As dict - json_str = collection.to_json() - assert '"$0":' in json_str - assert '"$1":' in json_str - - # As list - json_list = collection.to_json(as_list=True) - assert json_list.startswith("[") - assert json_list.endswith("]") - - -def test_append_method(): - """Test appending items to collection.""" - collection = RecordSet() - - # Append with auto-generated key - collection.append({"name": "John"}) - assert len(collection) == 1 - assert collection["$0"]["name"] == "John" - - # Append with specific key (should get $ prefix) - collection.append({"name": "Jane"}, key="jane_key") - assert collection["$jane_key"]["name"] == "Jane" - assert len(collection) == 2 - - # Append another auto-keyed item - collection.append({"name": "Bob"}) - assert collection["$2"]["name"] == "Bob" - assert len(collection) == 3 - - # Test accessing named item with dict access - assert collection["$jane_key"]["name"] == "Jane" - - -def test_complex_nested_access(): - """Test complex nested data access.""" - collection = RecordSet( - [ - { - "patient": { - "id": "123", - "identifiers": [ - {"system": "MRN", "value": "MRN123"}, - {"system": "SSN", "value": "SSN456"}, - ], - }, - "encounters": [ - {"id": "e1", "date": "2024-01-01"}, - {"id": "e2", "date": "2024-02-01"}, - ], - } - ] - ) - - # Access nested array element using get_all - mrn = collection.get_all("patient.identifiers[0].value") - assert mrn == ["MRN123"] - - # Access all encounter IDs using get_all - encounter_ids = collection.get_all("encounters[*].id") - assert encounter_ids == [["e1", "e2"]] - - # Access using dict access - first_patient_id = collection["$0"]["patient"]["id"] - assert first_patient_id == "123" - - # Test complex path with array using get_all - all_identifiers = collection.get_all("patient.identifiers") - assert len(all_identifiers[0]) == 2 - assert all_identifiers[0][0]["system"] == "MRN" diff --git a/chidian/__init__.py b/chidian/__init__.py new file mode 100644 index 0000000..c2bd237 --- /dev/null +++ b/chidian/__init__.py @@ -0,0 +1,21 @@ +from .core import get, 
put +from .data_mapping import DataMapping +from .lib.get_dsl_parser import parse_path_peg as parse_path +from .mapper import DROP, KEEP, Mapper, MapperResult, ValidationMode +from .partials import ChainableFunction, FunctionChain +from .table import Table + +__all__ = [ + "get", + "put", + "parse_path", + "Table", + "Mapper", + "DataMapping", + "DROP", + "KEEP", + "ValidationMode", + "MapperResult", + "FunctionChain", + "ChainableFunction", +] diff --git a/chidian/core.py b/chidian/core.py new file mode 100644 index 0000000..ffa12ef --- /dev/null +++ b/chidian/core.py @@ -0,0 +1,101 @@ +""" +Core get/put functions for chidian data traversal and mutation. +""" + +import copy +from typing import Any, Callable + +from .lib.core_helpers import ( + apply_functions, + mutate_path, + traverse_path, + validate_mutation_path, +) +from .lib.parser import parse_path + + +def get( + source: dict | list, + key: str, + default: Any = None, + apply: Callable | list[Callable] | None = None, + strict: bool = False, +) -> Any: + """ + Extract values from nested data structures using path notation. + + Args: + source: Source data to traverse + key: Path string (e.g., "data.items[0].name") + default: Default value if path not found + apply: Function(s) to apply to the result + strict: If True, raise errors on missing paths + + Returns: + Value at path or default if not found + """ + try: + path = parse_path(key) + except ValueError as e: + if strict: + raise ValueError(f"Invalid path syntax: {key}") from e + return default + + try: + result = traverse_path(source, path, strict=strict) + except Exception: + if strict: + raise + result = None + + # Handle default value + if result is None and default is not None: + result = default + + # Apply functions if provided + if apply is not None and result is not None: + result = apply_functions(result, apply) + + return result + + +def put( + target: Any, + path: str, + value: Any, + strict: bool = False, +) -> Any: + """ + Set a value in a nested data structure, creating containers as needed. + + Args: + target: Target data structure to modify + path: Path string (e.g., "data.items[0].name") + value: Value to set + strict: If True, raise errors on invalid operations + + Returns: + Modified copy of the target data + """ + try: + parsed_path = parse_path(path) + except ValueError as e: + raise ValueError(f"Invalid path syntax: {path}") from e + + # Validate path for mutation + if not validate_mutation_path(parsed_path): + if strict: + raise ValueError(f"Invalid mutation path: {path}") + return target + + # Deep copy for copy-on-write semantics + result = copy.deepcopy(target) + + try: + mutate_path(result, parsed_path, value, strict=strict) + except Exception: + if strict: + raise + return target + + return result diff --git a/chidian/data_mapping.py b/chidian/data_mapping.py new file mode 100644 index 0000000..cfc9725 --- /dev/null +++ b/chidian/data_mapping.py @@ -0,0 +1,61 @@ +""" +DataMapping class for pure semantic transformation definitions. +""" + +from typing import Any, Callable, Dict, Optional, Type, TypeVar + +from pydantic import BaseModel + +# Define generic type variables bounded to BaseModel +_InModel = TypeVar("_InModel", bound=BaseModel) +_OutModel = TypeVar("_OutModel", bound=BaseModel) + + +class DataMapping: + """ + Pure semantic transformation definition. + Only defines WHAT to transform, not HOW to execute it. 
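+
+    A minimal usage sketch (the field names below are illustrative, not part of
+    the API): each output field maps either to a callable applied to the input
+    dict or to a constant value.
+
+        >>> dm = DataMapping({"display": lambda d: d["name"].title(), "status": "active"})
+        >>> dm.transform({"name": "john"})
+        {'display': 'John', 'status': 'active'}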
+ """ + + def __init__( + self, + transformations: Dict[str, Callable[[dict], Any] | Any], + input_schema: Optional[Type[BaseModel]] = None, + output_schema: Optional[Type[BaseModel]] = None, + ): + """ + Initialize a semantic data mapping. + + Args: + transformations: Dict mapping output fields to transformations + input_schema: Optional Pydantic model for input validation + output_schema: Optional Pydantic model for output validation + """ + if not isinstance(transformations, dict): + raise TypeError( + f"Transformations must be dict, got {type(transformations).__name__}" + ) + + self.transformations = transformations + self.input_schema = input_schema + self.output_schema = output_schema + + def transform(self, data: dict) -> dict: + """ + Apply the pure transformation logic. + This is the core semantic transformation without any validation. + """ + result = {} + + for target_field, transform_spec in self.transformations.items(): + if callable(transform_spec): + result[target_field] = transform_spec(data) + else: + result[target_field] = transform_spec + + return result + + @property + def has_schemas(self) -> bool: + """Check if this mapping has any schemas defined.""" + return self.input_schema is not None or self.output_schema is not None diff --git a/chidian/lexicon.py b/chidian/lexicon.py new file mode 100644 index 0000000..5135151 --- /dev/null +++ b/chidian/lexicon.py @@ -0,0 +1,207 @@ +""" +Bidirectional string mapper for code/terminology translations. + +Primary use case: Medical code system mappings (e.g., LOINC ↔ SNOMED). +Supports both one-to-one and many-to-one relationships with automatic +reverse lookup generation. + +Examples: + Simple code mapping: + >>> loinc_to_snomed = Lexicon({'8480-6': '271649006'}) + >>> loinc_to_snomed['8480-6'] # Forward lookup + '271649006' + >>> loinc_to_snomed['271649006'] # Reverse lookup + '8480-6' + + Many-to-one mapping (first value is default): + >>> mapper = Lexicon({('LA6699-8', 'LA6700-4'): 'absent'}) + >>> mapper['absent'] # Returns first key as default + 'LA6699-8' +""" + +from typing import Optional, Union + + +class LexiconBuilder: + """Builder for creating Lexicon instances.""" + + def __init__(self) -> None: + self._mappings: dict[str, str] = {} + self._reverse_priorities: dict[str, str] = {} + self._default: Optional[str] = None + self._metadata: dict[str, str] = {} + + def add(self, key: str, value: str) -> "LexiconBuilder": + """Add a single key-value mapping.""" + if not isinstance(key, str) or not isinstance(value, str): + raise TypeError("Keys and values must be strings") + + self._mappings[key] = value + if value not in self._reverse_priorities: + self._reverse_priorities[value] = key + return self + + def add_many(self, keys: list[str], value: str) -> "LexiconBuilder": + """Add multiple keys that map to the same value.""" + if not isinstance(value, str): + raise TypeError("Value must be a string") + + for i, key in enumerate(keys): + if not isinstance(key, str): + raise TypeError("All keys must be strings") + self._mappings[key] = value + # First key is default for reverse + if i == 0 and value not in self._reverse_priorities: + self._reverse_priorities[value] = key + return self + + def set_primary_reverse(self, value: str, primary_key: str) -> "LexiconBuilder": + """Override which key is returned for reverse lookup of a value.""" + if primary_key not in self._mappings or self._mappings[primary_key] != value: + raise ValueError(f"Key '{primary_key}' must map to value '{value}'") + self._reverse_priorities[value] = 
primary_key + return self + + def set_default(self, default: str) -> "LexiconBuilder": + """Set default value for missing keys.""" + if not isinstance(default, str): + raise TypeError("Default must be a string") + self._default = default + return self + + def set_metadata(self, metadata: dict[str, str]) -> "LexiconBuilder": + """Set metadata for the lexicon.""" + self._metadata = metadata + return self + + def build(self) -> "Lexicon": + """Build and return the Lexicon instance.""" + lexicon = Lexicon.__new__(Lexicon) + super(Lexicon, lexicon).__init__(self._mappings) + lexicon._default = self._default + lexicon._reverse = self._reverse_priorities.copy() + lexicon.metadata = self._metadata + + return lexicon + + +class Lexicon(dict): + def __init__( + self, + mappings: dict[Union[str, tuple], str], + default: Optional[str] = None, + metadata: Optional[dict] = None, + ): + """ + Initialize a bidirectional string mapper. + + Args: + mappings: Dict of mappings. Keys can be strings or tuples (for many-to-one). + default: Default value to return for missing keys + metadata: Optional metadata about the mapping (version, source, etc.) + """ + # Process mappings to flatten tuples + flat_mappings = {} + reverse_priorities = {} + + for key, value in mappings.items(): + # Validate value type + if not isinstance(value, str): + raise TypeError("Values must be strings") + + if isinstance(key, tuple): + # Many-to-one mapping + if len(key) == 0: + raise ValueError("Empty tuple keys are not allowed") + + for i, k in enumerate(key): + if not isinstance(k, str): + raise TypeError("All keys in tuples must be strings") + flat_mappings[k] = value + # First element is default for reverse + if i == 0 and value not in reverse_priorities: + reverse_priorities[value] = k + else: + # One-to-one mapping + if not isinstance(key, str): + raise TypeError("Keys must be strings or tuples of strings") + flat_mappings[key] = value + if value not in reverse_priorities: + reverse_priorities[value] = key + + # Initialize dict with flat mappings + super().__init__(flat_mappings) + self._default = default + self._reverse = reverse_priorities + self.metadata = metadata or {} + + def __getitem__(self, key: str) -> str: + """ + Bidirectional lookup with dict syntax. + Scans keys first, then values. + """ + # Try forward lookup first (check in dict keys) + if super().__contains__(key): + return super().__getitem__(key) + + # Try reverse lookup + # First check if it's in our reverse priority mapping + if key in self._reverse: + return self._reverse[key] + + # If not in priority mapping, search all values + for k, v in self.items(): + if v == key: + return k + + # Check if we have a default value + if self._default is not None: + return self._default + + # Raise KeyError if not found and no default + raise KeyError(f"Key '{key}' not found") + + def get(self, key: str, default: Optional[str] = None) -> Optional[str]: # type: ignore[override] + """ + Safe bidirectional lookup with default. + Scans keys first, then values. 
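+
+        Example (codes are illustrative):
+
+            >>> lex = Lexicon({'8480-6': '271649006'}, default='unknown')
+            >>> lex.get('8480-6')     # forward lookup
+            '271649006'
+            >>> lex.get('271649006')  # reverse lookup
+            '8480-6'
+            >>> lex.get('missing')    # no match on either side -> instance default
+            'unknown'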
+ """ + # Try forward lookup first (check in dict keys) + if super().__contains__(key): + return super().__getitem__(key) + + # Try reverse lookup + # First check if it's in our reverse priority mapping + if key in self._reverse: + return self._reverse[key] + + # If not in priority mapping, search all values + for k, v in self.items(): + if v == key: + return k + + # Key doesn't exist, use provided default if given, otherwise instance default + return default if default is not None else self._default + + def __contains__(self, key: object) -> bool: + """Check if key exists in either forward or reverse mapping.""" + if isinstance(key, str): + return super().__contains__(key) or key in self._reverse + return False + + def forward(self, key: str) -> Optional[str]: + """Transform from source to target format.""" + return super().get(key) + + def reverse(self, key: str) -> Optional[str]: + """Transform from target back to source format.""" + return self._reverse.get(key) + + def can_reverse(self) -> bool: + """Lexicon always supports reverse transformation.""" + return True + + @classmethod + def builder(cls) -> LexiconBuilder: + """Create a new LexiconBuilder instance.""" + return LexiconBuilder() diff --git a/chidian/lib/__init__.py b/chidian/lib/__init__.py new file mode 100644 index 0000000..1c3ef15 --- /dev/null +++ b/chidian/lib/__init__.py @@ -0,0 +1,5 @@ +""" +Internal library modules for chidian. + +This package contains helper functions, parsers, and implementation details. +""" diff --git a/chidian/lib/core_helpers.py b/chidian/lib/core_helpers.py new file mode 100644 index 0000000..d03a5d2 --- /dev/null +++ b/chidian/lib/core_helpers.py @@ -0,0 +1,325 @@ +""" +Helper functions for core get/put operations. +""" + +from typing import Any, Callable + +from .parser import Path, PathSegment, PathSegmentType + + +def traverse_path(data: Any, path: Path, strict: bool = False) -> Any: + """Traverse data structure according to path.""" + current = [data] + + for segment in path.segments: + next_items: list[Any] = [] + + for item in current: + if item is None: + if strict: + raise ValueError("Cannot traverse None value") + next_items.append(None) + continue + + if segment.type == PathSegmentType.KEY: + assert isinstance(segment.value, str) + result = _traverse_key(item, segment.value, strict) + # Only extend if we applied key to a list of dicts + # (i.e., when item was a list and we distributed the key) + if isinstance(item, list) and isinstance(result, list): + next_items.extend(result) + else: + next_items.append(result) + + elif segment.type == PathSegmentType.INDEX: + assert isinstance(segment.value, int) + result = _traverse_index(item, segment.value, strict) + next_items.append(result) + + elif segment.type == PathSegmentType.SLICE: + assert isinstance(segment.value, tuple) + start, end = segment.value + result = _traverse_slice(item, start, end, strict) + next_items.append(result) + + elif segment.type == PathSegmentType.WILDCARD: + result = _traverse_wildcard(item, strict) + if isinstance(result, list): + next_items.extend(result) + else: + next_items.append(result) + + elif segment.type == PathSegmentType.TUPLE: + assert isinstance(segment.value, list) + result = _traverse_tuple(item, segment.value, strict) + next_items.append(result) + + current = next_items + + # Return single item if only one result + if len(current) == 1: + return current[0] + return current + + +def _traverse_key(data: Any, key: str, strict: bool) -> Any: + """Traverse a key in dict or list of dicts.""" + if 
isinstance(data, dict): + if key in data: + return data[key] + elif strict: + raise KeyError(f"Key '{key}' not found") + else: + return None + + elif isinstance(data, list): + # Apply key to each dict in list + results = [] + for item in data: + if isinstance(item, dict): + if key in item: + results.append(item[key]) + elif strict: + raise KeyError(f"Key '{key}' not found in list element") + else: + results.append(None) + elif strict: + raise TypeError("Expected dict in list but got different type") + else: + results.append(None) + return results + + elif strict: + raise TypeError("Expected dict but got different type") + else: + return None + + +def _traverse_index(data: Any, idx: int, strict: bool) -> Any: + """Traverse an index in a list.""" + if not isinstance(data, list): + if strict: + raise TypeError("Expected list but got different type") + return None + + # Handle negative indexing + length = len(data) + actual_idx = idx if idx >= 0 else length + idx + + if 0 <= actual_idx < length: + return data[actual_idx] + elif strict: + raise IndexError(f"Index {idx} out of range") + else: + return None + + +def _traverse_slice(data: Any, start: int | None, end: int | None, strict: bool) -> Any: + """Traverse a slice in a list.""" + if not isinstance(data, list): + if strict: + raise TypeError("Expected list but got different type") + return None + + # Python handles negative indices and None values in slices automatically + return data[start:end] + + +def _traverse_wildcard(data: Any, strict: bool) -> Any: + """Traverse all elements in a list.""" + if not isinstance(data, list): + if strict: + raise TypeError("Expected list but got different type") + return None + return data + + +def _traverse_tuple(data: Any, paths: list[Path], strict: bool) -> tuple: + """Traverse multiple paths and return as tuple.""" + results = [] + for path in paths: + result = traverse_path(data, path, strict=strict) + results.append(result) + return tuple(results) + + +def apply_functions(value: Any, functions: Callable | list[Callable]) -> Any: + """Apply a function or list of functions to a value.""" + if not isinstance(functions, list): + functions = [functions] + + current = value + for func in functions: + try: + current = func(current) + except Exception: + return None + + return current + + +def validate_mutation_path(path: Path) -> bool: + """Validate that a path is suitable for mutation operations.""" + if not path.segments: + return False + + # Path must start with a key (not an index) + if path.segments[0].type != PathSegmentType.KEY: + return False + + # Check for unsupported segment types + for segment in path.segments: + if segment.type in ( + PathSegmentType.WILDCARD, + PathSegmentType.SLICE, + PathSegmentType.TUPLE, + ): + return False + + return True + + +def mutate_path(data: Any, path: Path, value: Any, strict: bool = False) -> None: + """Mutate data in-place at the specified path.""" + if not path.segments: + raise ValueError("Empty path") + + # Navigate to parent of target + current = data + for i, segment in enumerate(path.segments[:-1]): + if segment.type == PathSegmentType.KEY: + assert isinstance(segment.value, str) + current = _ensure_key_container( + current, segment.value, path.segments, i, strict + ) + elif segment.type == PathSegmentType.INDEX: + assert isinstance(segment.value, int) + current = _ensure_index_container( + current, segment.value, path.segments, i, strict + ) + + # Set final value + final_segment = path.segments[-1] + if final_segment.type == PathSegmentType.KEY: + assert 
isinstance(final_segment.value, str) + if not isinstance(current, dict): + if strict: + raise TypeError(f"Cannot set key '{final_segment.value}' on non-dict") + return + current[final_segment.value] = value + + elif final_segment.type == PathSegmentType.INDEX: + assert isinstance(final_segment.value, int) + if not isinstance(current, list): + if strict: + raise TypeError(f"Cannot set index {final_segment.value} on non-list") + return + + idx = final_segment.value + # Expand list if needed for positive indices + if idx >= 0: + while len(current) <= idx: + current.append(None) + current[idx] = value + else: + # Negative index + actual_idx = len(current) + idx + if actual_idx < 0: + if strict: + raise IndexError(f"Index {idx} out of range") + else: + current[actual_idx] = value + + +def _ensure_key_container( + current: Any, key: str, segments: list[PathSegment], index: int, strict: bool +) -> Any: + """Ensure a dict exists at key, creating if needed.""" + if not isinstance(current, dict): + if strict: + raise TypeError(f"Cannot traverse into non-dict at '{key}'") + return current + + # Determine what type of container we need + next_segment = segments[index + 1] + container_type = _determine_container_type(next_segment) + + if key not in current: + # Create appropriate container + if container_type == "list": + current[key] = [] + else: + current[key] = {} + else: + # Validate existing container type + existing = current[key] + if container_type == "list" and not isinstance(existing, list): + if strict: + raise TypeError( + f"Expected list at '{key}' but found {type(existing).__name__}" + ) + current[key] = [] + elif container_type == "dict" and not isinstance(existing, dict): + if strict: + raise TypeError( + f"Expected dict at '{key}' but found {type(existing).__name__}" + ) + current[key] = {} + + return current[key] + + +def _ensure_index_container( + current: Any, idx: int, segments: list[PathSegment], index: int, strict: bool +) -> Any: + """Ensure a list exists and has capacity for index.""" + if not isinstance(current, list): + if strict: + raise TypeError("Cannot index into non-list") + return current + + # Handle negative indexing + actual_idx = idx if idx >= 0 else len(current) + idx + if actual_idx < 0: + if strict: + raise IndexError(f"Index {idx} out of range") + return current + + # Expand list if needed + while len(current) <= actual_idx: + current.append(None) + + # Determine container type for this index + next_segment = segments[index + 1] + container_type = _determine_container_type(next_segment) + + if current[actual_idx] is None: + # Create appropriate container + if container_type == "list": + current[actual_idx] = [] + else: + current[actual_idx] = {} + else: + # Validate existing container type + existing = current[actual_idx] + if container_type == "list" and not isinstance(existing, list): + if strict: + raise TypeError( + f"Expected list at index {idx} but found {type(existing).__name__}" + ) + current[actual_idx] = [] + elif container_type == "dict" and not isinstance(existing, dict): + if strict: + raise TypeError( + f"Expected dict at index {idx} but found {type(existing).__name__}" + ) + current[actual_idx] = {} + + return current[actual_idx] + + +def _determine_container_type(segment: PathSegment) -> str: + """Determine whether we need a dict or list container.""" + if segment.type == PathSegmentType.INDEX: + return "list" + return "dict" diff --git a/chidian/lib/data_mapping_helpers.py b/chidian/lib/data_mapping_helpers.py new file mode 100644 index 
0000000..f6ead4d --- /dev/null +++ b/chidian/lib/data_mapping_helpers.py @@ -0,0 +1,58 @@ +""" +Helper functions for DataMapping validation and processing. +""" + +from typing import Any, Type, TypeVar + +from pydantic import BaseModel + +# Define generic type variables bounded to BaseModel +_InModel = TypeVar("_InModel", bound=BaseModel) +_OutModel = TypeVar("_OutModel", bound=BaseModel) + + +def validate_schemas(input_schema: Type, output_schema: Type) -> None: + """Validate that schemas are Pydantic BaseModel classes.""" + if not is_pydantic_model(input_schema): + raise TypeError( + f"input_schema must be a Pydantic BaseModel, got {type(input_schema)}" + ) + if not is_pydantic_model(output_schema): + raise TypeError( + f"output_schema must be a Pydantic BaseModel, got {type(output_schema)}" + ) + + +def is_pydantic_model(model_class: Type) -> bool: + """Check if a class is a Pydantic BaseModel.""" + try: + return ( + isinstance(model_class, type) + and issubclass(model_class, BaseModel) + and hasattr(model_class, "model_fields") + ) + except TypeError: + return False + + +def validate_input(data: Any, input_schema: Type[_InModel]) -> _InModel: + """Validate input data against input schema.""" + if isinstance(data, input_schema): + return data # type: ignore[return-value] + + # Try to convert dict to model + if isinstance(data, dict): + return input_schema.model_validate(data) # type: ignore[return-value] + + # Try direct validation + return input_schema.model_validate(data) # type: ignore[return-value] + + +def to_dict(model: _InModel) -> dict[str, Any]: + """Convert Pydantic model to dictionary.""" + return model.model_dump() + + +def validate_output(data: dict[str, Any], output_schema: Type[_OutModel]) -> _OutModel: + """Validate output data against output schema.""" + return output_schema.model_validate(data) # type: ignore[return-value] diff --git a/chidian/lib/dsl/filter.peg b/chidian/lib/dsl/filter.peg new file mode 100644 index 0000000..cac9c08 --- /dev/null +++ b/chidian/lib/dsl/filter.peg @@ -0,0 +1,61 @@ +# === Table Filter DSL === +# NOTE: Assume whitespace is removed beforehand + +filter_expr = or_expr + +# === Logical Expressions === +or_expr = and_expr (whitespace or_op whitespace and_expr)* +and_expr = comparison (whitespace and_op whitespace comparison)* + +# === Comparisons === +comparison = path whitespace compare_op whitespace value + +# === Operators === +compare_op = lte / gte / lt / gt / ne / eq / contains / in_op +or_op = ~"(?i)OR" +and_op = ~"(?i)AND" + +# Order matters for these (>= before >, <= before <, != before =) +lte = "<=" +gte = ">=" +lt = "<" +gt = ">" +ne = "!=" +eq = "=" +contains = ~"(?i)CONTAINS" +in_op = ~"(?i)IN" + +# === Path Expression === +# Reuse path syntax from select +path = nested_path / simple_name +nested_path = simple_name (dot path_segment)+ +path_segment = simple_name array_index? +array_index = lbrack index_content rbrack +index_content = number / star + +# === Values === +value = string / number / boolean / null / list_value +string = single_quoted / double_quoted +single_quoted = single_quote string_content_single single_quote +double_quoted = double_quote string_content_double double_quote +string_content_single = ~"[^']*" +string_content_double = ~"[^\"]*" +boolean = true / false +true = ~"(?i)true" +false = ~"(?i)false" +null = ~"(?i)null" / ~"(?i)none" +list_value = lbrack (value (comma value)*)? rbrack + +# === Primitives === +lbrack = "[" +rbrack = "]" +comma = "," +dot = "." 
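+
+# Illustrative inputs this grammar is intended to accept (assumed examples):
+#   age > 25 AND city = 'NYC'
+#   status != null OR tags CONTAINS 'vip'
+#   id IN [1,2,3]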
+star = "*" +single_quote = "'" +double_quote = "\"" +whitespace = ~"\\s+" + +# === Lexemes === +simple_name = ~"[a-zA-Z_][a-zA-Z0-9_-]*" +number = ~"-?[0-9]+(\\.[0-9]+)?" diff --git a/chidian/lib/dsl/get.peg b/chidian/lib/dsl/get.peg new file mode 100644 index 0000000..bf7effd --- /dev/null +++ b/chidian/lib/dsl/get.peg @@ -0,0 +1,30 @@ +# === Get DSL === +# NOTE: Assume whitespace is removed beforehand +get_expr = (array_access / key) (dot key)* +key = (list_op / single / tuple) +array_access = single_index / multi_index + +# === Actionable Units === +single = name single_index? +list_op = name? multi_index +tuple = lparen nested_expr (comma nested_expr)* rparen + +# === Intermediate Representation === +single_index = lbrack number rbrack +multi_index = lbrack (star / slice) rbrack +slice = number? colon number? +nested_expr = key (dot key)* # Re-defining so can handle separately + +# === Primitives === +lbrack = "[" +rbrack = "]" +lparen = "(" +rparen = ")" +comma = "," +colon = ":" +dot = "." +star = "*" + +# === Lexemes === +name = ~"[a-zA-Z_][a-zA-Z0-9_-]*" +number = ~"-?[0-9]+" diff --git a/chidian/lib/dsl/select.peg b/chidian/lib/dsl/select.peg new file mode 100644 index 0000000..c785314 --- /dev/null +++ b/chidian/lib/dsl/select.peg @@ -0,0 +1,32 @@ +# === Table Select DSL === +# NOTE: Assume whitespace is removed beforehand + +select_expr = star / column_list + +# === Column Lists === +column_list = column_spec (whitespace? comma whitespace? column_spec)* + +# === Column Specification === +column_spec = path (whitespace? rename_op)? +rename_op = arrow whitespace? name + +# === Path Expression === +# Reuse existing get.peg path syntax but simplified +path = nested_path / simple_name +nested_path = simple_name (dot path_segment)+ +path_segment = simple_name array_index? +array_index = lbrack (number / star) rbrack + +# === Primitives === +lbrack = "[" +rbrack = "]" +comma = "," +arrow = "->" +dot = "." +star = "*" +whitespace = ~"\\s+" + +# === Lexemes === +simple_name = ~"[a-zA-Z_][a-zA-Z0-9_-]*" +name = ~"[a-zA-Z_][a-zA-Z0-9_-]*" +number = ~"-?[0-9]+" diff --git a/chidian/lib/filter_parser.py b/chidian/lib/filter_parser.py new file mode 100644 index 0000000..9ddb6c9 --- /dev/null +++ b/chidian/lib/filter_parser.py @@ -0,0 +1,270 @@ +""" +Parser for Table filter DSL expressions. 
+""" + +from pathlib import Path as PathLib +from typing import Any, Callable, List, Union + +from parsimonious import Grammar, NodeVisitor +from parsimonious.nodes import Node + +from ..core import get + +# Load the PEG grammar +FILTER_PEG_PATH = PathLib(__file__).parent / "dsl" / "filter.peg" + +with open(FILTER_PEG_PATH, "r") as f: + FILTER_GRAMMAR_TEXT = f.read() + +FILTER_GRAMMAR = Grammar(FILTER_GRAMMAR_TEXT) + + +class FilterVisitor(NodeVisitor): + """Transforms filter DSL parse tree into callable predicates.""" + + def visit_filter_expr( + self, node: Node, visited_children: List[Any] + ) -> Callable[[dict], bool]: + """Process the root filter expression.""" + return visited_children[0] + + def visit_or_expr( + self, node: Node, visited_children: List[Any] + ) -> Callable[[dict], bool]: + """Process OR expressions.""" + first_expr, rest = visited_children + + if not rest: + return first_expr + + # Build OR chain + def or_predicate(row: dict) -> bool: + if first_expr(row): + return True + for or_part in rest: + # Extract expr from: whitespace or_op whitespace and_expr + expr = or_part[3] if len(or_part) > 3 else or_part[-1] + if expr(row): + return True + return False + + return or_predicate + + def visit_and_expr( + self, node: Node, visited_children: List[Any] + ) -> Callable[[dict], bool]: + """Process AND expressions.""" + first_comp, rest = visited_children + + if not rest: + return first_comp + + # Build AND chain + def and_predicate(row: dict) -> bool: + if not first_comp(row): + return False + for and_part in rest: + # Extract comp from: whitespace and_op whitespace comparison + comp = and_part[3] if len(and_part) > 3 else and_part[-1] + if not comp(row): + return False + return True + + return and_predicate + + def visit_comparison( + self, node: Node, visited_children: List[Any] + ) -> Callable[[dict], bool]: + """Process a single comparison.""" + # Extract path, op, value from: path whitespace op whitespace value + path = visited_children[0] + op = visited_children[2] + value = visited_children[4] + + def compare(row: dict) -> bool: + try: + row_value = get(row, path) + + # Handle different operators + if op == "=": + return row_value == value + elif op == "!=": + return row_value != value + elif op == ">": + return row_value > value + elif op == "<": + return row_value < value + elif op == ">=": + return row_value >= value + elif op == "<=": + return row_value <= value + elif op == "CONTAINS": + # String contains or list contains + if isinstance(row_value, str) and isinstance(value, str): + return value in row_value + elif isinstance(row_value, list): + return value in row_value + return False + elif op == "IN": + # Value in list + return row_value in value if isinstance(value, list) else False + + return False + except Exception: + # Path not found or comparison failed + return False + + return compare + + def visit_compare_op(self, node: Node, visited_children: List[Any]) -> str: + """Process comparison operator.""" + op = visited_children[0] + # Normalize to uppercase for CONTAINS/IN + if isinstance(op, str) and op.upper() in ["CONTAINS", "IN"]: + return op.upper() + return op + + def visit_path(self, node: Node, visited_children: List[Any]) -> str: + """Process a path expression.""" + result = visited_children[0] + if isinstance(result, list): + return result[0] + return result + + def visit_nested_path(self, node: Node, visited_children: List[Any]) -> str: + """Process a nested path.""" + base_name, segments = visited_children + parts = [base_name] + + for 
dot_segment in segments: + _, segment = dot_segment + parts.append(segment) + + return ".".join(parts) + + def visit_path_segment(self, node: Node, visited_children: List[Any]) -> str: + """Process a path segment.""" + name, array_index = visited_children + + if array_index: + [index_str] = array_index + return f"{name}{index_str}" + + return name + + def visit_array_index(self, node: Node, visited_children: List[Any]) -> str: + """Process array index.""" + lbrack, index_content, rbrack = visited_children + return f"[{index_content}]" + + def visit_index_content(self, node: Node, visited_children: List[Any]) -> str: + """Process index content.""" + return visited_children[0] + + def visit_value(self, node: Node, visited_children: List[Any]) -> Any: + """Process a value.""" + return visited_children[0] + + def visit_string(self, node: Node, visited_children: List[Any]) -> str: + """Process string value.""" + # Either single_quoted or double_quoted + return visited_children[0] + + def visit_single_quoted(self, node: Node, visited_children: List[Any]) -> str: + """Process single quoted string.""" + _, content, _ = visited_children + return content + + def visit_double_quoted(self, node: Node, visited_children: List[Any]) -> str: + """Process double quoted string.""" + _, content, _ = visited_children + return content + + def visit_string_content_single( + self, node: Node, visited_children: List[Any] + ) -> str: + """Process single quoted string content.""" + return node.text + + def visit_string_content_double( + self, node: Node, visited_children: List[Any] + ) -> str: + """Process double quoted string content.""" + return node.text + + def visit_number( + self, node: Node, visited_children: List[Any] + ) -> Union[int, float]: + """Process numeric value.""" + text = node.text + if "." in text: + return float(text) + return int(text) + + def visit_boolean(self, node: Node, visited_children: List[Any]) -> bool: + """Process boolean value.""" + value = visited_children[0] + return value.upper() == "TRUE" + + def visit_null(self, node: Node, visited_children: List[Any]) -> None: + """Process null value.""" + return None + + def visit_list_value(self, node: Node, visited_children: List[Any]) -> List[Any]: + """Process list value.""" + lbrack, content, rbrack = visited_children + + if not content: + return [] + + [values] = content + if not isinstance(values, list): + return [values] + + # Extract first value and rest + result = [] + if len(values) >= 1: + result.append(values[0]) + + if len(values) > 1 and values[1]: + for comma_value in values[1]: + _, value = comma_value + result.append(value) + + return result + + def visit_simple_name(self, node: Node, visited_children: List[Any]) -> str: + """Process a simple name.""" + return node.text + + def generic_visit(self, node: Node, visited_children: List[Any]) -> Any: + """Default handler.""" + return visited_children or node.text + + +def parse_filter(expr: str) -> Callable[[dict], bool]: + """ + Parse a filter expression into a callable predicate. 
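+
+    AND binds tighter than OR, and the grammar has no parenthesized grouping,
+    so "a OR b AND c" parses as "a OR (b AND c)".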
+ + Args: + expr: The filter expression (e.g., "age > 25 AND city = 'NYC'") + + Returns: + A callable that takes a dict and returns bool + + Examples: + >>> predicate = parse_filter("age > 25") + >>> predicate({"age": 30}) + True + >>> predicate({"age": 20}) + False + """ + # Remove extra whitespace but preserve spaces in operators + clean_expr = " ".join(expr.split()) + + if not clean_expr: + raise ValueError("Empty filter expression") + + tree = FILTER_GRAMMAR.parse(clean_expr) + visitor = FilterVisitor() + return visitor.visit(tree) diff --git a/chidian/lib/get_dsl_parser.py b/chidian/lib/get_dsl_parser.py new file mode 100644 index 0000000..9c9f567 --- /dev/null +++ b/chidian/lib/get_dsl_parser.py @@ -0,0 +1,247 @@ +""" +DSL parser using PEG grammar for chidian path expressions. +""" + +from pathlib import Path as PathLib +from typing import Any, List, Sequence, Union + +from parsimonious import Grammar, NodeVisitor +from parsimonious.nodes import Node + +from .parser import Path, PathSegment + +# Load the PEG grammar +GET_PEG_PATH = PathLib(__file__).parent / "dsl" / "get.peg" + +with open(GET_PEG_PATH, "r") as f: + GRAMMAR_TEXT = f.read() + +GET_DSL_GRAMMAR = Grammar(GRAMMAR_TEXT) + + +GetDslTreeResults = Union[str, int, slice, tuple, List[Any]] + + +def flatten_sequence(seq: Sequence[Any]) -> List[Any]: + """Flatten a nested sequence into a single list.""" + result = [] + for item in seq: + if isinstance(item, (list, tuple)) and not isinstance(item, str): + result.extend(flatten_sequence(item)) + else: + result.append(item) + return result + + +class GetDSLVisitor(NodeVisitor): + """ + Generates tree structure for path parsing using PEG grammar. + """ + + def visit_get_expr( + self, node: Node, visited_children: Sequence[Any] + ) -> List[PathSegment]: + """Entrypoint: handles full expression like 'a[0].b[*].c'""" + segments = [] + + # Collect all segments from the expression + for child in visited_children: + if child is None: + continue + elif isinstance(child, list): + # Flatten lists of segments + for item in child: + if isinstance(item, PathSegment): + segments.append(item) + elif isinstance(item, list): + segments.extend(s for s in item if isinstance(s, PathSegment)) + elif isinstance(child, PathSegment): + segments.append(child) + + return segments + + def visit_key( + self, node: Node, visited_children: Sequence[Any] + ) -> Union[PathSegment, List[PathSegment]]: + """Handle key which can be single, list_op, or tuple""" + return visited_children[0] + + def visit_single( + self, node: Node, visited_children: Sequence[Any] + ) -> List[PathSegment]: + """Handle single key expressions like 'a[0]'""" + segments = [] + + # visited_children: [name, single_index?] + name = visited_children[0] + if isinstance(name, PathSegment): + segments.append(name) + + # Add index if present + if len(visited_children) > 1 and visited_children[1] is not None: + index_segment = visited_children[1] + if isinstance(index_segment, PathSegment): + segments.append(index_segment) + + return segments + + def visit_list_op( + self, node: Node, visited_children: Sequence[Any] + ) -> List[PathSegment]: + """Handles expression meant to be applied on a list, e.g. 
`a[*]` or `[:1]`""" + segments = [] + + # visited_children: [name?, multi_index] + name = visited_children[0] + if name is not None and isinstance(name, PathSegment): + segments.append(name) + + # Multi-index is always present + multi_index = visited_children[1] + if isinstance(multi_index, PathSegment): + segments.append(multi_index) + + return segments + + def visit_tuple(self, node: Node, visited_children: Sequence[Any]) -> PathSegment: + """Handle tuple expressions like '(a,b,c)'""" + # Extract nested expressions from the tuple + paths = [] + + # Find all string expressions in the visited children + for child in visited_children: + if isinstance(child, str): + path_segments = _parse_simple_path(child) + paths.append(Path(path_segments)) + elif isinstance(child, list): + # Handle comma-separated expressions + for item in child: + if isinstance(item, str): + path_segments = _parse_simple_path(item) + paths.append(Path(path_segments)) + + return PathSegment.tuple(paths) + + def visit_array_access( + self, node: Node, visited_children: Sequence[Any] + ) -> PathSegment: + """Handle array access at the start of a path like '[0]' or '[*]'""" + # visited_children[0] is either single_index or multi_index + return visited_children[0] + + def visit_single_index( + self, node: Node, visited_children: Sequence[Any] + ) -> PathSegment: + """Handle index expressions like '[0]'""" + # visited_children = [lbrack, number, rbrack] + return PathSegment.index(visited_children[1]) + + def visit_multi_index( + self, node: Node, visited_children: Sequence[Any] + ) -> PathSegment: + """Handles index expressions '[*]' and slices like '[1:]'""" + # visited_children = [lbrack, (star | slice), rbrack] + content = visited_children[1] + if content == "*": + return PathSegment.wildcard() + elif isinstance(content, slice): + return PathSegment.slice(content.start, content.stop) + else: + raise ValueError(f"Unexpected multi_index content: {content}") + + def visit_slice(self, node: Node, visited_children: Sequence[Any]) -> slice: + """Handle slice notation like '[1:10]' or '[:]'""" + # visited_children = [start?, colon, stop?] 
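+        # e.g. "1:10" yields slice(1, 10); ":" yields slice(None, None)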
+ start = visited_children[0] if visited_children[0] is not None else None + stop = visited_children[2] if visited_children[2] is not None else None + return slice(start, stop) + + def visit_nested_expr(self, node: Node, visited_children: Sequence[Any]) -> str: + """Handle nested expressions in tuples""" + return node.text + + def visit_name(self, node: Node, visited_children: Sequence[Any]) -> PathSegment: + """Handle identifiers like 'a', 'b', 'c'""" + return PathSegment.key(node.text) + + def visit_number(self, node: Node, visited_children: Sequence[Any]) -> int: + """Handle numbers like '0', '-1'""" + return int(node.text) + + def visit_star(self, node: Node, visited_children: Sequence[Any]) -> str: + """Handle wildcard '*'""" + return "*" + + def generic_visit( + self, node: Node, visited_children: Sequence[Any] + ) -> Union[Sequence[Any], Any, None]: + """Default handler for unspecified rules""" + # Filter out None values and flatten + filtered = [child for child in visited_children if child is not None] + + if len(filtered) > 1: + return filtered + elif len(filtered) == 1: + return filtered[0] + else: + return None + + +def parse_path_peg(path_str: str) -> Path: + """Parse a path string into a Path object using PEG grammar.""" + if not path_str: + raise ValueError("Empty path") + + # Remove whitespace and parse + clean_path = path_str.replace(" ", "") + + try: + parsed_tree = GET_DSL_GRAMMAR.parse(clean_path) + segments = GetDSLVisitor().visit(parsed_tree) + + if isinstance(segments, list): + return Path(segments) + else: + return Path([segments]) + except Exception as e: + raise ValueError(f"Parse error: {e}") from e + + +# For recursive parsing in tuples, avoid infinite recursion +def _parse_simple_path(path_str: str) -> List[PathSegment]: + """Simple path parsing for use within tuples to avoid recursion.""" + if not path_str: + return [] + + # For tuple contents, use a simpler approach + parts = path_str.split(".") + segments = [] + + for part in parts: + # Check for array notation + if "[" in part and "]" in part: + # Extract key and index/slice + key_part = part[: part.index("[")] + bracket_part = part[part.index("[") : part.rindex("]") + 1] + + if key_part: + segments.append(PathSegment.key(key_part)) + + # Parse bracket content + bracket_content = bracket_part[1:-1] # Remove [ ] + if bracket_content == "*": + segments.append(PathSegment.wildcard()) + elif ":" in bracket_content: + # Slice + parts = bracket_content.split(":") + start = int(parts[0]) if parts[0] else None + end = int(parts[1]) if parts[1] else None + segments.append(PathSegment.slice(start, end)) + else: + # Index + segments.append(PathSegment.index(int(bracket_content))) + else: + # Simple key + segments.append(PathSegment.key(part)) + + return segments diff --git a/chidian/lib/parser.py b/chidian/lib/parser.py new file mode 100644 index 0000000..50ad348 --- /dev/null +++ b/chidian/lib/parser.py @@ -0,0 +1,65 @@ +""" +Path parser for chidian path expressions - now using PEG grammar. + +This module provides the core data structures and exports the PEG parser +as the primary parser implementation. 
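+
+Illustrative parse (assuming the PEG parser imports successfully):
+
+    >>> path = parse_path("a.b[0].c")
+    >>> [s.type.name for s in path.segments]
+    ['KEY', 'KEY', 'INDEX', 'KEY']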
+""" + +from dataclasses import dataclass +from enum import Enum, auto +from typing import List, Optional, Union + + +class PathSegmentType(Enum): + KEY = auto() + INDEX = auto() + SLICE = auto() + WILDCARD = auto() + TUPLE = auto() + + +@dataclass +class PathSegment: + """Represents a single segment in a path.""" + + type: PathSegmentType + value: Union[str, int, tuple[Optional[int], Optional[int]], List["Path"]] + + @classmethod + def key(cls, name: str) -> "PathSegment": + return cls(PathSegmentType.KEY, name) + + @classmethod + def index(cls, idx: int) -> "PathSegment": + return cls(PathSegmentType.INDEX, idx) + + @classmethod + def slice(cls, start: Optional[int], end: Optional[int]) -> "PathSegment": + return cls(PathSegmentType.SLICE, (start, end)) + + @classmethod + def wildcard(cls) -> "PathSegment": + return cls(PathSegmentType.WILDCARD, "*") + + @classmethod + def tuple(cls, paths: List["Path"]) -> "PathSegment": + return cls(PathSegmentType.TUPLE, paths) + + +@dataclass +class Path: + """Represents a parsed path expression.""" + + segments: List[PathSegment] + + +# Export the PEG parser as the main parser +try: + from .get_dsl_parser import parse_path_peg as parse_path +except ImportError: + # Fallback if PEG parser isn't available + def parse_path(path_str: str) -> Path: + raise NotImplementedError("PEG parser not available") + + +__all__ = ["Path", "PathSegment", "PathSegmentType", "parse_path"] diff --git a/chidian/lib/select_parser.py b/chidian/lib/select_parser.py new file mode 100644 index 0000000..41e11f1 --- /dev/null +++ b/chidian/lib/select_parser.py @@ -0,0 +1,171 @@ +""" +Parser for Table select DSL expressions. +""" + +from pathlib import Path as PathLib +from typing import Any, List, Optional, Union + +from parsimonious import Grammar, NodeVisitor +from parsimonious.nodes import Node + +# Load the PEG grammar +SELECT_PEG_PATH = PathLib(__file__).parent / "dsl" / "select.peg" + +with open(SELECT_PEG_PATH, "r") as f: + SELECT_GRAMMAR_TEXT = f.read() + +SELECT_GRAMMAR = Grammar(SELECT_GRAMMAR_TEXT) + + +class ColumnSpec: + """Represents a single column specification in a select expression.""" + + def __init__(self, path: str, rename_to: Optional[str] = None): + self.path = path + self.rename_to = rename_to + + def __repr__(self): + if self.rename_to: + return f"ColumnSpec({self.path!r} -> {self.rename_to!r})" + return f"ColumnSpec({self.path!r})" + + +class SelectVisitor(NodeVisitor): + """Transforms select DSL parse tree into column specifications.""" + + def visit_select_expr( + self, node: Node, visited_children: List[Any] + ) -> Union[str, List[ColumnSpec]]: + """Process the root select expression.""" + # Either star or column_list + return visited_children[0] + + def visit_star(self, node: Node, visited_children: List[Any]) -> str: + """Handle wildcard selection.""" + return "*" + + def visit_column_list( + self, node: Node, visited_children: List[Any] + ) -> List[ColumnSpec]: + """Process a list of column specifications.""" + first_spec, rest = visited_children + specs = [first_spec] + + if rest: + for comma_group in rest: + # Extract the spec from the group (might have whitespace) + spec = None + for item in comma_group: + if isinstance(item, ColumnSpec): + spec = item + break + if spec: + specs.append(spec) + + return specs + + def visit_column_spec(self, node: Node, visited_children: List[Any]) -> ColumnSpec: + """Process a single column specification.""" + path, rename_op = visited_children + rename_to = None + + if rename_op and rename_op[0]: # Check if 
rename_op exists and isn't empty + # Extract the actual rename value from the nested structure + if isinstance(rename_op[0], list): + # It's wrapped in a list, extract from it + for item in rename_op[0]: + if isinstance(item, str) and item not in [" ", "\t", "\n", "->"]: + rename_to = item + break + elif isinstance(rename_op[0], str): + rename_to = rename_op[0] + + return ColumnSpec(path, rename_to) + + def visit_rename_op(self, node: Node, visited_children: List[Any]) -> str: + """Process rename operation.""" + # Extract name from arrow, possible whitespace, name + for item in visited_children: + if isinstance(item, str) and item not in ["->", " ", "\t", "\n"]: + return item + return visited_children[-1] # Fallback to last item + + def visit_path(self, node: Node, visited_children: List[Any]) -> str: + """Process a path expression.""" + # Can be nested_path or simple_name + result = visited_children[0] + if isinstance(result, list): + # It's a simple_name wrapped in a list + return result[0] + return result + + def visit_nested_path(self, node: Node, visited_children: List[Any]) -> str: + """Process a nested path like 'user.profile.name'.""" + base_name, segments = visited_children + parts = [base_name] + + for dot_segment in segments: + _, segment = dot_segment + parts.append(segment) + + return ".".join(parts) + + def visit_path_segment(self, node: Node, visited_children: List[Any]) -> str: + """Process a path segment with optional array index.""" + name, array_index = visited_children + + if array_index: + [index_str] = array_index + return f"{name}{index_str}" + + return name + + def visit_array_index(self, node: Node, visited_children: List[Any]) -> str: + """Process array index notation.""" + lbrack, index_or_star, rbrack = visited_children + return f"[{index_or_star}]" + + def visit_simple_name(self, node: Node, visited_children: List[Any]) -> str: + """Process a simple name.""" + return node.text + + def visit_name(self, node: Node, visited_children: List[Any]) -> str: + """Process a name (for rename targets).""" + return node.text + + def visit_number(self, node: Node, visited_children: List[Any]) -> str: + """Process a number.""" + return node.text + + def generic_visit(self, node: Node, visited_children: List[Any]) -> Any: + """Default handler.""" + return visited_children or node.text + + +def parse_select(expr: str) -> Union[str, List[ColumnSpec]]: + """ + Parse a select expression into column specifications. + + Args: + expr: The select expression (e.g., "name, age -> years, address.city") + + Returns: + Either "*" for wildcard or a list of ColumnSpec objects + + Examples: + >>> parse_select("*") + "*" + >>> parse_select("name") + [ColumnSpec('name')] + >>> parse_select("patient.id -> patient_id, status") + [ColumnSpec('patient.id' -> 'patient_id'), ColumnSpec('status')] + """ + # Trim but preserve internal spaces for proper parsing + clean_expr = expr.strip() + + if not clean_expr: + raise ValueError("Empty select expression") + + tree = SELECT_GRAMMAR.parse(clean_expr) + visitor = SelectVisitor() + return visitor.visit(tree) diff --git a/chidian/mapper.py b/chidian/mapper.py new file mode 100644 index 0000000..e07779e --- /dev/null +++ b/chidian/mapper.py @@ -0,0 +1,263 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Any, List, Optional + +from pydantic import ValidationError + +""" +Mapper class - execution engine for DataMapping with validation strategies. 
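+
+A minimal usage sketch (illustrative; assumes a DataMapping built elsewhere):
+
+    mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
+    result = mapper(input_data)  # validated output, or raises on errors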
+
+The Mapper class takes a DataMapping and executes it with different validation modes:
+- STRICT: Validate and throw errors
+- FLEXIBLE: Validate but continue on errors, collecting issues
+- AUTO: Use strict if schemas present, flexible otherwise
+
+Also contains special types for transformation control (DROP, KEEP).
+"""
+
+
+class ValidationMode(Enum):
+    """Validation modes for mapper execution."""
+
+    STRICT = "strict"  # Validate and throw errors
+    FLEXIBLE = "flexible"  # Validate but continue on errors
+    AUTO = "auto"  # Strict if schemas present, flexible otherwise
+
+
+@dataclass
+class ValidationIssue:
+    """Represents a validation issue in flexible mode."""
+
+    stage: str  # "input" or "output"
+    field: Optional[str]
+    error: str
+    value: Any
+
+
+class MapperResult:
+    """Result of a mapping operation, potentially with validation issues."""
+
+    def __init__(self, data: Any, issues: Optional[List[ValidationIssue]] = None):
+        self.data = data
+        self.issues = issues or []
+
+    @property
+    def has_issues(self) -> bool:
+        return len(self.issues) > 0
+
+    def raise_if_issues(self):
+        """Raise an exception if there are validation issues."""
+        if self.has_issues:
+            messages = [f"{i.stage}: {i.error}" for i in self.issues]
+            # Pydantic v2's ValidationError cannot be constructed from a plain
+            # message string, so summarize the issues in a ValueError instead.
+            raise ValueError(f"Validation issues: {'; '.join(messages)}")
+
+
+class Mapper:
+    """
+    Execution engine for DataMapping with validation strategies.
+    """
+
+    def __init__(
+        self,
+        data_mapping_or_dict,  # DataMapping or dict for backward compatibility
+        mode: ValidationMode = ValidationMode.AUTO,
+        collect_all_errors: bool = True,
+    ):
+        """
+        Initialize a Mapper with a DataMapping and execution mode.
+
+        Args:
+            data_mapping_or_dict: DataMapping instance or dict for backward compatibility
+            mode: Validation mode (strict, flexible, or auto)
+            collect_all_errors: In flexible mode, whether to collect all errors
+        """
+        # Import here to avoid circular dependency
+        from .data_mapping import DataMapping
+
+        # Backward compatibility: if dict is passed, create a DataMapping
+        if isinstance(data_mapping_or_dict, dict):
+            self.data_mapping = DataMapping(transformations=data_mapping_or_dict)
+            self._backward_compat = True
+        elif isinstance(data_mapping_or_dict, DataMapping):
+            self.data_mapping = data_mapping_or_dict
+            self._backward_compat = False
+        else:
+            raise TypeError(
+                f"Expected DataMapping or dict, got {type(data_mapping_or_dict).__name__}"
+            )
+
+        self.collect_all_errors = collect_all_errors
+
+        # Determine actual mode
+        if mode == ValidationMode.AUTO:
+            self.mode = (
+                ValidationMode.STRICT
+                if self.data_mapping.has_schemas
+                else ValidationMode.FLEXIBLE
+            )
+        else:
+            self.mode = mode
+
+    def __call__(self, data: Any) -> Any | MapperResult:
+        """
+        Execute the mapping with the configured validation mode.
+ + Returns: + - In strict mode: The transformed data (raises on validation errors) + - In flexible mode: MapperResult with data and any validation issues + - In backward compat mode with dict: Always returns dict + """ + # Backward compatibility mode - always return dict + if self._backward_compat and not self.data_mapping.has_schemas: + return self.data_mapping.transform(data) + + if self.mode == ValidationMode.STRICT: + return self._execute_strict(data) + else: + return self._execute_flexible(data) + + def _execute_strict(self, data: Any) -> Any: + """Execute with strict validation - raise on any errors.""" + # Import helpers here to avoid circular dependency + from .lib.data_mapping_helpers import to_dict, validate_input, validate_output + + # Validate input if schema provided + if self.data_mapping.input_schema: + validated_input = validate_input(data, self.data_mapping.input_schema) + input_dict = to_dict(validated_input) + else: + input_dict = to_dict(data) if hasattr(data, "model_dump") else data + + # Apply transformation + output_dict = self.data_mapping.transform(input_dict) + + # Validate output if schema provided + if self.data_mapping.output_schema: + return validate_output(output_dict, self.data_mapping.output_schema) + return output_dict + + def _execute_flexible(self, data: Any) -> MapperResult: + """Execute with flexible validation - collect errors but continue.""" + # Import helpers here to avoid circular dependency + from .lib.data_mapping_helpers import to_dict, validate_input, validate_output + + issues = [] + + # Try to validate input + input_dict = None + if self.data_mapping.input_schema: + try: + validated_input = validate_input(data, self.data_mapping.input_schema) + input_dict = to_dict(validated_input) + except ValidationError as e: + # Collect input validation errors + for error in e.errors(): + issues.append( + ValidationIssue( + stage="input", + field=".".join(str(loc) for loc in error["loc"]), + error=error["msg"], + value=error.get("input"), + ) + ) + # Continue with raw data + input_dict = to_dict(data) if hasattr(data, "model_dump") else data + else: + input_dict = to_dict(data) if hasattr(data, "model_dump") else data + + # Apply transformation (might fail if input validation failed) + try: + output_dict = self.data_mapping.transform(input_dict) + except Exception as e: + # If transformation fails, return with error + issues.append( + ValidationIssue( + stage="transform", field=None, error=str(e), value=input_dict + ) + ) + return MapperResult(None, issues) + + # Try to validate output + final_output: Any = output_dict + if self.data_mapping.output_schema: + try: + final_output = validate_output( + output_dict, self.data_mapping.output_schema + ) + except ValidationError as e: + # Collect output validation errors + for error in e.errors(): + issues.append( + ValidationIssue( + stage="output", + field=".".join(str(loc) for loc in error["loc"]), + error=error["msg"], + value=error.get("input"), + ) + ) + # Return raw output dict if validation fails + final_output = output_dict + + return MapperResult(final_output, issues) + + +class DROP(Enum): + """ + A DROP placeholder object indicates the object relative to the current value should be dropped. + An "object" in this context is a dict or a list. + + This enum implements the transformation protocol without inheritance to avoid metaclass conflicts. 
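+
+    The value encodes how many enclosing containers to drop, counting outward
+    from the current value: THIS_OBJECT (-1) drops the immediately enclosing
+    dict or list, PARENT (-2) drops its container, and so on.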
+ + Examples: + ``` + { <-- Grandparent (rel to _value) + 'A': { <-- Parent (rel to _value) + 'B': { <-- This Object (rel to _value) + 'C': _value + } + } + } + ``` + + ``` + { <-- Grandparent (rel to _value1 and _value2) + 'A': [ <-- Parent (rel to _value1 and _value2) + { <-- This Object (rel to _value1) + 'B': _value1 + }, + { <-- This Object (rel to _value2) + 'B': _value2 + } + ] + } + ``` + """ + + THIS_OBJECT = -1 + PARENT = -2 + GRANDPARENT = -3 + GREATGRANDPARENT = -4 + + def process(self, _data: Any, _context: dict[str, Any] | None = None) -> "DROP": + """DROP sentinels are processed by Mapper, not directly.""" + return self + + @property + def level(self) -> int: + """Get the drop level value for compatibility.""" + return self.value + + +class KEEP: + """ + A value wrapped in a KEEP object should be ignored by the Mapper class when removing values. + + Partial keeping is _not_ supported (i.e. a KEEP object within an object to be DROP-ed). + """ + + def __init__(self, value: Any): + self.value = value + + def process(self, _data: Any, _context: dict[str, Any] | None = None) -> Any: + """KEEP sentinels preserve their value during processing.""" + return self.value diff --git a/chidian/partials.py b/chidian/partials.py new file mode 100644 index 0000000..679c68f --- /dev/null +++ b/chidian/partials.py @@ -0,0 +1,195 @@ +""" +The `partials` module provides a simplified set of core functions for data transformation. + +This focuses on basic operations that are Rust-friendly and essential for data processing. +""" + +import operator +from functools import partial, reduce +from typing import Any, Callable, TypeVar + +from .core import get as _get + +T = TypeVar("T") + + +class FunctionChain: + """Composable function chain that consolidates operations.""" + + def __init__(self, *operations: Callable): + self.operations = list(operations) + + def __or__( + self, other: Callable | "FunctionChain" | "ChainableFunction" + ) -> "FunctionChain": + """Chain operations with | operator.""" + if isinstance(other, FunctionChain): + return FunctionChain(*self.operations, *other.operations) + elif isinstance(other, ChainableFunction): + return FunctionChain(*self.operations, other.func) + else: + return FunctionChain(*self.operations, other) + + def __call__(self, value: Any) -> Any: + """Apply all operations in sequence.""" + return reduce(lambda v, f: f(v), self.operations, value) + + def __repr__(self) -> str: + ops = " | ".join( + f.__name__ if hasattr(f, "__name__") else str(f) for f in self.operations + ) + return f"FunctionChain({ops})" + + def __len__(self) -> int: + """Number of operations in the chain.""" + return len(self.operations) + + +class ChainableFunction: + """Wrapper to make any function/partial chainable with |.""" + + def __init__(self, func: Callable): + self.func = func + # Preserve function metadata + self.__name__ = getattr(func, "__name__", repr(func)) + self.__doc__ = getattr(func, "__doc__", None) + + def __or__( + self, other: Callable | FunctionChain | "ChainableFunction" + ) -> FunctionChain: + """Start or extend a chain with | operator.""" + if isinstance(other, FunctionChain): + return FunctionChain(self.func, *other.operations) + elif isinstance(other, ChainableFunction): + return FunctionChain(self.func, other.func) + else: + return FunctionChain(self.func, other) + + def __ror__(self, other: Callable | FunctionChain) -> FunctionChain: + """Allow chaining when ChainableFunction is on the right side.""" + if isinstance(other, FunctionChain): + return 
FunctionChain(*other.operations, self.func) + else: + return FunctionChain(other, self.func) + + def __call__(self, *args, **kwargs): + """Call the wrapped function.""" + return self.func(*args, **kwargs) + + def __repr__(self) -> str: + return f"ChainableFunction({self.__name__})" + + +def get( + key: str, default: Any = None, apply: Any = None, strict: bool = False +) -> Callable[[Any], Any]: + """Create a partial function for get operations.""" + + def get_partial(source): + return _get(source, key, default=default, apply=apply, strict=strict) + + return get_partial + + +# Arithmetic operations +def add(value: Any) -> Callable[[Any], Any]: + """Add a value to the input.""" + return partial(lambda x, v: operator.add(x, v), v=value) + + +def subtract(value: Any) -> Callable[[Any], Any]: + """Subtract a value from the input.""" + return partial(lambda x, v: operator.sub(x, v), v=value) + + +def multiply(value: Any) -> Callable[[Any], Any]: + """Multiply the input by a value.""" + return partial(lambda x, v: operator.mul(x, v), v=value) + + +def divide(value: Any) -> Callable[[Any], Any]: + """Divide the input by a value.""" + return partial(lambda x, v: operator.truediv(x, v), v=value) + + +# Boolean operations +def equals(value: Any) -> Callable[[Any], bool]: + """Check if input equals the given value.""" + return partial(operator.eq, value) + + +def contains(value: Any) -> Callable[[Any], bool]: + """Check if input contains the given value.""" + return partial(lambda x, v: operator.contains(x, v), v=value) + + +def isinstance_of(type_or_types: type) -> Callable[[Any], bool]: + """Check if input is an instance of the given type(s).""" + return partial(lambda x, types: isinstance(x, types), types=type_or_types) + + +# String manipulation functions as ChainableFunction +upper = ChainableFunction(str.upper) +lower = ChainableFunction(str.lower) +strip = ChainableFunction(str.strip) + + +def split(sep: str | None = None) -> ChainableFunction: + """Create a chainable split function.""" + return ChainableFunction(partial(str.split, sep=sep)) + + +def replace(old: str, new: str) -> ChainableFunction: + """Create a chainable replace function.""" + return ChainableFunction( + partial( + lambda s, old_val, new_val: s.replace(old_val, new_val), + old_val=old, + new_val=new, + ) + ) + + +def join(sep: str) -> ChainableFunction: + """Create a chainable join function.""" + return ChainableFunction( + partial(lambda separator, items: separator.join(items), sep) + ) + + +# Array/List operations as ChainableFunction +first = ChainableFunction(lambda x: x[0] if x else None) +last = ChainableFunction(lambda x: x[-1] if x else None) +length = ChainableFunction(len) + + +def at_index(i: int) -> ChainableFunction: + """Get element at index.""" + return ChainableFunction( + partial(lambda x, idx: x[idx] if len(x) > idx else None, idx=i) + ) + + +def slice_range(start: int | None = None, end: int | None = None) -> ChainableFunction: + """Slice a sequence.""" + return ChainableFunction(partial(lambda x, s, e: x[s:e], s=start, e=end)) + + +# Type conversions as ChainableFunction +to_int = ChainableFunction(int) +to_float = ChainableFunction(float) +to_str = ChainableFunction(str) +to_bool = ChainableFunction(bool) + + +# Utility functions +def round_to(decimals: int) -> ChainableFunction: + """Round to specified decimals.""" + return ChainableFunction(partial(round, ndigits=decimals)) + + +def default_to(default_value: Any) -> ChainableFunction: + """Replace None with default value.""" + return ChainableFunction( 
+ partial(lambda x, default: default if x is None else x, default=default_value) + ) diff --git a/chidian/table.py b/chidian/table.py new file mode 100644 index 0000000..2a94d56 --- /dev/null +++ b/chidian/table.py @@ -0,0 +1,384 @@ +from typing import Any, Callable, Iterator, Optional, Union + +from .core import get + +""" +A `Table` is a lightweight, sparse table implementation that treats a collection of dictionaries as rows in a table. + +Each dictionary represents a row with potentially different keys (columns), making it ideal for heterogeneous, +nested data. Provides a middle ground between the strictness of DataFrames and raw list[dict]/dict[str, dict]. + +Supports path-based queries, filtering, mapping, and other functional operations. +""" + + +class Table(dict): + def __init__( + self, + rows: Union[list[dict[str, Any]], dict[str, dict[str, Any]], None] = None, + **kwargs, + ): + """ + Initialize a Table from rows. + + Args: + rows: Either: + - list[dict]: Each dict is a row, auto-keyed by index ($0, $1, ...) + - dict[str, dict]: Pre-keyed rows (keys preserved) + - None: Empty table + **kwargs: Additional dict initialization parameters + """ + super().__init__(**kwargs) + self._rows: list[dict[str, Any]] = [] + self._row_keys: dict[str, int] = {} # Maps row keys to indices + + # Initialize rows based on input type + if rows is not None: + if isinstance(rows, list): + self._rows = rows + # Store rows by index using $-syntax + for i, row in enumerate(rows): + key = f"${i}" + self[key] = row + self._row_keys[key] = i + elif isinstance(rows, dict): + self._rows = list(rows.values()) + # Store rows by their original keys + for i, (key, row) in enumerate(rows.items()): + # Ensure keys start with $ for consistency + if not key.startswith("$"): + key = f"${key}" + self[key] = row + self._row_keys[key] = i + + def get(self, path: str, default: Any = None) -> Union[Any, list[Any]]: + """ + Extract values from rows using a path expression. + + If path starts with $, extracts from a specific row only. + Otherwise, extracts from all rows. + + Uses the existing chidian.core.get() engine to navigate nested structures. + + Args: + path: Path expression: + - "$0.name" or "$bob.name": Extract from specific row + - "name" or "patient.id": Extract from all rows + default: Value to use when path doesn't exist + + Returns: + - Single value when using $-prefixed path for specific row + - List of values (one per row) when extracting from all rows + + Examples: + >>> t = Table([ + ... {"name": "John", "age": 30}, + ... {"name": "Jane", "age": 25}, + ... {"name": "Bob"} # Note: no age + ... 
]) + >>> t.get("name") + ["John", "Jane", "Bob"] + >>> t.get("$0.name") + "John" + >>> t.get("$1.age") + 25 + >>> t.get("$2.age", default=0) + 0 + >>> t.append({"name": "Alice"}, custom_key="alice") + >>> t.get("$alice.name") + "Alice" + """ + # Check if path starts with $ (specific row access) + if path.startswith("$"): + # Extract row key and remaining path + parts = path.split(".", 1) + row_key = parts[0] + + # Check if this key exists + if row_key not in self: + return default + + # Get the specific row + row = self[row_key] + + # If there's a remaining path, extract from the row + if len(parts) > 1: + return get(row, parts[1], default=default) + else: + # Just the row key itself, return the whole row + return row + + # Original behavior: extract from all rows + results = [] + for row in self._rows: + value = get(row, path, default=default) + results.append(value) + return results + + @property + def columns(self) -> set[str]: + """ + Return the union of all keys across all rows. + + This represents the "sparse columns" of the table. + + Examples: + >>> t = Table([ + ... {"name": "John", "age": 30}, + ... {"name": "Jane", "city": "NYC"} + ... ]) + >>> t.columns + {"name", "age", "city"} + """ + all_keys: set[str] = set() + for row in self._rows: + all_keys.update(row.keys()) + return all_keys + + def to_list(self) -> list[dict[str, Any]]: + """Return rows as a plain list of dicts.""" + return self._rows.copy() + + def to_dict(self) -> dict[str, dict[str, Any]]: + """Return rows as a dict keyed by row identifiers.""" + return dict(self) + + def append(self, row: dict[str, Any], custom_key: Optional[str] = None) -> None: + """ + Add a new row to the table. + + This operation may expand the logical column set if the new row + contains keys not seen in existing rows. + + Args: + row: Dictionary representing the new row + custom_key: Optional row identifier (defaults to $n where n is the index) + If provided and doesn't start with $, will be prefixed with $ + + Examples: + >>> t = Table([{"name": "John"}]) + >>> t.append({"name": "Jane", "age": 25}) # Adds 'age' column + >>> t.append({"name": "Bob", "city": "NYC"}, custom_key="bob") # Adds 'city' column + >>> len(t) + 3 + """ + self._rows.append(row) + + if custom_key is None: + # Use $-prefixed index as key + key = f"${len(self._rows) - 1}" + else: + # Ensure custom keys start with $ + if not custom_key.startswith("$"): + key = f"${custom_key}" + else: + key = custom_key + + self[key] = row + self._row_keys[key] = len(self._rows) - 1 + + def filter(self, predicate: Union[str, Callable[[dict], bool]]) -> "Table": + """ + Filter rows based on a predicate. + + Args: + predicate: Either: + - Callable: Function that takes a row dict and returns bool + - str: DSL filter expression + + Returns: + New Table with only rows matching the predicate + + Examples: + >>> t = Table([{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]) + >>> t.filter(lambda row: row.get("age", 0) > 26) # Returns Table with just John + >>> t.filter("age > 26") + >>> t.filter("status = 'active' AND age >= 18") + >>> t.filter("addresses[0].city = 'NYC'") + """ + if isinstance(predicate, str): + from .lib.filter_parser import parse_filter + + predicate = parse_filter(predicate) + + # Functional predicate implementation + filtered_rows = [row for row in self._rows if predicate(row)] + return Table(filtered_rows) + + def map(self, transform: Callable[[dict], dict]) -> "Table": + """ + Transform each row using the provided function. 
+ + Args: + transform: Function that takes a row dict and returns a new dict + + Returns: + New Table with transformed rows + + Examples: + >>> t = Table([{"name": "john"}, {"name": "jane"}]) + >>> t2 = t.map(lambda row: {**row, "name": row["name"].upper()}) + >>> t2.get("name") + ["JOHN", "JANE"] + + >>> # Add computed field + >>> t3 = t.map(lambda row: {**row, "name_length": len(row.get("name", ""))}) + """ + transformed_rows = [transform(row) for row in self._rows] + return Table(transformed_rows) + + def select(self, query: str) -> "Table": + """ + Project columns and create a new Table using DSL syntax. + + Args: + query: DSL column selection expression + + Returns: + New Table with selected columns + + Examples: + >>> t.select("name, age") # Select specific columns + >>> t.select("*") # Select all columns + >>> t.select("patient.id -> patient_id, status") # Rename column + >>> t.select("name, addresses[0].city -> primary_city") # Nested + rename + """ + from .lib.select_parser import parse_select + + parsed = parse_select(query) + + # Handle wildcard selection + if parsed == "*": + return Table(self._rows.copy()) + + # Handle column specifications + if not isinstance(parsed, list): + # This shouldn't happen based on parse_select implementation + raise ValueError("Unexpected parser result") + + new_rows = [] + for row in self._rows: + new_row = {} + + for spec in parsed: + # Get value using path + value = get(row, spec.path, default=None) + + # Use rename if specified, otherwise use the last segment of path + if spec.rename_to: + key = spec.rename_to + else: + # Extract last part of path as column name + # e.g., "patient.id" -> "id", "name" -> "name" + path_parts = spec.path.split(".") + # Remove array indices from last part + last_part = path_parts[-1].split("[")[0] + key = last_part + + new_row[key] = value + + new_rows.append(new_row) + + return Table(new_rows) + + def unique(self, path: str) -> list[Any]: + """ + Get unique values from a column path. + + Args: + path: Path expression to extract values from + + Returns: + List of unique values found at the path + """ + values = self.get(path) + seen = set() + unique_values = [] + for value in values: + # Handle unhashable types by converting to string for dedup + try: + if value not in seen: + seen.add(value) + unique_values.append(value) + except TypeError: + # Unhashable type, use string representation for dedup + str_value = str(value) + if str_value not in seen: + seen.add(str_value) + unique_values.append(value) + return unique_values + + def group_by(self, path: str) -> dict[Any, "Table"]: + """ + Group rows by values at a given path. + + Args: + path: Path expression to group by + + Returns: + Dictionary mapping unique values to Tables containing matching rows + """ + groups: dict[Any, list[dict[str, Any]]] = {} + + for row in self._rows: + group_value = get(row, path, default=None) + # Handle unhashable types by converting to string + try: + hash(group_value) + key = group_value + except TypeError: + key = str(group_value) + + if key not in groups: + groups[key] = [] + groups[key].append(row) + + return {key: Table(rows) for key, rows in groups.items()} + + def head(self, n: int = 5) -> "Table": + """ + Return first n rows. + + Args: + n: Number of rows to return (default 5) + + Returns: + New Table with first n rows + """ + return Table(self._rows[:n]) + + def tail(self, n: int = 5) -> "Table": + """ + Return last n rows. 
+ + Args: + n: Number of rows to return (default 5) + + Returns: + New Table with last n rows + """ + return Table(self._rows[-n:]) + + def __iter__(self) -> Iterator[dict[str, Any]]: + """ + Iterate over rows in insertion order. + + Examples: + >>> t = Table([{"id": 1}, {"id": 2}]) + >>> for row in t: + ... print(row["id"]) + 1 + 2 + """ + return iter(self._rows) + + def __len__(self) -> int: + """ + Return the number of rows in the table. + + Examples: + >>> t = Table([{"id": 1}, {"id": 2}]) + >>> len(t) + 2 + """ + return len(self._rows) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6d9c368 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "chidian" +version = "0.1.0" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "parsimonious>=0.10.0", + "pydantic>=2.10.6,<3.0.0", # Only Pydantic v2 is supported +] + +[dependency-groups] +dev = [ + "pytest>=8.3.5", + "hypothesis>=6.0.0", + "ruff>=0.11.12", + "mypy>=1.14.1", +] diff --git a/chidian-py/tests/A.json b/tests/A.json similarity index 100% rename from chidian-py/tests/A.json rename to tests/A.json diff --git a/chidian-py/tests/B.json b/tests/B.json similarity index 100% rename from chidian-py/tests/B.json rename to tests/B.json diff --git a/chidian-py/tests/__init__.py b/tests/__init__.py similarity index 100% rename from chidian-py/tests/__init__.py rename to tests/__init__.py diff --git a/chidian-py/tests/conftest.py b/tests/conftest.py similarity index 100% rename from chidian-py/tests/conftest.py rename to tests/conftest.py diff --git a/chidian-py/tests/structstest.py b/tests/structstest.py similarity index 100% rename from chidian-py/tests/structstest.py rename to tests/structstest.py diff --git a/tests/test_data_mapping.py b/tests/test_data_mapping.py new file mode 100644 index 0000000..bde6a6f --- /dev/null +++ b/tests/test_data_mapping.py @@ -0,0 +1,265 @@ +"""Test the new DataMapping class and Mapper with validation modes.""" + +from typing import Optional + +import pytest +from pydantic import BaseModel + +import chidian.partials as p +from chidian import DataMapping, Mapper, MapperResult, ValidationMode + + +# Test models +class Patient(BaseModel): + id: str + name: str + active: bool + internal_notes: Optional[str] = None + age: Optional[int] = None + + +class Observation(BaseModel): + subject_ref: str + performer: str + status: Optional[str] = None + + +class TestDataMappingBasic: + """Test basic DataMapping functionality as forward-only validator.""" + + def test_simple_mapping_with_mapper(self) -> None: + """Test DataMapping with Mapper for basic field mapping.""" + # Create a DataMapping for transformation + data_mapping = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + }, + input_schema=Patient, + output_schema=Observation, + ) + + # Create Mapper with DataMapping + mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + + patient = Patient(id="123", name="John", active=True) + obs = mapper(patient) + + assert 
isinstance(obs, Observation) + assert obs.subject_ref == "123" + assert obs.performer == "John" + + def test_complex_mapping_with_callable_mapper(self) -> None: + """Test DataMapping with callable transformations.""" + data_mapping = DataMapping( + transformations={ + "subject_ref": lambda data: f"Patient/{data['id']}", + "performer": lambda data: data["name"].upper(), + "status": lambda data: "active" if data["active"] else "inactive", + }, + input_schema=Patient, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + + patient = Patient(id="123", name="john", active=True) + obs = mapper(patient) + + assert isinstance(obs, Observation) + assert obs.subject_ref == "Patient/123" + assert obs.performer == "JOHN" + assert obs.status == "active" + + def test_validation_modes(self) -> None: + """Test different validation modes.""" + data_mapping = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + }, + input_schema=Patient, + output_schema=Observation, + ) + + # Test strict mode + strict_mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + patient = Patient(id="123", name="John", active=True) + obs = strict_mapper(patient) + assert isinstance(obs, Observation) + assert obs.subject_ref == "123" + + # Test flexible mode + flexible_mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE) + result = flexible_mapper(patient) + assert isinstance(result, MapperResult) + assert not result.has_issues + assert result.data.subject_ref == "123" + + +class TestDataMappingValidation: + """Test DataMapping validation features.""" + + def test_input_validation(self) -> None: + """Test that Mapper validates input against input schema.""" + data_mapping = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + }, + input_schema=Patient, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + + # Valid input works + patient = Patient(id="123", name="John", active=True) + obs = mapper(patient) + assert isinstance(obs, Observation) + assert obs.subject_ref == "123" + + # Invalid input should raise ValidationError in strict mode + with pytest.raises(Exception): # Pydantic ValidationError + mapper({"invalid": "data"}) + + def test_output_validation(self) -> None: + """Test that Mapper validates output against output schema.""" + # DataMapping that produces invalid output + data_mapping = DataMapping( + transformations={ + "invalid_field": lambda data: "value", # Missing required fields + }, + input_schema=Patient, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + patient = Patient(id="123", name="John", active=True) + + # Should raise ValidationError due to invalid output in strict mode + with pytest.raises(Exception): # Pydantic ValidationError + mapper(patient) + + def test_flexible_mode_validation(self) -> None: + """Test flexible mode collects validation errors.""" + # DataMapping that produces invalid output + data_mapping = DataMapping( + transformations={ + "invalid_field": lambda data: "value", # Missing required fields + }, + input_schema=Patient, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE) + patient = Patient(id="123", name="John", active=True) + + # Should return MapperResult with issues + result = mapper(patient) + assert isinstance(result, MapperResult) + assert result.has_issues + assert len(result.issues) > 0 + assert 
result.issues[0].stage == "output" + + def test_dict_input_with_strict_mode(self) -> None: + """Test handling of dict input in strict mode.""" + data_mapping = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + }, + input_schema=Patient, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + + # Dict input should be validated and converted + dict_input = {"id": "123", "name": "John", "active": True} + obs = mapper(dict_input) + assert isinstance(obs, Observation) + assert obs.subject_ref == "123" + assert obs.performer == "John" + + def test_auto_mode(self) -> None: + """Test auto mode behavior.""" + # With schemas - should use strict mode + data_mapping_with_schemas = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + }, + input_schema=Patient, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping_with_schemas) # AUTO mode by default + assert mapper.mode == ValidationMode.STRICT + + # Without schemas - should use flexible mode + data_mapping_no_schemas = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + } + ) + + mapper2 = Mapper(data_mapping_no_schemas) # AUTO mode by default + assert mapper2.mode == ValidationMode.FLEXIBLE + + +class TestDataMappingWithoutSchemas: + """Test DataMapping without schemas (pure transformation).""" + + def test_pure_transformation(self) -> None: + """Test DataMapping as pure transformation without schemas.""" + data_mapping = DataMapping( + transformations={ + "subject_ref": p.get("id"), + "performer": p.get("name"), + } + ) + + # Direct transformation + result = data_mapping.transform({"id": "123", "name": "John"}) + assert result["subject_ref"] == "123" + assert result["performer"] == "John" + + def test_with_flexible_mapper(self) -> None: + """Test DataMapping without schemas using flexible Mapper.""" + data_mapping = DataMapping( + transformations={ + "subject_ref": lambda data: f"Patient/{data.get('id', 'unknown')}", + "performer": lambda data: data.get("name", "Unknown"), + "status": lambda data: "processed", + } + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE) + + # Should work with incomplete data + result = mapper({"id": "123"}) + assert isinstance(result, MapperResult) + assert result.data["subject_ref"] == "Patient/123" + assert result.data["performer"] == "Unknown" + assert result.data["status"] == "processed" + + def test_mapper_result_interface(self) -> None: + """Test MapperResult interface.""" + data_mapping = DataMapping( + transformations={ + "missing_field": p.get("nonexistent"), + }, + output_schema=Observation, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE) + result = mapper({"id": "123"}) + + assert isinstance(result, MapperResult) + assert result.has_issues + + # Test raise_if_issues + with pytest.raises(Exception): + result.raise_if_issues() diff --git a/chidian-py/tests/test_get.py b/tests/test_get.py similarity index 99% rename from chidian-py/tests/test_get.py rename to tests/test_get.py index 7064ab9..2a691dc 100644 --- a/chidian-py/tests/test_get.py +++ b/tests/test_get.py @@ -3,6 +3,7 @@ from typing import Any import pytest + from chidian import get diff --git a/tests/test_lexicon.py b/tests/test_lexicon.py new file mode 100644 index 0000000..4d9c35a --- /dev/null +++ b/tests/test_lexicon.py @@ -0,0 +1,354 @@ +"""Tests for the Lexicon class with tuple support.""" + +import pytest + +from chidian.lexicon 
import Lexicon + + +class TestLexiconBasic: + """Test basic Lexicon functionality.""" + + def test_simple_string_mappings(self): + """Test basic one-to-one string mappings.""" + lexicon = Lexicon({"8480-6": "271649006", "8462-4": "271650006"}) + + # Forward lookups (keys first) + assert lexicon["8480-6"] == "271649006" + assert lexicon["8462-4"] == "271650006" + + # Reverse lookups (values second) + assert lexicon["271649006"] == "8480-6" + assert lexicon["271650006"] == "8462-4" + + def test_tuple_many_to_one_mappings(self): + """Test many-to-one mappings with tuples.""" + lexicon = Lexicon( + { + ("A", "B", "C"): "x", + ("D", "E"): "y", + "F": "z", # Can mix single and tuple mappings + } + ) + + # Forward lookups - all keys map to value + assert lexicon["A"] == "x" + assert lexicon["B"] == "x" + assert lexicon["C"] == "x" + assert lexicon["D"] == "y" + assert lexicon["E"] == "y" + assert lexicon["F"] == "z" + + # Reverse lookups - first in tuple is default + assert lexicon["x"] == "A" # First in tuple + assert lexicon["y"] == "D" # First in tuple + assert lexicon["z"] == "F" + + def test_lookup_priority(self): + """Test that keys are scanned before values.""" + # If a value matches a key, the key lookup wins + lexicon = Lexicon({"A": "B", "B": "C"}) + + assert lexicon["A"] == "B" # Key lookup + assert lexicon["B"] == "C" # Key lookup (takes priority over value) + assert lexicon["C"] == "B" # Value lookup (reverse) + + def test_get_method(self): + """Test get method with defaults.""" + lexicon = Lexicon( + { + "yes": "Y", + "no": "N", + ("true", "1", "on"): "T", + ("false", "0", "off"): "F", + } + ) + + # Key lookups + assert lexicon.get("yes") == "Y" + assert lexicon.get("true") == "T" + assert lexicon.get("1") == "T" + + # Value lookups (reverse) + assert lexicon.get("Y") == "yes" + assert lexicon.get("T") == "true" # First in tuple + assert lexicon.get("F") == "false" # First in tuple + + # Missing keys with default + assert lexicon.get("missing") is None + assert lexicon.get("missing", "DEFAULT") == "DEFAULT" + + def test_instance_default(self): + """Test default value behavior.""" + lexicon = Lexicon({"yes": "Y"}, default="UNKNOWN") + + assert lexicon["yes"] == "Y" + assert lexicon["Y"] == "yes" + assert lexicon["missing"] == "UNKNOWN" + assert lexicon.get("missing") == "UNKNOWN" + assert lexicon.get("missing", "CUSTOM") == "CUSTOM" # Override default + + def test_contains(self): + """Test membership checking.""" + lexicon = Lexicon({"a": "1", ("b", "c"): "2"}) + + # Keys + assert "a" in lexicon + assert "b" in lexicon + assert "c" in lexicon + + # Values (also searchable) + assert "1" in lexicon + assert "2" in lexicon + + # Missing + assert "d" not in lexicon + assert "3" not in lexicon + + def test_dict_interface(self): + """Test that Lexicon maintains dict-like interface.""" + lexicon = Lexicon({"a": "1", "b": "2"}) + + # Basic dict operations + assert len(lexicon) == 2 + assert sorted(lexicon.keys()) == ["a", "b"] + assert sorted(lexicon.values()) == ["1", "2"] + assert dict(lexicon) == {"a": "1", "b": "2"} + + def test_empty_lexicon(self): + """Test empty lexicon behavior.""" + lexicon = Lexicon({}) + + assert len(lexicon) == 0 + assert lexicon.get("any") is None + + with pytest.raises(KeyError): + _ = lexicon["any"] + + def test_no_key_error_with_default(self): + """Test that KeyError is not raised when default is set.""" + lexicon = Lexicon({}, default="DEFAULT") + + # Should return default, not raise KeyError + assert lexicon["missing"] == "DEFAULT" + + +class 
TestLexiconBuilder: + """Test the builder pattern interface.""" + + def test_builder_basic(self): + """Test basic builder usage.""" + lexicon = Lexicon.builder().add("A", "1").add("B", "2").build() + + assert lexicon["A"] == "1" + assert lexicon["B"] == "2" + assert lexicon["1"] == "A" + assert lexicon["2"] == "B" + + def test_builder_with_many(self): + """Test builder with many-to-one mappings.""" + lexicon = ( + Lexicon.builder() + .add_many(["A", "B", "C"], "x") + .add_many(["D", "E"], "y") + .add("F", "z") + .build() + ) + + # Forward mappings + assert lexicon["A"] == "x" + assert lexicon["B"] == "x" + assert lexicon["C"] == "x" + assert lexicon["F"] == "z" + + # Reverse mappings (first is default) + assert lexicon["x"] == "A" + assert lexicon["y"] == "D" + assert lexicon["z"] == "F" + + def test_builder_with_default(self): + """Test builder with default value.""" + lexicon = Lexicon.builder().add("A", "1").set_default("MISSING").build() + + assert lexicon["A"] == "1" + assert lexicon["missing"] == "MISSING" + + def test_builder_with_metadata(self): + """Test builder with metadata.""" + lexicon = ( + Lexicon.builder().add("A", "1").set_metadata({"version": "1.0"}).build() + ) + + assert lexicon.metadata["version"] == "1.0" + + def test_builder_primary_override(self): + """Test that builder can override primary reverse mapping.""" + lexicon = ( + Lexicon.builder() + .add_many(["A", "B", "C"], "x") + .set_primary_reverse("x", "B") # Override default + .build() + ) + + assert lexicon["x"] == "B" # Not "A" + + +class TestLexiconEdgeCases: + """Test edge cases and special scenarios.""" + + def test_self_mapping(self): + """Test when a key maps to itself.""" + lexicon = Lexicon({"A": "A", "B": "B"}) + + # Should work normally + assert lexicon["A"] == "A" + assert lexicon["B"] == "B" + + def test_circular_mapping(self): + """Test circular mappings.""" + lexicon = Lexicon({"A": "B", "B": "A"}) + + # Forward lookups + assert lexicon["A"] == "B" + assert lexicon["B"] == "A" + + def test_chain_mapping(self): + """Test chain-like mappings.""" + lexicon = Lexicon({"A": "B", "B": "C", "C": "D"}) + + # Each lookup is independent + assert lexicon["A"] == "B" + assert lexicon["B"] == "C" + assert lexicon["C"] == "D" + assert lexicon["D"] == "C" # Reverse lookup + + def test_case_sensitivity(self): + """Test that lookups are case-sensitive.""" + lexicon = Lexicon({"Code": "VALUE", "code": "value"}) + + assert lexicon["Code"] == "VALUE" + assert lexicon["code"] == "value" + assert lexicon["VALUE"] == "Code" + assert lexicon["value"] == "code" + + def test_whitespace_handling(self): + """Test handling of whitespace in keys/values.""" + lexicon = Lexicon({" A ": " B ", "C": "D "}) + + assert lexicon[" A "] == " B " + assert lexicon[" B "] == " A " + assert lexicon["C"] == "D " + assert lexicon["D "] == "C" + + def test_overlapping_tuples(self): + """Test when multiple tuples map to same value.""" + lexicon = Lexicon( + { + ("A", "B"): "x", + ("C", "D"): "x", # Same value + "E": "x", # Also same value + } + ) + + # All forward mappings work + assert lexicon["A"] == "x" + assert lexicon["B"] == "x" + assert lexicon["C"] == "x" + assert lexicon["D"] == "x" + assert lexicon["E"] == "x" + + # Reverse gives first occurrence + assert lexicon["x"] == "A" # First key that mapped to "x" + + def test_empty_tuple(self): + """Test that empty tuples are handled gracefully.""" + with pytest.raises(ValueError, match="Empty tuple"): + Lexicon({(): "value"}) + + def test_mixed_types_rejected(self): + """Test that non-string 
types are rejected.""" + with pytest.raises(TypeError, match="must be strings"): + Lexicon({123: "value"}) + + with pytest.raises(TypeError, match="must be strings"): + Lexicon({"key": 456}) + + with pytest.raises(TypeError, match="must be strings"): + Lexicon({("A", 123): "value"}) + + +class TestLexiconRealWorld: + """Test real-world scenarios.""" + + def test_medical_code_mapping(self): + """Test LOINC to SNOMED mapping example.""" + lab_codes = Lexicon( + { + "8480-6": "271649006", # Systolic BP + "8462-4": "271650006", # Diastolic BP + "8867-4": "364075005", # Heart rate + # Multiple LOINC codes for same concept + ("2160-0", "38483-4", "14682-9"): "113075003", # Creatinine + }, + metadata={"version": "2023-Q4", "source": "LOINC-SNOMED"}, + ) + + # Forward mapping (LOINC to SNOMED) + assert lab_codes["8480-6"] == "271649006" + assert lab_codes["2160-0"] == "113075003" + assert lab_codes["38483-4"] == "113075003" + assert lab_codes["14682-9"] == "113075003" + + # Reverse mapping (SNOMED to LOINC) + assert lab_codes["271649006"] == "8480-6" + assert lab_codes["113075003"] == "2160-0" # First in tuple + + # Metadata + assert lab_codes.metadata["version"] == "2023-Q4" + + def test_status_code_mapping(self): + """Test status code transformations with aliases.""" + status_map = Lexicon( + { + ("active", "current", "live"): "A", + ("inactive", "stopped", "discontinued"): "I", + ("pending", "waiting"): "P", + "completed": "C", + }, + default="U", # Unknown + ) + + # Forward mapping with aliases + assert status_map["active"] == "A" + assert status_map["current"] == "A" + assert status_map["live"] == "A" + assert status_map["stopped"] == "I" + assert status_map["completed"] == "C" + + # Reverse mapping (first alias is default) + assert status_map["A"] == "active" + assert status_map["I"] == "inactive" + assert status_map["P"] == "pending" + + # Unknown status + assert status_map["unknown"] == "U" + assert status_map["X"] == "U" + + def test_unit_conversion_codes(self): + """Test unit of measure mappings.""" + unit_map = Lexicon( + { + ("mg/dL", "mg/dl", "MG/DL"): "MG_PER_DL", + ("mmol/L", "mmol/l", "MMOL/L"): "MMOL_PER_L", + "g/dL": "G_PER_DL", + "mEq/L": "MEQ_PER_L", + } + ) + + # Case variations all map to canonical form + assert unit_map["mg/dL"] == "MG_PER_DL" + assert unit_map["mg/dl"] == "MG_PER_DL" + assert unit_map["MG/DL"] == "MG_PER_DL" + + # Reverse gives the first (preferred) form + assert unit_map["MG_PER_DL"] == "mg/dL" + assert unit_map["MMOL_PER_L"] == "mmol/L" diff --git a/chidian-py/tests/test_lib.py b/tests/test_lib.py similarity index 95% rename from chidian-py/tests/test_lib.py rename to tests/test_lib.py index ec553c9..b2ed01b 100644 --- a/chidian-py/tests/test_lib.py +++ b/tests/test_lib.py @@ -1,7 +1,6 @@ """Simplified integration tests for core functionality.""" -from chidian import get -from chidian.lib import put +from chidian import get, put def test_get_function_basic(): diff --git a/tests/test_mapper.py b/tests/test_mapper.py new file mode 100644 index 0000000..0cd70ee --- /dev/null +++ b/tests/test_mapper.py @@ -0,0 +1,359 @@ +"""Tests for Mapper as independent dict->dict transformer and DataMapping executor.""" + +from typing import Any + +import pytest +from pydantic import BaseModel + +import chidian.partials as p +from chidian import DataMapping, Mapper, MapperResult, ValidationMode, get + + +class TestMapperBasic: + """Test basic Mapper functionality as dict->dict transformer.""" + + def test_simple_dict_mapping(self) -> None: + """Test basic Mapper with dict 
mapping.""" + mapping = { + "patient_id": p.get("data.patient.id"), + "is_active": p.get("data.patient.active"), + } + mapper = Mapper(mapping) + + input_data = { + "data": {"patient": {"id": "abc123", "active": True}, "other": "value"} + } + + result = mapper(input_data) + + assert isinstance(result, dict) + assert result["patient_id"] == "abc123" # type: ignore[index] + assert result["is_active"] is True # type: ignore[index] + + def test_callable_mapping(self) -> None: + """Test Mapper with callable mapping values.""" + mapping = { + "patient_id": lambda data: get(data, "data.patient.id"), + "is_active": lambda data: get(data, "data.patient.active"), + "status": lambda data: "processed", + } + + mapper = Mapper(mapping) + + input_data = { + "data": {"patient": {"id": "abc123", "active": True}, "other": "value"} + } + + result = mapper(input_data) + + assert isinstance(result, dict) + assert result["patient_id"] == "abc123" # type: ignore[index] + assert result["is_active"] is True # type: ignore[index] + assert result["status"] == "processed" # type: ignore[index] + + def test_callable_mapping_with_partials(self) -> None: + """Test Mapper with callable mapping values using simplified partials API.""" + # Use simplified partials API + get_first = p.get("firstName") + get_last = p.get("lastName") + + # Status mapping function + def status_transform(data: dict) -> str: + status_map = {"active": "✓ Active", "inactive": "✗ Inactive"} + status_value = get(data, "status", default="unknown") + return status_map.get(status_value, "Unknown") + + # Name concatenation function + def full_name_transform(data: dict) -> str: + first_name = get_first(data) or "" + last_name = get_last(data) or "" + return f"{first_name} {last_name}".strip() + + # Codes joining function + def codes_transform(data: dict) -> str: + codes = get(data, "codes", default=[]) + return ", ".join(str(c) for c in codes) if codes else "" + + # Backup name function + def backup_name_transform(data: dict) -> str: + return get(data, "nickname") or get(data, "firstName") or "Guest" + + mapping = { + "name": full_name_transform, + "status_display": status_transform, + "all_codes": codes_transform, + "city": p.get("address") | p.split("|") | p.at_index(1), + "backup_name": backup_name_transform, + } + + mapper = Mapper(mapping) + + input_data = { + "firstName": "John", + "lastName": "Doe", + "status": "active", + "codes": ["A", "B", "C"], + "address": "123 Main St|Boston|02101", + } + + result = mapper(input_data) + + assert isinstance(result, dict) + assert result["name"] == "John Doe" # type: ignore[index] + assert result["status_display"] == "✓ Active" # type: ignore[index] + assert result["all_codes"] == "A, B, C" # type: ignore[index] + assert result["city"] == "Boston" # type: ignore[index] + assert result["backup_name"] == "John" # type: ignore[index] + + +class TestMapperMapping: + """Test Mapper mapping functionality.""" + + def test_mapper_with_invalid_mapping(self) -> None: + """Test that Mapper rejects invalid mapping types.""" + with pytest.raises(TypeError): + Mapper(123) # type: ignore # Invalid type + + with pytest.raises(TypeError): + Mapper("not a mapping") # type: ignore # Invalid type + + with pytest.raises(TypeError): + Mapper(lambda x: x) # type: ignore # Callable not allowed + + def test_mapper_with_dict_mapping_containing_callable(self) -> None: + """Test Mapper with dict mapping containing callable values.""" + mapping = { + "simple": p.get("path.to.value"), + "transformed": lambda data: data.get("value", 
"").upper(), + "partial": p.get("nested.value") | p.upper, + } + mapper = Mapper(mapping) + + input_data = { + "path": {"to": {"value": "hello"}}, + "value": "world", + "nested": {"value": "test"}, + } + + result = mapper(input_data) + + assert result["simple"] == "hello" # type: ignore[index] + assert result["transformed"] == "WORLD" # type: ignore[index] + assert result["partial"] == "TEST" # type: ignore[index] + + def test_mapper_error_handling(self) -> None: + """Test Mapper error handling.""" + + def failing_mapper(data: dict) -> str: + raise ValueError("Test error") + + mapping: dict[str, Any] = {"result": failing_mapper} + mapper = Mapper(mapping) + + with pytest.raises(ValueError, match="Test error"): + mapper({"test": "data"}) + + def test_mapper_with_empty_mapping(self) -> None: + """Test Mapper with empty mapping.""" + mapper = Mapper({}) + result = mapper({"input": "data"}) + assert result == {} + + def test_mapper_with_constant_values(self) -> None: + """Test Mapper with constant string and other values.""" + mapping = { + "constant_string": "Hello, World!", + "constant_number": 42, + "constant_bool": True, + "constant_none": None, + "dynamic_value": p.get("input.value"), + } + mapper = Mapper(mapping) + + input_data = {"input": {"value": "dynamic"}, "ignored": "data"} + result = mapper(input_data) + + assert result["constant_string"] == "Hello, World!" # type: ignore[index] + assert result["constant_number"] == 42 # type: ignore[index] + assert result["constant_bool"] is True # type: ignore[index] + assert result["constant_none"] is None # type: ignore[index] + assert result["dynamic_value"] == "dynamic" # type: ignore[index] + + def test_mapper_preserves_dict_structure(self) -> None: + """Test that Mapper preserves nested dict structure in results.""" + # Note: Mapper only supports flat dictionaries, not nested output structures + # To achieve nested results, use callables that return nested dicts + + def nested_transform(data: dict) -> dict: + return {"deep": get(data, "another.path"), "value": "direct_value"} + + mapping = { + "flat": p.get("simple.value"), + "nested": nested_transform, + } + + mapper = Mapper(mapping) + + input_data = {"simple": {"value": "test"}, "another": {"path": "nested_test"}} + + result = mapper(input_data) + + assert result["flat"] == "test" # type: ignore[index] + assert result["nested"]["deep"] == "nested_test" # type: ignore[index] + assert result["nested"]["value"] == "direct_value" # type: ignore[index] + + +class TestMapperCalling: + """Test Mapper calling interface.""" + + def test_mapper_callable_interface(self) -> None: + """Test that Mapper can be called directly.""" + mapping = {"output": p.get("input")} + mapper = Mapper(mapping) + + input_data = {"input": "test_value"} + result = mapper(input_data) + + assert result["output"] == "test_value" # type: ignore[index] + + def test_mapper_callable_only(self) -> None: + """Test that Mapper only has __call__ method (no forward method).""" + mapping = {"output": p.get("input")} + mapper = Mapper(mapping) + + input_data = {"input": "test_value"} + + # Should work with __call__ + result = mapper(input_data) + assert result == {"output": "test_value"} + + # Should not have forward method + assert not hasattr(mapper, "forward") + + def test_mapper_no_reverse(self) -> None: + """Test that Mapper doesn't support reverse operations.""" + mapping = {"output": p.get("input")} + mapper = Mapper(mapping) + + # Should not have reverse method + assert not hasattr(mapper, "reverse") + + # Should not have 
can_reverse method + assert not hasattr(mapper, "can_reverse") + + +class TestMapperWithDataMapping: + """Test new Mapper functionality with DataMapping.""" + + def test_mapper_backward_compatibility(self) -> None: + """Test that Mapper maintains backward compatibility with dict.""" + # Old-style dict mapping should still work + mapper = Mapper({"output": p.get("input")}) + result = mapper({"input": "test"}) + assert result == {"output": "test"} + + def test_mapper_with_data_mapping_strict(self) -> None: + """Test Mapper with DataMapping in strict mode.""" + + class InputModel(BaseModel): + name: str + age: int + + class OutputModel(BaseModel): + display_name: str + age_group: str + + data_mapping = DataMapping( + transformations={ + "display_name": p.get("name") | p.upper, + "age_group": lambda d: "adult" if d.get("age", 0) >= 18 else "child", + }, + input_schema=InputModel, + output_schema=OutputModel, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.STRICT) + + # Valid input + result = mapper({"name": "John", "age": 25}) + assert isinstance(result, OutputModel) + assert result.display_name == "JOHN" + assert result.age_group == "adult" + + # Invalid input should raise + with pytest.raises(Exception): + mapper({"name": "John"}) # Missing age + + def test_mapper_with_data_mapping_flexible(self) -> None: + """Test Mapper with DataMapping in flexible mode.""" + + class InputModel(BaseModel): + name: str + age: int + + class OutputModel(BaseModel): + display_name: str + age_group: str + + data_mapping = DataMapping( + transformations={ + "display_name": p.get("name") | p.upper, + "age_group": lambda d: "adult" if d.get("age", 0) >= 18 else "child", + }, + input_schema=InputModel, + output_schema=OutputModel, + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE) + + # Valid input + result = mapper({"name": "John", "age": 25}) + assert isinstance(result, MapperResult) + assert not result.has_issues + assert result.data.display_name == "JOHN" + + # Invalid input should return issues + result = mapper({"name": "John"}) # Missing age + assert isinstance(result, MapperResult) + assert result.has_issues + assert any(issue.field == "age" for issue in result.issues) + + def test_mapper_auto_mode(self) -> None: + """Test Mapper auto mode selection.""" + # With schemas -> strict + data_mapping_with_schemas = DataMapping( + transformations={"out": p.get("in")}, + input_schema=BaseModel, + output_schema=BaseModel, + ) + mapper = Mapper(data_mapping_with_schemas) + assert mapper.mode == ValidationMode.STRICT + + # Without schemas -> flexible + data_mapping_no_schemas = DataMapping(transformations={"out": p.get("in")}) + mapper = Mapper(data_mapping_no_schemas) + assert mapper.mode == ValidationMode.FLEXIBLE + + def test_mapper_with_pure_data_mapping(self) -> None: + """Test Mapper with DataMapping without schemas.""" + data_mapping = DataMapping( + transformations={ + "id": p.get("patient.id"), + "name": p.get("patient.name"), + "provider": p.get("provider.name", default="Unknown"), + } + ) + + mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE) + + result = mapper( + { + "patient": {"id": "123", "name": "John"}, + "provider": {"name": "Dr. Smith"}, + } + ) + + assert isinstance(result, MapperResult) + assert result.data["id"] == "123" + assert result.data["name"] == "John" + assert result.data["provider"] == "Dr. 
Smith" diff --git a/tests/test_partials.py b/tests/test_partials.py new file mode 100644 index 0000000..e840619 --- /dev/null +++ b/tests/test_partials.py @@ -0,0 +1,233 @@ +import pytest + +import chidian.partials as p + + +def test_basic_arithmetic(): + """Test basic arithmetic operations.""" + n = 100 + assert p.add(1)(n) == n + 1 + assert p.subtract(1)(n) == n - 1 + assert p.multiply(10)(n) == n * 10 + assert p.divide(10)(n) == n / 10 + + # Test with lists + lst = [1, 2, 3] + assert p.add([4])(lst) == lst + [4] + + +def test_boolean_checks(): + """Test basic boolean operations.""" + value = {"a": "b", "c": "d"} + + assert p.equals(value)(value) is True + assert p.equals("test")("test") is True + assert p.equals("test")("other") is False + + assert p.contains("a")(value) is True + assert p.contains("z")(value) is False + + assert p.isinstance_of(dict)(value) is True + assert p.isinstance_of(str)("test") is True + assert p.isinstance_of(int)("test") is False + + +def test_basic_chainable_fn(): + """Test basic ChainableFunction functionality.""" + # Single operations + assert p.upper("hello") == "HELLO" + assert p.lower("WORLD") == "world" + assert p.strip(" test ") == "test" + + +def test_function_chain_creation(): + """Test creating FunctionChain with | operator.""" + # ChainableFunction | ChainableFunction + chain = p.upper | p.replace(" ", "_") + assert isinstance(chain, p.FunctionChain) + assert len(chain) == 2 + assert chain("hello world") == "HELLO_WORLD" + + # Regular function | ChainableFunction + chain2 = str.strip | p.upper + assert chain2(" test ") == "TEST" + + +def test_complex_chains(): + """Test complex function chains.""" + # Multi-step string transformation + normalize = p.strip | p.lower | p.replace(" ", "_") + assert normalize(" Hello World ") == "hello_world" + + # Array operations + get_last_word = p.split() | p.last | p.upper + assert get_last_word("hello beautiful world") == "WORLD" + + # Mixed operations + extract_number = p.split("-") | p.last | p.to_int | p.multiply(2) + assert extract_number("item-42") == 84 + + +def test_string_operations(): + """Test string manipulation functions.""" + # Split with custom separator + split_comma = p.split(",") + assert split_comma("a,b,c") == ["a", "b", "c"] + + # Replace with parameters + sanitize = p.replace("&", "and") | p.replace("@", "at") + assert sanitize("tom & jerry @ home") == "tom and jerry at home" + + # Join + join_with_dash = p.join("-") + assert join_with_dash(["a", "b", "c"]) == "a-b-c" + + +def test_array_operations(): + """Test array/list operations.""" + data = ["first", "second", "third", "fourth"] + + assert p.first(data) == "first" + assert p.last(data) == "fourth" + assert p.length(data) == 4 + assert p.at_index(2)(data) == "third" + assert p.slice_range(1, 3)(data) == ["second", "third"] + + # Empty list handling + assert p.first([]) is None + assert p.last([]) is None + assert p.at_index(10)([1, 2, 3]) is None + + +def test_type_conversions(): + """Test type conversion chains.""" + # String to number + parse_int = p.strip | p.to_int + assert parse_int(" 42 ") == 42 + + # Number to string + format_num = p.to_float | p.round_to(2) | p.to_str + assert format_num("19.999") == "20.0" + + # Boolean conversion + assert p.to_bool("") is False + assert p.to_bool("text") is True + assert p.to_bool(0) is False + assert p.to_bool(1) is True + + +def test_get_operations(): + """Test get operations for data access.""" + data = { + "user": { + "name": "John", + "age": 30, + "emails": ["john@example.com", 
"john.doe@work.com"], + } + } + + # Basic get + get_name = p.get("user.name") + assert get_name(data) == "John" + + # Get with default + get_missing = p.get("user.missing", default="N/A") + assert get_missing(data) == "N/A" + + # Get from array + get_email = p.get("user.emails[0]") + assert get_email(data) == "john@example.com" + + # Chain with get + get_upper_name = p.get("user.name") | p.upper + assert get_upper_name(data) == "JOHN" + + +def test_default_handling(): + """Test default value handling.""" + # Replace None with default + safe_upper = p.default_to("") | p.upper + assert safe_upper(None) == "" + assert safe_upper("hello") == "HELLO" + + # Chain with null safety + safe_process = p.default_to("0") | p.to_int | p.add(10) + assert safe_process(None) == 10 + assert safe_process("5") == 15 + + +def test_numeric_operations(): + """Test numeric operations and rounding.""" + # Round to decimals + round_2 = p.round_to(2) + assert round_2(3.14159) == 3.14 + + # Chain with arithmetic + calculate = p.to_int | p.add(10) | p.multiply(2) + assert calculate("5") == 30 + + +def test_chain_composition(): + """Test composing multiple chains.""" + # Create reusable chains + normalize_text = p.strip | p.lower + + # Compose chains + process_input = normalize_text | p.replace(" ", "_") | p.upper + assert process_input(" Hello World ") == "HELLO_WORLD" + + # Chain of chains + chain1 = p.upper | p.replace("A", "X") + chain2 = p.replace("E", "Y") | p.lower + combined = chain1 | chain2 + assert combined("apple") == "xpply" + + +def test_error_propagation(): + """Test that errors propagate through chains.""" + chain = p.to_int | p.multiply(2) + + with pytest.raises(ValueError): + chain("not a number") + + # Safe handling with default - first convert to "0" then to int + safe_chain = p.default_to("0") | p.to_int | p.multiply(2) + assert safe_chain(None) == 0 + assert safe_chain("42") == 84 + + +def test_function_chain_repr(): + """Test string representation of chains.""" + chain = p.upper | p.strip | p.replace(" ", "_") + repr_str = repr(chain) + assert "upper" in repr_str + assert "strip" in repr_str + assert "|" in repr_str + + +def test_real_world_usage(): + """Test realistic data transformation scenarios.""" + # Clean and format user input + clean_input = p.strip | p.lower | p.replace(" ", "_") + assert clean_input(" User Name ") == "user_name" + + # Process numeric data + process_score = p.to_float | p.round_to(1) | p.multiply(100) | p.to_int + assert process_score("0.856") == 90 + + # Extract and format + extract_domain = p.split("@") | p.last | p.upper + assert extract_domain("user@example.com") == "EXAMPLE.COM" + + # Complex nested data access + data = { + "users": [ + {"name": " john doe ", "score": "85.7"}, + {"name": "jane smith", "score": "92.3"}, + ] + } + + get_first_user_score = ( + p.get("users[0].score") | p.to_float | p.round_to(0) | p.to_int + ) + assert get_first_user_score(data) == 86 diff --git a/chidian-py/tests/test_property_based.py b/tests/test_property_based.py similarity index 52% rename from chidian-py/tests/test_property_based.py rename to tests/test_property_based.py index 51d1110..0df1640 100644 --- a/chidian-py/tests/test_property_based.py +++ b/tests/test_property_based.py @@ -1,10 +1,11 @@ """Property-based tests for core chidian functionality.""" -import chidian.partials as p -from chidian import get from hypothesis import given from hypothesis import strategies as st +import chidian.partials as p +from chidian import get + # Custom strategies for valid paths 
@st.composite @@ -79,91 +80,85 @@ def test_get_always_returns_value_or_none(self, data_and_paths): result = get(data, "nonexistent.path") assert result is None - @given(st.text(max_size=50), st.text(max_size=50)) - def test_template_formatting(self, value1, value2): - """Test that template always returns a string.""" - template_func = p.template("{} {}") - result = template_func(value1, value2) - assert isinstance(result, str) - # Values should appear in result (as strings) - assert str(value1) in result - assert str(value2) in result - - @given( - st.lists( - st.text( - alphabet=st.characters(whitelist_categories=("Ll", "Lu")), - min_size=1, - max_size=8, - ), - min_size=1, - max_size=3, - ) - ) - def test_coalesce_returns_value(self, path_names): - """Test that coalesce always returns something.""" - # Filter out empty strings - valid_paths = [p for p in path_names if p] - if not valid_paths: - return # Skip if no valid paths - - # Create data with at least one non-empty value - data = {valid_paths[0]: "found_value"} - - coalesce = p.coalesce(*valid_paths, default="DEFAULT") - result = coalesce(data) - - # Should return either the found value or the default - assert result == "found_value" or result == "DEFAULT" - - @given( - st.lists( - st.text( - alphabet=st.characters(whitelist_categories=("Ll", "Lu")), - min_size=1, - max_size=8, - ), - min_size=1, - max_size=3, - ) - ) - def test_flatten_returns_string(self, path_names): - """Test that flatten always returns a string.""" - # Filter out empty strings - valid_paths = [p for p in path_names if p] - if not valid_paths: - return # Skip if no valid paths - - # Create data with lists for each path - data = {path: [f"value_{i}" for i in range(2)] for path in valid_paths} - - flatten_func = p.flatten(valid_paths, delimiter=", ") - result = flatten_func(data) - - assert isinstance(result, str) + @given(st.text(alphabet=st.characters(blacklist_categories=["Z"]), max_size=50)) + def test_string_operations_property(self, text_value): + """Test that string operations are consistent.""" + # Test upper/lower are reversible + upper_result = p.upper(text_value) + lower_result = p.lower(text_value) + + assert isinstance(upper_result, str) + assert isinstance(lower_result, str) + + # Test strip functionality (strip removes all surrounding whitespace) + if text_value: + padded = f" {text_value} " + stripped = p.strip(padded) + assert isinstance(stripped, str) + # strip removes leading/trailing whitespace from the original value too + assert stripped == text_value.strip() + + @given(st.integers(min_value=1, max_value=100)) + def test_arithmetic_operations_property(self, value): + """Test that arithmetic operations are consistent.""" + # Test basic arithmetic properties + add_result = p.add(10)(value) + assert add_result == value + 10 + + multiply_result = p.multiply(2)(value) + assert multiply_result == value * 2 + + # Test chain consistency with ChainableFunction + add_chainable = p.ChainableFunction(p.add(5)) + multiply_chainable = p.ChainableFunction(p.multiply(2)) + chain_result = (add_chainable | multiply_chainable)(value) + assert chain_result == (value + 5) * 2 + + @given(st.lists(st.text(min_size=1, max_size=10), min_size=1, max_size=5)) + def test_array_operations_property(self, test_list): + """Test that array operations work consistently.""" + # Test first/last + first_result = p.first(test_list) + last_result = p.last(test_list) + + assert first_result == test_list[0] + assert last_result == test_list[-1] + + # Test length + length_result = 
p.length(test_list) + assert length_result == len(test_list) + + # Test at_index + if len(test_list) > 2: + middle_result = p.at_index(1)(test_list) + assert middle_result == test_list[1] @given(st.dictionaries(st.text(max_size=20), st.text(max_size=20), min_size=1)) - def test_case_matching(self, cases): - """Test that case matching works reliably.""" - if not cases: + def test_boolean_operations_property(self, test_dict): + """Test that boolean operations work consistently.""" + if not test_dict: return - # Pick a key that exists in cases - test_key = list(cases.keys())[0] - expected_value = cases[test_key] + # Pick a key that exists + test_key = list(test_dict.keys())[0] + test_value = test_dict[test_key] - case_func = p.case(cases, default="DEFAULT") + # Test equals + equals_func = p.equals(test_value) + assert equals_func(test_value) is True + assert equals_func("different_value") is False - # Should return the expected value for exact match - result = case_func(test_key) - assert result == expected_value + # Test contains + contains_func = p.contains(test_key) + assert contains_func(test_dict) is True + assert contains_func({}) is False @given(st.text(max_size=100)) def test_partials_chaining(self, input_text): """Test that partials chaining doesn't crash.""" # Simple chain that should always work try: - chain = p.strip >> p.lower >> p.upper + chain = p.strip | p.lower | p.upper result = chain(input_text) assert isinstance(result, str) assert result == input_text.strip().lower().upper() @@ -195,7 +190,7 @@ def test_string_partials(self, text): assert isinstance(p.strip(text), str) # Chain them - result = (p.strip >> p.lower >> p.upper)(text) + result = (p.strip | p.lower | p.upper)(text) assert isinstance(result, str) @@ -215,26 +210,33 @@ def test_get_robustness(self, data): # Result should be None or a valid type assert result is None or isinstance(result, (str, int, list, dict, bool, float)) - @given(st.text(), st.text()) - def test_template_edge_cases(self, template_str, value): - """Test template with various inputs.""" - if "{}" in template_str: - try: - template_func = p.template(template_str) - result = template_func(value) - assert isinstance(result, str) - except (ValueError, IndexError): - # Template formatting errors are acceptable - pass - - @given(st.lists(st.text(), max_size=5)) - def test_flatten_empty_inputs(self, paths): - """Test flatten with various path combinations.""" - # Should not crash even with empty or invalid paths + @given(st.text(min_size=1), st.text()) + def test_type_conversion_edge_cases(self, separator, input_value): + """Test type conversions with various inputs.""" + # Test string conversions are robust + str_result = p.to_str(input_value) + assert isinstance(str_result, str) + + # Test split doesn't crash + try: + split_func = p.split(separator) + result = split_func(str_result) + assert isinstance(result, list) + except (AttributeError, ValueError): + # Some edge cases may fail, which is acceptable + pass + + @given(st.lists(st.text(min_size=1, max_size=10), max_size=5)) + def test_join_operations(self, text_list): + """Test join operations with various inputs.""" + # Should not crash even with empty or invalid inputs try: - flatten_func = p.flatten(paths, delimiter=", ") - result = flatten_func({}) + join_func = p.join(", ") + result = join_func(text_list) assert isinstance(result, str) - except Exception: - # Some path combinations might be invalid, that's okay + if text_list: + # If we have content, result should contain it + assert 
len(result) >= 0 + except (AttributeError, TypeError): + # Some combinations may fail, which is acceptable for edge cases pass diff --git a/chidian-py/tests/test_put.py b/tests/test_put.py similarity index 99% rename from chidian-py/tests/test_put.py rename to tests/test_put.py index 1f0ddd6..fccd0f2 100644 --- a/chidian-py/tests/test_put.py +++ b/tests/test_put.py @@ -3,7 +3,8 @@ from typing import Any import pytest -from chidian.lib import put + +from chidian import put class TestPutBasic: diff --git a/tests/test_table.py b/tests/test_table.py new file mode 100644 index 0000000..0740498 --- /dev/null +++ b/tests/test_table.py @@ -0,0 +1,581 @@ +from chidian.table import Table + + +def test_basic_table(): + """Test basic Table functionality.""" + # Create from list + rows = [ + {"id": "p1", "name": "John", "age": 30}, + {"id": "p2", "name": "Jane", "age": 25}, + {"id": "p3", "name": "Bob", "age": 35}, + ] + + table = Table(rows) + + # Test length + assert len(table) == 3 + + # Test iteration + assert list(table) == rows + + +def test_dict_indexing(): + """Test dict-like access with $ syntax.""" + table = Table( + [ + {"id": "p1", "name": "John", "age": 30}, + {"id": "p2", "name": "Jane", "age": 25}, + ] + ) + + # Test basic dict access + assert table["$0"]["name"] == "John" + assert table["$1"]["name"] == "Jane" + + # Test dict.get() method (inherited) + assert dict.get(table, "$0")["name"] == "John" + assert dict.get(table, "$nonexistent") is None + assert dict.get(table, "$nonexistent", "default") == "default" + + # Test with custom keys + table.append({"name": "Bob", "age": 35}, custom_key="bob") + assert table["$bob"]["name"] == "Bob" + + +def test_get_method_basic(): + """Test Table.get method for extracting values from all rows.""" + table = Table( + [ + {"name": "John", "age": 30, "city": "NYC"}, + {"name": "Jane", "age": 25, "city": "LA"}, + {"name": "Bob", "age": 35}, # Note: no city + ] + ) + + # Test simple field extraction + assert table.get("name") == ["John", "Jane", "Bob"] + assert table.get("age") == [30, 25, 35] + + # Test with missing fields and default + assert table.get("city") == ["NYC", "LA", None] + assert table.get("city", default="Unknown") == ["NYC", "LA", "Unknown"] + + # Test completely missing field + assert table.get("phone") == [None, None, None] + assert table.get("phone", default="N/A") == ["N/A", "N/A", "N/A"] + + +def test_get_method_nested(): + """Test Table.get method with nested paths.""" + table = Table( + [ + {"patient": {"id": "123", "name": "John"}, "status": "active"}, + {"patient": {"id": "456", "name": "Jane"}, "status": "inactive"}, + {"patient": {"id": "789", "name": "Bob"}, "status": "active"}, + ] + ) + + # Test nested path extraction + assert table.get("patient.id") == ["123", "456", "789"] + assert table.get("patient.name") == ["John", "Jane", "Bob"] + assert table.get("status") == ["active", "inactive", "active"] + + # Test missing nested paths + assert table.get("patient.age") == [None, None, None] + assert table.get("patient.age", default=0) == [0, 0, 0] + + # Test partially missing nested structure + table_mixed = Table( + [ + {"patient": {"id": "123", "name": "John"}}, + {"status": "active"}, # No patient object + {"patient": {"id": "789"}}, # No name + ] + ) + assert table_mixed.get("patient.name") == ["John", None, None] + assert table_mixed.get("patient.name", default="Unknown") == [ + "John", + "Unknown", + "Unknown", + ] + + +def test_filter_method(): + """Test the filter method.""" + table = Table( + [ + {"name": "John", "age": 
30, "active": True}, + {"name": "Jane", "age": 25, "active": False}, + {"name": "Bob", "age": 35, "active": True}, + ] + ) + table.append({"name": "Alice", "age": 28, "active": True}, custom_key="alice") + + # Filter by active status + active_table = table.filter(lambda x: x.get("active", False)) + assert len(active_table) == 3 + + # Check that new table has proper $ keys + assert "$0" in active_table + assert "$1" in active_table + assert "$2" in active_table + assert active_table["$0"]["name"] == "John" + assert active_table["$1"]["name"] == "Bob" + assert active_table["$2"]["name"] == "Alice" + + # Filter by age + young_table = table.filter(lambda x: x.get("age", 0) < 30) + assert len(young_table) == 2 + assert list(young_table)[0]["name"] == "Jane" + assert list(young_table)[1]["name"] == "Alice" + + +def test_map_method(): + """Test the map method.""" + table = Table([{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]) + + # Transform to add computed field + enhanced = table.map(lambda x: {**x, "adult": x.get("age", 0) >= 18}) + + assert all("adult" in row for row in enhanced) + assert all(row["adult"] is True for row in enhanced) + + +def test_columns_property(): + """Test the columns property.""" + table = Table( + [ + {"name": "John", "age": 30}, + {"name": "Jane", "city": "NYC"}, + {"id": "123", "name": "Bob", "age": 25, "country": "USA"}, + ] + ) + + expected_columns = {"name", "age", "city", "id", "country"} + assert table.columns == expected_columns + + +def test_to_list_to_dict(): + """Test conversion methods.""" + rows = [{"id": 1, "name": "Test"}, {"id": 2, "name": "Another"}] + table = Table(rows) + + # Test to_list + assert table.to_list() == rows + + # Test to_dict + result_dict = table.to_dict() + assert "$0" in result_dict + assert "$1" in result_dict + assert result_dict["$0"] == {"id": 1, "name": "Test"} + assert result_dict["$1"] == {"id": 2, "name": "Another"} + + +def test_append_method(): + """Test appending rows to table.""" + table = Table() + + # Append with auto-generated key + table.append({"name": "John"}) + assert len(table) == 1 + assert table["$0"]["name"] == "John" + + # Append with specific key (should get $ prefix) + table.append({"name": "Jane"}, custom_key="jane_key") + assert table["$jane_key"]["name"] == "Jane" + assert len(table) == 2 + + # Append another auto-keyed row + table.append({"name": "Bob"}) + assert table["$2"]["name"] == "Bob" + assert len(table) == 3 + + # Test accessing named row with dict access + assert table["$jane_key"]["name"] == "Jane" + + +def test_unique_method(): + """Test unique values extraction.""" + table = Table( + [ + {"name": "John", "city": "NYC"}, + {"name": "Jane", "city": "LA"}, + {"name": "Bob", "city": "NYC"}, + {"name": "Alice", "city": "Chicago"}, + {"name": "Charlie", "city": "NYC"}, + ] + ) + + unique_cities = table.unique("city") + assert set(unique_cities) == {"NYC", "LA", "Chicago"} + assert len(unique_cities) == 3 # Should preserve order and uniqueness + + unique_names = table.unique("name") + assert len(unique_names) == 5 # All names are unique + + +def test_group_by_method(): + """Test grouping by a column.""" + table = Table( + [ + {"name": "John", "city": "NYC", "age": 30}, + {"name": "Jane", "city": "LA", "age": 25}, + {"name": "Bob", "city": "NYC", "age": 35}, + {"name": "Alice", "city": "Chicago", "age": 28}, + {"name": "Charlie", "city": "NYC", "age": 40}, + ] + ) + + grouped = table.group_by("city") + + assert "NYC" in grouped + assert "LA" in grouped + assert "Chicago" in grouped + + 
nyc_table = grouped["NYC"] + assert len(nyc_table) == 3 + assert nyc_table.get("name") == ["John", "Bob", "Charlie"] + + la_table = grouped["LA"] + assert len(la_table) == 1 + assert la_table.get("name") == ["Jane"] + + chicago_table = grouped["Chicago"] + assert len(chicago_table) == 1 + assert chicago_table.get("name") == ["Alice"] + + +def test_head_tail_methods(): + """Test head and tail methods.""" + table = Table([{"id": i, "name": f"Person{i}"} for i in range(10)]) + + # Test head + head_3 = table.head(3) + assert len(head_3) == 3 + assert head_3.get("id") == [0, 1, 2] + + head_default = table.head() + assert len(head_default) == 5 # Default is 5 + assert head_default.get("id") == [0, 1, 2, 3, 4] + + # Test tail + tail_3 = table.tail(3) + assert len(tail_3) == 3 + assert tail_3.get("id") == [7, 8, 9] + + tail_default = table.tail() + assert len(tail_default) == 5 # Default is 5 + assert tail_default.get("id") == [5, 6, 7, 8, 9] + + +def test_get_method_arrays(): + """Test Table.get method with array paths and wildcards.""" + table = Table( + [ + { + "patient": { + "id": "123", + "identifiers": [ + {"system": "MRN", "value": "MRN123"}, + {"system": "SSN", "value": "SSN456"}, + ], + }, + "encounters": [ + {"id": "e1", "date": "2024-01-01"}, + {"id": "e2", "date": "2024-02-01"}, + ], + }, + { + "patient": { + "id": "456", + "identifiers": [ + {"system": "MRN", "value": "MRN789"}, + ], + }, + "encounters": [], # Empty encounters + }, + ] + ) + + # Test array index access + assert table.get("patient.identifiers[0].value") == ["MRN123", "MRN789"] + assert table.get("patient.identifiers[1].value") == ["SSN456", None] + + # Test wildcard array access + assert table.get("encounters[*].id") == [["e1", "e2"], []] + # Note: When wildcard matches single item, it returns the item directly, not wrapped in a list + assert table.get("patient.identifiers[*].system") == [["MRN", "SSN"], "MRN"] + + # Test getting entire array + identifiers = table.get("patient.identifiers") + assert len(identifiers) == 2 + assert len(identifiers[0]) == 2 # First patient has 2 identifiers + assert len(identifiers[1]) == 1 # Second patient has 1 identifier + + # Test with missing array paths + assert table.get("patient.addresses[0].city") == [None, None] + assert table.get("patient.addresses[0].city", default="Unknown") == [ + "Unknown", + "Unknown", + ] + + +def test_get_method_dollar_syntax(): + """Test Table.get method with $-prefixed paths for specific row access.""" + table = Table( + [ + {"name": "John", "age": 30, "city": "NYC"}, + {"name": "Jane", "age": 25, "city": "LA"}, + {"name": "Bob", "age": 35}, # Note: no city + ] + ) + + # Test basic $-prefixed access + assert table.get("$0.name") == "John" + assert table.get("$1.age") == 25 + assert table.get("$2.name") == "Bob" + + # Test missing fields with $-prefix + assert table.get("$2.city") is None + assert table.get("$2.city", default="Unknown") == "Unknown" + + # Test non-existent row keys + assert table.get("$99.name") is None + assert table.get("$99.name", default="N/A") == "N/A" + + # Test getting entire row with just $key + row0 = table.get("$0") + assert row0 == {"name": "John", "age": 30, "city": "NYC"} + + # Test with custom keys + table.append({"name": "Alice", "age": 28}, custom_key="alice") + assert table.get("$alice.name") == "Alice" + assert table.get("$alice.age") == 28 + + # Test nested paths with $-prefix + table2 = Table( + [ + {"patient": {"id": "123", "name": "John"}}, + {"patient": {"id": "456", "name": "Jane"}}, + ] + ) + assert 
table2.get("$0.patient.id") == "123" + assert table2.get("$1.patient.name") == "Jane" + + # Compare with non-$ behavior (returns list) + assert table.get("name") == ["John", "Jane", "Bob", "Alice"] + assert table2.get("patient.id") == ["123", "456"] + + +def test_get_method_edge_cases(): + """Test Table.get method edge cases.""" + # Test with empty table + empty_table = Table() + assert empty_table.get("name") == [] + assert empty_table.get("name", default="N/A") == [] + + # Test with heterogeneous data types + table = Table( + [ + {"value": "string"}, + {"value": 123}, + {"value": True}, + {"value": None}, + {"value": [1, 2, 3]}, + {"value": {"nested": "object"}}, + ] + ) + + values = table.get("value") + assert values == ["string", 123, True, None, [1, 2, 3], {"nested": "object"}] + + # Test deep nesting with mixed types + table2 = Table( + [ + {"data": {"level1": {"level2": {"level3": "deep"}}}}, + {"data": {"level1": "shallow"}}, # Not nested as deep + {"data": None}, # Null data + {}, # Missing data entirely + ] + ) + + assert table2.get("data.level1.level2.level3") == ["deep", None, None, None] + assert table2.get("data.level1") == [ + {"level2": {"level3": "deep"}}, + "shallow", + None, + None, + ] + + +def test_init_with_dict(): + """Test initialization with dict instead of list.""" + rows = {"user1": {"name": "John", "age": 30}, "user2": {"name": "Jane", "age": 25}} + + table = Table(rows) + + assert len(table) == 2 + assert "$user1" in table + assert "$user2" in table + assert table["$user1"]["name"] == "John" + assert table["$user2"]["name"] == "Jane" + + +def test_empty_table(): + """Test empty table initialization.""" + table = Table() + + assert len(table) == 0 + assert table.columns == set() + assert table.to_list() == [] + assert table.to_dict() == {} + + +# DSL Tests (TDD - these will fail until DSL is implemented) + + +def test_select_dsl_basic(): + """Test basic select DSL functionality.""" + table = Table( + [ + {"name": "John", "age": 30, "city": "NYC"}, + {"name": "Jane", "age": 25, "city": "LA"}, + ] + ) + + # Test specific column selection + result = table.select("name, age") + assert len(result) == 2 + assert result.get("name") == ["John", "Jane"] + assert result.get("age") == [30, 25] + assert "city" not in result.columns + + # Test wildcard selection + result = table.select("*") + assert len(result) == 2 + assert result.columns == {"name", "age", "city"} + + +def test_select_dsl_with_renaming(): + """Test select DSL with column renaming.""" + table = Table( + [ + {"patient": {"id": "123", "name": "John"}}, + {"patient": {"id": "456", "name": "Jane"}}, + ] + ) + + # Test column renaming + result = table.select("patient.id -> patient_id, patient.name -> patient_name") + assert len(result) == 2 + assert result.get("patient_id") == ["123", "456"] + assert result.get("patient_name") == ["John", "Jane"] + assert result.columns == {"patient_id", "patient_name"} + + +def test_filter_dsl_basic(): + """Test basic filter DSL functionality.""" + table = Table( + [ + {"name": "John", "age": 30}, + {"name": "Jane", "age": 25}, + {"name": "Bob", "age": 35}, + ] + ) + + # Test numeric comparison + result = table.filter("age > 26") + assert len(result) == 2 + assert result.get("name") == ["John", "Bob"] + + # Test string equality + result = table.filter("name = 'John'") + assert len(result) == 1 + assert result.get("name") == ["John"] + + +def test_filter_dsl_complex(): + """Test complex filter DSL functionality.""" + table = Table( + [ + {"name": "John", "age": 30, "status": 
"active"}, + {"name": "Jane", "age": 25, "status": "inactive"}, + {"name": "Bob", "age": 35, "status": "active"}, + ] + ) + + # Test AND operator + result = table.filter("status = 'active' AND age >= 30") + assert len(result) == 2 + assert result.get("name") == ["John", "Bob"] + + # Test OR operator + result = table.filter("age > 25 OR name = 'Jane'") + assert len(result) == 3 # All rows match + + +def test_filter_dsl_nested_paths(): + """Test filter DSL with nested paths.""" + table = Table( + [ + {"patient": {"name": "John", "addresses": [{"city": "NYC"}]}}, + {"patient": {"name": "Jane", "addresses": [{"city": "LA"}]}}, + ] + ) + + # Test nested path with array index + result = table.filter("patient.addresses[0].city = 'NYC'") + assert len(result) == 1 + assert result.get("patient.name") == ["John"] + + # Test CONTAINS with wildcard - note: this returns list from wildcard + table2 = Table( + [ + {"name": "John", "cities": ["NYC", "Boston"]}, + {"name": "Jane", "cities": ["LA", "SF"]}, + ] + ) + result = table2.filter("cities CONTAINS 'NYC'") + assert len(result) == 1 + assert result.get("name") == ["John"] + + +# Integration tests showing expected DSL behavior (will pass once implemented) + + +def test_full_workflow_with_dsl(): + """Test complete workflow combining functional and DSL APIs.""" + table = Table( + [ + {"name": "John", "age": 30, "city": "NYC", "department": "Engineering"}, + {"name": "Jane", "age": 25, "city": "LA", "department": "Marketing"}, + {"name": "Bob", "age": 35, "city": "NYC", "department": "Engineering"}, + {"name": "Alice", "age": 28, "city": "Chicago", "department": "Sales"}, + ] + ) + + # This workflow combines DSL and functional APIs: + # 1. Filter for NYC employees over 25 + # 2. Select specific columns with renaming + # 3. Add computed field + # 4. 
Get unique departments + + # Step 1: DSL filter + nyc_employees = table.filter("city = 'NYC' AND age > 25") + assert len(nyc_employees) == 2 + + # Step 2: DSL select + selected = nyc_employees.select("name -> employee_name, department, age") + assert len(selected) == 2 + assert selected.columns == {"employee_name", "department", "age"} + assert selected.get("employee_name") == ["John", "Bob"] + + # Step 3: Functional map + enhanced = selected.map( + lambda row: {**row, "seniority": "Senior" if row["age"] > 30 else "Junior"} + ) + assert len(enhanced) == 2 + + # Step 4: Functional unique + departments = enhanced.unique("department") + assert departments == ["Engineering"] # Both NYC employees are in Engineering diff --git a/chidian-py/tests/test_seeds.py b/tests/test_types.py similarity index 52% rename from chidian-py/tests/test_seeds.py rename to tests/test_types.py index 764d801..8b5d347 100644 --- a/chidian-py/tests/test_seeds.py +++ b/tests/test_types.py @@ -1,10 +1,8 @@ -"""Comprehensive tests for SEED operations with Piper and DataMapping.""" +"""Comprehensive tests for special types (DROP, KEEP) with Mapper and DataMapping.""" from typing import Any -from chidian import DataMapping, Piper, get -from chidian.seeds import DROP, KEEP - +from chidian import DROP, KEEP, DataMapping, Mapper, get from tests.structstest import ( ComplexPersonData, FlatPersonData, @@ -15,7 +13,7 @@ class TestSeedProcessing: - """Test SEED value processing independently.""" + """Test special type value processing independently.""" def test_drop_enum_values(self) -> None: """Test DROP enum values and level property.""" @@ -28,9 +26,11 @@ def test_drop_enum_values(self) -> None: assert DROP.PARENT.level == -2 def test_drop_process_method(self) -> None: - """Test DROP process method returns self.""" + """Test DROP process method returns itself.""" result = DROP.THIS_OBJECT.process({"test": "data"}) - assert result is DROP.THIS_OBJECT + # The Python implementation returns the DROP enum itself + assert result == DROP.THIS_OBJECT + assert result.level == DROP.THIS_OBJECT.value def test_keep_value_preservation(self) -> None: """Test KEEP preserves values correctly.""" @@ -54,20 +54,24 @@ def test_keep_complex_values(self) -> None: class TestSeedsWithDataMapping: - """Test SEED integration with DataMapping and Piper.""" + """Test special type integration with DataMapping and Mapper.""" - def test_simple_data_flow_without_seeds(self, simple_data: dict[str, Any]) -> None: - """Test baseline data flow without any SEED objects.""" + def test_simple_data_flow_without_special_types( + self, simple_data: dict[str, Any] + ) -> None: + """Test baseline data flow without any special type objects.""" + from chidian.partials import get as p_get - def mapping(data: dict) -> dict: - return { - "patient_id": get(data, "data.patient.id"), - "is_active": get(data, "data.patient.active"), - } + mapping = { + "patient_id": p_get("data.patient.id"), + "is_active": p_get("data.patient.active"), + } - data_mapping = DataMapping(SourceData, SimpleTarget, mapping) - piper: Piper = Piper(data_mapping) - result = piper(SourceData.model_validate(simple_data)) + data_mapping = DataMapping( + transformations=mapping, input_schema=SourceData, output_schema=SimpleTarget + ) + mapper = Mapper(data_mapping) + result = mapper(SourceData.model_validate(simple_data)) assert isinstance(result, SimpleTarget) assert result.patient_id == "abc123" @@ -76,26 +80,27 @@ def mapping(data: dict) -> dict: def test_keep_in_transformation(self) -> None: """Test 
KEEP objects in data transformations. - Note: This test demonstrates that SEED processing is not yet implemented - in the current DataMapping/Piper system. KEEP objects need to be processed + Note: This test demonstrates that special type processing is not yet implemented + in the current DataMapping/Mapper system. KEEP objects need to be processed to extract their values before Pydantic validation. """ + # For now, manually process KEEP objects since automatic processing isn't implemented + keep_obj = KEEP("processed_string") - def mapping(_data: dict) -> dict: - # For now, manually process KEEP objects since automatic processing isn't implemented - keep_obj = KEEP("processed_string") - processed_value = keep_obj.process({}) # Manually call process - - return { - "processed_value": processed_value, - "regular_value": "regular_string", - } + mapping = { + "processed_value": lambda _data: keep_obj.process({}), + "regular_value": lambda _data: "regular_string", + } - data_mapping = DataMapping(SourceData, KeepTestTarget, mapping) - piper: Piper = Piper(data_mapping) + data_mapping = DataMapping( + transformations=mapping, + input_schema=SourceData, + output_schema=KeepTestTarget, + ) + mapper = Mapper(data_mapping) source = SourceData(data={}) - result = piper(source) + result = mapper(source) # Manually processed KEEP objects work assert isinstance(result, KeepTestTarget) @@ -105,20 +110,19 @@ def mapping(_data: dict) -> dict: def test_complex_transformation_with_a_b_data(self, test_A: dict[str, Any]) -> None: """Test complex transformation using A.json data structure.""" - def complex_to_flat_mapping(data: dict) -> dict: - """Transform complex nested structure to flat format.""" - # Extract name parts + def full_name_transform(data: dict) -> str: + """Build full name from name parts.""" first_name = get(data, "name.first", default="") given_names = get(data, "name.given", default=[]) suffix = get(data, "name.suffix", default="") - # Build full name name_parts = [first_name] + given_names if suffix: name_parts.append(suffix) - full_name = " ".join(filter(None, name_parts)) + return " ".join(filter(None, name_parts)) - # Format current address + def current_address_transform(data: dict) -> str: + """Format current address.""" current_addr = get(data, "address.current", default={}) street_lines = get(current_addr, "street", default=[]) city = get(current_addr, "city", default="") @@ -126,41 +130,45 @@ def complex_to_flat_mapping(data: dict) -> dict: postal = get(current_addr, "postal_code", default="") country = get(current_addr, "country", default="") - current_address = "\n".join( + return "\n".join( filter(None, street_lines + [city, state, postal, country]) ) - # Get last previous address + def last_previous_address_transform(data: dict) -> str: + """Get last previous address.""" previous_addrs = get(data, "address.previous", default=[]) - last_previous = "" - if previous_addrs: - last_addr = previous_addrs[-1] - prev_street = get(last_addr, "street", default=[]) - prev_city = get(last_addr, "city", default="") - prev_state = get(last_addr, "state", default="") - prev_postal = get(last_addr, "postal_code", default="") - prev_country = get(last_addr, "country", default="") - last_previous = "\n".join( - filter( - None, - prev_street - + [prev_city, prev_state, prev_postal, prev_country], - ) + if not previous_addrs: + return "" + + last_addr = previous_addrs[-1] + prev_street = get(last_addr, "street", default=[]) + prev_city = get(last_addr, "city", default="") + prev_state = 
get(last_addr, "state", default="") + prev_postal = get(last_addr, "postal_code", default="") + prev_country = get(last_addr, "country", default="") + + return "\n".join( + filter( + None, + prev_street + [prev_city, prev_state, prev_postal, prev_country], ) + ) - return { - "full_name": full_name, - "current_address": current_address, - "last_previous_address": last_previous, - } + mapping: dict[str, Any] = { + "full_name": full_name_transform, + "current_address": current_address_transform, + "last_previous_address": last_previous_address_transform, + } data_mapping = DataMapping( - ComplexPersonData, FlatPersonData, complex_to_flat_mapping + transformations=mapping, + input_schema=ComplexPersonData, + output_schema=FlatPersonData, ) - piper: Piper = Piper(data_mapping) + mapper = Mapper(data_mapping) source = ComplexPersonData.model_validate(test_A) - result = piper(source) + result = mapper(source) assert isinstance(result, FlatPersonData) assert "Bob" in result.full_name diff --git a/chidian-py/uv.lock b/uv.lock similarity index 63% rename from chidian-py/uv.lock rename to uv.lock index 0e4e679..449c708 100644 --- a/chidian-py/uv.lock +++ b/uv.lock @@ -28,8 +28,10 @@ wheels = [ [[package]] name = "chidian" +version = "0.1.0" source = { editable = "." } dependencies = [ + { name = "parsimonious" }, { name = "pydantic", version = "2.10.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, { name = "pydantic", version = "2.11.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, ] @@ -38,16 +40,22 @@ dependencies = [ dev = [ { name = "hypothesis", version = "6.113.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, { name = "hypothesis", version = "6.131.32", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "mypy", version = "1.14.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "mypy", version = "1.16.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, { name = "pytest" }, { name = "ruff" }, ] [package.metadata] -requires-dist = [{ name = "pydantic", specifier = ">=2.10.6,<3.0.0" }] +requires-dist = [ + { name = "parsimonious", specifier = ">=0.10.0" }, + { name = "pydantic", specifier = ">=2.10.6,<3.0.0" }, +] [package.metadata.requires-dev] dev = [ { name = "hypothesis", specifier = ">=6.0.0" }, + { name = "mypy", specifier = ">=1.14.1" }, { name = "pytest", specifier = ">=8.3.5" }, { name = "ruff", specifier = ">=0.11.12" }, ] @@ -116,6 +124,116 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, ] +[[package]] +name = "mypy" +version = "1.14.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.9'", +] +dependencies = [ + { name = "mypy-extensions", marker = "python_full_version < '3.9'" }, + { name = "tomli", marker = "python_full_version < '3.9'" }, + { name = "typing-extensions", marker = "python_full_version < '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/eb/2c92d8ea1e684440f54fa49ac5d9a5f19967b7b472a281f419e69a8d228e/mypy-1.14.1.tar.gz", hash = 
"sha256:7ec88144fe9b510e8475ec2f5f251992690fcf89ccb4500b214b4226abcd32d6", size = 3216051 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/7a/87ae2adb31d68402da6da1e5f30c07ea6063e9f09b5e7cfc9dfa44075e74/mypy-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52686e37cf13d559f668aa398dd7ddf1f92c5d613e4f8cb262be2fb4fedb0fcb", size = 11211002 }, + { url = "https://files.pythonhosted.org/packages/e1/23/eada4c38608b444618a132be0d199b280049ded278b24cbb9d3fc59658e4/mypy-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1fb545ca340537d4b45d3eecdb3def05e913299ca72c290326be19b3804b39c0", size = 10358400 }, + { url = "https://files.pythonhosted.org/packages/43/c9/d6785c6f66241c62fd2992b05057f404237deaad1566545e9f144ced07f5/mypy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90716d8b2d1f4cd503309788e51366f07c56635a3309b0f6a32547eaaa36a64d", size = 12095172 }, + { url = "https://files.pythonhosted.org/packages/c3/62/daa7e787770c83c52ce2aaf1a111eae5893de9e004743f51bfcad9e487ec/mypy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae753f5c9fef278bcf12e1a564351764f2a6da579d4a81347e1d5a15819997b", size = 12828732 }, + { url = "https://files.pythonhosted.org/packages/1b/a2/5fb18318a3637f29f16f4e41340b795da14f4751ef4f51c99ff39ab62e52/mypy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0fe0f5feaafcb04505bcf439e991c6d8f1bf8b15f12b05feeed96e9e7bf1427", size = 13012197 }, + { url = "https://files.pythonhosted.org/packages/28/99/e153ce39105d164b5f02c06c35c7ba958aaff50a2babba7d080988b03fe7/mypy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:7d54bd85b925e501c555a3227f3ec0cfc54ee8b6930bd6141ec872d1c572f81f", size = 9780836 }, + { url = "https://files.pythonhosted.org/packages/da/11/a9422850fd506edbcdc7f6090682ecceaf1f87b9dd847f9df79942da8506/mypy-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f995e511de847791c3b11ed90084a7a0aafdc074ab88c5a9711622fe4751138c", size = 11120432 }, + { url = "https://files.pythonhosted.org/packages/b6/9e/47e450fd39078d9c02d620545b2cb37993a8a8bdf7db3652ace2f80521ca/mypy-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d64169ec3b8461311f8ce2fd2eb5d33e2d0f2c7b49116259c51d0d96edee48d1", size = 10279515 }, + { url = "https://files.pythonhosted.org/packages/01/b5/6c8d33bd0f851a7692a8bfe4ee75eb82b6983a3cf39e5e32a5d2a723f0c1/mypy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba24549de7b89b6381b91fbc068d798192b1b5201987070319889e93038967a8", size = 12025791 }, + { url = "https://files.pythonhosted.org/packages/f0/4c/e10e2c46ea37cab5c471d0ddaaa9a434dc1d28650078ac1b56c2d7b9b2e4/mypy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:183cf0a45457d28ff9d758730cd0210419ac27d4d3f285beda038c9083363b1f", size = 12749203 }, + { url = "https://files.pythonhosted.org/packages/88/55/beacb0c69beab2153a0f57671ec07861d27d735a0faff135a494cd4f5020/mypy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f2a0ecc86378f45347f586e4163d1769dd81c5a223d577fe351f26b179e148b1", size = 12885900 }, + { url = "https://files.pythonhosted.org/packages/a2/75/8c93ff7f315c4d086a2dfcde02f713004357d70a163eddb6c56a6a5eff40/mypy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:ad3301ebebec9e8ee7135d8e3109ca76c23752bac1e717bc84cd3836b4bf3eae", size = 9777869 }, + { url = 
"https://files.pythonhosted.org/packages/43/1b/b38c079609bb4627905b74fc6a49849835acf68547ac33d8ceb707de5f52/mypy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:30ff5ef8519bbc2e18b3b54521ec319513a26f1bba19a7582e7b1f58a6e69f14", size = 11266668 }, + { url = "https://files.pythonhosted.org/packages/6b/75/2ed0d2964c1ffc9971c729f7a544e9cd34b2cdabbe2d11afd148d7838aa2/mypy-1.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb9f255c18052343c70234907e2e532bc7e55a62565d64536dbc7706a20b78b9", size = 10254060 }, + { url = "https://files.pythonhosted.org/packages/a1/5f/7b8051552d4da3c51bbe8fcafffd76a6823779101a2b198d80886cd8f08e/mypy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b4e3413e0bddea671012b063e27591b953d653209e7a4fa5e48759cda77ca11", size = 11933167 }, + { url = "https://files.pythonhosted.org/packages/04/90/f53971d3ac39d8b68bbaab9a4c6c58c8caa4d5fd3d587d16f5927eeeabe1/mypy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:553c293b1fbdebb6c3c4030589dab9fafb6dfa768995a453d8a5d3b23784af2e", size = 12864341 }, + { url = "https://files.pythonhosted.org/packages/03/d2/8bc0aeaaf2e88c977db41583559319f1821c069e943ada2701e86d0430b7/mypy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fad79bfe3b65fe6a1efaed97b445c3d37f7be9fdc348bdb2d7cac75579607c89", size = 12972991 }, + { url = "https://files.pythonhosted.org/packages/6f/17/07815114b903b49b0f2cf7499f1c130e5aa459411596668267535fe9243c/mypy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:8fa2220e54d2946e94ab6dbb3ba0a992795bd68b16dc852db33028df2b00191b", size = 9879016 }, + { url = "https://files.pythonhosted.org/packages/9e/15/bb6a686901f59222275ab228453de741185f9d54fecbaacec041679496c6/mypy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:92c3ed5afb06c3a8e188cb5da4984cab9ec9a77ba956ee419c68a388b4595255", size = 11252097 }, + { url = "https://files.pythonhosted.org/packages/f8/b3/8b0f74dfd072c802b7fa368829defdf3ee1566ba74c32a2cb2403f68024c/mypy-1.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dbec574648b3e25f43d23577309b16534431db4ddc09fda50841f1e34e64ed34", size = 10239728 }, + { url = "https://files.pythonhosted.org/packages/c5/9b/4fd95ab20c52bb5b8c03cc49169be5905d931de17edfe4d9d2986800b52e/mypy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8c6d94b16d62eb3e947281aa7347d78236688e21081f11de976376cf010eb31a", size = 11924965 }, + { url = "https://files.pythonhosted.org/packages/56/9d/4a236b9c57f5d8f08ed346914b3f091a62dd7e19336b2b2a0d85485f82ff/mypy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d4b19b03fdf54f3c5b2fa474c56b4c13c9dbfb9a2db4370ede7ec11a2c5927d9", size = 12867660 }, + { url = "https://files.pythonhosted.org/packages/40/88/a61a5497e2f68d9027de2bb139c7bb9abaeb1be1584649fa9d807f80a338/mypy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0c911fde686394753fff899c409fd4e16e9b294c24bfd5e1ea4675deae1ac6fd", size = 12969198 }, + { url = "https://files.pythonhosted.org/packages/54/da/3d6fc5d92d324701b0c23fb413c853892bfe0e1dbe06c9138037d459756b/mypy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:8b21525cb51671219f5307be85f7e646a153e5acc656e5cebf64bfa076c50107", size = 9885276 }, + { url = "https://files.pythonhosted.org/packages/39/02/1817328c1372be57c16148ce7d2bfcfa4a796bedaed897381b1aad9b267c/mypy-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:7084fb8f1128c76cd9cf68fe5971b37072598e7c31b2f9f95586b65c741a9d31", size = 11143050 }, + { url = "https://files.pythonhosted.org/packages/b9/07/99db9a95ece5e58eee1dd87ca456a7e7b5ced6798fd78182c59c35a7587b/mypy-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f845a00b4f420f693f870eaee5f3e2692fa84cc8514496114649cfa8fd5e2c6", size = 10321087 }, + { url = "https://files.pythonhosted.org/packages/9a/eb/85ea6086227b84bce79b3baf7f465b4732e0785830726ce4a51528173b71/mypy-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44bf464499f0e3a2d14d58b54674dee25c031703b2ffc35064bd0df2e0fac319", size = 12066766 }, + { url = "https://files.pythonhosted.org/packages/4b/bb/f01bebf76811475d66359c259eabe40766d2f8ac8b8250d4e224bb6df379/mypy-1.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c99f27732c0b7dc847adb21c9d47ce57eb48fa33a17bc6d7d5c5e9f9e7ae5bac", size = 12787111 }, + { url = "https://files.pythonhosted.org/packages/2f/c9/84837ff891edcb6dcc3c27d85ea52aab0c4a34740ff5f0ccc0eb87c56139/mypy-1.14.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:bce23c7377b43602baa0bd22ea3265c49b9ff0b76eb315d6c34721af4cdf1d9b", size = 12974331 }, + { url = "https://files.pythonhosted.org/packages/84/5f/901e18464e6a13f8949b4909535be3fa7f823291b8ab4e4b36cfe57d6769/mypy-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:8edc07eeade7ebc771ff9cf6b211b9a7d93687ff892150cb5692e4f4272b0837", size = 9763210 }, + { url = "https://files.pythonhosted.org/packages/ca/1f/186d133ae2514633f8558e78cd658070ba686c0e9275c5a5c24a1e1f0d67/mypy-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3888a1816d69f7ab92092f785a462944b3ca16d7c470d564165fe703b0970c35", size = 11200493 }, + { url = "https://files.pythonhosted.org/packages/af/fc/4842485d034e38a4646cccd1369f6b1ccd7bc86989c52770d75d719a9941/mypy-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46c756a444117c43ee984bd055db99e498bc613a70bbbc120272bd13ca579fbc", size = 10357702 }, + { url = "https://files.pythonhosted.org/packages/b4/e6/457b83f2d701e23869cfec013a48a12638f75b9d37612a9ddf99072c1051/mypy-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:27fc248022907e72abfd8e22ab1f10e903915ff69961174784a3900a8cba9ad9", size = 12091104 }, + { url = "https://files.pythonhosted.org/packages/f1/bf/76a569158db678fee59f4fd30b8e7a0d75bcbaeef49edd882a0d63af6d66/mypy-1.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:499d6a72fb7e5de92218db961f1a66d5f11783f9ae549d214617edab5d4dbdbb", size = 12830167 }, + { url = "https://files.pythonhosted.org/packages/43/bc/0bc6b694b3103de9fed61867f1c8bd33336b913d16831431e7cb48ef1c92/mypy-1.14.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:57961db9795eb566dc1d1b4e9139ebc4c6b0cb6e7254ecde69d1552bf7613f60", size = 13013834 }, + { url = "https://files.pythonhosted.org/packages/b0/79/5f5ec47849b6df1e6943d5fd8e6632fbfc04b4fd4acfa5a5a9535d11b4e2/mypy-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:07ba89fdcc9451f2ebb02853deb6aaaa3d2239a236669a63ab3801bbf923ef5c", size = 9781231 }, + { url = "https://files.pythonhosted.org/packages/a0/b5/32dd67b69a16d088e533962e5044e51004176a9952419de0370cdaead0f8/mypy-1.14.1-py3-none-any.whl", hash = "sha256:b66a60cc4073aeb8ae00057f9c1f64d49e90f918fbcef9a977eb121da8b8f1d1", size = 2752905 }, +] + +[[package]] +name = "mypy" +version = "1.16.1" +source = { registry = "https://pypi.org/simple" } 
+resolution-markers = [ + "python_full_version >= '3.9'", +] +dependencies = [ + { name = "mypy-extensions", marker = "python_full_version >= '3.9'" }, + { name = "pathspec", marker = "python_full_version >= '3.9'" }, + { name = "tomli", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version >= '3.9'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/12/2bf23a80fcef5edb75de9a1e295d778e0f46ea89eb8b115818b663eff42b/mypy-1.16.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4f0fed1022a63c6fec38f28b7fc77fca47fd490445c69d0a66266c59dd0b88a", size = 10958644 }, + { url = "https://files.pythonhosted.org/packages/08/50/bfe47b3b278eacf348291742fd5e6613bbc4b3434b72ce9361896417cfe5/mypy-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:86042bbf9f5a05ea000d3203cf87aa9d0ccf9a01f73f71c58979eb9249f46d72", size = 10087033 }, + { url = "https://files.pythonhosted.org/packages/21/de/40307c12fe25675a0776aaa2cdd2879cf30d99eec91b898de00228dc3ab5/mypy-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ea7469ee5902c95542bea7ee545f7006508c65c8c54b06dc2c92676ce526f3ea", size = 11875645 }, + { url = "https://files.pythonhosted.org/packages/a6/d8/85bdb59e4a98b7a31495bd8f1a4445d8ffc86cde4ab1f8c11d247c11aedc/mypy-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:352025753ef6a83cb9e7f2427319bb7875d1fdda8439d1e23de12ab164179574", size = 12616986 }, + { url = "https://files.pythonhosted.org/packages/0e/d0/bb25731158fa8f8ee9e068d3e94fcceb4971fedf1424248496292512afe9/mypy-1.16.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff9fa5b16e4c1364eb89a4d16bcda9987f05d39604e1e6c35378a2987c1aac2d", size = 12878632 }, + { url = "https://files.pythonhosted.org/packages/2d/11/822a9beb7a2b825c0cb06132ca0a5183f8327a5e23ef89717c9474ba0bc6/mypy-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:1256688e284632382f8f3b9e2123df7d279f603c561f099758e66dd6ed4e8bd6", size = 9484391 }, + { url = "https://files.pythonhosted.org/packages/9a/61/ec1245aa1c325cb7a6c0f8570a2eee3bfc40fa90d19b1267f8e50b5c8645/mypy-1.16.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:472e4e4c100062488ec643f6162dd0d5208e33e2f34544e1fc931372e806c0cc", size = 10890557 }, + { url = "https://files.pythonhosted.org/packages/6b/bb/6eccc0ba0aa0c7a87df24e73f0ad34170514abd8162eb0c75fd7128171fb/mypy-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea16e2a7d2714277e349e24d19a782a663a34ed60864006e8585db08f8ad1782", size = 10012921 }, + { url = "https://files.pythonhosted.org/packages/5f/80/b337a12e2006715f99f529e732c5f6a8c143bb58c92bb142d5ab380963a5/mypy-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08e850ea22adc4d8a4014651575567b0318ede51e8e9fe7a68f25391af699507", size = 11802887 }, + { url = "https://files.pythonhosted.org/packages/d9/59/f7af072d09793d581a745a25737c7c0a945760036b16aeb620f658a017af/mypy-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22d76a63a42619bfb90122889b903519149879ddbf2ba4251834727944c8baca", size = 12531658 }, + { url = 
"https://files.pythonhosted.org/packages/82/c4/607672f2d6c0254b94a646cfc45ad589dd71b04aa1f3d642b840f7cce06c/mypy-1.16.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2c7ce0662b6b9dc8f4ed86eb7a5d505ee3298c04b40ec13b30e572c0e5ae17c4", size = 12732486 }, + { url = "https://files.pythonhosted.org/packages/b6/5e/136555ec1d80df877a707cebf9081bd3a9f397dedc1ab9750518d87489ec/mypy-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:211287e98e05352a2e1d4e8759c5490925a7c784ddc84207f4714822f8cf99b6", size = 9479482 }, + { url = "https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493 }, + { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687 }, + { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723 }, + { url = "https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", size = 12722980 }, + { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328 }, + { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321 }, + { url = "https://files.pythonhosted.org/packages/28/e3/96964af4a75a949e67df4b95318fe2b7427ac8189bbc3ef28f92a1c5bc56/mypy-1.16.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddc91eb318c8751c69ddb200a5937f1232ee8efb4e64e9f4bc475a33719de438", size = 11063480 }, + { url = "https://files.pythonhosted.org/packages/f5/4d/cd1a42b8e5be278fab7010fb289d9307a63e07153f0ae1510a3d7b703193/mypy-1.16.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:87ff2c13d58bdc4bbe7dc0dedfe622c0f04e2cb2a492269f3b418df2de05c536", size = 10090538 }, + { url = "https://files.pythonhosted.org/packages/c9/4f/c3c6b4b66374b5f68bab07c8cabd63a049ff69796b844bc759a0ca99bb2a/mypy-1.16.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a7cfb0fe29fe5a9841b7c8ee6dffb52382c45acdf68f032145b75620acfbd6f", size = 11836839 }, + { url = "https://files.pythonhosted.org/packages/b4/7e/81ca3b074021ad9775e5cb97ebe0089c0f13684b066a750b7dc208438403/mypy-1.16.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:051e1677689c9d9578b9c7f4d206d763f9bbd95723cd1416fad50db49d52f359", size = 12715634 }, + { url = "https://files.pythonhosted.org/packages/e9/95/bdd40c8be346fa4c70edb4081d727a54d0a05382d84966869738cfa8a497/mypy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash 
= "sha256:d5d2309511cc56c021b4b4e462907c2b12f669b2dbeb68300110ec27723971be", size = 12895584 }, + { url = "https://files.pythonhosted.org/packages/5a/fd/d486a0827a1c597b3b48b1bdef47228a6e9ee8102ab8c28f944cb83b65dc/mypy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:4f58ac32771341e38a853c5d0ec0dfe27e18e27da9cdb8bbc882d2249c71a3ee", size = 9573886 }, + { url = "https://files.pythonhosted.org/packages/49/5e/ed1e6a7344005df11dfd58b0fdd59ce939a0ba9f7ed37754bf20670b74db/mypy-1.16.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7fc688329af6a287567f45cc1cefb9db662defeb14625213a5b7da6e692e2069", size = 10959511 }, + { url = "https://files.pythonhosted.org/packages/30/88/a7cbc2541e91fe04f43d9e4577264b260fecedb9bccb64ffb1a34b7e6c22/mypy-1.16.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e198ab3f55924c03ead626ff424cad1732d0d391478dfbf7bb97b34602395da", size = 10075555 }, + { url = "https://files.pythonhosted.org/packages/93/f7/c62b1e31a32fbd1546cca5e0a2e5f181be5761265ad1f2e94f2a306fa906/mypy-1.16.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09aa4f91ada245f0a45dbc47e548fd94e0dd5a8433e0114917dc3b526912a30c", size = 11874169 }, + { url = "https://files.pythonhosted.org/packages/c8/15/db580a28034657fb6cb87af2f8996435a5b19d429ea4dcd6e1c73d418e60/mypy-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13c7cd5b1cb2909aa318a90fd1b7e31f17c50b242953e7dd58345b2a814f6383", size = 12610060 }, + { url = "https://files.pythonhosted.org/packages/ec/78/c17f48f6843048fa92d1489d3095e99324f2a8c420f831a04ccc454e2e51/mypy-1.16.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:58e07fb958bc5d752a280da0e890c538f1515b79a65757bbdc54252ba82e0b40", size = 12875199 }, + { url = "https://files.pythonhosted.org/packages/bc/d6/ed42167d0a42680381653fd251d877382351e1bd2c6dd8a818764be3beb1/mypy-1.16.1-cp39-cp39-win_amd64.whl", hash = "sha256:f895078594d918f93337a505f8add9bd654d1a24962b4c6ed9390e12531eb31b", size = 9487033 }, + { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923 }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, +] + [[package]] name = "packaging" version = "25.0" @@ -125,6 +243,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, ] +[[package]] +name = "parsimonious" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/7b/91/abdc50c4ef06fdf8d047f60ee777ca9b2a7885e1a9cea81343fbecda52d7/parsimonious-0.10.0.tar.gz", hash = "sha256:8281600da180ec8ae35427a4ab4f7b82bfec1e3d1e52f80cb60ea82b9512501c", size = 52172 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/0f/c8b64d9b54ea631fcad4e9e3c8dbe8c11bb32a623be94f22974c88e71eaf/parsimonious-0.10.0-py3-none-any.whl", hash = "sha256:982ab435fabe86519b57f6b35610aa4e4e977e9f02a14353edf4bbc75369fc0f", size = 48427 }, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -427,6 +566,107 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, ] +[[package]] +name = "regex" +version = "2024.11.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/3c/4651f6b130c6842a8f3df82461a8950f923925db8b6961063e82744bddcc/regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91", size = 482674 }, + { url = "https://files.pythonhosted.org/packages/15/51/9f35d12da8434b489c7b7bffc205c474a0a9432a889457026e9bc06a297a/regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0", size = 287684 }, + { url = "https://files.pythonhosted.org/packages/bd/18/b731f5510d1b8fb63c6b6d3484bfa9a59b84cc578ac8b5172970e05ae07c/regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e", size = 284589 }, + { url = "https://files.pythonhosted.org/packages/78/a2/6dd36e16341ab95e4c6073426561b9bfdeb1a9c9b63ab1b579c2e96cb105/regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde", size = 782511 }, + { url = "https://files.pythonhosted.org/packages/1b/2b/323e72d5d2fd8de0d9baa443e1ed70363ed7e7b2fb526f5950c5cb99c364/regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e", size = 821149 }, + { url = "https://files.pythonhosted.org/packages/90/30/63373b9ea468fbef8a907fd273e5c329b8c9535fee36fc8dba5fecac475d/regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2", size = 809707 }, + { url = 
"https://files.pythonhosted.org/packages/f2/98/26d3830875b53071f1f0ae6d547f1d98e964dd29ad35cbf94439120bb67a/regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf", size = 781702 }, + { url = "https://files.pythonhosted.org/packages/87/55/eb2a068334274db86208ab9d5599ffa63631b9f0f67ed70ea7c82a69bbc8/regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c", size = 771976 }, + { url = "https://files.pythonhosted.org/packages/74/c0/be707bcfe98254d8f9d2cff55d216e946f4ea48ad2fd8cf1428f8c5332ba/regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86", size = 697397 }, + { url = "https://files.pythonhosted.org/packages/49/dc/bb45572ceb49e0f6509f7596e4ba7031f6819ecb26bc7610979af5a77f45/regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67", size = 768726 }, + { url = "https://files.pythonhosted.org/packages/5a/db/f43fd75dc4c0c2d96d0881967897926942e935d700863666f3c844a72ce6/regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d", size = 775098 }, + { url = "https://files.pythonhosted.org/packages/99/d7/f94154db29ab5a89d69ff893159b19ada89e76b915c1293e98603d39838c/regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2", size = 839325 }, + { url = "https://files.pythonhosted.org/packages/f7/17/3cbfab1f23356fbbf07708220ab438a7efa1e0f34195bf857433f79f1788/regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008", size = 843277 }, + { url = "https://files.pythonhosted.org/packages/7e/f2/48b393b51900456155de3ad001900f94298965e1cad1c772b87f9cfea011/regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62", size = 773197 }, + { url = "https://files.pythonhosted.org/packages/45/3f/ef9589aba93e084cd3f8471fded352826dcae8489b650d0b9b27bc5bba8a/regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e", size = 261714 }, + { url = "https://files.pythonhosted.org/packages/42/7e/5f1b92c8468290c465fd50c5318da64319133231415a8aa6ea5ab995a815/regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519", size = 274042 }, + { url = "https://files.pythonhosted.org/packages/58/58/7e4d9493a66c88a7da6d205768119f51af0f684fe7be7bac8328e217a52c/regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638", size = 482669 }, + { url = "https://files.pythonhosted.org/packages/34/4c/8f8e631fcdc2ff978609eaeef1d6994bf2f028b59d9ac67640ed051f1218/regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7", size = 287684 }, + { url = "https://files.pythonhosted.org/packages/c5/1b/f0e4d13e6adf866ce9b069e191f303a30ab1277e037037a365c3aad5cc9c/regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20", size = 284589 }, + { url = "https://files.pythonhosted.org/packages/25/4d/ab21047f446693887f25510887e6820b93f791992994f6498b0318904d4a/regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114", size = 792121 }, + { url = "https://files.pythonhosted.org/packages/45/ee/c867e15cd894985cb32b731d89576c41a4642a57850c162490ea34b78c3b/regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3", size = 831275 }, + { url = "https://files.pythonhosted.org/packages/b3/12/b0f480726cf1c60f6536fa5e1c95275a77624f3ac8fdccf79e6727499e28/regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f", size = 818257 }, + { url = "https://files.pythonhosted.org/packages/bf/ce/0d0e61429f603bac433910d99ef1a02ce45a8967ffbe3cbee48599e62d88/regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0", size = 792727 }, + { url = "https://files.pythonhosted.org/packages/e4/c1/243c83c53d4a419c1556f43777ccb552bccdf79d08fda3980e4e77dd9137/regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55", size = 780667 }, + { url = "https://files.pythonhosted.org/packages/c5/f4/75eb0dd4ce4b37f04928987f1d22547ddaf6c4bae697623c1b05da67a8aa/regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89", size = 776963 }, + { url = "https://files.pythonhosted.org/packages/16/5d/95c568574e630e141a69ff8a254c2f188b4398e813c40d49228c9bbd9875/regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d", size = 784700 }, + { url = "https://files.pythonhosted.org/packages/8e/b5/f8495c7917f15cc6fee1e7f395e324ec3e00ab3c665a7dc9d27562fd5290/regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34", size = 848592 }, + { url = "https://files.pythonhosted.org/packages/1c/80/6dd7118e8cb212c3c60b191b932dc57db93fb2e36fb9e0e92f72a5909af9/regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d", size = 852929 }, + { url = "https://files.pythonhosted.org/packages/11/9b/5a05d2040297d2d254baf95eeeb6df83554e5e1df03bc1a6687fc4ba1f66/regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45", size = 781213 }, + { url = "https://files.pythonhosted.org/packages/26/b7/b14e2440156ab39e0177506c08c18accaf2b8932e39fb092074de733d868/regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9", size = 261734 }, + { url = "https://files.pythonhosted.org/packages/80/32/763a6cc01d21fb3819227a1cc3f60fd251c13c37c27a73b8ff4315433a8e/regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60", size = 274052 }, + { url = 
"https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781 }, + { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455 }, + { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759 }, + { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976 }, + { url = "https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077 }, + { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160 }, + { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896 }, + { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997 }, + { url = "https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725 }, + { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", size = 789481 }, + { url = "https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896 }, + { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138 }, + { url = 
"https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692 }, + { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135 }, + { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567 }, + { url = "https://files.pythonhosted.org/packages/90/73/bcb0e36614601016552fa9344544a3a2ae1809dc1401b100eab02e772e1f/regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84", size = 483525 }, + { url = "https://files.pythonhosted.org/packages/0f/3f/f1a082a46b31e25291d830b369b6b0c5576a6f7fb89d3053a354c24b8a83/regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4", size = 288324 }, + { url = "https://files.pythonhosted.org/packages/09/c9/4e68181a4a652fb3ef5099e077faf4fd2a694ea6e0f806a7737aff9e758a/regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0", size = 284617 }, + { url = "https://files.pythonhosted.org/packages/fc/fd/37868b75eaf63843165f1d2122ca6cb94bfc0271e4428cf58c0616786dce/regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0", size = 795023 }, + { url = "https://files.pythonhosted.org/packages/c4/7c/d4cd9c528502a3dedb5c13c146e7a7a539a3853dc20209c8e75d9ba9d1b2/regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7", size = 833072 }, + { url = "https://files.pythonhosted.org/packages/4f/db/46f563a08f969159c5a0f0e722260568425363bea43bb7ae370becb66a67/regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7", size = 823130 }, + { url = "https://files.pythonhosted.org/packages/db/60/1eeca2074f5b87df394fccaa432ae3fc06c9c9bfa97c5051aed70e6e00c2/regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c", size = 796857 }, + { url = "https://files.pythonhosted.org/packages/10/db/ac718a08fcee981554d2f7bb8402f1faa7e868c1345c16ab1ebec54b0d7b/regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3", size = 784006 }, + { url = "https://files.pythonhosted.org/packages/c2/41/7da3fe70216cea93144bf12da2b87367590bcf07db97604edeea55dac9ad/regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07", size = 781650 }, + { url = "https://files.pythonhosted.org/packages/a7/d5/880921ee4eec393a4752e6ab9f0fe28009435417c3102fc413f3fe81c4e5/regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e", size = 789545 }, + { url = "https://files.pythonhosted.org/packages/dc/96/53770115e507081122beca8899ab7f5ae28ae790bfcc82b5e38976df6a77/regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6", size = 853045 }, + { url = "https://files.pythonhosted.org/packages/31/d3/1372add5251cc2d44b451bd94f43b2ec78e15a6e82bff6a290ef9fd8f00a/regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4", size = 860182 }, + { url = "https://files.pythonhosted.org/packages/ed/e3/c446a64984ea9f69982ba1a69d4658d5014bc7a0ea468a07e1a1265db6e2/regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d", size = 787733 }, + { url = "https://files.pythonhosted.org/packages/2b/f1/e40c8373e3480e4f29f2692bd21b3e05f296d3afebc7e5dcf21b9756ca1c/regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff", size = 262122 }, + { url = "https://files.pythonhosted.org/packages/45/94/bc295babb3062a731f52621cdc992d123111282e291abaf23faa413443ea/regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a", size = 273545 }, + { url = "https://files.pythonhosted.org/packages/44/0f/207b37e6e08d548fac0aa00bf0b7464126315d58ab5161216b8cb3abb2aa/regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b", size = 482777 }, + { url = "https://files.pythonhosted.org/packages/5a/5a/586bafa294c5d2451265d3685815606c61e620f469cac3b946fff0a4aa48/regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3", size = 287751 }, + { url = "https://files.pythonhosted.org/packages/08/92/9df786fad8a4e0766bfc9a2e334c5f0757356070c9639b2ec776b8cdef3d/regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467", size = 284552 }, + { url = "https://files.pythonhosted.org/packages/0a/27/0b3cf7d9fbe43301aa3473d54406019a7380abe4e3c9ae250bac13c4fdb3/regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd", size = 783587 }, + { url = "https://files.pythonhosted.org/packages/89/38/499b32cbb61163af60a5c5ff26aacea7836fe7e3d821e76af216e996088c/regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf", size = 822904 }, + { url = "https://files.pythonhosted.org/packages/3f/a4/e3b11c643e5ae1059a08aeef971973f0c803d2a9ae2e7a86f97c68146a6c/regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd", size = 809900 }, + { url = "https://files.pythonhosted.org/packages/5a/c8/dc7153ceb5bcc344f5c4f0291ea45925a5f00009afa3849e91561ac2e847/regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6", size = 785105 }, + { url = 
"https://files.pythonhosted.org/packages/2a/29/841489ea52013062b22625fbaf49b0916aeb62bae2e56425ac30f9dead46/regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f", size = 773033 }, + { url = "https://files.pythonhosted.org/packages/3e/4e/4a0da5e87f7c2dc73a8505785d5af2b1a19c66f4645b93caa50b7eb08242/regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5", size = 702374 }, + { url = "https://files.pythonhosted.org/packages/94/6e/444e66346600d11e8a0f4bb31611973cffa772d5033ba1cf1f15de8a0d52/regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df", size = 769990 }, + { url = "https://files.pythonhosted.org/packages/da/28/95c3ed6cd51b27f54e59940400e2a3ddd3f8bbbc3aaf947e57a67104ecbd/regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773", size = 775345 }, + { url = "https://files.pythonhosted.org/packages/07/5d/0cd19cf44d96a7aa31526611c24235d21d27c23b65201cb2c5cac508dd42/regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c", size = 840379 }, + { url = "https://files.pythonhosted.org/packages/2a/13/ec3f8d85b789ee1c6ffbdfd4092fd901416716317ee17bf51aa2890bac96/regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc", size = 845842 }, + { url = "https://files.pythonhosted.org/packages/50/cb/7170247e65afea2bf9204bcb2682f292b0a3a57d112478da199b84d59792/regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f", size = 775026 }, + { url = "https://files.pythonhosted.org/packages/cc/06/c817c9201f09b7d9dd033039ba90d8197c91e9fe2984141f2d1de270c159/regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4", size = 261738 }, + { url = "https://files.pythonhosted.org/packages/cf/69/c39e16320400842eb4358c982ef5fc680800866f35ebfd4dd38a22967ce0/regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001", size = 274094 }, + { url = "https://files.pythonhosted.org/packages/89/23/c4a86df398e57e26f93b13ae63acce58771e04bdde86092502496fa57f9c/regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839", size = 482682 }, + { url = "https://files.pythonhosted.org/packages/3c/8b/45c24ab7a51a1658441b961b86209c43e6bb9d39caf1e63f46ce6ea03bc7/regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e", size = 287679 }, + { url = "https://files.pythonhosted.org/packages/7a/d1/598de10b17fdafc452d11f7dada11c3be4e379a8671393e4e3da3c4070df/regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf", size = 284578 }, + { url = "https://files.pythonhosted.org/packages/49/70/c7eaa219efa67a215846766fde18d92d54cb590b6a04ffe43cef30057622/regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b", size = 782012 }, + { url = "https://files.pythonhosted.org/packages/89/e5/ef52c7eb117dd20ff1697968219971d052138965a4d3d9b95e92e549f505/regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0", size = 820580 }, + { url = "https://files.pythonhosted.org/packages/5f/3f/9f5da81aff1d4167ac52711acf789df13e789fe6ac9545552e49138e3282/regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b", size = 809110 }, + { url = "https://files.pythonhosted.org/packages/86/44/2101cc0890c3621b90365c9ee8d7291a597c0722ad66eccd6ffa7f1bcc09/regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef", size = 780919 }, + { url = "https://files.pythonhosted.org/packages/ce/2e/3e0668d8d1c7c3c0d397bf54d92fc182575b3a26939aed5000d3cc78760f/regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48", size = 771515 }, + { url = "https://files.pythonhosted.org/packages/a6/49/1bc4584254355e3dba930a3a2fd7ad26ccba3ebbab7d9100db0aff2eedb0/regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13", size = 696957 }, + { url = "https://files.pythonhosted.org/packages/c8/dd/42879c1fc8a37a887cd08e358af3d3ba9e23038cd77c7fe044a86d9450ba/regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2", size = 768088 }, + { url = "https://files.pythonhosted.org/packages/89/96/c05a0fe173cd2acd29d5e13c1adad8b706bcaa71b169e1ee57dcf2e74584/regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95", size = 774752 }, + { url = "https://files.pythonhosted.org/packages/b5/f3/a757748066255f97f14506483436c5f6aded7af9e37bca04ec30c90ca683/regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9", size = 838862 }, + { url = "https://files.pythonhosted.org/packages/5c/93/c6d2092fd479dcaeea40fc8fa673822829181ded77d294a7f950f1dda6e2/regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f", size = 842622 }, + { url = "https://files.pythonhosted.org/packages/ff/9c/daa99532c72f25051a90ef90e1413a8d54413a9e64614d9095b0c1c154d0/regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b", size = 772713 }, + { url = "https://files.pythonhosted.org/packages/13/5d/61a533ccb8c231b474ac8e3a7d70155b00dfc61af6cafdccd1947df6d735/regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57", size = 261756 }, + { url = "https://files.pythonhosted.org/packages/dc/7b/e59b7f7c91ae110d154370c24133f947262525b5d6406df65f23422acc17/regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983", size = 274110 }, +] + [[package]] name = "ruff" version = "0.11.12"