diff --git a/.gitignore b/.gitignore
index a6837ea..52cc532 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,6 @@ venv/
 .mypy_cache/
 .ruff_cache/
 # END roobert
+
+.pithy/
+.claude*
diff --git a/README.md b/README.md
index de8a9c8..2cdbe27 100644
--- a/README.md
+++ b/README.md
@@ -2,174 +2,149 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
-> Declarative, type-safe data mapping for humans.
+> Declarative, type-safe data mapping for savvy data engineers.
 
-chidian is a pure Python framework for composable, readable, and sharable data mappings built on top of **Pydantic v2**.
+**chidian** is a composable framework for building readable data transformations with **Pydantic v2**.
 
-## 30-second tour
+## Quick Start
 
 ```python
 from pydantic import BaseModel
-from chidian import Mapper, DataMapping
+from chidian import Mapper
 import chidian.partials as p
 
-# 1️⃣ Define your source & target schemas
-class Source(BaseModel):
+# Source data (nested)
+source_data = {
+    "name": {"first": "Gandalf", "given": ["the", "Grey"], "suffix": None},
+    "address": {
+        "street": ["Bag End", "Hobbiton"],
+        "city": "The Shire",
+        "postal_code": "ME001",
+        "country": "Middle Earth"
+    }
+}
+
+# Target data (flat)
+target = {
+    "full_name": "Gandalf the Grey",
+    "address": "Bag End\nHobbiton\nThe Shire\nME001\nMiddle Earth"
+}
+
+# Define schemas
+class SourceSchema(BaseModel):
     name: dict
     address: dict
 
-class Target(BaseModel):
+class TargetSchema(BaseModel):
     full_name: str
     address: str
 
-# 2️⃣ Write pure dict→dict transformation logic with `Mapper`
-fmt = p.template("{} {} {}", skip_none=True)
-
-person_mapper = Mapper(
-    lambda src: {
-        "full_name": fmt(
-            p.get("name.first")(src),
-            p.get("name.given[*]") | p.join(" ")(src),
-            p.get("name.suffix")(src),
-        ),
-        "address": p.get("address") | p.flatten_paths(
-            [
-                "street[0]",
-                "street[1]",
-                "city",
-                "postal_code",
-                "country",
-            ],
-            delimiter="\n",
-        )(src),
-    }
-)
-
-# 3️⃣ Wrap it with `DataMapping` for schema validation
-person_mapping = DataMapping(
-    mapper=person_mapper,
-    input_schema=Source,
-    output_schema=Target,
+# Create type-safe mapper
+person_mapping = Mapper(
+    {
+        "full_name": p.get([
+            "name.first",
+            "name.given[*]",
+            "name.suffix"
+        ]).join(" ", flatten=True),
+
+        "address": p.get([
+            "address.street[*]",
+            "address.city",
+            "address.postal_code",
+            "address.country"
+        ]).join("\n", flatten=True),
+    },
+    min_input_schemas=[SourceSchema],
+    output_schema=TargetSchema,
 )
 
-# 4️⃣ Execute!
-source_obj = Source.model_validate(source_data)
-result = person_mapping.forward(source_obj)
-print(result)
+# Execute
+result = person_mapping(SourceSchema(**source_data))
+assert result == TargetSchema(**target)
 ```
 
-See the [tests](/chidian/tests) for some use-cases.
-
-## Feature highlights
-
-| Feature          | In one line                                                                    |
-| ---------------- | ------------------------------------------------------------------------------ |
-| **Mapper**       | Pure dict→dict runtime transformations – no schema required.                   |
-| **DataMapping**  | Adds Pydantic validation around a `Mapper` for safe, forward-only transforms.  |
-| **Partials API** | `\|` operator chains (`split \| last \| upper`) keep lambdas away.             |
-| **Table**        | Lightweight sparse table: path queries, joins, pandas/polars interop.          |
-| **Lexicon**      | Bidirectional code look‑ups *(LOINC ↔ SNOMED)* with defaults + metadata.       |
+## Core Features
+
+| Component        | Purpose                                                         |
+| ---------------- | --------------------------------------------------------------- |
+| **Mapper**       | Dict→dict transformations with optional schema validation       |
+| **Partials API** | Composable operators for concise transformation chains          |
+| **Table**        | Sparse tables with path queries, joins, pandas/polars interop   |
+| **Lexicon**      | Bidirectional code lookups (e.g., LOINC ↔ SNOMED) with metadata |
 
-## Table: DataFrames interoperability
+## Table & DataFrames
 
-The `Table` class provides seamless conversion to pandas and polars DataFrames via optional dependencies:
-
-### Installation
+Seamless conversion between chidian Tables and pandas/polars:
 
 ```bash
-# For pandas support
-pip install 'chidian[pandas]'
-
-# For polars support
-pip install 'chidian[polars]'
-
-# For both
-pip install 'chidian[dfs]'
+pip install 'chidian[pandas]'  # pandas support
+pip install 'chidian[polars]'  # polars support
+pip install 'chidian[dfs]'     # both
 ```
 
-### Usage
-
 ```python
 from chidian.table import Table
 
-# Create a table
 table = Table([
     {"name": "Alice", "age": 30},
    {"name": "Bob", "age": 25}
 ])
 
-# Convert to pandas (with row keys as index)
-df_pd = table.to_pandas(index=True)  # pandas index from row keys
-
-# Convert to polars (with row keys as column)
-df_pl = table.to_polars(add_index=True)  # polars gets '_index' column
+df_pd = table.to_pandas(index=True)
+df_pl = table.to_polars(add_index=True)
 ```
 
-## Flattening nested data
+### Flatten Nested Data
 
-The `Table` class provides powerful flattening capabilities to convert nested dictionaries and lists into flat, column-based structures using intuitive path notation:
+Convert nested structures into flat, column-based tables:
 
 ```python
-from chidian.table import Table
-
-# Create table with nested data
 table = Table([
     {"user": {"name": "John", "prefs": ["email", "sms"]}, "id": 123},
     {"user": {"name": "Jane", "prefs": ["phone"]}, "id": 456}
 ])
 
-# Flatten nested structures
+# Flatten with intuitive path notation
 flat = table.flatten()
 print(flat.columns)
 # {'id', 'user.name', 'user.prefs[0]', 'user.prefs[1]'}
 
-# Direct export with flattening
-df = table.to_pandas(flatten=True)  # Flat pandas DataFrame
-df = table.to_polars(flatten=True)  # Flat polars DataFrame
-table.to_csv("flat.csv", flatten=True)  # Flat CSV with path columns
+# Export flattened data
+table.to_pandas(flatten=True)
+table.to_polars(flatten=True)
+table.to_csv("flat.csv", flatten=True)
 
-# Control flattening depth and array limits
-limited = table.flatten(max_depth=2, array_index_limit=5)
+# Control flattening behavior
+table.flatten(max_depth=2, array_index_limit=5)
 ```
 
-**Key features:**
-- **Intuitive paths**: `user.name`, `items[0]`, `data.settings.theme`
-- **Sparse-friendly**: Different nesting across rows creates union of all paths
-- **Special key handling**: Keys with dots/brackets use bracket notation: `["key.with.dots"]`
-- **Depth control**: Limit recursion to prevent over-flattening
-- **Array limits**: Cap array indices to manage large arrays
-- **Seamless integration**: All Table operations (join, select, group_by) work on flattened data
+**Features:**
+- Path notation: `user.name`, `items[0]`, `data.settings.theme`
+- Handles sparse data (different nesting per row)
+- Special key escaping for dots/brackets
+- Depth and array size controls
 
-## Powered by Pydantic
+## Design Philosophy
 
-chidian treats **Pydantic v2 models as first‑class citizens**:
+Built by data engineers, for data engineers. chidian solves common pain points:
 
-* Validate inputs & outputs automatically with Pydantic v2
-* `DataMapping` wraps your `Mapper` for IDE completion & mypy.
-* You can drop down to plain dicts when prototyping with `strict=False`.
+**Challenges:**
+- Verbose edge-case handling
+- Hard to share one-off code
+- Difficult collaboration on data transformations
 
+**Solutions:**
+- **Iterate over perfection**: Learn and adapt as you build
+- **Functions as first-class objects**: Compose transformations cleanly
+- **JSON-first**: Simple, universal data structures
 
-## Motivation + Philosophy
+chidian applies functional programming principles to data mappings, drawing inspiration from [Pydantic](https://github.com/pydantic/pydantic), [JMESPath](https://github.com/jmespath), [funcy](https://github.com/Suor/funcy), and others.
 
-This is a library for data engineers by a data engineer. Data engineering touches many parts of the stack, and the heuristics for data engineering offer some subtle differences from traditional software engineering.
-
-The goals of the library are:
-1. Make fast, reliable, and readable data mappings
-2. Make it easy to build-on + share pre-existing mappings (so we don't need to start from scratch every time!)
-
-Several challenges come up with traditional data mapping code:
-1. **It's verbose**: Data can be very messy and has a lot of edge cases
-2. **It's hard to share**: Code is often written for one-off use-cases
-3. **It's difficult to collaborate**: Data interoperability becomes more difficult when subtle cases
-
-chidian aims to solve these issues by taking stronger opinions on common operations:
-1. **Prefer iteration over exactness**: With data, we learn as we iterate and use what we need!
-2. **Prefer using functions as objects**: Simplify code by passing functions as first-class objects.
-3. **Prefer JSON-like structures**: No toml, yaml, xml -- just JSON (for now...).
+## Contributing
 
-The heart of chidian is applying [functional programming](https://en.wikipedia.org/wiki/Functional_programming) principles to data mappings.
-Ideas from this repo are inspired from functional programming and other libraries (e.g. [Pydantic](https://github.com/pydantic/pydantic), [JMESPath](https://github.com/jmespath), [funcy](https://github.com/Suor/funcy), [Boomerang](https://github.com/boomerang-lang/boomerang/tree/master), [lens](https://hackage.haskell.org/package/lens), etc.)
+Contributions welcome! Open an issue to discuss your idea before submitting a PR.
 
-## Contributing
+---
 
-All contributions welcome! Please open an Issue and tag me -- I'll make sure to get back to you and we can scope out a PR.
+See [tests](/chidian/tests) for more examples.
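The pipe-style partials from the previous README remain supported alongside the new method-chaining style; a minimal sketch (hypothetical data, API as exercised in tests/test_partials.py later in this diff):

```python
import chidian.partials as p

data = {"user": {"email": "JOHN.DOE@EXAMPLE.COM", "tags": ["admin", "user"]}}

# Pipe style: compose chainable functions with `|`
normalize_pipe = p.get("user.email") | p.lower | p.split("@") | p.first

# Method-chaining style: the equivalent, reading left to right
normalize_dot = p.get("user.email").lower().split("@").first()

assert normalize_pipe(data) == normalize_dot(data) == "john.doe"
```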
diff --git a/chidian/__init__.py b/chidian/__init__.py
index 7f77687..208f3b9 100644
--- a/chidian/__init__.py
+++ b/chidian/__init__.py
@@ -1,5 +1,4 @@
 from .core import get, put
-from .data_mapping import DataMapping
 from .lexicon import Lexicon, LexiconBuilder
 from .lib.get_dsl_parser import parse_path_peg as parse_path
 from .mapper import DROP, KEEP, Mapper, MapperResult, ValidationMode
@@ -12,7 +11,6 @@
     "parse_path",
     "Table",
     "Mapper",
-    "DataMapping",
     "Lexicon",
     "LexiconBuilder",
     "DROP",
diff --git a/chidian/data_mapping.py b/chidian/data_mapping.py
deleted file mode 100644
index cfc9725..0000000
--- a/chidian/data_mapping.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""
-DataMapping class for pure semantic transformation definitions.
-"""
-
-from typing import Any, Callable, Dict, Optional, Type, TypeVar
-
-from pydantic import BaseModel
-
-# Define generic type variables bounded to BaseModel
-_InModel = TypeVar("_InModel", bound=BaseModel)
-_OutModel = TypeVar("_OutModel", bound=BaseModel)
-
-
-class DataMapping:
-    """
-    Pure semantic transformation definition.
-    Only defines WHAT to transform, not HOW to execute it.
-    """
-
-    def __init__(
-        self,
-        transformations: Dict[str, Callable[[dict], Any] | Any],
-        input_schema: Optional[Type[BaseModel]] = None,
-        output_schema: Optional[Type[BaseModel]] = None,
-    ):
-        """
-        Initialize a semantic data mapping.
-
-        Args:
-            transformations: Dict mapping output fields to transformations
-            input_schema: Optional Pydantic model for input validation
-            output_schema: Optional Pydantic model for output validation
-        """
-        if not isinstance(transformations, dict):
-            raise TypeError(
-                f"Transformations must be dict, got {type(transformations).__name__}"
-            )
-
-        self.transformations = transformations
-        self.input_schema = input_schema
-        self.output_schema = output_schema
-
-    def transform(self, data: dict) -> dict:
-        """
-        Apply the pure transformation logic.
-        This is the core semantic transformation without any validation.
-        """
-        result = {}
-
-        for target_field, transform_spec in self.transformations.items():
-            if callable(transform_spec):
-                result[target_field] = transform_spec(data)
-            else:
-                result[target_field] = transform_spec
-
-        return result
-
-    @property
-    def has_schemas(self) -> bool:
-        """Check if this mapping has any schemas defined."""
-        return self.input_schema is not None or self.output_schema is not None
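For downstream users, the migration implied by this deletion, sketched with hypothetical models (the `Mapper` keyword API is the one introduced in chidian/mapper.py below):

```python
from pydantic import BaseModel

import chidian.partials as p
from chidian import Mapper, ValidationMode


class Patient(BaseModel):
    id: str
    name: str


class Observation(BaseModel):
    subject_ref: str
    performer: str


# Before (removed in this diff):
#   data_mapping = DataMapping(transformations={...}, input_schema=Patient,
#                              output_schema=Observation)
#   mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)

# After: Mapper takes the transformations directly; input schemas become
# metadata-only and are never enforced at runtime
mapper = Mapper(
    transformations={
        "subject_ref": p.get("id"),
        "performer": p.get("name"),
    },
    min_input_schemas=[Patient],  # documentation only
    output_schema=Observation,    # still validated
    mode=ValidationMode.STRICT,
)

obs = mapper({"id": "123", "name": "John"})
assert isinstance(obs, Observation)
```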
-""" - -from typing import Any, Callable, Dict, Optional, Type, TypeVar - -from pydantic import BaseModel - -# Define generic type variables bounded to BaseModel -_InModel = TypeVar("_InModel", bound=BaseModel) -_OutModel = TypeVar("_OutModel", bound=BaseModel) - - -class DataMapping: - """ - Pure semantic transformation definition. - Only defines WHAT to transform, not HOW to execute it. - """ - - def __init__( - self, - transformations: Dict[str, Callable[[dict], Any] | Any], - input_schema: Optional[Type[BaseModel]] = None, - output_schema: Optional[Type[BaseModel]] = None, - ): - """ - Initialize a semantic data mapping. - - Args: - transformations: Dict mapping output fields to transformations - input_schema: Optional Pydantic model for input validation - output_schema: Optional Pydantic model for output validation - """ - if not isinstance(transformations, dict): - raise TypeError( - f"Transformations must be dict, got {type(transformations).__name__}" - ) - - self.transformations = transformations - self.input_schema = input_schema - self.output_schema = output_schema - - def transform(self, data: dict) -> dict: - """ - Apply the pure transformation logic. - This is the core semantic transformation without any validation. - """ - result = {} - - for target_field, transform_spec in self.transformations.items(): - if callable(transform_spec): - result[target_field] = transform_spec(data) - else: - result[target_field] = transform_spec - - return result - - @property - def has_schemas(self) -> bool: - """Check if this mapping has any schemas defined.""" - return self.input_schema is not None or self.output_schema is not None diff --git a/chidian/lib/data_mapping_helpers.py b/chidian/lib/data_mapping_helpers.py index f6ead4d..adf6238 100644 --- a/chidian/lib/data_mapping_helpers.py +++ b/chidian/lib/data_mapping_helpers.py @@ -11,18 +11,6 @@ _OutModel = TypeVar("_OutModel", bound=BaseModel) -def validate_schemas(input_schema: Type, output_schema: Type) -> None: - """Validate that schemas are Pydantic BaseModel classes.""" - if not is_pydantic_model(input_schema): - raise TypeError( - f"input_schema must be a Pydantic BaseModel, got {type(input_schema)}" - ) - if not is_pydantic_model(output_schema): - raise TypeError( - f"output_schema must be a Pydantic BaseModel, got {type(output_schema)}" - ) - - def is_pydantic_model(model_class: Type) -> bool: """Check if a class is a Pydantic BaseModel.""" try: @@ -35,19 +23,6 @@ def is_pydantic_model(model_class: Type) -> bool: return False -def validate_input(data: Any, input_schema: Type[_InModel]) -> _InModel: - """Validate input data against input schema.""" - if isinstance(data, input_schema): - return data # type: ignore[return-value] - - # Try to convert dict to model - if isinstance(data, dict): - return input_schema.model_validate(data) # type: ignore[return-value] - - # Try direct validation - return input_schema.model_validate(data) # type: ignore[return-value] - - def to_dict(model: _InModel) -> dict[str, Any]: """Convert Pydantic model to dictionary.""" return model.model_dump() diff --git a/chidian/mapper.py b/chidian/mapper.py index e07779e..91b9fc1 100644 --- a/chidian/mapper.py +++ b/chidian/mapper.py @@ -1,8 +1,17 @@ from dataclasses import dataclass from enum import Enum -from typing import Any, List, Optional - -from pydantic import ValidationError +from typing import ( + Any, + Callable, + Generic, + List, + Mapping, + Optional, + Type, + TypeVar, +) + +from pydantic import BaseModel, ValidationError """ Mapper class - 
execution engine for DataMapping with validation strategies. @@ -15,6 +24,9 @@ Also contains special types for transformation control (DROP, KEEP). """ +# Define generic type variable for output models +_OutT = TypeVar("_OutT", bound=BaseModel) + class ValidationMode(Enum): """Validation modes for mapper execution.""" @@ -34,11 +46,15 @@ class ValidationIssue: value: Any -class MapperResult: +class MapperResult(Generic[_OutT]): """Result of a mapping operation, potentially with validation issues.""" - def __init__(self, data: Any, issues: Optional[List[ValidationIssue]] = None): - self.data = data + def __init__( + self, + data: _OutT | dict[str, Any] | Any, + issues: Optional[List[ValidationIssue]] = None, + ): + self.data: _OutT | dict[str, Any] | Any = data self.issues = issues or [] @property @@ -52,53 +68,78 @@ def raise_if_issues(self): raise ValidationError(f"Validation issues: {'; '.join(messages)}") -class Mapper: +class Mapper(Generic[_OutT]): """ - Execution engine for DataMapping with validation strategies. + Data transformation engine with validation strategies. + Combines semantic transformation definition with execution logic. """ def __init__( self, - data_mapping_or_dict, # DataMapping or dict for backward compatibility + transformations: Mapping[str, Callable[..., Any] | Any], + output_schema: Optional[Type[_OutT]] = None, mode: ValidationMode = ValidationMode.AUTO, + min_input_schemas: Optional[List[Type[BaseModel]]] = None, + other_input_schemas: Optional[List[Type[BaseModel]]] = None, collect_all_errors: bool = True, ): """ - Initialize a Mapper with a DataMapping and execution mode. + Initialize a Mapper with transformations and validation configuration. Args: - data_mapping_or_dict: DataMapping instance or dict for backward compatibility + transformations: Dict mapping output fields to transformations + output_schema: Optional Pydantic model for output validation mode: Validation mode (strict, flexible, or auto) + min_input_schemas: Minimal set of source models (metadata-only) + other_input_schemas: Additional source models (metadata-only) collect_all_errors: In flexible mode, whether to collect all errors """ - # Import here to avoid circular dependency - from .data_mapping import DataMapping - - # Backward compatibility: if dict is passed, create a DataMapping - if isinstance(data_mapping_or_dict, dict): - self.data_mapping = DataMapping(transformations=data_mapping_or_dict) - self._backward_compat = True - elif isinstance(data_mapping_or_dict, DataMapping): - self.data_mapping = data_mapping_or_dict - self._backward_compat = False + # Convert Mapping to dict if needed + if isinstance(transformations, dict): + self.transformations = transformations + elif hasattr(transformations, "items"): + # Support Mapping types by converting to dict + self.transformations = dict(transformations) else: raise TypeError( - f"Expected DataMapping or dict, got {type(data_mapping_or_dict).__name__}" + f"Transformations must be dict or Mapping, got {type(transformations).__name__}" ) + self.output_schema = output_schema + self.min_input_schemas = min_input_schemas or [] + self.other_input_schemas = other_input_schemas or [] + self._backward_compat = False self.collect_all_errors = collect_all_errors # Determine actual mode if mode == ValidationMode.AUTO: self.mode = ( - ValidationMode.STRICT - if self.data_mapping.has_schemas - else ValidationMode.FLEXIBLE + ValidationMode.STRICT if self.has_schemas else ValidationMode.FLEXIBLE ) else: self.mode = mode - def __call__(self, data: Any) 
-> Any | MapperResult: + def transform(self, data: dict) -> dict: + """ + Apply the pure transformation logic. + This is the core semantic transformation without any validation. + """ + result = {} + + for target_field, transform_spec in self.transformations.items(): + if callable(transform_spec): + result[target_field] = transform_spec(data) + else: + result[target_field] = transform_spec + + return result + + @property + def has_schemas(self) -> bool: + """Check if this mapping has output schema defined.""" + return self.output_schema is not None + + def __call__(self, data: Any) -> _OutT | MapperResult[_OutT] | Any: """ Execute the mapping with the configured validation mode. @@ -107,9 +148,9 @@ def __call__(self, data: Any) -> Any | MapperResult: - In flexible mode: MapperResult with data and any validation issues - In backward compat mode with dict: Always returns dict """ - # Backward compatibility mode - always return dict - if self._backward_compat and not self.data_mapping.has_schemas: - return self.data_mapping.transform(data) + # For non-schema mode, just return dict + if not self.has_schemas and self.mode == ValidationMode.FLEXIBLE: + return self.transform(data) if self.mode == ValidationMode.STRICT: return self._execute_strict(data) @@ -119,55 +160,32 @@ def __call__(self, data: Any) -> Any | MapperResult: def _execute_strict(self, data: Any) -> Any: """Execute with strict validation - raise on any errors.""" # Import helpers here to avoid circular dependency - from .lib.data_mapping_helpers import to_dict, validate_input, validate_output + from .lib.data_mapping_helpers import to_dict, validate_output - # Validate input if schema provided - if self.data_mapping.input_schema: - validated_input = validate_input(data, self.data_mapping.input_schema) - input_dict = to_dict(validated_input) - else: - input_dict = to_dict(data) if hasattr(data, "model_dump") else data + # Convert input to dict if needed (no validation) + input_dict = to_dict(data) if hasattr(data, "model_dump") else data # Apply transformation - output_dict = self.data_mapping.transform(input_dict) + output_dict = self.transform(input_dict) # Validate output if schema provided - if self.data_mapping.output_schema: - return validate_output(output_dict, self.data_mapping.output_schema) + if self.output_schema: + return validate_output(output_dict, self.output_schema) return output_dict def _execute_flexible(self, data: Any) -> MapperResult: """Execute with flexible validation - collect errors but continue.""" # Import helpers here to avoid circular dependency - from .lib.data_mapping_helpers import to_dict, validate_input, validate_output + from .lib.data_mapping_helpers import to_dict, validate_output issues = [] - # Try to validate input - input_dict = None - if self.data_mapping.input_schema: - try: - validated_input = validate_input(data, self.data_mapping.input_schema) - input_dict = to_dict(validated_input) - except ValidationError as e: - # Collect input validation errors - for error in e.errors(): - issues.append( - ValidationIssue( - stage="input", - field=".".join(str(loc) for loc in error["loc"]), - error=error["msg"], - value=error.get("input"), - ) - ) - # Continue with raw data - input_dict = to_dict(data) if hasattr(data, "model_dump") else data - else: - input_dict = to_dict(data) if hasattr(data, "model_dump") else data + # Convert input to dict if needed (no validation) + input_dict = to_dict(data) if hasattr(data, "model_dump") else data # Apply transformation (might fail if input validation 
failed) try: - output_dict = self.data_mapping.transform(input_dict) + output_dict = self.transform(input_dict) except Exception as e: # If transformation fails, return with error issues.append( @@ -179,11 +197,9 @@ def _execute_flexible(self, data: Any) -> MapperResult: # Try to validate output final_output: Any = output_dict - if self.data_mapping.output_schema: + if self.output_schema: try: - final_output = validate_output( - output_dict, self.data_mapping.output_schema - ) + final_output = validate_output(output_dict, self.output_schema) except ValidationError as e: # Collect output validation errors for error in e.errors(): diff --git a/chidian/partials.py b/chidian/partials.py index 679c68f..ec90665 100644 --- a/chidian/partials.py +++ b/chidian/partials.py @@ -44,9 +44,52 @@ def __len__(self) -> int: """Number of operations in the chain.""" return len(self.operations) + def __getattr__(self, name: str) -> Any: + """ + Support method chaining by looking up chainable functions in the module. + + This allows: p.get("path").join(" ").upper() instead of p.get("path") | p.join(" ") | p.upper + """ + # Get the current module's globals to look up functions + import sys + + current_module = sys.modules[__name__] + + # Check if the attribute is a known chainable function + if hasattr(current_module, name): + attr = getattr(current_module, name) + # If it's a ChainableFunction or a function that returns one, create a method + if isinstance(attr, ChainableFunction): + # Return a function that chains this operation + def chain_method(*args, **kwargs): + new_op = ( + attr + if not args and not kwargs + else (lambda v: attr(*args, **kwargs)(v)) + ) + return FunctionChain(*self.operations, new_op) + + return chain_method + # If it's a function that returns a ChainableFunction (like split, join, etc.) + elif callable(attr): + + def chain_method(*args, **kwargs): + new_op = attr(*args, **kwargs) + if isinstance(new_op, ChainableFunction): + return FunctionChain(*self.operations, new_op.func) + elif callable(new_op): + return FunctionChain(*self.operations, new_op) + return FunctionChain(*self.operations, lambda v: new_op) + + return chain_method + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{name}'" + ) + class ChainableFunction: - """Wrapper to make any function/partial chainable with |.""" + """Wrapper to make any function/partial chainable with | or method chaining.""" def __init__(self, func: Callable): self.func = func @@ -76,19 +119,60 @@ def __call__(self, *args, **kwargs): """Call the wrapped function.""" return self.func(*args, **kwargs) + def __getattr__(self, name: str) -> Any: + """Support method chaining by delegating to FunctionChain.""" + # Convert to FunctionChain and delegate + return getattr(FunctionChain(self.func), name) + def __repr__(self) -> str: return f"ChainableFunction({self.__name__})" def get( - key: str, default: Any = None, apply: Any = None, strict: bool = False -) -> Callable[[Any], Any]: - """Create a partial function for get operations.""" - + key: str | list[str], default: Any = None, apply: Any = None, strict: bool = False +) -> FunctionChain: + """ + Create a chainable function for get operations. 
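The three return shapes of `Mapper.__call__` above, as a runnable sketch (the `Target` model is hypothetical):

```python
from typing import Any

from pydantic import BaseModel

import chidian.partials as p
from chidian import Mapper, MapperResult, ValidationMode


class Target(BaseModel):
    out: str


# 1. No output_schema: AUTO resolves to FLEXIBLE and a plain dict comes back
plain: Mapper[Any] = Mapper({"out": p.get("in")})
assert plain({"in": "x"}) == {"out": "x"}

# 2. output_schema with AUTO/STRICT: a validated model instance, or an exception
strict = Mapper({"out": p.get("in")}, output_schema=Target)
assert strict({"in": "x"}) == Target(out="x")

# 3. output_schema with FLEXIBLE: a MapperResult carrying data plus any issues
flexible = Mapper(
    {"out": p.get("in")}, output_schema=Target, mode=ValidationMode.FLEXIBLE
)
result = flexible({"in": "x"})
assert isinstance(result, MapperResult) and not result.has_issues
```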
diff --git a/chidian/partials.py b/chidian/partials.py
index 679c68f..ec90665 100644
--- a/chidian/partials.py
+++ b/chidian/partials.py
@@ -44,9 +44,52 @@ def __len__(self) -> int:
         """Number of operations in the chain."""
         return len(self.operations)
 
+    def __getattr__(self, name: str) -> Any:
+        """
+        Support method chaining by looking up chainable functions in the module.
+
+        This allows: p.get("path").join(" ").upper() instead of p.get("path") | p.join(" ") | p.upper
+        """
+        # Get the current module's globals to look up functions
+        import sys
+
+        current_module = sys.modules[__name__]
+
+        # Check if the attribute is a known chainable function
+        if hasattr(current_module, name):
+            attr = getattr(current_module, name)
+            # If it's a ChainableFunction or a function that returns one, create a method
+            if isinstance(attr, ChainableFunction):
+                # Return a function that chains this operation
+                def chain_method(*args, **kwargs):
+                    new_op = (
+                        attr
+                        if not args and not kwargs
+                        else (lambda v: attr(*args, **kwargs)(v))
+                    )
+                    return FunctionChain(*self.operations, new_op)
+
+                return chain_method
+            # If it's a function that returns a ChainableFunction (like split, join, etc.)
+            elif callable(attr):
+
+                def chain_method(*args, **kwargs):
+                    new_op = attr(*args, **kwargs)
+                    if isinstance(new_op, ChainableFunction):
+                        return FunctionChain(*self.operations, new_op.func)
+                    elif callable(new_op):
+                        return FunctionChain(*self.operations, new_op)
+                    return FunctionChain(*self.operations, lambda v: new_op)
+
+                return chain_method
+
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{name}'"
+        )
+
 
 class ChainableFunction:
-    """Wrapper to make any function/partial chainable with |."""
+    """Wrapper to make any function/partial chainable with | or method chaining."""
 
     def __init__(self, func: Callable):
         self.func = func
@@ -76,19 +119,60 @@ def __call__(self, *args, **kwargs):
         """Call the wrapped function."""
         return self.func(*args, **kwargs)
 
+    def __getattr__(self, name: str) -> Any:
+        """Support method chaining by delegating to FunctionChain."""
+        # Convert to FunctionChain and delegate
+        return getattr(FunctionChain(self.func), name)
+
     def __repr__(self) -> str:
         return f"ChainableFunction({self.__name__})"
 
 
 def get(
-    key: str, default: Any = None, apply: Any = None, strict: bool = False
-) -> Callable[[Any], Any]:
-    """Create a partial function for get operations."""
-
+    key: str | list[str], default: Any = None, apply: Any = None, strict: bool = False
+) -> FunctionChain:
+    """
+    Create a chainable function for get operations.
+
+    Args:
+        key: Path string (e.g., "data.items[0].name") or list of paths for multi-path extraction
+        default: Default value if path not found
+        apply: Function(s) to apply to the result (legacy parameter)
+        strict: If True, raise errors on missing paths
+
+    Returns:
+        FunctionChain that extracts values from source data and supports method chaining
+        like .join(), .upper(), etc.
+
+    Examples:
+        # Single path
+        p.get("user.name")(data)
+
+        # Multi-path with chaining
+        p.get(["name.first", "name.given[*]", "name.suffix"]).join(" ", flatten=True)(data)
+    """
+    # Multi-path extraction
+    if isinstance(key, list):
+
+        def multi_get(source):
+            values = []
+            for path in key:
+                val = _get(source, path, default=default, strict=strict)
+                if val is not None:
+                    # If val is a list, extend; otherwise append
+                    if isinstance(val, list):
+                        values.extend(val)
+                    else:
+                        values.append(val)
+            return values
+
+        return FunctionChain(multi_get)
+
+    # Single path - keep backward compatibility
     def get_partial(source):
         return _get(source, key, default=default, apply=apply, strict=strict)
 
-    return get_partial
+    return FunctionChain(get_partial)
 
 
 # Arithmetic operations
@@ -150,11 +234,36 @@ def replace(old: str, new: str) -> ChainableFunction:
     )
 
 
-def join(sep: str) -> ChainableFunction:
-    """Create a chainable join function."""
-    return ChainableFunction(
-        partial(lambda separator, items: separator.join(items), sep)
-    )
+def join(sep: str, flatten: bool = False) -> ChainableFunction:
+    """
+    Create a chainable join function.
+
+    Args:
+        sep: Separator string
+        flatten: If True, flatten nested lists and filter None values before joining
+    """
+    if flatten:
+
+        def join_flatten(items):
+            flat = []
+            for item in items:
+                if isinstance(item, list):
+                    # Flatten nested list
+                    flat.extend(str(x) for x in item if x is not None)
+                elif item is not None:
+                    flat.append(str(item))
+            return sep.join(flat)
+
+        return ChainableFunction(join_flatten)
+    else:
+        return ChainableFunction(
+            partial(
+                lambda separator, items: separator.join(
+                    str(x) for x in items if x is not None
+                ),
+                sep,
+            )
+        )
 
 
 # Array/List operations as ChainableFunction
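How the new multi-path `get` composes with `join(flatten=True)` — a small sketch mirroring the README example (data is hypothetical):

```python
import chidian.partials as p

data = {"name": {"first": "Gandalf", "given": ["the", "Grey"], "suffix": None}}

# Multi-path get: list results are extended, None results are skipped
parts = p.get(["name.first", "name.given[*]", "name.suffix"])(data)
assert parts == ["Gandalf", "the", "Grey"]

# join(flatten=True) additionally flattens nested lists and drops None
full_name = p.get(["name.first", "name.given[*]", "name.suffix"]).join(
    " ", flatten=True
)
assert full_name(data) == "Gandalf the Grey"
```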
diff --git a/pyproject.toml b/pyproject.toml
index 75a522a..60d75d7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "chidian"
-version = "0.1.4"
+version = "0.1.5"
 requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Python :: 3",
diff --git a/tests/test_data_mapping.py b/tests/test_data_mapping.py
index bde6a6f..83fc3f4 100644
--- a/tests/test_data_mapping.py
+++ b/tests/test_data_mapping.py
@@ -1,12 +1,12 @@
 """Test the new DataMapping class and Mapper with validation modes."""
 
-from typing import Optional
+from typing import Any, Optional
 
 import pytest
 from pydantic import BaseModel
 
 import chidian.partials as p
-from chidian import DataMapping, Mapper, MapperResult, ValidationMode
+from chidian import Mapper, MapperResult, ValidationMode
 
 
 # Test models
@@ -29,19 +29,17 @@ class TestDataMappingBasic:
 
     def test_simple_mapping_with_mapper(self) -> None:
         """Test DataMapping with Mapper for basic field mapping."""
-        # Create a DataMapping for transformation
-        data_mapping = DataMapping(
+        # Create a Mapper for transformation
+        mapper = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.STRICT,
         )
 
-        # Create Mapper with DataMapping
-        mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
-
         patient = Patient(id="123", name="John", active=True)
         obs = mapper(patient)
 
@@ -51,18 +49,17 @@ def test_simple_mapping_with_mapper(self) -> None:
 
     def test_complex_mapping_with_callable_mapper(self) -> None:
         """Test DataMapping with callable transformations."""
-        data_mapping = DataMapping(
+        mapper = Mapper(
             transformations={
                 "subject_ref": lambda data: f"Patient/{data['id']}",
                 "performer": lambda data: data["name"].upper(),
                 "status": lambda data: "active" if data["active"] else "inactive",
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.STRICT,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
-
         patient = Patient(id="123", name="john", active=True)
         obs = mapper(patient)
 
@@ -73,68 +70,75 @@ def test_complex_mapping_with_callable_mapper(self) -> None:
 
     def test_validation_modes(self) -> None:
         """Test different validation modes."""
-        data_mapping = DataMapping(
+        # Test strict mode
+        strict_mapper = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.STRICT,
         )
-
-        # Test strict mode
-        strict_mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
         patient = Patient(id="123", name="John", active=True)
         obs = strict_mapper(patient)
         assert isinstance(obs, Observation)
         assert obs.subject_ref == "123"
 
         # Test flexible mode
-        flexible_mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE)
+        flexible_mapper = Mapper(
+            transformations={
+                "subject_ref": p.get("id"),
+                "performer": p.get("name"),
+            },
+            min_input_schemas=[Patient],
+            output_schema=Observation,
+            mode=ValidationMode.FLEXIBLE,
+        )
         result = flexible_mapper(patient)
         assert isinstance(result, MapperResult)
         assert not result.has_issues
+        assert isinstance(result.data, Observation)
         assert result.data.subject_ref == "123"
 
 
 class TestDataMappingValidation:
     """Test DataMapping validation features."""
 
-    def test_input_validation(self) -> None:
-        """Test that Mapper validates input against input schema."""
-        data_mapping = DataMapping(
+    def test_no_input_validation(self) -> None:
+        """Test that Mapper no longer validates input (min_input_schemas is metadata-only)."""
+        mapper = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.STRICT,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
-
         # Valid input works
         patient = Patient(id="123", name="John", active=True)
         obs = mapper(patient)
         assert isinstance(obs, Observation)
         assert obs.subject_ref == "123"
 
-        # Invalid input should raise ValidationError in strict mode
-        with pytest.raises(Exception):  # Pydantic ValidationError
+        # Invalid input now works because no input validation occurs
+        # Will fail on output validation due to missing required fields
+        with pytest.raises(Exception):  # Output validation error
             mapper({"invalid": "data"})
 
     def test_output_validation(self) -> None:
         """Test that Mapper validates output against output schema."""
-        # DataMapping that produces invalid output
-        data_mapping = DataMapping(
+        # Mapper that produces invalid output
+        mapper = Mapper(
             transformations={
                 "invalid_field": lambda data: "value",  # Missing required fields
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.STRICT,
         )
-
-        mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
         patient = Patient(id="123", name="John", active=True)
 
         # Should raise ValidationError due to invalid output in strict mode
@@ -143,16 +147,15 @@ def test_output_validation(self) -> None:
 
     def test_flexible_mode_validation(self) -> None:
         """Test flexible mode collects validation errors."""
-        # DataMapping that produces invalid output
-        data_mapping = DataMapping(
+        # Mapper that produces invalid output
+        mapper = Mapper(
             transformations={
                 "invalid_field": lambda data: "value",  # Missing required fields
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.FLEXIBLE,
         )
-
-        mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE)
         patient = Patient(id="123", name="John", active=True)
 
         # Should return MapperResult with issues
@@ -164,17 +167,16 @@ def test_flexible_mode_validation(self) -> None:
 
     def test_dict_input_with_strict_mode(self) -> None:
         """Test handling of dict input in strict mode."""
-        data_mapping = DataMapping(
+        mapper = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
+            mode=ValidationMode.STRICT,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
-
         # Dict input should be validated and converted
         dict_input = {"id": "123", "name": "John", "active": True}
         obs = mapper(dict_input)
@@ -184,29 +186,25 @@ def test_dict_input_with_strict_mode(self) -> None:
 
     def test_auto_mode(self) -> None:
         """Test auto mode behavior."""
-        # With schemas - should use strict mode
-        data_mapping_with_schemas = DataMapping(
+        # With output schema - should use strict mode
+        mapper_with_schemas = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
             },
-            input_schema=Patient,
+            min_input_schemas=[Patient],
             output_schema=Observation,
-        )
-
-        mapper = Mapper(data_mapping_with_schemas)  # AUTO mode by default
-        assert mapper.mode == ValidationMode.STRICT
+        )  # AUTO mode by default
+        assert mapper_with_schemas.mode == ValidationMode.STRICT
 
         # Without schemas - should use flexible mode
-        data_mapping_no_schemas = DataMapping(
+        mapper_no_schemas: Mapper[Any] = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
             }
-        )
-
-        mapper2 = Mapper(data_mapping_no_schemas)  # AUTO mode by default
-        assert mapper2.mode == ValidationMode.FLEXIBLE
+        )  # AUTO mode by default
+        assert mapper_no_schemas.mode == ValidationMode.FLEXIBLE
 
 
 class TestDataMappingWithoutSchemas:
@@ -214,7 +212,7 @@ class TestDataMappingWithoutSchemas:
 
     def test_pure_transformation(self) -> None:
         """Test DataMapping as pure transformation without schemas."""
-        data_mapping = DataMapping(
+        mapper: Mapper[Any] = Mapper(
             transformations={
                 "subject_ref": p.get("id"),
                 "performer": p.get("name"),
@@ -222,39 +220,38 @@ def test_pure_transformation(self) -> None:
         )
 
         # Direct transformation
-        result = data_mapping.transform({"id": "123", "name": "John"})
+        result = mapper.transform({"id": "123", "name": "John"})
         assert result["subject_ref"] == "123"
         assert result["performer"] == "John"
 
     def test_with_flexible_mapper(self) -> None:
         """Test DataMapping without schemas using flexible Mapper."""
-        data_mapping = DataMapping(
+        mapper: Mapper[Any] = Mapper(
             transformations={
                 "subject_ref": lambda data: f"Patient/{data.get('id', 'unknown')}",
                 "performer": lambda data: data.get("name", "Unknown"),
                 "status": lambda data: "processed",
-            }
+            },
+            mode=ValidationMode.FLEXIBLE,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE)
-
         # Should work with incomplete data
         result = mapper({"id": "123"})
-        assert isinstance(result, MapperResult)
-        assert result.data["subject_ref"] == "Patient/123"
-        assert result.data["performer"] == "Unknown"
-        assert result.data["status"] == "processed"
+        # Without schemas, returns dict directly
+        assert isinstance(result, dict)
+        assert result["subject_ref"] == "Patient/123"
+        assert result["performer"] == "Unknown"
+        assert result["status"] == "processed"
 
     def test_mapper_result_interface(self) -> None:
         """Test MapperResult interface."""
-        data_mapping = DataMapping(
+        mapper = Mapper(
             transformations={
                 "missing_field": p.get("nonexistent"),
             },
             output_schema=Observation,
+            mode=ValidationMode.FLEXIBLE,
         )
-
-        mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE)
         result = mapper({"id": "123"})
 
         assert isinstance(result, MapperResult)
@@ -263,3 +260,133 @@ def test_mapper_result_interface(self) -> None:
         # Test raise_if_issues
         with pytest.raises(Exception):
             result.raise_if_issues()
+
+
+class TestManyToOneMapping:
+    """Test many-to-one mapping metadata functionality."""
+
+    def test_min_input_schemas_metadata(self) -> None:
+        """Test that min_input_schemas is stored as metadata."""
+
+        class Encounter(BaseModel):
+            id: str
+            status: str
+            period_start: str
+
+        mapper = Mapper(
+            transformations={
+                "subject_ref": lambda data: f"Patient/{data.get('patient_id', 'unknown')}",
+                "encounter_ref": lambda data: f"Encounter/{data.get('encounter_id', 'unknown')}",
+                "status": lambda data: data.get("status", "unknown"),
+            },
+            min_input_schemas=[Patient, Encounter],
+            output_schema=Observation,
+        )
+
+        # Verify metadata is stored
+        assert mapper.min_input_schemas == [Patient, Encounter]
+        assert len(mapper.min_input_schemas) == 2
+
+    def test_other_input_schemas_metadata(self) -> None:
+        """Test that other_input_schemas is stored as metadata."""
+
+        class Encounter(BaseModel):
+            id: str
+            status: str
+
+        class Practitioner(BaseModel):
+            id: str
+            name: str
+
+        mapper = Mapper(
+            transformations={
+                "subject_ref": p.get("patient_id"),
+                "performer": p.get("practitioner_name"),
+                "encounter_ref": p.get("encounter_id"),
+            },
+            min_input_schemas=[Patient],
+            other_input_schemas=[Encounter, Practitioner],
+            output_schema=Observation,
+        )
+
+        # Verify metadata is stored
+        assert mapper.min_input_schemas == [Patient]
+        # Type: ignore for mypy - local classes are BaseModel subclasses
+        assert mapper.other_input_schemas == [Encounter, Practitioner]  # type: ignore[list-item]
+        assert len(mapper.other_input_schemas) == 2
+
+    def test_metadata_not_enforced_at_runtime(self) -> None:
+        """Test that input schemas are not enforced during transformation."""
+
+        class CompletelyDifferentModel(BaseModel):
+            foo: str
+            bar: int
+
+        mapper = Mapper(
+            transformations={
+                "subject_ref": lambda data: f"Patient/{data.get('totally_different_field', '123')}",
+                "performer": lambda data: "Dr. Smith",
+            },
+            min_input_schemas=[CompletelyDifferentModel],  # This is just metadata
+            output_schema=Observation,
+            mode=ValidationMode.STRICT,
+        )
+
+        # Can pass any dict, not enforced to match CompletelyDifferentModel
+        result = mapper(
+            {"totally_different_field": "xyz", "some_other_field": "ignored"}
+        )
+
+        assert isinstance(result, Observation)
+        assert result.subject_ref == "Patient/xyz"
+        assert result.performer == "Dr. Smith"
+
+    def test_empty_schemas_lists(self) -> None:
+        """Test Mapper with empty or None schema lists."""
+        # Test with None (should default to empty lists)
+        mapper1: Mapper[Any] = Mapper(
+            transformations={
+                "field1": p.get("source1"),
+            },
+            min_input_schemas=None,
+            other_input_schemas=None,
+        )
+
+        assert mapper1.min_input_schemas == []
+        assert mapper1.other_input_schemas == []
+
+        # Test with explicit empty lists
+        mapper2: Mapper[Any] = Mapper(
+            transformations={
+                "field2": p.get("source2"),
+            },
+            min_input_schemas=[],
+            other_input_schemas=[],
+        )
+
+        assert mapper2.min_input_schemas == []
+        assert mapper2.other_input_schemas == []
+
+    def test_has_schemas_only_checks_output(self) -> None:
+        """Test that has_schemas only checks for output_schema."""
+        # With min_input_schemas but no output_schema
+        mapper1: Mapper[Any] = Mapper(
+            transformations={"field": p.get("source")},
+            min_input_schemas=[Patient],
+        )
+        assert not mapper1.has_schemas
+
+        # With output_schema
+        mapper2 = Mapper(
+            transformations={"field": p.get("source")},
+            output_schema=Observation,
+        )
+        assert mapper2.has_schemas
+
+        # With both min_input_schemas and output_schema
+        mapper3 = Mapper(
+            transformations={"field": p.get("source")},
+            min_input_schemas=[Patient],
+            output_schema=Observation,
+        )
+        assert mapper3.has_schemas
diff --git a/tests/test_mapper.py b/tests/test_mapper.py
index 0cd70ee..4de5b77 100644
--- a/tests/test_mapper.py
+++ b/tests/test_mapper.py
@@ -1,4 +1,4 @@
-"""Tests for Mapper as independent dict->dict transformer and DataMapping executor."""
+"""Tests for Mapper as independent dict->dict transformer and validation engine."""
 
 from typing import Any
 
@@ -6,7 +6,7 @@
 from pydantic import BaseModel
 
 import chidian.partials as p
-from chidian import DataMapping, Mapper, MapperResult, ValidationMode, get
+from chidian import Mapper, MapperResult, ValidationMode, get
 
 
 class TestMapperBasic:
@@ -18,7 +18,7 @@ class TestMapperBasic:
             "patient_id": p.get("data.patient.id"),
             "is_active": p.get("data.patient.active"),
         }
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {
             "data": {"patient": {"id": "abc123", "active": True}, "other": "value"}
@@ -38,7 +38,7 @@ def test_callable_mapping(self) -> None:
             "status": lambda data: "processed",
         }
 
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {
             "data": {"patient": {"id": "abc123", "active": True}, "other": "value"}
@@ -86,7 +86,7 @@ def backup_name_transform(data: dict) -> str:
             "backup_name": backup_name_transform,
         }
 
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {
             "firstName": "John",
@@ -127,7 +127,7 @@ def test_mapper_with_dict_mapping_containing_callable(self) -> None:
             "transformed": lambda data: data.get("value", "").upper(),
             "partial": p.get("nested.value") | p.upper,
         }
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {
             "path": {"to": {"value": "hello"}},
@@ -148,14 +148,14 @@ def failing_mapper(data: dict) -> str:
             raise ValueError("Test error")
 
         mapping: dict[str, Any] = {"result": failing_mapper}
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         with pytest.raises(ValueError, match="Test error"):
             mapper({"test": "data"})
 
     def test_mapper_with_empty_mapping(self) -> None:
         """Test Mapper with empty mapping."""
-        mapper = Mapper({})
+        mapper: Mapper[Any] = Mapper({})
         result = mapper({"input": "data"})
         assert result == {}
 
@@ -168,7 +168,7 @@ def test_mapper_with_constant_values(self) -> None:
             "constant_none": None,
             "dynamic_value": p.get("input.value"),
         }
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {"input": {"value": "dynamic"}, "ignored": "data"}
         result = mapper(input_data)
@@ -192,7 +192,7 @@ def nested_transform(data: dict) -> dict:
             "nested": nested_transform,
         }
 
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {"simple": {"value": "test"}, "another": {"path": "nested_test"}}
 
@@ -209,7 +209,7 @@ class TestMapperCalling:
     def test_mapper_callable_interface(self) -> None:
         """Test that Mapper can be called directly."""
         mapping = {"output": p.get("input")}
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {"input": "test_value"}
         result = mapper(input_data)
@@ -219,7 +219,7 @@ def test_mapper_callable_interface(self) -> None:
     def test_mapper_callable_only(self) -> None:
         """Test that Mapper only has __call__ method (no forward method)."""
         mapping = {"output": p.get("input")}
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         input_data = {"input": "test_value"}
 
@@ -233,7 +233,7 @@ def test_mapper_callable_only(self) -> None:
     def test_mapper_no_reverse(self) -> None:
         """Test that Mapper doesn't support reverse operations."""
         mapping = {"output": p.get("input")}
-        mapper = Mapper(mapping)
+        mapper: Mapper[Any] = Mapper(mapping)
 
         # Should not have reverse method
         assert not hasattr(mapper, "reverse")
@@ -242,18 +242,77 @@ def test_mapper_no_reverse(self) -> None:
         assert not hasattr(mapper, "can_reverse")
 
 
-class TestMapperWithDataMapping:
-    """Test new Mapper functionality with DataMapping."""
+class TestMapperNewSyntax:
+    """Test Mapper with new ergonomic syntax from README."""
+
+    def test_readme_example(self) -> None:
+        """Test the exact example from README with new syntax."""
+        from pydantic import BaseModel
+
+        source_data = {
+            "name": {
+                "first": "Gandalf",
+                "given": ["the", "Grey"],
+                "suffix": None,
+            },
+            "address": {
+                "street": ["Bag End", "Hobbiton"],
+                "city": "The Shire",
+                "postal_code": "ME001",
+                "country": "Middle Earth",
+            },
+        }
+
+        class SourceSchema(BaseModel):
+            name: dict
+            address: dict
+
+        class TargetSchema(BaseModel):
+            full_name: str
+            address: str
+
+        person_mapping = Mapper(
+            {
+                "full_name": p.get(
+                    [
+                        "name.first",
+                        "name.given[*]",
+                        "name.suffix",
+                    ]
+                ).join(" ", flatten=True),
+                "address": p.get(
+                    [
+                        "address.street[*]",
+                        "address.city",
+                        "address.postal_code",
+                        "address.country",
+                    ]
+                ).join("\n", flatten=True),
+            },
+            min_input_schemas=[SourceSchema],
+            output_schema=TargetSchema,
+        )
+
+        source_obj = SourceSchema(**source_data)
+        result = person_mapping(source_obj)
+
+        assert isinstance(result, TargetSchema)
+        assert result.full_name == "Gandalf the Grey"
+        assert result.address == "Bag End\nHobbiton\nThe Shire\nME001\nMiddle Earth"
+
+
+class TestMapperWithValidation:
+    """Test Mapper functionality with validation modes."""
 
     def test_mapper_backward_compatibility(self) -> None:
         """Test that Mapper maintains backward compatibility with dict."""
         # Old-style dict mapping should still work
-        mapper = Mapper({"output": p.get("input")})
+        mapper: Mapper[Any] = Mapper({"output": p.get("input")})
         result = mapper({"input": "test"})
         assert result == {"output": "test"}
 
     def test_mapper_with_data_mapping_strict(self) -> None:
-        """Test Mapper with DataMapping in strict mode."""
+        """Test Mapper with schema validation in strict mode."""
 
         class InputModel(BaseModel):
             name: str
@@ -263,29 +322,30 @@ class OutputModel(BaseModel):
             display_name: str
             age_group: str
 
-        data_mapping = DataMapping(
+        mapper = Mapper(
             transformations={
                 "display_name": p.get("name") | p.upper,
                 "age_group": lambda d: "adult" if d.get("age", 0) >= 18 else "child",
             },
-            input_schema=InputModel,
+            min_input_schemas=[InputModel],
             output_schema=OutputModel,
+            mode=ValidationMode.STRICT,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.STRICT)
-
         # Valid input
         result = mapper({"name": "John", "age": 25})
         assert isinstance(result, OutputModel)
         assert result.display_name == "JOHN"
         assert result.age_group == "adult"
 
-        # Invalid input should raise
-        with pytest.raises(Exception):
-            mapper({"name": "John"})  # Missing age
+        # Input is no longer validated against InputModel, so any dict that the
+        # transformations and output validation can handle succeeds
+        result2 = mapper({"name": "John", "age": 10})
+        assert isinstance(result2, OutputModel)
+        assert result2.age_group == "child"
 
     def test_mapper_with_data_mapping_flexible(self) -> None:
-        """Test Mapper with DataMapping in flexible mode."""
+        """Test Mapper with schema validation in flexible mode."""
 
         class InputModel(BaseModel):
             name: str
@@ -295,57 +355,56 @@ class OutputModel(BaseModel):
             display_name: str
             age_group: str
 
-        data_mapping = DataMapping(
+        mapper = Mapper(
             transformations={
                 "display_name": p.get("name") | p.upper,
                 "age_group": lambda d: "adult" if d.get("age", 0) >= 18 else "child",
             },
-            input_schema=InputModel,
+            min_input_schemas=[InputModel],
             output_schema=OutputModel,
+            mode=ValidationMode.FLEXIBLE,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE)
-
         # Valid input
         result = mapper({"name": "John", "age": 25})
         assert isinstance(result, MapperResult)
         assert not result.has_issues
+        assert isinstance(result.data, OutputModel)
         assert result.data.display_name == "JOHN"
 
-        # Invalid input should return issues
+        # Invalid input no longer causes input validation issues
+        # but missing transformation data causes output issues
        result = mapper({"name": "John"})  # Missing age
         assert isinstance(result, MapperResult)
-        assert result.has_issues
-        assert any(issue.field == "age" for issue in result.issues)
+        # Since age is missing, age_group becomes "child" (default 0 < 18)
+        assert isinstance(result.data, OutputModel)
+        assert result.data.age_group == "child"
 
     def test_mapper_auto_mode(self) -> None:
         """Test Mapper auto mode selection."""
         # With schemas -> strict
-        data_mapping_with_schemas = DataMapping(
+        mapper_with_schemas: Mapper[BaseModel] = Mapper(
             transformations={"out": p.get("in")},
-            input_schema=BaseModel,
+            min_input_schemas=[BaseModel],
             output_schema=BaseModel,
         )
-        mapper = Mapper(data_mapping_with_schemas)
-        assert mapper.mode == ValidationMode.STRICT
+        assert mapper_with_schemas.mode == ValidationMode.STRICT
 
         # Without schemas -> flexible
-        data_mapping_no_schemas = DataMapping(transformations={"out": p.get("in")})
-        mapper = Mapper(data_mapping_no_schemas)
-        assert mapper.mode == ValidationMode.FLEXIBLE
+        mapper_no_schemas: Mapper[Any] = Mapper(transformations={"out": p.get("in")})
+        assert mapper_no_schemas.mode == ValidationMode.FLEXIBLE
 
-    def test_mapper_with_pure_data_mapping(self) -> None:
-        """Test Mapper with DataMapping without schemas."""
-        data_mapping = DataMapping(
+    def test_mapper_with_pure_transformation(self) -> None:
+        """Test Mapper without schemas."""
+        mapper: Mapper[Any] = Mapper(
             transformations={
                 "id": p.get("patient.id"),
                 "name": p.get("patient.name"),
                 "provider": p.get("provider.name", default="Unknown"),
-            }
+            },
+            mode=ValidationMode.FLEXIBLE,
         )
 
-        mapper = Mapper(data_mapping, mode=ValidationMode.FLEXIBLE)
-
         result = mapper(
             {
                 "patient": {"id": "123", "name": "John"},
@@ -353,7 +412,8 @@ def test_mapper_with_pure_data_mapping(self) -> None:
             }
         )
 
-        assert isinstance(result, MapperResult)
-        assert result.data["id"] == "123"
-        assert result.data["name"] == "John"
-        assert result.data["provider"] == "Dr. Smith"
+        # Without schemas, returns dict directly
+        assert isinstance(result, dict)
+        assert result["id"] == "123"
+        assert result["name"] == "John"
+        assert result["provider"] == "Dr. Smith"
diff --git a/tests/test_partials.py b/tests/test_partials.py
index e840619..e059088 100644
--- a/tests/test_partials.py
+++ b/tests/test_partials.py
@@ -231,3 +231,97 @@ def test_real_world_usage():
         p.get("users[0].score") | p.to_float | p.round_to(0) | p.to_int
     )
     assert get_first_user_score(data) == 86
+
+
+def test_multi_path_get():
+    """Test multi-path get() functionality."""
+    data = {
+        "name": {
+            "first": "John",
+            "middle": ["Robert", "James"],
+            "last": "Doe",
+        }
+    }
+
+    # Multi-path extraction returns list of all values
+    get_names = p.get(["name.first", "name.middle[*]", "name.last"])
+    result = get_names(data)
+    assert result == ["John", "Robert", "James", "Doe"]
+
+
+def test_multi_path_get_with_join():
+    """Test multi-path get() with join chaining."""
+    data = {
+        "name": {
+            "first": "Gandalf",
+            "given": ["the", "Grey"],
+            "suffix": None,
+        }
+    }
+
+    # Join without flatten - nested lists stay nested
+    get_name = p.get(["name.first", "name.given", "name.suffix"]).join(" ")
+    result = get_name(data)
+    # Result: "Gandalf ['the', 'Grey']"
+    assert "Gandalf" in result
+
+    # Join with flatten - nested lists are flattened and None filtered
+    get_name_flat = p.get(["name.first", "name.given[*]", "name.suffix"]).join(
+        " ", flatten=True
+    )
+    result = get_name_flat(data)
+    assert result == "Gandalf the Grey"
+
+
+def test_method_chaining_syntax():
+    """Test the new .method chaining syntax."""
+    data = {"user": {"email": "JOHN.DOE@EXAMPLE.COM", "tags": ["admin", "user"]}}
+
+    # Old pipe syntax still works
+    process_email_pipe = p.get("user.email") | p.lower | p.split("@") | p.first
+    assert process_email_pipe(data) == "john.doe"
+
+    # New dot syntax
+    process_email_dot = p.get("user.email").lower().split("@").first()
+    assert process_email_dot(data) == "john.doe"
+
+    # Chaining with parameters
+    process_tags = p.get("user.tags").join(", ")
+    assert process_tags(data) == "admin, user"
+
+
+def test_join_flatten_nested_lists():
+    """Test join() with flatten parameter for nested lists."""
+    # Without flatten
+    data = ["a", ["b", "c"], "d", None]
+    result = p.join(", ")(data)
+    assert "a" in result and "d" in result
+
+    # With flatten
+    result_flat = p.join(", ", flatten=True)(data)
+    assert result_flat == "a, b, c, d"
+
+
+def test_multi_path_with_address():
+    """Test multi-path example from README."""
+    data = {
+        "address": {
+            "street": ["123 Main St", "Apt 4"],
+            "city": "Boston",
+            "postal_code": "02101",
+            "country": "USA",
+        }
+    }
+
+    get_address = p.get(
+        [
+            "address.street[*]",
+            "address.city",
+            "address.postal_code",
+            "address.country",
+        ]
+    ).join("\n", flatten=True)
+
+    result = get_address(data)
+    expected = "123 Main St\nApt 4\nBoston\n02101\nUSA"
+    assert result == expected
""" # For now, manually process KEEP objects since automatic processing isn't implemented @@ -92,12 +93,11 @@ def test_keep_in_transformation(self) -> None: "regular_value": lambda _data: "regular_string", } - data_mapping = DataMapping( + mapper = Mapper( transformations=mapping, - input_schema=SourceData, + min_input_schemas=[SourceData], output_schema=KeepTestTarget, ) - mapper = Mapper(data_mapping) source = SourceData(data={}) result = mapper(source) @@ -160,12 +160,11 @@ def last_previous_address_transform(data: dict) -> str: "last_previous_address": last_previous_address_transform, } - data_mapping = DataMapping( + mapper = Mapper( transformations=mapping, - input_schema=ComplexPersonData, + min_input_schemas=[ComplexPersonData], output_schema=FlatPersonData, ) - mapper = Mapper(data_mapping) source = ComplexPersonData.model_validate(test_A) result = mapper(source) diff --git a/uv.lock b/uv.lock index d4833a6..9b7e3c0 100644 --- a/uv.lock +++ b/uv.lock @@ -30,7 +30,7 @@ wheels = [ [[package]] name = "chidian" -version = "0.1.4" +version = "0.1.5" source = { editable = "." } dependencies = [ { name = "parsimonious" },