Merged
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,6 @@ venv/
.mypy_cache/
.ruff_cache/
# END roobert

.pithy/
.claude*
203 changes: 89 additions & 114 deletions README.md
Expand Up @@ -2,174 +2,149 @@

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

> Declarative, type-safe data mapping for humans.
> Declarative, type-safe data mapping for savvy data engineers

chidian is a pure Python framework for composable, readable, and sharable data mappings built on top of **Pydantic v2**.
**chidian** is a composable framework for building readable data transformations with **Pydantic v2**.

## 30-second tour
## Quick Start
```python
from pydantic import BaseModel
from chidian import Mapper, DataMapping
from chidian import Mapper
import chidian.partials as p

# 1️⃣ Define your source & target schemas
class Source(BaseModel):
# Source data (nested)
source_data = {
"name": {"first": "Gandalf", "given": ["the", "Grey"], "suffix": None},
"address": {
"street": ["Bag End", "Hobbiton"],
"city": "The Shire",
"postal_code": "ME001",
"country": "Middle Earth"
}
}

# Target data (flat)
target = {
"full_name": "Gandalf the Grey",
"address": "Bag End\nHobbiton\nThe Shire\nME001\nMiddle Earth"
}

# Define schemas
class SourceSchema(BaseModel):
name: dict
address: dict

class Target(BaseModel):
class TargetSchema(BaseModel):
full_name: str
address: str

# 2️⃣ Write pure dict→dict transformation logic with `Mapper`
fmt = p.template("{} {} {}", skip_none=True)

person_mapper = Mapper(
lambda src: {
"full_name": fmt(
p.get("name.first")(src),
p.get("name.given[*]") | p.join(" ")(src),
p.get("name.suffix")(src),
),
"address": p.get("address") | p.flatten_paths(
[
"street[0]",
"street[1]",
"city",
"postal_code",
"country",
],
delimiter="\n",
)(src),
}
)

# 3️⃣ Wrap it with `DataMapping` for schema validation
person_mapping = DataMapping(
mapper=person_mapper,
input_schema=Source,
output_schema=Target,
# Create type-safe mapper
person_mapping = Mapper(
{
"full_name": p.get([
"name.first",
"name.given[*]",
"name.suffix"
]).join(" ", flatten=True),

"address": p.get([
"address.street[*]",
"address.city",
"address.postal_code",
"address.country"
]).join("\n", flatten=True),
},
min_input_schemas=[SourceSchema],
output_schema=TargetSchema,
)

# 4️⃣ Execute!
source_obj = Source.model_validate(source_data)
result = person_mapping.forward(source_obj)
print(result)
# Execute
result = person_mapping(SourceSchema(**source_data))
assert result == TargetSchema(**target)
```

See the [tests](/chidian/tests) for some use-cases.

## Feature highlights

| Feature | In one line |
| ---------------- | ---------------------------------------------------------------------------- |
| **Mapper** | Pure dict→dict runtime transformations – no schema required. |
| **DataMapping** | Adds Pydantic validation around a `Mapper` for safe, forward-only transforms. |
| **Partials API** | `\|` operator chains (`split \| last \| upper`) keep lambdas away. |
| **Table** | Lightweight sparse table: path queries, joins, pandas/polars interop. |
| **Lexicon** | Bidirectional code look‑ups *(LOINC ↔ SNOMED)* with defaults + metadata. |
## Core Features

| Component | Purpose |
| ---------------- | ------------------------------------------------------------------------ |
| **Mapper** | Dict→dict transformations with optional schema validation |
| **DataMapping** | Pydantic-validated, type-safe transformations |
| **Partials API** | Composable operators for concise transformation chains |
| **Table** | Sparse tables with path queries, joins, pandas/polars interop |
| **Lexicon** | Bidirectional code lookups (e.g., LOINC ↔ SNOMED) with metadata |
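
The `|`-chaining behind the Partials API can be pictured with a minimal, self-contained sketch. This is an illustration of the idea only, not chidian's actual implementation:

```python
class Partial:
    """Minimal illustration of a pipe-composable transformation step."""

    def __init__(self, fn):
        self.fn = fn

    def __call__(self, value):
        return self.fn(value)

    def __or__(self, other):
        # `a | b` builds a new step that applies a, then b
        return Partial(lambda value: other(self.fn(value)))


# Hypothetical steps, mirroring the `split | last | upper` chain above
split = Partial(lambda s: s.split())
last = Partial(lambda parts: parts[-1])
upper = Partial(lambda s: s.upper())

pipeline = split | last | upper
print(pipeline("gandalf the grey"))  # GREY
```

Chaining with `__or__` keeps each step a plain value, so pipelines can be stored, passed around, and reused without lambdas at the call site.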

## Table: DataFrames interoperability
## Table & DataFrames

The `Table` class provides seamless conversion to pandas and polars DataFrames via optional dependencies:

### Installation
Seamless conversion between chidian Tables and pandas/polars:

```bash
# For pandas support
pip install 'chidian[pandas]'

# For polars support
pip install 'chidian[polars]'

# For both
pip install 'chidian[dfs]'
pip install 'chidian[pandas]' # pandas support
pip install 'chidian[polars]' # polars support
pip install 'chidian[df]' # both
```

### Usage

```python
from chidian.table import Table

# Create a table
table = Table([
{"name": "Alice", "age": 30},
{"name": "Bob", "age": 25}
])

# Convert to pandas (with row keys as index)
df_pd = table.to_pandas(index=True) # pandas index from row keys

# Convert to polars (with row keys as column)
df_pl = table.to_polars(add_index=True) # polars gets '_index' column
df_pd = table.to_pandas(index=True)
df_pl = table.to_polars(add_index=True)
```

## Flattening nested data
### Flatten Nested Data

The `Table` class provides powerful flattening capabilities to convert nested dictionaries and lists into flat, column-based structures using intuitive path notation:
Convert nested structures into flat, column-based tables:

```python
from chidian.table import Table

# Create table with nested data
table = Table([
{"user": {"name": "John", "prefs": ["email", "sms"]}, "id": 123},
{"user": {"name": "Jane", "prefs": ["phone"]}, "id": 456}
])

# Flatten nested structures
# Flatten with intuitive path notation
flat = table.flatten()
print(flat.columns)
# {'id', 'user.name', 'user.prefs[0]', 'user.prefs[1]'}

# Direct export with flattening
df = table.to_pandas(flatten=True) # Flat pandas DataFrame
df = table.to_polars(flatten=True) # Flat polars DataFrame
table.to_csv("flat.csv", flatten=True) # Flat CSV with path columns
# Export flattened data
table.to_pandas(flatten=True)
table.to_polars(flatten=True)
table.to_csv("flat.csv", flatten=True)

# Control flattening depth and array limits
limited = table.flatten(max_depth=2, array_index_limit=5)
# Control flattening behavior
table.flatten(max_depth=2, array_index_limit=5)
```

**Key features:**
- **Intuitive paths**: `user.name`, `items[0]`, `data.settings.theme`
- **Sparse-friendly**: Different nesting across rows creates union of all paths
- **Special key handling**: Keys with dots/brackets use bracket notation: `["key.with.dots"]`
- **Depth control**: Limit recursion to prevent over-flattening
- **Array limits**: Cap array indices to manage large arrays
- **Seamless integration**: All Table operations (join, select, group_by) work on flattened data
**Features:**
- Path notation: `user.name`, `items[0]`, `data.settings.theme`
- Handles sparse data (different nesting per row)
- Special key escaping for dots/brackets
- Depth and array size controls
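
The path-notation flattening described above can be sketched in a few lines of plain Python. This is a simplified illustration of the behavior, not chidian's implementation (which also handles `max_depth`, `array_index_limit`, and special-key escaping):

```python
def flatten(obj, prefix=""):
    """Recursively flatten nested dicts/lists into path-keyed values."""
    flat = {}
    if isinstance(obj, dict):
        for key, value in obj.items():
            path = f"{prefix}.{key}" if prefix else key
            flat.update(flatten(value, path))
    elif isinstance(obj, list):
        for i, value in enumerate(obj):
            # Array elements get bracketed indices: prefs[0], prefs[1], ...
            flat.update(flatten(value, f"{prefix}[{i}]"))
    else:
        flat[prefix] = obj
    return flat


row = {"user": {"name": "John", "prefs": ["email", "sms"]}, "id": 123}
print(flatten(row))
# {'user.name': 'John', 'user.prefs[0]': 'email', 'user.prefs[1]': 'sms', 'id': 123}
```

Because each row flattens independently, rows with different nesting simply contribute different paths, and the table's column set is the union of all of them.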

## Powered by Pydantic
## Design Philosophy

chidian treats **Pydantic v2 models as first‑class citizens**:
Built by data engineers, for data engineers. chidian solves common pain points:

* Validate inputs & outputs automatically with Pydantic v2
* `DataMapping` wraps your `Mapper` for IDE completion & mypy.
* You can drop down to plain dicts when prototyping with `strict=False`.
**Challenges:**
- Verbose edge-case handling
- Hard to share one-off code
- Difficult collaboration on data transformations

**Solutions:**
- **Iterate over perfection**: Learn and adapt as you build
- **Functions as first-class objects**: Compose transformations cleanly
- **JSON-first**: Simple, universal data structures

## Motivation + Philosophy
chidian applies functional programming principles to data mappings, drawing inspiration from [Pydantic](https://github.com/pydantic/pydantic), [JMESPath](https://github.com/jmespath), [funcy](https://github.com/Suor/funcy), and others.

This is a library for data engineers by a data engineer. Data engineering touches many parts of the stack, and its heuristics differ subtly from those of traditional software engineering.

The goals of the library are:
1. Make fast, reliable, and readable data mappings
2. Make it easy to build-on + share pre-existing mappings (so we don't need to start from scratch every time!)

Several challenges come up with traditional data mapping code:
1. **It's verbose**: Data can be very messy and has a lot of edge cases
2. **It's hard to share**: Code is often written for one-off use-cases
3. **It's difficult to collaborate**: Data interoperability becomes more difficult when subtle cases arise

chidian aims to solve these issues by taking stronger opinions on common operations:
1. **Prefer iteration over exactness**: With data, we learn as we iterate and use what we need!
2. **Prefer using functions as objects**: Simplify code by passing functions as first-class objects.
3. **Prefer JSON-like structures**: No toml, yaml, xml -- just JSON (for now...).
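
The "functions as first-class objects" preference is plain Python: transformations are values you can store, pass, and compose. A generic illustration (not chidian API):

```python
from functools import reduce


def compose(*fns):
    """Left-to-right function composition: compose(f, g)(x) == g(f(x))."""
    return lambda x: reduce(lambda acc, fn: fn(acc), fns, x)


# Each step is an ordinary function value; the pipeline is just data
clean_name = compose(str.strip, str.title)
print(clean_name("  gandalf the grey "))  # Gandalf The Grey
```

Treating steps as values means a mapping can be built up from small, tested pieces and shared across projects instead of rewritten each time.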
## Contributing

The heart of chidian is applying [functional programming](https://en.wikipedia.org/wiki/Functional_programming) principles to data mappings.
Ideas in this repo are inspired by functional programming and other libraries (e.g. [Pydantic](https://github.com/pydantic/pydantic), [JMESPath](https://github.com/jmespath), [funcy](https://github.com/Suor/funcy), [Boomerang](https://github.com/boomerang-lang/boomerang/tree/master), [lens](https://hackage.haskell.org/package/lens), etc.)
Contributions welcome! Open an issue to discuss your idea before submitting a PR.

## Contributing
---

All contributions welcome! Please open an Issue and tag me -- I'll make sure to get back to you and we can scope out a PR.
See [tests](/chidian/tests) for more examples.
2 changes: 0 additions & 2 deletions chidian/__init__.py
@@ -1,5 +1,4 @@
from .core import get, put
from .data_mapping import DataMapping
from .lexicon import Lexicon, LexiconBuilder
from .lib.get_dsl_parser import parse_path_peg as parse_path
from .mapper import DROP, KEEP, Mapper, MapperResult, ValidationMode
Expand All @@ -12,7 +11,6 @@
"parse_path",
"Table",
"Mapper",
"DataMapping",
"Lexicon",
"LexiconBuilder",
"DROP",
Expand Down
61 changes: 0 additions & 61 deletions chidian/data_mapping.py

This file was deleted.

25 changes: 0 additions & 25 deletions chidian/lib/data_mapping_helpers.py
Expand Up @@ -11,18 +11,6 @@
_OutModel = TypeVar("_OutModel", bound=BaseModel)


def validate_schemas(input_schema: Type, output_schema: Type) -> None:
"""Validate that schemas are Pydantic BaseModel classes."""
if not is_pydantic_model(input_schema):
raise TypeError(
f"input_schema must be a Pydantic BaseModel, got {type(input_schema)}"
)
if not is_pydantic_model(output_schema):
raise TypeError(
f"output_schema must be a Pydantic BaseModel, got {type(output_schema)}"
)


def is_pydantic_model(model_class: Type) -> bool:
"""Check if a class is a Pydantic BaseModel."""
try:
Expand All @@ -35,19 +23,6 @@ def is_pydantic_model(model_class: Type) -> bool:
return False


def validate_input(data: Any, input_schema: Type[_InModel]) -> _InModel:
"""Validate input data against input schema."""
if isinstance(data, input_schema):
return data # type: ignore[return-value]

# Try to convert dict to model
if isinstance(data, dict):
return input_schema.model_validate(data) # type: ignore[return-value]

# Try direct validation
return input_schema.model_validate(data) # type: ignore[return-value]


def to_dict(model: _InModel) -> dict[str, Any]:
"""Convert Pydantic model to dictionary."""
return model.model_dump()
Expand Down