From 6059ad70a4b36c51c7cd984307e1d2d7a122b0a7 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 14 Jan 2024 14:02:46 -0500 Subject: [PATCH] pantab 4.0 (#218) --- .github/workflows/unit-test.yml | 2 + .gitignore | 2 + CMakeLists.txt | 52 ++ CONTRIBUTING.md | 16 +- environment.yml | 4 +- meson.build | 33 - pantab/__init__.py | 113 +-- pantab/_compat.py | 7 - pantab/_hyper_util.py | 29 - pantab/_reader.py | 191 ++---- pantab/_types.py | 72 -- pantab/_writer.py | 242 +------ pantab/src/CMakeLists.txt | 30 + pantab/src/__init__.py | 0 pantab/src/cffi.h | 73 -- .../{numpy_datetime.c => numpy_datetime.cpp} | 0 pantab/src/pantab.c | 64 -- pantab/src/pantab.cpp | 619 +++++++++++++++++ pantab/src/reader.c | 204 ------ pantab/src/reader.h | 9 - pantab/src/tableauhyperapi.h | 83 --- pantab/src/type.c | 37 - pantab/src/type.h | 62 -- pantab/src/writer.c | 641 ------------------ pantab/src/writer.h | 10 - pantab/tests/conftest.py | 80 ++- pantab/tests/test_reader.py | 71 +- pantab/tests/test_roundtrip.py | 143 +--- pantab/tests/test_types.py | 16 - pantab/tests/test_writer.py | 146 +--- pyproject.toml | 37 +- 31 files changed, 934 insertions(+), 2154 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 meson.build delete mode 100644 pantab/_compat.py delete mode 100644 pantab/_hyper_util.py create mode 100644 pantab/src/CMakeLists.txt create mode 100644 pantab/src/__init__.py delete mode 100644 pantab/src/cffi.h rename pantab/src/{numpy_datetime.c => numpy_datetime.cpp} (100%) delete mode 100644 pantab/src/pantab.c create mode 100644 pantab/src/pantab.cpp delete mode 100644 pantab/src/reader.c delete mode 100644 pantab/src/reader.h delete mode 100644 pantab/src/tableauhyperapi.h delete mode 100644 pantab/src/type.c delete mode 100644 pantab/src/type.h delete mode 100644 pantab/src/writer.c delete mode 100644 pantab/src/writer.h delete mode 100644 pantab/tests/test_types.py diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 979fd6b3..478cf60b 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -30,3 +30,5 @@ jobs: - name: Build wheels for ${{ matrix.os }} uses: pypa/cibuildwheel@v2.16.2 + env: + MACOSX_DEPLOYMENT_TARGET: "10.14" diff --git a/.gitignore b/.gitignore index f1d85790..326e5ba7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .mypy_cache *.hyper hyper_db* +compile_commands.json +_deps ######################################### # Editor temporary/working/backup files # diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..f5fdb0b4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.18) +project(${SKBUILD_PROJECT_NAME} LANGUAGES C CXX) +set(CMAKE_C_STANDARD 17) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +if (MSVC) +else() + add_compile_options(-Wall -Wextra) +endif() + +find_package(Python COMPONENTS Interpreter Development.Module NumPy REQUIRED) + +# Detect the installed nanobind package and import it into CMake +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE NB_DIR) +list(APPEND CMAKE_PREFIX_PATH "${NB_DIR}") +find_package(nanobind CONFIG REQUIRED) + +if(WIN32) + set(TABLEAU_DOWNLOAD_URL "https://downloads.tableau.com/tssoftware//tableauhyperapi-cxx-windows-x86_64-release-main.0.0.18441.r118d57bb.zip") +elseif(APPLE) + set(TABLEAU_DOWNLOAD_URL 
"https://downloads.tableau.com/tssoftware//tableauhyperapi-cxx-macos-x86_64-release-main.0.0.18441.r118d57bb.zip") +else() + set(TABLEAU_DOWNLOAD_URL "https://downloads.tableau.com/tssoftware//tableauhyperapi-cxx-linux-x86_64-release-main.0.0.18441.r118d57bb.zip") +endif() + +include(FetchContent) +FetchContent_Declare( + tableauhyperapi-cxx + URL "${TABLEAU_DOWNLOAD_URL}" +) + +FetchContent_MakeAvailable(tableauhyperapi-cxx) +list(APPEND CMAKE_PREFIX_PATH "${tableauhyperapi-cxx_SOURCE_DIR}/share/cmake") +find_package(tableauhyperapi-cxx CONFIG REQUIRED) + + +FetchContent_Declare(nanoarrow-project + GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git + GIT_TAG apache-arrow-nanoarrow-0.3.0 +) +FetchContent_MakeAvailable(nanoarrow-project) + +if (PANTAB_USE_SANITIZERS) + add_compile_options(-fsanitize=address -fsanitize=undefined) + add_link_options(-fsanitize=address -fsanitize=undefined) +endif() + +add_subdirectory(pantab/src) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e905d5d6..7e399c79 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,21 +50,7 @@ git checkout -b a-new-branch ### Building the Project -To install pantab, simply run: - -```sh -python -m pip install . -``` - -From the project root. Because pandas uses meson as a build backend, you can pass options (like building a debug version) via meson command line arguments: - -``` -python -m pip install . --config-settings=builddir="debug" --config-settings=setup-args="-Dbuildtype=debug" -``` - -At the moment editable installs are not supported. - -Please also note that the above will fail without a C compiler - if you don't have one installed check out the appropriate documentation from the [Python Developer Guide](https://devguide.python.org/setup/#compile-and-build) for your platform. +For an editable install of pantab you can simply run `pip install -ve .` from the project root. ### Creating tests and running the test suite diff --git a/environment.yml b/environment.yml index c3addc18..19679ea0 100644 --- a/environment.yml +++ b/environment.yml @@ -5,13 +5,15 @@ dependencies: - black - flake8 - isort - - meson-python - mypy + - nanobind - pandas + - pandas-stubs - pip - pyarrow - python - pytest + - scikit-build-core - sphinx - pre-commit - sphinx_rtd_theme diff --git a/meson.build b/meson.build deleted file mode 100644 index 70c06638..00000000 --- a/meson.build +++ /dev/null @@ -1,33 +0,0 @@ -project('pantab', 'c') - -py = import('python').find_installation(pure: false) - -incdir_numpy = run_command(py, - [ - '-c', - ''' -import os -import numpy as np -try: - # Check if include directory is inside the pandas dir - # e.g. 
a venv created inside the pandas dir - # If so, convert it to a relative path - incdir = os.path.relpath(np.get_include()) -except Exception: - incdir = np.get_include() -print(incdir) - ''' - ], - check: true -).stdout().strip() - -inc_np = include_directories(incdir_numpy) - -py.extension_module( - 'libpantab', - ['pantab/src/pantab.c', 'pantab/src/numpy_datetime.c', - 'pantab/src/reader.c', 'pantab/src/type.c', - 'pantab/src/writer.c'], - include_directories: [inc_np], - install: true -) diff --git a/pantab/__init__.py b/pantab/__init__.py index 26c0801f..384760a8 100644 --- a/pantab/__init__.py +++ b/pantab/__init__.py @@ -1,7 +1,5 @@ -__version__ = "3.0.3" +__version__ = "4.0.0rc" -import libpantab # type: ignore -from tableauhyperapi import __version__ as hyperapi_version from ._reader import frame_from_hyper, frame_from_hyper_query, frames_from_hyper from ._tester import test @@ -16,112 +14,3 @@ "frames_to_hyper", "test", ] - -# We link against HyperAPI in a fun way: In Python, we extract the function -# pointers directly from the Python HyperAPI. We pass those function pointers -# over to the C module which will then use those pointers to directly interact -# with HyperAPI. Furthermore, we check the function signatures to guard -# against API-breaking changes in HyperAPI. -# -# Directly using HyperAPI's C functions always was and still is discouraged and -# unsupported by Tableu. In particular, Tableau will not be able to provide -# official support for this hack. -# -# Because this is highly brittle, we try to make the error message as -# actionable as possible and guide users in the right direction. - -api_incompatibility_msg = """ -pantab is incompatible with version {} of Tableau Hyper API. Please upgrade -both `tableauhyperapi` and `pantab` to the latest version. 
See also -https://pantab.readthedocs.io/en/latest/caveats.html#tableauhyperapi-compatability -""".format( - hyperapi_version -) - -try: - from tableauhyperapi.impl.dll import ffi, lib -except ImportError as e: - raise NotImplementedError(api_incompatibility_msg) from e - - -def _check_compatibility(check, message): - if not check: - raise NotImplementedError(message + "\n" + api_incompatibility_msg) - - -def _get_hapi_function(name, sig): - _check_compatibility(hasattr(lib, name), f"function '{name}' missing") - f = getattr(lib, name) - func_type = ffi.typeof(f) - _check_compatibility( - func_type.kind == "function", - f"expected '{name}' to be a function, got {func_type.kind}", - ) - _check_compatibility( - func_type.cname == sig, - f"expected '{name}' to have the signature '{sig}', got '{func_type.cname}'", - ) - return f - - -libpantab.load_hapi_functions( - _get_hapi_function("hyper_decode_date", "hyper_date_components_t(*)(uint32_t)"), - _get_hapi_function("hyper_encode_date", "uint32_t(*)(hyper_date_components_t)"), - _get_hapi_function("hyper_decode_time", "hyper_time_components_t(*)(uint64_t)"), - _get_hapi_function("hyper_encode_time", "uint64_t(*)(hyper_time_components_t)"), - _get_hapi_function( - "hyper_inserter_buffer_add_null", - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *)", - ), - _get_hapi_function( - "hyper_inserter_buffer_add_bool", - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *, _Bool)", - ), - _get_hapi_function( - "hyper_inserter_buffer_add_int16", - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *, int16_t)", - ), - _get_hapi_function( - "hyper_inserter_buffer_add_int32", - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *, int32_t)", - ), - _get_hapi_function( - "hyper_inserter_buffer_add_int64", - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *, int64_t)", - ), - _get_hapi_function( - "hyper_inserter_buffer_add_double", - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *, double)", - ), - _get_hapi_function( - "hyper_inserter_buffer_add_binary", - ( - "struct hyper_error_t *(*)" - "(struct hyper_inserter_buffer_t *, uint8_t *, size_t)" - ), - ), - _get_hapi_function( - "hyper_inserter_buffer_add_raw", - ( - "struct hyper_error_t *(*)(struct hyper_inserter_buffer_t *" - ", uint8_t *, size_t)" - ), - ), - _get_hapi_function( - "hyper_rowset_get_next_chunk", - ( - "struct hyper_error_t *(*)(struct hyper_rowset_t *" - ", struct hyper_rowset_chunk_t * *)" - ), - ), - _get_hapi_function( - "hyper_destroy_rowset_chunk", "void(*)(struct hyper_rowset_chunk_t *)" - ), - _get_hapi_function( - "hyper_rowset_chunk_field_values", - ( - "void(*)(struct hyper_rowset_chunk_t *" - ", size_t *, size_t *, uint8_t * * *, size_t * *)" - ), - ), -) diff --git a/pantab/_compat.py b/pantab/_compat.py deleted file mode 100644 index fe03b20b..00000000 --- a/pantab/_compat.py +++ /dev/null @@ -1,7 +0,0 @@ -import pandas as pd -from pandas.util.version import parse - -PANDAS_120 = parse(pd.__version__) >= parse("1.2.0") -PANDAS_130 = parse(pd.__version__) >= parse("1.3.0") - -__all__ = ["PANDAS_120", "PANDAS_130"] diff --git a/pantab/_hyper_util.py b/pantab/_hyper_util.py deleted file mode 100644 index 98193233..00000000 --- a/pantab/_hyper_util.py +++ /dev/null @@ -1,29 +0,0 @@ -from contextlib import nullcontext -from typing import Optional - -import tableauhyperapi as tab_api - - -def ensure_hyper_process(hyper_process: Optional[tab_api.HyperProcess]): - """ - Spawns an adhoc HyperProcess if needed, i.e. 
if no existing HyperProcess is provided - - Usage: - ``` - with ensure_hyper_process() as h: - h.execute_query(...) - ``` - """ - if hyper_process is None: - return tab_api.HyperProcess(tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) - else: - # Wrap the HyperProcess into a nullcontext such that the `with` doesn't close - # the HyperProcess - return nullcontext(hyper_process) - - -def forbid_hyper_process(hyper_process: Optional[tab_api.HyperProcess]): - if hyper_process is not None: - raise ValueError( - "hyper_process parameter is useless because `Connection` is provided" - ) diff --git a/pantab/_reader.py b/pantab/_reader.py index 4ab43bc4..a85b62d7 100644 --- a/pantab/_reader.py +++ b/pantab/_reader.py @@ -3,169 +3,90 @@ import tempfile from typing import Dict, Optional, Union -import libpantab # type: ignore -import numpy as np import pandas as pd import tableauhyperapi as tab_api -import pantab._types as pantab_types -from pantab._hyper_util import ensure_hyper_process, forbid_hyper_process +import pantab.src.pantab as libpantab # type: ignore TableType = Union[str, tab_api.Name, tab_api.TableName] -def _read_query_result( - result: tab_api.Result, dtypes: Optional[Dict[str, str]], use_float_na: bool -) -> pd.DataFrame: - if dtypes is None: - dtypes = {} - # Construct data types from result - for column in result.schema.columns: - # `result.schema` does not provide nullability information. - # Lwt's err on the safe side and always assume they are nullable - nullability = tab_api.Nullability.NULLABLE - column_type = pantab_types._ColumnType(column.type, nullability) - try: - dtypes[column.name.unescaped] = pantab_types._get_pandas_type( - column_type - ) - except KeyError as e: - raise TypeError( - f"Column {column.name} has unsupported datatype {column.type} " - f"with nullability {column.nullability}" - ) from e - - # if the use_float_na flag is set to False - # then switch Float32/Float64 dtypes back to float32/float64 - # to support np.nan rather than pd.NA - if not use_float_na: - for column, col_type in dtypes.items(): - if col_type == "Float64": - dtypes[column] = "float64" - elif col_type == "Float32": - dtypes[column] = "float32" - - # Call native library to read tuples from result set - dtype_strs = tuple(dtypes.values()) - df = pd.DataFrame(libpantab.read_hyper_query(result._Result__cdata, dtype_strs)) - if df.empty: - return pd.DataFrame({col: pd.Series(dtype="object") for col in dtypes}) - df.columns = dtypes.keys() - # TODO: remove this hackery... 
- for k, v in dtypes.items(): - if v == "date": - dtypes[k] = "datetime64[ns]" - date_types = ["datetime64[ns, UTC]", "datetime64[ns]"] - for col in df.select_dtypes(include=date_types): - df[col] = df[col].dt.tz_localize(None) - for col in df.select_dtypes(exclude=date_types): - df[col] = df[col].astype(dtypes[col]) - - df = df.fillna(value=np.nan) # Replace any appearances of None - - return df - - -def _read_table( - *, connection: tab_api.Connection, table: TableType, use_float_na: bool -) -> pd.DataFrame: - if isinstance(table, str): - table = tab_api.TableName(table) - - table_def = connection.catalog.get_table_definition(table) - columns = table_def.columns - - dtypes: Dict[str, str] = {} - for column in columns: - column_type = pantab_types._ColumnType(column.type, column.nullability) - try: - dtypes[column.name.unescaped] = pantab_types._get_pandas_type(column_type) - except KeyError as e: - raise TypeError( - f"Column {column.name} has unsupported datatype {column.type} " - f"with nullability {column.nullability}" - ) from e - - query = f"SELECT * from {table}" - with connection.execute_query(query) as result: - return _read_query_result(result, dtypes, use_float_na) - - def frame_from_hyper( - source: Union[str, pathlib.Path, tab_api.Connection], + source: Union[str, pathlib.Path], *, table: TableType, - hyper_process: Optional[tab_api.HyperProcess] = None, - use_float_na: bool = False, ) -> pd.DataFrame: """See api.rst for documentation""" + if isinstance(table, (str, tab_api.Name)) or not table.schema_name: + table = tab_api.TableName("public", table) + + data, columns, dtypes = libpantab.read_from_hyper_table( + str(source), + table.schema_name.name.unescaped, # TODO: this probably allows injection + table.name.unescaped, + ) + df = pd.DataFrame(data, columns=columns) + dtype_map = {k: v for k, v in zip(columns, dtypes) if v != "datetime64[ns, UTC]"} + df = df.astype(dtype_map) + + tz_aware_columns = { + col for col, dtype in zip(columns, dtypes) if dtype == "datetime64[ns, UTC]" + } + for col in tz_aware_columns: + try: + df[col] = df[col].dt.tz_localize("UTC") + except AttributeError: # happens when df[col] is empty + df[col] = df[col].astype("datetime64[ns, UTC]") - if isinstance(source, tab_api.Connection): - forbid_hyper_process(hyper_process) - return _read_table(connection=source, table=table, use_float_na=use_float_na) - else: - with tempfile.TemporaryDirectory() as tmp_dir, ensure_hyper_process( - hyper_process - ) as hpe: - tmp_db = shutil.copy(source, tmp_dir) - with tab_api.Connection(hpe.endpoint, tmp_db) as connection: - return _read_table( - connection=connection, table=table, use_float_na=use_float_na - ) + return df def frames_from_hyper( - source: Union[str, pathlib.Path, tab_api.Connection], - *, - hyper_process: Optional[tab_api.HyperProcess] = None, - use_float_na: bool = False, + source: Union[str, pathlib.Path], ) -> Dict[tab_api.TableName, pd.DataFrame]: """See api.rst for documentation.""" result: Dict[TableType, pd.DataFrame] = {} - if isinstance(source, tab_api.Connection): - forbid_hyper_process(hyper_process) - connection = source - for schema in connection.catalog.get_schema_names(): - for table in connection.catalog.get_table_names(schema=schema): - result[table] = _read_table( - connection=connection, table=table, use_float_na=use_float_na - ) - else: - with tempfile.TemporaryDirectory() as tmp_dir, ensure_hyper_process( - hyper_process - ) as hpe: - tmp_db = shutil.copy(source, tmp_dir) - with tab_api.Connection(hpe.endpoint, tmp_db) as 
connection: - for schema in connection.catalog.get_schema_names(): - for table in connection.catalog.get_table_names(schema=schema): - result[table] = _read_table( - connection=connection, - table=table, - use_float_na=use_float_na, - ) + table_names = [] + with tempfile.TemporaryDirectory() as tmp_dir, tab_api.HyperProcess( + tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU + ) as hpe: + tmp_db = shutil.copy(source, tmp_dir) + with tab_api.Connection(hpe.endpoint, tmp_db) as connection: + for schema in connection.catalog.get_schema_names(): + for table in connection.catalog.get_table_names(schema=schema): + table_names.append(table) + + for table in table_names: + result[table] = frame_from_hyper( + source=source, + table=table, + ) return result def frame_from_hyper_query( - source: Union[str, pathlib.Path, tab_api.Connection], + source: Union[str, pathlib.Path], query: str, *, hyper_process: Optional[tab_api.HyperProcess] = None, - use_float_na: bool = False, ) -> pd.DataFrame: """See api.rst for documentation.""" + # Call native library to read tuples from result set + df = pd.DataFrame(libpantab.read_from_hyper_query(str(source), query)) + data, columns, dtypes = libpantab.read_from_hyper_query(str(source), query) + df = pd.DataFrame(data, columns=columns) + dtype_map = {k: v for k, v in zip(columns, dtypes) if v != "datetime64[ns, UTC]"} + df = df.astype(dtype_map) + + tz_aware_columns = { + col for col, dtype in zip(columns, dtypes) if dtype == "datetime64[ns, UTC]" + } + for col in tz_aware_columns: + try: + df[col] = df[col].dt.tz_localize("UTC") + except AttributeError: # happens when df[col] is empty + df[col] = df[col].astype("datetime64[ns, UTC]") - if isinstance(source, tab_api.Connection): - forbid_hyper_process(hyper_process) - with source.execute_query(query) as result: - return _read_query_result(result, None, use_float_na) - else: - with tempfile.TemporaryDirectory() as tmp_dir, ensure_hyper_process( - hyper_process - ) as hpe: - tmp_db = shutil.copy(source, tmp_dir) - with tab_api.Connection(hpe.endpoint, tmp_db) as connection: - with connection.execute_query(query) as result: - return _read_query_result(result, None, use_float_na) + return df diff --git a/pantab/_types.py b/pantab/_types.py index 25aa7c13..3bbfc232 100644 --- a/pantab/_types.py +++ b/pantab/_types.py @@ -1,77 +1,5 @@ -import collections from typing import Union import tableauhyperapi as tab_api -import pantab._compat as compat - -# The Hyper API as of writing doesn't offer great hashability for column comparison -# so we create out namedtuple for that purpose -_ColumnType = collections.namedtuple("_ColumnType", ["type_", "nullability"]) - TableType = Union[str, tab_api.Name, tab_api.TableName] - -_column_types = { - "int16": _ColumnType(tab_api.SqlType.small_int(), tab_api.Nullability.NOT_NULLABLE), - "int32": _ColumnType(tab_api.SqlType.int(), tab_api.Nullability.NOT_NULLABLE), - "int64": _ColumnType(tab_api.SqlType.big_int(), tab_api.Nullability.NOT_NULLABLE), - "Int16": _ColumnType(tab_api.SqlType.small_int(), tab_api.Nullability.NULLABLE), - "Int32": _ColumnType(tab_api.SqlType.int(), tab_api.Nullability.NULLABLE), - "Int64": _ColumnType(tab_api.SqlType.big_int(), tab_api.Nullability.NULLABLE), - "float32": _ColumnType(tab_api.SqlType.double(), tab_api.Nullability.NULLABLE), - "float64": _ColumnType(tab_api.SqlType.double(), tab_api.Nullability.NULLABLE), - "bool": _ColumnType(tab_api.SqlType.bool(), tab_api.Nullability.NOT_NULLABLE), - "datetime64[ns]": _ColumnType( - 
tab_api.SqlType.timestamp(), tab_api.Nullability.NULLABLE - ), - "datetime64[ns, UTC]": _ColumnType( - tab_api.SqlType.timestamp_tz(), tab_api.Nullability.NULLABLE - ), - "timedelta64[ns]": _ColumnType( - tab_api.SqlType.interval(), tab_api.Nullability.NULLABLE - ), - "object": _ColumnType(tab_api.SqlType.text(), tab_api.Nullability.NULLABLE), -} - -_column_types["string"] = _ColumnType( - tab_api.SqlType.text(), tab_api.Nullability.NULLABLE -) -_column_types["boolean"] = _ColumnType( - tab_api.SqlType.bool(), tab_api.Nullability.NULLABLE -) - -if compat.PANDAS_120: - _column_types["Float32"] = _ColumnType( - tab_api.SqlType.double(), tab_api.Nullability.NULLABLE - ) - _column_types["Float64"] = _ColumnType( - tab_api.SqlType.double(), tab_api.Nullability.NULLABLE - ) - - -# Invert this, but exclude float32 as that does not roundtrip -_pandas_types = {v: k for k, v in _column_types.items() if k != "float32"} - -# Add things that we can't write to Hyper but can read -_pandas_types[ - _ColumnType(tab_api.SqlType.date(), tab_api.Nullability.NULLABLE) -] = "date" -_pandas_types[ - _ColumnType(tab_api.SqlType.double(), tab_api.Nullability.NOT_NULLABLE) -] = "float64" -_pandas_types[ - _ColumnType(tab_api.SqlType.text(), tab_api.Nullability.NOT_NULLABLE) -] = "string" - - -def _get_pandas_type(column_type: _ColumnType) -> str: - if column_type in _pandas_types: - return _pandas_types[column_type] - - if column_type.type_.tag == tab_api.TypeTag.VARCHAR: - return "string" - - raise KeyError( - f"Column has unsupported datatype {column_type.type_} " - f"with nullability {column_type.nullability}" - ) diff --git a/pantab/_writer.py b/pantab/_writer.py index f5674626..0068979b 100644 --- a/pantab/_writer.py +++ b/pantab/_writer.py @@ -1,39 +1,15 @@ -import itertools -import os import pathlib import shutil import tempfile import uuid -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Dict, Optional, Union -import libpantab # type: ignore -import numpy as np import pandas as pd +import pyarrow as pa import tableauhyperapi as tab_api -import pantab._compat as compat import pantab._types as pantab_types -from pantab._hyper_util import ensure_hyper_process - - -def _pandas_to_tableau_type(typ: str) -> pantab_types._ColumnType: - try: - return pantab_types._column_types[typ] - except KeyError: - raise TypeError("Conversion of '{}' dtypes not supported!".format(typ)) - - -def _timedelta_to_interval(td: pd.Timedelta) -> Optional[tab_api.Interval]: - """Converts a pandas Timedelta to tableau Hyper API implementation.""" - if pd.isnull(td): - return None - - days = td.days - without_days = td - pd.Timedelta(days=days) - total_seconds = int(without_days.total_seconds()) - microseconds = total_seconds * 1_000_000 - - return tab_api.Interval(months=0, days=days, microseconds=microseconds) +import pantab.src.pantab as libpantab # type: ignore def _validate_table_mode(table_mode: str) -> None: @@ -41,193 +17,18 @@ def _validate_table_mode(table_mode: str) -> None: raise ValueError("'table_mode' must be either 'w' or 'a'") -def _assert_columns_equal( - left: Sequence[tab_api.TableDefinition.Column], - right: Sequence[tab_api.TableDefinition.Column], -) -> None: - """ - Helper function to validate if sequences of columns are equal. - - The TableauHyperAPI as of 0.0.8953 does not implement equality operations - for Column instances, hence the need for this. 
- """ - - class DummyColumn: - """Dummy class to match items needed for str repr of columns.""" - - @property - def name(self): - return None - - @property - def type(self): - return None - - @property - def nullability(self): - return None - - for c1, c2 in itertools.zip_longest(left, right, fillvalue=DummyColumn()): - if c1.name != c2.name or c1.type != c2.type or c1.nullability != c2.nullability: - break # go to error handler - else: - return None # everything matched up, so bail out - - c1_str = ", ".join( - f"(Name={x.name}, Type={x.type}, Nullability={x.nullability})" for x in left - ) - c2_str = ", ".join( - f"(Name={x.name}, Type={x.type}, Nullability={x.nullability})" for x in right - ) - - raise TypeError(f"Mismatched column definitions: {c1_str} != {c2_str}") - - -def _maybe_convert_timedelta(df: pd.DataFrame) -> Tuple[pd.DataFrame, Tuple[str, ...]]: - """ - Hyper uses a different storage format than pandas / Python for timedeltas. - - Ultimately this should be pushed to the C extension, but doesn't look to fully work - at the moment anyway so keep in Python until complete. - """ - orig_dtypes = tuple(map(str, df.dtypes)) - deltas = df.select_dtypes(include=["timedelta64[ns]"]) - - if deltas.empty: - pass - else: - df = df.copy() - - for index, (_, content) in enumerate(df.items()): - if content.dtype == "timedelta64[ns]": - df.iloc[:, index] = content.apply(_timedelta_to_interval) - - return df, orig_dtypes - - -def _maybe_convert_utctimestamp(df: pd.DataFrame) -> pd.DataFrame: - """ - Hyper implements a subset of postgres and doesn't implement timezone-aware datetimes - Thus, we localize to timezone-naive - """ - for utc_col in df.select_dtypes("datetime64[ns, UTC]"): - df[utc_col] = df[utc_col].dt.tz_convert(None) - return df - - -def _insert_frame( - df: pd.DataFrame, - *, - connection: tab_api.Connection, - table: pantab_types.TableType, - table_mode: str, - use_parquet: bool, -) -> None: - _validate_table_mode(table_mode) - - if isinstance(table, str): - table = tab_api.TableName(table) - - # Populate insertion mechanisms dependent on column types - column_types: List[pantab_types._ColumnType] = [] - columns: List[tab_api.TableDefinition.Column] = [] - for col_name, dtype in df.dtypes.items(): - column_type = _pandas_to_tableau_type(dtype.name) - column_types.append(column_type) - columns.append( - tab_api.TableDefinition.Column( - name=col_name, - type=column_type.type_, - nullability=column_type.nullability, - ) - ) - - # Sanity check for existing table structures - if table_mode == "a" and connection.catalog.has_table(table): - table_def = connection.catalog.get_table_definition(table) - _assert_columns_equal(columns, table_def.columns) - else: # New table, potentially new schema - table_def = tab_api.TableDefinition(table) - - for column, column_type in zip(columns, column_types): - table_def.add_column(column) - - if isinstance(table, tab_api.TableName) and table.schema_name: - connection.catalog.create_schema_if_not_exists(table.schema_name) - - connection.catalog.create_table_if_not_exists(table_def) - - if not use_parquet: - null_mask = np.ascontiguousarray(pd.isnull(df)) - # Special handling for conversions - df, dtypes = _maybe_convert_timedelta(df) - - with tab_api.Inserter(connection, table_def) as inserter: - if compat.PANDAS_130: - df = _maybe_convert_utctimestamp(df) - libpantab.write_to_hyper(df, null_mask, inserter._buffer, dtypes) - else: - libpantab.write_to_hyper_legacy( - df.itertuples(index=False, name=None), - null_mask, - inserter._buffer, - 
df.shape[1], - dtypes, - ) - inserter.execute() - else: - if any(x.name == "timedelta64[ns]" for x in df.dtypes): - raise ValueError( - "Writing timedelta values with use_parquet=True is not yet supported." - ) - - import pyarrow as pa - import pyarrow.parquet as pq - - tbl = pa.Table.from_pandas(df) - non_nullable = {"int16", "int32", "int64", "bool"} - new_fields = [] - for field, dtype in zip(tbl.schema, df.dtypes): - if dtype.name in non_nullable: - new_fields.append( - pa.field(name=field.name, type=field.type, nullable=False) - ) - else: - new_fields.append(field) - - new_schema = pa.schema(new_fields) - tbl = tbl.cast(new_schema) - - # Windows can't read and write a NamedTemporaryFile in one pass - with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp: - pq.write_table(tbl, tmp) - - connection.execute_command( - f"COPY {table} FROM '{tmp.name}' WITH (FORMAT 'parquet')" - ) - - try: - os.unlink(tmp.name) - except FileNotFoundError: - pass - - def frame_to_hyper( df: pd.DataFrame, database: Union[str, pathlib.Path], *, table: pantab_types.TableType, table_mode: str = "w", - hyper_process: Optional[tab_api.HyperProcess] = None, - use_parquet: bool = False, ) -> None: """See api.rst for documentation""" frames_to_hyper( {table: df}, database, table_mode, - hyper_process=hyper_process, - use_parquet=use_parquet, ) @@ -237,29 +38,28 @@ def frames_to_hyper( table_mode: str = "w", *, hyper_process: Optional[tab_api.HyperProcess] = None, - use_parquet: bool = False, ) -> None: """See api.rst for documentation.""" _validate_table_mode(table_mode) - with ensure_hyper_process(hyper_process) as hpe: - tmp_db = pathlib.Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.hyper" + tmp_db = pathlib.Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.hyper" + + if table_mode == "a" and pathlib.Path(database).exists(): + shutil.copy(database, tmp_db) + + def convert_to_table_name(table: pantab_types.TableType): + # nanobind expects a tuple of (schema, table) strings + if isinstance(table, (str, tab_api.Name)) or not table.schema_name: + table = tab_api.TableName("public", table) - if table_mode == "a" and pathlib.Path(database).exists(): - shutil.copy(database, tmp_db) + return (table.schema_name.name.unescaped, table.name.unescaped) - with tab_api.Connection( - hpe.endpoint, tmp_db, tab_api.CreateMode.CREATE_IF_NOT_EXISTS - ) as connection: - for table, df in dict_of_frames.items(): - _insert_frame( - df, - connection=connection, - table=table, - table_mode=table_mode, - use_parquet=use_parquet, - ) + data = { + convert_to_table_name(key): pa.Table.from_pandas(val) + for key, val in dict_of_frames.items() + } + libpantab.write_to_hyper(data, path=str(tmp_db), table_mode=table_mode) - # In Python 3.9+ we can just pass the path object, but due to bpo 32689 - # and subsequent typeshed changes it is easier to just pass as str for now - shutil.move(str(tmp_db), database) + # In Python 3.9+ we can just pass the path object, but due to bpo 32689 + # and subsequent typeshed changes it is easier to just pass as str for now + shutil.move(str(tmp_db), database) diff --git a/pantab/src/CMakeLists.txt b/pantab/src/CMakeLists.txt new file mode 100644 index 00000000..4fc881ad --- /dev/null +++ b/pantab/src/CMakeLists.txt @@ -0,0 +1,30 @@ +nanobind_add_module(pantab NOMINSIZE pantab.cpp numpy_datetime.cpp) +target_include_directories(pantab PUBLIC ${Python_NumPy_INCLUDE_DIRS}) +target_link_libraries(pantab + PRIVATE Tableau::tableauhyperapi-cxx + PRIVATE nanoarrow +) +set_target_properties(nanoarrow + 
PROPERTIES POSITION_INDEPENDENT_CODE + ON) + +install(TARGETS pantab + LIBRARY DESTINATION ${SKBUILD_PROJECT_NAME}/src) + +if(WIN32) + set(HYPERAPI_LIB_NAME "tableauhyperapi.lib") + set(HYPERAPI_BIN_LOC "bin/hyper") +elseif(APPLE) + set(HYPERAPI_LIB_NAME "libtableauhyperapi.dylib") + set(HYPERAPI_BIN_LOC "lib/hyper") +else() + set(HYPERAPI_LIB_NAME "libtableauhyperapi.so") + set(HYPERAPI_BIN_LOC "lib/hyper") +endif() + +# Auditwheel doesn't know how to handle the cmake dependencies +# so we manually install here and exclude from auditwheel +install(FILES ${tableauhyperapi-cxx_SOURCE_DIR}/lib/${HYPERAPI_LIB_NAME} + DESTINATION ${SKBUILD_PROJECT_NAME}/src) +install(DIRECTORY "${tableauhyperapi-cxx_SOURCE_DIR}/${HYPERAPI_BIN_LOC}/" + DESTINATION ${SKBUILD_PROJECT_NAME}/src/hyper) diff --git a/pantab/src/__init__.py b/pantab/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pantab/src/cffi.h b/pantab/src/cffi.h deleted file mode 100644 index 4e4f0cdb..00000000 --- a/pantab/src/cffi.h +++ /dev/null @@ -1,73 +0,0 @@ -/* This header file is copied directly from cffi to allow interaction -with cffi C-level objects without including the entire library. - -cffi is licensed under the MIT license, with originaly copyright included -below: - -Except when otherwise stated (look for LICENSE files in directories or -information at the beginning of each file) all software and -documentation is licensed as follows: - - The MIT License - - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation - files (the "Software"), to deal in the Software without - restriction, including without limitation the rights to use, - copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the - Software is furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. -*/ - -#ifndef PANTAB_CFFI_H -#define PANTAB_CFFI_H - -#define PY_SSIZE_T_CLEAN -#include - -typedef struct _ctypedescr { - PyObject_VAR_HEAD - - struct _ctypedescr *ct_itemdescr; /* ptrs and arrays: the item type */ - PyObject *ct_stuff; /* structs: dict of the fields - arrays: ctypedescr of the ptr type - function: tuple(abi, ctres, ctargs..) - enum: pair {"name":x},{x:"name"} - ptrs: lazily, ctypedescr of array */ - void *ct_extra; /* structs: first field (not a ref!) 
- function types: cif_description - primitives: prebuilt "cif" object */ - - PyObject *ct_weakreflist; /* weakref support */ - - PyObject *ct_unique_key; /* key in unique_cache (a string, but not - human-readable) */ - - Py_ssize_t ct_size; /* size of instances, or -1 if unknown */ - Py_ssize_t ct_length; /* length of arrays, or -1 if unknown; - or alignment of primitive and struct types; - always -1 for pointers */ - int ct_flags; /* CT_xxx flags */ - - int ct_name_position; /* index in ct_name of where to put a var name */ - char ct_name[1]; /* string, e.g. "int *" for pointers to ints */ -} CTypeDescrObject; - -typedef struct { - PyObject_HEAD CTypeDescrObject *c_type; - char *c_data; - PyObject *c_weakreflist; -} CDataObject; - -#endif diff --git a/pantab/src/numpy_datetime.c b/pantab/src/numpy_datetime.cpp similarity index 100% rename from pantab/src/numpy_datetime.c rename to pantab/src/numpy_datetime.cpp diff --git a/pantab/src/pantab.c b/pantab/src/pantab.c deleted file mode 100644 index 8a9e06b4..00000000 --- a/pantab/src/pantab.c +++ /dev/null @@ -1,64 +0,0 @@ -#define PY_SSIZE_T_CLEAN -#include -#define PY_ARRAY_UNIQUE_SYMBOL PANTAB_ARRAY_API -#include - -#include "cffi.h" -#include "reader.h" -#include "tableauhyperapi.h" -#include "writer.h" - -// Function pointers, initialized by `load_hapi_functions` function -#define C(RET, NAME, ARGS) RET(*NAME) ARGS = NULL; -HYPERAPI_FUNCTIONS(C) -#undef C - -static PyObject *load_hapi_functions(PyObject *Py_UNUSED(dummy), - PyObject *args) { - bool ok; -#define C(RET, NAME, ARGS) PyObject *NAME##_arg; - HYPERAPI_FUNCTIONS(C) -#undef C - const char *formatStr = -#define C(RET, NAME, ARGS) "O" - HYPERAPI_FUNCTIONS(C) -#undef C - ; - - ok = PyArg_ParseTuple(args, formatStr -#define C(RET, NAME, ARGS) , &NAME##_arg - HYPERAPI_FUNCTIONS(C) -#undef C - ); - if (!ok) - return NULL; - - // TODO: check that we get an instance of CDataObject; else will - // segfault -#define C(RET, NAME, ARGS) \ - NAME = (RET(*) ARGS)(((CDataObject *)NAME##_arg)->c_data); - HYPERAPI_FUNCTIONS(C) -#undef C - - Py_RETURN_NONE; -} - -static PyMethodDef methods[] = { - {"load_hapi_functions", load_hapi_functions, METH_VARARGS, - "Initializes the HyperAPI functions used by pantab."}, - {"write_to_hyper_legacy", write_to_hyper_legacy, METH_VARARGS, - "Legacy method to Write a numpy array to a hyper file."}, - {"write_to_hyper", write_to_hyper, METH_VARARGS, - "Writes a dataframe array to a hyper file."}, - {"read_hyper_query", read_hyper_query, METH_VARARGS, - "Reads a hyper query from a given connection."}, - {NULL, NULL, 0, NULL}}; - -static struct PyModuleDef pantabmodule = {.m_base = PyModuleDef_HEAD_INIT, - .m_name = "libpantab", - .m_methods = methods}; - -PyMODINIT_FUNC PyInit_libpantab(void) { - import_array(); - return PyModule_Create(&pantabmodule); -} diff --git a/pantab/src/pantab.cpp b/pantab/src/pantab.cpp new file mode 100644 index 00000000..6cdac616 --- /dev/null +++ b/pantab/src/pantab.cpp @@ -0,0 +1,619 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "numpy_datetime.h" + +namespace nb = nanobind; + +using Dtype = std::tuple; + +enum TimeUnit { SECOND, MILLI, MICRO, NANO }; + +static hyperapi::SqlType hyperTypeFromArrowSchema(struct ArrowSchema *schema, + ArrowError *error) { + struct ArrowSchemaView schema_view; + if (ArrowSchemaViewInit(&schema_view, schema, error) != 0) { + throw 
std::runtime_error("Issue converting to hyper type: " + + std::string(error->message)); + } + + switch (schema_view.type) { + case NANOARROW_TYPE_INT16: + return hyperapi::SqlType::smallInt(); + case NANOARROW_TYPE_INT32: + return hyperapi::SqlType::integer(); + case NANOARROW_TYPE_INT64: + return hyperapi::SqlType::bigInt(); + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DOUBLE: + return hyperapi::SqlType::doublePrecision(); + case NANOARROW_TYPE_BOOL: + return hyperapi::SqlType::boolean(); + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + return hyperapi::SqlType::text(); + case NANOARROW_TYPE_TIMESTAMP: + if (std::strcmp("", schema_view.timezone)) { + return hyperapi::SqlType::timestampTZ(); + } else { + return hyperapi::SqlType::timestamp(); + } + default: + throw std::invalid_argument("Unsupported Arrow type: " + + std::to_string(schema_view.type)); + } +} + +class InsertHelper { +public: + InsertHelper(std::shared_ptr inserter, + const struct ArrowArray *chunk, const struct ArrowSchema *schema, + struct ArrowError *error, int64_t column_position) + : inserter_(inserter), chunk_(chunk), schema_(schema), error_(error), + column_position_(column_position) {} + + virtual ~InsertHelper() {} + + void Init() { + struct ArrowSchema *child_schema = schema_->children[column_position_]; + + if (ArrowArrayViewInitFromSchema(&array_view_, child_schema, error_) != 0) { + throw std::runtime_error("Could not construct insert helper: " + + std::string{error_->message}); + } + + if (ArrowArrayViewSetArray(&array_view_, chunk_->children[column_position_], + error_) != 0) { + throw std::runtime_error("Could not set array view: " + + std::string{error_->message}); + } + } + + virtual void insertValueAtIndex(size_t) {} + +protected: + std::shared_ptr inserter_; + const struct ArrowArray *chunk_; + const struct ArrowSchema *schema_; + struct ArrowError *error_; + const int64_t column_position_; + struct ArrowArrayView array_view_; +}; + +template class PrimitiveInsertHelper : public InsertHelper { +public: + using InsertHelper::InsertHelper; + + void insertValueAtIndex(size_t idx) override { + if (ArrowArrayViewIsNull(&array_view_, idx)) { + // MSVC on cibuildwheel doesn't like this templated optional + // inserter_->add(std::optional{std::nullopt}); + hyperapi::internal::ValueInserter{*inserter_}.addNull(); + return; + } + constexpr size_t elem_size = sizeof(T); + T result; + memcpy(&result, + array_view_.buffer_views[1].data.as_uint8 + (idx * elem_size), + elem_size); + inserter_->add(result); + } +}; + +template class Utf8InsertHelper : public InsertHelper { +public: + using InsertHelper::InsertHelper; + + void insertValueAtIndex(size_t idx) override { + if (ArrowArrayViewIsNull(&array_view_, idx)) { + // MSVC on cibuildwheel doesn't like this templated optional + // inserter_->add(std::optional{std::nullopt}); + hyperapi::internal::ValueInserter{*inserter_}.addNull(); + return; + } + + struct ArrowBufferView buffer_view = + ArrowArrayViewGetBytesUnsafe(&array_view_, idx); + auto result = std::string{buffer_view.data.as_char, + static_cast(buffer_view.size_bytes)}; + inserter_->add(result); + } +}; + +template +class TimestampInsertHelper : public InsertHelper { +public: + using InsertHelper::InsertHelper; + + void insertValueAtIndex(size_t idx) override { + constexpr size_t elem_size = sizeof(int64_t); + if (ArrowArrayViewIsNull(&array_view_, idx)) { + // MSVC on cibuildwheel doesn't like this templated optional + // inserter_->add(std::optional{std::nullopt}); + 
hyperapi::internal::ValueInserter{*inserter_}.addNull(); + return; + } + int64_t value; + + memcpy(&value, + array_view_.buffer_views[1].data.as_uint8 + (idx * elem_size), + elem_size); + + // using timestamp_t = + // typename std::conditional::type; + + // TODO: need overflow checks here + npy_datetimestruct dts; + PyArray_DatetimeMetaData meta; + if constexpr (TU == TimeUnit::SECOND) { + meta = {NPY_FR_s, 1}; + } else if constexpr (TU == TimeUnit::MILLI) { + meta = {NPY_FR_ms, 1}; + } else if constexpr (TU == TimeUnit::MICRO) { + meta = {NPY_FR_us, 1}; + } else if constexpr (TU == TimeUnit::NANO) { + // we assume pandas is ns here but should check format + meta = {NPY_FR_ns, 1}; + } + + int ret = convert_datetime_to_datetimestruct(&meta, value, &dts); + if (ret != 0) { + throw std::invalid_argument("could not convert datetime value "); + } + hyperapi::Date dt{static_cast(dts.year), + static_cast(dts.month), + static_cast(dts.day)}; + hyperapi::Time time{static_cast(dts.hour), + static_cast(dts.min), + static_cast(dts.sec), dts.us}; + + if constexpr (TZAware) { + hyperapi::OffsetTimestamp ts{dt, time, std::chrono::minutes{0}}; + inserter_->add(ts); + + } else { + hyperapi::Timestamp ts{dt, time}; + inserter_->add(ts); + } + } +}; + +static std::unique_ptr +makeInsertHelper(std::shared_ptr inserter, + struct ArrowArray *chunk, struct ArrowSchema *schema, + struct ArrowError *error, int64_t column_position) { + // TODO: we should provide the full dtype here not just format string, so + // boolean fields can determine whether they are bit or byte masks + + // right now we pass false as the template paramter to the + // PrimitiveInsertHelper as that is all pandas generates; other libraries may + // need the true variant + struct ArrowSchemaView schema_view; + if (ArrowSchemaViewInit(&schema_view, schema->children[column_position], + error) != 0) { + throw std::runtime_error("Issue generating insert helper: " + + std::string(error->message)); + } + + switch (schema_view.type) { + case NANOARROW_TYPE_INT16: + return std::unique_ptr(new PrimitiveInsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_INT32: + return std::unique_ptr(new PrimitiveInsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_INT64: + return std::unique_ptr(new PrimitiveInsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_FLOAT: + return std::unique_ptr(new PrimitiveInsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_DOUBLE: + return std::unique_ptr(new PrimitiveInsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_BOOL: + return std::unique_ptr(new PrimitiveInsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + return std::unique_ptr(new Utf8InsertHelper( + inserter, chunk, schema, error, column_position)); + case NANOARROW_TYPE_TIMESTAMP: + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + if (std::strcmp("", schema_view.timezone)) { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } else { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } + case NANOARROW_TIME_UNIT_MILLI: + if (std::strcmp("", schema_view.timezone)) { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } 
else { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } + case NANOARROW_TIME_UNIT_MICRO: + if (std::strcmp("", schema_view.timezone)) { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } else { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } + case NANOARROW_TIME_UNIT_NANO: + if (std::strcmp("", schema_view.timezone)) { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } else { + return std::unique_ptr( + new TimestampInsertHelper( + inserter, chunk, schema, error, column_position)); + } + } + throw std::runtime_error( + "This code block should not be hit - contact a developer"); + default: + throw std::invalid_argument("makeInsertHelper: Unsupported Arrow type: " + + std::to_string(schema_view.type)); + } +} + +using SchemaAndTableName = std::tuple; + +void write_to_hyper( + const std::map &dict_of_exportable, + const std::string &path, const std::string &table_mode) { + hyperapi::HyperProcess hyper{ + hyperapi::Telemetry::DoNotSendUsageDataToTableau}; + + // TODO: we don't have separate table / database create modes in the API + // but probably should; for now we infer this from table mode + const auto createMode = table_mode == "w" + ? hyperapi::CreateMode::CreateAndReplace + : hyperapi::CreateMode::CreateIfNotExists; + + hyperapi::Connection connection{hyper.getEndpoint(), path, createMode}; + const hyperapi::Catalog &catalog = connection.getCatalog(); + + for (auto const &[schema_and_table, exportable] : dict_of_exportable) { + const auto hyper_schema = std::get<0>(schema_and_table); + const auto hyper_table = std::get<1>(schema_and_table); + auto arrow_c_stream = nb::getattr(exportable, "__arrow_c_stream__")(); + + PyObject *obj = arrow_c_stream.ptr(); + if (!PyCapsule_CheckExact(obj)) { + throw std::invalid_argument("Object does not provide capsule"); + } + auto c_stream = static_cast( + PyCapsule_GetPointer(obj, "arrow_array_stream")); + auto stream = nanoarrow::UniqueArrayStream{c_stream}; + + struct ArrowSchema schema; + if (stream->get_schema(stream.get(), &schema) != 0) { + std::string error_msg{stream->get_last_error(stream.get())}; + throw std::runtime_error("Could not read from arrow schema:" + error_msg); + } + + struct ArrowError error; + auto names_vec = std::vector{}; + std::vector hyper_columns; + + for (int64_t i = 0; i < schema.n_children; i++) { + const auto hypertype = + hyperTypeFromArrowSchema(schema.children[i], &error); + const auto name = std::string{schema.children[i]->name}; + names_vec.push_back(name); + + // Almost all arrow types are nullable + hyper_columns.push_back(hyperapi::TableDefinition::Column{ + name, hypertype, hyperapi::Nullability::Nullable}); + } + + hyperapi::TableName table_name{hyper_schema, hyper_table}; + hyperapi::TableDefinition tableDef{table_name, hyper_columns}; + catalog.createSchemaIfNotExists(*table_name.getSchemaName()); + if (table_mode == "w") { + catalog.createTable(tableDef); + } else if (table_mode == "a") { + catalog.createTableIfNotExists(tableDef); + } + auto inserter = std::make_shared(connection, tableDef); + + struct ArrowArray chunk; + int errcode; + while ((errcode = stream->get_next(stream.get(), &chunk) == 0) && + chunk.release != NULL) { + const int nrows = chunk.length; + if (nrows < 0) { + throw std::runtime_error("Unexpected array length < 0"); + } + + std::vector> 
insert_helpers; + for (int64_t i = 0; i < schema.n_children; i++) { + // the lifetime of the inserthelper cannot exceed that of chunk or + // schema this is implicit; we should make this explicit + auto insert_helper = + makeInsertHelper(inserter, &chunk, &schema, &error, i); + + insert_helper->Init(); + insert_helpers.push_back(std::move(insert_helper)); + } + + for (int64_t row_idx = 0; row_idx < nrows; row_idx++) { + for (const auto &insert_helper : insert_helpers) { + insert_helper->insertValueAtIndex(row_idx); + } + inserter->endRow(); + } + } + + inserter->execute(); + } +} + +class ReadHelper { +public: + ReadHelper() {} + virtual ~ReadHelper() {} + virtual nb::object Read(const hyperapi::Value &) { return nb::none(); } +}; + +class IntegralReadHelper : public ReadHelper { + nb::object Read(const hyperapi::Value &value) { + if (value.isNull()) { + return nb::none(); + } + return nb::int_(value.get()); + } +}; + +class FloatReadHelper : public ReadHelper { + nb::object Read(const hyperapi::Value &value) { + if (value.isNull()) { + return nb::none(); + } + return nb::float_(value.get()); + } +}; + +class BooleanReadHelper : public ReadHelper { + nb::object Read(const hyperapi::Value &value) { + // TODO: bool support added in nanobind >= 1..9.0 + // return nb::bool_(value.get()); + if (value.isNull()) { + return nb::none(); + } + return nb::int_(value.get()); + } +}; + +class StringReadHelper : public ReadHelper { + nb::object Read(const hyperapi::Value &value) { + if (value.isNull()) { + return nb::none(); + } + return nb::str(value.get().c_str()); + } +}; + +class DateReadHelper : public ReadHelper { + nb::object Read(const hyperapi::Value &value) { + if (value.isNull()) { + return nb::none(); + } + + const auto hyper_date = value.get(); + const auto year = hyper_date.getYear(); + const auto month = hyper_date.getMonth(); + const auto day = hyper_date.getDay(); + + PyObject *result = PyDate_FromDate(year, month, day); + if (result == nullptr) { + throw std::invalid_argument("could not parse date"); + } + return nb::object(result, nb::detail::steal_t{}); + } +}; + +template class DatetimeReadHelper : public ReadHelper { + nb::object Read(const hyperapi::Value &value) { + if (value.isNull()) { + return nb::none(); + } + + using timestamp_t = + typename std::conditional::type; + const auto hyper_ts = value.get(); + const auto hyper_date = hyper_ts.getDate(); + const auto hyper_time = hyper_ts.getTime(); + const auto year = hyper_date.getYear(); + const auto month = hyper_date.getMonth(); + const auto day = hyper_date.getDay(); + const auto hour = hyper_time.getHour(); + const auto min = hyper_time.getMinute(); + const auto sec = hyper_time.getSecond(); + const auto usec = hyper_time.getMicrosecond(); + + PyObject *result = + PyDateTime_FromDateAndTime(year, month, day, hour, min, sec, usec); + if (result == nullptr) { + throw std::invalid_argument("could not parse timestamp"); + } + return nb::object(result, nb::detail::steal_t{}); + } +}; + +static std::unique_ptr makeReadHelper(hyperapi::SqlType sqltype) { + if ((sqltype == hyperapi::SqlType::smallInt()) || + (sqltype == hyperapi::SqlType::integer()) || + (sqltype == hyperapi::SqlType::bigInt())) { + return std::unique_ptr(new IntegralReadHelper()); + } else if (sqltype == hyperapi::SqlType::doublePrecision()) { + return std::unique_ptr(new FloatReadHelper()); + } else if ((sqltype == hyperapi::SqlType::text())) { + return std::unique_ptr(new StringReadHelper()); + } else if (sqltype == hyperapi::SqlType::boolean()) { + return 
std::unique_ptr(new BooleanReadHelper()); + } else if (sqltype == hyperapi::SqlType::date()) { + return std::unique_ptr(new DateReadHelper()); + } else if (sqltype == hyperapi::SqlType::timestamp()) { + return std::unique_ptr(new DatetimeReadHelper()); + } else if (sqltype == hyperapi::SqlType::timestampTZ()) { + return std::unique_ptr(new DatetimeReadHelper()); + } + + throw nb::type_error(("cannot read sql type: " + sqltype.toString()).c_str()); +} + +static std::string pandasDtypeFromHyper(const hyperapi::SqlType &sqltype) { + if (sqltype == hyperapi::SqlType::smallInt()) { + return "int16[pyarrow]"; + } else if (sqltype == hyperapi::SqlType::integer()) { + return "int32[pyarrow]"; + } else if (sqltype == hyperapi::SqlType::bigInt()) { + return "int64[pyarrow]"; + } else if (sqltype == hyperapi::SqlType::doublePrecision()) { + return "double[pyarrow]"; + } else if (sqltype == hyperapi::SqlType::text()) { + return "string[pyarrow]"; + } else if (sqltype == hyperapi::SqlType::boolean()) { + return "boolean[pyarrow]"; + } else if (sqltype == hyperapi::SqlType::timestamp()) { + return "timestamp[us][pyarrow]"; + } else if (sqltype == hyperapi::SqlType::timestampTZ()) { + return "timestamp[us, UTC][pyarrow]"; + } else if (sqltype == hyperapi::SqlType::date()) { + return "date32[pyarrow]"; + } + + throw nb::type_error( + ("unimplemented pandas dtype for type: " + sqltype.toString()).c_str()); +} + +using ColumnNames = std::vector; +using ResultBody = std::vector>; +// In a future version of pantab it would be nice to not require pandas dtypes +// However, the current reader just creates PyObjects and loses that information +// when passing back to the Python runtime; hence the explicit passing +using PandasDtypes = std::vector; +/// +/// read_from_hyper_query is slightly different than read_from_hyper_table +/// because the former detects a schema from the hyper Result object +/// which does not hold nullability information +/// +std::tuple +read_from_hyper_query(const std::string &path, const std::string &query) { + std::vector> result; + hyperapi::HyperProcess hyper{ + hyperapi::Telemetry::DoNotSendUsageDataToTableau}; + hyperapi::Connection connection(hyper.getEndpoint(), path); + + std::vector columnNames; + std::vector pandasDtypes; + std::vector> read_helpers; + + hyperapi::Result hyperResult = connection.executeQuery(query); + const auto resultSchema = hyperResult.getSchema(); + for (const auto &column : resultSchema.getColumns()) { + read_helpers.push_back(makeReadHelper(column.getType())); + auto name = column.getName().getUnescaped(); + columnNames.push_back(name); + + // the query result set does not tell us if columns are nullable or not + auto const sqltype = column.getType(); + pandasDtypes.push_back(pandasDtypeFromHyper(sqltype)); + } + for (const hyperapi::Row &row : hyperResult) { + std::vector rowdata; + size_t column_idx = 0; + for (const hyperapi::Value &value : row) { + const auto &read_helper = read_helpers[column_idx]; + rowdata.push_back(read_helper->Read(value)); + column_idx++; + } + result.push_back(rowdata); + } + + return std::make_tuple(result, columnNames, pandasDtypes); +} + +std::tuple +read_from_hyper_table(const std::string &path, const std::string &schema, + const std::string &table) { + std::vector> result; + hyperapi::HyperProcess hyper{ + hyperapi::Telemetry::DoNotSendUsageDataToTableau}; + hyperapi::Connection connection(hyper.getEndpoint(), path); + hyperapi::TableName extractTable{schema, table}; + const hyperapi::Catalog &catalog = 
connection.getCatalog(); + const hyperapi::TableDefinition tableDef = + catalog.getTableDefinition(extractTable); + + std::vector columnNames; + std::vector pandasDtypes; + std::vector> read_helpers; + + for (auto &column : tableDef.getColumns()) { + read_helpers.push_back(makeReadHelper(column.getType())); + auto name = column.getName().getUnescaped(); + columnNames.push_back(name); + + auto const sqltype = column.getType(); + pandasDtypes.push_back(pandasDtypeFromHyper(sqltype)); + } + + hyperapi::Result hyperResult = + connection.executeQuery("SELECT * FROM " + extractTable.toString()); + for (const hyperapi::Row &row : hyperResult) { + std::vector rowdata; + size_t column_idx = 0; + for (const hyperapi::Value &value : row) { + const auto &read_helper = read_helpers[column_idx]; + rowdata.push_back(read_helper->Read(value)); + column_idx++; + } + result.push_back(rowdata); + } + + return std::make_tuple(result, columnNames, pandasDtypes); +} + +NB_MODULE(pantab, m) { + m.def("write_to_hyper", &write_to_hyper, nb::arg("dict_of_exportable"), + nb::arg("path"), nb::arg("table_mode")) + .def("read_from_hyper_query", &read_from_hyper_query, nb::arg("path"), + nb::arg("query")) + .def("read_from_hyper_table", &read_from_hyper_table, nb::arg("path"), + nb::arg("schema"), nb::arg("table")); + PyDateTime_IMPORT; +} diff --git a/pantab/src/reader.c b/pantab/src/reader.c deleted file mode 100644 index 5f7e1131..00000000 --- a/pantab/src/reader.c +++ /dev/null @@ -1,204 +0,0 @@ -#include "cffi.h" -#include "type.h" -#include - -static PyObject *cls_timedelta = NULL; - -// the pointer to size is only used if receiving a character array -static PyObject *read_value(const uint8_t *value, DTYPE dtype, - const size_t *size) { - if (PyErr_CheckSignals()) { - return NULL; - } - - switch (dtype) { - case INT16_: - case INT16NA: - return PyLong_FromLong(*((int16_t *)value)); - case INT32_: - case INT32NA: - return PyLong_FromLong(*((int32_t *)value)); - case INT64_: - case INT64NA: - return PyLong_FromLongLong(*((int64_t *)value)); - - case BOOLEAN: - case BOOLEANNA: - return PyBool_FromLong(*value); - - case FLOAT32_: - case FLOAT64_: - case FLOAT32NA: - case FLOAT64NA: - return PyFloat_FromDouble(*((double *)value)); - - case STRING: - case OBJECT: - return PyUnicode_FromStringAndSize((const char *)value, *size); - - case DATE: { - hyper_date_components_t date = hyper_decode_date(*((hyper_date_t *)value)); - return PyDate_FromDate(date.year, date.month, date.day); - } - - case DATETIME64_NS: - case DATETIME64_NS_UTC: { - hyper_time_t val = *((hyper_time_t *)value); - - hyper_date_t encoded_date = - (hyper_date_t)(val / (hyper_time_t)MICROSECONDS_PER_DAY); - hyper_time_t encoded_time = val % (hyper_time_t)MICROSECONDS_PER_DAY; - hyper_date_components_t date = hyper_decode_date(encoded_date); - hyper_time_components_t time = hyper_decode_time(encoded_time); - - return PyDateTime_FromDateAndTime(date.year, date.month, date.day, - time.hour, time.minute, time.second, - time.microsecond); - } - - case TIMEDELTA64_NS: { - // Unfortunately PyDelta_FromDSU and the pandas Timedelta class - // are not compatible in signature, particularly when it comes - // to handling negative days. 
As such, we construct the pandas - // object instead of using the CPython API - - if (cls_timedelta == NULL) { - PyObject *mod_pandas = PyImport_ImportModule("pandas"); - if (mod_pandas == NULL) { - return NULL; - } - - cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); - Py_DECREF(mod_pandas); - if (cls_timedelta == NULL) { - return NULL; - } - } - - py_interval interval = *((py_interval *)value); - if (interval.months != 0) { - PyObject *errMsg = - PyUnicode_FromFormat("Cannot read Intervals with month components."); - PyErr_SetObject(PyExc_ValueError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - - PyObject *kwargs = PyDict_New(); - if (kwargs == NULL) - return NULL; - - PyDict_SetItemString(kwargs, "days", PyLong_FromLongLong(interval.days)); - PyDict_SetItemString(kwargs, "microseconds", - PyLong_FromLongLong(interval.microseconds)); - PyObject *dummy = PyTuple_New(0); // need this for PyObject_Call - - PyObject *td = PyObject_Call(cls_timedelta, dummy, kwargs); - Py_DECREF(dummy); - Py_DECREF(kwargs); - - return td; - } - - default: { - PyObject *errMsg = PyUnicode_FromFormat("Invalid dtype: \"%s\""); - PyErr_SetObject(PyExc_ValueError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - } -} - -PyObject *read_hyper_query(PyObject *Py_UNUSED(dummy), PyObject *args) { - int ok; - PyObject *row = NULL, *resultObj; - PyTupleObject *dtypes; - hyper_rowset_t *rowset; - hyper_rowset_chunk_t *chunk; - hyper_error_t *hyper_err; - size_t num_cols, num_rows; - const uint8_t *const *values; - const size_t *sizes; - - PyDateTime_IMPORT; - - ok = PyArg_ParseTuple(args, "OO!", &resultObj, &PyTuple_Type, &dtypes); - if (!ok) - return NULL; - - // TODO: check that we get an instance of CDataObject; else will segfault - rowset = (hyper_rowset_t *)((CDataObject *)resultObj)->c_data; - - // TODO: we need to free these somewhere as these currently leak... - DTYPE *enumeratedDtypes = makeEnumeratedDtypes(dtypes); - if (enumeratedDtypes == NULL) - return NULL; - - PyObject *result = PyList_New(0); - if (result == NULL) { - return NULL; - } - - // Iterate over each result chunk - while (1) { - - hyper_err = hyper_rowset_get_next_chunk(rowset, &chunk); - if (hyper_err) { - goto ERROR_CLEANUP; - } - - if (chunk == NULL) { - break; // No more to parse - } - - hyper_rowset_chunk_field_values(chunk, &num_cols, &num_rows, &values, - &sizes); - - // For each row inside the chunk... - for (size_t i = 0; i < num_rows; i++) { - row = PyTuple_New(num_cols); - if (row == NULL) { - goto ERROR_CLEANUP; - } - - // For each column inside the row... 
- for (size_t j = 0; j < num_cols; j++) { - PyObject *val; - if (*values == NULL) { - val = Py_None; - Py_INCREF(val); - } else { - DTYPE dtype = enumeratedDtypes[j]; - val = read_value(*values, dtype, sizes); - } - - values++, sizes++; - - if (val == NULL) { - goto ERROR_CLEANUP; - } - - PyTuple_SET_ITEM(row, j, val); - } - - int ret = PyList_Append(result, row); - if (ret != 0) { - goto ERROR_CLEANUP; - } - } - hyper_destroy_rowset_chunk(chunk); - } - - Py_XDECREF(cls_timedelta); - - return result; - -ERROR_CLEANUP: - Py_XDECREF(row); - Py_XDECREF(result); - Py_XDECREF(cls_timedelta); - if (chunk != NULL) - hyper_destroy_rowset_chunk(chunk); - - return NULL; -} diff --git a/pantab/src/reader.h b/pantab/src/reader.h deleted file mode 100644 index 00646067..00000000 --- a/pantab/src/reader.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef PANTAB_READER_H -#define PANTAB_READER_H - -#define PY_SSIZE_T_CLEAN -#include - -PyObject *read_hyper_query(PyObject *Py_UNUSED(dummy), PyObject *args); - -#endif diff --git a/pantab/src/tableauhyperapi.h b/pantab/src/tableauhyperapi.h deleted file mode 100644 index 0d252d3b..00000000 --- a/pantab/src/tableauhyperapi.h +++ /dev/null @@ -1,83 +0,0 @@ -/* This file is a modified port of the lib_h.py file provided by Tableau. - -The original copyright notice is included below for reference. - -# ----------------------------------------------------------------------------- -# -# This file is the copyrighted property of Tableau Software and is protected -# by registered patents and other applicable U.S. and international laws and -# regulations. -# -# Unlicensed use of the contents of this file is prohibited. Please refer to -# the NOTICES.txt file for further details. -# -# ----------------------------------------------------------------------------- -*/ - -#ifndef PANTAB_HYPER_API -#define PANTAB_HYPER_API - -#include -#include -#include - -typedef uint32_t hyper_date_t; -typedef struct { - int32_t year; - int16_t month; - int16_t day; -} hyper_date_components_t; -typedef uint64_t hyper_time_t; -typedef struct { - int8_t hour; - int8_t minute; - int8_t second; - int32_t microsecond; -} hyper_time_components_t; - -typedef struct hyper_error_t hyper_error_t; -typedef struct hyper_inserter_buffer_t hyper_inserter_buffer_t; -typedef struct hyper_rowset_t hyper_rowset_t; -typedef struct hyper_rowset_chunk_t hyper_rowset_chunk_t; - -#define HYPERAPI_FUNCTIONS(C) \ - C(hyper_date_components_t, hyper_decode_date, (hyper_date_t date)) \ - C(hyper_date_t, hyper_encode_date, (hyper_date_components_t components)) \ - C(hyper_time_components_t, hyper_decode_time, (hyper_time_t time)) \ - C(hyper_time_t, hyper_encode_time, (hyper_time_components_t components)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_null, \ - (hyper_inserter_buffer_t * buffer)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_bool, \ - (hyper_inserter_buffer_t * buffer, bool value)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_int16, \ - (hyper_inserter_buffer_t * buffer, int16_t value)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_int32, \ - (hyper_inserter_buffer_t * buffer, int32_t value)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_int64, \ - (hyper_inserter_buffer_t * buffer, int64_t value)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_double, \ - (hyper_inserter_buffer_t * buffer, double value)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_binary, \ - (hyper_inserter_buffer_t * buffer, const uint8_t *value, size_t size)) \ - C(hyper_error_t *, hyper_inserter_buffer_add_raw, \ - 
(hyper_inserter_buffer_t * buffer, const uint8_t *value, size_t size)) \ - C(hyper_error_t *, hyper_rowset_get_next_chunk, \ - (hyper_rowset_t * rowset, hyper_rowset_chunk_t * *rowset_chunk)) \ - C(void, hyper_destroy_rowset_chunk, \ - (const hyper_rowset_chunk_t *rowset_chunk)) \ - C(void, hyper_rowset_chunk_field_values, \ - (hyper_rowset_chunk_t * rowset_chunk, size_t * col_count, \ - size_t * row_count, const uint8_t *const *values[], \ - const size_t *sizes[])) - -#define C(RET, NAME, ARGS) extern RET(*NAME) ARGS; -HYPERAPI_FUNCTIONS(C) -#undef C - -// custom addition from the Python binding; mistmatch with C API -typedef struct { - int64_t microseconds; - int32_t days; - int32_t months; -} py_interval; -#endif diff --git a/pantab/src/type.c b/pantab/src/type.c deleted file mode 100644 index 83a6b869..00000000 --- a/pantab/src/type.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "type.h" - -static DTYPE stringToDtype(const char *str) { - for (Py_ssize_t i = 0; - i < (Py_ssize_t)(sizeof(dtype_map) / sizeof(dtype_map[0])); i++) { - if (strcmp(str, dtype_map[i].str) == 0) { - return dtype_map[i].dtype; - } - } - - return UNKNOWN; -} - -// Caller is responsible for returned object -DTYPE *makeEnumeratedDtypes(PyTupleObject *obj) { - Py_ssize_t len = PyTuple_GET_SIZE(obj); - DTYPE *result = malloc(len * sizeof(DTYPE)); - - for (Py_ssize_t i = 0; i < len; i++) { - PyObject *dtypeObj = PyTuple_GET_ITEM(obj, i); - const char *dtypeStr = PyUnicode_AsUTF8(dtypeObj); - DTYPE dtype = stringToDtype(dtypeStr); - - if (dtype == UNKNOWN) { - free(result); - PyObject *errMsg = - PyUnicode_FromFormat("Unknown dtype: \"%s\"\n", dtypeStr); - PyErr_SetObject(PyExc_TypeError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - - result[i] = dtype; - } - - return result; -} diff --git a/pantab/src/type.h b/pantab/src/type.h deleted file mode 100644 index eca39cb1..00000000 --- a/pantab/src/type.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef PANTAB -#define PANTAB - -#define PY_SSIZE_T_CLEAN -#include "tableauhyperapi.h" -#include -#include - -#define MICROSECONDS_PER_DAY \ - (INT64_C(24) * INT64_C(60) * INT64_C(60) * INT64_C(1000000)) - -typedef enum { - INT16_ = 1, - INT32_, - INT64_, - INT16NA = 6, - INT32NA, - INT64NA, - FLOAT32_ = 11, - FLOAT64_, - FLOAT32NA, - FLOAT64NA, - BOOLEAN = 50, - BOOLEANNA, - DATETIME64_NS = 100, - DATETIME64_NS_UTC, - DATE, - TIMEDELTA64_NS = 200, - OBJECT = 220, - STRING, - UNKNOWN = 255 -} DTYPE; - -static const struct { - DTYPE dtype; - const char *str; -} dtype_map[] = {{INT16_, "int16"}, - {INT32_, "int32"}, - {INT64_, "int64"}, - {INT16NA, "Int16"}, - {INT32NA, "Int32"}, - {INT64NA, "Int64"}, - {FLOAT32_, "float32"}, - {FLOAT64_, "float64"}, - {FLOAT32NA, "Float32"}, - {FLOAT64NA, "Float64"}, - {BOOLEAN, "bool"}, - {BOOLEANNA, "boolean"}, - {DATETIME64_NS, "datetime64[ns]"}, - {DATETIME64_NS_UTC, "datetime64[ns, UTC]"}, - {DATE, "date"}, // TODO: this isn't actually a dtype - {TIMEDELTA64_NS, "timedelta64[ns]"}, - {STRING, "string"}, - {OBJECT, "object"}}; - -// creates an enumeration from a tuple of strings, -// so ("int16", "int32") -> [INT16_, INT32_] -// caller is responsible for freeing memory -// returns NULL on failure -DTYPE *makeEnumeratedDtypes(PyTupleObject *obj); - -#endif diff --git a/pantab/src/writer.c b/pantab/src/writer.c deleted file mode 100644 index d615cf38..00000000 --- a/pantab/src/writer.c +++ /dev/null @@ -1,641 +0,0 @@ -#include "cffi.h" -#include "type.h" -#include -#define NO_IMPORT_ARRAY -#define PY_ARRAY_UNIQUE_SYMBOL PANTAB_ARRAY_API -#include 
"numpy_datetime.h" -#include -#include - -/* -Creates an array of NpyIter structs in the same order as the arrays supplied. - -Caller is responsible for freeing memory. Returns NULL on error -*/ -static NpyIter **initiateIters(PyObject *arrList) { - NpyIter **npyIters = - PyObject_Malloc(sizeof(NpyIter *) * PyObject_Length(arrList)); - if (npyIters == NULL) { - PyErr_NoMemory(); - return NULL; - } - - for (Py_ssize_t i = 0; i < PyObject_Length(arrList); i++) { - PyArrayObject *arr = (PyArrayObject *)PyList_GET_ITEM(arrList, i); - - // Check contents of each numpy array - NpyIter *iter = NpyIter_New(arr, NPY_ITER_READONLY | NPY_ITER_REFS_OK, - NPY_KEEPORDER, NPY_NO_CASTING, NULL); - - // TODO: do we need to check NpyIter_IterationNeedsAPI(iter) anywhere? - // Applicable because of NPY_ITER_REFS_OK flags - if (iter == NULL) { - if (i > 0) { - while (--i) { - NpyIter_Deallocate(npyIters[i]); - } - } - - PyErr_NoMemory(); - return NULL; - } - - npyIters[i] = iter; - } - - return npyIters; -} - -/* Initiate iters outside of any loop for performance. - Caller is responsible for releasing memory. - - Returns NULL on error -*/ -static NpyIter_IterNextFunc **initiateIterNextFuncs(NpyIter **npyIters, - Py_ssize_t len) { - NpyIter_IterNextFunc **npyIterNextFuncs = - PyObject_Malloc(sizeof(NpyIter_IterNextFunc *) * len); - if (npyIterNextFuncs == NULL) { - PyErr_NoMemory(); - return NULL; - } - - for (Py_ssize_t i = 0; i < len; i++) { - NpyIter_IterNextFunc *func = NpyIter_GetIterNext(npyIters[i], NULL); - if (func == NULL) { - return NULL; - } - - npyIterNextFuncs[i] = func; - } - - return npyIterNextFuncs; -} - -static char ***initiateDataPtrs(NpyIter **npyIters, Py_ssize_t len) { - char ***dataptrs = PyObject_Malloc(sizeof(char **) * len); - if (dataptrs == NULL) { - PyErr_NoMemory(); - return NULL; - } - - for (Py_ssize_t i = 0; i < len; i++) { - char **dataptr = NpyIter_GetDataPtrArray(npyIters[i]); - if (dataptr == NULL) { - return NULL; - } - - dataptrs[i] = dataptr; - } - - return dataptrs; -} - -/* -Free an array of numpy array iterators. 
- -TODO: dynamically calculate how many to free rather than require length as arg -*/ -static void freeIters(NpyIter **iters, Py_ssize_t length) { - for (Py_ssize_t i = 0; i < length; i++) { - NpyIter_Deallocate(iters[i]); - } -} - -static hyper_error_t *writeNonNullData(char **dataptr, DTYPE dtype, - hyper_inserter_buffer_t *insertBuffer, - Py_ssize_t row, Py_ssize_t col) { - hyper_error_t *result; - switch (dtype) { - case INT16_: { - int16_t **ptr = (int16_t **)dataptr; - int16_t val = **ptr; - result = hyper_inserter_buffer_add_int16(insertBuffer, val); - break; - } - case INT16NA: { - PyObject ***ptr = (PyObject ***)dataptr; - // The fact that NA datatypes are stored as objects is a bit - // unfortunate for sizing, as the CPython API only exposes - // Long / LongLong data types - PyObject *obj = **ptr; - long val = PyLong_AsLong(obj); - result = hyper_inserter_buffer_add_int16(insertBuffer, (int16_t)val); - break; - } - case INT32_: { - int32_t **ptr = (int32_t **)dataptr; - int32_t val = **ptr; - result = hyper_inserter_buffer_add_int32(insertBuffer, val); - break; - } - case INT32NA: { - PyObject ***ptr = (PyObject ***)dataptr; - PyObject *obj = **ptr; - long val = PyLong_AsLong(obj); - result = hyper_inserter_buffer_add_int32(insertBuffer, val); - break; - } - case INT64_: { - int64_t **ptr = (int64_t **)dataptr; - int64_t val = **ptr; - result = hyper_inserter_buffer_add_int64(insertBuffer, val); - break; - } - case INT64NA: { - PyObject ***ptr = (PyObject ***)dataptr; - PyObject *obj = **ptr; - long long val = PyLong_AsLongLong(obj); - result = hyper_inserter_buffer_add_int64(insertBuffer, val); - break; - } - case FLOAT32_: { - float_t **ptr = (float_t **)dataptr; - float_t val = **ptr; - result = hyper_inserter_buffer_add_double(insertBuffer, val); - break; - } - case FLOAT64_: { - double_t **ptr = (double_t **)dataptr; - double_t val = **ptr; - result = hyper_inserter_buffer_add_double(insertBuffer, val); - break; - } - case FLOAT32NA: - case FLOAT64NA: { - PyObject ***ptr = (PyObject ***)dataptr; - PyObject *obj = **ptr; - double_t val = PyFloat_AsDouble(obj); - result = hyper_inserter_buffer_add_double(insertBuffer, val); - break; - } - case BOOLEAN: { - npy_bool **ptr = (npy_bool **)dataptr; - npy_bool val = **ptr; - result = hyper_inserter_buffer_add_bool(insertBuffer, val); - break; - } - case BOOLEANNA: { - PyObject ***ptr = (PyObject ***)dataptr; - PyObject *obj = **ptr; - int val = obj == Py_True; - result = hyper_inserter_buffer_add_bool(insertBuffer, val); - break; - } - case DATETIME64_NS: - case DATETIME64_NS_UTC: { - npy_datetime **ptr = (npy_datetime **)dataptr; - npy_datetime val = **ptr; - - npy_datetimestruct dts; - - // TODO: here we are using dummy metadata, but ideally - // should get from array in case pandas ever allows for - // different precision datetimes - PyArray_DatetimeMetaData meta = {.base = NPY_FR_ns, .num = 1}; - int ret = convert_datetime_to_datetimestruct(&meta, val, &dts); - if (ret != 0) { - PyObject *errMsg = - PyUnicode_FromFormat("Failed to convert numpy datetime"); - PyErr_SetObject(PyExc_RuntimeError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - - hyper_date_components_t date_components = { - .year = dts.year, .month = dts.month, .day = dts.day}; - - hyper_time_components_t time_components = {.hour = dts.hour, - .minute = dts.min, - .second = dts.sec, - .microsecond = dts.us}; - - hyper_date_t date = hyper_encode_date(date_components); - hyper_time_t time = hyper_encode_time(time_components); - - // TODO: Tableau uses typedefs 
for unsigned 32 / 64 integers for - // date and time respectively, but stores as int64; here we cast - // explicitly but should probably bounds check for overflow as well - int64_t ms = (int64_t)time + (int64_t)date * MICROSECONDS_PER_DAY; - - result = hyper_inserter_buffer_add_int64(insertBuffer, ms); - break; - } - case TIMEDELTA64_NS: { - PyObject ***ptr = (PyObject ***)dataptr; - PyObject *data = **ptr; - - // TODO: Add error message for failed attribute access - PyObject *us = PyObject_GetAttrString(data, "microseconds"); - if (us == NULL) { - return NULL; - } - PyObject *days = PyObject_GetAttrString(data, "days"); - if (days == NULL) { - Py_DECREF(us); - return NULL; - } - - PyObject *months = PyObject_GetAttrString(data, "months"); - if (months == NULL) { - Py_DECREF(us); - Py_DECREF(days); - return NULL; - } - - py_interval interval = {.microseconds = PyLong_AsLongLong(us), - .days = PyLong_AsLong(days), - .months = PyLong_AsLong(months)}; - - // TODO: it appears there is some buffer packing being done, though - // not sure this actually works in Tableau - result = hyper_inserter_buffer_add_raw( - insertBuffer, (const unsigned char *)&interval, sizeof(py_interval)); - Py_DECREF(us); - Py_DECREF(days); - Py_DECREF(months); - break; - } - case STRING: - case OBJECT: { - PyObject ***ptr = (PyObject ***)dataptr; - PyObject *obj = **ptr; - if (dtype == OBJECT) { - // N.B. all other dtypes in pandas are well defined, but object is - // really anything For purposes of Tableau these need to be strings, - // so error out if not In the future should enforce StringDtype from - // pandas once released (1.0.0) - if (!PyUnicode_Check(obj)) { - PyObject *errMsg = PyUnicode_FromFormat( - "Invalid value \"%R\" found (row %zd column %zd)", obj, row, col); - PyErr_SetObject(PyExc_TypeError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - } - Py_ssize_t len; - // TODO: CPython uses a const char* buffer but Hyper accepts - // const unsigned char* - is this always safe? - const unsigned char *buf = - (const unsigned char *)PyUnicode_AsUTF8AndSize(obj, &len); - result = hyper_inserter_buffer_add_binary(insertBuffer, buf, len); - break; - } - default: { - PyObject *errMsg = PyUnicode_FromFormat("Invalid dtype: \"%s\""); - PyErr_SetObject(PyExc_ValueError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - } - - return result; -} - -// TODO: Make error handling consistent. Right now errors occur if -// 1. The return value is non-NULL OR -// 2. 
PyErr is set within this function -static hyper_error_t * -writeNonNullDataLegacy(PyObject *data, DTYPE dtype, - hyper_inserter_buffer_t *insertBuffer, Py_ssize_t row, - Py_ssize_t col) { - hyper_error_t *result; - switch (dtype) { - case INT16_: - case INT16NA: { - int16_t val = (int16_t)PyLong_AsLong(data); - result = hyper_inserter_buffer_add_int16(insertBuffer, val); - break; - } - case INT32_: - case INT32NA: { - int32_t val = (int32_t)PyLong_AsLong(data); - result = hyper_inserter_buffer_add_int32(insertBuffer, val); - break; - } - case INT64_: - case INT64NA: { - int64_t val = (int64_t)PyLong_AsLongLong(data); - result = hyper_inserter_buffer_add_int64(insertBuffer, val); - break; - } - case FLOAT32_: - case FLOAT64_: - case FLOAT32NA: - case FLOAT64NA: { - double val = PyFloat_AsDouble(data); - result = hyper_inserter_buffer_add_double(insertBuffer, val); - break; - } - case BOOLEAN: - case BOOLEANNA: { - if (PyObject_IsTrue(data)) { - result = hyper_inserter_buffer_add_bool(insertBuffer, 1); - } else { - result = hyper_inserter_buffer_add_bool(insertBuffer, 0); - } - break; - } - case DATETIME64_NS: - case DATETIME64_NS_UTC: { - hyper_date_components_t date_components = { - .year = PyDateTime_GET_YEAR(data), - .month = PyDateTime_GET_MONTH(data), - .day = PyDateTime_GET_DAY(data)}; - - hyper_time_components_t time_components = { - .hour = PyDateTime_DATE_GET_HOUR(data), - .minute = PyDateTime_DATE_GET_MINUTE(data), - .second = PyDateTime_DATE_GET_SECOND(data), - .microsecond = PyDateTime_DATE_GET_MICROSECOND(data)}; - - hyper_date_t date = hyper_encode_date(date_components); - hyper_time_t time = hyper_encode_time(time_components); - - // TODO: Tableau uses typedefs for unsigned 32 / 64 integers for - // date and time respectively, but stores as int64; here we cast - // explicitly but should probably bounds check for overflow as well - int64_t val = (int64_t)time + (int64_t)date * MICROSECONDS_PER_DAY; - - result = hyper_inserter_buffer_add_int64(insertBuffer, val); - break; - } - case TIMEDELTA64_NS: { - // TODO: Add error message for failed attribute access - PyObject *us = PyObject_GetAttrString(data, "microseconds"); - if (us == NULL) { - return NULL; - } - PyObject *days = PyObject_GetAttrString(data, "days"); - if (days == NULL) { - Py_DECREF(us); - return NULL; - } - - PyObject *months = PyObject_GetAttrString(data, "months"); - if (months == NULL) { - Py_DECREF(us); - Py_DECREF(days); - return NULL; - } - - py_interval interval = {.microseconds = PyLong_AsLongLong(us), - .days = PyLong_AsLong(days), - .months = PyLong_AsLong(months)}; - - // TODO: it appears there is some buffer packing being done, though - // not sure this actually works in Tableau - result = hyper_inserter_buffer_add_raw( - insertBuffer, (const unsigned char *)&interval, sizeof(py_interval)); - Py_DECREF(us); - Py_DECREF(days); - Py_DECREF(months); - break; - } - case STRING: - case OBJECT: { - if (dtype == OBJECT) { - // N.B. 
all other dtypes in pandas are well defined, but object is - // really anything For purposes of Tableau these need to be strings, - // so error out if not In the future should enforce StringDtype from - // pandas once released (1.0.0) - if (!PyUnicode_Check(data)) { - PyObject *errMsg = PyUnicode_FromFormat( - "Invalid value \"%R\" found (row %zd column %zd)", data, row, col); - PyErr_SetObject(PyExc_TypeError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - } - Py_ssize_t len; - // TODO: CPython uses a const char* buffer but Hyper accepts - // const unsigned char* - is this always safe? - const unsigned char *buf = - (const unsigned char *)PyUnicode_AsUTF8AndSize(data, &len); - result = hyper_inserter_buffer_add_binary(insertBuffer, buf, len); - break; - } - default: { - PyObject *errMsg = PyUnicode_FromFormat("Invalid dtype: \"%s\""); - PyErr_SetObject(PyExc_ValueError, errMsg); - Py_DECREF(errMsg); - return NULL; - } - } - - return result; -} - -// This function gets performance by sacrificing bounds checking -// Particulary no checking happens that the length of each iterable -// in data matches the length of the callables supplied at every step -// in the process,though note that this is critical! -// If this doesn't hold true behavior is undefined -PyObject *write_to_hyper_legacy(PyObject *Py_UNUSED(dummy), PyObject *args) { - int ok; - PyObject *data, *iterator, *row, *val, *dtypes, *null_mask, *insertBufferObj; - Py_ssize_t row_counter, ncols; - hyper_inserter_buffer_t *insertBuffer; - hyper_error_t *result; - Py_buffer buf; - - PyDateTime_IMPORT; - - // TOOD: Find better way to accept buffer pointer than putting in long - ok = PyArg_ParseTuple(args, "OOOnO!", &data, &null_mask, &insertBufferObj, - &ncols, &PyTuple_Type, &dtypes); - if (!ok) - return NULL; - - if (!PyIter_Check(data)) { - PyErr_SetString(PyExc_TypeError, "First argument must be iterable"); - return NULL; - } - - if (!PyObject_CheckBuffer(null_mask)) { - PyErr_SetString(PyExc_TypeError, - "Second argument must support buffer protocol"); - return NULL; - } - - // TODO: check that we get an instance of CDataObject; else will segfault - insertBuffer = - (hyper_inserter_buffer_t *)((CDataObject *)insertBufferObj)->c_data; - - iterator = PyObject_GetIter(data); - if (iterator == NULL) - return NULL; - - if (PyObject_GetBuffer(null_mask, &buf, PyBUF_CONTIG_RO | PyBUF_FORMAT) < 0) { - Py_DECREF(iterator); - return NULL; - } - - if (buf.ndim != 2) { - Py_DECREF(iterator); - PyBuffer_Release(&buf); - PyErr_SetString(PyExc_ValueError, "null_mask must be 2D"); - return NULL; - } - - if (strncmp(buf.format, "?", 1) != 0) { - Py_DECREF(iterator); - PyBuffer_Release(&buf); - PyErr_SetString(PyExc_ValueError, "null_mask must be boolean"); - return NULL; - } - - DTYPE *enumerated_dtypes = makeEnumeratedDtypes((PyTupleObject *)dtypes); - row_counter = 0; - Py_ssize_t item_counter = - 0; // Needed as pointer arith doesn't work for void * buf - while ((row = PyIter_Next(iterator))) { - // TODO: Add validation that the total length of all elements - // matches the length of the null buffer, otherwise wrong data - // is returned - for (Py_ssize_t i = 0; i < ncols; i++) { - if (((uint8_t *)buf.buf)[item_counter++] == 1) { - result = hyper_inserter_buffer_add_null(insertBuffer); - } else { - val = PyTuple_GET_ITEM(row, i); - result = writeNonNullDataLegacy(val, enumerated_dtypes[i], insertBuffer, - row_counter, i); - } - - if ((result != NULL) || (PyErr_Occurred())) { - free(enumerated_dtypes); - Py_DECREF(row); - Py_DECREF(iterator); 
- PyBuffer_Release(&buf); - return NULL; - } - } - Py_DECREF(row); - row_counter += 1; - } - - free(enumerated_dtypes); - Py_DECREF(iterator); - PyBuffer_Release(&buf); - - if (PyErr_Occurred()) - return NULL; - - Py_RETURN_NONE; -} - -PyObject *write_to_hyper(PyObject *Py_UNUSED(dummy), PyObject *args) { - int ok, success = 1; - PyObject *df, *dtypes, *null_mask, *insertBufferObj; - hyper_inserter_buffer_t *insertBuffer; - hyper_error_t *result; - Py_buffer buf; - - // TOOD: Find better way to accept buffer pointer than putting in long - ok = PyArg_ParseTuple(args, "OOOO!", &df, &null_mask, &insertBufferObj, - &PyTuple_Type, &dtypes); - if (!ok) - return NULL; - - if (!PyObject_CheckBuffer(null_mask)) { - PyErr_SetString(PyExc_TypeError, - "Second argument must support buffer protocol"); - return NULL; - } - - // TODO: check that we get an instance of CDataObject; else will segfault - insertBuffer = - (hyper_inserter_buffer_t *)((CDataObject *)insertBufferObj)->c_data; - - if (PyObject_GetBuffer(null_mask, &buf, PyBUF_CONTIG_RO | PyBUF_FORMAT) < 0) { - return NULL; - } - - if (buf.ndim != 2) { - PyBuffer_Release(&buf); - PyErr_SetString(PyExc_ValueError, "null_mask must be 2D"); - return NULL; - } - - if (strncmp(buf.format, "?", 1) != 0) { - PyBuffer_Release(&buf); - PyErr_SetString(PyExc_ValueError, "null_mask must be boolean"); - return NULL; - } - - DTYPE *enumerated_dtypes = makeEnumeratedDtypes((PyTupleObject *)dtypes); - - PyObject *mgr = PyObject_GetAttrString(df, "_mgr"); - if (mgr == NULL) { - PyBuffer_Release(&buf); - free(enumerated_dtypes); - return NULL; - } - - PyObject *arrList = PyObject_GetAttrString(mgr, "column_arrays"); - Py_DECREF(mgr); - if (arrList == NULL) { - PyBuffer_Release(&buf); - free(enumerated_dtypes); - return NULL; - } - - Py_ssize_t rowcount = PyObject_Length(df); - Py_ssize_t colcount = PyObject_Length(arrList); - Py_ssize_t bufPos; - NpyIter **npyIters = initiateIters(arrList); - Py_DECREF(arrList); - - if (npyIters == NULL) { - PyBuffer_Release(&buf); - free(enumerated_dtypes); - return NULL; - } - NpyIter_IterNextFunc **npyIterNextFuncs = - initiateIterNextFuncs(npyIters, colcount); - if (npyIterNextFuncs == NULL) { - success = 0; - goto CLEANUP; - } - - char ***dataptrs = initiateDataPtrs(npyIters, colcount); - if (dataptrs == NULL) { - success = 0; - goto CLEANUP; - } - - NpyIter *iter; - NpyIter_IterNextFunc *iternext; - char **dataptr; - - for (Py_ssize_t rowIndex = 0; rowIndex < rowcount; rowIndex++) { - for (Py_ssize_t colIndex = 0; colIndex < colcount; colIndex++) { - bufPos = (rowIndex * colcount) + colIndex; - iter = npyIters[colIndex]; - iternext = npyIterNextFuncs[colIndex]; - dataptr = dataptrs[colIndex]; - if (((uint8_t *)buf.buf)[bufPos] == 1) { - result = hyper_inserter_buffer_add_null(insertBuffer); - } else { - result = writeNonNullData(dataptr, enumerated_dtypes[colIndex], - insertBuffer, rowIndex, colIndex); - } - iternext(iter); - - if ((result != NULL) || (PyErr_Occurred())) { - success = 0; - goto CLEANUP; - } - } - } - -CLEANUP: - freeIters(npyIters, colcount); - free(enumerated_dtypes); - PyBuffer_Release(&buf); - - if (success) - Py_RETURN_NONE; - else - return NULL; -} diff --git a/pantab/src/writer.h b/pantab/src/writer.h deleted file mode 100644 index 545a0e3f..00000000 --- a/pantab/src/writer.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef PANTAB_WRITER_H -#define PANTAB_WRITER_H - -#define PY_SSIZE_T_CLEAN -#include - -PyObject *write_to_hyper_legacy(PyObject *Py_UNUSED(dummy), PyObject *args); -PyObject 
*write_to_hyper(PyObject *Py_UNUSED(dummy), PyObject *args); - -#endif diff --git a/pantab/tests/conftest.py b/pantab/tests/conftest.py index 91f41057..3cf4116f 100644 --- a/pantab/tests/conftest.py +++ b/pantab/tests/conftest.py @@ -5,12 +5,8 @@ import pytest import tableauhyperapi as tab_api -import pantab._compat as compat - -@pytest.fixture -def df(): - """Fixture to use which should contain all data types.""" +def get_basic_dataframe(): df = pd.DataFrame( [ [ @@ -22,10 +18,13 @@ def df(): 3, 4.0, 5.0, + 1.0, + 2.0, + True, True, pd.to_datetime("2018-01-01"), pd.to_datetime("2018-01-01", utc=True), - pd.Timedelta("1 days 2 hours 3 minutes 4 seconds"), + "foo", "foo", np.iinfo(np.int16).min, np.iinfo(np.int32).min, @@ -43,10 +42,13 @@ def df(): np.nan, 9.0, 10.0, + 1.0, + 2.0, + False, False, pd.to_datetime("1/1/19"), pd.to_datetime("2019-01-01", utc=True), - pd.Timedelta("-1 days 2 hours 3 minutes 4 seconds"), + "bar", "bar", np.iinfo(np.int16).max, np.iinfo(np.int32).max, @@ -64,11 +66,14 @@ def df(): np.nan, np.nan, np.nan, + pd.NA, + pd.NA, False, - pd.NaT, + pd.NA, pd.NaT, pd.NaT, np.nan, + pd.NA, 0, 0, 0, @@ -86,11 +91,14 @@ def df(): "Int64", "float32", "float64", + "Float32", + "Float64", "bool", + "boolean", "datetime64", "datetime64_utc", - "timedelta64", "object", + "string", "int16_limits", "int32_limits", "int64_limits", @@ -110,11 +118,14 @@ def df(): "Int64": "Int64", "float32": np.float32, "float64": np.float64, + "Float32": "Float32", + "Float64": "Float64", "bool": bool, + "boolean": "boolean", "datetime64": "datetime64[ns]", "datetime64_utc": "datetime64[ns, UTC]", - "timedelta64": "timedelta64[ns]", "object": "object", + "string": "string", "int16_limits": np.int16, "int32_limits": np.int32, "int64_limits": np.int64, @@ -124,13 +135,46 @@ def df(): } ) - df["boolean"] = pd.Series([True, False, pd.NA], dtype="boolean") - df["string"] = pd.Series(["foo", "bar", pd.NA], dtype="string") + return df - if compat.PANDAS_120: - df["Float32"] = pd.Series([1.0, 2.0, pd.NA], dtype="Float32") - df["Float64"] = pd.Series([1.0, 2.0, pd.NA], dtype="Float64") +@pytest.fixture +def df(): + """Fixture to use which should contain all data types.""" + return get_basic_dataframe() + + +@pytest.fixture +def roundtripped(): + """Roundtripped DataFrames should use arrow dtypes by default""" + df = get_basic_dataframe() + df = df.astype( + { + "int16": "int16[pyarrow]", + "int32": "int32[pyarrow]", + "int64": "int64[pyarrow]", + "Int16": "int16[pyarrow]", + "Int32": "int32[pyarrow]", + "Int64": "int64[pyarrow]", + "float32": "double[pyarrow]", + "float64": "double[pyarrow]", + "Float32": "double[pyarrow]", + "Float64": "double[pyarrow]", + "bool": "boolean[pyarrow]", + "boolean": "boolean[pyarrow]", + "datetime64": "timestamp[us][pyarrow]", + "datetime64_utc": "timestamp[us, UTC][pyarrow]", + # "timedelta64": "timedelta64[ns]", + "object": "string[pyarrow]", + "int16_limits": "int16[pyarrow]", + "int32_limits": "int32[pyarrow]", + "int64_limits": "int64[pyarrow]", + "float32_limits": "double[pyarrow]", + "float64_limits": "double[pyarrow]", + "non-ascii": "string[pyarrow]", + "string": "string[pyarrow]", + } + ) return df @@ -164,9 +208,3 @@ def table_name(request): def datapath(): """Location of data files in test folder.""" return pathlib.Path(__file__).parent / "data" - - -@pytest.fixture(params=[False, True]) -def use_parquet(request): - """Whether to use parquet for intermediate file storage.""" - return request.param diff --git a/pantab/tests/test_reader.py 
b/pantab/tests/test_reader.py index 377b29d7..466089e0 100644 --- a/pantab/tests/test_reader.py +++ b/pantab/tests/test_reader.py @@ -1,5 +1,3 @@ -from sqlite3 import connect - import pandas as pd import pandas.testing as tm import pytest @@ -27,57 +25,10 @@ def test_reports_unsupported_type(datapath): would be string columns. This led to very fascinating failures. """ db_path = datapath / "geography.hyper" - with pytest.raises( - TypeError, match=r"Column \"x\" has unsupported datatype GEOGRAPHY" - ): + with pytest.raises(TypeError, match=r"GEOGRAPHY"): pantab.frame_from_hyper(db_path, table="test") -def test_months_in_interval_raises(df, tmp_hyper, monkeypatch): - # Monkeypatch a new constructor that hard codes months - def __init__(self, months: int, days: int, microseconds: int): - self.months = 1 - self.days = days - self.microseconds = microseconds - - monkeypatch.setattr(pantab._writer.tab_api.Interval, "__init__", __init__) - pantab.frame_to_hyper(df, tmp_hyper, table="test") - with pytest.raises( - ValueError, match=r"Cannot read Intervals with month components\." - ): - pantab.frame_from_hyper(tmp_hyper, table="test") - - with pytest.raises( - ValueError, match=r"Cannot read Intervals with month components\." - ): - pantab.frames_from_hyper(tmp_hyper) - - -def test_error_on_first_column(df, tmp_hyper, monkeypatch): - """ - We had a defect due to which pantab segfaulted when an error occured in one of - the first two columns. This test case is a regression test against that. - """ - - # Monkeypatch a new constructor that hard codes months - def __init__(self, months: int, days: int, microseconds: int): - self.months = 1 - self.days = days - self.microseconds = microseconds - - monkeypatch.setattr(pantab._writer.tab_api.Interval, "__init__", __init__) - - df = pd.DataFrame( - [[pd.Timedelta("1 days 2 hours 3 minutes 4 seconds")]], columns=["timedelta64"] - ).astype({"timedelta64": "timedelta64[ns]"}) - pantab.frame_to_hyper(df, tmp_hyper, table="test") - - with pytest.raises( - ValueError, match=r"Cannot read Intervals with month components\." 
- ): - pantab.frame_from_hyper(tmp_hyper, table="test") - - def test_read_non_roundtrippable(datapath): result = pantab.frame_from_hyper( datapath / "dates.hyper", table=TableName("Extract", "Extract") @@ -85,7 +36,7 @@ def test_read_non_roundtrippable(datapath): expected = pd.DataFrame( [["1900-01-01", "2000-01-01"], [pd.NaT, "2050-01-01"]], columns=["Date1", "Date2"], - dtype="datetime64[ns]", + dtype="date32[day][pyarrow]", ) tm.assert_frame_equal(result, expected) @@ -99,7 +50,12 @@ def test_reads_non_writeable(datapath): [["row1", 1.0], ["row2", 2.0]], columns=["Non-Nullable String", "Non-Nullable Float"], ) - expected["Non-Nullable String"] = expected["Non-Nullable String"].astype("string") + expected["Non-Nullable Float"] = expected["Non-Nullable Float"].astype( + "double[pyarrow]" + ) + expected["Non-Nullable String"] = expected["Non-Nullable String"].astype( + "string[pyarrow]" + ) tm.assert_frame_equal(result, expected) @@ -111,22 +67,21 @@ def test_read_query(df, tmp_hyper): result = pantab.frame_from_hyper_query(tmp_hyper, query) expected = pd.DataFrame([[1, "_2"], [6, "_7"], [0, "_0"]], columns=["i", "_i2"]) - expected = expected.astype({"i": "Int16", "_i2": "string"}) + expected = expected.astype({"i": "int16[pyarrow]", "_i2": "string[pyarrow]"}) tm.assert_frame_equal(result, expected) -def test_empty_read_query(df: pd.DataFrame, tmp_hyper): +def test_empty_read_query(df: pd.DataFrame, roundtripped, tmp_hyper): """ red-green for empty query results """ # sql cols need to base case insensitive & unique - df = df[pd.Series(df.columns).apply(lambda s: s.lower()).drop_duplicates()] - conn = connect(":memory:") table_name = "test" - df.to_sql(name=table_name, con=conn, index=False) pantab.frame_to_hyper(df, tmp_hyper, table=table_name) query = f"SELECT * FROM {table_name} limit 0" - expected = pd.read_sql_query(query, conn) + expected = pd.DataFrame(columns=df.columns) + expected = expected.astype(roundtripped.dtypes) + result = pantab.frame_from_hyper_query(tmp_hyper, query) tm.assert_frame_equal(result, expected) diff --git a/pantab/tests/test_roundtrip.py b/pantab/tests/test_roundtrip.py index 1d23c0dc..32c9f065 100644 --- a/pantab/tests/test_roundtrip.py +++ b/pantab/tests/test_roundtrip.py @@ -1,61 +1,28 @@ -from pathlib import Path - -import numpy as np import pandas as pd import pandas.testing as tm -import pytest -from tableauhyperapi import Connection, CreateMode, HyperProcess, TableName, Telemetry +from tableauhyperapi import TableName import pantab -def assert_roundtrip_equal(result, expected): - """Compat helper for comparing round-tripped results.""" - - expected["object"] = expected["object"].astype("string") - expected["non-ascii"] = expected["non-ascii"].astype("string") - expected["datetime64_utc"] = expected["datetime64_utc"].dt.tz_localize(None) - - tm.assert_frame_equal(result, expected) - - -def test_basic(df, tmp_hyper, table_name, table_mode): +def test_basic(df, roundtripped, tmp_hyper, table_name, table_mode): # Write twice; depending on mode this should either overwrite or duplicate entries pantab.frame_to_hyper(df, tmp_hyper, table=table_name, table_mode=table_mode) pantab.frame_to_hyper(df, tmp_hyper, table=table_name, table_mode=table_mode) result = pantab.frame_from_hyper(tmp_hyper, table=table_name) - expected = df.copy() - expected["float32"] = expected["float32"].astype(np.float64) - expected["Float32"] = expected["Float32"].astype(np.float64) - expected["Float64"] = expected["Float64"].astype(np.float64) - + expected = roundtripped if table_mode 
== "a": expected = pd.concat([expected, expected]).reset_index(drop=True) - assert_roundtrip_equal(result, expected) + # TODO: somehow concat turns string[pyarrow] into string python + for col in ("object", "non-ascii", "string"): + expected[col] = expected[col].astype("string[pyarrow]") - -def test_use_float_na_flag(df, tmp_hyper, table_name): - pantab.frame_to_hyper(df, tmp_hyper, table=table_name) - result = pantab.frame_from_hyper(tmp_hyper, table=table_name, use_float_na=False) - expected = df.copy() - expected["float32"] = expected["float32"].astype(np.float64) - expected["Float32"] = expected["Float32"].astype(np.float64) - expected["Float64"] = expected["Float64"].astype(np.float64) - assert_roundtrip_equal(result, expected) - - result = pantab.frame_from_hyper(tmp_hyper, table=table_name, use_float_na=True) - expected = df.copy() - expected["float32"] = expected["float32"].astype("Float64") - expected["float64"] = expected["float64"].astype("Float64") - expected["float32_limits"] = expected["float32_limits"].astype("Float64") - expected["float64_limits"] = expected["float64_limits"].astype("Float64") - expected["Float32"] = expected["Float32"].astype("Float64") - assert_roundtrip_equal(result, expected) + tm.assert_frame_equal(result, expected) -def test_multiple_tables(df, tmp_hyper, table_name, table_mode): +def test_multiple_tables(df, roundtripped, tmp_hyper, table_name, table_mode): # Write twice; depending on mode this should either overwrite or duplicate entries pantab.frames_to_hyper( {table_name: df, "table2": df}, tmp_hyper, table_mode=table_mode @@ -65,100 +32,18 @@ def test_multiple_tables(df, tmp_hyper, table_name, table_mode): ) result = pantab.frames_from_hyper(tmp_hyper) - expected = df.copy() - expected["float32"] = expected["float32"].astype(np.float64) - expected["Float32"] = expected["Float32"].astype(np.float64) - expected["Float64"] = expected["Float64"].astype(np.float64) + expected = roundtripped if table_mode == "a": expected = pd.concat([expected, expected]).reset_index(drop=True) + # TODO: somehow concat turns string[pyarrow] into string python + for col in ("object", "non-ascii", "string"): + expected[col] = expected[col].astype("string[pyarrow]") + # some test trickery here if not isinstance(table_name, TableName) or table_name.schema_name is None: table_name = TableName("public", table_name) assert set(result.keys()) == set((table_name, TableName("public", "table2"))) for val in result.values(): - assert_roundtrip_equal(val, expected) - - -def test_roundtrip_with_external_hyper_process(df, tmp_hyper): - default_log_path = Path.cwd() / "hyperd.log" - if default_log_path.exists(): - default_log_path.unlink() - - # By passing in a pre-spawned HyperProcess, one can e.g. 
avoid creating a log file - parameters = {"log_config": ""} - with HyperProcess( - Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, parameters=parameters - ) as hyper: - # test frame_to_hyper/frame_from_hyper - pantab.frame_to_hyper(df, tmp_hyper, table="test", hyper_process=hyper) - result = pantab.frame_from_hyper(tmp_hyper, table="test", hyper_process=hyper) - expected = df.copy() - expected["float32"] = expected["float32"].astype(np.float64) - expected["Float32"] = expected["Float32"].astype(np.float64) - expected["Float64"] = expected["Float64"].astype(np.float64) - assert_roundtrip_equal(result, expected) - - # test frame_from_hyper_query - result = pantab.frame_from_hyper_query( - tmp_hyper, "SELECT * FROM test", hyper_process=hyper - ) - assert result.size == df.size - - # test frames_to_hyper/frames_from_hyper - pantab.frames_to_hyper( - {"test2": df, "test": df}, tmp_hyper, hyper_process=hyper - ) - result = pantab.frames_from_hyper(tmp_hyper, hyper_process=hyper) - assert set(result.keys()) == set( - (TableName("public", "test"), TableName("public", "test2")) - ) - - for val in result.values(): - assert_roundtrip_equal(val, expected) - - assert not default_log_path.exists() - - -def test_roundtrip_with_external_hyper_connection(df, tmp_hyper): - with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - pantab.frames_to_hyper( - {"test": df, "test2": df}, tmp_hyper, hyper_process=hyper - ) - - with Connection(hyper.endpoint, tmp_hyper, CreateMode.NONE) as connection: - result = pantab.frame_from_hyper(connection, table="test") - expected = df.copy() - expected["float32"] = expected["float32"].astype(np.float64) - expected["Float32"] = expected["Float32"].astype(np.float64) - expected["Float64"] = expected["Float64"].astype(np.float64) - assert_roundtrip_equal(result, expected) - - result = pantab.frame_from_hyper_query(connection, "SELECT * FROM test") - assert result.size == df.size - - result = pantab.frames_from_hyper(connection) - assert set(result.keys()) == set( - (TableName("public", "test"), TableName("public", "test2")) - ) - for val in result.values(): - assert_roundtrip_equal(val, expected) - - -def test_external_hyper_connection_and_process_error(df, tmp_hyper): - with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint, tmp_hyper, CreateMode.CREATE) as connection: - expected_msg = ( - "hyper_process parameter is useless because `Connection` is provided" - ) - with pytest.raises(ValueError, match=expected_msg): - pantab.frame_from_hyper(connection, table="test", hyper_process=hyper) - - with pytest.raises(ValueError, match=expected_msg): - pantab.frame_from_hyper_query( - connection, "SELECT * FROM test", hyper_process=hyper - ) - - with pytest.raises(ValueError, match=expected_msg): - pantab.frames_from_hyper(connection, hyper_process=hyper) + tm.assert_frame_equal(val, expected) diff --git a/pantab/tests/test_types.py b/pantab/tests/test_types.py deleted file mode 100644 index 6612651d..00000000 --- a/pantab/tests/test_types.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest -import tableauhyperapi as tab_api - -import pantab._types - - -@pytest.mark.parametrize( - "nullability", [tab_api.Nullability.NULLABLE, tab_api.Nullability.NOT_NULLABLE] -) -def test_read_varchar_type(nullability): - """ - Test that we can read a VARCHAR column from Hyper. 
- """ - vchar = tab_api.SqlType.varchar(255) - vchar_column = pantab._types._ColumnType(vchar, nullability) - assert pantab._types._get_pandas_type(vchar_column) == "string" diff --git a/pantab/tests/test_writer.py b/pantab/tests/test_writer.py index 922322e0..8ca2cee0 100644 --- a/pantab/tests/test_writer.py +++ b/pantab/tests/test_writer.py @@ -1,140 +1,78 @@ import re from datetime import datetime, timezone -import numpy as np import pandas as pd import pytest -import tableauhyperapi as tab_api from tableauhyperapi import Connection, CreateMode, HyperProcess, Telemetry import pantab -def test_bad_table_mode_raises(df, tmp_hyper, use_parquet): - if use_parquet: - pytest.importorskip("pyarrow") - df = df.drop(columns=["timedelta64"]) - +def test_bad_table_mode_raises(df, tmp_hyper): msg = "'table_mode' must be either 'w' or 'a'" with pytest.raises(ValueError, match=msg): pantab.frame_to_hyper( - df, tmp_hyper, table="test", table_mode="x", use_parquet=use_parquet + df, + tmp_hyper, + table="test", + table_mode="x", ) with pytest.raises(ValueError, match=msg): pantab.frames_to_hyper({"a": df}, tmp_hyper, table_mode="x") -def test_append_mode_raises_column_mismatch(df, tmp_hyper, table_name, use_parquet): - if use_parquet: - pytest.importorskip("pyarrow") - df = df.drop(columns=["timedelta64"]) - - pantab.frame_to_hyper(df, tmp_hyper, table=table_name, use_parquet=use_parquet) - - df = df.drop("object", axis=1) - msg = "^Mismatched column definitions:" - with pytest.raises(TypeError, match=msg): - pantab.frame_to_hyper( - df, tmp_hyper, table=table_name, table_mode="a", use_parquet=use_parquet - ) - - -def test_append_mode_raises_column_dtype_mismatch( - df, tmp_hyper, table_name, use_parquet -): - if use_parquet: - pytest.importorskip("pyarrow") - df = df.drop(columns=["timedelta64"]) - - pantab.frame_to_hyper(df, tmp_hyper, table=table_name, use_parquet=use_parquet) +@pytest.mark.parametrize("new_dtype", ["int64", float]) +def test_append_mode_raises_column_dtype_mismatch(new_dtype, df, tmp_hyper, table_name): + pantab.frame_to_hyper(df, tmp_hyper, table=table_name) - df["int16"] = df["int16"].astype(np.int64) - msg = "^Mismatched column definitions:" - with pytest.raises(TypeError, match=msg): - pantab.frame_to_hyper( - df, tmp_hyper, table=table_name, table_mode="a", use_parquet=use_parquet - ) + df["int16"] = df["int16"].astype(new_dtype) + # TODO: a better error message from hyper would be nice here + # seems like a limitation of hyper api + msg = "" + with pytest.raises(RuntimeError, match=msg): + pantab.frame_to_hyper(df, tmp_hyper, table=table_name, table_mode="a") -def test_failed_write_doesnt_overwrite_file( - df, tmp_hyper, monkeypatch, table_mode, use_parquet -): - if use_parquet: - pytest.importorskip("pyarrow") - df = df.drop(columns=["timedelta64"]) - +def test_failed_write_doesnt_overwrite_file(df, tmp_hyper, monkeypatch, table_mode): pantab.frame_to_hyper( - df, tmp_hyper, table="test", table_mode=table_mode, use_parquet=use_parquet + df, + tmp_hyper, + table="test", + table_mode=table_mode, ) last_modified = tmp_hyper.stat().st_mtime - # Let's patch the Inserter to fail on creation - def failure(*args, **kwargs): - raise ValueError("dummy failure") - - if use_parquet: - pytest.importorskip("pyarrow") - pytest.skip("TODO: should figure out patching here") - # monkeypatch.setattr(pantab._writer.pq, "write_table", failure, raising=True) - else: - monkeypatch.setattr(pantab._writer.tab_api, "Inserter", failure, raising=True) + # Pick a dtype we know will fail + 
df["should_fail"] = pd.Series([tuple((1, 2))]) # Try out our write methods - with pytest.raises(ValueError, match="dummy failure"): - pantab.frame_to_hyper( - df, tmp_hyper, table="test", table_mode=table_mode, use_parquet=use_parquet - ) - pantab.frames_to_hyper( - {"test": df}, tmp_hyper, table_mode=table_mode, use_parquet=use_parquet - ) + with pytest.raises(Exception): + pantab.frame_to_hyper(df, tmp_hyper, table="test", table_mode=table_mode) + pantab.frames_to_hyper({"test": df}, tmp_hyper, table_mode=table_mode) # Neither should not update file stats assert last_modified == tmp_hyper.stat().st_mtime -def test_duplicate_columns_raises(tmp_hyper, use_parquet): +def test_duplicate_columns_raises(tmp_hyper): df = pd.DataFrame([[1, 1]], columns=[1, 1]) - with pytest.raises( - tab_api.hyperexception.HyperException, - match="column '1' specified more than once", - ): - pantab.frame_to_hyper(df, tmp_hyper, table="foo", use_parquet=use_parquet) - - with pytest.raises( - tab_api.hyperexception.HyperException, - match="column '1' specified more than once", - ): - pantab.frames_to_hyper({"test": df}, tmp_hyper, use_parquet=use_parquet) - - -@pytest.mark.parametrize("dtype", ["UInt64", "datetime64[ns, US/Eastern]"]) -def test_unsupported_dtype_raises(dtype, tmp_hyper, use_parquet): - df = pd.DataFrame([[1]], dtype=dtype) - - msg = re.escape(f"Conversion of '{dtype}' dtypes not supported!") - with pytest.raises(TypeError, match=msg): - pantab.frame_to_hyper(df, tmp_hyper, table="test", use_parquet=use_parquet) - - -def test_bad_value_gives_clear_message(tmp_hyper): - df = pd.DataFrame([[{"a": "b"}]], columns=["a"]) + msg = r"Duplicate column names found: \[1, 1\]" + with pytest.raises(ValueError, match=msg): + pantab.frame_to_hyper(df, tmp_hyper, table="foo") - msg = r"Invalid value \"{'a': 'b'}\" found \(row 0 column 0\)" + with pytest.raises(ValueError, match=msg): + pantab.frames_to_hyper({"test": df}, tmp_hyper) - with pytest.raises(TypeError, match=msg): - pantab.frame_to_hyper(df, tmp_hyper, table="test") +def test_unsupported_dtype_raises(tmp_hyper): + df = pd.DataFrame([[pd.Timedelta("1D")]]) -def test_use_parquet_with_timedelta_raises(df, tmp_hyper): - msg = "Writing timedelta values with use_parquet=True is not yet supported." 
+    msg = re.escape("Unsupported Arrow type")
     with pytest.raises(ValueError, match=msg):
-        pantab.frame_to_hyper(df, tmp_hyper, table="test", use_parquet=True)
+        pantab.frame_to_hyper(df, tmp_hyper, table="test")


-@pytest.mark.skipif(
-    not pantab._compat.PANDAS_130, reason="bug is specifically with >=pandas 1.3"
-)
 def test_utc_bug(tmp_hyper):
     """
     Red-Green for UTC bug
@@ -154,19 +92,3 @@ def test_utc_bug(tmp_hyper):
     expected: {df.utc_time}
     actual: {[c[0] for c in resp]}
     """
-
-
-@pytest.mark.skipif(
-    not pantab._compat.PANDAS_130, reason="bug is specifically with >=pandas 1.3"
-)
-def test_maybe_convert_utc(tmp_hyper):
-    """
-    timezone aware is not supported, thus we ensure timezone naive
-    """
-    df = pd.DataFrame(
-        {"utc_time": [datetime.now(timezone.utc), pd.Timestamp("today", tz="UTC")]}
-    )
-    assert not df.select_dtypes("datetime64[ns, UTC]").empty
-    df = pantab._writer._maybe_convert_utctimestamp(df)
-    assert df.select_dtypes("datetime64[ns, UTC]").empty
-    assert not df.select_dtypes("datetime64[ns]").empty
diff --git a/pyproject.toml b/pyproject.toml
index 9324a45c..a8316704 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,14 @@
 [build-system]
-build-backend="mesonpy"
-requires=["meson-python", "tableauhyperapi", "oldest-supported-numpy"]
+requires = [
+    "scikit-build-core",
+    "nanobind",
+    "oldest-supported-numpy",  # only needed for datetime
+]
+build-backend = "scikit_build_core.build"
 
 [project]
 name = "pantab"
-version = "3.0.3"
+version = "4.0.0rc"
 description = "Converts pandas DataFrames into Tableau Hyper Extracts and back"
 license = {file = "LICENSE.txt"}
 readme = "README.md"
@@ -24,9 +28,12 @@ classifiers = [
 keywords = ["tableau", "visualization", "pandas", "dataframe"]
 
 dependencies = [
-    "pandas>=1.0.0",
+    "pandas>=2.0.0",
     "tableauhyperapi>=0.0.14567",
     "numpy",
+    # in the future we need not require pyarrow as pandas implements the
+    # PyCapsule interface. See pandas PR #56587
+    "pyarrow>=14.0.0",
 ]
 
 [project.urls]
@@ -48,10 +55,8 @@ testpaths = [
 ]
 
 [tool.mypy]
-ignore_missing_imports = true
-
-[tool.mypy.overrides]
-module = ["numpy", "pandas.*", "pytest", "setuptools", "tableauhyperapi.*", "pyarrow.*"]
+[[tool.mypy.overrides]]
+module = ["tableauhyperapi.*"]
 ignore_missing_imports = true
 
 [tool.isort]
 include_trailing_comma = true
 line_length = 88
 multi_line_output = 3
 known_first_party = "pantab"
-known_third_party = "libpantab"
 
 [tool.cibuildwheel]
 build = "cp39-*64 cp310-*64 cp311-*64 cp312-*64"
 skip = "*musllinux*"
-test-command = "pytest {project}/pantab/tests"
+test-command = "pytest --import-mode=importlib {project}/pantab/tests"
 test-requires = ["pytest"]
 
 [tool.ruff]
 line-length = 88
+
+[tool.cibuildwheel.linux]
+repair-wheel-command = """
+auditwheel repair -w {dest_dir} {wheel} --exclude libtableauhyperapi.so
+"""
+
+[tool.cibuildwheel.macos]
+# --ignore-missing-dependencies is risky, but there didn't seem to be a good way
+# of convincing delocate-wheel that it is OK not to bundle the libtableauhyperapi
+# library; delocate 0.10.6 has an exclude option that *may* be helpful
+repair-wheel-command = """
+delocate-listdeps {wheel} && delocate-wheel --ignore-missing-dependencies --require-archs {delocate_archs} -w {dest_dir} {wheel}
+"""