Skip to content

Commit 3b87928

Browse files
committed
feat: Split dataframe into submodules to make dependency management easier.
1 parent ec14c20 commit 3b87928

122 files changed

Lines changed: 2351 additions & 1100 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.fourmolu-ignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Files containing CPP directives that fourmolu's ghc-lib-parser can't
2+
# parse (Haddock mode). These are the meta-package re-export hubs whose
3+
# imports + module exports are gated on cabal flags.
4+
src/DataFrame.hs
5+
src/DataFrame/TH.hs
6+
src/DataFrame/Typed.hs
7+
src/DataFrame/Typed/TH.hs

.github/workflows/ci.yml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,12 @@ jobs:
2626
- name: Set LIBTORCH_HOME
2727
run: echo "LIBTORCH_HOME=$HOME/.cache/libtorch" >> $GITHUB_ENV
2828

29-
- name: Configure
30-
run: |
31-
cat > cabal.project <<EOF
32-
packages:
33-
.
34-
examples/
35-
dataframe-persistent/
36-
dataframe-hasktorch/
37-
dataframe-fastcsv/
38-
EOF
29+
# The repo's cabal.project already lists every satellite
30+
# (dataframe-core, dataframe-parsing, dataframe-operations,
31+
# dataframe-csv, dataframe-json, dataframe-parquet, dataframe-th,
32+
# dataframe-csv-th, dataframe-parquet-th, dataframe-viz,
33+
# dataframe-learn, dataframe-lazy) plus the meta package, examples,
34+
# and the pre-existing satellites. No regeneration needed.
3935

4036
- name: Freeze
4137
run: cabal freeze

.github/workflows/haskell-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
- name: Run HLint
3737
uses: haskell-actions/hlint-run@v2
3838
with:
39-
path: '["src/", "app/", "benchmark/", "examples/", "dataframe-hasktorch/", "dataframe-persistent/", "dataframe-fastcsv/"]'
39+
path: '["src/", "app/", "benchmark/", "examples/", "ffi/", "dataframe-arrow/", "dataframe-core/", "dataframe-csv/", "dataframe-csv-th/", "dataframe-fastcsv/", "dataframe-fusion/", "dataframe-hasktorch/", "dataframe-json/", "dataframe-lazy/", "dataframe-learn/", "dataframe-operations/", "dataframe-parquet/", "dataframe-parquet-th/", "dataframe-parsing/", "dataframe-persistent/", "dataframe-th/", "dataframe-viz/"]'
4040
fail-on: warning
4141
linux:
4242
name: Haskell-CI - Linux - ${{ matrix.compiler }}

.github/workflows/presubmit.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
name: Presubmit
2+
3+
# Mirror the local scripts/presubmit.sh pipeline (format, lint, build, test)
4+
# so PRs get the same checks contributors run with ./scripts/presubmit.sh.
5+
# Format and lint each run as a separate job for fast, parallel feedback; the
6+
# heavy build+test job lives in ci.yml.
7+
8+
on:
9+
push:
10+
branches: [main]
11+
pull_request:
12+
13+
jobs:
14+
format:
15+
name: Format (fourmolu)
16+
runs-on: ubuntu-latest
17+
steps:
18+
- uses: actions/checkout@v4
19+
- uses: haskell-actions/run-fourmolu@v11
20+
with:
21+
version: "0.17.0.0"
22+
23+
lint:
24+
name: Lint (hlint)
25+
runs-on: ubuntu-latest
26+
steps:
27+
- uses: actions/checkout@v4
28+
- name: Set up HLint
29+
uses: haskell-actions/hlint-setup@v2
30+
with:
31+
version: "3.8"
32+
- name: Run HLint over every satellite
33+
uses: haskell-actions/hlint-run@v2
34+
with:
35+
path: >-
36+
[
37+
"src/", "app/", "benchmark/", "examples/", "ffi/", "tests/",
38+
"dataframe-arrow/",
39+
"dataframe-core/",
40+
"dataframe-csv/",
41+
"dataframe-csv-th/",
42+
"dataframe-fastcsv/",
43+
"dataframe-fusion/",
44+
"dataframe-hasktorch/",
45+
"dataframe-json/",
46+
"dataframe-lazy/",
47+
"dataframe-learn/",
48+
"dataframe-operations/",
49+
"dataframe-parquet/",
50+
"dataframe-parquet-th/",
51+
"dataframe-parsing/",
52+
"dataframe-persistent/",
53+
"dataframe-th/",
54+
"dataframe-viz/"
55+
]
56+
fail-on: warning

cabal.project

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
packages:
22
.
3+
dataframe-core
4+
dataframe-parsing
5+
dataframe-operations
6+
dataframe-csv
7+
dataframe-json
8+
dataframe-parquet
9+
dataframe-th
10+
dataframe-csv-th
11+
dataframe-parquet-th
12+
dataframe-viz
13+
dataframe-learn
14+
dataframe-lazy
315
examples
416
dataframe-fastcsv
517
dataframe-hasktorch

cabal.project.bare

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- Stripped-down project for exercising the meta `dataframe` package's
2+
-- cabal flags. Contains only the meta package (`.`) and the split-out satellites listed below so the solver
3+
-- doesn't trip on peer satellites whose `buildable:` clauses depend on
4+
-- arrow-bridge / lazy / etc. being available.
5+
--
6+
-- Used by ./scripts/presubmit.sh to build lib:dataframe under each
7+
-- (+no-th / +no-csv / +no-parquet) opt-out flag combination.
8+
9+
packages:
10+
.
11+
dataframe-core
12+
dataframe-parsing
13+
dataframe-operations
14+
dataframe-csv
15+
dataframe-csv-th
16+
dataframe-json
17+
dataframe-parquet
18+
dataframe-parquet-th
19+
dataframe-th
20+
dataframe-viz
21+
dataframe-learn
22+
dataframe-lazy

dataframe-arrow/dataframe-arrow.cabal

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ foreign-library dataframe-arrow
3636
other-modules: DataFrame.FFI
3737
build-depends:
3838
base >= 4 && < 5,
39-
dataframe >= 1 && < 2,
39+
dataframe >= 1 && < 3,
40+
dataframe-core ^>= 1.0,
41+
dataframe-learn ^>= 1.0,
42+
dataframe-operations ^>= 1.0,
4043
dataframe:arrow-bridge,
4144
dataframe-fastcsv,
4245
text >= 2.0 && < 3,

dataframe-core/LICENSE

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright (c) 2026 Michael Chavinda
2+
3+
Permission is hereby granted, free of charge, to any person obtaining
4+
a copy of this software and associated documentation files (the
5+
"Software"), to deal in the Software without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Software, and to
8+
permit persons to whom the Software is furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included
12+
in all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
cabal-version: 2.4
2+
name: dataframe-core
3+
version: 1.0.0.0
4+
5+
synopsis: Core data structures for the dataframe library.
6+
description:
7+
Minimal interchange-format types for the @dataframe@ ecosystem:
8+
'Column', 'DataFrame', 'Bitmap', the untyped expression/interpreter,
9+
and the typed-schema phantom layer. Contains no Template Haskell and
10+
no file I/O. Lightweight dependency footprint (base, vector,
11+
containers, time, random, bytestring, text) so other packages can
12+
exchange dataframes by-value without pulling in the full
13+
@dataframe@ package.
14+
15+
bug-reports: https://github.com/mchav/dataframe/issues
16+
license: MIT
17+
license-file: LICENSE
18+
author: Michael Chavinda
19+
maintainer: mschavinda@gmail.com
20+
copyright: (c) 2024-2026 Michael Chavinda
21+
category: Data
22+
23+
common warnings
24+
ghc-options:
25+
-Wincomplete-patterns
26+
-Wincomplete-uni-patterns
27+
-Wunused-imports
28+
-Wunused-local-binds
29+
30+
library
31+
import: warnings
32+
exposed-modules:
33+
DataFrame.Errors
34+
DataFrame.Operators
35+
DataFrame.Display.Terminal.Colours
36+
DataFrame.Display.Terminal.PrettyPrint
37+
DataFrame.Internal.Column
38+
DataFrame.Internal.DataFrame
39+
DataFrame.Internal.Expression
40+
DataFrame.Internal.Grouping
41+
DataFrame.Internal.Hash
42+
DataFrame.Internal.Interpreter
43+
DataFrame.Internal.Nullable
44+
DataFrame.Internal.Row
45+
DataFrame.Internal.Types
46+
DataFrame.Typed.Freeze
47+
DataFrame.Typed.Generic
48+
DataFrame.Typed.Record
49+
DataFrame.Typed.Schema
50+
DataFrame.Typed.Types
51+
DataFrame.Typed.Util
52+
build-depends: base >= 4 && < 5,
53+
bytestring >= 0.11 && < 0.13,
54+
containers >= 0.6.7 && < 0.9,
55+
random >= 1 && < 2,
56+
text >= 2.0 && < 3,
57+
time >= 1.12 && < 2,
58+
vector ^>= 0.13
59+
hs-source-dirs: src
60+
default-language: Haskell2010
File renamed without changes.

0 commit comments

Comments
 (0)