From 0bc9162c16ea64c97c5023c24b921e1fc80e52c3 Mon Sep 17 00:00:00 2001
From: Benjamin Gutzmann <gutzemann@gmail.com>
Date: Mon, 8 Jan 2024 19:04:53 +0100
Subject: [PATCH] Restructure library into record and batch

---
 CHANGELOG.md         |   6 ++
 README.md            |  12 +++-
 docs/api/mod.rst     |   2 +-
 docs/api/pandas.rst  |   5 --
 src/isd/__init__.py  |   3 +-
 src/isd/batch.py     |  87 +++++++++++++++++++++++
 src/isd/cli.py       |  18 ++---
 src/isd/io.py        |  23 +-----
 src/isd/pandas.py    | 165 -------------------------------------------
 src/isd/record.py    | 145 +++++++++++++++++++++++++------------
 src/isd/utils.py     |  18 -----
 tests/__init__.py    |   0
 tests/conftest.py    |  12 +++-
 tests/test_batch.py  |  64 +++++++++++++++++
 tests/test_io.py     |  15 ----
 tests/test_pandas.py |   8 ---
 tests/test_record.py |  51 +++++++++++++
 tests/test_utils.py  |  52 --------------
 18 files changed, 340 insertions(+), 346 deletions(-)
 delete mode 100644 docs/api/pandas.rst
 create mode 100644 src/isd/batch.py
 delete mode 100644 src/isd/pandas.py
 delete mode 100644 src/isd/utils.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_batch.py
 delete mode 100644 tests/test_pandas.py
 delete mode 100644 tests/test_utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5d8c39..2d5a6c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Changed
+
+- Restructure library into record and batch
+
 ## [0.2.1] - 2023-12-27
 
 ### Fixed
diff --git a/README.md b/README.md
index b398e0c..c1868f3 100644
--- a/README.md
+++ b/README.md
@@ -19,11 +19,21 @@ There is a simple command line interface.
 The `isd record` command prints a single record in JSON format:
 
 ```shell
-isd record 720538-00164-2021
+isd record tests/data/720538-00164-2021
 ```
 
 The Python API allows reading compressed and uncompressed ISD files:
 
+```python
+from isd import Batch
+
+batch = Batch.from_path("isd-file")
+for record in batch:
+    print(record)
+```
+
+Streaming is also supported:
+
 ```python
 import isd.io
 
diff --git a/docs/api/mod.rst b/docs/api/mod.rst
index 3ead6d9..9e5664b 100644
--- a/docs/api/mod.rst
+++ b/docs/api/mod.rst
@@ -9,8 +9,8 @@ Most useful functions and classes are contained in submodules.
 
    errors
    io
-   pandas
    record
+   batch
 
 isd
 ---
diff --git a/docs/api/pandas.rst b/docs/api/pandas.rst
deleted file mode 100644
index 9a6db0d..0000000
--- a/docs/api/pandas.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-isd.pandas
-==========
-
-.. automodule:: isd.pandas
-    :members:
diff --git a/src/isd/__init__.py b/src/isd/__init__.py
index c960531..d8ed7f1 100644
--- a/src/isd/__init__.py
+++ b/src/isd/__init__.py
@@ -1,4 +1,5 @@
 from isd.errors import IsdError
+from isd.batch import Batch
 from isd.record import Record
 
-__all__ = ["IsdError", "Record"]
+__all__ = ["IsdError", "Batch", "Record"]
diff --git a/src/isd/batch.py b/src/isd/batch.py
new file mode 100644
index 0000000..b4da2ad
--- /dev/null
+++ b/src/isd/batch.py
@@ -0,0 +1,87 @@
+import gzip
+import json
+from io import BytesIO
+from pathlib import Path
+from dataclasses import dataclass
+from typing import List, Union, Optional, Dict, Any, Iterator
+import datetime as dt
+
+from isd.record import Record
+
+import pandas as pd
+
+
+@dataclass
+class Batch:
+    records: List[Record]
+
+    def __len__(self) -> int:
+        return len(self.records)
+
+    def __getitem__(self, index: int) -> Record:
+        return self.records[index]
+
+    def __iter__(self) -> Iterator[Record]:
+        return iter(self.records)
+
+    @classmethod
+    def parse(cls, lines: Union[str, BytesIO]) -> "Batch":
+        """Parses records from a string or a UTF-8 encoded BytesIO, one per line."""
+        if isinstance(lines, BytesIO):
+            lines = lines.read().decode("utf-8")
+        return cls([Record.parse(line) for line in lines.splitlines()])
+
+    @classmethod
+    def from_path(cls, path: Union[str, Path]) -> "Batch":
+        """Opens a local ISD file and returns a Batch holding all of its records.
+
+        If the path has a .gz extension, this function will assume it has gzip
+        compression and will attempt to open it using `gzip.open`.
+        """
+        path = Path(path)
+        if path.suffix == ".gz":
+            with gzip.open(path) as gzip_file:
+                return cls(
+                    [Record.parse(gzip_line.decode("utf-8")) for gzip_line in gzip_file]
+                )
+        else:
+            with open(path) as uncompressed_file:
+                return cls(
+                    [
+                        Record.parse(uncompressed_line)
+                        for uncompressed_line in uncompressed_file
+                    ]
+                )
+
+    def filter_by_datetime(
+        self,
+        start_date: Optional[dt.datetime] = None,
+        end_date: Optional[dt.datetime] = None,
+    ) -> "Batch":
+        """Returns a new Batch of records filtered by start and end datetimes (both optional)."""
+        return Batch(
+            [
+                record
+                for record in self.records
+                if (not start_date or record.datetime() >= start_date)
+                and (not end_date or record.datetime() < end_date)
+            ]
+        )
+
+    def to_dict(self) -> List[Dict[str, Any]]:
+        """Returns a list of dictionaries, one for each record."""
+        return [record.to_dict() for record in self.records]
+
+    def to_json(self, indent: int = 4) -> str:
+        """Returns a JSON array string with one object per record."""
+        data = []
+        for d in self.to_dict():
+            d["datetime"] = d["datetime"].isoformat()
+            data.append(d)
+        return json.dumps(data, indent=indent)
+
+    def to_df(self) -> pd.DataFrame:
+        """Converts the records of this batch into a DataFrame, one row per record."""
+        import pandas as pd
+
+        return pd.DataFrame([record.to_dict() for record in self.records])
diff --git a/src/isd/cli.py b/src/isd/cli.py
index 533fd04..70ec655 100644
--- a/src/isd/cli.py
+++ b/src/isd/cli.py
@@ -1,13 +1,9 @@
 # type: ignore
 
-import dataclasses
-import itertools
-import json
-
 import click
 from click import ClickException
 
-import isd.io
+from isd.batch import Batch
 
 
 @click.group()
@@ -20,9 +16,9 @@ def main() -> None:
 @click.option("-i", "--index", default=0)
 def record(infile: str, index: int) -> None:
     """Prints a single record to standard output in JSON format."""
-    with isd.io.open(infile) as records:
-        record = next(itertools.islice(records, index, None), None)
-        if record:
-            print(json.dumps(dataclasses.asdict(record), indent=4))
-        else:
-            raise ClickException(f"No record with index {index}")
+    batch = Batch.from_path(infile)
+    try:
+        record_ = batch[index]
+        print(record_.to_json())
+    except IndexError:
+        raise ClickException(f"No record with index {index}")
diff --git a/src/isd/io.py b/src/isd/io.py
index f35621f..afbebee 100644
--- a/src/isd/io.py
+++ b/src/isd/io.py
@@ -1,12 +1,9 @@
-import datetime
 import gzip
 import os.path
 from contextlib import contextmanager
-from typing import Generator, Iterable, Iterator, Optional, TextIO
+from typing import Generator, Iterable
 
-from pandas import DataFrame
 
-from . import pandas as isd_pandas
 from .record import Record
 
 builtin_open = open
@@ -28,21 +25,3 @@ def open(path: str) -> Generator[Iterable[Record], None, None]:
                 Record.parse(uncompressed_line)
                 for uncompressed_line in uncompressed_file
             )
-
-
-def from_text_io(text_io: TextIO) -> Iterator[Record]:
-    """Reads records from a text io stream."""
-    while True:
-        line = text_io.readline()
-        if not line:
-            break
-        else:
-            yield Record.parse(line)
-
-
-def read_to_data_frame(
-    path: str, since: Optional[datetime.datetime] = None
-) -> DataFrame:
-    """Reads a local ISD file into a DataFrame."""
-    with open(path) as file:
-        return isd_pandas.data_frame(file, since=since)
diff --git a/src/isd/pandas.py b/src/isd/pandas.py
deleted file mode 100644
index a4b28ec..0000000
--- a/src/isd/pandas.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import datetime
-from typing import Iterable, Optional
-
-import pandas
-from pandas import CategoricalDtype, DataFrame
-
-from isd import Record
-
-DataSourceDtype = CategoricalDtype(
-    [
-        "1",
-        "2",
-        "3",
-        "4",
-        "5",
-        "6",
-        "7",
-        "8",
-        "A",
-        "B",
-        "C",
-        "D",
-        "E",
-        "F",
-        "G",
-        "H",
-        "I",
-        "J",
-        "K",
-        "L",
-        "M",
-        "N",
-        "O",
-    ]
-)
-ReportTypeDtype = CategoricalDtype(
-    [
-        "AERO",
-        "AUST",
-        "AUTO",
-        "BOGUS",
-        "BRAZ",
-        "COOPD",
-        "COOPS",
-        "CRB",
-        "CRN05",
-        "CRN15",
-        "FM-12",
-        "FM-13",
-        "FM-14",
-        "FM-15",
-        "FM-16",
-        "FM-18",
-        "GREEN",
-        "MESOH",
-        "MESOS",
-        "MESOW",
-        "MEXIC",
-        "NSRDB",
-        "PCP15",
-        "PCP60",
-        "S-S-A",
-        "SA-AU",
-        "SAO",
-        "SAOSP",
-        "SHEF",
-        "SMARS",
-        "SOD",
-        "SOM",
-        "SURF",
-        "SY-AE",
-        "SY-AU",
-        "SY-MT",
-        "SY-SA",
-        "WBO",
-        "WNO",
-    ]
-)
-QualityControlProcessDtype = CategoricalDtype(["V01", "V02", "V03"])
-QualityCodeDtype = CategoricalDtype(
-    [
-        "0",
-        "1",
-        "2",
-        "3",
-        "4",
-        "5",
-        "6",
-        "7",
-        "9",
-        "A",
-        "U",
-        "P",
-        "I",
-        "M",
-        "C",
-        "R",
-    ]
-)
-WindObservationTypeDtype = CategoricalDtype(
-    ["A", "B", "C", "H", "N", "R", "Q", "T", "V"]
-)
-CeilingDeterminationCodeDtype = CategoricalDtype(
-    ["A", "B", "C", "D", "E", "M", "P", "R", "S", "U", "V", "W"]
-)
-CavokCodeDtype = CategoricalDtype(["N", "Y"])
-VisibilityVariabilityCodeDtype = CategoricalDtype(["N", "V"])
-
-
-def data_frame(
-    records: Iterable[Record], since: Optional[datetime.datetime] = None
-) -> DataFrame:
-    """Constructs a pandas data frame from an iterable of Records.
-
-    Uses appropriate datatypes and categorical variables.
-    """
-    data_frame = DataFrame(records).astype(
-        {
-            "usaf_id": "string",
-            "ncei_id": "string",
-            "year": "UInt16",
-            "month": "UInt8",
-            "day": "UInt8",
-            "hour": "UInt8",
-            "minute": "UInt8",
-            "data_source": DataSourceDtype,
-            "latitude": "float",
-            "longitude": "float",
-            "report_type": ReportTypeDtype,
-            "elevation": "Int16",
-            "call_letters": "string",
-            "quality_control_process": QualityControlProcessDtype,
-            "wind_direction": "UInt16",
-            "wind_direction_quality_code": QualityCodeDtype,
-            "wind_observation_type": WindObservationTypeDtype,
-            "wind_speed": "float",
-            "wind_speed_quality_code": QualityCodeDtype,
-            "ceiling": "float",
-            "ceiling_quality_code": QualityCodeDtype,
-            "ceiling_determination_code": CeilingDeterminationCodeDtype,
-            "cavok_code": CavokCodeDtype,
-            "visibility": "UInt32",
-            "visibility_quality_code": QualityCodeDtype,
-            "visibility_variability_code": VisibilityVariabilityCodeDtype,
-            "visibility_variability_quality_code": QualityCodeDtype,
-            "air_temperature": "float",
-            "air_temperature_quality_code": QualityCodeDtype,
-            "dew_point_temperature": "float",
-            "dew_point_temperature_quality_code": QualityCodeDtype,
-            "sea_level_pressure": "float",
-            "sea_level_pressure_quality_code": QualityCodeDtype,
-            "additional_data": "string",
-            "remarks": "string",
-            "element_quality_data": "string",
-            "original_observation_data": "string",
-        }
-    )
-    timestamp = pandas.to_datetime(
-        data_frame[["year", "month", "day", "hour", "minute"]]
-    )
-    data_frame["timestamp"] = timestamp
-    if since:
-        return data_frame[data_frame["timestamp"] > since]
-    else:
-        return data_frame
diff --git a/src/isd/record.py b/src/isd/record.py
index e0cc94c..bb4b51a 100644
--- a/src/isd/record.py
+++ b/src/isd/record.py
@@ -1,6 +1,7 @@
 import datetime
+import json
 from dataclasses import dataclass
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple, Dict
 
 from isd.errors import IsdError
 
@@ -64,37 +65,41 @@ def parse(cls, line: str) -> "Record":
         minute = int(line[25:27])
         data_source = line[27]
         # TODO test missing latitudes and longitudes
-        latitude = optional(line[28:34], "+99999", lambda s: float(s) / 1000)
-        longitude = optional(line[34:41], "+999999", lambda s: float(s) / 1000)
-        report_type = optional(line[41:46], "99999")
-        elevation = optional(line[46:51], "+9999", lambda s: float(s))
-        call_letters = optional(line[51:56], "99999")
+        latitude = cls.optional(line[28:34], "+99999", lambda s: float(s) / 1000)
+        longitude = cls.optional(line[34:41], "+999999", lambda s: float(s) / 1000)
+        report_type = cls.optional(line[41:46], "99999")
+        elevation = cls.optional(line[46:51], "+9999", lambda s: float(s))
+        call_letters = cls.optional(line[51:56], "99999")
         quality_control_process = line[56:60]
-        wind_direction = optional(line[60:63], "999", lambda s: int(s))
+        wind_direction = cls.optional(line[60:63], "999", lambda s: int(s))
         wind_direction_quality_code = line[63]
-        wind_observation_type = optional(line[64], "9")
-        wind_speed = optional(line[65:69], "9999", lambda s: float(s) / 10)
+        wind_observation_type = cls.optional(line[64], "9")
+        wind_speed = cls.optional(line[65:69], "9999", lambda s: float(s) / 10)
         wind_speed_quality_code = line[69]
-        ceiling = optional(line[70:75], "99999", lambda s: int(s))
+        ceiling = cls.optional(line[70:75], "99999", lambda s: int(s))
         ceiling_quality_code = line[75]
-        ceiling_determination_code = optional(line[76], "9")
-        cavok_code = optional(line[77], "9")
-        visibility = optional(line[78:84], "999999", lambda s: int(s))
+        ceiling_determination_code = cls.optional(line[76], "9")
+        cavok_code = cls.optional(line[77], "9")
+        visibility = cls.optional(line[78:84], "999999", lambda s: int(s))
         visibility_quality_code = line[84]
-        visibility_variability_code = optional(line[85], "9")
+        visibility_variability_code = cls.optional(line[85], "9")
         visibility_variability_quality_code = line[86]
-        air_temperature = optional(line[87:92], "+9999", lambda s: float(s) / 10)
+        air_temperature = cls.optional(line[87:92], "+9999", lambda s: float(s) / 10)
         air_temperature_quality_code = line[92]
-        dew_point_temperature = optional(line[93:98], "+9999", lambda s: float(s) / 10)
+        dew_point_temperature = cls.optional(
+            line[93:98], "+9999", lambda s: float(s) / 10
+        )
         dew_point_temperature_quality_code = line[98]
-        sea_level_pressure = optional(line[99:104], "99999", lambda s: float(s) / 10)
+        sea_level_pressure = cls.optional(
+            line[99:104], "99999", lambda s: float(s) / 10
+        )
         sea_level_pressure_quality_code = line[104]
-        additional_data, remainder = extract_data(
+        additional_data, remainder = cls._extract_data(
             line[105:], "ADD", ["REM", "EQD", "QNN"]
         )
-        remarks, remainder = extract_data(remainder, "REM", ["EQD", "QNN"])
-        element_quality_data, remainder = extract_data(remainder, "EQD", ["QNN"])
-        original_observation_data, remainder = extract_data(remainder, "QNN", [])
+        remarks, remainder = cls._extract_data(remainder, "REM", ["EQD", "QNN"])
+        element_quality_data, remainder = cls._extract_data(remainder, "EQD", ["QNN"])
+        original_observation_data, remainder = cls._extract_data(remainder, "QNN", [])
         assert not remainder
 
         return cls(
@@ -143,32 +148,80 @@ def datetime(self) -> datetime.datetime:
             self.year, self.month, self.day, self.hour, self.minute
         )
 
+    @staticmethod
+    def _extract_data(message: str, tag: str, later_tags: List[str]) -> Tuple[str, str]:
+        if message.startswith(tag):
+            index = None
+            for other_tag in later_tags:
+                try:
+                    index = message.find(other_tag)
+                except ValueError:
+                    continue
+                break
+            if index != -1:
+                data = message[len(tag) : index]
+                tail = message[index:]
+                return data, tail
+            else:
+                return message[len(tag) :], ""
+        else:
+            return "", message
 
-def extract_data(message: str, tag: str, later_tags: List[str]) -> Tuple[str, str]:
-    if message.startswith(tag):
-        index = None
-        for other_tag in later_tags:
-            try:
-                index = message.find(other_tag)
-            except ValueError:
-                continue
-            break
-        if index != -1:
-            data = message[len(tag) : index]
-            tail = message[index:]
-            return data, tail
+    @staticmethod
+    def optional(
+        string: str,
+        missing_value: str,
+        transform: Optional[Callable[[str], Any]] = None,
+    ) -> Any:
+        if string == missing_value:
+            return None
+        elif transform:
+            return transform(string)
         else:
-            return message[len(tag) :], ""
-    else:
-        return "", message
+            return string
 
+    def to_dict(self) -> Dict[str, Any]:
+        """Returns a dictionary representation of this record."""
+        return {
+            "usaf_id": self.usaf_id,
+            "ncei_id": self.ncei_id,
+            # use datetime instead of year, month, day, hour, minute
+            "datetime": self.datetime(),
+            "data_source": self.data_source,
+            "latitude": self.latitude,
+            "longitude": self.longitude,
+            "report_type": self.report_type,
+            "elevation": self.elevation,
+            "call_letters": self.call_letters,
+            "quality_control_process": self.quality_control_process,
+            "wind_direction": self.wind_direction,
+            "wind_direction_quality_code": self.wind_direction_quality_code,
+            "wind_observation_type": self.wind_observation_type,
+            "wind_speed": self.wind_speed,
+            "wind_speed_quality_code": self.wind_speed_quality_code,
+            "ceiling": self.ceiling,
+            "ceiling_quality_code": self.ceiling_quality_code,
+            "ceiling_determination_code": self.ceiling_determination_code,
+            "cavok_code": self.cavok_code,
+            "visibility": self.visibility,
+            "visibility_quality_code": self.visibility_quality_code,
+            "visibility_variability_code": self.visibility_variability_code,
+            "visibility_variability_quality_code": self.visibility_variability_quality_code,
+            "air_temperature": self.air_temperature,
+            "air_temperature_quality_code": self.air_temperature_quality_code,
+            "dew_point_temperature": self.dew_point_temperature,
+            "dew_point_temperature_quality_code": self.dew_point_temperature_quality_code,
+            "sea_level_pressure": self.sea_level_pressure,
+            "sea_level_pressure_quality_code": self.sea_level_pressure_quality_code,
+            "additional_data": self.additional_data,
+            "remarks": self.remarks,
+            "element_quality_data": self.element_quality_data,
+            "original_observation_data": self.original_observation_data,
+        }
 
-def optional(
-    string: str, missing_value: str, transform: Optional[Callable[[str], Any]] = None
-) -> Any:
-    if string == missing_value:
-        return None
-    elif transform:
-        return transform(string)
-    else:
-        return string
+    def to_json(self, indent: int = 4) -> str:
+        """Returns a JSON representation of this record."""
+        data = self.to_dict()
+        # json cannot serialize datetime objects, so store the ISO 8601 string instead
+        data["datetime"] = data["datetime"].isoformat()
+        return json.dumps(data, indent=indent)
diff --git a/src/isd/utils.py b/src/isd/utils.py
deleted file mode 100644
index 849009f..0000000
--- a/src/isd/utils.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import datetime
-from typing import Iterable, Iterator, Optional
-
-from isd.record import Record
-
-
-def filter_by_datetime(
-    records: Iterable[Record],
-    start: Optional[datetime.datetime] = None,
-    end: Optional[datetime.datetime] = None,
-) -> Iterator[Record]:
-    """Returns an iterator over records filtered by start and end datetimes (both optional)."""
-    return (
-        record
-        for record in records
-        if (not start or record.datetime() >= start)
-        and (not end or record.datetime() < end)
-    )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
index e660335..37f3084 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,7 +7,7 @@
 
 import pytest
 
-from isd import Record
+from isd import Record, Batch
 
 BARDUFOSS_FILE_NAME = "010230-99999-2021"
 VANCE_BRAND_FILE_NAME = "720538-00164-2021"
@@ -32,11 +32,21 @@ def record_line() -> str:
     return line
 
 
+@pytest.fixture
+def record(record_line: str) -> Record:
+    return Record.parse(record_line)
+
+
 @pytest.fixture
 def uncompressed_path() -> str:
     return data_file_path(VANCE_BRAND_FILE_NAME)
 
 
+@pytest.fixture
+def batch(uncompressed_path: str) -> Batch:
+    return Batch.from_path(uncompressed_path)
+
+
 @pytest.fixture
 def compressed_path() -> str:
     return data_file_path(VANCE_BRAND_COMPRESSED_FILE_NAME)
diff --git a/tests/test_batch.py b/tests/test_batch.py
new file mode 100644
index 0000000..f924a1a
--- /dev/null
+++ b/tests/test_batch.py
@@ -0,0 +1,64 @@
+import datetime as dt
+import json
+
+from isd import Batch
+
+
+def test_batch_from_uncompressed(uncompressed_path: str) -> None:
+    batch = Batch.from_path(uncompressed_path)
+    assert len(batch) == 500
+
+
+def test_batch_from_compressed(compressed_path: str) -> None:
+    batch = Batch.from_path(compressed_path)
+    assert len(batch) == 24252
+
+
+def test_batch_from_string(uncompressed_path: str) -> None:
+    with open(uncompressed_path) as file:
+        batch = Batch.parse(file.read())
+    assert len(batch) == 500
+
+
+def test_batch_filter_by_datetime(batch: Batch) -> None:
+    batch_filtered = batch.filter_by_datetime(start_date=dt.datetime(2021, 1, 1, 3, 30))
+    assert len(batch_filtered) == 490
+
+    batch_filtered = batch.filter_by_datetime(end_date=dt.datetime(2021, 1, 1, 3, 30))
+    assert len(batch_filtered) == 10
+
+    batch_filtered = batch.filter_by_datetime(
+        start_date=dt.datetime(2021, 1, 1, 3, 30),
+        end_date=dt.datetime(2021, 1, 1, 3, 55),
+    )
+    assert len(batch_filtered) == 1
+
+    batch_filtered = batch.filter_by_datetime(
+        start_date=dt.datetime(2021, 1, 1, 3, 30),
+        end_date=dt.datetime(2021, 1, 1, 3, 56),
+    )
+    assert len(batch_filtered) == 2
+
+
+def test_batch_to_dict(batch: Batch) -> None:
+    first = batch.to_dict()[0]
+    assert first["usaf_id"] == "720538"
+    assert first["ncei_id"] == "00164"
+    assert first["datetime"] == dt.datetime(2021, 1, 1, 0, 15)
+
+
+def test_batch_to_json(batch: Batch) -> None:
+    json_string = batch.to_json()
+    data = json.loads(json_string)
+    assert len(data) == 500
+    first = data[0]
+    assert first["usaf_id"] == "720538"
+    assert first["ncei_id"] == "00164"
+    assert first["datetime"] == "2021-01-01T00:15:00"
+
+
+def test_batch_to_df(batch: Batch) -> None:
+    datetime_min = dt.datetime(2021, 1, 5)
+    df = batch.to_df()
+    df = df[df["datetime"] >= datetime_min]
+    assert len(df) == 212
diff --git a/tests/test_io.py b/tests/test_io.py
index ed44c24..373165b 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,5 +1,3 @@
-import datetime
-
 import isd.io
 
 
@@ -13,16 +11,3 @@ def test_open_compressed(compressed_path: str) -> None:
     with isd.io.open(compressed_path) as generator:
         records = list(generator)
     assert len(records) == 24252
-
-
-def test_read_to_data_frame_since(uncompressed_path: str) -> None:
-    data_frame = isd.io.read_to_data_frame(
-        uncompressed_path, since=datetime.datetime(2021, 1, 5)
-    )
-    assert len(data_frame) == 212
-
-
-def test_from_text_io(uncompressed_path: str) -> None:
-    with open(uncompressed_path) as file:
-        records = list(isd.io.from_text_io(file))
-        assert len(records) == 500
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
deleted file mode 100644
index a294581..0000000
--- a/tests/test_pandas.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from typing import List
-
-import isd.pandas
-from isd import Record
-
-
-def test_data_frame(records: List[Record]) -> None:
-    isd.pandas.data_frame(records)
diff --git a/tests/test_record.py b/tests/test_record.py
index b7f0826..2fb6bd9 100644
--- a/tests/test_record.py
+++ b/tests/test_record.py
@@ -1,3 +1,6 @@
+import datetime
+import json
+
 import pytest
 
 from isd import IsdError, Record
@@ -52,3 +55,51 @@ def test_parse(record_line: str) -> None:
 def test_line_too_short() -> None:
     with pytest.raises(IsdError):
         Record.parse("")
+
+
+def test_record_to_dict(record: Record) -> None:
+    assert record.to_dict() == {
+        "usaf_id": "720538",
+        "ncei_id": "00164",
+        "datetime": datetime.datetime(2021, 1, 1, 0, 15),
+        "data_source": "4",
+        "latitude": 40.167,
+        "longitude": -105.167,
+        "report_type": "FM-15",
+        "elevation": 1541,
+        "call_letters": None,
+        "quality_control_process": "V020",
+        "wind_direction": None,
+        "wind_direction_quality_code": "9",
+        "wind_observation_type": "C",
+        "wind_speed": 0,
+        "wind_speed_quality_code": "1",
+        "ceiling": 3353,
+        "ceiling_quality_code": "1",
+        "ceiling_determination_code": None,
+        "cavok_code": "N",
+        "visibility": 16093,
+        "visibility_quality_code": "1",
+        "visibility_variability_code": None,
+        "visibility_variability_quality_code": "9",
+        "air_temperature": 3.1,
+        "air_temperature_quality_code": "1",
+        "dew_point_temperature": -5.8,
+        "dew_point_temperature_quality_code": "1",
+        "sea_level_pressure": None,
+        "sea_level_pressure_quality_code": "9",
+        "additional_data": "GD14991+0335399GE19MSL   +99999+"
+        "99999GF199999999999033531999999MA1101561999999",
+        "remarks": "MET075METAR KLMO 010015Z AUTO 00000KT "
+        "10SM OVC110 03/M06 A2999 RMK AO2 T00311058=",
+        "element_quality_data": "",
+        "original_observation_data": "",
+    }
+
+
+def test_record_to_json(record: Record) -> None:
+    json_string = record.to_json()
+    data = json.loads(json_string)
+    assert data["usaf_id"] == "720538"
+    assert data["ncei_id"] == "00164"
+    assert data["datetime"] == "2021-01-01T00:15:00"
diff --git a/tests/test_utils.py b/tests/test_utils.py
deleted file mode 100644
index 169e034..0000000
--- a/tests/test_utils.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import datetime
-from typing import List
-
-import isd.utils
-from isd.record import Record
-
-
-def test_filter_by_datetime(records: List[Record]) -> None:
-    assert (
-        len(
-            list(
-                isd.utils.filter_by_datetime(
-                    records, start=datetime.datetime(2021, 1, 1, 3, 30)
-                )
-            )
-        )
-        == 490
-    )
-    assert (
-        len(
-            list(
-                isd.utils.filter_by_datetime(
-                    records, end=datetime.datetime(2021, 1, 1, 3, 30)
-                )
-            )
-        )
-        == 10
-    )
-    assert (
-        len(
-            list(
-                isd.utils.filter_by_datetime(
-                    records,
-                    start=datetime.datetime(2021, 1, 1, 3, 30),
-                    end=datetime.datetime(2021, 1, 1, 3, 55),
-                )
-            )
-        )
-        == 1
-    )
-    assert (
-        len(
-            list(
-                isd.utils.filter_by_datetime(
-                    records,
-                    start=datetime.datetime(2021, 1, 1, 3, 30),
-                    end=datetime.datetime(2021, 1, 1, 3, 56),
-                )
-            )
-        )
-        == 2
-    )