From 0bc9162c16ea64c97c5023c24b921e1fc80e52c3 Mon Sep 17 00:00:00 2001 From: Benjamin Gutzmann Date: Mon, 8 Jan 2024 19:04:53 +0100 Subject: [PATCH] Restructure library into record and batch --- CHANGELOG.md | 6 ++ README.md | 12 +++- docs/api/mod.rst | 2 +- docs/api/pandas.rst | 5 -- src/isd/__init__.py | 3 +- src/isd/batch.py | 87 +++++++++++++++++++++++ src/isd/cli.py | 18 ++--- src/isd/io.py | 23 +----- src/isd/pandas.py | 165 ------------------------------------------- src/isd/record.py | 145 +++++++++++++++++++++++++------------ src/isd/utils.py | 18 ----- tests/__init__.py | 0 tests/conftest.py | 12 +++- tests/test_batch.py | 64 +++++++++++++++++ tests/test_io.py | 15 ---- tests/test_pandas.py | 8 --- tests/test_record.py | 51 +++++++++++++ tests/test_utils.py | 52 -------------- 18 files changed, 340 insertions(+), 346 deletions(-) delete mode 100644 docs/api/pandas.rst create mode 100644 src/isd/batch.py delete mode 100644 src/isd/pandas.py delete mode 100644 src/isd/utils.py create mode 100644 tests/__init__.py create mode 100644 tests/test_batch.py delete mode 100644 tests/test_pandas.py delete mode 100644 tests/test_utils.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c5d8c39..2d5a6c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- Restructure library into record and batch + ## [0.2.1] - 2023-12-27 ### Fixed diff --git a/README.md b/README.md index b398e0c..c1868f3 100644 --- a/README.md +++ b/README.md @@ -19,11 +19,21 @@ There is a simple command line interface. The `isd record` command prints a single record in JSON format: ```shell -isd record 720538-00164-2021 +isd record tests/data/720538-00164-2021 ``` The Python API allows reading compressed and uncompressed ISD files: +```python +from isd import Batch + +batch = Batch.from_path("isd-file") +for record in batch: + print(record) +``` + +Streaming is also supported: + ```python import isd.io diff --git a/docs/api/mod.rst b/docs/api/mod.rst index 3ead6d9..9e5664b 100644 --- a/docs/api/mod.rst +++ b/docs/api/mod.rst @@ -9,8 +9,8 @@ Most useful functions and classes are contained in submodules. errors io - pandas record + batch isd --- diff --git a/docs/api/pandas.rst b/docs/api/pandas.rst deleted file mode 100644 index 9a6db0d..0000000 --- a/docs/api/pandas.rst +++ /dev/null @@ -1,5 +0,0 @@ -isd.pandas -========== - -.. automodule:: isd.pandas - :members: diff --git a/src/isd/__init__.py b/src/isd/__init__.py index c960531..d8ed7f1 100644 --- a/src/isd/__init__.py +++ b/src/isd/__init__.py @@ -1,4 +1,5 @@ from isd.errors import IsdError +from isd.batch import Batch from isd.record import Record -__all__ = ["IsdError", "Record"] +__all__ = ["IsdError", "Batch", "Record"] diff --git a/src/isd/batch.py b/src/isd/batch.py new file mode 100644 index 0000000..b4da2ad --- /dev/null +++ b/src/isd/batch.py @@ -0,0 +1,87 @@ +import gzip +import json +from io import BytesIO +from pathlib import Path +from dataclasses import dataclass +from typing import List, Union, Optional, Dict, Any, Iterator +import datetime as dt + +from isd.record import Record + +import pandas as pd + + +@dataclass +class Batch: + records: List[Record] + + def __len__(self) -> int: + return len(self.records) + + def __getitem__(self, index: int) -> Record: + return self.records[index] + + def __iter__(self) -> Iterator[Record]: + return iter(self.records) + + @classmethod + def parse(cls, lines: Union[str, BytesIO]) -> "Batch": + """Reads records from a text io stream.""" + if isinstance(lines, BytesIO): + lines = lines.read().decode("utf-8") + return cls([Record.parse(line) for line in lines.splitlines()]) + + @classmethod + def from_path(cls, path: Union[str, Path]) -> "Batch": + """Opens a local ISD file and returns an iterator over its records. + + If the path has a .gz extension, this function will assume it has gzip + compression and will attempt to open it using `gzip.open`. + """ + path = Path(path) + if path.suffix == ".gz": + with gzip.open(path) as gzip_file: + return cls( + [Record.parse(gzip_line.decode("utf-8")) for gzip_line in gzip_file] + ) + else: + with open(path) as uncompressed_file: + return cls( + [ + Record.parse(uncompressed_line) + for uncompressed_line in uncompressed_file + ] + ) + + def filter_by_datetime( + self, + start_date: Optional[dt.datetime] = None, + end_date: Optional[dt.datetime] = None, + ) -> "Batch": + """Returns an iterator over records filtered by start and end datetimes (both optional).""" + return Batch( + [ + record + for record in self.records + if (not start_date or record.datetime() >= start_date) + and (not end_date or record.datetime() < end_date) + ] + ) + + def to_dict(self) -> List[Dict[str, Any]]: + """Returns a list of dictionaries, one for each record.""" + return [record.to_dict() for record in self.records] + + def to_json(self, indent: int = 4) -> str: + """Returns a JSON line of all records.""" + data = [] + for d in self.to_dict(): + d["datetime"] = d["datetime"].isoformat() + data.append(d) + return json.dumps(data, indent=indent) + + def to_df(self) -> pd.DataFrame: + """Reads a local ISD file into a DataFrame.""" + import pandas as pd + + return pd.DataFrame([record.to_dict() for record in self.records]) diff --git a/src/isd/cli.py b/src/isd/cli.py index 533fd04..70ec655 100644 --- a/src/isd/cli.py +++ b/src/isd/cli.py @@ -1,13 +1,9 @@ # type: ignore -import dataclasses -import itertools -import json - import click from click import ClickException -import isd.io +from isd.batch import Batch @click.group() @@ -20,9 +16,9 @@ def main() -> None: @click.option("-i", "--index", default=0) def record(infile: str, index: int) -> None: """Prints a single record to standard output in JSON format.""" - with isd.io.open(infile) as records: - record = next(itertools.islice(records, index, None), None) - if record: - print(json.dumps(dataclasses.asdict(record), indent=4)) - else: - raise ClickException(f"No record with index {index}") + batch = Batch.from_path(infile) + try: + record_ = batch[index] + print(record_.to_json()) + except IndexError: + raise ClickException(f"No record with index {index}") diff --git a/src/isd/io.py b/src/isd/io.py index f35621f..afbebee 100644 --- a/src/isd/io.py +++ b/src/isd/io.py @@ -1,12 +1,9 @@ -import datetime import gzip import os.path from contextlib import contextmanager -from typing import Generator, Iterable, Iterator, Optional, TextIO +from typing import Generator, Iterable -from pandas import DataFrame -from . import pandas as isd_pandas from .record import Record builtin_open = open @@ -28,21 +25,3 @@ def open(path: str) -> Generator[Iterable[Record], None, None]: Record.parse(uncompressed_line) for uncompressed_line in uncompressed_file ) - - -def from_text_io(text_io: TextIO) -> Iterator[Record]: - """Reads records from a text io stream.""" - while True: - line = text_io.readline() - if not line: - break - else: - yield Record.parse(line) - - -def read_to_data_frame( - path: str, since: Optional[datetime.datetime] = None -) -> DataFrame: - """Reads a local ISD file into a DataFrame.""" - with open(path) as file: - return isd_pandas.data_frame(file, since=since) diff --git a/src/isd/pandas.py b/src/isd/pandas.py deleted file mode 100644 index a4b28ec..0000000 --- a/src/isd/pandas.py +++ /dev/null @@ -1,165 +0,0 @@ -import datetime -from typing import Iterable, Optional - -import pandas -from pandas import CategoricalDtype, DataFrame - -from isd import Record - -DataSourceDtype = CategoricalDtype( - [ - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - ] -) -ReportTypeDtype = CategoricalDtype( - [ - "AERO", - "AUST", - "AUTO", - "BOGUS", - "BRAZ", - "COOPD", - "COOPS", - "CRB", - "CRN05", - "CRN15", - "FM-12", - "FM-13", - "FM-14", - "FM-15", - "FM-16", - "FM-18", - "GREEN", - "MESOH", - "MESOS", - "MESOW", - "MEXIC", - "NSRDB", - "PCP15", - "PCP60", - "S-S-A", - "SA-AU", - "SAO", - "SAOSP", - "SHEF", - "SMARS", - "SOD", - "SOM", - "SURF", - "SY-AE", - "SY-AU", - "SY-MT", - "SY-SA", - "WBO", - "WNO", - ] -) -QualityControlProcessDtype = CategoricalDtype(["V01", "V02", "V03"]) -QualityCodeDtype = CategoricalDtype( - [ - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "9", - "A", - "U", - "P", - "I", - "M", - "C", - "R", - ] -) -WindObservationTypeDtype = CategoricalDtype( - ["A", "B", "C", "H", "N", "R", "Q", "T", "V"] -) -CeilingDeterminationCodeDtype = CategoricalDtype( - ["A", "B", "C", "D", "E", "M", "P", "R", "S", "U", "V", "W"] -) -CavokCodeDtype = CategoricalDtype(["N", "Y"]) -VisibilityVariabilityCodeDtype = CategoricalDtype(["N", "V"]) - - -def data_frame( - records: Iterable[Record], since: Optional[datetime.datetime] = None -) -> DataFrame: - """Constructs a pandas data frame from an iterable of Records. - - Uses appropriate datatypes and categorical variables. - """ - data_frame = DataFrame(records).astype( - { - "usaf_id": "string", - "ncei_id": "string", - "year": "UInt16", - "month": "UInt8", - "day": "UInt8", - "hour": "UInt8", - "minute": "UInt8", - "data_source": DataSourceDtype, - "latitude": "float", - "longitude": "float", - "report_type": ReportTypeDtype, - "elevation": "Int16", - "call_letters": "string", - "quality_control_process": QualityControlProcessDtype, - "wind_direction": "UInt16", - "wind_direction_quality_code": QualityCodeDtype, - "wind_observation_type": WindObservationTypeDtype, - "wind_speed": "float", - "wind_speed_quality_code": QualityCodeDtype, - "ceiling": "float", - "ceiling_quality_code": QualityCodeDtype, - "ceiling_determination_code": CeilingDeterminationCodeDtype, - "cavok_code": CavokCodeDtype, - "visibility": "UInt32", - "visibility_quality_code": QualityCodeDtype, - "visibility_variability_code": VisibilityVariabilityCodeDtype, - "visibility_variability_quality_code": QualityCodeDtype, - "air_temperature": "float", - "air_temperature_quality_code": QualityCodeDtype, - "dew_point_temperature": "float", - "dew_point_temperature_quality_code": QualityCodeDtype, - "sea_level_pressure": "float", - "sea_level_pressure_quality_code": QualityCodeDtype, - "additional_data": "string", - "remarks": "string", - "element_quality_data": "string", - "original_observation_data": "string", - } - ) - timestamp = pandas.to_datetime( - data_frame[["year", "month", "day", "hour", "minute"]] - ) - data_frame["timestamp"] = timestamp - if since: - return data_frame[data_frame["timestamp"] > since] - else: - return data_frame diff --git a/src/isd/record.py b/src/isd/record.py index e0cc94c..bb4b51a 100644 --- a/src/isd/record.py +++ b/src/isd/record.py @@ -1,6 +1,7 @@ import datetime +import json from dataclasses import dataclass -from typing import Any, Callable, List, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple, Dict from isd.errors import IsdError @@ -64,37 +65,41 @@ def parse(cls, line: str) -> "Record": minute = int(line[25:27]) data_source = line[27] # TODO test missing latitudes and longitudes - latitude = optional(line[28:34], "+99999", lambda s: float(s) / 1000) - longitude = optional(line[34:41], "+999999", lambda s: float(s) / 1000) - report_type = optional(line[41:46], "99999") - elevation = optional(line[46:51], "+9999", lambda s: float(s)) - call_letters = optional(line[51:56], "99999") + latitude = cls.optional(line[28:34], "+99999", lambda s: float(s) / 1000) + longitude = cls.optional(line[34:41], "+999999", lambda s: float(s) / 1000) + report_type = cls.optional(line[41:46], "99999") + elevation = cls.optional(line[46:51], "+9999", lambda s: float(s)) + call_letters = cls.optional(line[51:56], "99999") quality_control_process = line[56:60] - wind_direction = optional(line[60:63], "999", lambda s: int(s)) + wind_direction = cls.optional(line[60:63], "999", lambda s: int(s)) wind_direction_quality_code = line[63] - wind_observation_type = optional(line[64], "9") - wind_speed = optional(line[65:69], "9999", lambda s: float(s) / 10) + wind_observation_type = cls.optional(line[64], "9") + wind_speed = cls.optional(line[65:69], "9999", lambda s: float(s) / 10) wind_speed_quality_code = line[69] - ceiling = optional(line[70:75], "99999", lambda s: int(s)) + ceiling = cls.optional(line[70:75], "99999", lambda s: int(s)) ceiling_quality_code = line[75] - ceiling_determination_code = optional(line[76], "9") - cavok_code = optional(line[77], "9") - visibility = optional(line[78:84], "999999", lambda s: int(s)) + ceiling_determination_code = cls.optional(line[76], "9") + cavok_code = cls.optional(line[77], "9") + visibility = cls.optional(line[78:84], "999999", lambda s: int(s)) visibility_quality_code = line[84] - visibility_variability_code = optional(line[85], "9") + visibility_variability_code = cls.optional(line[85], "9") visibility_variability_quality_code = line[86] - air_temperature = optional(line[87:92], "+9999", lambda s: float(s) / 10) + air_temperature = cls.optional(line[87:92], "+9999", lambda s: float(s) / 10) air_temperature_quality_code = line[92] - dew_point_temperature = optional(line[93:98], "+9999", lambda s: float(s) / 10) + dew_point_temperature = cls.optional( + line[93:98], "+9999", lambda s: float(s) / 10 + ) dew_point_temperature_quality_code = line[98] - sea_level_pressure = optional(line[99:104], "99999", lambda s: float(s) / 10) + sea_level_pressure = cls.optional( + line[99:104], "99999", lambda s: float(s) / 10 + ) sea_level_pressure_quality_code = line[104] - additional_data, remainder = extract_data( + additional_data, remainder = cls._extract_data( line[105:], "ADD", ["REM", "EQD", "QNN"] ) - remarks, remainder = extract_data(remainder, "REM", ["EQD", "QNN"]) - element_quality_data, remainder = extract_data(remainder, "EQD", ["QNN"]) - original_observation_data, remainder = extract_data(remainder, "QNN", []) + remarks, remainder = cls._extract_data(remainder, "REM", ["EQD", "QNN"]) + element_quality_data, remainder = cls._extract_data(remainder, "EQD", ["QNN"]) + original_observation_data, remainder = cls._extract_data(remainder, "QNN", []) assert not remainder return cls( @@ -143,32 +148,80 @@ def datetime(self) -> datetime.datetime: self.year, self.month, self.day, self.hour, self.minute ) + @staticmethod + def _extract_data(message: str, tag: str, later_tags: List[str]) -> Tuple[str, str]: + if message.startswith(tag): + index = None + for other_tag in later_tags: + try: + index = message.find(other_tag) + except ValueError: + continue + break + if index != -1: + data = message[len(tag) : index] + tail = message[index:] + return data, tail + else: + return message[len(tag) :], "" + else: + return "", message -def extract_data(message: str, tag: str, later_tags: List[str]) -> Tuple[str, str]: - if message.startswith(tag): - index = None - for other_tag in later_tags: - try: - index = message.find(other_tag) - except ValueError: - continue - break - if index != -1: - data = message[len(tag) : index] - tail = message[index:] - return data, tail + @staticmethod + def optional( + string: str, + missing_value: str, + transform: Optional[Callable[[str], Any]] = None, + ) -> Any: + if string == missing_value: + return None + elif transform: + return transform(string) else: - return message[len(tag) :], "" - else: - return "", message + return string + def to_dict(self) -> Dict[str, Any]: + """Returns a dictionary representation of this record.""" + return { + "usaf_id": self.usaf_id, + "ncei_id": self.ncei_id, + # use datetime instead of year, month, day, hour, minute + "datetime": self.datetime(), + "data_source": self.data_source, + "latitude": self.latitude, + "longitude": self.longitude, + "report_type": self.report_type, + "elevation": self.elevation, + "call_letters": self.call_letters, + "quality_control_process": self.quality_control_process, + "wind_direction": self.wind_direction, + "wind_direction_quality_code": self.wind_direction_quality_code, + "wind_observation_type": self.wind_observation_type, + "wind_speed": self.wind_speed, + "wind_speed_quality_code": self.wind_speed_quality_code, + "ceiling": self.ceiling, + "ceiling_quality_code": self.ceiling_quality_code, + "ceiling_determination_code": self.ceiling_determination_code, + "cavok_code": self.cavok_code, + "visibility": self.visibility, + "visibility_quality_code": self.visibility_quality_code, + "visibility_variability_code": self.visibility_variability_code, + "visibility_variability_quality_code": self.visibility_variability_quality_code, + "air_temperature": self.air_temperature, + "air_temperature_quality_code": self.air_temperature_quality_code, + "dew_point_temperature": self.dew_point_temperature, + "dew_point_temperature_quality_code": self.dew_point_temperature_quality_code, + "sea_level_pressure": self.sea_level_pressure, + "sea_level_pressure_quality_code": self.sea_level_pressure_quality_code, + "additional_data": self.additional_data, + "remarks": self.remarks, + "element_quality_data": self.element_quality_data, + "original_observation_data": self.original_observation_data, + } -def optional( - string: str, missing_value: str, transform: Optional[Callable[[str], Any]] = None -) -> Any: - if string == missing_value: - return None - elif transform: - return transform(string) - else: - return string + def to_json(self, indent: int = 4) -> str: + """Returns a JSON representation of this record.""" + data = self.to_dict() + # use isoformat instead of datetime + data["datetime"] = data["datetime"].isoformat() + return json.dumps(data, indent=indent) diff --git a/src/isd/utils.py b/src/isd/utils.py deleted file mode 100644 index 849009f..0000000 --- a/src/isd/utils.py +++ /dev/null @@ -1,18 +0,0 @@ -import datetime -from typing import Iterable, Iterator, Optional - -from isd.record import Record - - -def filter_by_datetime( - records: Iterable[Record], - start: Optional[datetime.datetime] = None, - end: Optional[datetime.datetime] = None, -) -> Iterator[Record]: - """Returns an iterator over records filtered by start and end datetimes (both optional).""" - return ( - record - for record in records - if (not start or record.datetime() >= start) - and (not end or record.datetime() < end) - ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py index e660335..37f3084 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ import pytest -from isd import Record +from isd import Record, Batch BARDUFOSS_FILE_NAME = "010230-99999-2021" VANCE_BRAND_FILE_NAME = "720538-00164-2021" @@ -32,11 +32,21 @@ def record_line() -> str: return line +@pytest.fixture +def record(record_line: str) -> Record: + return Record.parse(record_line) + + @pytest.fixture def uncompressed_path() -> str: return data_file_path(VANCE_BRAND_FILE_NAME) +@pytest.fixture +def batch(uncompressed_path: str) -> Batch: + return Batch.from_path(uncompressed_path) + + @pytest.fixture def compressed_path() -> str: return data_file_path(VANCE_BRAND_COMPRESSED_FILE_NAME) diff --git a/tests/test_batch.py b/tests/test_batch.py new file mode 100644 index 0000000..f924a1a --- /dev/null +++ b/tests/test_batch.py @@ -0,0 +1,64 @@ +import datetime as dt +import json + +from isd import Batch + + +def test_batch_from_uncompressed(uncompressed_path: str) -> None: + batch = Batch.from_path(uncompressed_path) + assert len(batch) == 500 + + +def test_batch_from_compressed(compressed_path: str) -> None: + batch = Batch.from_path(compressed_path) + assert len(batch) == 24252 + + +def test_batch_from_string(uncompressed_path: str) -> None: + with open(uncompressed_path) as file: + batch = Batch.parse(file.read()) + assert len(batch) == 500 + + +def test_batch_filter_by_datetime(batch: Batch) -> None: + batch_filtered = batch.filter_by_datetime(start_date=dt.datetime(2021, 1, 1, 3, 30)) + assert len(batch_filtered) == 490 + + batch_filtered = batch.filter_by_datetime(end_date=dt.datetime(2021, 1, 1, 3, 30)) + assert len(batch_filtered) == 10 + + batch_filtered = batch.filter_by_datetime( + start_date=dt.datetime(2021, 1, 1, 3, 30), + end_date=dt.datetime(2021, 1, 1, 3, 55), + ) + assert len(batch_filtered) == 1 + + batch_filtered = batch.filter_by_datetime( + start_date=dt.datetime(2021, 1, 1, 3, 30), + end_date=dt.datetime(2021, 1, 1, 3, 56), + ) + assert len(batch_filtered) == 2 + + +def test_batch_to_dict(batch: Batch) -> None: + first = batch.to_dict()[0] + assert first["usaf_id"] == "720538" + assert first["ncei_id"] == "00164" + assert first["datetime"] == dt.datetime(2021, 1, 1, 0, 15) + + +def test_batch_to_json(batch: Batch) -> None: + json_string = batch.to_json() + data = json.loads(json_string) + assert len(data) == 500 + first = data[0] + assert first["usaf_id"] == "720538" + assert first["ncei_id"] == "00164" + assert first["datetime"] == "2021-01-01T00:15:00" + + +def test_batch_to_df(batch: Batch) -> None: + datetime_min = dt.datetime(2021, 1, 5) + df = batch.to_df() + df = df[df["datetime"] >= datetime_min] + assert len(df) == 212 diff --git a/tests/test_io.py b/tests/test_io.py index ed44c24..373165b 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,5 +1,3 @@ -import datetime - import isd.io @@ -13,16 +11,3 @@ def test_open_compressed(compressed_path: str) -> None: with isd.io.open(compressed_path) as generator: records = list(generator) assert len(records) == 24252 - - -def test_read_to_data_frame_since(uncompressed_path: str) -> None: - data_frame = isd.io.read_to_data_frame( - uncompressed_path, since=datetime.datetime(2021, 1, 5) - ) - assert len(data_frame) == 212 - - -def test_from_text_io(uncompressed_path: str) -> None: - with open(uncompressed_path) as file: - records = list(isd.io.from_text_io(file)) - assert len(records) == 500 diff --git a/tests/test_pandas.py b/tests/test_pandas.py deleted file mode 100644 index a294581..0000000 --- a/tests/test_pandas.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import List - -import isd.pandas -from isd import Record - - -def test_data_frame(records: List[Record]) -> None: - isd.pandas.data_frame(records) diff --git a/tests/test_record.py b/tests/test_record.py index b7f0826..2fb6bd9 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -1,3 +1,6 @@ +import datetime +import json + import pytest from isd import IsdError, Record @@ -52,3 +55,51 @@ def test_parse(record_line: str) -> None: def test_line_too_short() -> None: with pytest.raises(IsdError): Record.parse("") + + +def test_record_to_dict(record: Record) -> None: + assert record.to_dict() == { + "usaf_id": "720538", + "ncei_id": "00164", + "datetime": datetime.datetime(2021, 1, 1, 0, 15), + "data_source": "4", + "latitude": 40.167, + "longitude": -105.167, + "report_type": "FM-15", + "elevation": 1541, + "call_letters": None, + "quality_control_process": "V020", + "wind_direction": None, + "wind_direction_quality_code": "9", + "wind_observation_type": "C", + "wind_speed": 0, + "wind_speed_quality_code": "1", + "ceiling": 3353, + "ceiling_quality_code": "1", + "ceiling_determination_code": None, + "cavok_code": "N", + "visibility": 16093, + "visibility_quality_code": "1", + "visibility_variability_code": None, + "visibility_variability_quality_code": "9", + "air_temperature": 3.1, + "air_temperature_quality_code": "1", + "dew_point_temperature": -5.8, + "dew_point_temperature_quality_code": "1", + "sea_level_pressure": None, + "sea_level_pressure_quality_code": "9", + "additional_data": "GD14991+0335399GE19MSL +99999+" + "99999GF199999999999033531999999MA1101561999999", + "remarks": "MET075METAR KLMO 010015Z AUTO 00000KT " + "10SM OVC110 03/M06 A2999 RMK AO2 T00311058=", + "element_quality_data": "", + "original_observation_data": "", + } + + +def test_record_to_json(record: Record) -> None: + json_string = record.to_json() + data = json.loads(json_string) + assert data["usaf_id"] == "720538" + assert data["ncei_id"] == "00164" + assert data["datetime"] == "2021-01-01T00:15:00" diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index 169e034..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,52 +0,0 @@ -import datetime -from typing import List - -import isd.utils -from isd.record import Record - - -def test_filter_by_datetime(records: List[Record]) -> None: - assert ( - len( - list( - isd.utils.filter_by_datetime( - records, start=datetime.datetime(2021, 1, 1, 3, 30) - ) - ) - ) - == 490 - ) - assert ( - len( - list( - isd.utils.filter_by_datetime( - records, end=datetime.datetime(2021, 1, 1, 3, 30) - ) - ) - ) - == 10 - ) - assert ( - len( - list( - isd.utils.filter_by_datetime( - records, - start=datetime.datetime(2021, 1, 1, 3, 30), - end=datetime.datetime(2021, 1, 1, 3, 55), - ) - ) - ) - == 1 - ) - assert ( - len( - list( - isd.utils.filter_by_datetime( - records, - start=datetime.datetime(2021, 1, 1, 3, 30), - end=datetime.datetime(2021, 1, 1, 3, 56), - ) - ) - ) - == 2 - )