Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
9b320e6
adds plotly dependency
jtec Jul 8, 2024
ac991e0
Moves benchamrk + beginning of pandas parsing over from georinex branch
jtec Jul 8, 2024
c1e15b9
extracts obs types for each constellation
jtec Jul 9, 2024
fb5011c
experimenting with regex
jtec Jul 9, 2024
13d6623
make observation type parsing more succinct
jtec Jul 10, 2024
2e9b8fd
adds test comparing parser output to georinex
jtec Jul 10, 2024
788f05c
re-organise
jtec Jul 10, 2024
c5c56a8
fix
jtec Jul 10, 2024
17962f7
ruff
jtec Jul 11, 2024
3be6044
missing files
jtec Jul 11, 2024
406794e
add new parser, don't use it yet
jtec Jul 11, 2024
81d16ea
fixes missing obs w.r.t. georinex
jtec Jul 12, 2024
9b5fb22
make it faster
jtec Jul 13, 2024
54e6d57
use delimiter insertion
jtec Jul 13, 2024
79374eb
use prx obs parser by default
jtec Jul 16, 2024
77bd9fe
oups
jtec Jul 16, 2024
5ab7b4b
switching branches
jtec Jul 18, 2024
f6dbab3
a start
jtec Jul 18, 2024
3dd4304
added dumb check to test rinex obs parsing
plutonheaven Jul 24, 2024
3a0a0f4
ruff
plutonheaven Jul 24, 2024
8e2f232
format
jtec Jul 28, 2024
fed0a85
getting there
jtec Jul 28, 2024
e890b75
Fixes nav file download
jtec Jul 31, 2024
97457a9
silence linter
jtec Jul 31, 2024
dfa1c98
lint
jtec Jul 31, 2024
30be5b0
Adds processing time to metadata
jtec Jul 31, 2024
84ad84e
Merge remote-tracking branch 'origin/main' into enable_fast_obs_parsing
jtec Jul 31, 2024
ff3d4b8
Merge remote-tracking branch 'origin/enable_fast_obs_parsing' into en…
jtec Jul 31, 2024
6b2e0cb
Merge branch 'enable_fast_obs_parsing' into improve_ephemeris_download
jtec Jul 31, 2024
aa20a10
Merge remote-tracking branch 'origin/main' into enable_fast_obs_parsing
jtec Jul 31, 2024
c3a384f
Merge remote-tracking branch 'origin/enable_fast_obs_parsing' into im…
jtec Jul 31, 2024
43cada8
fix
jtec Jul 31, 2024
b727a22
possible fix for concurrently runnign tests
jtec Jul 31, 2024
bca3b89
do not connect to IGS server if nav files provided by user
jtec Jul 31, 2024
37b8b4a
Merge remote-tracking branch 'origin/main' into improve_ephemeris_dow…
jtec Aug 2, 2024
4cd4707
Improves comment wording.
jtec Aug 2, 2024
bd55c56
fixes
jtec Aug 2, 2024
83b4a1c
oups
jtec Aug 2, 2024
f422c69
Presence of URA column depends on constellations present.
jtec Aug 2, 2024
9501ade
Adds files needed by tests
jtec Aug 2, 2024
13a409f
SDs done
jtec Aug 11, 2024
6f1cdc5
fix
jtec Aug 11, 2024
1e1de42
fix
jtec Aug 25, 2024
2f33fc4
basic setup
jtec Aug 25, 2024
ac305ac
bop
jtec Aug 25, 2024
4114768
setup
jtec Aug 25, 2024
a9b07b0
halfway there
jtec Aug 25, 2024
a994f8a
little progress
jtec Sep 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added documents/GFZRNX_Users_Guide.pdf
Binary file not shown.
214 changes: 213 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ setuptools = "^70.0.0"
viztracer = "^0.16.3"
plotly = "^5.22.0"
line-profiler = "^4.1.3"
requests = "^2.32.3"
numba = "^0.60.0"


[build-system]
Expand Down
2 changes: 1 addition & 1 deletion src/demos/main_gsdc2021.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
@helpers.timeit
def process_gsdc2021_dataset(path_dataset: Path):
"""
This script creates a local copy of gunzipped RINEX files from the GSDC2021 data set and process them with prx
This script creates a local copy of gunzipped RINEX files from the GSDC2021 data set and processes them with prx

The filepath to the original GSDC2021 folder is specified in `path_dataset`.
This has to be downloaded from the Kaggle website (requires login):
Expand Down
Binary file not shown.
Binary file not shown.
45 changes: 45 additions & 0 deletions src/demos/rover_base_differencing/rover_base_differencing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path
from prx import helpers
from prx.converters import anything_to_rinex_3
from prx.user import parse_prx_csv_file
from prx.main import process as prx_process

log = helpers.get_logger(__name__)


def parse(obs_file: Path):
rnx_file = anything_to_rinex_3(obs_file)
assert rnx_file, f"Failed to convert {obs_file} to RINEX 3"
if not rnx_file.exists():
prx_process(rnx_file)
df, metadata = parse_prx_csv_file(rnx_file.with_suffix(".csv"))
# Receiver clocks are off by at most a few tens of milliseconds, so let's round for matching rover-base obs
df["time_of_reception_rounded"] = df.time_of_reception_in_receiver_time.dt.round(
"50ms"
)
df["sv"] = df["constellation"].astype(str) + df["prn"].astype(str).str.pad(
2, fillchar="0"
)
return df


# Provides and example of how to compute between-receivers single
# differences and double differences after processing obs files with prx.
def main():
# We'll use two IGS stations that are just roughly a kilometer apart
df_rover, df_base = (
parse(Path(__file__).parent / "obs/TLSE00FRA_R_20241900000_15M_01S_MO.crx.gz"),
parse(Path(__file__).parent / "obs/TLSG00FRA_R_20241900000_15M_01S_MO.crx.gz"),
)
df = df_rover.merge(
df_base,
on=["time_of_reception_rounded", "sv", "rnx_obs_identifier"],
suffixes=("_rover", "_base"),
).reset_index()
df["C_obs_m_sd"] = df.C_obs_m_rover - df.C_obs_m_base

print(df.info())


if __name__ == "__main__":
main()
71 changes: 11 additions & 60 deletions src/prx/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from pathlib import Path
import logging

from prx.util import is_rinex_3_obs_file, is_rinex_3_nav_file
from prx.rinex_obs.parser import parse as prx_obs_parse
from prx import rinex_nav as rinex_nav
import xarray
from imohash import imohash
from prx import constants
Expand All @@ -14,7 +14,6 @@
import subprocess
import math
import joblib
import georinex
import os
from astropy.utils import iers
from astropy import time as astrotime
Expand Down Expand Up @@ -73,8 +72,8 @@ def hash_of_file_content(file: Path, use_sampling: bool = False):
hash_time = pd.Timestamp.now() - t0
if hash_time > pd.Timedelta(seconds=1):
log.info(
f"Hashing file content took {hash_time}, we might want want to think about partially hashing the"
f" file, e.g. using https://github.com/kalafut/py-imohash"
f"Hashing file content took {hash_time}, you might want want to think about partially hashing the"
f" file by passing sampling=True to this function."
)

return hash_string
Expand Down Expand Up @@ -105,6 +104,14 @@ def timestamp_2_timedelta(timestamp: pd.Timestamp, time_scale):
assert False, f"Time scale {time_scale} not supported."


def timestamp_to_mid_day(ts):
return (
pd.Timestamp(year=ts.year, month=1, day=1)
+ pd.Timedelta(days=ts.day_of_year - 1)
+ pd.Timedelta(hours=12)
)


def timedelta_2_weeks_and_seconds(time_delta: pd.Timedelta):
if pd.isnull(time_delta):
return np.nan, np.nan
Expand Down Expand Up @@ -394,16 +401,6 @@ def obs_dataset_to_obs_dataframe(ds: xarray.Dataset):
return flat_obs


def parse_rinex_file(rinex_file_path: Path):
if is_rinex_3_obs_file(rinex_file_path):
return parse_rinex_obs_file(rinex_file_path)
elif is_rinex_3_nav_file(rinex_file_path):
return parse_rinex_nav_file(rinex_file_path)
assert (
False
), f"File {rinex_file_path} appears to be neither RINEX 3 OBS nor NAV file."


@timeit
def parse_rinex_obs_file(rinex_file_path: Path):
@disk_cache.cache(ignore=["rinex_file"])
Expand All @@ -417,52 +414,6 @@ def parse_rinex_obs_file_cached(rinex_file: Path, file_hash: str):
)


@timeit
def parse_rinex_nav_file(rinex_file_path: Path):
@disk_cache.cache(ignore=["rinex_file"])
def parse_rinex_nav_file_cached(rinex_file: Path, file_hash: str):
log.info(f"Parsing {rinex_file} (hash {file_hash}) ...")
repair_with_gfzrnx(rinex_file)
return georinex.load(rinex_file)

return parse_rinex_nav_file_cached(
rinex_file_path, hash_of_file_content(rinex_file_path)
)


def get_gpst_utc_leap_seconds(rinex_file: Path):
leap_seconds_astropy = compute_gps_utc_leap_seconds(
yyyy=int(rinex_file.name[12:16]), doy=int(rinex_file.name[16:19])
)

# sanity check if leap second information is in the header of the RNX NAV file
# compare astropy leap and RNX NAV leap seconds
header = georinex.rinexheader(rinex_file)
if "LEAP SECONDS" in header:
ls_before = header["LEAP SECONDS"][0:6].strip()
assert (
0 < len(ls_before) < 3
), f"Unexpected leap seconds {ls_before} in {rinex_file}"

ls_after = header["LEAP SECONDS"][6:12].strip()
if ls_after == "":
return int(ls_before)
assert (
0 < len(ls_after) < 3
), f"Unexpected leap seconds {ls_after} in {rinex_file}"

assert (
ls_after == ls_before
), f"Leap second change announcement in {rinex_file}, this case is not tested, aborting."

leap_seconds_rnx = int(ls_before)
assert (
leap_seconds_rnx == leap_seconds_astropy
), "leap second computed from astropy is different from RINEX NAV header"

return leap_seconds_astropy


def is_sorted(iterable):
return all(iterable[i] <= iterable[i + 1] for i in range(len(iterable) - 1))

Expand Down
83 changes: 43 additions & 40 deletions src/prx/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
import git

from prx import atmospheric_corrections as atmo
from prx.helpers import parse_rinex_file
from prx.helpers import parse_rinex_obs_file
from prx.rinex_nav import nav_file_discovery
from prx import constants, helpers, converters, user
from prx.rinex_nav import evaluate as rinex_evaluate
from prx.rinex_nav.evaluate import parse_rinex_nav_file

log = helpers.get_logger(__name__)

Expand All @@ -27,7 +28,9 @@ def write_prx_file(
output_writers = {"jsonseq": write_json_text_sequence_file, "csv": write_csv_file}
if output_format not in output_writers.keys():
assert False, f"Output format {output_format} not supported, we can do {list(output_writers.keys())}"
output_writers[output_format](prx_header, prx_records, file_name_without_extension)
return output_writers[output_format](
prx_header, prx_records, file_name_without_extension
)


def write_json_text_sequence_file(
Expand Down Expand Up @@ -78,9 +81,6 @@ def write_csv_file(
output_file = Path(
f"{str(file_name_without_extension)}.{constants.cPrxCsvFileExtension}"
)
# write header
with open(output_file, "w", encoding="utf-8") as file:
file.write(f"# {json.dumps(prx_header)}\n")
flat_records["sat_elevation_deg"] = np.rad2deg(
flat_records.elevation_rad.to_numpy()
)
Expand Down Expand Up @@ -134,6 +134,15 @@ def write_csv_file(
)
# Keep only records with valid sat states
records = records[records.sat_clock_offset_m.notna()]
# write header
prx_header["processing_time"] = str(
pd.Timestamp.now() - prx_header["processing_start_time"]
)
prx_header["processing_start_time"] = prx_header["processing_start_time"].strftime(
"%Y-%m-%d %H:%M:%S.%f3"
)
with open(output_file, "w", encoding="utf-8") as file:
file.write(f"# {json.dumps(prx_header)}\n")
records.to_csv(
path_or_buf=output_file,
index=False,
Expand All @@ -142,6 +151,7 @@ def write_csv_file(
date_format="%Y-%m-%d %H:%M:%S.%f",
)
log.info(f"Generated CSV prx file: {file}")
return output_file


def build_metadata(input_files):
Expand Down Expand Up @@ -192,8 +202,9 @@ def check_assumptions(
), "Handling of observation files using time scales other than GPST not implemented yet."


def warm_up_parser_cache(rinex_files):
_ = [parse_rinex_file(file) for file in rinex_files]
def warm_up_parser_cache(obs_files, nav_files):
_ = [parse_rinex_obs_file(file) for file in obs_files]
_ = [parse_rinex_nav_file(file) for file in nav_files]


@helpers.timeit
Expand All @@ -202,7 +213,8 @@ def build_records(
rinex_3_ephemerides_files,
approximate_receiver_ecef_position_m,
):
warm_up_parser_cache([rinex_3_obs_file] + rinex_3_ephemerides_files)
warm_up_parser_cache([rinex_3_obs_file], rinex_3_ephemerides_files)

approximate_receiver_ecef_position_m = np.array(
approximate_receiver_ecef_position_m
)
Expand Down Expand Up @@ -283,31 +295,10 @@ def build_records(
},
)

sat_states_per_day = []
for file in rinex_3_ephemerides_files:
# get year and doy from NAV filename
year = int(file.name[12:16])
doy = int(file.name[16:19])
day_query = query.loc[
(
query.query_time_isagpst
>= pd.Timestamp(year=year, month=1, day=1) + pd.Timedelta(days=doy - 1)
)
& (
query.query_time_isagpst
< pd.Timestamp(year=year, month=1, day=1) + pd.Timedelta(days=doy)
)
]
if day_query.empty:
continue
log.info(f"Computing satellite states for {year}-{doy:03d}")
sat_states_per_day.append(
rinex_evaluate.compute_parallel(
file,
day_query,
)
)
sat_states = pd.concat(sat_states_per_day)
sat_states = rinex_evaluate.compute_parallel(
rinex_3_ephemerides_files,
query,
)
sat_states = sat_states.rename(
columns={
"sv": "satellite",
Expand Down Expand Up @@ -405,7 +396,7 @@ def signal_2_carrier_frequency(row):
frequency_slot = row["frequency_slot"]
if np.isnan(row["frequency_slot"]):
return np.nan
# GLONASS satellites with both FDMA and CDMA signals have a frequency slot for FDMA signals,
# GLONASS satellites with both FDMA and CDMA signals have a frequency slot for FDMA signals.
# for CDMA signals we use the common carrier frequency of those signals
if len(carrier_freqs[constellation][frequency]) == 1:
return carrier_freqs[constellation][frequency][1]
Expand All @@ -415,18 +406,28 @@ def signal_2_carrier_frequency(row):
flat_obs.loc[:, "carrier_frequency_hz"] = flat_obs.apply(
signal_2_carrier_frequency, axis=1
)

file_2_to_year_and_day = {}
for file in rinex_3_ephemerides_files:
# get year and doy from NAV filename
nav_content = parse_rinex_nav_file(file)
year = int(nav_content["time"].dt.year.median())
doy = int(nav_content["time"].dt.day_of_year.median())
file_2_to_year_and_day[file] = (year, doy)
assert len(file_2_to_year_and_day.keys()) == len(rinex_3_ephemerides_files)
# create a dictionary containing the headers of the different NAV files.
# The keys are the "YYYYDDD" (year and day of year) and are located at
# [12:19] of the file name using RINEX naming convention
# The keys are the "YYYYDDD" (year and day of year)
nav_header_dict = {
file.name[12:19]: georinex.rinexheader(file)
f"{file_2_to_year_and_day[file][0]:03d}{file_2_to_year_and_day[file][1]:03d}": georinex.rinexheader(
file
)
for file in rinex_3_ephemerides_files
}

for file in rinex_3_ephemerides_files:
# get year and doy from NAV filename
year = int(file.name[12:16])
doy = int(file.name[16:19])
year = file_2_to_year_and_day[file][0]
doy = file_2_to_year_and_day[file][1]

# Selection criteria: time of emission belonging to the day of the current NAV file
mask = (
Expand Down Expand Up @@ -481,6 +482,7 @@ def signal_2_carrier_frequency(row):

@helpers.timeit
def process(observation_file_path: Path, output_format="csv"):
t0 = pd.Timestamp.now()
# We expect a Path, but might get a string here:
observation_file_path = Path(observation_file_path)
log.info(
Expand All @@ -495,13 +497,14 @@ def process(observation_file_path: Path, output_format="csv"):
metadata = build_metadata(
{"obs_file": rinex_3_obs_file, "nav_file": aux_files["broadcast_ephemerides"]}
)
metadata["processing_start_time"] = t0
helpers.repair_with_gfzrnx(rinex_3_obs_file)
records = build_records(
rinex_3_obs_file,
aux_files["broadcast_ephemerides"],
metadata["approximate_receiver_ecef_position_m"],
)
write_prx_file(
return write_prx_file(
metadata,
records,
prx_file,
Expand Down
Loading
Loading