 
 try:
     import omfiles
-    import omfiles._numcodecs
 except ModuleNotFoundError:  # pragma: no cover
     raise ImportError(
         "omfiles is required for kerchunking Open-Meteo files. Please install with "
@@ -183,11 +182,9 @@ def __init__(
         inline_threshold=500,
         storage_options=None,
         chunk_no=None,
-        domain=None,
-        reference_time=None,
-        time_step=3600,
+        domain=None
     ):
-        # Initialize a reader for your om file
+        # Initialize a reader for the om file
         if isinstance(om_file, (pathlib.Path, str)):
             fs, path = fsspec.core.url_to_fs(om_file, **(storage_options or {}))
             self.input_file = fs.open(path, "rb")
@@ -204,7 +201,10 @@ def __init__(
         self.inline = inline_threshold
         self.store_dict = {}
         self.store = dict_to_store(self.store_dict)
-        self.name = "data"  # FIXME: This should be the name from om-variable
+        # FIXME: This should be the name from the om variable, but currently variables are not required to be named in omfiles
+        # self.name = self.reader.name
+        # For now, hardcode the name to "data"
+        self.name = "data"
 
         if domain is not None and chunk_no is not None:
             start_step = chunk_number_to_start_time(domain=domain, chunk_no=chunk_no)
@@ -225,15 +225,19 @@ def translate(self):
         add_offset = self.reader.add_offset
         lut = self.reader.get_complete_lut()
 
-        # Get dimension names if available, otherwise use defaults
-        # FIXME: Currently we don't have dimension names exposed by the reader (or even necessarily in the file)
-        dim_names = getattr(self.reader, "dimension_names", ["x", "y", "time"])
+        assert len(shape) == 3, "Only 3D arrays are currently supported"
+        assert len(chunks) == 3, "Only 3D arrays are currently supported"
+
+        # FIXME: There is currently no agreed convention for storing dimension names in om files.
+        # It could easily be done via the hierarchical structure, but that is not finalized yet.
+        # For now, hardcode the dimension names.
+        dim_names = ["x", "y", "time"]
 
         # Calculate number of chunks in each dimension
         chunks_per_dim = [math.ceil(s / c) for s, c in zip(shape, chunks)]
 
         # 2. Create Zarr array metadata (.zarray)
-        blocksize = chunks[0] * chunks[1] * chunks[2] if len(chunks) >= 3 else chunks[0] * chunks[1]
+        blocksize = chunks[0] * chunks[1] * chunks[2]
 
         zarray = {
             "zarr_format": 2,
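For intuition, here is a small worked example of the two calculations above; the shape and chunk sizes are made up for illustration, not taken from a real Open-Meteo file:

    import math

    shape = (121, 240, 744)    # hypothetical (x, y, time) dimensions
    chunks = (30, 30, 124)     # hypothetical om chunk shape
    chunks_per_dim = [math.ceil(s / c) for s, c in zip(shape, chunks)]  # -> [5, 8, 6]
    blocksize = chunks[0] * chunks[1] * chunks[2]                       # -> 111600 elements per chunk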
@@ -263,31 +267,27 @@ def translate(self):
         for chunk_idx in range(len(lut) - 1):
             # Calculate chunk coordinates (i,j,k) from linear index
             chunk_coords = self._get_chunk_coords(chunk_idx, chunks_per_dim)
+            chunk_key = self.name + "/" + ".".join(map(str, chunk_coords))
 
-            # Calculate chunk size.
-            # Loop index is defined so this is safe!
-            chunk_size = lut[chunk_idx + 1] - lut[chunk_idx]
-
-            # Add to references
-            key = self.name + "/" + ".".join(map(str, chunk_coords))
+            # Calculate chunk offset and chunk size
+            chunk_offset = lut[chunk_idx]
+            chunk_size = lut[chunk_idx + 1] - chunk_offset
 
             # Check if chunk is small enough to inline
             if self.inline > 0 and chunk_size < self.inline:
                 # Read the chunk data and inline it
-                self.input_file.seek(lut[chunk_idx])
+                self.input_file.seek(chunk_offset)
                 data = self.input_file.read(chunk_size)
-                try:
-                    # Try to decode as ASCII
-                    self.store_dict[key] = data.decode('ascii')
-                except UnicodeDecodeError:
-                    # If not ASCII, encode as base64
-                    self.store_dict[key] = b"base64:" + base64.b64encode(data)
+                # Encode as base64, similar to what is done in hdf.py
+                self.store_dict[chunk_key] = b"base64:" + base64.b64encode(data)
             else:
                 # Otherwise store as reference
-                self.store_dict[key] = [self.url, lut[chunk_idx], chunk_size]
+                self.store_dict[chunk_key] = [self.url, chunk_offset, chunk_size]
 
-        # 5. Create coordinate arrays. TODO: This needs to be improved
-        # Add coordinate arrays for ALL dimensions
+        # 5. Create coordinate arrays.
+        # TODO: This needs to be improved, because we need coordinates for all dimensions.
+        # Grid definitions / coordinate arrays might be calculated in python-omfiles directly in the future:
+        # https://github.com/open-meteo/python-omfiles/pull/32/files
         for i, dim_name in enumerate(dim_names):
             dim_size = shape[i]
             if dim_name == "time":
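A note on the reference loop above: as used there, the om lookup table (LUT) holds the byte offset of each compressed chunk plus one trailing end offset, so consecutive entries yield (offset, size) pairs. A minimal sketch of the same idea, with made-up offsets and a flat chunk index instead of the multidimensional keys built by _get_chunk_coords:

    lut = [100, 350, 900, 1200]          # 3 chunks stored back to back; values are illustrative
    refs = {}
    for idx in range(len(lut) - 1):
        offset = lut[idx]
        size = lut[idx + 1] - offset
        refs[f"data/{idx}"] = ["file.om", offset, size]
    # refs == {"data/0": ["file.om", 100, 250],
    #          "data/1": ["file.om", 350, 550],
    #          "data/2": ["file.om", 900, 300]}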
@@ -299,10 +299,8 @@ def translate(self):
 
         # Convert to proper format for return
         if self.spec < 1:
-            print("self.spec < 1")
             return self.store
         else:
-            print("translate_refs_serializable")
             translate_refs_serializable(self.store_dict)
             store = _encode_for_JSON(self.store_dict)
             return {"version": 1, "refs": store}
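Since the translate() output follows the kerchunk version-1 reference format, it can be consumed with fsspec's reference filesystem. A rough usage sketch, assuming xarray and zarr are installed; the class name and file path are placeholders, not names taken from this module:

    import fsspec
    import xarray as xr

    refs = SingleOmToZarr("some_file.om").translate()   # placeholder class name and path
    fs = fsspec.filesystem("reference", fo=refs)
    ds = xr.open_dataset(fs.get_mapper(""), engine="zarr", consolidated=False)
    print(ds["data"])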
@@ -319,7 +317,7 @@ def _add_time_coordinate(self, time_dim, time_axis=0):
 
         # Format the reference time as CF-compliant string
         if isinstance(ref_time, datetime.datetime):
-            # Calculate hours since epoch (1970-01-01)
+            # Calculate seconds since epoch (1970-01-01)
             epoch = datetime.datetime(1970, 1, 1, 0, 0, 0)
             seconds_since_epoch = int((ref_time - epoch).total_seconds())
 
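For reference, the seconds-since-epoch value computed above is the kind of number that ends up in a CF-style time coordinate. A small sketch of how such values and a units string could look; the reference time, hourly step, and count are illustrative assumptions, not values read from this diff:

    import datetime
    import numpy as np

    ref_time = datetime.datetime(2024, 1, 1)                 # illustrative reference time
    epoch = datetime.datetime(1970, 1, 1)
    start = int((ref_time - epoch).total_seconds())          # 1704067200
    units = "seconds since 1970-01-01T00:00:00"
    time_values = np.arange(start, start + 4 * 3600, 3600)   # 4 hourly timestamps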
@@ -367,12 +365,6 @@ def _add_time_coordinate(self, time_dim, time_axis=0):
         # Add time values inline (they're small)
         self.store_dict[f"{time_dim_name}/0"] = time_values.tobytes()
 
-        # Debug info
-        print(f"Created time coordinate '{time_dim_name}' with {time_dim} values")
-        print(f"Time units: {units}")
-        if time_dim > 0:
-            print(f"First timestamp: {time_values[0]} seconds since 1970-01-01, Last: {time_values[-1]}")
-
     def _get_chunk_coords(self, idx, chunks_per_dim):
         """Convert linear chunk index to multidimensional coordinates
 
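The body of _get_chunk_coords is not shown in this diff; one common row-major way to turn a linear chunk index into per-dimension chunk coordinates looks like the sketch below (an assumption, not necessarily the exact implementation in this module):

    def get_chunk_coords(idx, chunks_per_dim):
        # Peel off coordinates from the fastest-varying (last) dimension first
        coords = []
        for n in reversed(chunks_per_dim):
            idx, rem = divmod(idx, n)
            coords.append(rem)
        return list(reversed(coords))

    # With chunks_per_dim = [5, 8, 6]:
    #   get_chunk_coords(0,  [5, 8, 6]) -> [0, 0, 0]
    #   get_chunk_coords(7,  [5, 8, 6]) -> [0, 1, 1]
    #   get_chunk_coords(53, [5, 8, 6]) -> [1, 0, 5]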