forked from EOGrady21/ODF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_odf.R
426 lines (367 loc) · 13.4 KB
/
read_odf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
#' READ_ODF: Read in an ODF file.
#' Copyright (C) 2006-2014 DFO, Bedford Institute of Oceanography, Canada.
#' You may distribute under the terms of either the GNU General Public
#' License or the Apache v2 License, as specified in the README file.
#' Description:
#' Read in an ODF file.
#'
#' @param filename location and name of the ODF file to be processed
#'
#' @export
#'
#' @details
#' ODSToolbox Version: 2.0
#'
#' Last Updated: September 3, 2015
#'
#' Source:
#' Ocean Data and Information Services,
#' Bedford Institute of Oceanography, DFO, Canada.
#' DataServicesDonnees@@dfo-mpo.gc.ca
#'
#' Notes:
#' This program was totally re-designed and re-written for
#' ODSToolbox Version 2.0. It is not based on Version 1.0.
#'
#' While this new version of read_odf corrects many errors in
#' Version 1.0, and includes some new functionalities such as
#' checking if all mandatory header blocks and all mandatory
#' fields are presented in the input ODF file etc., it is possible
#' that this program may have some conflicts with other tools in
#' ODSToolbox, please email yongcun.hu@@dfo-mpo.gc.ca if the
#' user find any problems.
#'
#' See also \code{\link{write_odf}}.
#'
#' @author Yongcun Hu, Patrick Upson
#'
#' Modified by Gordana Lazin, June 8, 2015
#' Import data using read.table function (line 190), much faster
#' Replace formats for SYTM_01: from strings to ISO time format (asumes UTC), line 231
#'
read_odf <- function(filename) {
# IMPORT: following 6 lines define some strings according to ODF file
# definition (see the last sub-function in this file), if these
# strings are changed in that definition, they must be changed
# here accordingly.
DATA_LINE = '-- DATA --' # starting line of data section
SYTM = 'SYTM'
GENERAL_CAL_HEADER = 'GENERAL_CAL_HEADER'
POLYNOMIAL_CAL_HEADER = 'POLYNOMIAL_CAL_HEADER'
COMPASS_CAL_HEADER = 'COMPASS_CAL_HEADER'
INTEGER = 'integer'
NUMERIC = 'numeric'
if( !file.exists(filename) ) {
stop("File does not exist")
}
# read the input ODF File
F <- readFile(filename)
# check if input ODF file is an empty file
if(length(filename) <= 0) {
stop("File contains no data")
}
# check if input ODF file has one and only one such line '-- DATA --'
dataLineArray <- grepl(DATA_LINE, F);
data_lines_index <- which(dataLineArray==TRUE)
if(length(data_lines_index)>0) {
#if one or more data lines is found use the last index as the data to be read
data_line_index <- data_lines_index[length(data_lines_index)]
} else {
#if no data line is found then use one line pas the end of the number of lines in the header
data_line_index <- length(dataLineArray)+1
}
if(length(data_lines_index) <= 0 ) {
stop(
' \n',
paste(' -- The input ODF file "', filename, '" does NOT have a\n'),
' separated beginning line for data section such as:\n',
paste(' ', DATA_LINE, '\n'),
' By definition, every ODF file must have one such line.\n',
' \n'
)
} else if(length(data_lines_index) > 1) {
stop(
' \n',
paste(' -- The input ODF file "', filename, '" has more than one\n'),
' separated beginning lines for data section such as:\n',
paste(' ', DATA_LINE, '\n'),
' By definition, only one such line is allowed to separate\n',
' header blocks and data section.\n',
' \n'
)
}
# get the ODF header information
odf_header <- define_ODF_header()
curParameterName = NULL
curParameter = NULL
headObject = NULL
#Create the ODF structure to be returned
S <- list()
#scan through the header lines, we know where the index line is because
#it was found in the above section
for( idxLine in 1:(data_lines_index+1) ) {
#remove the last character of each line in the header. in all but the
#last line it's a comma
line <- gsub( ",$", "", F[idxLine])
#remove leading and trailing whitespace
line <- gsub("^\\s+|\\s+$", "", line)
line <- gsub(x = line, ",$", "") #added by E. Chisholm, errors due to incorrect removal of comma at end of lines
#test to see if the current line is a header object
#if it exists in the list of ODF_Header names then
#create a list for the variables to follow and add
#them to the structrue to be returned
headIndex <- grep(line, names(odf_header), fixed=TRUE)
if( length(headIndex) > 0 || idxLine >= data_lines_index) {
headObject <- odf_header[headIndex]
if( !is.null(curParameterName) ) {
#set the accumulated field names
#names(curParameter) <- curParameterAttr
S <- addToPram(S, curParameterName, curParameter)
}
#clear the curList and set the new parameter header value
curParameterName <- line
curParameter <- NULL
} else {
val <- extract_val(line)
#find the parameter from the header definition
pramIndex <- grep(paste("^",val[1],"$",sep=""), headObject[[curParameterName]][,1])
headPram <- headObject[[curParameterName]][pramIndex,]
# print(paste("pram:",val[1]))
# print(headPram)
if( is.null(curParameter)) {
curParameter <- list()
}
#create or add values to the parameter currently being handled
#index 4 in the parameter array is the parameter type
if(!is.null(curParameter$TYPE) && curParameter$TYPE == SYTM) {
convertedVal <- tryCatch({
as.integer(val[2])
}, warning = function(war) {
val[2]
})
curParameter <- addToPram(curParameter, val[1], convertedVal)
} else if( headPram[2] == INTEGER ) {
curParameter <- addToPram(curParameter, val[1], as.integer(val[2]))
# print(paste(val[1], typeof(curParameter[[val[1]]])))
} else if( headPram[2] == NUMERIC ) {
if(curParameterName == GENERAL_CAL_HEADER ||
curParameterName == POLYNOMIAL_CAL_HEADER ||
curParameterName == COMPASS_CAL_HEADER ) {
tmpVals <- gsub("\\s+|\\t+", ",", val[2])
tmpVals <- strsplit(tmpVals, ",")[[1]]
for( i in 1:length(tmpVals) ) {
curParameter <- addToPram(curParameter, val[1], as.numeric(tmpVals[i]))
}
} else {
curParameter <- addToPram(curParameter, val[1], as.numeric(val[2]))
}
# print(typeof(curParameter[[val[1]]]))
} else {
curParameter <- addToPram(curParameter, val[1], val[2])
}
}
}
# read the data, skip the header
S$DATA = read.table( filename, skip=data_lines_index, as.is=T,stringsAsFactors=F)
# retrieve the parameter names and cast the columns into their proper types
# this all assumes a perfect case that the PARAMETER_HEADER has been set correctly
# and all values in the columns are of the proper type
pram_names = NULL
for( i in 1:length(S$PARAMETER_HEADER) ){
code = length(S$PARAMETER_HEADER[[i]][['CODE']]) > 0
wmo_code = length(S$PARAMETER_HEADER[[i]][['WMO_CODE']]) > 0
name = length(S$PARAMETER_HEADER[[i]][['NAME']]) > 0
if( code ) {
pram_names <- c(pram_names, S$PARAMETER_HEADER[[i]][['CODE']])
} else if(wmo_code) {
pram_names <- c(pram_names, S$PARAMETER_HEADER[[i]][['WMO_CODE']])
} else if( name ) {
warning("\n",
"The file contains no 'CODE' fields in the parameter list.\n",
"\n")
pram_names <- c(pram_names, S$PARAMETER_HEADER[[i]][['NAME']])
}
# if(S$PARAMETER_HEADER[[i]]$TYPE == 'DOUB' || S$PARAMETER_HEADER[[i]]$TYPE == 'SING') {
# print("Double column")
# #S$DATA[,i] <- as.numeric(S$DATA[,i])
# } else if(S$PARAMETER_HEADER[[i]]$TYPE == 'INTE') {
# print("Integer column")
# S$DATA[,i] <- as.integer(S$DATA[,i])
# }
}
#set the names for the columns in the matrix
colnames(S$DATA) <- pram_names
#add the input file to the structure for user convenience
S$INPUT_FILE = filename
#Replace formats for SYTM_01: from strings to ISO time format (asumes UTC)
if(length(grep("SYTM_01",pram_names))>0) {
S$DATA$SYTM_01=as.POSIXct(S$DATA$SYTM_01,format="%d-%b-%Y %H:%M:%S",tz='UTC')
}
#return the resulting data structure with the header parameters and the data object
S
}
#'
#' addToPram
#'
#' Description:
#' Used to create and add to a list, parameters using the same name are added
#' to a list of other parameters using the same name. The list is then returned.
#'
#' @param pram - Null or the existing list of parameters
#' @param name - the name of the sub-parameter list to add the value to
#' @param val - the value to add the the parameter ist.
#'
addToPram <- function(pram, name, val) {
pramIdx <- grep(name, names(pram), fixed=TRUE)
if( length(pramIdx) <= 0 ) {
#if the structrue for the current field doesn't already exists
#then create it and add the current list to it
pram[[name]] <- val
} else {
#if the parameter exists already, but it's a list of parameters
#rather than a list of lists of parameters. In other words if it has
#top level names instead of just being a list then move the
#top level parameters down one level in the list and start adding
#lists of parameters to the parameter
#
#so this is what it looks like with top level parameters
# $PARAMETER_HEADER$TYPE
# $PARAMETER_HEADER$NAME
# $PARAMETER_HEADER$UNITS
# $PARAMETER_HEADER$CODE
# ...
#
# this is what it should look like
# $PARAMETER_HEADER[[1]]
# $PARAMETER_HEADER[[1]]$TYPE
# $PARAMETER_HEADER[[1]]$NAME
# $PARAMETER_HEADER[[1]]$UNIT
# $PARAMETER_HEADER[[1]]$CODE
# ...
if( length(names(pram[[name]])) > 0 ) { # typeof(pram[[name]]) != 'list'){
#if the structure does already exist, but exists as a single parameter
#then we have to make room for additional parameters under it
tmp <- pram[[name]]
tmpNames <- names(pram[[name]])
names(tmp) <- tmpNames
pram[[name]] <- list()
pram[[name]][[1]] <- tmp
pram[[name]][[2]] <- val
} else {
#print(paste(name, "you are here"))
#if the structure for the current field already exists and is already
#capable of holding additional parameters then just tack the new one
#on to the end of the parameter list.
pram[[name]][[length(pram[[name]])+1]] <- val
}
}
pram
}
#'parameterReshape
#'
#' Description:
#' Used to convert lists of lists into matrices inorder to present
#' the data in a more compact easier to read, and access, way.
#'
#' @param paramList The parameter header to be reshaped
#'
parameterReshape <- function(paramList) {
nameArray = NULL
param = NULL
#if the parameter list has more than one element, but has no names
#then it's a list of lists
if( length(names(paramList) ) <= 0 ) {
for( i in 1:length(paramList) ){
nameArray <- c(nameArray, names(paramList[[i]]))
}
nameArray <- unique(nameArray)
param = data.frame(matrix(1, nrow=length(paramList), ncol=length(nameArray)))
for( i in 1:length(paramList) ) {
for( j in 1:length(nameArray)) {
param[i,j] <- paramList[[i]][[nameArray[j]]]
}
}
} else {
nameArray = names(paramList)
param = data.frame(matrix(1, nrow=1, ncol=length(nameArray)))
for( j in 1:length(nameArray)) {
param[j] <- paramList[[nameArray[j]]]
}
}
names(param) <- nameArray
param
}
#' readFile
#'
#' Description:
#' read the data from a file and return an array of lines
#'
#' @param filename input ODF file name
#'
#' @details
#' Output : OUTPUT = array of file data
# ========================================================================
# Sub-function : readFile
# Purpose : read the data from a file and return an array of lines
# Input : filename -- input ODF file name
# Output : OUTPUT = array of file data
# ------------------------------------------------------------------------
readFile <- function(filename) {
conn=file(filename,open="r")
line=readLines(conn)
close(conn)
line
}
#'
#' extract_val
#'
#' Description:
#' Used to split a line into a key and value pair. Leading and trailing spaces
#' are removed and 00000D+00 strings are replaced with 00000E+00 strings.
#'
#' @param line The line to be split up at an '=' symbol. The left side of the
#' symbol becomes the key, the right side becomes the value
#'
# ========================================================================
# Sub-function : EXTRACT_VAL
# Purpose : Extract field value from input string expression
# Input : LINE -- input string expression such as Variable='Value'
# Output : VAL -- extracted value of input string expression
# Example : If LINE is CRUISE_NUMBER='NED2009002',
# : then VAL is NED2009002
# ------------------------------------------------------------------------
extract_val <- function(line) {
#create a character array used for substrings and indexing
charArray <- strsplit(line, "")[[1]]
equIndex <- grep("=", charArray)[1]
#split the filed up into the field name and it's value
val <- c(substr(line, 1, equIndex-1), substr(line, equIndex+1, length(charArray)))
#remove leading and trailing whitespaces from both the field name and field value
val <- gsub("^\\s+", "", val)
#remove leading and trailing single quotes from both the field name and field value
val <- gsub("^'|'$", "", val)
#remove leading and trailing whitespaces from both the field name and field value
val <- gsub("\\s+$", "", val)
val <- convert_number(val)
val
}
#'
#' convert_number
#'
#' Description:
#' convert 00000D+00 strings to 00000E+00 strings.
#'
#' @param sVal - a string or array of strings
#'
convert_number <- function(sVal) {
val <- sVal
for( i in 1:length(val)) {
if( grepl("([0-9]+D(\\+|-)[0-9][0-9])", val[i]) ) {
#in older files numeric notation is sometimes 0.0000000D+00 for base 10 exponents
#Replce the D wtih E so it can be processed by modern string to numeric functions
val[i] <- gsub("D\\+", "E+", val[i])
val[i] <- gsub("D-", "E-", val[i])
}
}
val
}