Skip to content

Commit 778d60c

Browse files
committed
Add support for tagged missing values in reader
1 parent 3b49c4c commit 778d60c

File tree

7 files changed

+60
-14
lines changed

7 files changed

+60
-14
lines changed

src/DfReader.cpp

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,29 @@
66
using namespace Rcpp;
77
#include "readstat.h"
88
#include "haven_types.h"
9+
#include "tagged_na.h"
10+
11+
// Wrappers around readstat_*_value that convert missing values to appropriate
12+
// R sentinel values -----------------------------------------------------------
13+
14+
// Read a double out of a ReadStat value for storage in an R numeric vector.
//
// When ReadStat reports the value as missing, the result is whatever
// make_tagged_na() builds from the value's tag; otherwise the stored double is
// handed back unchanged.
double haven_double_value(readstat_value_t value) {
  // Common case first: a regular observation carries its payload directly.
  if (!readstat_value_is_missing(value)) {
    return readstat_double_value(value);
  }

  // Missing observation: forward the tag so it is not collapsed away.
  return make_tagged_na(readstat_value_tag(value));
}
21+
22+
// Read a float out of a ReadStat value and return it as a double.
//
// Missing values are mapped through make_tagged_na() using the value's tag;
// non-missing values come from readstat_float_value() and are widened to
// double on the way out.
double haven_float_value(readstat_value_t value) {
  return readstat_value_is_missing(value)
             ? make_tagged_na(readstat_value_tag(value))
             : static_cast<double>(readstat_float_value(value));
}
29+
30+
31+
// LabelSet -------------------------------------------------------------------
932

1033
class LabelSet {
1134
std::vector<std::string> labels_;
@@ -97,6 +120,8 @@ class LabelSet {
97120
}
98121
};
99122

123+
// DfReader ------------------------------------------------------------------
124+
100125
class DfReader {
101126
FileType type_;
102127
int nrows_, ncols_;
@@ -218,19 +243,10 @@ class DfReader {
218243
}
219244
} else if (value.type == READSTAT_TYPE_FLOAT) {
220245
NumericVector col = output_[var_index];
221-
if (readstat_value_is_missing(value)) {
222-
col[obs_index] = NA_REAL;
223-
} else {
224-
col[obs_index] = adjustDatetimeToR(type_, var_type, readstat_float_value(value));
225-
}
246+
col[obs_index] = adjustDatetimeToR(type_, var_type, haven_float_value(value));
226247
} else if (value.type == READSTAT_TYPE_DOUBLE) {
227248
NumericVector col = output_[var_index];
228-
if (readstat_value_is_missing(value)) {
229-
col[obs_index] = NA_REAL;
230-
} else {
231-
double val = readstat_double_value(value);
232-
col[obs_index] = std::isnan(val) ? NA_REAL : adjustDatetimeToR(type_, var_type, val);
233-
}
249+
col[obs_index] = adjustDatetimeToR(type_, var_type, haven_double_value(value));
234250
}
235251

236252
return 0;
@@ -258,7 +274,7 @@ class DfReader {
258274
label_set.add(readstat_int32_value(value), label_s, is_missing);
259275
break;
260276
case READSTAT_TYPE_DOUBLE:
261-
label_set.add(readstat_double_value(value), label_s, is_missing);
277+
label_set.add(haven_double_value(value), label_s, is_missing);
262278
break;
263279
default:
264280
Rf_warning("Unsupported label type: %s", value.type);

src/haven_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define __HAVEN_TYPES__
33

44
#include <Rcpp.h>
5+
#include <cmath>
56

67
enum FileType {
78
HAVEN_SPSS,
@@ -94,6 +95,9 @@ inline int daysOffset(FileType type) {
9495
}
9596

9697
inline double adjustDatetimeToR(FileType file, VarType var, double value) {
98+
if (std::isnan(value))
99+
return value;
100+
97101
double offset = daysOffset(file);
98102

99103
switch(var) {
@@ -111,6 +115,9 @@ inline double adjustDatetimeToR(FileType file, VarType var, double value) {
111115
}
112116

113117
inline double adjustDatetimeFromR(FileType file, SEXP col, double value) {
118+
if (std::isnan(value))
119+
return value;
120+
114121
double offset = daysOffset(file);
115122

116123
switch(numType(col)) {

src/tagged_na.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ const int TAG_BYTE = 3;
3131
const int TAG_BYTE = 4;
3232
#endif
3333

34-
inline double make_tagged_na(char x) {
34+
double make_tagged_na(char x) {
3535
ieee_double y;
3636

3737
y.value = NA_REAL;
@@ -40,7 +40,7 @@ inline double make_tagged_na(char x) {
4040
return y.value;
4141
}
4242

43-
inline char tagged_na_value(double x) {
43+
char tagged_na_value(double x) {
4444
ieee_double y;
4545
y.value = x;
4646

src/tagged_na.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/* Declarations for the tagged-NA helpers that are defined in tagged_na.c. */
#ifndef __TAGGED_NA__
2+
#define __TAGGED_NA__
3+
4+
/* The definitions are compiled as C, so C++ callers need C linkage. */
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
8+
/* Returns a double built starting from NA_REAL. NOTE(review): x looks like
 * the tag character to embed (the reader passes readstat_value_tag() here) --
 * confirm the exact encoding against tagged_na.c. */
double make_tagged_na(char x);
9+
/* Takes a double and returns a char. NOTE(review): presumably recovers the
 * tag stored by make_tagged_na -- confirm against tagged_na.c. */
char tagged_na_value(double x);
10+
11+
#ifdef __cplusplus
12+
}
13+
14+
#endif
15+
#endif

tests/testthat/tagged-na.sas7bcat

17 KB
Binary file not shown.

tests/testthat/tagged-na.sas7bdat

128 KB
Binary file not shown.

tests/testthat/test-read-sas.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,11 @@ test_that("date times are converted into corresponding R types", {
3030
expect_equal(df$VAR5[1], hms::hms(52932))
3131

3232
})
33+
34+
# Reads the tagged-na SAS fixture together with its catalog and checks the tags
# reported by na_tag() for both the column values and its value labels.
test_that("tagged missings are read correctly", {
  data_file <- test_path("tagged-na.sas7bdat")
  catalog_file <- test_path("tagged-na.sas7bcat")
  x <- read_sas(data_file, catalog_file)$x

  # First five observations carry no tag; the last three are tagged A, H, Z.
  expected_tags <- c(rep(NA, 5), "A", "H", "Z")
  expect_equal(na_tag(x), expected_tags)

  # The "labels" attribute should carry tags as well.
  labels <- attr(x, "labels")
  expect_equal(na_tag(labels), c("A", "Z"))
})

0 commit comments

Comments
 (0)