From 418f042920f81a796d9904a48c4a9a98a399a7de Mon Sep 17 00:00:00 2001 From: rocketlaunchr-cto Date: Sat, 2 Apr 2022 21:46:27 +1100 Subject: [PATCH] - Address issue: https://github.com/rocketlaunchr/dataframe-go/issues/62 (Chinese characters and python BOM prefix) --- exports/parquet.go | 4 ++-- imports/csv.go | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/exports/parquet.go b/exports/parquet.go index e56ca3a..2fd5b7b 100644 --- a/exports/parquet.go +++ b/exports/parquet.go @@ -65,7 +65,7 @@ func ExportToParquet(ctx context.Context, w io.Writer, df *dataframe.DataFrame, // Create Schema dataSchema := dynamicstruct.NewStruct() for _, aSeries := range df.Series { - fieldName := strings.Title(strings.ToLower(aSeries.Name())) + fieldName := "Z" + strings.Title(strings.ToLower(aSeries.Name())) // Make it validly exported seriesName := santizeColumnName(aSeries.Name()) switch aSeries.(type) { @@ -125,7 +125,7 @@ func ExportToParquet(ctx context.Context, w io.Writer, df *dataframe.DataFrame, rec := schemaStruct.New() for _, aSeries := range df.Series { - fieldName := strings.Title(strings.ToLower(aSeries.Name())) + fieldName := "Z" + strings.Title(strings.ToLower(aSeries.Name())) v := reflect.ValueOf(rec).Elem().FieldByName(fieldName) if v.IsValid() { diff --git a/imports/csv.go b/imports/csv.go index b4849ad..fb56cff 100644 --- a/imports/csv.go +++ b/imports/csv.go @@ -74,6 +74,22 @@ func LoadFromCSV(ctx context.Context, r io.ReadSeeker, options ...CSVLoadOptions var init *dataframe.SeriesInit + // Check for bom characters in the beginning (that python seems to add). 
+ // See: + // https://github.com/rocketlaunchr/dataframe-go/issues/62 + // https://github.com/golang/go/issues/33887 + // https://github.com/dimchansky/utfbom + // https://github.com/spkg/bom/ + checkBOM := make([]byte, 3) + readN, err := r.Read(checkBOM) + if err != nil { + return nil, err + } + if !(readN == 3 && checkBOM[0] == 0xef && checkBOM[1] == 0xbb && checkBOM[2] == 0xbf) { + // bom not found so reset reader + r.Seek(0, io.SeekStart) + } + var ( comma rune comment rune