Skip to content

Commit 617306f

Browse files
Separate csv and tsv functions and remove use of sniff
`csv.Sniffer().sniff()` could cause arbitrary characters or spaces to be chosen as the delimiter. Separating the csv and tsv functions and giving each a hard-coded dialect fixes this display problem.
1 parent 8bb2dd4 commit 617306f

File tree

3 files changed

+35
-11
lines changed

3 files changed

+35
-11
lines changed

mfr/extensions/tabular/libs/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ def csv_stdlib():
88
return csv_stdlib
99

1010

11+
def tsv_stdlib():
    """Return the ``tsv_stdlib`` function from ``stdlib_tools``.

    The import happens inside the function body, so ``stdlib_tools`` is only
    loaded when this wrapper is actually called.
    """
    from ..libs.stdlib_tools import tsv_stdlib as stdlib_tsv_reader
    return stdlib_tsv_reader
14+
15+
1116
def csv_pandas():
    """Return the ``csv_pandas`` function from ``panda_tools``.

    The import happens inside the function body, so ``panda_tools`` is only
    loaded when this wrapper is actually called.
    """
    from ..libs.panda_tools import csv_pandas as pandas_csv_reader
    return pandas_csv_reader

mfr/extensions/tabular/libs/stdlib_tools.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,45 @@
11
import re
22
import csv
33

4-
from mfr.extensions.tabular.exceptions import EmptyTableError, TabularRendererError
54
from mfr.extensions.tabular import utilities
5+
from mfr.extensions.tabular.exceptions import EmptyTableError, TabularRendererError
66

77

88
def csv_stdlib(fp):
    """Read and convert a csv file to JSON format using the python standard library

    The dialect is fixed to the comma-delimited ``csv.excel`` instead of being
    sniffed, because sniffing can pick spaces or arbitrary characters as the
    delimiter.

    :param fp: File pointer object
    :return: tuple of table headers and data
    """
    # Sample the start of the file for quote detection, then rewind so the
    # reader below still sees the file from the beginning.  This must be
    # ``read``: ``seek`` returns the new stream position, not file content.
    data = fp.read(2048)
    fp.seek(0)

    # Work on a private subclass rather than ``csv.excel`` itself.  The
    # helper's return value is unused, so whatever it adjusts is applied to
    # the dialect object it is given, and ``csv.excel`` is shared by every
    # caller in the process.
    class _CsvDialect(csv.excel):
        pass

    dialect = _CsvDialect
    try:
        _set_dialect_quote_attrs(dialect, data)
    except Exception:
        # Quote detection is best-effort: on failure keep the dialect's
        # default quoting instead of failing the whole render.
        pass

    reader = csv.DictReader(fp, dialect=dialect)
    return parse_stdlib(reader)
22+
23+
def tsv_stdlib(fp):
    """Read and convert a tsv file to JSON format using the python standard library

    The dialect is fixed to the tab-delimited ``csv.excel_tab`` instead of
    being sniffed, because sniffing can pick spaces or arbitrary characters as
    the delimiter.

    :param fp: File pointer object
    :return: tuple of table headers and data
    """
    # Sample the start of the file for quote detection, then rewind so the
    # reader below still sees the file from the beginning.  This must be
    # ``read``: ``seek`` returns the new stream position, not file content.
    data = fp.read(2048)
    fp.seek(0)

    # Work on a private subclass rather than ``csv.excel_tab`` itself.  The
    # helper's return value is unused, so whatever it adjusts is applied to
    # the dialect object it is given, and ``csv.excel_tab`` is shared by
    # every caller in the process.
    class _TsvDialect(csv.excel_tab):
        pass

    dialect = _TsvDialect
    try:
        _set_dialect_quote_attrs(dialect, data)
    except Exception:
        # Quote detection is best-effort: on failure keep the dialect's
        # default quoting instead of failing the whole render.
        pass

    reader = csv.DictReader(fp, dialect=dialect)
    return parse_stdlib(reader)
37+
38+
def parse_stdlib(reader):
39+
"""Read and convert a csv like file to JSON format using the python standard library
40+
:param reader: csv.DictReader object
41+
:return: tuple of table headers and data
42+
"""
2443
columns = []
2544
# update the reader field names to avoid duplicate column names when performing row extraction
2645
for idx, fieldname in enumerate(reader.fieldnames or []):

mfr/extensions/tabular/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
LIBS = config.get('LIBS', {
1212
'.csv': [libs.csv_stdlib],
13-
'.tsv': [libs.csv_stdlib],
13+
'.tsv': [libs.tsv_stdlib],
1414
'.gsheet': [libs.xlsx_xlrd],
1515
'.xlsx': [libs.xlsx_xlrd],
1616
'.xls': [libs.xlsx_xlrd],

0 commit comments

Comments
 (0)