Skip to content

Commit 1b666f9

Browse files
committed
--delimiter and --quotechar, closes #223
1 parent 36dc7e3 commit 1b666f9

File tree

3 files changed

+59
-8
lines changed

3 files changed

+59
-8
lines changed

docs/cli.rst

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,8 @@ This also means you pipe ``sqlite-utils`` together to easily create a new SQLite
474474
207368,920 Kirkham St,37.760210314285,-122.47073935813
475475
188702,1501 Evans Ave,37.7422086702947,-122.387293152263
476476

477+
.. _cli_insert_csv_tsv:
478+
477479
Inserting CSV or TSV data
478480
=========================
479481

@@ -483,14 +485,33 @@ If your data is in CSV format, you can insert it using the ``--csv`` option::
483485

484486
For tab-delimited data, use ``--tsv``::
485487

486-
$ sqlite-utils insert dogs.db dogs docs.tsv --tsv
488+
$ sqlite-utils insert dogs.db dogs dogs.tsv --tsv
487489

488490
Data is expected to be encoded as Unicode UTF-8. If your data is in another character encoding you can specify it using the ``--encoding`` option::
489491

490-
$ sqlite-utils insert dogs.db dogs docs.tsv --tsv --encoding=latin-1
492+
$ sqlite-utils insert dogs.db dogs dogs.tsv --tsv --encoding=latin-1
491493

492494
A progress bar is displayed when inserting data from a file. You can hide the progress bar using the ``--silent`` option.
493495

496+
.. _cli_insert_csv_tsv_delimiter:
497+
498+
Alternative delimiters and quote characters
499+
-------------------------------------------
500+
501+
If your file uses a delimiter other than ``,`` or a quote character other than ``"`` you can specify them using the ``--delimiter`` and ``--quotechar`` options.
502+
503+
Here's a CSV file that uses ``;`` for delimiters and the ``|`` symbol for quote characters::
504+
505+
name;description
506+
Cleo;|Very fine; a friendly dog|
507+
Pancakes;A local corgi
508+
509+
You can import that using::
510+
511+
$ sqlite-utils insert dogs.db dogs dogs.csv --delimiter=";" --quotechar="|"
512+
513+
Passing either ``--delimiter`` or ``--quotechar`` implies ``--csv``, so you can omit that option.
514+
494515
.. _cli_insert_replace:
495516

496517
Insert-replacing data

sqlite_utils/cli.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import base64
2+
from tests.test_cli import test_query_memory_does_not_create_file
23
import click
34
import codecs
45
from click_default_group import DefaultGroup
@@ -601,6 +602,8 @@ def insert_upsert_options(fn):
601602
click.option("--nl", is_flag=True, help="Expect newline-delimited JSON"),
602603
click.option("-c", "--csv", is_flag=True, help="Expect CSV"),
603604
click.option("--tsv", is_flag=True, help="Expect TSV"),
605+
click.option("--delimiter", help="Delimiter to use for CSV files"),
606+
click.option("--quotechar", help="Quote character to use for CSV/TSV"),
604607
click.option(
605608
"--batch-size", type=int, default=100, help="Commit every X records"
606609
),
@@ -640,6 +643,8 @@ def insert_upsert_implementation(
640643
nl,
641644
csv,
642645
tsv,
646+
delimiter,
647+
quotechar,
643648
batch_size,
644649
alter,
645650
upsert,
@@ -654,6 +659,8 @@ def insert_upsert_implementation(
654659
):
655660
db = sqlite_utils.Database(path)
656661
_load_extensions(db, load_extension)
662+
if delimiter or quotechar:
663+
csv = True
657664
if (nl + csv + tsv) >= 2:
658665
raise click.ClickException("Use just one of --nl, --csv or --tsv")
659666
if encoding and not (csv or tsv):
@@ -665,7 +672,12 @@ def insert_upsert_implementation(
665672
if csv or tsv:
666673
dialect = "excel-tab" if tsv else "excel"
667674
with file_progress(json_file, silent=silent) as json_file:
668-
reader = csv_std.reader(json_file, dialect=dialect)
675+
csv_reader_args = {"dialect": dialect}
676+
if delimiter:
677+
csv_reader_args["delimiter"] = delimiter
678+
if quotechar:
679+
csv_reader_args["quotechar"] = quotechar
680+
reader = csv_std.reader(json_file, **csv_reader_args)
669681
headers = next(reader)
670682
docs = (dict(zip(headers, row)) for row in reader)
671683
else:
@@ -720,6 +732,8 @@ def insert(
720732
nl,
721733
csv,
722734
tsv,
735+
delimiter,
736+
quotechar,
723737
batch_size,
724738
alter,
725739
encoding,
@@ -746,6 +760,8 @@ def insert(
746760
nl,
747761
csv,
748762
tsv,
763+
delimiter,
764+
quotechar,
749765
batch_size,
750766
alter=alter,
751767
upsert=False,
@@ -773,6 +789,8 @@ def upsert(
773789
csv,
774790
tsv,
775791
batch_size,
792+
delimiter,
793+
quotechar,
776794
alter,
777795
not_null,
778796
default,
@@ -794,6 +812,8 @@ def upsert(
794812
nl,
795813
csv,
796814
tsv,
815+
delimiter,
816+
quotechar,
797817
batch_size,
798818
alter=alter,
799819
upsert=True,

tests/test_cli.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -722,18 +722,28 @@ def test_insert_ignore(db_path, tmpdir):
722722

723723

724724
@pytest.mark.parametrize(
725-
"content,option",
726-
(("foo\tbar\tbaz\n1\t2\t3", "--tsv"), ("foo,bar,baz\n1,2,3", "--csv")),
725+
"content,options",
726+
[
727+
("foo\tbar\tbaz\n1\t2\tcat,dog", ["--tsv"]),
728+
('foo,bar,baz\n1,2,"cat,dog"', ["--csv"]),
729+
('foo;bar;baz\n1;2;"cat,dog"', ["--csv", "--delimiter", ";"]),
730+
# --delimiter implies --csv:
731+
('foo;bar;baz\n1;2;"cat,dog"', ["--delimiter", ";"]),
732+
("foo,bar,baz\n1,2,|cat,dog|", ["--csv", "--quotechar", "|"]),
733+
("foo,bar,baz\n1,2,|cat,dog|", ["--quotechar", "|"]),
734+
],
727735
)
728-
def test_insert_csv_tsv(content, option, db_path, tmpdir):
736+
def test_insert_csv_tsv(content, options, db_path, tmpdir):
729737
db = Database(db_path)
730738
file_path = str(tmpdir / "insert.csv-tsv")
731739
open(file_path, "w").write(content)
732740
result = CliRunner().invoke(
733-
cli.cli, ["insert", db_path, "data", file_path, option], catch_exceptions=False
741+
cli.cli,
742+
["insert", db_path, "data", file_path] + options,
743+
catch_exceptions=False,
734744
)
735745
assert 0 == result.exit_code
736-
assert [{"foo": "1", "bar": "2", "baz": "3"}] == list(db["data"].rows)
746+
assert [{"foo": "1", "bar": "2", "baz": "cat,dog"}] == list(db["data"].rows)
737747

738748

739749
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)