-
Notifications
You must be signed in to change notification settings - Fork 537
Open
Description
How to repeat
import tempfile
from pathlib import Path
import lance
import pyarrow as pa
# 1. Setup data: one list column whose first row holds a null element
#    next to "foo"; the second row holds only "foo".
tmp = Path(tempfile.mkdtemp(prefix="lance-label-list-nullable-"))
tbl = pa.table({"labels": [["foo", None], ["foo"]]})
ds = lance.write_dataset(tbl, tmp / "ds")

# Filters evaluated both before and after the scalar index is created.
FILTERS = [
    "array_has_any(labels, ['foo'])",
    "array_has_all(labels, ['foo'])",
]

# 2. Results before index (scan): row count and matching rows per filter.
pre = {f: ds.to_table(filter=f).num_rows for f in FILTERS}
pre_rows = {f: ds.to_table(filter=f).column("labels").to_pylist() for f in FILTERS}

# 3. Create index
ds.create_scalar_index("labels", index_type="LABEL_LIST")

# 4. Results after index (scalar index search): same queries as step 2.
post = {f: ds.to_table(filter=f).num_rows for f in FILTERS}
post_rows = {f: ds.to_table(filter=f).column("labels").to_pylist() for f in FILTERS}

# Report every filter whose row count changed once the index existed;
# the pre-index and post-index results are expected to be identical.
mismatches = {f: (pre[f], post[f]) for f in FILTERS if pre[f] != post[f]}
for f, (a, b) in mismatches.items():
    print(f"\nfilter: {f}")
    print("pre create index result:", pre_rows[f])
    print("post create index result:", post_rows[f])

# 5. Panic: a NULL literal in the filter list aborts the scan with the
#    traceback shown in the reported output.
ds.scanner(filter="array_has_any(labels, [NULL])").explain_plan()
Result
filter: array_has_any(labels, ['foo'])
pre create index result: [['foo', None], ['foo']]
post create index result: [['foo']]
filter: array_has_all(labels, ['foo'])
pre create index result: [['foo', None], ['foo']]
post create index result: [['foo']]
thread 'lance_background_thread' panicked at .../rust/lance-index/src/scalar.rs:577:18:
called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Non-nullable field of ListArray \"item\" cannot contain nulls")
stack backtrace:
...
Traceback (most recent call last):
File "xxx.py", line 31, in <module>
ds.scanner(filter="array_has_any(labels, [NULL])").explain_plan()
File ".../python/lance/dataset.py", line 5115, in explain_plan
return self._scanner.explain_plan(verbose=verbose)
RuntimeError: Task was aborted
Metadata
Metadata
Assignees
Labels
No labels