From 41e016a9d21ceb0eb77d73194d0597dd4aee2f4e Mon Sep 17 00:00:00 2001
From: Andy Atkinson <andyatkinson@gmail.com>
Date: Wed, 16 Oct 2024 17:08:01 -0500
Subject: [PATCH] Indexing JSON data

---
 indexing_json/example.sql | 56 +++++++++++++++++++++++++++++++++++++++
 indexing_json/readme.md   |  3 +++
 2 files changed, 59 insertions(+)
 create mode 100644 indexing_json/example.sql
 create mode 100644 indexing_json/readme.md

diff --git a/indexing_json/example.sql b/indexing_json/example.sql
new file mode 100644
index 0000000..c876329
--- /dev/null
+++ b/indexing_json/example.sql
@@ -0,0 +1,56 @@
+CREATE TABLE stripe_events (
+    data jsonb NOT NULL
+);
+
+-- Used ChatGPT for these fields
+INSERT INTO stripe_events (data)
+SELECT
+    jsonb_build_object(
+        'customer_id', (1000 + gs),
+        'event_type', CASE WHEN gs % 2 = 0 THEN 'payment.succeeded' ELSE 'payment.failed' END,
+        'amount', (random() * 100)::int,
+        'currency', 'usd',
+        'timestamp', NOW() - (gs || ' days')::interval
+    )
+FROM
+    GENERATE_SERIES(1, 100000) AS gs;
+
+\timing
+
+-- Takes about 30ms locally
+SELECT *
+FROM stripe_events
+WHERE data ->> 'customer_id' = '1010';
+
+-- postgres=# EXPLAIN (ANALYZE, BUFFERS) SELECT *
+-- FROM stripe_events
+-- WHERE data ->> 'customer_id' = '1010';
+--                                                   QUERY PLAN
+-- ---------------------------------------------------------------------------------------------------------------
+--  Seq Scan on stripe_events  (cost=0.00..3929.00 rows=500 width=161) (actual time=0.027..30.321 rows=1 loops=1)
+--    Filter: ((data ->> 'customer_id'::text) = '1010'::text)
+--    Rows Removed by Filter: 99999
+--    Buffers: shared hit=2429
+--  Planning Time: 0.066 ms
+--  Execution Time: 30.341 ms
+-- (6 rows)
+
+
+-- Add index
+CREATE INDEX idx_customer_id ON stripe_events ((data ->> 'customer_id'));
+
+-- Now it's an index scan on idx_customer_id and runs in sub-1ms!
+
+-- postgres=# EXPLAIN (ANALYZE, BUFFERS) SELECT *
+-- FROM stripe_events
+-- WHERE data ->> 'customer_id' = '1010';
+--                                                            QUERY PLAN
+-- ---------------------------------------------------------------------------------------------------------------------------------
+--  Index Scan using idx_customer_id on stripe_events  (cost=0.29..8.31 rows=1 width=161) (actual time=0.057..0.059 rows=1 loops=1)
+--    Index Cond: ((data ->> 'customer_id'::text) = '1010'::text)
+--    Buffers: shared hit=3
+--  Planning Time: 0.092 ms
+--  Execution Time: 0.077 ms
+-- (5 rows)
+--
+-- Time: 0.537 ms
diff --git a/indexing_json/readme.md b/indexing_json/readme.md
new file mode 100644
index 0000000..c7a8668
--- /dev/null
+++ b/indexing_json/readme.md
@@ -0,0 +1,3 @@
+## Indexing JSON data
+
+- <https://x.com/jpsilvashy/status/1841504951198941652>