pgflow-dev
diff --git a/‎pkgs/core/schemas/0120_function_start_tasks.sql‎
Lines changed: 75 additions & 3 deletions b/‎pkgs/core/schemas/0120_function_start_tasks.sql‎
Lines changed: 75 additions & 3 deletions
diff --git a/‎pkgs/core/supabase/migrations/20250916203905_pgflow_temp_handle_arrays_in_start_tasks.sql‎
Lines changed: 157 additions & 0 deletions b/‎pkgs/core/supabase/migrations/20250916203905_pgflow_temp_handle_arrays_in_start_tasks.sql‎
Lines changed: 157 additions & 0 deletions
diff --git a/‎pkgs/core/supabase/migrations/atlas.sum‎
Lines changed: 2 additions & 1 deletion b/‎pkgs/core/supabase/migrations/atlas.sum‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pkgs/core/supabase/tests/start_tasks/dependent_map_element_extraction.test.sql‎
Lines changed: 118 additions & 0 deletions b/‎pkgs/core/supabase/tests/start_tasks/dependent_map_element_extraction.test.sql‎
Lines changed: 118 additions & 0 deletions
@@ -56,7 +56,8 @@ as $$
     select
       d.run_id,
       d.step_slug,
-      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output
+      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
+      count(*) as dep_count
     from deps d
     group by d.run_id, d.step_slug
   ),
@@ -82,11 +83,82 @@ as $$
     st.flow_slug,
     st.run_id,
     st.step_slug,
-    jsonb_build_object('run', r.input) ||
-    coalesce(dep_out.deps_output, '{}'::jsonb) as input,
+    -- ==========================================
+    -- INPUT CONSTRUCTION LOGIC
+    -- ==========================================
+    -- This nested CASE statement determines how to construct the input
+    -- for each task based on the step type (map vs non-map).
+    --
+    -- The fundamental difference:
+    -- - Map steps: Receive RAW array elements (e.g., just 42 or "hello")
+    -- - Non-map steps: Receive structured objects with named keys
+    --                  (e.g., {"run": {...}, "dependency1": {...}})
+    -- ==========================================
+    CASE
+      -- -------------------- MAP STEPS --------------------
+      -- Map steps process arrays element-by-element.
+      -- Each task receives ONE element from the array at its task_index position.
+      WHEN step.step_type = 'map' THEN
+        -- Map steps get raw array elements without any wrapper object
+        CASE
+          -- ROOT MAP: Gets array from run input
+          -- Example: run input = [1, 2, 3]
+          --          task 0 gets: 1
+          --          task 1 gets: 2
+          --          task 2 gets: 3
+          WHEN step.deps_count = 0 THEN
+            -- Root map (deps_count = 0): no dependencies, reads from run input.
+            -- Extract the element at task_index from the run's input array.
+            -- Note: If run input is not an array, this will return NULL
+            -- and the flow will fail (validated in start_flow).
+            jsonb_array_element(r.input, st.task_index)
+
+          -- DEPENDENT MAP: Gets array from its single dependency
+          -- Example: dependency output = ["a", "b", "c"]
+          --          task 0 gets: "a"
+          --          task 1 gets: "b"
+          --          task 2 gets: "c"
+          ELSE
+            -- Has dependencies (should be exactly 1 for map steps).
+            -- Extract the element at task_index from the dependency's output array.
+            --
+            -- Why the subquery with jsonb_each?
+            -- - The dependency outputs a raw array: [1, 2, 3]
+            -- - deps_outputs aggregates it into: {"dep_name": [1, 2, 3]}
+            -- - We need to unwrap and get just the array value
+            -- - Map steps have exactly 1 dependency (enforced by add_step)
+            -- - So jsonb_each will return exactly 1 row
+            -- - We extract the 'value' which is the raw array [1, 2, 3]
+            -- - Then get the element at task_index from that array
+            (SELECT jsonb_array_element(value, st.task_index)
+            FROM jsonb_each(dep_out.deps_output)
+            LIMIT 1)
+        END
+
+      -- -------------------- NON-MAP STEPS --------------------
+      -- Regular (non-map) steps receive ALL inputs as a structured object.
+      -- This includes the original run input plus all dependency outputs.
+      ELSE
+        -- Non-map steps get structured input with named keys
+        -- Example output: {
+        --   "run": {"original": "input"},
+        --   "step1": {"output": "from_step1"},
+        --   "step2": {"output": "from_step2"}
+        -- }
+        --
+        -- Build object with 'run' key containing original input
+        jsonb_build_object('run', r.input) ||
+        -- Merge with deps_output which already has dependency outputs
+        -- deps_output format: {"dep1": output1, "dep2": output2, ...}
+        -- If no dependencies, defaults to empty object
+        coalesce(dep_out.deps_output, '{}'::jsonb)
+    END as input,
     st.message_id as msg_id
   from tasks st
   join runs r on st.run_id = r.run_id
+  join pgflow.steps step on
+    step.flow_slug = st.flow_slug and
+    step.step_slug = st.step_slug
   left join deps_outputs dep_out on
     dep_out.run_id = st.run_id and
     dep_out.step_slug = st.step_slug
 
@@ -0,0 +1,157 @@
+-- Modify "start_tasks" function
+--
+-- pgflow.start_tasks(flow_slug, msg_ids, worker_id)
+--
+-- Purpose: moves the queued step tasks identified by msg_ids (within
+-- flow_slug) to 'started', extends the visibility timeout of their queue
+-- messages, and returns one row per task with the input payload a handler
+-- should receive.
+--
+-- Parameters:
+--   flow_slug  flow the messages belong to; also passed to
+--              pgflow.set_vt_batch as its first argument
+--   msg_ids    message ids of the tasks to start; tasks whose status is not
+--              'queued' are ignored
+--   worker_id  stored in step_tasks.last_worker_id for every started task
+--
+-- Returns: setof pgflow.step_task_record built from
+--   (flow_slug, run_id, step_slug, input, msg_id).
+--   * map steps     -> input is the single array element at task_index
+--   * other steps   -> input is {"run": <run input>, "<dep_slug>": <output>, ...}
+--
+-- NOTE(review): this file is tracked in atlas.sum; any edit to it requires
+-- regenerating the migration checksums.
+CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
+with tasks as (
+    -- Queued tasks of this flow that match the supplied message ids.
+    select
+      task.flow_slug,
+      task.run_id,
+      task.step_slug,
+      task.task_index,
+      task.message_id
+    from pgflow.step_tasks as task
+    where task.flow_slug = start_tasks.flow_slug
+      and task.message_id = any(msg_ids)
+      and task.status = 'queued'
+  ),
+  -- Data-modifying CTE: PostgreSQL always runs it to completion even though
+  -- the primary query never reads from it.
+  start_tasks_update as (
+    update pgflow.step_tasks
+    set 
+      attempts_count = attempts_count + 1,
+      status = 'started',
+      started_at = now(),
+      last_worker_id = worker_id
+    from tasks
+    where step_tasks.message_id = tasks.message_id
+      and step_tasks.flow_slug = tasks.flow_slug
+      and step_tasks.status = 'queued'
+  ),
+  -- Run inputs for every run that owns at least one selected task.
+  runs as (
+    select
+      r.run_id,
+      r.input
+    from pgflow.runs r
+    where r.run_id in (select run_id from tasks)
+  ),
+  -- One row per (task, completed dependency task) carrying that dependency's output.
+  deps as (
+    select
+      st.run_id,
+      st.step_slug,
+      dep.dep_slug,
+      dep_task.output as dep_output
+    from tasks st
+    join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
+    join pgflow.step_tasks dep_task on
+      dep_task.run_id = st.run_id and
+      dep_task.step_slug = dep.dep_slug and
+      dep_task.status = 'completed'
+  ),
+  -- Dependency outputs folded into a single object keyed by dep_slug.
+  -- dep_count is exposed by this CTE but is not read by the final SELECT below.
+  deps_outputs as (
+    select
+      d.run_id,
+      d.step_slug,
+      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
+      count(*) as dep_count
+    from deps d
+    group by d.run_id, d.step_slug
+  ),
+  -- Per-message visibility delay: the step-level timeout when set, otherwise
+  -- the flow-level one, plus 2 (presumably seconds of slack - TODO confirm unit).
+  timeouts as (
+    select
+      task.message_id,
+      task.flow_slug,
+      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
+    from tasks task
+    join pgflow.flows flow on flow.flow_slug = task.flow_slug
+    join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
+  ),
+  -- Batch update visibility timeouts for all messages
+  set_vt_batch as (
+    select pgflow.set_vt_batch(
+      start_tasks.flow_slug,
+      array_agg(t.message_id order by t.message_id),
+      array_agg(t.vt_delay order by t.message_id)
+    )
+    from timeouts t
+    -- Without this guard an empty "timeouts" set would still yield one
+    -- aggregate row and call the helper with NULL arrays.
+    having count(*) > 0
+  )
+  select
+    st.flow_slug,
+    st.run_id,
+    st.step_slug,
+    -- ==========================================
+    -- INPUT CONSTRUCTION LOGIC
+    -- ==========================================
+    -- This nested CASE statement determines how to construct the input
+    -- for each task based on the step type (map vs non-map).
+    --
+    -- The fundamental difference:
+    -- - Map steps: Receive RAW array elements (e.g., just 42 or "hello")
+    -- - Non-map steps: Receive structured objects with named keys
+    --                  (e.g., {"run": {...}, "dependency1": {...}})
+    -- ==========================================
+    CASE
+      -- -------------------- MAP STEPS --------------------
+      -- Map steps process arrays element-by-element.
+      -- Each task receives ONE element from the array at its task_index position.
+      WHEN step.step_type = 'map' THEN
+        -- Map steps get raw array elements without any wrapper object
+        CASE
+          -- ROOT MAP: Gets array from run input
+          -- Example: run input = [1, 2, 3]
+          --          task 0 gets: 1
+          --          task 1 gets: 2
+          --          task 2 gets: 3
+          WHEN step.deps_count = 0 THEN
+            -- Root map (deps_count = 0): no dependencies, reads from run input.
+            -- Extract the element at task_index from the run's input array.
+            -- Note: If run input is not an array, this will return NULL
+            -- and the flow will fail (validated in start_flow).
+            jsonb_array_element(r.input, st.task_index)
+
+          -- DEPENDENT MAP: Gets array from its single dependency
+          -- Example: dependency output = ["a", "b", "c"]
+          --          task 0 gets: "a"
+          --          task 1 gets: "b"
+          --          task 2 gets: "c"
+          ELSE
+            -- Has dependencies (should be exactly 1 for map steps).
+            -- Extract the element at task_index from the dependency's output array.
+            --
+            -- Why the subquery with jsonb_each?
+            -- - The dependency outputs a raw array: [1, 2, 3]
+            -- - deps_outputs aggregates it into: {"dep_name": [1, 2, 3]}
+            -- - We need to unwrap and get just the array value
+            -- - Map steps have exactly 1 dependency (enforced by add_step)
+            -- - So jsonb_each will return exactly 1 row
+            -- - We extract the 'value' which is the raw array [1, 2, 3]
+            -- - Then get the element at task_index from that array
+            (SELECT jsonb_array_element(value, st.task_index)
+            FROM jsonb_each(dep_out.deps_output)
+            LIMIT 1)
+        END
+
+      -- -------------------- NON-MAP STEPS --------------------
+      -- Regular (non-map) steps receive ALL inputs as a structured object.
+      -- This includes the original run input plus all dependency outputs.
+      ELSE
+        -- Non-map steps get structured input with named keys
+        -- Example output: {
+        --   "run": {"original": "input"},
+        --   "step1": {"output": "from_step1"},
+        --   "step2": {"output": "from_step2"}
+        -- }
+        --
+        -- Build object with 'run' key containing original input
+        jsonb_build_object('run', r.input) ||
+        -- Merge with deps_output which already has dependency outputs
+        -- deps_output format: {"dep1": output1, "dep2": output2, ...}
+        -- If no dependencies, defaults to empty object
+        coalesce(dep_out.deps_output, '{}'::jsonb)
+    END as input,
+    st.message_id as msg_id
+  from tasks st
+  join runs r on st.run_id = r.run_id
+  join pgflow.steps step on
+    step.flow_slug = st.flow_slug and
+    step.step_slug = st.step_slug
+  left join deps_outputs dep_out on
+    dep_out.run_id = st.run_id and
+    dep_out.step_slug = st.step_slug
+  -- PostgreSQL skips a SELECT CTE that the primary query never references,
+  -- so set_vt_batch has to be read here for pgflow.set_vt_batch() to run.
+  -- count(*) drains the CTE and the comparison is always true, so no task
+  -- row is filtered out.
+  where (select count(*) from set_vt_batch) >= 0
+$$;
@@ -1,4 +1,4 @@
-h1:iTSgSZ3IR12NmZGRVc2bayttSSUytFA3+bximV7hb2U=
+h1:5eNDpXz1Ru5E6c9G7Glyo398mstJYLNNhjqcTjOaGxI=
 20250429164909_pgflow_initial.sql h1:5K7OqB/vj73TWJTQquUzn+i6H2wWduaW+Ir1an3QYmQ=
 20250517072017_pgflow_fix_poll_for_tasks_to_use_separate_statement_for_polling.sql h1:gnT6hYn43p5oIfr0HqoGlqX/4Si+uxMsCBtBa0/Z2Cg=
 20250609105135_pgflow_add_start_tasks_and_started_status.sql h1:9Yv/elMz9Nht9rCJOybx62eNrUyghsEMbMKeOJPUMVc=
@@ -13,3 +13,4 @@ h1:iTSgSZ3IR12NmZGRVc2bayttSSUytFA3+bximV7hb2U=
 20250912125339_pgflow_TEMP_task_spawning_optimization.sql h1:HTSShQweuTS1Sz5q/KLy5XW3J/6D/mA6jjVpCfvjBto=
 20250916093518_pgflow_temp_add_cascade_complete.sql h1:rQeqjEghqhGGUP+njrHFpPZxrxInjMHq5uSvYN1dTZc=
 20250916142327_pgflow_temp_make_initial_tasks_nullable.sql h1:YXBqH6MkLFm8+eadVLh/Pc3TwewCgmVyQZBFDCqYf+Y=
+20250916203905_pgflow_temp_handle_arrays_in_start_tasks.sql h1:hsesHyW890Z31WLJsXQIp9+LqnlOEE9tLIsLNCKRj+4=
@@ -0,0 +1,118 @@
+-- pgTAP test: a map step that depends on a single step must hand each of its
+-- tasks exactly one element of the producer's array output, selected by
+-- task_index. Runs inside a transaction that is rolled back at the end.
+begin;
+-- 4 setup checks + 4 per-task input checks (task_index 0..3)
+select plan(8);
+select pgflow_tests.reset_db();
+
+-- Test: Dependent map tasks receive individual array elements from predecessor
+select diag('Testing dependent map tasks receive elements from predecessor output');
+
+-- SETUP: Create flow with single step -> map step
+select pgflow.create_flow('dep_map_flow');
+select pgflow.add_step(
+  flow_slug => 'dep_map_flow',
+  step_slug => 'producer_step',
+  deps_slugs => '{}',
+  step_type => 'single'
+);
+select pgflow.add_step(
+  flow_slug => 'dep_map_flow',
+  step_slug => 'map_consumer',
+  deps_slugs => ARRAY['producer_step'],
+  step_type => 'map'
+);
+
+-- Start flow with some input
+-- (run_id is captured into a psql variable for the queries below)
+select run_id from pgflow.start_flow('dep_map_flow', '{"initial": "data"}'::jsonb) \gset
+
+-- Verify producer step was created and has a task
+select is(
+  (select count(*) from pgflow.step_tasks
+   where run_id = :'run_id' and step_slug = 'producer_step'),
+  1::bigint,
+  'Producer step should have 1 task'
+);
+
+-- Ensure worker exists
+select pgflow_tests.ensure_worker('dep_map_flow');
+
+-- Start and complete the producer task with array output
+with producer_task as (
+  select * from pgflow_tests.read_and_start('dep_map_flow', 1, 1) limit 1
+)
+select pgflow.complete_task(
+  (select run_id from producer_task),
+  'producer_step',
+  0,
+  '[10, 20, 30, 40]'::jsonb  -- Array output from producer
+)
+from producer_task;
+
+-- Verify producer step is completed
+select is(
+  (select status from pgflow.step_states
+   where run_id = :'run_id' and step_slug = 'producer_step'),
+  'completed',
+  'Producer step should be completed'
+);
+
+-- Verify map_consumer initial_tasks was set to 4
+select is(
+  (select initial_tasks from pgflow.step_states
+   where run_id = :'run_id' and step_slug = 'map_consumer'),
+  4,
+  'Map consumer should have initial_tasks = 4 (array length)'
+);
+
+-- Verify 4 tasks were created for map_consumer
+select is(
+  (select count(*) from pgflow.step_tasks
+   where run_id = :'run_id' and step_slug = 'map_consumer'),
+  4::bigint,
+  'Should create 4 tasks for map step'
+);
+
+-- Get message IDs for each map task
+select message_id as msg_id_0 from pgflow.step_tasks
+where run_id = :'run_id' and step_slug = 'map_consumer' and task_index = 0 \gset
+
+select message_id as msg_id_1 from pgflow.step_tasks
+where run_id = :'run_id' and step_slug = 'map_consumer' and task_index = 1 \gset
+
+select message_id as msg_id_2 from pgflow.step_tasks
+where run_id = :'run_id' and step_slug = 'map_consumer' and task_index = 2 \gset
+
+select message_id as msg_id_3 from pgflow.step_tasks
+where run_id = :'run_id' and step_slug = 'map_consumer' and task_index = 3 \gset
+
+-- TEST: Each map task receives its specific element from producer output
+-- Each task is started on its own so the returned input can be compared
+-- against the element at the same position of [10, 20, 30, 40].
+select is(
+  (select input from pgflow.start_tasks(
+    'dep_map_flow',
+    ARRAY[:'msg_id_0'::bigint],
+    '11111111-1111-1111-1111-111111111111'::uuid
+  )),
+  '10'::jsonb,
+  'Task 0 should receive first element (10) from producer_step'
+);
+
+select is(
+  (select input from pgflow.start_tasks(
+    'dep_map_flow',
+    ARRAY[:'msg_id_1'::bigint],
+    '11111111-1111-1111-1111-111111111111'::uuid
+  )),
+  '20'::jsonb,
+  'Task 1 should receive second element (20) from producer_step'
+);
+
+select is(
+  (select input from pgflow.start_tasks(
+    'dep_map_flow',
+    ARRAY[:'msg_id_2'::bigint],
+    '11111111-1111-1111-1111-111111111111'::uuid
+  )),
+  '30'::jsonb,
+  'Task 2 should receive third element (30) from producer_step'
+);
+
+select is(
+  (select input from pgflow.start_tasks(
+    'dep_map_flow',
+    ARRAY[:'msg_id_3'::bigint],
+    '11111111-1111-1111-1111-111111111111'::uuid
+  )),
+  '40'::jsonb,
+  'Task 3 should receive fourth element (40) from producer_step'
+);
+
+select finish();
+rollback;