diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1c3f40846..28c3c8356 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,7 +7,7 @@ on: env: REGISTRY: ghcr.io - IMAGE_NAME: nrel/api-umbrella + IMAGE_NAME: natlabrockies/api-umbrella DOCKER_BUILDKIT: 1 TESTS_GLOB: "test/**/test_*.rb" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6d22f3f01..516d5f619 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: env: REGISTRY: ghcr.io - IMAGE_NAME: nrel/api-umbrella + IMAGE_NAME: natlabrockies/api-umbrella DOCKER_BUILDKIT: 1 jobs: diff --git a/Dockerfile-opensearch b/Dockerfile-opensearch index 48cd0b045..5a31c7dcb 100644 --- a/Dockerfile-opensearch +++ b/Dockerfile-opensearch @@ -1,2 +1,2 @@ -FROM public.ecr.aws/opensearchproject/opensearch:2.17.1 +FROM public.ecr.aws/opensearchproject/opensearch:3.3.2 RUN /usr/share/opensearch/bin/opensearch-plugin install --batch mapper-murmur3 diff --git a/config/schema.cue b/config/schema.cue index 7c9ba298b..c8adb25bd 100644 --- a/config/schema.cue +++ b/config/schema.cue @@ -380,6 +380,8 @@ import "path" analytics_v0_summary_start_time: string | *"2013-07-01T00:00:00.000Z" analytics_v0_summary_end_time?: string analytics_v0_summary_filter?: string + analytics_v0_summary_db_timeout: uint | *900 // 15 minutes + analytics_v0_summary_analytics_timeout: uint | *2400 // 40 minutes max_body_size: string | *"1m" allowed_signup_embed_urls_regex?: string default_host?: string diff --git a/config/test.yml b/config/test.yml index 8d0a907f7..50d3212c2 100644 --- a/config/test.yml +++ b/config/test.yml @@ -83,7 +83,7 @@ web: user_name: api_umbrella password: dev_password contact_form_email: default-test-contact-email@example.com - analytics_v0_summary_start_time: "2013-07-01T00:00:00.000-06:00" + analytics_v0_summary_start_time: "2013-06-01T00:00:00.000-06:00" analytics_v0_summary_end_time: "2013-08-31T23:59:59.999-06:00" router: trusted_proxies: diff --git a/db/schema.sql b/db/schema.sql index 2e530de4d..77b21a314 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -67,7 +67,7 @@ CREATE FUNCTION api_umbrella.analytics_cache_extract_unique_user_ids() RETURNS t AS $$ BEGIN IF (jsonb_typeof(NEW.data->'aggregations'->'unique_user_ids'->'buckets') = 'array') THEN - NEW.unique_user_ids := (SELECT array_agg(DISTINCT bucket->>'key')::uuid[] FROM jsonb_array_elements(NEW.data->'aggregations'->'unique_user_ids'->'buckets') AS bucket); + NEW.unique_user_ids := (SELECT array_agg(DISTINCT bucket->'key'->>'user_id')::uuid[] FROM jsonb_array_elements(NEW.data->'aggregations'->'unique_user_ids'->'buckets') AS bucket); END IF; RETURN NEW; @@ -765,6 +765,9 @@ CREATE TABLE api_umbrella.analytics_cache ( created_at timestamp with time zone DEFAULT transaction_timestamp() NOT NULL, updated_at timestamp with time zone DEFAULT transaction_timestamp() NOT NULL, unique_user_ids uuid[], + data_date character varying GENERATED ALWAYS AS (((((((data -> 'aggregations'::text) -> 'hits_over_time'::text) -> 'buckets'::text) -> 0) ->> 'key_as_string'::text))::character varying) STORED, + hit_count bigint GENERATED ALWAYS AS (((((((data -> 'aggregations'::text) -> 'hits_over_time'::text) -> 'buckets'::text) -> 0) ->> 'doc_count'::text))::bigint) STORED, + response_time_average bigint GENERATED ALWAYS AS (round(((((data -> 'aggregations'::text) -> 'response_time_average'::text) ->> 'value'::text))::numeric)) STORED, CONSTRAINT analytics_cache_enforce_single_date_bucket CHECK 
((NOT (jsonb_array_length((((data -> 'aggregations'::text) -> 'hits_over_time'::text) -> 'buckets'::text)) > 1))) ); @@ -2801,6 +2804,7 @@ ALTER TABLE ONLY api_umbrella.rate_limits -- PostgreSQL database dump complete -- + INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1498350289'); INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1554823736'); INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1560722058'); @@ -2820,3 +2824,5 @@ INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1701483732'); INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1721347955'); INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1738353016'); INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1753472899'); +INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1769633747'); +INSERT INTO api_umbrella.lapis_migrations (name) VALUES ('1769732670'); diff --git a/docker-compose.yml b/docker-compose.yml index 7bc92ad9d..975b2fbe9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ services: dockerfile: Dockerfile target: test cache_from: - - ${DOCKER_IMAGE_CACHE_FROM:-ghcr.io/nrel/api-umbrella:dev-env-main} + - ${DOCKER_IMAGE_CACHE_FROM:-ghcr.io/natlabrockies/api-umbrella:dev-env-main} entrypoint: /app/docker/dev/docker-entrypoint command: /app/docker/dev/docker-start volumes: diff --git a/src/api-umbrella/cli/migrate.lua b/src/api-umbrella/cli/migrate.lua index 718084e65..fdb0ea1ec 100644 --- a/src/api-umbrella/cli/migrate.lua +++ b/src/api-umbrella/cli/migrate.lua @@ -62,7 +62,7 @@ return function() local clean_lines = {} local removing_comments = true for _, line in ipairs(lines) do - if not removing_comments or (line ~= "" and not startswith(line, "--")) then + if (not removing_comments or (line ~= "" and not startswith(line, "--"))) and not startswith(line, "\\restrict") and not startswith(line, "\\unrestrict") then if startswith(line, "COMMENT ON EXTENSION") then line = "-- " .. line end diff --git a/src/api-umbrella/version.txt b/src/api-umbrella/version.txt index dc1e644a1..ec70f7556 100644 --- a/src/api-umbrella/version.txt +++ b/src/api-umbrella/version.txt @@ -1 +1 @@ -1.6.0 +1.6.6 diff --git a/src/api-umbrella/web-app/actions/v0/analytics.lua b/src/api-umbrella/web-app/actions/v0/analytics.lua index d954f0d3b..ec365bf8a 100644 --- a/src/api-umbrella/web-app/actions/v0/analytics.lua +++ b/src/api-umbrella/web-app/actions/v0/analytics.lua @@ -15,10 +15,17 @@ local stable_object_hash = require "api-umbrella.utils.stable_object_hash" local t = require("api-umbrella.web-app.utils.gettext").gettext local time = require "api-umbrella.utils.time" +local db_statement_timeout_ms = config["web"]["analytics_v0_summary_db_timeout"] * 1000 + local _M = {} -local function generate_organization_summary(start_time, end_time, recent_start_time, filters) - local cache_id = "analytics_summary:organization:" .. start_time .. ":" .. end_time .. ":" .. recent_start_time .. ":" .. stable_object_hash(filters) +local function generate_organization_summary(organization_name, start_time, end_time, recent_start_time, filters) + local cache_id = "analytics_summary:organization:" .. organization_name .. ":" .. start_time .. ":" .. end_time .. ":" .. recent_start_time .. ":" .. 
stable_object_hash({ + filters = filters, + timezone = config["analytics"]["timezone"], + max_buckets = config["opensearch"]["max_buckets"], + analytics_v0_summary_filter = config["web"]["analytics_v0_summary_filter"], + }) local cache = Cache:find(cache_id) if cache then ngx.log(ngx.NOTICE, "Using cached analytics response for " .. cache_id) @@ -38,10 +45,50 @@ local function generate_organization_summary(start_time, end_time, recent_start_ if config["web"]["analytics_v0_summary_filter"] then search:set_search_query_string(config["web"]["analytics_v0_summary_filter"]) end - search:set_timeout(20 * 60) -- 20 minutes + search:set_timeout(config["web"]["analytics_v0_summary_analytics_timeout"]) search:set_permission_scope(filters) local aggregate_sql = [[ + WITH interval_rows AS ( + SELECT + substring(data_date from 1 for :date_key_length) AS interval_date, + hit_count, + unique_user_ids, + response_time_average + FROM analytics_cache + WHERE id IN :ids + ), + interval_unique_user_ids AS ( + SELECT + interval_date, + array_agg(DISTINCT user_ids.user_id) FILTER (WHERE user_ids.user_id IS NOT NULL) AS unique_user_ids + FROM interval_rows + CROSS JOIN LATERAL unnest(unique_user_ids) AS user_ids(user_id) + GROUP BY interval_date + ), + interval_counts AS ( + SELECT + interval_date, + SUM(hit_count) AS hit_count, + SUM(response_time_average) AS response_time_average + FROM interval_rows + GROUP BY interval_date + ), + interval_totals AS ( + SELECT + interval_counts.interval_date, + interval_counts.hit_count, + interval_unique_user_ids.unique_user_ids, + interval_counts.response_time_average + FROM interval_counts + NATURAL LEFT JOIN interval_unique_user_ids + ORDER BY interval_date + ), + all_unique_users AS ( + SELECT COUNT(DISTINCT user_ids.user_id) FILTER (WHERE user_ids.user_id IS NOT NULL) AS total_unique_users + FROM interval_totals + LEFT JOIN LATERAL unnest(interval_totals.unique_user_ids) AS user_ids(user_id) ON true + ) SELECT jsonb_build_object( 'hits', jsonb_build_object( :interval_name, jsonb_agg(jsonb_build_array(interval_totals.interval_date, COALESCE(interval_totals.hit_count, 0))), @@ -49,39 +96,14 @@ local function generate_organization_summary(start_time, end_time, recent_start_ ), 'active_api_keys', jsonb_build_object( :interval_name, jsonb_agg(jsonb_build_array(interval_totals.interval_date, COALESCE(array_length(interval_totals.unique_user_ids, 1), 0))), - 'total', ( - SELECT COUNT(DISTINCT user_ids.id) - FROM unnest(array_accum(interval_totals.unique_user_ids)) AS user_ids(id) - ) + 'total', (SELECT total_unique_users FROM all_unique_users) ), 'average_response_times', jsonb_build_object( :interval_name, jsonb_agg(jsonb_build_array(interval_totals.interval_date, interval_totals.response_time_average)), 'average', ROUND(SUM(CASE WHEN interval_totals.response_time_average IS NOT NULL AND interval_totals.hit_count IS NOT NULL THEN interval_totals.response_time_average * interval_totals.hit_count END) / SUM(CASE WHEN interval_totals.response_time_average IS NOT NULL AND interval_totals.hit_count IS NOT NULL THEN interval_totals.hit_count END)) ) ) AS response - FROM ( - SELECT - interval_date, - hit_count, - response_time_average, - ( - SELECT array_agg(DISTINCT user_id) - FROM unnest(interval_agg.user_ids) AS user_id - LEFT JOIN api_users ON user_id = api_users.id - WHERE user_id IS NOT NULL AND api_users.disabled_at IS NULL - ) AS unique_user_ids - FROM ( - SELECT - substring(data->'aggregations'->'hits_over_time'->'buckets'->0->>'key_as_string' from 1 for :date_key_length) 
AS interval_date, - SUM((data->'aggregations'->'hits_over_time'->'buckets'->0->>'doc_count')::bigint) AS hit_count, - array_accum(unique_user_ids) AS user_ids, - SUM(ROUND((data->'aggregations'->'response_time_average'->>'value')::numeric)) AS response_time_average - FROM analytics_cache - WHERE id IN :ids - GROUP BY interval_date - ORDER BY interval_date - ) AS interval_agg - ) AS interval_totals + FROM interval_totals ]] -- Expire the monthly data in 3 months. While the historical data shouldn't @@ -97,7 +119,7 @@ local function generate_organization_summary(start_time, end_time, recent_start_ date_key_length = 7, }, { fatal = true, - statement_timeout = 5 * 60 * 1000, -- 5 minutes + statement_timeout = db_statement_timeout_ms, })[1]["response"] search:set_start_time(recent_start_time) @@ -112,13 +134,26 @@ local function generate_organization_summary(start_time, end_time, recent_start_ date_key_length = 10, }, { fatal = true, - statement_timeout = 5 * 60 * 1000, -- 5 minutes + statement_timeout = db_statement_timeout_ms, })[1]["response"] response["hits"]["recent"] = recent_response["hits"] response["active_api_keys"]["recent"] = recent_response["active_api_keys"] response["average_response_times"]["recent"] = recent_response["average_response_times"] + -- Only cache the data if it includes the expected latest month of data, and + -- also includes all months/days expected. This prevents returning and + -- caching incomplete data due to the underlying analytics queries failing + -- for certain time periods and forces the data to wait until all of the + -- underlying data is cached before returning the overall summary data. + local last_month = response["hits"]["monthly"][#response["hits"]["monthly"]] + local expected_last_month = string.sub(end_time, 1, 7) + local last_day = response["hits"]["recent"]["daily"][#response["hits"]["recent"]["daily"]] + local expected_last_day = string.sub(end_time, 1, 10) + if last_month[1] ~= expected_last_month or last_day[1] ~= expected_last_day or #analytics_cache_ids ~= #response["hits"]["monthly"] or #recent_analytics_cache_ids ~= #response["hits"]["recent"]["daily"] then + return nil, "incomplete data" + end + local response_json = json_encode(response) expires_at = ngx.now() + 60 * 60 * 24 * 2 -- 2 days Cache:upsert(cache_id, response_json, expires_at) @@ -127,6 +162,8 @@ local function generate_organization_summary(start_time, end_time, recent_start_ end local function generate_production_apis_summary(start_time, end_time, recent_start_time) + local any_err = false + local data = { organizations = {}, } @@ -139,7 +176,7 @@ local function generate_production_apis_summary(start_time, end_time, recent_sta WHERE api_backends.status_description = 'Production' ]], nil, { fatal = true, - statement_timeout = 5 * 60 * 1000, -- 5 minutes + statement_timeout = db_statement_timeout_ms, }) data["organization_count"] = int64_to_json_number(counts[1]["organization_count"]) data["api_backend_count"] = int64_to_json_number(counts[1]["api_backend_count"]) @@ -162,7 +199,7 @@ local function generate_production_apis_summary(start_time, end_time, recent_sta ORDER BY api_backends.organization_name ]], nil, { fatal = true, - statement_timeout = 5 * 60 * 1000, -- 5 minutes + statement_timeout = db_statement_timeout_ms, }) for _, organization in ipairs(organizations) do local filters = { @@ -190,18 +227,32 @@ local function generate_production_apis_summary(start_time, end_time, recent_sta end ngx.log(ngx.NOTICE, 'Fetching analytics for organization "' .. 
organization["organization_name"] .. '"')
-    local organization_data = generate_organization_summary(start_time, end_time, recent_start_time, filters)
-    organization_data["name"] = organization["organization_name"]
-    organization_data["api_backend_count"] = int64_to_json_number(organization["api_backend_count"])
-    organization_data["api_backend_url_match_count"] = int64_to_json_number(organization["api_backend_url_match_count"])
-    table.insert(data["organizations"], organization_data)
+    local organization_data, organization_data_err = generate_organization_summary(organization["organization_name"], start_time, end_time, recent_start_time, filters)
+    if organization_data_err then
+      ngx.log(ngx.ERR, 'Analytics for organization "' .. organization["organization_name"] .. '" failed: ', organization_data_err)
+      any_err = true
+    else
+      organization_data["name"] = organization["organization_name"]
+      organization_data["api_backend_count"] = int64_to_json_number(organization["api_backend_count"])
+      organization_data["api_backend_url_match_count"] = int64_to_json_number(organization["api_backend_url_match_count"])
+      table.insert(data["organizations"], organization_data)
+    end
   end
 
   ngx.log(ngx.NOTICE, "Fetching analytics for all organizations")
-  local all_data = generate_organization_summary(start_time, end_time, recent_start_time, all_filters)
-  data["all"] = all_data
+  local all_data, all_data_err = generate_organization_summary("all", start_time, end_time, recent_start_time, all_filters)
+  if all_data_err then
+    ngx.log(ngx.ERR, "Analytics for all organizations failed: ", all_data_err)
+    any_err = true
+  else
+    data["all"] = all_data
+  end
 
-  return data
+  if any_err then
+    return nil, "incomplete data"
+  else
+    return data
+  end
 end
 
 local function generate_summary()
@@ -236,21 +287,24 @@ local function generate_summary()
   date_tz:set(icu_date.fields.MILLISECOND, 0)
   local recent_start_time = date_tz:format(format_iso8601)
 
-  local response = {
-    production_apis = generate_production_apis_summary(start_time, end_time, recent_start_time),
-    start_time = time.timestamp_ms_to_iso8601(start_time_ms),
-    end_time = time.timestamp_ms_to_iso8601(end_time_ms),
-    timezone = date_tz:get_time_zone_id(),
-  }
-
-  response["cached_at"] = time.timestamp_to_iso8601(ngx.now())
+  local production_apis, production_apis_err = generate_production_apis_summary(start_time, end_time, recent_start_time)
+  if production_apis_err then
+    ngx.log(ngx.ERR, "Production APIs summary error: ", production_apis_err)
+  else
+    local response = {
+      production_apis = production_apis,
+      start_time = time.timestamp_ms_to_iso8601(start_time_ms),
+      end_time = time.timestamp_ms_to_iso8601(end_time_ms),
+      timezone = date_tz:get_time_zone_id(),
+    }
 
-  local cache_id = "analytics_summary"
-  local response_json = json_encode(response)
-  local expires_at = ngx.now() + 60 * 60 * 24 * 2 -- 2 days
-  Cache:upsert(cache_id, response_json, expires_at)
+    response["cached_at"] = time.timestamp_to_iso8601(ngx.now())
 
-  return response_json
+    local cache_id = "analytics_summary"
+    local response_json = json_encode(response)
+    local expires_at = nil -- Never expire
+    Cache:upsert(cache_id, response_json, expires_at)
+  end
 end
 
 function _M.summary(self)
diff --git a/src/api-umbrella/web-app/hooks/init_preload_modules.lua b/src/api-umbrella/web-app/hooks/init_preload_modules.lua
index 1d4fdebbe..207cbea47 100644
--- a/src/api-umbrella/web-app/hooks/init_preload_modules.lua
+++ b/src/api-umbrella/web-app/hooks/init_preload_modules.lua
@@ -3,6 +3,7 @@ require "api-umbrella.utils.active_config_store.build_web_app_active_config"
 require "api-umbrella.utils.active_config_store.fetch_published_config_for_setting_active_config"
 require "api-umbrella.utils.active_config_store.polling_set_active_config"
 require "api-umbrella.utils.api_key_prefixer"
+require "api-umbrella.utils.append_array"
 require "api-umbrella.utils.array_includes"
 require "api-umbrella.utils.array_last"
 require "api-umbrella.utils.build_url"
diff --git a/src/api-umbrella/web-app/models/analytics_search_opensearch.lua b/src/api-umbrella/web-app/models/analytics_search_opensearch.lua
index 34277073c..c46b6dcd8 100644
--- a/src/api-umbrella/web-app/models/analytics_search_opensearch.lua
+++ b/src/api-umbrella/web-app/models/analytics_search_opensearch.lua
@@ -1,5 +1,6 @@
 local AnalyticsCache = require "api-umbrella.web-app.models.analytics_cache"
 local add_error = require("api-umbrella.web-app.utils.model_ext").add_error
+local append_array = require "api-umbrella.utils.append_array"
 local cjson = require "cjson.safe"
 local config = require("api-umbrella.utils.load_config")()
 local deepcopy = require("pl.tablex").deepcopy
@@ -401,11 +402,18 @@ end
 
 function _M:aggregate_by_unique_user_ids()
   self.body["aggregations"]["unique_user_ids"] = {
-    terms = {
-      field = "user_id",
+    composite = {
       size = config["opensearch"]["max_buckets"] - 5,
-      shard_size = config["opensearch"]["max_buckets"] * 4,
-    },
+      sources = {
+        {
+          user_id = {
+            terms = {
+              field = "user_id",
+            },
+          },
+        },
+      },
+    }
   }
 end
 
@@ -850,12 +858,58 @@ local function cache_interval_results_process_batch(self, cache_ids, batch)
     local expires_at = batch_elem["expires_at"]
 
     if not exist["cache_exists"] then
-      -- Perform the real OpenSearch query for uncached queries and cache
-      -- the result.
-      local results = self:fetch_results({
-        override_header = id_data["header"],
-        override_body = id_data["body"],
-      })
+      -- Perform the real OpenSearch query for uncached queries, accounting for
+      -- pagination on the `unique_user_ids` composite query.
+      local results
+      local page_body = deepcopy(id_data["body"])
+      while true do
+        local page_results = self:fetch_results({
+          override_header = id_data["header"],
+          override_body = page_body,
+        })
+        if not page_results then
+          break
+        end
+
+        -- Set the first page of results.
+        if not results then
+          results = page_results
+        end
+
+        -- If aggregating on the `composite` bucket for unique user IDs, make
+        -- more requests to fetch all of the user IDs without running into the
+        -- `search.max_buckets` limit on the server by paginating through the
+        -- aggregation results.
+        if results["aggregations"] and results["aggregations"]["unique_user_ids"] and results["aggregations"]["unique_user_ids"]["buckets"] and page_body["aggregations"] and page_body["aggregations"]["unique_user_ids"] and page_body["aggregations"]["unique_user_ids"]["composite"] then
+          -- If this was a paginated result, then merge the subsequent pages of
+          -- user IDs onto the original result. This allows us to store a
+          -- single cached response that looks as though all of the user IDs
+          -- were fetched at once (instead of having all of these multiple
+          -- requests).
+          if page_body["aggregations"]["unique_user_ids"]["composite"]["after"] and page_results["aggregations"] and page_results["aggregations"]["unique_user_ids"] and page_results["aggregations"]["unique_user_ids"]["buckets"] then
+            append_array(results["aggregations"]["unique_user_ids"]["buckets"], page_results["aggregations"]["unique_user_ids"]["buckets"])
+          end
+
+          -- If there are still more paginated user IDs to fetch, then make
+          -- another request to fetch the remaining until none are left.
+          if page_results["aggregations"]["unique_user_ids"]["after_key"] then
+            -- When fetching the other pages of results, remove any other
+            -- aggregations (like hits histogram) and just perform the one
+            -- paginated `unique_user_ids` composite query, with the new
+            -- `after` field set.
+            page_body["aggregations"] = {
+              unique_user_ids = page_body["aggregations"]["unique_user_ids"],
+            }
+            page_body["aggregations"]["unique_user_ids"]["composite"]["after"] = page_results["aggregations"]["unique_user_ids"]["after_key"]
+          else
+            break
+          end
+        else
+          break
+        end
+      end
+
+      -- Cache the results.
       if results then
         table.insert(cache_ids, exist["id"])
         AnalyticsCache:upsert(id_data, results, expires_at)
diff --git a/src/migrations.lua b/src/migrations.lua
index 19e3630d4..2d755fc78 100644
--- a/src/migrations.lua
+++ b/src/migrations.lua
@@ -1551,4 +1551,39 @@ return {
     db.query(grants_sql)
     db.query("COMMIT")
   end,
+
+  [1769633747] = function()
+    db.query("BEGIN")
+
+    -- Add extra columns to store parts of the cached aggregation data in a
+    -- fashion more optimized for querying. Since this strategy only works if
+    -- each row represents a single date bucket, the existing
+    -- analytics_cache_enforce_single_date_bucket constraint guards that assumption.
+    db.query("ALTER TABLE analytics_cache ADD COLUMN data_date varchar GENERATED ALWAYS AS ((data->'aggregations'->'hits_over_time'->'buckets'->0->>'key_as_string')::varchar) STORED")
+    db.query("ALTER TABLE analytics_cache ADD COLUMN hit_count bigint GENERATED ALWAYS AS ((data->'aggregations'->'hits_over_time'->'buckets'->0->>'doc_count')::bigint) STORED")
+    db.query("ALTER TABLE analytics_cache ADD COLUMN response_time_average bigint GENERATED ALWAYS AS (round((data->'aggregations'->'response_time_average'->>'value')::numeric)) STORED")
+
+    db.query(grants_sql)
+    db.query("COMMIT")
+  end,
+
+  [1769732670] = function()
+    db.query("BEGIN")
+
+    db.query([[
+      CREATE OR REPLACE FUNCTION analytics_cache_extract_unique_user_ids()
+      RETURNS TRIGGER AS $$
+      BEGIN
+        IF (jsonb_typeof(NEW.data->'aggregations'->'unique_user_ids'->'buckets') = 'array') THEN
+          NEW.unique_user_ids := (SELECT array_agg(DISTINCT bucket->'key'->>'user_id')::uuid[] FROM jsonb_array_elements(NEW.data->'aggregations'->'unique_user_ids'->'buckets') AS bucket);
+        END IF;
+
+        RETURN NEW;
+      END;
+      $$ LANGUAGE plpgsql;
+    ]])
+
+    db.query(grants_sql)
+    db.query("COMMIT")
+  end,
 }
diff --git a/test/apis/v0/test_analytics.rb b/test/apis/v0/test_analytics.rb
index 4e2cbaa8c..ef1dd8f35 100644
--- a/test/apis/v0/test_analytics.rb
+++ b/test/apis/v0/test_analytics.rb
@@ -7,6 +7,14 @@ def setup
     super
     setup_server
     LogItem.clean_indices!
+
+    Time.use_zone($config["analytics"]["timezone"]) do
+      @start_time = Time.zone.parse($config["web"]["analytics_v0_summary_start_time"])
+      @end_time = Time.zone.parse($config["web"]["analytics_v0_summary_end_time"])
+    end
+
+    FactoryBot.create_list(:log_item, 1, :request_at => @start_time)
+    LogItem.refresh_indices!
end def test_forbids_api_key_without_any_role @@ -45,23 +53,22 @@ def test_allows_api_key_without_role_if_configured end def test_expected_response + LogItem.clean_indices! + backend1 = FactoryBot.create(:api_backend, :frontend_host => "localhost1") backend2 = FactoryBot.create(:api_backend, :frontend_host => "localhost2") backend3 = FactoryBot.create(:api_backend, :organization_name => "Another Org", :frontend_host => "localhost3") backend4 = FactoryBot.create(:api_backend, :status_description => nil, :frontend_host => "localhost4") + FactoryBot.create(:api_backend, :organization_name => "Org without data", :frontend_host => "localhost5") - start_time = nil - end_time = nil - Time.use_zone($config["analytics"]["timezone"]) do - start_time = Time.zone.parse($config["web"]["analytics_v0_summary_start_time"]) - end_time = Time.zone.parse($config["web"]["analytics_v0_summary_end_time"]) - end - FactoryBot.create_list(:api_user, 3, :created_at => start_time) - FactoryBot.create_list(:log_item, 1, :request_at => start_time, :response_time => 100, :request_host => backend1.frontend_host, :request_path => backend1.url_matches[0].frontend_prefix) - FactoryBot.create_list(:log_item, 1, :request_at => start_time, :response_time => 100, :request_host => backend2.frontend_host, :request_path => backend2.url_matches[0].frontend_prefix) - FactoryBot.create_list(:log_item, 1, :request_at => start_time, :response_time => 100, :request_host => backend3.frontend_host, :request_path => backend3.url_matches[0].frontend_prefix) - FactoryBot.create_list(:log_item, 1, :request_at => start_time, :response_time => 100, :request_host => backend4.frontend_host, :request_path => backend4.url_matches[0].frontend_prefix) - FactoryBot.create_list(:log_item, 1, :request_at => end_time, :response_time => 200, :request_host => backend1.frontend_host, :request_path => backend1.url_matches[0].frontend_prefix) + users = FactoryBot.create_list(:api_user, 3, :created_at => @start_time) + FactoryBot.create_list(:log_item, 1, :request_at => @start_time, :response_time => 100, :request_host => backend1.frontend_host, :request_path => backend1.url_matches[0].frontend_prefix) + FactoryBot.create_list(:log_item, 1, :request_at => @start_time, :response_time => 100, :request_host => backend2.frontend_host, :request_path => backend2.url_matches[0].frontend_prefix) + FactoryBot.create_list(:log_item, 1, :request_at => @start_time, :response_time => 100, :request_host => backend3.frontend_host, :request_path => backend3.url_matches[0].frontend_prefix) + FactoryBot.create_list(:log_item, 1, :request_at => @start_time, :response_time => 100, :request_host => backend4.frontend_host, :request_path => backend4.url_matches[0].frontend_prefix) + FactoryBot.create_list(:log_item, 1, :request_at => @end_time, :response_time => 200, :request_host => backend1.frontend_host, :request_path => backend1.url_matches[0].frontend_prefix) + FactoryBot.create_list(:log_item, 1, :request_at => @end_time, :response_time => 100, :request_host => backend3.frontend_host, :request_path => backend3.url_matches[0].frontend_prefix, :user_id => users.first.id) + FactoryBot.create_list(:log_item, 2, :request_at => @end_time, :response_time => 1000, :request_host => backend3.frontend_host, :request_path => backend3.url_matches[0].frontend_prefix, :user_id => users.last.id) LogItem.refresh_indices! 
response = make_request @@ -80,7 +87,7 @@ def test_expected_response assert_match_iso8601(data.fetch("end_time")) assert_kind_of(Hash, data.fetch("production_apis")) assert_match_iso8601(data.fetch("start_time")) - assert_equal("2013-07-01T06:00:00Z", data.fetch("start_time")) + assert_equal("2013-06-01T06:00:00Z", data.fetch("start_time")) assert_equal("America/Denver", data.fetch("timezone")) assert_equal({ @@ -88,11 +95,12 @@ def test_expected_response { "active_api_keys" => { "monthly" => [ - ["2013-07", 1], - ["2013-08", 0], + ["2013-06", 1], + ["2013-07", 0], + ["2013-08", 2], ], "recent" => { - "total" => 0, + "total" => 2, "daily" => [ ["2013-08-02", 0], ["2013-08-03", 0], @@ -123,19 +131,20 @@ def test_expected_response ["2013-08-28", 0], ["2013-08-29", 0], ["2013-08-30", 0], - ["2013-08-31", 0], + ["2013-08-31", 2], ], }, - "total" => 1, + "total" => 3, }, "average_response_times" => { - "average" => 100, + "average" => 550, "monthly" => [ - ["2013-07", 100], - ["2013-08", nil], + ["2013-06", 100], + ["2013-07", nil], + ["2013-08", 700], ], "recent" => { - "average" => nil, + "average" => 700, "daily" => [ ["2013-08-02", nil], ["2013-08-03", nil], @@ -166,7 +175,7 @@ def test_expected_response ["2013-08-28", nil], ["2013-08-29", nil], ["2013-08-30", nil], - ["2013-08-31", nil], + ["2013-08-31", 700], ], }, }, @@ -175,11 +184,12 @@ def test_expected_response "api_backend_count" => 1, "hits" => { "monthly" => [ - ["2013-07", 1], - ["2013-08", 0], + ["2013-06", 1], + ["2013-07", 0], + ["2013-08", 3], ], "recent" => { - "total" => 0, + "total" => 3, "daily" => [ ["2013-08-02", 0], ["2013-08-03", 0], @@ -210,16 +220,17 @@ def test_expected_response ["2013-08-28", 0], ["2013-08-29", 0], ["2013-08-30", 0], - ["2013-08-31", 0], + ["2013-08-31", 3], ], }, - "total" => 1, + "total" => 4, }, }, { "active_api_keys" => { "monthly" => [ - ["2013-07", 1], + ["2013-06", 1], + ["2013-07", 0], ["2013-08", 1], ], "recent" => { @@ -262,7 +273,8 @@ def test_expected_response "average_response_times" => { "average" => 133, "monthly" => [ - ["2013-07", 100], + ["2013-06", 100], + ["2013-07", nil], ["2013-08", 200], ], "recent" => { @@ -306,7 +318,8 @@ def test_expected_response "api_backend_count" => 2, "hits" => { "monthly" => [ - ["2013-07", 2], + ["2013-06", 2], + ["2013-07", 0], ["2013-08", 1], ], "recent" => { @@ -347,15 +360,150 @@ def test_expected_response "total" => 3, }, }, + { + "active_api_keys" => { + "monthly" => [ + ["2013-06", 0], + ["2013-07", 0], + ["2013-08", 0], + ], + "recent" => { + "total" => 0, + "daily" => [ + ["2013-08-02", 0], + ["2013-08-03", 0], + ["2013-08-04", 0], + ["2013-08-05", 0], + ["2013-08-06", 0], + ["2013-08-07", 0], + ["2013-08-08", 0], + ["2013-08-09", 0], + ["2013-08-10", 0], + ["2013-08-11", 0], + ["2013-08-12", 0], + ["2013-08-13", 0], + ["2013-08-14", 0], + ["2013-08-15", 0], + ["2013-08-16", 0], + ["2013-08-17", 0], + ["2013-08-18", 0], + ["2013-08-19", 0], + ["2013-08-20", 0], + ["2013-08-21", 0], + ["2013-08-22", 0], + ["2013-08-23", 0], + ["2013-08-24", 0], + ["2013-08-25", 0], + ["2013-08-26", 0], + ["2013-08-27", 0], + ["2013-08-28", 0], + ["2013-08-29", 0], + ["2013-08-30", 0], + ["2013-08-31", 0], + ], + }, + "total" => 0, + }, + "average_response_times" => { + "average" => nil, + "monthly" => [ + ["2013-06", nil], + ["2013-07", nil], + ["2013-08", nil], + ], + "recent" => { + "average" => nil, + "daily" => [ + ["2013-08-02", nil], + ["2013-08-03", nil], + ["2013-08-04", nil], + ["2013-08-05", nil], + ["2013-08-06", nil], + ["2013-08-07", nil], + 
["2013-08-08", nil], + ["2013-08-09", nil], + ["2013-08-10", nil], + ["2013-08-11", nil], + ["2013-08-12", nil], + ["2013-08-13", nil], + ["2013-08-14", nil], + ["2013-08-15", nil], + ["2013-08-16", nil], + ["2013-08-17", nil], + ["2013-08-18", nil], + ["2013-08-19", nil], + ["2013-08-20", nil], + ["2013-08-21", nil], + ["2013-08-22", nil], + ["2013-08-23", nil], + ["2013-08-24", nil], + ["2013-08-25", nil], + ["2013-08-26", nil], + ["2013-08-27", nil], + ["2013-08-28", nil], + ["2013-08-29", nil], + ["2013-08-30", nil], + ["2013-08-31", nil], + ], + }, + }, + "api_backend_url_match_count" => 1, + "name" => "Org without data", + "api_backend_count" => 1, + "hits" => { + "monthly" => [ + ["2013-06", 0], + ["2013-07", 0], + ["2013-08", 0], + ], + "recent" => { + "total" => 0, + "daily" => [ + ["2013-08-02", 0], + ["2013-08-03", 0], + ["2013-08-04", 0], + ["2013-08-05", 0], + ["2013-08-06", 0], + ["2013-08-07", 0], + ["2013-08-08", 0], + ["2013-08-09", 0], + ["2013-08-10", 0], + ["2013-08-11", 0], + ["2013-08-12", 0], + ["2013-08-13", 0], + ["2013-08-14", 0], + ["2013-08-15", 0], + ["2013-08-16", 0], + ["2013-08-17", 0], + ["2013-08-18", 0], + ["2013-08-19", 0], + ["2013-08-20", 0], + ["2013-08-21", 0], + ["2013-08-22", 0], + ["2013-08-23", 0], + ["2013-08-24", 0], + ["2013-08-25", 0], + ["2013-08-26", 0], + ["2013-08-27", 0], + ["2013-08-28", 0], + ["2013-08-29", 0], + ["2013-08-30", 0], + ["2013-08-31", 0], + ], + }, + "total" => 0, + }, + }, ], "all" => { "active_api_keys" => { "monthly" => [ - ["2013-07", 1], - ["2013-08", 1], + ["2013-06", 1], + ["2013-07", 0], + ["2013-08", 3], ], "recent" => { - "total" => 1, + "total" => 3, "daily" => [ ["2013-08-02", 0], ["2013-08-03", 0], @@ -386,19 +534,20 @@ def test_expected_response ["2013-08-28", 0], ["2013-08-29", 0], ["2013-08-30", 0], - ["2013-08-31", 1], + ["2013-08-31", 3], ], }, - "total" => 1, + "total" => 3, }, "average_response_times" => { - "average" => 125, + "average" => 371, "monthly" => [ - ["2013-07", 100], - ["2013-08", 200], + ["2013-06", 100], + ["2013-07", nil], + ["2013-08", 575], ], "recent" => { - "average" => 200, + "average" => 575, "daily" => [ ["2013-08-02", nil], ["2013-08-03", nil], @@ -429,17 +578,18 @@ def test_expected_response ["2013-08-28", nil], ["2013-08-29", nil], ["2013-08-30", nil], - ["2013-08-31", 200], + ["2013-08-31", 575], ], }, }, "hits" => { "monthly" => [ - ["2013-07", 3], - ["2013-08", 1], + ["2013-06", 3], + ["2013-07", 0], + ["2013-08", 4], ], "recent" => { - "total" => 1, + "total" => 4, "daily" => [ ["2013-08-02", 0], ["2013-08-03", 0], @@ -470,15 +620,15 @@ def test_expected_response ["2013-08-28", 0], ["2013-08-29", 0], ["2013-08-30", 0], - ["2013-08-31", 1], + ["2013-08-31", 4], ], }, - "total" => 4, + "total" => 7, }, }, - "api_backend_count" => 3, - "organization_count" => 2, - "api_backend_url_match_count" => 3, + "api_backend_count" => 4, + "organization_count" => 3, + "api_backend_url_match_count" => 4, }, data.fetch("production_apis")) assert_equal([ @@ -504,7 +654,8 @@ def test_caches_results cache = Cache.find_by!(:id => "analytics_summary") assert_equal("analytics_summary", cache.id) assert_in_delta(Time.now.to_i, cache.created_at.to_i, 10) - assert_in_delta(Time.now.to_i + (60 * 60 * 24 * 2), cache.expires_at.to_i, 10) + assert_in_delta(Time.now.to_i, cache.updated_at.to_i, 10) + assert_nil(cache.expires_at) assert(cache.data) data = MultiJson.load(cache.data) assert_equal([ @@ -516,6 +667,35 @@ def test_caches_results ].sort, data.keys.sort) end + def 
test_unique_users_over_bucket_size_limit + LogItem.clean_indices! + + override_config({ + opensearch: { + max_buckets: 10, + }, + }) do + FactoryBot.create_list(:log_item, 3, request_at: @start_time, user_id: SecureRandom.uuid) + 15.times do + FactoryBot.create(:log_item, request_at: @start_time, user_id: SecureRandom.uuid) + end + LogItem.refresh_indices! + + response = make_request + assert_response_code(200, response) + assert_equal("MISS", response.headers["X-Cache"]) + + data = MultiJson.load(response.body) + assert_equal(18, data.fetch("production_apis").fetch("all").fetch("hits").fetch("total")) + assert_equal("2013-06", data.fetch("production_apis").fetch("all").fetch("hits").fetch("monthly")[0][0]) + assert_equal(18, data.fetch("production_apis").fetch("all").fetch("hits").fetch("monthly")[0][1]) + + assert_equal(16, data.fetch("production_apis").fetch("all").fetch("active_api_keys").fetch("total")) + assert_equal("2013-06", data.fetch("production_apis").fetch("all").fetch("active_api_keys").fetch("monthly")[0][0]) + assert_equal(16, data.fetch("production_apis").fetch("all").fetch("active_api_keys").fetch("monthly")[0][1]) + end + end + private def make_request(user = nil)