diff --git a/packages/spacecat-shared-utils/src/constants.js b/packages/spacecat-shared-utils/src/constants.js index 7fb54c1b3..d937aca11 100644 --- a/packages/spacecat-shared-utils/src/constants.js +++ b/packages/spacecat-shared-utils/src/constants.js @@ -66,6 +66,13 @@ const OPPORTUNITY_TYPES = { PAID_COOKIE_CONSENT: 'paid-cookie-consent', }; +/** + * Default CPC (Cost Per Click) value in dollars used when Ahrefs organic traffic data + * is not available or invalid. + */ +const DEFAULT_CPC_VALUE = 1.5; + export { OPPORTUNITY_TYPES, + DEFAULT_CPC_VALUE, }; diff --git a/packages/spacecat-shared-utils/src/index.d.ts b/packages/spacecat-shared-utils/src/index.d.ts index 7fe102f9d..414f7bdfe 100644 --- a/packages/spacecat-shared-utils/src/index.d.ts +++ b/packages/spacecat-shared-utils/src/index.d.ts @@ -253,9 +253,44 @@ export function getStoredMetrics(config: object, context: object): */ export function storeMetrics(content: object, config: object, context: object): Promise; +/** + * Retrieves an object from S3 by its key and returns its JSON parsed content. + * If the object is not JSON, returns the raw body. + * If the object is not found, returns null. + * @param s3Client - The S3 client + * @param bucketName - The name of the S3 bucket + * @param key - The key of the S3 object + * @param log - A logger instance + * @returns The content of the S3 object or null if not found + */ +export function getObjectFromKey( + s3Client: any, + bucketName: string, + key: string, + log: any +): Promise; + +/** + * Fetches the organic traffic data for a site from S3 and calculates the CPC value + * @param context - Context object + * @param context.env - Environment variables + * @param context.env.S3_IMPORTER_BUCKET_NAME - S3 importer bucket name + * @param context.s3Client - S3 client + * @param context.log - Logger + * @param siteId - The site ID + * @returns CPC value in dollars + */ +export function calculateCPCValue(context: object, siteId: string): Promise; + export function s3Wrapper(fn: (request: object, context: object) => Promise): (request: object, context: object) => Promise; +/** + * Default CPC (Cost Per Click) value in dollars used when Ahrefs organic traffic data + * is not available or invalid. + */ +export const DEFAULT_CPC_VALUE: number; + export function fetch(url: string | Request, options?: RequestOptions): Promise; export function tracingFetch(url: string | Request, options?: RequestOptions): Promise; diff --git a/packages/spacecat-shared-utils/src/index.js b/packages/spacecat-shared-utils/src/index.js index 41f7c11bb..63487c6c6 100644 --- a/packages/spacecat-shared-utils/src/index.js +++ b/packages/spacecat-shared-utils/src/index.js @@ -73,9 +73,11 @@ export { extractUrlsFromSuggestion, } from './url-extractors.js'; -export { getStoredMetrics, storeMetrics } from './metrics-store.js'; +export { getStoredMetrics, storeMetrics, calculateCPCValue } from './metrics-store.js'; -export { s3Wrapper } from './s3.js'; +export { s3Wrapper, getObjectFromKey } from './s3.js'; + +export { DEFAULT_CPC_VALUE } from './constants.js'; export { fetch } from './adobe-fetch.js'; export { tracingFetch, SPACECAT_USER_AGENT } from './tracing-fetch.js'; diff --git a/packages/spacecat-shared-utils/src/metrics-store.js b/packages/spacecat-shared-utils/src/metrics-store.js index 65883212a..8c2277362 100644 --- a/packages/spacecat-shared-utils/src/metrics-store.js +++ b/packages/spacecat-shared-utils/src/metrics-store.js @@ -10,6 +10,8 @@ * governing permissions and limitations under the License. */ import { GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3'; +import { DEFAULT_CPC_VALUE } from './constants.js'; +import { getObjectFromKey } from './s3.js'; function createFilePath({ siteId, source, metric }) { if (!siteId) { @@ -80,3 +82,45 @@ export async function storeMetrics(content, config, context) { throw new Error(`Failed to upload metrics to ${filePath}, error: ${e.message}`); } } + +/** + * Fetches the organic traffic data for a site from S3 and calculate the CPC value as per + * https://wiki.corp.adobe.com/pages/viewpage.action?spaceKey=AEMSites&title=Success+Studio+Projected+Business+Impact+Metrics#SuccessStudioProjectedBusinessImpactMetrics-IdentifyingCPCvalueforadomain + * @param context + * @param siteId + * @returns {number} CPC value + */ +export async function calculateCPCValue(context, siteId) { + if (!context?.env?.S3_IMPORTER_BUCKET_NAME) { + throw new Error('S3 importer bucket name is required'); + } + if (!context.s3Client) { + throw new Error('S3 client is required'); + } + if (!context.log) { + throw new Error('Logger is required'); + } + if (!siteId) { + throw new Error('SiteId is required'); + } + const { s3Client, log } = context; + const bucketName = context.env.S3_IMPORTER_BUCKET_NAME; + const key = `metrics/${siteId}/ahrefs/organic-traffic.json`; + try { + const organicTrafficData = await getObjectFromKey(s3Client, bucketName, key, log); + if (!Array.isArray(organicTrafficData) || organicTrafficData.length === 0) { + log.warn(`Organic traffic data not available for ${siteId}. Using Default CPC value.`); + return DEFAULT_CPC_VALUE; + } + const lastTraffic = organicTrafficData.at(-1); + if (!lastTraffic.cost || !lastTraffic.value) { + log.warn(`Invalid organic traffic data present for ${siteId} - cost:${lastTraffic.cost} value:${lastTraffic.value}, Using Default CPC value.`); + return DEFAULT_CPC_VALUE; + } + // dividing by 100 for cents to dollar conversion + return lastTraffic.cost / lastTraffic.value / 100; + } catch (err) { + log.error(`Error fetching organic traffic data for site ${siteId}. Using Default CPC value.`, err); + return DEFAULT_CPC_VALUE; + } +} diff --git a/packages/spacecat-shared-utils/src/s3.js b/packages/spacecat-shared-utils/src/s3.js index a4fd19866..3ebd87ec6 100644 --- a/packages/spacecat-shared-utils/src/s3.js +++ b/packages/spacecat-shared-utils/src/s3.js @@ -10,9 +10,55 @@ * governing permissions and limitations under the License. */ -import { S3Client } from '@aws-sdk/client-s3'; +import { GetObjectCommand, S3Client } from '@aws-sdk/client-s3'; import { instrumentAWSClient } from './xray.js'; +/** + * Retrieves an object from S3 by its key and returns its JSON parsed content. + * If the object is not JSON, returns the raw body. + * If the object is not found, returns null. + * @param {import('@aws-sdk/client-s3').S3Client} s3Client - an S3 client + * @param {string} bucketName - the name of the S3 bucket + * @param {string} key - the key of the S3 object + * @param {import('@azure/logger').Logger} log - a logger instance + * @returns {Promise} + * - the content of the S3 object + */ +export async function getObjectFromKey(s3Client, bucketName, key, log) { + if (!s3Client || !bucketName || !key) { + log.error( + 'Invalid input parameters in getObjectFromKey: ensure s3Client, bucketName, and key are provided.', + ); + return null; + } + const command = new GetObjectCommand({ + Bucket: bucketName, + Key: key, + }); + try { + const response = await s3Client.send(command); + const contentType = response.ContentType; + const body = await response.Body.transformToString(); + + if (contentType && contentType.includes('application/json')) { + try { + return JSON.parse(body); + } catch (parseError) { + log.error(`Unable to parse content for key ${key}`, parseError); + return null; + } + } + // Always return body for non-JSON content types + return body; + } catch (err) { + log.error( + `Error while fetching S3 object from bucket ${bucketName} using key ${key}`, + err, + ); + return null; + } +} + /** * Adds an S3Client instance and bucket to the context. * diff --git a/packages/spacecat-shared-utils/test/index.test.js b/packages/spacecat-shared-utils/test/index.test.js index d08fee9cc..3169f0fa0 100644 --- a/packages/spacecat-shared-utils/test/index.test.js +++ b/packages/spacecat-shared-utils/test/index.test.js @@ -18,19 +18,42 @@ import * as allExports from '../src/index.js'; describe('Index Exports', () => { const expectedExports = [ 'arrayEquals', + 'calculateCPCValue', 'composeAuditURL', 'composeBaseURL', 'dateAfterDays', 'deepEqual', + 'DEFAULT_CPC_VALUE', + 'DELIVERY_TYPES', + 'detectAEMVersion', + 'detectLocale', + 'determineAEMCSPageId', + 'ensureHttps', + 'extractUrlsFromOpportunity', + 'extractUrlsFromSuggestion', 'fetch', + 'FORMS_AUDIT_INTERVAL', 'generateCSVFile', - 'getStoredMetrics', - 'replacePlaceholders', - 'getStaticContent', + 'getAccessToken', + 'getDateRanges', + 'getHighFormViewsLowConversionMetrics', + 'getHighPageViewsLowFormCtrMetrics', + 'getHighPageViewsLowFormViewsMetrics', + 'getLastNumberOfWeeks', + 'getMonthInfo', + 'getObjectFromKey', + 'getPageEditUrl', 'getPrompt', 'getQuery', + 'getSpacecatRequestHeaders', + 'getStaticContent', + 'getStoredMetrics', + 'getTemporalCondition', + 'getWeekInfo', 'hasText', + 'instrumentAWSClient', 'isArray', + 'isAWSLambda', 'isBoolean', 'isInteger', 'isIsoDate', @@ -42,16 +65,25 @@ describe('Index Exports', () => { 'isString', 'isValidDate', 'isValidEmail', + 'isValidHelixPreviewUrl', + 'isValidIMSOrgId', 'isValidUrl', 'isValidUUID', - 'isValidIMSOrgId', - 'isValidHelixPreviewUrl', + 'isoCalendarWeek', + 'isoCalendarWeekMonday', + 'isoCalendarWeekSunday', + 'llmoConfig', 'logWrapper', 'prependSchema', - 'getAccessToken', + 'prettifyLogForwardingConfig', + 'replacePlaceholders', + 'resolveCanonicalUrl', 'resolveCustomerSecretsName', 'resolveSecretsName', + 'retrievePageAuthentication', 's3Wrapper', + 'schemas', + 'SPACECAT_USER_AGENT', 'sqsEventAdapter', 'sqsWrapper', 'storeMetrics', @@ -61,36 +93,7 @@ describe('Index Exports', () => { 'stripWWW', 'toBoolean', 'tracingFetch', - 'getHighFormViewsLowConversionMetrics', - 'getHighPageViewsLowFormViewsMetrics', - 'getHighPageViewsLowFormCtrMetrics', - 'FORMS_AUDIT_INTERVAL', - 'SPACECAT_USER_AGENT', - 'isAWSLambda', - 'instrumentAWSClient', - 'retrievePageAuthentication', - 'getDateRanges', - 'getLastNumberOfWeeks', - 'resolveCanonicalUrl', - 'getSpacecatRequestHeaders', - 'ensureHttps', - 'getWeekInfo', - 'getMonthInfo', - 'getTemporalCondition', 'urlMatchesFilter', - 'extractUrlsFromOpportunity', - 'extractUrlsFromSuggestion', - 'detectAEMVersion', - 'determineAEMCSPageId', - 'DELIVERY_TYPES', - 'getPageEditUrl', - 'llmoConfig', - 'schemas', - 'detectLocale', - 'prettifyLogForwardingConfig', - 'isoCalendarWeek', - 'isoCalendarWeekSunday', - 'isoCalendarWeekMonday', ]; it('exports all expected functions', () => { diff --git a/packages/spacecat-shared-utils/test/metrics-store.test.js b/packages/spacecat-shared-utils/test/metrics-store.test.js index 8d057f3e2..d4684847b 100644 --- a/packages/spacecat-shared-utils/test/metrics-store.test.js +++ b/packages/spacecat-shared-utils/test/metrics-store.test.js @@ -16,6 +16,7 @@ import { expect, use } from 'chai'; import sinon from 'sinon'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; +import esmock from 'esmock'; import { GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3'; import { getStoredMetrics, storeMetrics } from '../src/metrics-store.js'; @@ -208,4 +209,142 @@ describe('Metrics Store', () => { } }); }); + + describe('calculateCPCValue', () => { + let getObjectFromKeyStub; + let calculateCPCValueFunc; + + beforeEach(async () => { + getObjectFromKeyStub = sinon.stub(); + const { calculateCPCValue } = await esmock('../src/metrics-store.js', { + '../src/s3.js': { + getObjectFromKey: getObjectFromKeyStub, + }, + }); + context = { + s3Client: {}, + log: { + warn: sinon.stub(), + error: sinon.stub(), + }, + env: { + S3_IMPORTER_BUCKET_NAME: 'test-bucket', + }, + }; + calculateCPCValueFunc = calculateCPCValue; + }); + + it('should calculate CPC value correctly from organic traffic data', async () => { + const organicTrafficData = [ + { cost: 150000, value: 10000 }, + { cost: 200000, value: 5000 }, + ]; + getObjectFromKeyStub.resolves(organicTrafficData); + + const result = await calculateCPCValueFunc(context, 'test-site'); + + expect(result).to.equal(0.4); // 200000 / 5000 / 100 = 0.4 + expect(getObjectFromKeyStub).to.have.been.calledWith( + context.s3Client, + 'test-bucket', + 'metrics/test-site/ahrefs/organic-traffic.json', + context.log, + ); + }); + + it('should return default CPC value when organic traffic data is not available', async () => { + getObjectFromKeyStub.resolves(null); + + const result = await calculateCPCValueFunc(context, 'test-site'); + + expect(result).to.equal(1.5); + expect(context.log.warn).to.have.been.calledWith('Organic traffic data not available for test-site. Using Default CPC value.'); + }); + + it('should return default CPC value when organic traffic data is empty array', async () => { + getObjectFromKeyStub.resolves([]); + + const result = await calculateCPCValueFunc(context, 'test-site'); + + expect(result).to.equal(1.5); + expect(context.log.warn).to.have.been.calledWith('Organic traffic data not available for test-site. Using Default CPC value.'); + }); + + it('should return default CPC value when cost is missing', async () => { + const organicTrafficData = [ + { cost: 150000, value: 10000 }, + { value: 5000 }, + ]; + getObjectFromKeyStub.resolves(organicTrafficData); + + const result = await calculateCPCValueFunc(context, 'test-site'); + + expect(result).to.equal(1.5); + expect(context.log.warn).to.have.been.calledWith('Invalid organic traffic data present for test-site - cost:undefined value:5000, Using Default CPC value.'); + }); + + it('should return default CPC value when value is missing', async () => { + const organicTrafficData = [ + { cost: 150000, value: 10000 }, + { cost: 200000 }, + ]; + getObjectFromKeyStub.resolves(organicTrafficData); + + const result = await calculateCPCValueFunc(context, 'test-site'); + + expect(result).to.equal(1.5); + expect(context.log.warn).to.have.been.calledWith('Invalid organic traffic data present for test-site - cost:200000 value:undefined, Using Default CPC value.'); + }); + + it('should return default CPC value on error', async () => { + getObjectFromKeyStub.rejects(new Error('S3 error')); + + const result = await calculateCPCValueFunc(context, 'test-site'); + + expect(result).to.equal(1.5); + expect(context.log.error).to.have.been.calledWith('Error fetching organic traffic data for site test-site. Using Default CPC value.'); + }); + + it('should throw error when S3_IMPORTER_BUCKET_NAME is missing', async () => { + context.env.S3_IMPORTER_BUCKET_NAME = null; + + try { + await calculateCPCValueFunc(context, 'test-site'); + expect.fail('Should have thrown an error'); + } catch (e) { + expect(e.message).to.equal('S3 importer bucket name is required'); + } + }); + + it('should throw error when s3Client is missing', async () => { + context.s3Client = null; + + try { + await calculateCPCValueFunc(context, 'test-site'); + expect.fail('Should have thrown an error'); + } catch (e) { + expect(e.message).to.equal('S3 client is required'); + } + }); + + it('should throw error when log is missing', async () => { + context.log = null; + + try { + await calculateCPCValueFunc(context, 'test-site'); + expect.fail('Should have thrown an error'); + } catch (e) { + expect(e.message).to.equal('Logger is required'); + } + }); + + it('should throw error when siteId is missing', async () => { + try { + await calculateCPCValueFunc(context, null); + expect.fail('Should have thrown an error'); + } catch (e) { + expect(e.message).to.equal('SiteId is required'); + } + }); + }); }); diff --git a/packages/spacecat-shared-utils/test/s3.test.js b/packages/spacecat-shared-utils/test/s3.test.js index da3faefd8..247c86774 100644 --- a/packages/spacecat-shared-utils/test/s3.test.js +++ b/packages/spacecat-shared-utils/test/s3.test.js @@ -15,7 +15,105 @@ import { expect } from 'chai'; import sinon from 'sinon'; import { S3Client } from '@aws-sdk/client-s3'; -import { s3Wrapper } from '../src/s3.js'; +import { s3Wrapper, getObjectFromKey } from '../src/s3.js'; + +describe('getObjectFromKey', () => { + let s3Client; + let log; + + beforeEach(() => { + s3Client = { + send: sinon.stub(), + }; + log = { + error: sinon.stub(), + }; + }); + + it('should retrieve and parse JSON object from S3', async () => { + const mockData = { key: 'value', nested: { data: 123 } }; + s3Client.send.resolves({ + ContentType: 'application/json', + Body: { + transformToString: sinon.stub().resolves(JSON.stringify(mockData)), + }, + }); + + const result = await getObjectFromKey(s3Client, 'test-bucket', 'test-key', log); + + expect(result).to.deep.equal(mockData); + expect(log.error).to.not.have.been.called; + }); + + it('should return raw body for non-JSON content', async () => { + const textContent = 'plain text content'; + s3Client.send.resolves({ + ContentType: 'text/plain', + Body: { + transformToString: sinon.stub().resolves(textContent), + }, + }); + + const result = await getObjectFromKey(s3Client, 'test-bucket', 'test-key', log); + + expect(result).to.equal(textContent); + expect(log.error).to.not.have.been.called; + }); + + it('should return null when S3 object is not found', async () => { + const error = new Error('NoSuchKey'); + error.name = 'NoSuchKey'; + s3Client.send.rejects(error); + + const result = await getObjectFromKey(s3Client, 'test-bucket', 'test-key', log); + + expect(result).to.be.null; + expect(log.error).to.have.been.calledWith('Error while fetching S3 object from bucket test-bucket using key test-key'); + }); + + it('should return null and log error when JSON parsing fails', async () => { + s3Client.send.resolves({ + ContentType: 'application/json', + Body: { + transformToString: sinon.stub().resolves('invalid json{'), + }, + }); + + const result = await getObjectFromKey(s3Client, 'test-bucket', 'test-key', log); + + expect(result).to.be.null; + expect(log.error).to.have.been.calledWith('Unable to parse content for key test-key'); + }); + + it('should return null when invalid parameters are provided', async () => { + const result1 = await getObjectFromKey(null, 'test-bucket', 'test-key', log); + expect(result1).to.be.null; + expect(log.error).to.have.been.calledWith('Invalid input parameters in getObjectFromKey: ensure s3Client, bucketName, and key are provided.'); + + log.error.resetHistory(); + + const result2 = await getObjectFromKey(s3Client, null, 'test-key', log); + expect(result2).to.be.null; + expect(log.error).to.have.been.calledWith('Invalid input parameters in getObjectFromKey: ensure s3Client, bucketName, and key are provided.'); + + log.error.resetHistory(); + + const result3 = await getObjectFromKey(s3Client, 'test-bucket', null, log); + expect(result3).to.be.null; + expect(log.error).to.have.been.calledWith('Invalid input parameters in getObjectFromKey: ensure s3Client, bucketName, and key are provided.'); + }); + + it('should handle S3 errors gracefully', async () => { + const error = new Error('AccessDenied'); + error.name = 'AccessDenied'; + s3Client.send.rejects(error); + + const result = await getObjectFromKey(s3Client, 'test-bucket', 'test-key', log); + + expect(result).to.be.null; + expect(log.error).to.have.been.calledWith('Error while fetching S3 object from bucket test-bucket using key test-key'); + }); +}); describe('S3 wrapper', () => { let fakeContext;