Skip to content

Commit

Permalink
adding es removal for posts, libguides, database and reindexing. Ensu…
Browse files Browse the repository at this point in the history
…ring triggers
  • Loading branch information
jrmerz committed Jul 18, 2022
1 parent c3ac7a4 commit 96f263e
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
service-account.json
node_modules
25 changes: 25 additions & 0 deletions cmds/npm-install-all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#! /bin/bash
#
# Runs `npm install` in every JavaScript package of the theme and of the
# WordPress plugins, so all editor/public bundles have their dependencies.

set -e
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/..

# Package directories, relative to the repository root.
PACKAGE_DIRS=(
  ucdlib-theme-wp/src/editor
  ucdlib-theme-wp/src/public

  ucdlib-wp-plugins/ucdlib-assets/src/editor
  ucdlib-wp-plugins/ucdlib-assets/src/public

  ucdlib-wp-plugins/ucdlib-directory/src/editor

  ucdlib-wp-plugins/ucdlib-locations/src/editor
  ucdlib-wp-plugins/ucdlib-locations/src/public

  ucdlib-wp-plugins/ucdlib-migration/src/editor

  ucdlib-wp-plugins/ucdlib-search/src/public

  ucdlib-wp-plugins/ucdlib-special/src/editor
  ucdlib-wp-plugins/ucdlib-special/src/public
)

for dir in "${PACKAGE_DIRS[@]}"; do
  # Keep `cd` and `npm install` as separate statements: in `cd X && npm install`
  # a failing `cd` is exempt from `set -e`, so a missing directory would be
  # skipped silently instead of aborting the script.
  cd "$ROOT_DIR/$dir"
  npm install
done
88 changes: 65 additions & 23 deletions elastic-search/lib/harvest/gcs.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,36 +92,78 @@ class GCSHarvest {
}
} catch(e) {}

// check db url
// JM - Here if we want it, but opens a can-o-worms
// try {
// let url = new URL(database.link);
// let protocol = url.protocol;
// if( protocol === 'http:' ) url.protocol = 'https:';

// let valid = await this.validUrl(url);
// if( !valid && protocol === 'http:' ) {
// url.protocol = 'http:';
// valid = await this.validUrl(url);
// }

// if( !valid ) {
// this.recordStatus(database, 'ignored-bad-url', database.link, null, 'database');
// continue;
// }

// } catch(e) {
// this.recordStatus(database, 'ignored-invalid-url', database.link, e.message, 'database');
// continue;
// }

let resp = await elasticSearch.insert(database);
if( resp.result !== 'updated' && resp.result !== 'created' ) {
throw new Error('Unknown result from elasicsearch insert: '+resp.result);
}

this.recordStatus(database, 'success', database.link, null, 'database');
}

// now check to remove old libguides
let results = await elasticSearch.search({
size: 9999,
query: {
term: {
type : "libguide"
}
}
});

results = results.hits.hits;
for( let result of results ) {
let exists = this.libguideExists(indexedData.urls, result);
if( !exists ) {
await elasticSearch.client.delete({
index : config.elasticSearch.indexAlias,
id : result._id
})
}
}

// now check to remove old databases
results = await elasticSearch.search({
size: 9999,
query: {
term: {
type : "database"
}
}
});
results = results.hits.hits;

for( let result of results ) {
let exists = this.databaseExists(databases, result);
if( !exists ) {
await elasticSearch.client.delete({
index : config.elasticSearch.indexAlias,
id : result._id
})
}
}

}

async remove(id) {
try {
logger.info(`${id} no longer exists, removing`);
await elasticSearch.client.delete({
index : config.elasticSearch.indexAlias,
id : id
});
this.recordStatus({id}, 'deleted', id);
} catch(e) {
this.recordStatus({id}, 'error', id, e);
logger.error(`Failed to remove ${id}`);
}
}

databaseExists(databases, item) {
return databases.find(db => db.id === item._id) ? true : false;
}

libguideExists(libguides, item) {
return libguides.find(url => url.url === item._id) ? true : false;
}

async validUrl(url) {
Expand Down
36 changes: 35 additions & 1 deletion elastic-search/lib/harvest/wp.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class WPHarvest {

constructor() {
this.POST_TYPES = config.wordpress.types;
logger.info('Wordpress indexer will index posts of types: '+this.POST_TYPES.join(', '));
}

startInterval() {
Expand Down Expand Up @@ -40,6 +41,19 @@ class WPHarvest {
async reharvestAll() {
await this.init();

// run elastic search delete by type
let result = await elasticSearch.client.deleteByQuery({
index : config.elasticSearch.indexAlias,
body : {
query: {
bool : {
should : this.POST_TYPES.map(type => ({term: {type}}))
}
}
}
});
logger.info(`Scrubbed ${result.deleted} posts from elastic search of types: ${this.POST_TYPES.join(', ')}`);

let resp = await mysql.query(`select ID from wp_posts where post_status = 'publish' and post_type IN (?)`, [this.POST_TYPES]);
for( let post of resp.results ) {
await this.harvestPost(post.ID, {recordAge: false});
Expand All @@ -55,20 +69,22 @@ class WPHarvest {
// post doesn't exist in wp_posts
// remove any stale copy of it from elastic search
if( qResp.results.length === 0 ) {
logger.warn('Still need to wire up elastic search delete for removed wp_post');
await this.deletePostFromEs(postId);
this.recordStatus('unknown', 'ignored-deleted', postId);
return;
}

// check this is a post type we are interested in harvesting
post = qResp.results[0];
if( !this.POST_TYPES.includes(post.post_type) ) {
await this.deletePostFromEs(postId);
this.recordStatus(post.post_type, 'ignored-type', postId);
return;
}

// check this post is actually published
if( post.post_status !== 'publish' ) {
await this.deletePostFromEs(postId);
this.recordStatus(post.post_type, 'ignored-unpublished', postId);
return;
}
Expand Down Expand Up @@ -117,6 +133,24 @@ class WPHarvest {
}
}

async deletePostFromEs(id) {
try {
let exists = await elasticSearch.client.exists({
index : config.elasticSearch.indexAlias,
id : id
});
if( !exists ) return;

logger.info('Removing post from elastic search: '+id);
await elasticSearch.client.delete({
index : config.elasticSearch.indexAlias,
id : id
});
} catch(e) {
logger.error('Failed to remove post from elastic search: '+id, e);
}
}

recordStatus(type, status, postId, error='') {
metrics.log(
config.metrics.definitions['page-index-status'].type,
Expand Down
11 changes: 9 additions & 2 deletions elastic-search/lib/mysql.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,20 @@ class MySQL {
let q = `SELECT count(*) as count
FROM information_schema.TABLES
WHERE (TABLE_SCHEMA = '${config.mysql.database}') AND (TABLE_NAME = 'ucdlib_post_updates_log')`;

let tq = `SELECT count(*) as count
FROM information_schema.TRIGGERS
WHERE (EVENT_OBJECT_SCHEMA = 'wordpress') AND (TRIGGER_NAME = 'ucdlib_wp_posts_insert_log_trigger' OR TRIGGER_NAME = 'ucdlib_wp_posts_update_log_trigger')`;


let resp = await this.query(q);
if( resp.results.length && resp.results[0].count > 0 ) {
let tresp = await this.query(tq);
if( resp.results.length && resp.results[0].count > 0 &&
tresp.results.length && tresp.results[0].count > 1 ) {
return;
}

logger.info('Adding indexer sql schema to database');
logger.info('Ensuring indexer sql schema in database');
let schema = fs.readFileSync(path.join(__dirname, 'sql', 'post-updates-log.sql'), 'utf-8');

await this.query(schema);
Expand Down
4 changes: 3 additions & 1 deletion elastic-search/lib/sql/post-updates-log.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
create table ucdlib_post_updates_log (
create table IF NOT EXISTS ucdlib_post_updates_log (
id bigint primary key auto_increment,
post_id bigint,
post_modified datetime
);

DROP TRIGGER IF EXISTS ucdlib_wp_posts_insert_log_trigger;
create trigger ucdlib_wp_posts_insert_log_trigger after insert on wp_posts
for each row
begin
insert into ucdlib_post_updates_log(post_id, post_modified) values (new.ID, new.post_modified);
end;

DROP TRIGGER IF EXISTS ucdlib_wp_posts_update_log_trigger;
create trigger ucdlib_wp_posts_update_log_trigger after update on wp_posts
for each row
begin
Expand Down
2 changes: 1 addition & 1 deletion ucdlib-wp-plugins
Submodule ucdlib-wp-plugins updated 28 files
+1 −0 ucdlib-assets/src/public/index.js
+3 −12 ucdlib-assets/src/public/lib/build-config.js
+15 −0 ucdlib-assets/src/public/lib/scss/style.scss
+25 −1 ucdlib-assets/src/public/package-lock.json
+1 −0 ucdlib-assets/src/public/package.json
+130 −68 ucdlib-directory/includes/block-transformations.php
+8 −2 ucdlib-directory/includes/blocks.php
+41 −1 ucdlib-directory/includes/departments.php
+6 −0 ucdlib-directory/includes/directory-tags.php
+6 −0 ucdlib-directory/includes/libraries.php
+61 −55 ucdlib-directory/includes/people.php
+92 −0 ucdlib-directory/includes/utils.php
+4 −0 ucdlib-directory/src/editor/lib/blocks/index.js
+12 −0 ucdlib-directory/src/editor/lib/blocks/ucdlib-directory-filters/edit.js
+21 −0 ucdlib-directory/src/editor/lib/blocks/ucdlib-directory-filters/index.js
+12 −0 ucdlib-directory/src/editor/lib/blocks/ucdlib-directory-results/edit.js
+21 −0 ucdlib-directory/src/editor/lib/blocks/ucdlib-directory-results/index.js
+1 −0 ucdlib-directory/src/public/index.js
+13 −0 ucdlib-directory/src/public/package.json
+150 −0 ucdlib-directory/src/public/src/elements/ucdlib-directory-filters.js
+103 −0 ucdlib-directory/src/public/src/elements/ucdlib-directory-filters.tpl.js
+1 −0 ucdlib-directory/views/blocks/directory-filters.twig
+25 −0 ucdlib-directory/views/blocks/directory-results.twig
+2 −11 ucdlib-directory/views/blocks/person-contact.twig
+4 −13 ucdlib-directory/views/blocks/person-library-locations.twig
+58 −0 ucdlib-directory/views/macros/person.twig
+20 −9 ucdlib-locations/includes/location.php
+5 −2 ucdlib-locations/includes/post-types.php

0 comments on commit 96f263e

Please sign in to comment.