Merge pull request #75 from privacy-tech-lab/issue-54-4
Issue #54 - Changes made during the validation set
katehausladen authored Dec 26, 2023
2 parents 008148b + e303df7 commit 72f5d87
Showing 13 changed files with 11,771 additions and 27 deletions.
54 changes: 31 additions & 23 deletions gpc-analysis-extension/src/background/analysis/analysis.js
@@ -56,8 +56,9 @@ var urlsWithUSPString = [];
var firstPartyDomain = "";
var changingSitesOnAnalysis = false;
var debugging_version = true; // assume that the debugging table exists
var urlclassification = { "firstParty": {}, "thirdParty": {} };
var run_halt_counter = {}
var urlclassification = {};
var run_halt_counter = {};
var last_committed_url = "";

/******************************************************************************/
/******************************************************************************/
@@ -265,7 +266,7 @@ async function fetchUSPStringData() {
//sends sql post request to db and then resets the global sql_data
function send_sql_and_reset(domain) {
analysis_userend[domain]["domain"] = domain;
analysis_userend[domain]["urlClassification"] = JSON.stringify(urlclassification[domain]); //add urlClassification info
analysis_userend[domain]["urlClassification"] = JSON.stringify(urlclassification[domain]).slice(0, 5000); //add urlClassification info, cap it at 5000 chars
axios
.post("http://localhost:8080/analysis", analysis_userend[domain], {
headers: {
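A minimal sketch (outside the diff) of the new 5,000-character cap applied to the serialized urlClassification payload above; the serializeClassification helper name and the sample object are illustrative, while the .slice(0, 5000) call mirrors the change.

// Sketch: serialize a per-domain classification object and cap it at 5,000
// characters, matching the .slice(0, 5000) added in send_sql_and_reset above.
function serializeClassification(classification, capLength = 5000) {
  // Tracker-heavy sites can produce very long JSON strings; the hard slice
  // keeps the stored value at a predictable size, at the cost of possibly
  // cutting the JSON mid-token.
  return JSON.stringify(classification).slice(0, capLength);
}

// Example with the { firstParty, thirdParty } shape built by the webRequest listener below:
const example = { firstParty: {}, thirdParty: { tracking_ad: ["doubleclick.net"] } };
console.log(serializeClassification(example)); // '{"firstParty":{},"thirdParty":{"tracking_ad":["doubleclick.net"]}}'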
@@ -313,7 +314,7 @@ async function runAnalysis() {

let url = new URL(uspapiData.location);
let domain = parseURL(url);
if (uspapiData.data !== "USPAPI_FAILED") {
if (uspapiData.data !== "USPAPI_FAILED" && uspapiData.data !== null) { //check for null return val for sites like cbs12.com
logData(domain, "USPAPI", uspapiData.data);
}
if (uspapiData.cookies) {
@@ -322,7 +323,7 @@

url = new URL(gppData.location); // when we remove USPAPI, we can uncomment this
domain = parseURL(url);
if (gppData.data !== "GPP_FAILED") {
if (gppData.data !== "GPP_FAILED" && gppData.data !== null) {
if (gppData.data.gppVersion == '1.0') {
// getGPPData is where the GPP String is
const getGPPData = await fetch_getGPPData();
@@ -336,8 +337,10 @@
}

changingSitesOnAnalysis = true; // Analysis=ON flag
post_to_debug(firstPartyDomain, "line 342", "runAnalysis-addingHeaders");
addGPCHeaders();
await new Promise((resolve) => setTimeout(resolve, 2500)); //new
// await new Promise((resolve) => setTimeout(resolve, 3000)); //for ground truth collection
chrome.tabs.reload();
post_to_debug(firstPartyDomain, "line 342", "runAnalysis-end");
}
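A short sketch of the two patterns introduced in this hunk and the ones above it: treat a null return from the USP/GPP call the same as a failure (sites like cbs12.com resolve with null), and wait briefly after adding GPC headers before reloading the tab. The hasUsableData helper and sleep wrapper are illustrative names; chrome.tabs.reload and addGPCHeaders are the calls used in the diff.

// Sketch: null-guard for API data plus a short delay before the reload
// that re-requests the page with GPC headers attached.
function hasUsableData(apiData, failureSentinel) {
  // Some sites resolve the API call with null rather than the failure
  // string, so both cases are treated as "nothing to log".
  return apiData !== failureSentinel && apiData !== null;
}

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function reloadWithGPC() {
  addGPCHeaders();        // defined elsewhere in this file
  await sleep(2500);      // 2500 ms here; the commented-out 3000 ms above was for ground-truth runs
  chrome.tabs.reload();   // reload so the site sees the GPC signal
}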
@@ -595,6 +598,8 @@ function onCommittedCallback(details) {
if (validTransition) {
let url = new URL(details.url);
let domain = parseURL(url);
last_committed_url = domain;
post_to_debug('last_committed_url', last_committed_url);
if (changingSitesOnAnalysis) {
// add SENDING GPC TO FILE
// Turn off changing sites on analysis
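A hedged sketch of how the new last_committed_url global is kept up to date from the navigation callback shown above; parseURL and post_to_debug are helpers defined elsewhere in this file, and the frameId check stands in for the validTransition filter the real callback uses.

// Sketch: remember the domain of the most recent top-level navigation so the
// webRequest listener in enableListeners can attribute tracker requests to it.
let last_committed_url = "";

function onCommittedCallback(details) {
  if (details.frameId !== 0) return;                        // only top-frame commits
  const domain = parseURL(new URL(details.url));            // helper from this file
  last_committed_url = domain;
  post_to_debug("last_committed_url", last_committed_url);  // debug-table logger from this file
}

// Registered elsewhere, roughly like:
// chrome.webNavigation.onCommitted.addListener(onCommittedCallback);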
@@ -608,6 +613,8 @@
// Used for crawling
async function runAnalysisonce(location) {
await new Promise((resolve) => setTimeout(resolve, 7000)); //give it 7s to get ready after DOM content has loaded
// await new Promise((resolve) => setTimeout(resolve, 35000)); // for ground truth

let analysis_started = await storage.get(stores.settings, "ANALYSIS_STARTED");
let url = new URL(location);
let domain = parseURL(url);
@@ -645,12 +652,14 @@ async function runAnalysisonce(location) {

} else {
post_to_debug(domain, "tried to run halt 2x", run_halt_counter[domain]);
await new Promise((resolve) => setTimeout(resolve, 1500)); // give it a bit more time to finish the first halt
}
// always reset vars/set analysis started to false so that it won't get stuck.
//resetting vars
function afterUSPStringFetched() {
changingSitesOnAnalysis = false;
firstPartyDomain = "";
last_committed_url = "";
updateAnalysisCounter();
removeGPCSignals();

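The hunk above touches the per-domain halt counter and the post-analysis reset. A hedged sketch of the guard, assuming run_halt_counter maps a domain to the number of halt attempts; the guardedHalt and doHalt names are illustrative and not taken from the repository.

// Sketch: only run the halt/reset path once per domain; on a second attempt,
// log it and give the first halt a little extra time to finish.
const run_halt_counter = {};

async function guardedHalt(domain, doHalt) {
  run_halt_counter[domain] = (run_halt_counter[domain] || 0) + 1;
  if (run_halt_counter[domain] === 1) {
    await doHalt();                                            // first attempt: run the real halt
  } else {
    post_to_debug(domain, "tried to run halt 2x", run_halt_counter[domain]); // debug logger from this file
    await new Promise((resolve) => setTimeout(resolve, 1500)); // give the first halt a bit more time
  }
}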
@@ -697,9 +706,9 @@ function enableListeners() {
// listener that listens for web requests and filters for requests that have 1st/3rd parties that are on disconnect list ()
function (details) {
var match = details.documentUrl.match(/moz-extension:\/\//); // returns array if matched, else returns null
if (!match) {
let url = new URL(details.documentUrl);
let a = parseURL(url);
if (!match && last_committed_url != "") {
// let url = new URL(details.documentUrl);
// let a = parseURL(url);
let short_details_url = details.url.match(/https:\/\/([^\/]+)/g); //match with regex to get the domain
if (short_details_url.length > 0) {
short_details_url = short_details_url[0] //to decrease characters, get rid of https://www. or just https://
@@ -713,40 +722,39 @@
else { short_details_url = details.url.slice(0, 50) } // if there aren't any matches, take up to the first 50 characters
var url_classes_we_want = ['fingerprinting', 'tracking_ad', 'tracking_social', 'any_basic_tracking', 'any_social_tracking'];


if (!(last_committed_url in urlclassification)) { // if this domain doesn't have data, init the domain
post_to_debug('init urlclass for:', last_committed_url)
urlclassification[last_committed_url] = { "firstParty": {}, "thirdParty": {} };
}
if (details.urlClassification.firstParty.length > 0) {
for (let url_class = 0; url_class < details.urlClassification.firstParty.length; url_class++) {
if (!(a in urlclassification)) { // if this domain doesn't have data, init the domain
urlclassification[a] = { "firstParty": {}, "thirdParty": {} };
}
if (details.urlClassification.firstParty[url_class] in urlclassification[a]['firstParty']) { // if the tracking type exists already
if (!(urlclassification[a]["firstParty"][details.urlClassification.firstParty[url_class]].includes(short_details_url))) {
urlclassification[a]["firstParty"][details.urlClassification.firstParty[url_class]].push(short_details_url);
if (details.urlClassification.firstParty[url_class] in urlclassification[last_committed_url]['firstParty']) { // if the tracking type exists already
if (!(urlclassification[last_committed_url]["firstParty"][details.urlClassification.firstParty[url_class]].includes(short_details_url))) {
urlclassification[last_committed_url]["firstParty"][details.urlClassification.firstParty[url_class]].push(short_details_url);
}
}
else { // if this tracking type hasn't been seen yet
// the only details.urlClassification.firstParty[url_class] values we care about are in classes_we_want. ignore all others
if (url_classes_we_want.includes(details.urlClassification.firstParty[url_class])) {
urlclassification[a]["firstParty"][details.urlClassification.firstParty[url_class]] = [short_details_url]
urlclassification[last_committed_url]["firstParty"][details.urlClassification.firstParty[url_class]] = [short_details_url]
}
}
}
}

if (details.urlClassification.thirdParty.length > 0) {
for (let url_class = 0; url_class < details.urlClassification.thirdParty.length; url_class++) {
if (!(a in urlclassification)) { // if this domain doesn't have data, init the domain
urlclassification[a] = { "firstParty": {}, "thirdParty": {} };
}
// if (url_classes_we_want.includes(details.urlClassification.thirdParty[url_class])) {
if (details.urlClassification.thirdParty[url_class] in urlclassification[a]['thirdParty']) { // if the tracking type exists already
if (!(urlclassification[a]["thirdParty"][details.urlClassification.thirdParty[url_class]].includes(short_details_url))) {
urlclassification[a]["thirdParty"][details.urlClassification.thirdParty[url_class]].push(short_details_url);
if (details.urlClassification.thirdParty[url_class] in urlclassification[last_committed_url]['thirdParty']) { // if the tracking type exists already
if (!(urlclassification[last_committed_url]["thirdParty"][details.urlClassification.thirdParty[url_class]].includes(short_details_url))) {
urlclassification[last_committed_url]["thirdParty"][details.urlClassification.thirdParty[url_class]].push(short_details_url);
}
}
else { // if this tracking type hasn't been seen yet
// the only details.urlClassification.thirdParty[url_class] values we care about are in url_classes_we_want. ignore all others
if (url_classes_we_want.includes(details.urlClassification.thirdParty[url_class])) {
urlclassification[a]["thirdParty"][details.urlClassification.thirdParty[url_class]] = [short_details_url]
urlclassification[last_committed_url]["thirdParty"][details.urlClassification.thirdParty[url_class]] = [short_details_url]
}
}
}
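Pulled out of the listener above, a self-contained sketch of the accumulation step: each Firefox webRequest details object contributes its urlClassification entries to the current first-party domain, split into firstParty/thirdParty buckets, limited to the tracking classes of interest, and deduplicated. The recordClassification name is illustrative; details.urlClassification is the Firefox-provided field used in the diff.

// Sketch: fold one webRequest `details` object into the per-domain accumulator
// that send_sql_and_reset later serializes and caps at 5,000 characters.
const url_classes_we_want = [
  "fingerprinting", "tracking_ad", "tracking_social",
  "any_basic_tracking", "any_social_tracking",
];

function recordClassification(urlclassification, last_committed_url, details) {
  if (last_committed_url === "") return;                 // nothing committed yet; skip
  if (!(last_committed_url in urlclassification)) {      // init this domain's buckets
    urlclassification[last_committed_url] = { firstParty: {}, thirdParty: {} };
  }
  // Shorten the request URL to its host, dropping "www.", or fall back to 50 chars.
  const hostMatch = details.url.match(/https:\/\/([^\/]+)/);
  const short_url = hostMatch ? hostMatch[1].replace(/^www\./, "") : details.url.slice(0, 50);

  for (const party of ["firstParty", "thirdParty"]) {
    for (const cls of details.urlClassification[party]) {
      if (!url_classes_we_want.includes(cls)) continue;  // ignore classes we don't track
      const bucket = urlclassification[last_committed_url][party];
      if (!(cls in bucket)) bucket[cls] = [];
      if (!bucket[cls].includes(short_url)) bucket[cls].push(short_url);
    }
  }
}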
@@ -779,4 +787,4 @@ export function init() {

export function halt() {
disableListeners();
}
}