From 84b199588ce7adbd1d381cdb06aa82ba2df058d7 Mon Sep 17 00:00:00 2001 From: davidperezgar Date: Sun, 3 Nov 2024 11:17:24 +0100 Subject: [PATCH 1/4] init --- .../Checks/Plugin_Repo/Plugin_Readme_Check.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php index 71ee0e530..0685cb3cc 100644 --- a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php +++ b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php @@ -104,6 +104,9 @@ protected function check_files( Check_Result $result, array $files ) { // Check the readme file for contributors. $this->check_for_contributors( $result, $readme_file ); + + // Check for third parties privacy notes. + $this->check_for_privacy_notes( $result, $readme_file, $parser ); } /** @@ -716,6 +719,18 @@ private function check_for_contributors( Check_Result $result, string $readme_fi } } + /** + * Checks the readme file for contributors. + * + * @since 1.3.0 + * + * @param Check_Result $result The Check Result to amend. + * @param string $readme_file Readme file. + */ + private function check_for_privacy_notes( Check_Result $result, string $readme_file, Parser $parser ) { + + } + /** * Returns current major WordPress version. 
* From 2908a1930f646ae46fa51ec3194cf9e7f58360ce Mon Sep 17 00:00:00 2001 From: davidperezgar Date: Sat, 14 Dec 2024 13:28:00 +0100 Subject: [PATCH 2/4] read domains in readme --- .../Plugin_Repo/Plugin_Readme_Check.php | 20 +- includes/Traits/External_Utils.php | 257 + includes/Traits/TLD_Names.php | 9200 +++++++++++++++++ 3 files changed, 9473 insertions(+), 4 deletions(-) create mode 100644 includes/Traits/External_Utils.php create mode 100644 includes/Traits/TLD_Names.php diff --git a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php index d1dd789b6..20540047a 100644 --- a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php +++ b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php @@ -12,6 +12,8 @@ use WordPress\Plugin_Check\Checker\Checks\Abstract_File_Check; use WordPress\Plugin_Check\Traits\Amend_Check_Result; use WordPress\Plugin_Check\Traits\Find_Readme; +use WordPress\Plugin_Check\Traits\TLD_Names; +use WordPress\Plugin_Check\Traits\External_Utils; use WordPress\Plugin_Check\Traits\License_Utils; use WordPress\Plugin_Check\Traits\Stable_Check; use WordPressdotorg\Plugin_Directory\Readme\Parser; @@ -27,6 +29,8 @@ class Plugin_Readme_Check extends Abstract_File_Check { use Amend_Check_Result; use Find_Readme; + use TLD_Names; + use External_Utils; use Stable_Check; use License_Utils; @@ -108,7 +112,7 @@ protected function check_files( Check_Result $result, array $files ) { $this->check_for_contributors( $result, $readme_file ); // Check for third parties privacy notes. - $this->check_for_privacy_notes( $result, $readme_file, $parser ); + $this->check_for_privacy_notes( $result, $readme_file, $parser, $files ); } /** @@ -656,14 +660,22 @@ private function check_for_contributors( Check_Result $result, string $readme_fi } /** - * Checks the readme file for contributors. + * Checks the readme file for external privacy notes. 
* - * @since 1.3.0 + * @since 1.4.0 * * @param Check_Result $result The Check Result to amend. * @param string $readme_file Readme file. */ - private function check_for_privacy_notes( Check_Result $result, string $readme_file, Parser $parser ) { + private function check_for_privacy_notes( Check_Result $result, string $readme_file, Parser $parser, array $files ) { + $existing_tld_names = $this->get_tld_names(); + $domains = $this->load_domains_mentioned_in_readme( $readme_file, $existing_tld_names ); + $files_ext = $this->filter_files_for_external( $files, $result->plugin()->path() ); + + foreach( $files_ext as $file ) { + $lines = file( $file ); + } + } diff --git a/includes/Traits/External_Utils.php b/includes/Traits/External_Utils.php new file mode 100644 index 000000000..8c9434e85 --- /dev/null +++ b/includes/Traits/External_Utils.php @@ -0,0 +1,257 @@ + strlen( $domain_tld ) ) { + $domain_tld = $tld; + } + } + } + + if ( ! empty( $domain_tld ) ) { + // Get domain from host and tld + $domain = str_replace( '.' . $domain_tld, '', $host ); // remove the TLD from the host + $parts = explode( '.', $domain ); // split the remaining host into parts + $domain = end( $parts ) . '.' . $domain_tld; + + //Find domain + $key = $this->getKeyDomainMentionedInReadme( $domain ); + if ( false !== $key ) { + // If found, just add URL + $domains_mentioned[ $key ]['urls'][] = $url; + if ( ! empty( $path ) ) { + $domains_mentioned[ $key ]['paths'][] = $path; + } + } else { + //Not found, create it. + $domain_mentioned = array( + 'domains' => $this->addDomainsOfSameService( $domain ), + 'urls' => array( $url ), + 'paths' => array(), + ); + if ( ! empty( $path ) ) { + $domain_mentioned['paths'] = array( $path ); + } + $domains_mentioned[] = $domain_mentioned; + } + } + } + } + } + } + } + + } + if ( ! 
empty( $domains_mentioned ) ) { + $domains_mentioned = array_map( function ( $domain ) { + $domain['urls'] = array_unique( $domain['urls']); + return $domain; + }, $domains_mentioned ); + } + + return $domains_mentioned; + } + + function getKeyDomainMentionedInReadme( $string ) { + if ( ! empty( $this->domainsMentionedReadme ) ) { + foreach ( $this->domainsMentionedReadme as $key => $domains ) { + if ( ! empty( $domains['domains'] ) ) { + foreach ( $domains['domains'] as $domain ) { + if ( str_contains( $string, $domain ) ) { + return $key; + } + } + } + } + } + + return false; + } + + function addDomainsOfSameService( $domain ) { + $domains = array( $domain ); + $domainsOfTheSameService = array( + 'paypal.com' => [ 'paypal.com', 'paypalobjects.com' ], + 'google.com' => [ 'google.com', 'googleapis.com', 'googletagmanager.com' ], + 'microsoft.com' => [ 'microsoft.com', 'outlook.com', 'live.com' ], + 'atlassian.net' => [ 'atlassian.com', 'trello.com' ], + 'dropbox.com' => [ 'dropbox.com', 'dropboxapi.com' ], + 'tiktok.com' => [ 'tiktok.com', 'tiktokapis.com' ], + 'zendesk.com' => [ 'zendesk.com', 'zdassets.com' ] + ); + foreach ( $domainsOfTheSameService as $key => $service ) { + foreach ( $service as $serviceDomain ) { + if ( $serviceDomain === $domain ) { + $domains = array_merge( $domains, $domainsOfTheSameService[ $key ] ); + $domains = array_unique( $domains ); + } + } + } + + return $domains; + } + + function isDomainMentionedInReadme( $domain ) { + $key = $this->getKeyDomainMentionedInReadme( $domain ); + if ( false !== $key ) { + return true; + } + + return false; + } + + function isDomainDocumentedReadme( $domain ) { + $key = $this->getKeyDomainMentionedInReadme( $domain ); + $privacy = false; + $terms = false; + + if ( ! 
empty( $this->domainsMentionedReadme[ $key ]['paths'] ) ) { + foreach ( $this->domainsMentionedReadme[ $key ]['paths'] as $path ) { + foreach ( $this->privacyCommonURIsPaths as $privacyStr ) { + if ( str_contains( $path, $privacyStr ) ) { + $privacy = $path; + break; + } + } + foreach ( $this->termsCommonURIsPaths as $termsStr ) { + if ( str_contains( $path, $termsStr ) ) { + $terms = $path; + break; + } + } + } + } + + if ( $privacy || $terms ) { // To lower down false positives while keeping the check we are ok to have just one of them. + return true; + } + + return false; + } + + protected function find_external_calls( $file ) { + $lines = file( $file ); + $this->find_functions(); + $this->regexKnownUrls( $lines ); + $this->findClasses(); + $this->regexEstructures( $lines ); + $this->findDeclarations( $lines ); + } + + /** + * Find functions in the file. + * + * @since 1.4.0 + */ + protected function find_functions() { + + } + +} diff --git a/includes/Traits/TLD_Names.php b/includes/Traits/TLD_Names.php new file mode 100644 index 000000000..be1ede8a5 --- /dev/null +++ b/includes/Traits/TLD_Names.php @@ -0,0 +1,9200 @@ + Date: Sat, 14 Dec 2024 13:31:47 +0100 Subject: [PATCH 3/4] documented functions --- .../Plugin_Repo/Plugin_Readme_Check.php | 2 +- includes/Traits/External_Utils.php | 48 +++++++++++++++---- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php index 20540047a..a70df8a7e 100644 --- a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php +++ b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php @@ -673,7 +673,7 @@ private function check_for_privacy_notes( Check_Result $result, string $readme_f $files_ext = $this->filter_files_for_external( $files, $result->plugin()->path() ); foreach( $files_ext as $file ) { - $lines = file( $file ); + } diff --git a/includes/Traits/External_Utils.php 
b/includes/Traits/External_Utils.php index 8c9434e85..1cffadcb0 100644 --- a/includes/Traits/External_Utils.php +++ b/includes/Traits/External_Utils.php @@ -122,7 +122,7 @@ protected function load_domains_mentioned_in_readme( $readme_file, $existing_tld $domain = end( $parts ) . '.' . $domain_tld; //Find domain - $key = $this->getKeyDomainMentionedInReadme( $domain ); + $key = $this->get_key_domain_mentioned_in_readme( $domain ); if ( false !== $key ) { // If found, just add URL $domains_mentioned[ $key ]['urls'][] = $url; @@ -132,7 +132,7 @@ protected function load_domains_mentioned_in_readme( $readme_file, $existing_tld } else { //Not found, create it. $domain_mentioned = array( - 'domains' => $this->addDomainsOfSameService( $domain ), + 'domains' => $this->add_domains_of_same_service( $domain ), 'urls' => array( $url ), 'paths' => array(), ); @@ -159,7 +159,15 @@ protected function load_domains_mentioned_in_readme( $readme_file, $existing_tld return $domains_mentioned; } - function getKeyDomainMentionedInReadme( $string ) { + /** + * Get key domain mentioned in readme file. + * + * @since 1.4.0 + * + * @param string $string String. + * @return string|bool Key of domain mentioned in readme file, or false if not found. + */ + function get_key_domain_mentioned_in_readme( $string ) { if ( ! empty( $this->domainsMentionedReadme ) ) { foreach ( $this->domainsMentionedReadme as $key => $domains ) { if ( ! empty( $domains['domains'] ) ) { @@ -175,7 +183,15 @@ function getKeyDomainMentionedInReadme( $string ) { return false; } - function addDomainsOfSameService( $domain ) { + /** + * Add domains of the same service. + * + * @since 1.4.0 + * + * @param string $domain Domain. + * @return array An array containing domains of the same service. 
+ */ + protected function add_domains_of_same_service( $domain ) { $domains = array( $domain ); $domainsOfTheSameService = array( 'paypal.com' => [ 'paypal.com', 'paypalobjects.com' ], @@ -198,8 +214,16 @@ function addDomainsOfSameService( $domain ) { return $domains; } - function isDomainMentionedInReadme( $domain ) { - $key = $this->getKeyDomainMentionedInReadme( $domain ); + /** + * Check if domain is mentioned in readme file. + * + * @since 1.4.0 + * + * @param string $domain Domain. + * @return bool True if domain is mentioned in readme file, false otherwise. + */ + protected function is_domain_mentioned_in_readme( $domain ) { + $key = $this->get_key_domain_mentioned_in_readme( $domain ); if ( false !== $key ) { return true; } @@ -207,8 +231,16 @@ function isDomainMentionedInReadme( $domain ) { return false; } - function isDomainDocumentedReadme( $domain ) { - $key = $this->getKeyDomainMentionedInReadme( $domain ); + /** + * Check if domain is documented in readme file. + * + * @since 1.4.0 + * + * @param string $domain Domain. + * @return bool True if domain is documented in readme file, false otherwise. 
+ */ + protected function is_domain_documented_readme( $domain ) { + $key = $this->get_key_domain_mentioned_in_readme( $domain ); $privacy = false; $terms = false; From 0459830f80b4d046de9a40114325b1609c8f6ecb Mon Sep 17 00:00:00 2001 From: davidperezgar Date: Thu, 2 Jan 2025 17:34:57 +0100 Subject: [PATCH 4/4] filter files and start find ext calls --- .../Plugin_Repo/Plugin_Readme_Check.php | 3 +- includes/Traits/External_Utils.php | 204 +++++++++++++++--- 2 files changed, 171 insertions(+), 36 deletions(-) diff --git a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php index a70df8a7e..4a6778163 100644 --- a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php +++ b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php @@ -670,9 +670,10 @@ private function check_for_contributors( Check_Result $result, string $readme_fi private function check_for_privacy_notes( Check_Result $result, string $readme_file, Parser $parser, array $files ) { $existing_tld_names = $this->get_tld_names(); $domains = $this->load_domains_mentioned_in_readme( $readme_file, $existing_tld_names ); - $files_ext = $this->filter_files_for_external( $files, $result->plugin()->path() ); + $files_ext = self::filter_files_by_extensions( $files, array( 'php', 'css', 'js' ) ); foreach( $files_ext as $file ) { + $result = $this->find_external_calls( $file ); } diff --git a/includes/Traits/External_Utils.php b/includes/Traits/External_Utils.php index 1cffadcb0..a48fbf161 100644 --- a/includes/Traits/External_Utils.php +++ b/includes/Traits/External_Utils.php @@ -13,31 +13,6 @@ * @since 1.0.0 */ trait External_Utils { - /** - * Filter the given array of files for php,js,css files. - * - * @since 1.4.0 - * - * @param array $files Array of file files to be filtered. - * @param string $plugin_relative_path Plugin relative path. - * @return array An array containing php,js.css files, or an empty array if none are found. 
- */ - protected function filter_files_for_external( array $files, $plugin_relative_path ) { - // Find the readme file. - $ext_list = preg_grep( '/\.(php|js|css)$/i', $files ); - - // Filter the readme files located at root. - $potential_ext_files = array_filter( - $ext_list, - function ( $file ) use ( $plugin_relative_path ) { - $file = str_replace( $plugin_relative_path, '', $file ); - return ! str_contains( $file, '/' ); - } - ); - - return ! empty( $potential_ext_files ) ? $potential_ext_files : array(); - } - /** * Load domains mentioned in readme file. * @@ -271,19 +246,178 @@ protected function is_domain_documented_readme( $domain ) { protected function find_external_calls( $file ) { $lines = file( $file ); $this->find_functions(); - $this->regexKnownUrls( $lines ); - $this->findClasses(); - $this->regexEstructures( $lines ); - $this->findDeclarations( $lines ); + $this->find_classes(); + $this->regex_estructures( $lines ); + $this->find_declarations( $lines ); + } - /** - * Find functions in the file. - * - * @since 1.4.0 - */ - protected function find_functions() { + //Check PHP function calls loading URLs. + function find_functions() { + if ( ! empty( $this->stmts ) ) { + $funcCalls = $this->nodeFinder->findInstanceOf( $this->stmts, Node\Expr\FuncCall::class ); + if ( ! empty( $funcCalls ) ) { + foreach ( $funcCalls as $funccall ) { + $foundInSameLine = true; + $lastFoundExprArray = []; + if ( $this->hasFunctionName( $funccall ) ) { + $log = ''; + $functionName = $this->getCallName($funccall); + + //Enqueue functions + if ( in_array( $functionName, [ + 'wp_register_script', + 'wp_enqueue_script', + 'wp_register_style', + 'wp_enqueue_style' + ] ) ) { + // Look for second parameter of this PHP functions. + if ( isset( $funccall->args[1] ) ) { + $argValue = $funccall->args[1]->value; + if ( ! 
empty( $argValue ) ) { + $log = $this->checkArgGetLog( $argValue, $foundInSameLine, $lastFoundExprArray ); + } + } + } + + // External calls + if ( in_array( $functionName, [ + 'wp_remote_request', + 'wp_safe_remote_request', + 'wp_remote_get', + 'wp_safe_remote_get', + 'wp_remote_post', + 'wp_safe_remote_post', + 'wp_remote_head', + 'wp_safe_remote_head', + 'wp_remote_fopen', + 'file_get_contents', + 'download_url', + 'fopen', + 'file' + ] ) ) { + // Look for first parameter of this PHP functions. + if ( isset( $funccall->args[0] ) ) { + $argValue = $funccall->args[0]->value; + if ( ! empty( $argValue ) ) { + $log = $this->checkArgGetLog( $argValue, $foundInSameLine, $lastFoundExprArray ); + } + } + } + + if ( ! empty( $log ) ) { + if ( ! $this->isAlreadyLogged( $funccall->getStartLine() ) ) { + $this->logCallExpr( $funccall, 1, $log, true ); + if(!$foundInSameLine && !empty($lastFoundExprArray)){ + foreach ($lastFoundExprArray as $expr) { + $this->saveLog( 0, '# ↳ Found: ' . $this->prettyPrinter->prettyPrint( [ $expr ] ), $this->getLogPostContextId( $log, $this->getLogLineID( $funccall->getStartLine() ) ) ); + } + } + } + } + } + } + } + } + } + + //Check PHP class calls loading URLs. + function find_classes() { + if ( ! empty( $this->stmts ) ) { + $classNews = $this->nodeFinder->findInstanceOf( $this->stmts, Node\Expr\New_::class ); + if ( ! empty( $classNews ) ) { + foreach ( $classNews as $classNew ) { + $foundInSameLine = true; + $lastFoundExprArray = []; + if ( $this->hasClassNewName( $classNew ) ) { + $log = ''; + $className = $classNew->class->toString(); + if ( in_array( $className, [ + 'SoapClient', + 'nusoap_client', + ] ) ) { + if ( isset( $classNew->args[0] ) ) { + $argValue = $classNew->args[0]->value; + if ( ! empty( $argValue ) ) { + $log = $this->checkArgGetLog( $argValue, $foundInSameLine, $lastFoundExprArray ); + } + } + } + + if ( ! empty( $log ) ) { + if ( ! 
$this->isAlreadyLogged( $classNew->getStartLine() ) ) { + $this->saveLinesNodeDetailLog( $classNew, $log, true ); + if(!$foundInSameLine && !empty($lastFoundExprArray)){ + foreach ($lastFoundExprArray as $expr) { + $this->saveLog( 0, '# ↳ Found: ' . $this->prettyPrinter->prettyPrint( [ $expr ] ), $this->getLogPostContextId( $log, $this->getLogLineID( $classNew->getStartLine() ) ) ); + } + } + } + } + } + } + } + } + } + + // Regex over typical code structures containing URLs + function regex_estructures( $lines ) { + $regexArray = [ + 'src-simple' => '/src\s*=\s*\\\?\'((.*?(<\?.+?\?>)?.*?)+?)\\\?\'/', + 'src-double' => '/src\s*=\s*\\\?"((.*?(<\?.+?\?>)?.*?)+?)\\\?"/', + 'css-simple' => '/[:|\\s]\s*url\s*\(\s*\'((.*?(<\?.+?\?>)?.*?)+?)\'\s*\)/', + //We are not covering the case of doing url(https://example.com) as without ' or " this is hard to find. + 'css-double' => '/[:|\\s]\s*url\s*\(\s*"((.*?(<\?.+?\?>)?.*?)+?)"\s*\)/', + //'css' => '[:|\\s]url\s*\(\s*["|\']?(.+?)["|\']?\)', + 'jsImport' => '/@import\s*["|\'|`]((.*?(<\?.+?\?>)?.*?)+?)["|\'|`]/', + 'jsImportScripts' => '/importScripts\s*\(\s*["|\'|`]((.*?(<\?.+?\?>)?.*?)+?)["|\'|`]\s*\)/', + 'jsSetAttribute' => '/setAttribute\s*\(\s*["|\'|`]src["|\'|`]\s*,\s*["|\'|`](.+?)["|\'|`]\s*\)/', + 'jsAjax-simple' => '/\s*url\s*:\s*\'((.*?(<\?.+?\?>)?.*?)+?)\'\s*/', + 'jsAjax-double' => '/\s*url\s*:\s*"((.*?(<\?.+?\?>)?.*?)+?)"\s*/', + 'jsAjax-inverted' => '/\s*url\s*:\s*`((.*?(<\?.+?\?>)?.*?)+?)`\s*/', + 'jsFetch-simple' => '/\s*fetch\s*\(\s*\'((.*?(<\?.+?\?>)?.*?)+?)\'\s*/', + 'jsFetch-double' => '/\s*fetch\s*\(\s*"((.*?(<\?.+?\?>)?.*?)+?)"\s*/', + 'jsFetch-inverted' => '/\s*fetch\s*\(\s*`((.*?(<\?.+?\?>)?.*?)+?)`\s*/', + ]; + + foreach ( $regexArray as $regex ) { + $this->logRegexIncidences( $lines, $regex, '', false ); + } + } + + // Look for any PHP / JS variable declaration and guess if that looks like an external service. 
+ // TODO this function consumes too much time because of getStringsFromAssignsExpr, find ways to optimize it. + function find_declarations( $lines ) { + // Find all the assignments in PHP + if ( ! empty( $this->stmts ) ) { + $assigns = $this->nodeFinder->findInstanceOf( $this->stmts, Node\Expr\Assign::class ); + if ( ! empty( $assigns ) ) { + foreach ( $assigns as $assign ) { + if ( ! empty( $assign->expr ) ) { + $foundInSameLine = true; + $stringsArray = $this->getStringsFromAssignsExpr( $assign->expr, $foundInSameLine ); + if ( ! empty( $stringsArray ) ) { + foreach ( $stringsArray as $string ) { + $log = $this->checkStringGetLog( $string, true ); + if ( ! empty( $log ) ) { + if ( ! $this->isAlreadyLogged( $assign->getStartLine() ) ) { + $this->saveLinesNodeDetailLog( $assign, $log, true ); + if(!$foundInSameLine){ + $this->saveLog( 0, '# ↳ Detected: ' . $string, $this->getLogPostContextId( $log, $this->getLogLineID( $assign->getStartLine() ) ) ); + } + } + } + } + } + } + } + } + } + // Find anything else that looks like an assign (mostly for JS but will also catch PHP and HTML) + // Regex: anything looking like a URL preceded by "XXXX =" except for href. + $regex = '/[a-zA-Z_$][a-zA-Z_$0-9]*(?logRegexIncidences( $lines, $regex, '', true ); } }