# Source: patch 1/4 "adding alpine package crawler from UK codebase",
# additions to crawler/utils/package_utils.py.
# NOTE(review): `os`, `logger` and `PackageFeature` are not defined here; they
# are expected to come from the enclosing module -- confirm before reuse.
# from UK crawler codebase


def _parse_apk_record(record):
    """Return ``(name, version, architecture, size)`` for one APK record.

    Only lines tagged ``P:``, ``V:``, ``A:`` and ``S:`` are read. Tags are
    case-sensitive, every other line is ignored, and a tag that does not
    occur leaves its value as the empty string. If a tag is repeated the
    last occurrence wins.
    """
    fields = {'P:': '', 'V:': '', 'A:': '', 'S:': ''}
    for line in record.split('\n'):
        tag = line[:2]
        if tag in fields:
            fields[tag] = line[2:]
    return fields['P:'], fields['V:'], fields['A:'], fields['S:']


def apk_parser(filename):
    """Yield a ``(name, PackageFeature)`` pair per package in an APK database.

    The file is read in full and split into records on blank lines. Each
    record is reduced to its name, version, architecture and size (all kept
    as strings) and wrapped as
    ``PackageFeature(None, name, size, version, architecture)``.

    :param filename: path of the APK ``installed`` database file.
    :raises IOError: if the file cannot be opened or read; the failure is
        logged before being re-raised. Because this is a generator, the
        error surfaces on the first ``next()`` call, not on creation.
    """
    try:
        # Context manager so the handle is released even if read() fails.
        with open(filename) as db_file:
            db_contents = db_file.read()
    except IOError as e:
        # The message is formatted eagerly on purpose: handlers that look at
        # ``record.msg`` directly must still see the file name.
        logger.error('Failed to read APK database to obtain packages. '
                     'Check if %s is present. [Exception: %s: %s]'
                     ' ' % (filename, type(e).__name__, e.strerror))
        raise

    # Drop empty and whitespace-only chunks (e.g. produced by trailing
    # newlines) so they are neither counted nor emitted as nameless packages.
    records = [chunk for chunk in db_contents.split('\n\n') if chunk.strip()]
    logger.debug('Found {} APK packages'.format(len(records)))

    for record in records:
        name, version, architecture, size = _parse_apk_record(record)
        yield (name, PackageFeature(None, name, size, version, architecture))


def get_apk_packages(
        root_dir='/',
        dbpath='lib/apk/db'):
    """Yield ``(feature_key, package_feature)`` pairs from an APK database.

    :param root_dir: prefix under which ``dbpath`` is resolved.
    :param dbpath: directory holding the ``installed`` file. When it is
        absolute a warning is logged and ``root_dir`` has no effect, because
        ``os.path.join`` discards everything before an absolute component.
    :raises IOError: propagated from ``apk_parser`` when the database file
        cannot be read.
    """
    if os.path.isabs(dbpath):
        logger.warning(
            'dbpath: ' +
            dbpath +
            ' is defined absolute. Ignoring prefix: ' +
            root_dir +
            '.')

    installed_db = os.path.join(root_dir, dbpath, 'installed')

    for feature_key, package_feature in apk_parser(installed_db):
        yield (feature_key, package_feature)
# ---- tests/functional/test_functional_apk_package_crawler.py (class body) ----
# Source: patch 1/4 of the series, with the image-name typo that patch 2/4
# corrects ('apline' -> 'alpine') already applied.


class ContainersCrawlerTests(unittest.TestCase):
    """Functional test: crawl one running alpine container from outside.

    Requires a reachable docker daemon with no containers running, and
    enough privileges to run the crawler.
    """

    def setUp(self):
        root = logging.getLogger()
        root.setLevel(logging.INFO)
        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        root.addHandler(ch)
        # Without this every test run stacks another handler on the root
        # logger and each message is printed several times.
        self.addCleanup(root.removeHandler, ch)

        self.docker = docker.Client(base_url='unix://var/run/docker.sock',
                                    version='auto')
        try:
            if len(self.docker.containers()) != 0:
                raise Exception(
                    "Sorry, this test requires a machine with no docker "
                    "containers running.")
        except requests.exceptions.ConnectionError:
            print("Error connecting to docker daemon, are you in the docker "
                  "group? You need to be in the docker group.")

        self.start_crawled_container()

    def start_crawled_container(self):
        """Pull alpine:latest and start a container that sleeps for 60s."""
        # start a container to be crawled
        self.docker.pull(repository='alpine', tag='latest')
        self.container = self.docker.create_container(
            image='alpine:latest', command='/bin/sleep 60')
        self.tempd = tempfile.mkdtemp(prefix='crawlertest.')
        self.docker.start(container=self.container['Id'])

    def tearDown(self):
        # Remove the scratch directory even when stopping the container fails.
        try:
            self.remove_crawled_container()
        finally:
            shutil.rmtree(self.tempd)

    def remove_crawled_container(self):
        """Stop and delete the container started for this test."""
        self.docker.stop(container=self.container['Id'])
        self.docker.remove_container(container=self.container['Id'])

    def testCrawlContainer(self):
        """Run crawler.py in OUTCONTAINER mode and check the emitted frame."""
        env = os.environ.copy()
        mypath = os.path.dirname(os.path.realpath(__file__))
        os.makedirs(self.tempd + '/out')

        # crawler itself needs to be root
        process = subprocess.Popen(
            [
                '/usr/bin/python', mypath + '/../../crawler/crawler.py',
                '--url', 'file://' + self.tempd + '/out/crawler',
                '--features', 'os,package',
                '--crawlContainers', self.container['Id'],
                '--crawlmode', 'OUTCONTAINER',
                '--numprocesses', '1'
            ],
            env=env)
        # No pipes were requested, so communicate() only waits for the child
        # and returns (None, None); the crawler writes to our own streams.
        stdout, stderr = process.communicate()
        print(stderr)
        print(stdout)
        self.assertEqual(process.returncode, 0)

        subprocess.call(['/bin/chmod', '-R', '777', self.tempd])

        files = os.listdir(self.tempd + '/out')
        self.assertEqual(len(files), 1)

        # ``with`` closes the file even when one of the assertions fails.
        with open(self.tempd + '/out/' + files[0], 'r') as f:
            output = f.read()
        print(output)  # only printed if the test fails
        self.assertIn('alpine', output)
        self.assertIn('musl', output)
        self.assertIn('busybox', output)


# ---- tests/unit/single_package_apk_db (fixture, summarized) ----
# One record whose crawler-relevant lines are:
#   P:test-package
#   V:999.9.9
#   A:x86_64
#   S:999
# It is followed by further metadata and file-list lines (C:, I:, T:, U:, L:,
# o:, m:, t:, c:, p:, F:, R:, a:, Z:) and ends with a blank line.

# ---- tests/unit/test_apk_package_parser.py ----
# from UK crawler codebase
import unittest
from utils import package_utils
from utils.features import PackageFeature
import os
import logging


class PackageUtilsTest(unittest.TestCase):
    """Unit tests for ``package_utils.apk_parser`` using on-disk fixtures."""

    def test_single_package_is_parsed(self):
        input_file = os.path.join(os.path.dirname(__file__),
                                  'single_package_apk_db')
        parser = package_utils.apk_parser(input_file)
        # next(gen) rather than gen.next(): works on Python 2.6+ and 3.
        package = next(parser)
        self.assert_package_is_correct(
            package, 'test-package', '999.9.9', '999', 'x86_64')
        self.assertRaises(StopIteration, next, parser)

    def test_multiple_packages_parsed(self):
        input_file = os.path.join(os.path.dirname(__file__),
                                  'two_packages_apk_db')
        parser = package_utils.apk_parser(input_file)
        self.assert_package_is_correct(
            next(parser), 'first-package', '111.1.1', '111', 'x86_64')
        self.assert_package_is_correct(
            next(parser), 'second-package', '222.2.2', '222', 'x86_64')
        self.assertRaises(StopIteration, next, parser)

    def test_error_message_produced(self):
        logger = logging.getLogger('crawlutils')
        log_handler = CapturingLogHandler()
        logger.addHandler(log_handler)
        # Detach the handler afterwards so it does not leak into other tests.
        self.addCleanup(logger.removeHandler, log_handler)
        with self.assertRaises(IOError):
            input_file = os.path.join(os.path.dirname(__file__),
                                      'does.not.exist')
            parser = package_utils.apk_parser(input_file)
            next(parser)
        self.assertIsNotNone(log_handler.msg)
        self.assertIn('Failed to read APK database to obtain packages',
                      log_handler.msg)
        self.assertIn('does.not.exist', log_handler.msg)
        self.assertIn('IOError: No such file or directory', log_handler.msg)

    def assert_package_is_correct(self, package, name, version, size,
                                  architecture):
        """Check a ``(key, PackageFeature)`` pair field by field."""
        self.assertEqual(name, package[0])
        packageFeature = package[1]
        self.assertEqual(name, packageFeature.pkgname)
        self.assertEqual(version, packageFeature.pkgversion)
        self.assertEqual(size, packageFeature.pkgsize)
        self.assertEqual(architecture, packageFeature.pkgarchitecture)
        self.assertIsNone(packageFeature.installed)


class CapturingLogHandler(logging.Handler):
    """Logging handler that remembers the text of the last record emitted."""

    # Rendered text of the most recent record; None until emit() is called.
    msg = None

    def emit(self, record):
        # getMessage() merges record.args into the message, so the captured
        # text is complete whether or not the caller pre-formatted it.
        self.msg = record.getMessage()


if __name__ == '__main__':
    unittest.main()

# ---- tests/unit/two_packages_apk_db (fixture) ----
# P:first-package
# V:111.1.1
# A:x86_64
# S:111
#
# P:second-package
# V:222.2.2
# A:x86_64
# S:222
484b1b9c..8e4bdaaf 100644 --- a/tests/functional/test_functional_apk_package_crawler.py +++ b/tests/functional/test_functional_apk_package_crawler.py @@ -47,7 +47,7 @@ def setUp(self): def start_crawled_container(self): # start a container to be crawled - self.docker.pull(repository='apline', tag='latest') + self.docker.pull(repository='alpine', tag='latest') self.container = self.docker.create_container( image='alpine:latest', command='/bin/sleep 60') self.tempd = tempfile.mkdtemp(prefix='crawlertest.') From cf078e93ca16afa20d6c30ec6ac2203124c53f8c Mon Sep 17 00:00:00 2001 From: Sahil Suneja Date: Thu, 25 Jan 2018 10:31:01 -0500 Subject: [PATCH 3/4] adding alpine package crawler from UK codebase Signed-off-by: Sahil Suneja --- crawler/utils/package_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawler/utils/package_utils.py b/crawler/utils/package_utils.py index 7977cde1..327de230 100644 --- a/crawler/utils/package_utils.py +++ b/crawler/utils/package_utils.py @@ -244,7 +244,7 @@ def _get_package_manager(root_dir): pkg_manager = 'dpkg' elif os_distro in ['redhat', 'red hat', 'rhel', 'fedora', 'centos']: pkg_manager = 'rpm' - if os_distro in ['alpine']: + elif os_distro in ['alpine']: pkg_manager = 'apk' elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')): pkg_manager = 'dpkg' From 5994630fc6d650b843d334c4d6831a388a5b7187 Mon Sep 17 00:00:00 2001 From: Sahil Suneja Date: Thu, 25 Jan 2018 10:56:41 -0500 Subject: [PATCH 4/4] adding alpine package crawler from UK codebase Signed-off-by: Sahil Suneja --- crawler/utils/package_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crawler/utils/package_utils.py b/crawler/utils/package_utils.py index 327de230..6ffcc2e3 100644 --- a/crawler/utils/package_utils.py +++ b/crawler/utils/package_utils.py @@ -203,12 +203,11 @@ def crawl_packages( reload_needed=True): # package attributes: ["installed", "name", "size", "version"] - logger.debug('Crawling Packages') - 
pkg_manager = _get_package_manager(root_dir) - try: + pkg_manager = _get_package_manager(root_dir) + if pkg_manager == 'dpkg': dbpath = dbpath or 'var/lib/dpkg' for (key, feature) in get_dpkg_packages(