diff --git a/.github/workflows/devsecops.yaml b/.github/workflows/devsecops.yaml
new file mode 100644
index 0000000..dc2b05b
--- /dev/null
+++ b/.github/workflows/devsecops.yaml
@@ -0,0 +1,137 @@
+# DevSecOps Workflow Definition
+# This workflow is triggered on every push to the repository
+name: DevSecOps Workflow
+
+on: push
+
+# Environment variables used across multiple jobs
+env:
+  IMAGE_TAG: ghcr.io/${{ github.repository }}:unstable
+
+jobs:
+  # Secret scanning job to detect secrets in codebase
+  secret-scanning:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4 # Check out the repository content to the runner
+      - name: Run Gitleaks Scan
+        # Running Gitleaks to scan the code for secrets
+        run: |
+          docker run --rm -v $(pwd):/code -u $(id -u):$(id -g) zricethezav/gitleaks:v8.18.1 -s /code detect -f sarif -r /code/gitleaks.sarif.json
+      - name: Upload sarif file
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: gitleaks.sarif.json
+          category: secret-scanning
+
+  # Software Composition Analysis (SCA) to find vulnerabilities in project dependencies
+  sca:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Run Trivy vulnerability scanner in fs mode
+        # Running Trivy to scan the filesystem for vulnerabilities
+        uses: aquasecurity/trivy-action@master
+        with:
+          scan-type: "fs"
+          scan-ref: "."
+          severity: "CRITICAL,HIGH"
+          format: "sarif"
+          output: "trivy-results.sarif"
+      - name: Upload Trivy scan results to GitHub Security tab
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: "trivy-results.sarif"
+          category: "sca"
+
+  # Static Application Security Testing (SAST) to identify security vulnerabilities in source code
+  sast:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Run Semgrep
+        # Running Semgrep for static code analysis to identify security issues
+        uses: docker://returntocorp/semgrep
+        with:
+          args: semgrep scan /github/workspace --sarif -o /github/workspace/semgrep.sarif.json
+      - name: Upload sarif file
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: semgrep.sarif.json
+          category: sast
+
+  # Docker image build job
+  build-image:
+    runs-on: ubuntu-latest
+    outputs:
+      image_path: ${{ steps.build_output.outputs.image_path }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set IMAGE_TAG if tagged
+        # Setting the image tag if the push is a tag push
+        run: echo "IMAGE_TAG=ghcr.io/${{ github.repository }}:${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
+        if: startsWith(github.ref, 'refs/tags/')
+
+      # make sure the IMAGE_TAG is lowercase
+      - name: Transform IMAGE_TAG to lowercase
+        run: echo "IMAGE_TAG=$(echo ${{ env.IMAGE_TAG }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
+      - name: Build Docker image with Kaniko
+        # Building the Docker image using Kaniko
+        id: build_image
+        uses: docker://gcr.io/kaniko-project/executor:v1.9.2
+        with:
+          args: --destination=${{ env.IMAGE_TAG }} --context=/github/workspace --dockerfile=/github/workspace/Dockerfile --no-push --tarPath /github/workspace/image.tar
+      - name: Upload artifact
+        # Uploading the built Docker image as an artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-image
+          path: image.tar
+
+  # Image scanning job to detect vulnerabilities in the built Docker image
+  image-scanning:
+    needs: build-image
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v4
+        with:
+          name: docker-image
+          path: .
+      - name: Run Trivy vulnerability scanner in tarball mode
+        # Running Trivy to scan the Docker image for vulnerabilities
+        uses: aquasecurity/trivy-action@master
+        with:
+          input: /github/workspace/image.tar
+          severity: "CRITICAL,HIGH"
+          format: "sarif"
+          output: "trivy-results.sarif"
+      - name: Upload Trivy scan results to GitHub Security tab
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: "trivy-results.sarif"
+          category: "image-scanning"
+
+  # Publish job to push the Docker image to a registry
+  publish:
+    needs: [build-image, image-scanning, secret-scanning, sca, sast]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: docker-image
+          path: .
+      - uses: imjasonh/setup-crane@v0.1
+      - name: Set IMAGE_TAG if tagged
+        # Setting the image tag if the push is a tag push
+        run: echo "IMAGE_TAG=ghcr.io/${{ github.repository }}:${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
+        if: startsWith(github.ref, 'refs/tags/')
+      # make sure the IMAGE_TAG is lowercase
+      - name: Transform IMAGE_TAG to lowercase
+        run: echo "IMAGE_TAG=$(echo ${{ env.IMAGE_TAG }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
+      - name: Push Docker image to GitHub image Registry
+        # Pushing the Docker image to GitHub Container Registry
+        run: crane push image.tar ${{ env.IMAGE_TAG }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..acb739f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,23 @@
+# Build a virtualenv using the appropriate Debian release
+# * Install python3-venv for the built-in Python3 venv module (not installed by default)
+# * Install gcc libpython3-dev to compile C Python modules
+# * In the virtualenv: Update pip setuptools and wheel to support building new packages
+FROM debian:12-slim AS build
+RUN apt-get update && \
+    apt-get install --no-install-suggests --no-install-recommends --yes python3-venv gcc libpython3-dev && \
+    python3 -m venv /venv && \
+    /venv/bin/pip install --upgrade pip setuptools wheel
+
+# Build the virtualenv as a separate step: Only re-execute this step when requirements.txt changes
+FROM build AS build-venv
+COPY requirements.txt /requirements.txt
+RUN /venv/bin/pip install --disable-pip-version-check -r /requirements.txt
+
+# Copy the virtualenv into a distroless image
+FROM gcr.io/distroless/python3-debian12
+COPY --from=build-venv /venv /venv
+COPY . /app
+WORKDIR /app
+ENTRYPOINT []
+# This ensures that the application runs when the container starts.
+CMD ["/venv/bin/python3", "-m", "Source", "RSS"]
diff --git a/Source/Bots/RSS.py b/Source/Bots/RSS.py
index 90c892b..9b8ebdb 100644
--- a/Source/Bots/RSS.py
+++ b/Source/Bots/RSS.py
@@ -9,6 +9,7 @@
 import atexit
 import logging
+
 logger = logging.getLogger("rss")
 
 import feedparser
@@ -17,6 +18,21 @@
 from .. import webhooks, config
 from ..Formatting import format_single_article
 
+from datetime import datetime, timedelta
+
+START_DATETIME = datetime(2024, 1, 30)  # articles before this datetime are not posted
+DATETIME_FORMAT_RANSOMWARE = "%Y-%m-%d %H:%M:%S.%f"  # assumed datetime format of all articles
+DATETIME_FORMAT_NEWS = "%Y-%m-%dT%H:%M:%S"
+DATETIME_FORMAT_ISO = "%Y-%m-%dT%H:%M:%S.%f"
+VERBOSE_DISCORD_POSTING = False
+CVE_SOURCE = "NIST"
+# TODO: add CVE API queries.
+# https://nvd.nist.gov/developers/vulnerabilities
+# pubStartDate & pubEndDate are useful parameters
+# Something like:
+# https://services.nvd.nist.gov/rest/json/cves/2.0/?pubStartDate=2021-08-04T00:00:00.000&pubEndDate=2021-10-22T00:00
+# :00.000
+
 private_rss_feed_list = [
     ['https://grahamcluley.com/feed/', 'Graham Cluley'],
     ['https://threatpost.com/feed/', 'Threatpost'],
@@ -52,23 +68,28 @@
     ["https://www.cisecurity.org/feed/advisories", "Center of Internet Security"],
 ]
 
-FeedTypes = Enum("FeedTypes", "RSS JSON")
+FeedTypes = Enum("FeedTypes", "RSS JSON CVEAPI")
 
 source_details = {
     "Private RSS Feed": {
         "source": private_rss_feed_list,
-        "hook": webhooks["PrivateSectorFeed"],
-        "type": FeedTypes.RSS,
+        "hook" : webhooks["PrivateSectorFeed"],
+        "type" : FeedTypes.RSS,
     },
-    "Gov RSS Feed": {
+    "Gov RSS Feed" : {
         "source": gov_rss_feed_list,
-        "hook": webhooks["GovermentFeed"],
-        "type": FeedTypes.RSS,
+        "hook" : webhooks["GovermentFeed"],
+        "type" : FeedTypes.RSS,
     },
-    "Ransomware News": {
+    "Ransomware News" : {
         "source": "https://raw.githubusercontent.com/joshhighet/ransomwatch/main/posts.json",
-        "hook": webhooks["RansomwareFeed"],
-        "type": FeedTypes.JSON,
+        "hook" : webhooks["RansomwareFeed"],
+        "type" : FeedTypes.JSON,
+    },
+    "CVE News" : {
+        "source": "",  # no source because API needs start and end date time for query
+        "hook" : webhooks["CVEFeed"],
+        "type" : FeedTypes.CVEAPI,
     },
 }
@@ -78,7 +99,6 @@
     config.get("RSS", "RSSLogFile", raw=True, vars={"fallback": "RSSLog.txt"}),
 )
 
-
 rss_log = ConfigParser()
 rss_log.read(rss_log_file_path)
 
@@ -86,38 +106,99 @@
 def get_ransomware_news(source):
     logger.debug("Querying latest ransomware information")
     posts = requests.get(source).json()
-
     for post in posts:
         post["publish_date"] = post["discovered"]
         post["title"] = "Post: " + post["post_title"]
         post["source"] = post["group_name"]
-
     return posts
 
 
+def get_nist_cve_between(dt_start_utc, dt_end_utc):
+    """
+    See https://nvd.nist.gov/developers/vulnerabilities on API details
+    dt_start_utc: datetime object indicating earliest date-time to query, e.g. datetime.utcnow()
+    dt_end_utc: datetime object indicating latest date-time to query
+    returns: List of dict
+    dict has keys: ['id': str, 'source': str ("NIST"), 'source_id': str, 'publish_date': str, 'vuln_status': str,
+    'description': str]
+    date_published is ISO-8601 format, example: '2024-01-28T23:15:58.350'
+    """
+    # assert isinstance(dt_start_utc, datetime.datetime)
+    # assert isinstance(dt_end_utc, datetime.datetime)
+    url_request = f"https://services.nvd.nist.gov/rest/json/cves/2.0/?pubStartDate=" \
+                  f"{dt_start_utc.isoformat()}&pubEndDate={dt_end_utc.isoformat()}"
+    logger.debug(f"Querying NIST CVE API: {url_request}")
+    response = requests.get(url_request)
+    cves = []
+    if response.status_code == 200:
+        response_json = response.json()
+        if ("vulnerabilities" not in response_json) or not isinstance(response_json["vulnerabilities"], list):
+            logger.critical(f"NIST CVE query JSON does not contain key 'vulnerabilities' or is not type of list")
+            return cves
+        for cve_dict in response_json["vulnerabilities"]:
+            cve_dict = cve_dict["cve"]
+            cve_id = cve_dict["id"]
+            cve_source_id = cve_dict["sourceIdentifier"]
+            cve_date_published = cve_dict["published"]
+            cve_vuln_status = cve_dict["vulnStatus"]
+            cve_link = "https://nvd.nist.gov/vuln/detail/" + cve_id
+            cve_description = None
+            for description in cve_dict["descriptions"]:
+                if description["lang"] == "en":
+                    cve_description = description["value"]
+                    break
+            if cve_description is None:
+                cve_description = "N/A in English"
+            cve = dict()
+            cve["id"] = cve_id
+            cve["link"] = cve_link
+            cve["source"] = CVE_SOURCE
+            cve["source_id"] = cve_source_id
+            cve[
+                "publish_date"] = cve_date_published  # change to uniformly used "publish_date" from NIST-specific
+            # "date_published" key
+            cve["vuln_status"] = cve_vuln_status
+            cve["description"] = cve_description
+            if cve["vuln_status"] != "Rejected":
+                cves.append(cve)
+    else:
+        logger.critical(f"NIST query failed with status code {response.status_code}.")
+    return cves
+
+
+def get_new_nist_cve(source):
+    # TODO: this only queries from the last 7 days (~500 results in one test try). If more flexibility
+    # required, need to implement the handling of multiple possible pages (limit: 2000 results/page). See
+    # resultsPerPage under https://nvd.nist.gov/developers/vulnerabilities
+    # source: dummy variable, as right now only NIST is allowed as source.
+    dt_end_utc = datetime.utcnow()
+    dt_start_utc = dt_end_utc - timedelta(days=7)
+    cves = get_nist_cve_between(dt_start_utc, dt_end_utc)
+    return cves
+
+
 def get_news_from_rss(rss_item):
     logger.debug(f"Querying RSS feed at {rss_item[0]}")
     feed_entries = feedparser.parse(rss_item[0]).entries
-
-    # This is needed to ensure that the oldest articles are proccessed first. See https://github.com/vxunderground/ThreatIntelligenceDiscordBot/issues/9 for reference
+    feed_entries_filtered = []
+    # This is needed to ensure that the oldest articles are processed first. See
+    # https://github.com/vxunderground/ThreatIntelligenceDiscordBot/issues/9 for reference
     for rss_object in feed_entries:
         rss_object["source"] = rss_item[1]
         try:
             rss_object["publish_date"] = time.strftime(
-                "%Y-%m-%dT%H:%M:%S", rss_object.published_parsed
+                DATETIME_FORMAT_NEWS, rss_object.published_parsed
             )
         except:
             rss_object["publish_date"] = time.strftime(
-                "%Y-%m-%dT%H:%M:%S", rss_object.updated_parsed
+                DATETIME_FORMAT_NEWS, rss_object.updated_parsed
             )
-
     return feed_entries
 
 
-def proccess_articles(articles):
+def process_articles(articles):
     messages, new_articles = [], []
 
     articles.sort(key=lambda article: article["publish_date"])
-
     for article in articles:
         try:
             config_entry = rss_log.get("main", article["source"])
@@ -130,9 +211,19 @@ def proccess_articles(articles):
         else:
             if config_entry >= article["publish_date"]:
                 continue
-
-        messages.append(format_single_article(article))
-        new_articles.append(article)
+        # TODO: unify timedate formats!
+        # Global earliest time point filtering
+        try:  # format of ransomware feed
+            article_publish_date = datetime.strptime(article["publish_date"], DATETIME_FORMAT_RANSOMWARE)
+        except ValueError as e:
+            try:  # format of rest of RSS
+                article_publish_date = datetime.strptime(article["publish_date"], DATETIME_FORMAT_NEWS)
+            except ValueError as e2:
+                # format of NIST API datetime strings
+                article_publish_date = datetime.strptime(article["publish_date"], DATETIME_FORMAT_ISO)
+        if article_publish_date >= START_DATETIME:
+            messages.append(format_single_article(article))
+            new_articles.append(article)
 
     return messages, new_articles
@@ -140,27 +231,27 @@
 def send_messages(hook, messages, articles, batch_size=10):
     logger.debug(f"Sending {len(messages)} messages in batches of {batch_size}")
     for i in range(0, len(messages), batch_size):
-        hook.send(embeds=messages[i : i + batch_size])
+        hook.send(embeds=messages[i: i + batch_size])
 
-        for article in articles[i : i + batch_size]:
+        for article in articles[i: i + batch_size]:
             rss_log.set("main", article["source"], article["publish_date"])
 
         time.sleep(3)
 
 
-def process_source(post_gathering_func, source, hook):
+def process_source(post_gathering_func, source):
     raw_articles = post_gathering_func(source)
-
-    processed_articles, new_raw_articles = proccess_articles(raw_articles)
-    send_messages(hook, processed_articles, new_raw_articles)
+    processed_articles, new_raw_articles = process_articles(raw_articles)
+    return processed_articles, new_raw_articles
 
 
 def handle_rss_feed_list(rss_feed_list, hook):
     for rss_feed in rss_feed_list:
         logger.info(f"Handling RSS feed for {rss_feed[1]}")
-        webhooks["StatusMessages"].send(f"> {rss_feed[1]}")
-
-        process_source(get_news_from_rss, rss_feed, hook)
+        processed_articles, new_raw_articles = process_source(get_news_from_rss, rss_feed)
+        if VERBOSE_DISCORD_POSTING or len(processed_articles) > 0:
+            webhooks["StatusMessages"].send(f"> {rss_feed[1]}")
+        send_messages(hook, processed_articles, new_raw_articles)
 
 
 def write_status_message(message):
@@ -183,20 +274,29 @@ def main():
 
     while True:
         for detail_name, details in source_details.items():
-            write_status_message(f"Checking {detail_name}")
-
+            if VERBOSE_DISCORD_POSTING:
+                write_status_message(f"Checking {detail_name}")
             if details["type"] == FeedTypes.JSON:
-                process_source(get_ransomware_news, details["source"], details["hook"])
+                processed_articles, new_raw_articles = process_source(get_ransomware_news, details["source"])
+                if VERBOSE_DISCORD_POSTING or len(processed_articles) > 0:
+                    webhooks["StatusMessages"].send(f"> Ransomware News")
+                send_messages(details["hook"], processed_articles, new_raw_articles)
             elif details["type"] == FeedTypes.RSS:
                 handle_rss_feed_list(details["source"], details["hook"])
-
+            elif details["type"] == FeedTypes.CVEAPI:  # data is gathered using function get_new_nist_cve()
+                processed_articles, new_raw_articles = process_source(get_new_nist_cve, CVE_SOURCE)
+                if VERBOSE_DISCORD_POSTING or len(processed_articles) > 0:
+                    webhooks["StatusMessages"].send(f"> CVEs")
+                send_messages(details["hook"], processed_articles, new_raw_articles)
             time.sleep(3)
-
+        # Update RSS log
         logger.debug("Writing new time to rss log file")
         with open(rss_log_file_path, "w") as f:
             rss_log.write(f)
-
-        write_status_message("All done, going to sleep")
+        if VERBOSE_DISCORD_POSTING:
+            write_status_message("All done, going to sleep")
+        else:
+            logger.info("All done, going to sleep")
 
         time.sleep(1800)
diff --git a/Source/Formatting.py b/Source/Formatting.py
index 5abf876..6d2021d 100644
--- a/Source/Formatting.py
+++ b/Source/Formatting.py
@@ -3,7 +3,6 @@
 from datetime import datetime
 
 import dateutil.parser
-
 
 MAIN_COLOR = 0x000000
 THUMBNAIL_URL = "https://avatars.githubusercontent.com/u/87911852?s=280&v=4"
@@ -27,40 +26,53 @@ def format_single_article(article):
 
     if "summary" in article:
         for text_part in article["summary"].split("."):
-            if not (len(description) + len(text_part)) > 250:
+            if not (len(description) + len(text_part)) > 256:
                 description += text_part + "."
             else:
                 description += ".."
                 break
+    elif "description" in article:
+        description = article["description"]
+        if len(description) > 2048:  # Embed descriptions are limited to 2048 in Discord API
+            description = description[:2045] + "..."
+    if "source_id" in article:
+        source_text = f"**Source**: *{article['source']} | {article['source_id']}*"
+    else:
+        source_text = f"**Source**: *{article['source']}*"
 
-    source_text = f"**Source**: *{article['source']}*"
     date_text = (
-        "**Date**: " + " | *".join(format_datetime(article["publish_date"])) + "*"
+            "**Date**: " + " | *".join(format_datetime(article["publish_date"])) + "*"
     )
-
+    if "title" in article:
+        title = article["title"]
+    elif "id" in article:
+        title = article["id"]
     if "link" in article:
         message = Embed(
-            title=article["title"],
+            title=title,
             url=article["link"],
             color=MAIN_COLOR,
         )
     else:
         message = Embed(
-            title=article["title"],
+            title=title,
            color=MAIN_COLOR,
         )
 
-    if description and "link" in article:
-        message.add_field(name=description, value=article["link"], inline=False)
-
-    message.add_field(
-        name="Details: ",
-        value=source_text + "\n" + date_text,
-        inline=False,
-    )
-
+    if description:
+        if "link" in article:
+            message.add_field(name="Description", value=description, inline=False)
+        details_field_value = source_text + "\n" + date_text
+        if "vuln_status" in article:  # if vulnerability status available (for CVE), append to details
+            details_field_value += "\n"
+            details_field_value += f"**Status:** {article['vuln_status']}"
+        message.add_field(
+            name="Details: ",
+            value=details_field_value,
+            inline=False,
+        )
     else:
-        if article["title"]:
+        if title:
             message.set_thumbnail(url=THUMBNAIL_URL)
@@ -68,5 +80,4 @@ def format_single_article(article):
             value=date_text,
             inline=False,
         )
-
-    return message
+    return message
diff --git a/config.ini b/config.ini
index c281d36..31ec8e2 100644
--- a/config.ini
+++ b/config.ini
@@ -4,6 +4,7 @@ GovermentFeed =
 RansomwareFeed =
 TelegramFeed =
 StatusMessages =
+CVEFeed =
 
 [Telegram]
 BotName =