#!/usr/bin/env python3
"""Scrape Google dorks from the Exploit-DB Google Hacking Database (GHDB)."""

import argparse
import json
from pathlib import Path

import requests
from bs4 import BeautifulSoup
import urllib3

__version__ = "1.1.1"


def extract_dork(url_title: str) -> str:
    """Extract and clean the dork text from the given HTML string."""
    cleaned_title = url_title.replace("\t", "")
    soup = BeautifulSoup(cleaned_title, "html.parser")
    link = soup.find("a")
    if link:
        return link.get_text(strip=True)
    print(f"[-] No link found in url_title: {url_title}")
    return ""
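

# A minimal illustration of extract_dork. The HTML fragment below is a made-up
# example of the "url_title" markup the GHDB API returns, not captured output:
#
#     extract_dork('<a href="/ghdb/5">intitle:"index of" passwd</a>')
#     -> 'intitle:"index of" passwd'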


def retrieve_google_dorks(
    save_json_response_to_file=False,
    save_all_dorks_to_file=False,
    save_individual_categories_to_files=False,
):
    """Retrieve Google dorks and optionally save them in various formats."""
    url = "https://www.exploit-db.com/google-hacking-database"
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "deflate, gzip, br",
        "Accept-Language": "en-US",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0",
        # Marking the request as an AJAX-style request so the site serves its
        # JSON dataset rather than the regular HTML page.
        "X-Requested-With": "XMLHttpRequest",
    }

    print(f"[+] Requesting URL: {url}")
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        print("[+] Successfully retrieved the data.")
    except requests.exceptions.SSLError:
        # Retry once without certificate verification, silencing the warning
        # urllib3 would otherwise emit for the unverified request.
        print("[!] SSL error encountered. Retrying without SSL verification.")
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        response = requests.get(url, headers=headers, timeout=10, verify=False)
        if response.status_code == 200:
            print("[+] Successfully retrieved the data without SSL verification.")
        else:
            print(f"[-] Error retrieving Google dorks from: {url} (Status code: {response.status_code})")
            return
    except requests.exceptions.RequestException as e:
        print(f"[-] Error retrieving Google dorks from: {url} ({e})")
        return
    try:
        json_response = response.json()
        print("[+] JSON response successfully parsed.")
    except json.JSONDecodeError:
        print("[-] Failed to decode JSON response.")
        return

    total_dorks = json_response.get("recordsTotal", 0)
    json_dorks = json_response.get("data", [])
    print(f"[+] Total dorks in response: {total_dorks}")

    extracted_dorks = [extract_dork(dork["url_title"]) for dork in json_dorks]

    # Group the raw dork records by category ID so they can be written out to
    # per-category files later.
    category_dict = {}
    for dork in json_dorks:
        dork_text = extract_dork(dork["url_title"])
        if not dork_text:
            continue  # Skip records whose dork text could not be extracted.
        category_id = int(dork["category"]["cat_id"])
        category_name = dork["category"]["cat_title"]
        category = category_dict.setdefault(
            category_id,
            {"category_name": category_name, "dorks": []},
        )
        category["dorks"].append(dork)
    dorks_path = Path("dorks")
    dorks_path.mkdir(exist_ok=True)
    print(f"[+] 'dorks' directory is ready at: {dorks_path.resolve()}")

    if save_individual_categories_to_files:
        print("[*] Saving individual category files...")
        for cat_id in sorted(category_dict):
            category = category_dict[cat_id]
            print(f"[*] Category {cat_id} ('{category['category_name']}') has {len(category['dorks'])} dorks")
            # Write each category to a .txt file named after the category.
            file_name = dorks_path / f"{category['category_name'].lower().replace(' ', '_')}.txt"
            print(f"[*] Writing to {file_name}")
            dorks_to_write = [extract_dork(d["url_title"]) for d in category["dorks"]]
            with file_name.open("w", encoding="utf-8") as fh:
                fh.write("\n".join(dorks_to_write) + "\n")

    if save_json_response_to_file:
        json_file = dorks_path / "all_google_dorks.json"
        print(f"[*] Writing all dorks to {json_file}")
        with json_file.open("w", encoding="utf-8") as jf:
            json.dump(json_dorks, jf, indent=4)

    if save_all_dorks_to_file:
        txt_file = dorks_path / "all_google_dorks.txt"
        print(f"[*] Writing all dorks to {txt_file}")
        with txt_file.open("w", encoding="utf-8") as tf:
            tf.write("\n".join(filter(None, extracted_dorks)) + "\n")

    print(f"[*] Total Google dorks retrieved: {total_dorks}")

    return {
        "total_dorks": total_dorks,
        "extracted_dorks": extracted_dorks,
        "category_dict": category_dict,
    }
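

# A minimal sketch of calling the scraper from another script (assumes network
# access to exploit-db.com; the result keys match the dict returned above):
#
#     results = retrieve_google_dorks(save_all_dorks_to_file=True)
#     if results:
#         print(results["total_dorks"], "dorks in", len(results["category_dict"]), "categories")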


if __name__ == "__main__":
    categories = {
        1: "Footholds",
        2: "Files Containing Usernames",
        3: "Sensitive Directories",
        4: "Web Server Detection",
        5: "Vulnerable Files",
        6: "Vulnerable Servers",
        7: "Error Messages",
        8: "Files Containing Juicy Info",
        9: "Files Containing Passwords",
        10: "Sensitive Online Shopping Info",
        11: "Network or Vulnerability Data",
        12: "Pages Containing Login Portals",
        13: "Various Online Devices",
        14: "Advisories and Vulnerabilities",
    }
    epilog = f"Dork categories:\n\n{json.dumps(categories, indent=4)}"

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            f"GHDB Scraper v{__version__} - Retrieve Google Hacking Database dorks from "
            "https://www.exploit-db.com/google-hacking-database."
        ),
        epilog=epilog,
    )
    parser.add_argument(
        "-i",
        action="store_true",
        help="Write each dork category to its own file.",
    )
    parser.add_argument(
        "-j",
        action="store_true",
        help="Save the GHDB JSON response to all_google_dorks.json.",
    )
    parser.add_argument(
        "-s",
        action="store_true",
        help="Save all the Google dorks to all_google_dorks.txt.",
    )
    args = parser.parse_args()

    retrieve_google_dorks(
        save_individual_categories_to_files=args.i,
        save_json_response_to_file=args.j,
        save_all_dorks_to_file=args.s,
    )
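
# Example invocations, assuming the file is saved as exploitdb.py as in the
# repository:
#
#     python exploitdb.py -s          # all dorks -> dorks/all_google_dorks.txt
#     python exploitdb.py -i -j -s    # per-category files, raw JSON, and the combined list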