Add diagnostic tool selection with optional DTOOL/DTOOLARGS flags in … #306

Open

wants to merge 2 commits into master
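In practice the new knobs are plain environment variables read by the script; a minimal sketch of the intended invocation (the namespace and resource names here are hypothetical):

# Default behavior: auto-detect the client, preferring oc when both oc and kubectl exist
./mdb_operator_diagnostic_data.sh my-namespace my-replica-set

# Force kubectl even when oc is installed
DTOOL=kubectl ./mdb_operator_diagnostic_data.sh my-namespace my-replica-set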
@@ -0,0 +1,6 @@
---
title: Add diagnostic tool selection with DTOOL and DTOOLARGS support
Contributor:
Suggested change
title: Add diagnostic tool selection with DTOOL and DTOOLARGS support
title: Add support to select diagnostic tool with DTOOL and DTOOLARGS

kind: feature
date: 2025-07-30
---

173 changes: 105 additions & 68 deletions public/support/mdb_operator_diagnostic_data.sh
@@ -2,6 +2,45 @@

set -Eeou pipefail

# Check if oc and kubectl exist
# (the || assignment keeps `set -e` from aborting when a tool is missing)
ocExists=0
command -v oc >/dev/null 2>&1 || ocExists=$?

kcExists=0
command -v kubectl >/dev/null 2>&1 || kcExists=$?
Contributor:
Suggested change
kcExists=$?
kubectlExists=$?

Contributor:

nit: I think it's better to be explicit about what exactly we are talking about. WDYT?


# Let the user override via environment variable
dToolUser=${DTOOL:-""}
Contributor:

kubectl and oc are not really diagnostic tools, right? They are just tools to interact with the API server, so I would recommend naming this something else. Maybe clientTool, kubeClientTool, or something like that.
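Applied to the lines above, the suggested rename would look something like this (a sketch of the reviewer's idea, not part of the PR):

# Let the user override via environment variable
kubeClientTool=${DTOOL:-""}
kubeClientToolArgs=${DTOOLARGS:-""}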

dToolUserArgs=${DTOOLARGS:-""}
Contributor:

What exactly is the motivation for introducing this? Do we want users to be able to configure the kubeconfig?
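One plausible use case (my assumption, the PR itself does not say): DTOOLARGS lets the user attach global flags, such as a kubeconfig or context, to every oc/kubectl call the script makes:

# Hypothetical: aim every call at a specific kubeconfig and context
DTOOL=kubectl DTOOLARGS="--kubeconfig /path/to/kubeconfig --context prod" \
./mdb_operator_diagnostic_data.sh my-namespace my-replica-set

Note that the script expands $dToolUserArgs unquoted, so whitespace-separated flags split as intended, but flag values containing spaces will not survive.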



if [[ -n "$dToolUser" ]]; then
echo "[INFO] DTOOL set by user: $dToolUser"
if command -v "$dToolUser" >/dev/null 2>&1; then
dTool="$dToolUser"
else
echo "Error: $dToolUser is not installed or not in PATH." >&2
exit 1
fi
else
echo "[INFO] No DTOOL set by user."

if [[ $ocExists -eq 0 && $kcExists -eq 0 ]]; then
echo "[INFO] Both oc and kubectl are available. Defaulting to oc."
dTool="oc"
elif [[ $ocExists -eq 0 ]]; then
echo "[INFO] Only oc found. Using oc."
dTool="oc"
elif [[ $kcExists -eq 0 ]]; then
echo "[INFO] Only kubectl found. Using kubectl."
dTool="kubectl"
else
echo "Error: Neither oc nor kubectl is available. Install at least one of them." >&2
exit 1
fi
fi

echo "[INFO] Using $dTool to run commands."
#
# mdb_operator_diagnostic_data.sh
#
@@ -12,6 +51,7 @@ set -Eeou pipefail
usage() {
local script_name
script_name=$(basename "${0}")

echo "------------------------------------------------------------------------------"
echo "Usage:"
echo "${script_name} <namespace> <mdb/om_resource_name> [<operator_namespace>] [<operator_name>] [--om] [--private]"
@@ -49,7 +89,7 @@ usage() {
contains() {
local e match=$1
shift
for e; do [[ "${e}" == "${match}" ]] && return 0; done
for e; do [[ "$e" == "$match" ]] && return 0; done
Contributor:

Is this change necessary? It might upset shellcheck. Or is the changed version better suited for shellcheck? 😆
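For what it's worth, shellcheck accepts both the braced and unbraced forms by default; only the optional rule SC2250, if enabled, prefers braces. The warning this diff is more likely to trip is SC2086 on the many unquoted $dTool $dToolUserArgs expansions. A sketch of an array-based form that avoids it (a style alternative, not what the PR does):

# Split DTOOLARGS once into an array, then expand it quoted everywhere.
# (bash 4.4+ expands an empty array cleanly even under set -u)
read -r -a dToolArgsArr <<<"${DTOOLARGS:-}"
"$dTool" "${dToolArgsArr[@]}" -n "$namespace" get pods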

return 1
}

@@ -67,120 +107,117 @@ contains "--om" "$@" && collect_om=1
if [ ${collect_om} == 1 ]; then
if [[ $3 == "--om" ]]; then
operator_namespace="${1}"
operator_name="mongodb-kubernetes-operator"
operator_name="mongodb-enterprise-operator"
om_resource_name="${2}"
elif [[ $4 == "--om" ]]; then
operator_namespace="${3}"
operator_name="mongodb-kubernetes-operator"
operator_name="mongodb-enterprise-operator"
om_resource_name="${2}"
elif [[ $5 == "--om" ]]; then
operator_namespace="${3}"
operator_name="$4"
om_resource_name="${2}"
fi
else
operator_name="${4:-mongodb-kubernetes-operator}"
operator_name="${4:-mongodb-enterprise-operator}"
operator_namespace="${3:-$1}"
fi

dump_all() {
local central=${1}
if [ "${central}" == "member" ]; then
if [ "${collect_om}" == 0 ]; then
pod_name=$(kubectl get pods -l controller -n "${namespace}" --no-headers=true | awk '{print $1}' | head -n 1)
database_container_pretty_name=$(kubectl -n "${namespace}" exec -it "${pod_name}" -- sh -c "cat /etc/*release" | grep "PRETTY_NAME" | cut -d'=' -f 2)
pod_name=$($dTool $dToolUserArgs get pods -l controller -n "${namespace}" --no-headers=true | awk '{print $1}' | head -n 1)
database_container_pretty_name=$($dTool $dToolUserArgs -n "${namespace}" exec -it "${pod_name}" -- sh -c "cat /etc/*release" | grep "PRETTY_NAME" | cut -d'=' -f 2)
echo "+ Database is running on: ${database_container_pretty_name}"
fi

statefulset_filename="statefulset.yaml"
echo "+ Saving StatefulSet state to ${statefulset_filename}"
kubectl -n "${namespace}" -l controller get "sts" -o yaml >"${log_dir}/${statefulset_filename}"
$dTool $dToolUserArgs -n "${namespace}" -l controller get "sts" -o yaml >"${log_dir}/${statefulset_filename}"

echo "+ Deployment Pods"
kubectl -n "${namespace}" get pods | grep -E "^${mdb_resource}-+"
$dTool $dToolUserArgs -n "${namespace}" get pods | grep -E "^${mdb_resource}-+"

echo "+ Saving Pods state to ${mdb_resource}-N.logs"
pods_in_namespace=$(kubectl get pods --namespace "${namespace}" --selector=controller=mongodb-enterprise-operator --no-headers -o custom-columns=":metadata.name")
pods_in_namespace=$($dTool $dToolUserArgs get pods --namespace "${namespace}" --selector=controller=mongodb-enterprise-operator --no-headers -o custom-columns=":metadata.name")

mdb_container_name="mongodb-enterprise-database"
for pod in ${pods_in_namespace}; do
if ! kubectl -n "${namespace}" get pod "${pod}" --no-headers -o custom-columns=":spec.containers[*].name" | grep -E "^${mdb_container_name}$" > /dev/null; then
continue
fi
kubectl -n "${namespace}" logs "${pod}" -c "${mdb_container_name}" --tail 2000 >"${log_dir}/${pod}.log"
kubectl -n "${namespace}" get event --field-selector "involvedObject.name=${pod}" >"${log_dir}/${pod}_events.log"
$dTool $dToolUserArgs -n "${namespace}" logs "${pod}" -c ${mdb_container_name} --tail 2000 >"${log_dir}/${pod}.log"
$dTool $dToolUserArgs -n "${namespace}" get event --field-selector "involvedObject.name=${pod}" >"${log_dir}/${pod}_events.log"
done

echo "+ Persistent Volumes"
kubectl -n "${namespace}" get pv
$dTool $dToolUserArgs -n "${namespace}" get pv

echo "+ Persistent Volume Claims"
kubectl -n "${namespace}" get pvc
$dTool $dToolUserArgs -n "${namespace}" get pvc

pv_filename="persistent_volumes.yaml"
echo "+ Saving Persistent Volumes state to ${pv_filename}"
kubectl -n "${namespace}" get pv -o yaml >"${log_dir}/${pv_filename}"
$dTool $dToolUserArgs -n "${namespace}" get pv -o yaml >"${log_dir}/${pv_filename}"

pvc_filename="persistent_volume_claims.yaml"
echo "+ Saving Persistent Volumes Claims state to ${pvc_filename}"
kubectl -n "${namespace}" get pvc -o yaml >"${log_dir}/${pvc_filename}"
$dTool $dToolUserArgs -n "${namespace}" get pvc -o yaml >"${log_dir}/${pvc_filename}"

services_filename="services.yaml"
echo "+ Services"
kubectl -n "${namespace}" get services
$dTool $dToolUserArgs -n "${namespace}" get services

echo "+ Saving Services state to ${services_filename}"
kubectl -n "${namespace}" get services -o yaml >"${log_dir}/${services_filename}"
$dTool $dToolUserArgs -n "${namespace}" get services -o yaml >"${log_dir}/${services_filename}"

echo "+ Saving Events for the Namespace"
kubectl -n "${namespace}" get events >"${log_dir}/events.log"
$dTool $dToolUserArgs -n "${namespace}" get events >"${log_dir}/events.log"

else
echo "++ MongoDB Resource Running Environment"

if [ -z "${CENTRAL_CLUSTER}" ]; then
crd_filename="crd_mdb.yaml"
echo "+ Saving MDB Customer Resource Definition into ${crd_filename}"
kubectl -n "${operator_namespace}" get crd/mongodb.mongodb.com -o yaml >"${log_dir}/${crd_filename}"
$dTool $dToolUserArgs -n "${operator_namespace}" get crd/mongodb.mongodb.com -o yaml >"${log_dir}/${crd_filename}"
mdb_resource_name="mdb/${mdb_resource}"
resource_filename="mdb_object_${mdb_resource}.yaml"
else
crd_filename="crd_mdbmc.yaml"
echo "+ Saving MDBMC Customer Resource Definition into ${crd_filename}"
echo kubectl -n "${operator_namespace}" get crd/mongodbmulticluster.mongodb.com -o yaml >"${log_dir}/${crd_filename}"
$dTool $dToolUserArgs -n "${operator_namespace}" get crd/mongodbmulticluster.mongodb.com -o yaml >"${log_dir}/${crd_filename}"
mdb_resource_name="mdbmc/${mdb_resource}"
resource_filename="mdbmc_object_${mdb_resource}.yaml"
fi

project_filename="project.yaml"
project_name=$(kubectl -n "${namespace}" get "${mdb_resource_name}" -o jsonpath='{.spec.opsManager.configMapRef.name}')
credentials_name=$(kubectl -n "${namespace}" get "${mdb_resource_name}" -o jsonpath='{.spec.credentials}')
project_name=$($dTool $dToolUserArgs -n "${namespace}" get "${mdb_resource_name}" -o jsonpath='{.spec.opsManager.configMapRef.name}')
credentials_name=$($dTool $dToolUserArgs -n "${namespace}" get "${mdb_resource_name}" -o jsonpath='{.spec.credentials}')

echo "+ MongoDB Resource Status"
kubectl -n "${namespace}" get "${mdb_resource_name}" -o yaml >"${log_dir}/${resource_filename}"
$dTool $dToolUserArgs -n "${namespace}" get "${mdb_resource_name}" -o yaml >"${log_dir}/${resource_filename}"

echo "+ Saving Project YAML file to ${project_filename}"
kubectl -n "${namespace}" get "configmap/${project_name}" -o yaml >"${log_dir}/${project_filename}"
credentials_user=$(kubectl -n "${namespace}" get "secret/${credentials_name}" -o jsonpath='{.data.user}' | base64 --decode)
$dTool $dToolUserArgs -n "${namespace}" get "configmap/${project_name}" -o yaml >"${log_dir}/${project_filename}"
credentials_user=$($dTool $dToolUserArgs -n "${namespace}" get "secret/${credentials_name}" -o jsonpath='{.data.user}' | base64 --decode)
echo "+ User configured is (credentials.user): ${credentials_user}"

echo "= To get the Secret Public API Key use: kubectl -n ${namespace} get secret/${credentials_name} -o jsonpath='{.data.publicApiKey}' | base64 --decode)"
echo "= To get the Secret Public API Key use: $dTool $dToolUserArgs -n ${namespace} get secret/${credentials_name} -o jsonpath='{.data.publicApiKey}' | base64 --decode)"

echo "+ Certificates (no private keys are captured)"
csr_filename="csr.text"
kubectl get csr | grep "${namespace}" || true
$dTool $dToolUserArgs get csr | grep "${namespace}" || true
echo "+ Saving Certificate state into ${csr_filename}"
kubectl describe "$(kubectl get csr -o name | grep "${namespace}")" >"${log_dir}/${csr_filename}" || true
$dTool $dToolUserArgs describe "$($dTool $dToolUserArgs get csr -o name | grep "${namespace}")" >"${log_dir}/${csr_filename}" || true

echo "++ MongoDBUser Resource Status"
mdbusers_filename="mdbu.yaml"
kubectl -n "${namespace}" get mdbu
$dTool $dToolUserArgs -n "${namespace}" get mdbu
echo "+ Saving MongoDBUsers to ${mdbusers_filename}"
kubectl -n "${namespace}" get mdbu >"${log_dir}/${mdbusers_filename}"
$dTool $dToolUserArgs -n "${namespace}" get mdbu >"${log_dir}/${mdbusers_filename}"

crdu_filename="crd_mdbu.yaml"
echo "+ Saving MongoDBUser Customer Resource Definition into ${crdu_filename}"
kubectl -n "${namespace}" get crd/mongodbusers.mongodb.com -o yaml >"${log_dir}/${crdu_filename}"
$dTool $dToolUserArgs -n "${namespace}" get crd/mongodbusers.mongodb.com -o yaml >"${log_dir}/${crdu_filename}"
fi

}
@@ -202,7 +239,7 @@ if [ -n "${CENTRAL_CLUSTER}" ]; then
exit 1
else
echo "starting with the CENTRAL_CLUSTER!"
kubectl config use-context "${CENTRAL_CLUSTER}"
$dTool $dToolUserArgs config use-context "${CENTRAL_CLUSTER}"
fi
fi

@@ -213,36 +250,36 @@ if [ -n "${MEMBER_CLUSTERS}" ]; then
fi
fi

if ! kubectl get "namespace/${namespace}" &>/dev/null; then
if ! $dTool $dToolUserArgs get "namespace/${namespace}" &>/dev/null; then
echo "Error fetching namespace. Make sure name ${namespace} for Namespace is correct."
exit 1
fi

if [ ${collect_om} == 0 ]; then
if [ -z "${CENTRAL_CLUSTER}" ]; then
if ! kubectl -n "${namespace}" get "mdb/${mdb_resource}" &>/dev/null; then
if ! $dTool $dToolUserArgs -n "${namespace}" get "mdb/${mdb_resource}" &>/dev/null; then
echo "Error fetching the MongoDB resource. Make sure the '${namespace}/${mdb_resource}' is correct."
exit 1
fi
else
if ! kubectl -n "${namespace}" get "mdbmc/${mdb_resource}" &>/dev/null; then
if ! $dTool $dToolUserArgs -n "${namespace}" get "mdbmc/${mdb_resource}" &>/dev/null; then
echo "Error fetching the MongoDB MultiCluster resource. Make sure the '${namespace}/${mdb_resource}' is correct."
exit 1
fi
fi
fi

echo "++ Versions"
mdb_operator_pod=$(kubectl -n "${operator_namespace}" get pods -l "app.kubernetes.io/component=controller" -o name | cut -d'/' -f 2)
mdb_operator_pod=$($dTool $dToolUserArgs -n "${operator_namespace}" get pods -l "app.kubernetes.io/component=controller" -o name | cut -d'/' -f 2)
echo "${operator_namespace}"
echo "+ Operator Pod: pod/${mdb_operator_pod}"

if ! kubectl -n "${operator_namespace}" get "pod/${mdb_operator_pod}" &>/dev/null; then
if ! $dTool $dToolUserArgs -n "${operator_namespace}" get "pod/${mdb_operator_pod}" &>/dev/null; then
echo "Error fetching the MongoDB Operator Deployment. Make sure the pod/${mdb_operator_pod} exist and it is running."
exit 1
fi

if ! kubectl -n "${namespace}" get om -o wide &>/dev/null; then
if ! $dTool $dToolUserArgs -n "${namespace}" get om -o wide &>/dev/null; then
echo "Error fetching the MongoDB OpsManager Resource."
fi

@@ -252,10 +289,10 @@ fi

mdb_operator_filename="operator.yaml"
echo "+ Saving Operator Deployment into ${mdb_operator_filename}"
kubectl -n "${operator_namespace}" get "pod/${mdb_operator_pod}" -o yaml >"${log_dir}/${mdb_operator_filename}"
$dTool $dToolUserArgs -n "${operator_namespace}" get "pod/${mdb_operator_pod}" -o yaml >"${log_dir}/${mdb_operator_filename}"

echo "+ Kubernetes Version Reported by kubectl"
kubectl version
echo "+ Kubernetes Version Reported by $dTool"
$dTool $dToolUserArgs version

if type oc &>/dev/null; then
echo "+ Kubernetes Version Reported by oc"
@@ -264,42 +301,42 @@ fi

operator_logs_filename="${operator_name}_${current_date}.logs"
echo "+ Saving Operator logs to file ${operator_logs_filename}"
kubectl -n "${operator_namespace}" logs "pod/${mdb_operator_pod}" --tail 2000 >"${log_dir}/${operator_logs_filename}"
$dTool $dToolUserArgs -n "${operator_namespace}" logs "pod/${mdb_operator_pod}" --tail 2000 >"${log_dir}/${operator_logs_filename}"

operator_container_pretty_name=$(kubectl -n "${operator_namespace}" exec -it "${mdb_operator_pod}" -- sh -c "cat /etc/*release" | grep "PRETTY_NAME" | cut -d'=' -f 2)
operator_container_pretty_name=$($dTool $dToolUserArgs -n "${operator_namespace}" exec -it "${mdb_operator_pod}" -- sh -c "cat /etc/*release" | grep "PRETTY_NAME" | cut -d'=' -f 2)
echo "+ Operator is running on: ${operator_container_pretty_name}"

echo "++ Kubernetes Cluster Ecosystem"
echo "+ Kubectl Cluster Information"
kubectl cluster-info
echo "+ $dTool Cluster Information"
$dTool $dToolUserArgs cluster-info

if [ ${private_mode} == 0 ]; then
kubectl_cluster_info_filename="kubectl_cluster_info_${current_date}.logs"
echo "+ Saving Cluster Info to file ${kubectl_cluster_info_filename} (this might take a few minutes)"
kubectl cluster-info dump | gzip >"${log_dir}/${kubectl_cluster_info_filename}.gz"
dTool_cluster_info_filename="${dTool}_cluster_info_${current_date}.logs"
echo "+ Saving Cluster Info to file ${dTool_cluster_info_filename} (this might take a few minutes)"
$dTool $dToolUserArgs cluster-info dump | gzip >"${log_dir}/${dTool_cluster_info_filename}.gz"
else
echo "= Skipping Kubectl cluster information dump, use --private to enable."
echo "= Skipping $dTool cluster information dump, use --private to enable."
fi

kubectl_sc_dump_filename="kubectl_storage_class_${current_date}.yaml"
kubectl get storageclass -o yaml >"${log_dir}/${kubectl_sc_dump_filename}"
dTool_sc_dump_filename="${dTool}_storage_class_${current_date}.yaml"
$dTool $dToolUserArgs get storageclass -o yaml >"${log_dir}/${dTool_sc_dump_filename}"

nodes_filename="nodes.yaml"
echo "+ Nodes"
kubectl get nodes
$dTool $dToolUserArgs get nodes

echo "+ Saving Nodes full state to ${nodes_filename}"
kubectl get nodes -o yaml >"${log_dir}/${nodes_filename}"
$dTool $dToolUserArgs get nodes -o yaml >"${log_dir}/${nodes_filename}"

if [ ${collect_om} == 0 ]; then
if [ -n "${CENTRAL_CLUSTER}" ]; then
for member_cluster in ${MEMBER_CLUSTERS}; do
echo "Dumping diagnostics for context ${member_cluster}"
kubectl config use-context "${member_cluster}"
$dTool $dToolUserArgs config use-context "${member_cluster}"
dump_all "member"
done
echo "Dumping diagnostics for context ${CENTRAL_CLUSTER}"
kubectl config use-context "${CENTRAL_CLUSTER}"
$dTool $dToolUserArgs config use-context "${CENTRAL_CLUSTER}"
dump_all "central"
else
dump_all "member"
@@ -310,24 +347,24 @@ fi
if [ ${collect_om} == 1 ]; then
ops_manager_filename="ops_manager.yaml"
echo "+ Saving OpsManager Status"
kubectl -n "${namespace}" get om -o wide
$dTool $dToolUserArgs -n "${namespace}" get om -o wide
echo "+ Saving OpsManager Status to ${ops_manager_filename}"
kubectl -n "${namespace}" get om -o yaml >"${log_dir}/${ops_manager_filename}"
$dTool $dToolUserArgs -n "${namespace}" get om -o yaml >"${log_dir}/${ops_manager_filename}"
echo "+ Saving Pods state to ${om_resource_name}-N.logs"
pods_in_namespace=$(kubectl -n "${namespace}" get pods -o name -l "app=${om_resource_name}-svc" | cut -d'/' -f 2)
pods_in_namespace=$($dTool $dToolUserArgs -n "${namespace}" get pods -o name -l "app=${om_resource_name}-svc" | cut -d'/' -f 2)
for pod in ${pods_in_namespace}; do
kubectl -n "${namespace}" logs "${pod}" --tail 2000 >"${log_dir}/${pod}.log"
$dTool $dToolUserArgs -n "${namespace}" logs "${pod}" --tail 2000 >"${log_dir}/${pod}.log"
echo "Collecting Events: ${pod}"
kubectl -n "${namespace}" get event --field-selector "involvedObject.name=${pod}" >"${log_dir}/${pod}_events.log"
$dTool $dToolUserArgs -n "${namespace}" get event --field-selector "involvedObject.name=${pod}" >"${log_dir}/${pod}_events.log"
done
echo "+ Saving AppDB Pods state to ${om_resource_name}-db-N-<container_name>.logs"
pods_in_namespace=$(kubectl -n "${namespace}" get pods -o name -l "app=${om_resource_name}-db-svc" | cut -d'/' -f 2)
pods_in_namespace=$($dTool $dToolUserArgs -n "${namespace}" get pods -o name -l "app=${om_resource_name}-db-svc" | cut -d'/' -f 2)
for pod in ${pods_in_namespace}; do
kubectl -n "${namespace}" logs "${pod}" -c "mongod" --tail 2000 >"${log_dir}/${pod}-mongod.log"
kubectl -n "${namespace}" logs "${pod}" -c "mongodb-agent" --tail 2000 >"${log_dir}/${pod}-mongodb-agent.log"
kubectl -n "${namespace}" logs "${pod}" -c "mongodb-agent-monitoring" --tail 2000 >"${log_dir}/${pod}-mongodb-agent-monitoring.log"
$dTool $dToolUserArgs -n "${namespace}" logs "${pod}" -c "mongod" --tail 2000 >"${log_dir}/${pod}-mongod.log"
$dTool $dToolUserArgs -n "${namespace}" logs "${pod}" -c "mongodb-agent" --tail 2000 >"${log_dir}/${pod}-mongodb-agent.log"
$dTool $dToolUserArgs -n "${namespace}" logs "${pod}" -c "mongodb-agent-monitoring" --tail 2000 >"${log_dir}/${pod}-mongodb-agent-monitoring.log"
echo "Collecting Events: ${pod}"
kubectl -n "${namespace}" get event --field-selector "involvedObject.name=${pod}" >"${log_dir}/${pod}_events.log"
$dTool $dToolUserArgs -n "${namespace}" get event --field-selector "involvedObject.name=${pod}" >"${log_dir}/${pod}_events.log"
done
fi
