This repository has been archived by the owner on Sep 1, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Andrey Ustyuzhanin
committed
Aug 16, 2016
1 parent
5570bc8
commit 1d4ec81
Showing
11 changed files
with
379 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
srv/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Makefile for building & starting everware-containers | ||
# arguments can be supplied by -e: | ||
# | ||
# IMAGE -- name of image to use | ||
# | ||
|
||
# Further variables that can be overridden the same way as IMAGE:
#   CLUSTER_HOSTS -- file listing the cluster nodes, one host per line
#   HEADHOST -- host name advertised for etcd, the swarm master and bosun

# Docker client used on the machine where make runs.
DOCKER_CMD = docker
# Docker client invocation executed on each cluster node through parallel-ssh.
DOCKER_CMD_NODE = sudo docker -H tcp://0.0.0.0:2375
# parallel-ssh wrapper; host key checking is switched off for every connection.
PSSH = parallel-ssh -O StrictHostKeyChecking=no
CLUSTER_HOSTS = etc/cluster.txt
IMAGE ?= yandex/rep:0.6.5
# Directory of this Makefile, derived with make built-ins so that no shell is
# forked at parse time. It must be computed before any include line because
# it relies on this file being the last entry of MAKEFILE_LIST.
HERE := $(patsubst %/,%,$(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
HEADHOST = head.haze.yandex.net
# Local directory for downloaded tools (etcd, scollector).
SRVDIR = srv
|
||
|
||
include swarm.makefile | ||
include docker.makefile | ||
include system.makefile | ||
include bosun.makefile | ||
|
||
# The included makefiles define rules before this point, so without an
# explicit default goal a bare `make` would run the first included target
# instead of printing the usage text.
.DEFAULT_GOAL := help
.PHONY: help

# Prints the documented variables (comment lines of the form
# "NAME, two dashes, description") and every line carrying a double-hash
# annotation from all parsed makefiles. The filter on "grep -F" drops the
# recipe line below, which would otherwise match itself.
help: ## show variables and documented targets
	@echo "Usage: make [-e VARIABLE=VALUE] targets"
	@echo "variables:"
	@grep -h "#\s\+\w\+ -- " $(MAKEFILE_LIST) | sed "s/#\s//"
	@echo
	@echo targets and corresponding dependencies:
	@grep -Fh "##" $(MAKEFILE_LIST) | grep -Fv "grep -F" | sed -e 's/\\$$//' -e 's/^/ /' -e 's/##//'
|
||
|
||
# Real directory target: the recipe only runs while the directory is missing.
$(SRVDIR): ## create srv for etcd & scollector
	mkdir -p $@
|
||
.PHONY: uptime pull

# Both targets read the host list, so it is a prerequisite (as it already is
# for the other cluster targets) and a missing file is reported by make.
uptime: ${CLUSTER_HOSTS} ## uptime cluster
	${PSSH} -h ${CLUSTER_HOSTS} -i uptime

# The parallel-ssh timeout is set to 0 for the pull.
pull: ${CLUSTER_HOSTS} ## pull image to cluster nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i -t 0 '${DOCKER_CMD_NODE} pull ${IMAGE}'
|
||
.PHONY: ps-user-containers count-user-containers images

ps-user-containers: ${CLUSTER_HOSTS} ## list all containers (running and stopped) on the cluster
	${PSSH} -h ${CLUSTER_HOSTS} -i '${DOCKER_CMD_NODE} ps -a'

# With the quiet flag docker prints one container id per line and no header,
# so the line count is the number of running containers on each node.
count-user-containers: ${CLUSTER_HOSTS} ## count container running on the cluster
	${PSSH} -h ${CLUSTER_HOSTS} -i '${DOCKER_CMD_NODE} ps -q | wc -l'

images: ${CLUSTER_HOSTS} ## list images created at clusters
	${PSSH} -h ${CLUSTER_HOSTS} -i '${DOCKER_CMD_NODE} images'
|
||
.PHONY: rm-images rm-user-containers

# xargs must not run docker when the id list is empty: docker rmi and
# docker rm both fail when called without arguments, which would mark an
# already clean node as failed.
rm-images: ${CLUSTER_HOSTS} ## remove all images
	${PSSH} -h ${CLUSTER_HOSTS} -i '${DOCKER_CMD_NODE} images -q | xargs --no-run-if-empty ${DOCKER_CMD_NODE} rmi'

rm-user-containers: ${CLUSTER_HOSTS} ## stop & remove user containers
	${PSSH} -h ${CLUSTER_HOSTS} --timeout=0 -i '${DOCKER_CMD_NODE} ps -aq | xargs --no-run-if-empty ${DOCKER_CMD_NODE} rm -f'
|
||
# Declared phony so that a stray file named df or mdu cannot mask the target.
.PHONY: df mdu

df: ${CLUSTER_HOSTS} ## check disk free space on cluster nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i 'df -h /'

# Runs locally: sizes in megabytes of the first-level entries of /mnt/shared.
mdu: ## mfs du
	du -m --max-depth 1 /mnt/shared
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
|
||
BOSUN_IMAGE = stackexchange/bosun
# Directory that receives the scollector binary and its configuration.
SCOLLECTOR_SHARED = /mnt/shared/scollector

.PHONY: bosun-start bosun-rm bosun-stop bosun-restart bosun-exec bosun-update-conf

# Every target goes through ${DOCKER_CMD}, so overriding that variable
# affects the whole bosun container lifecycle and not only bosun-start.
bosun-start: ## start monitoring (bosun)
	${DOCKER_CMD} run -d -p 4242:4242 -p 8070:8070 --name bosun ${BOSUN_IMAGE}

bosun-rm: ## force-remove the bosun container
	${DOCKER_CMD} rm -f bosun

bosun-stop: ## stop the bosun container
	${DOCKER_CMD} stop bosun

bosun-restart: ## restart the bosun container
	${DOCKER_CMD} restart bosun

bosun-exec: ## open an interactive bash inside the bosun container
	${DOCKER_CMD} exec -ti bosun bash

bosun-update-conf: ## copy etc/bosun.conf into the container and restart it
	${DOCKER_CMD} cp etc/bosun.conf bosun:/data/bosun.conf
	${DOCKER_CMD} restart bosun
|
||
.PHONY: scollector-install

# Steps:
#  1. download the scollector binary into ${SRVDIR} unless a non-empty copy
#     is already there; the download goes to a temporary name and is renamed
#     only after it succeeded, so a failed wget cannot leave an empty file
#     that later runs would mistake for the binary;
#  2. copy the binary, its configuration and the metric scripts to
#     ${SCOLLECTOR_SHARED} (created if missing) and fill in the #HEAD# and
#     #BASE# placeholders;
#  3. install the supervisor program definition on every node and on the
#     head host, reloading supervisor only when the copy succeeded.
scollector-install: ${CLUSTER_HOSTS} ${SRVDIR} ## install scollector on the cluster and the head host
	[ -s ${SRVDIR}/scollector-linux ] || ( \
	    wget https://github.com/bosun-monitor/bosun/releases/download/0.5.0/scollector-linux-386 \
	        -O ${SRVDIR}/scollector-linux.tmp && \
	    chmod +x ${SRVDIR}/scollector-linux.tmp && \
	    mv -f ${SRVDIR}/scollector-linux.tmp ${SRVDIR}/scollector-linux \
	)
	sudo mkdir -p ${SCOLLECTOR_SHARED}
	sudo cp ${SRVDIR}/scollector-linux etc/scollector_supervisord.conf etc/scollector.toml ${SCOLLECTOR_SHARED}
	sudo cp -r scollector_metrics ${SCOLLECTOR_SHARED}
	sudo sed -i -e "s/#HEAD#/${HEADHOST}/" -e "s|#BASE#|${SCOLLECTOR_SHARED}|" \
	    ${SCOLLECTOR_SHARED}/scollector_supervisord.conf ${SCOLLECTOR_SHARED}/scollector.toml
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'sudo cp ${SCOLLECTOR_SHARED}/scollector_supervisord.conf /etc/supervisor/conf.d && sudo supervisorctl reload'
|
||
.PHONY: scollector-reload scollector-start scollector-check scollector-stop

# All four targets address the cluster nodes plus the head host and therefore
# share the host list prerequisite.
scollector-reload: ${CLUSTER_HOSTS} ## reload supervisor on the nodes and the head host
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'sudo supervisorctl reload'

scollector-start: ${CLUSTER_HOSTS} ## start scollector through supervisor
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'sudo supervisorctl start scollector'

scollector-check: ${CLUSTER_HOSTS} ## print the pid of the scollector process on each host
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'pgrep -f scollector-linux'

scollector-stop: ${CLUSTER_HOSTS} ## stop scollector through supervisor
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'sudo supervisorctl stop scollector'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
|
||
# Daemon options written to ${DOCKER_CONFIG} by pdocker-config.
DOCKER_OPTS = --ipv6 -H tcp://[::]:2375 -H unix:///var/run/docker.sock --fixed-cidr-v6=fc00::/64
DOCKER_CONFIG = /etc/default/docker
# Exact docker-engine package version installed by pdocker-install.
DOCKER_VERSION = 1.11.2-0~trusty

.PHONY: install_docker pdocker-repo pdocker-install pdocker-version

# The steps are plain prerequisites and run in the order listed during a
# serial make run.
install_docker: pdocker-repo pdocker-install pdocker-version ## add the apt repo, install docker-engine, print its version

pdocker-repo: ${CLUSTER_HOSTS} ## register the docker apt repository on the nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i 'echo "deb https://apt.dockerproject.org/repo ubuntu-trusty main" | sudo tee /etc/apt/sources.list.d/docker.list; cat /etc/apt/sources.list.d/docker.list'
	${PSSH} -h ${CLUSTER_HOSTS} -i 'sudo apt-get update'

pdocker-install: ${CLUSTER_HOSTS} ## install the pinned docker-engine version on the nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i 'sudo apt-get install -y --force-yes docker-engine=${DOCKER_VERSION}'

pdocker-version: ${CLUSTER_HOSTS} ## print docker client and server versions of the nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i "${DOCKER_CMD_NODE} version"
|
||
.PHONY: pdocker-config pdocker-restart pdocker-stop pdocker-start

# Drops any existing DOCKER_OPTS line from ${DOCKER_CONFIG} and appends a new
# one. The backslash-escaped single quotes reach the remote shell as literal
# quotes, so the written line is DOCKER_OPTS='...'.
pdocker-config: ${CLUSTER_HOSTS} ## write DOCKER_OPTS to the docker defaults file on the nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i "sudo sed -i '/^DOCKER_OPTS/d' ${DOCKER_CONFIG} ; echo DOCKER_OPTS=\'${DOCKER_OPTS}\' | sudo tee -a ${DOCKER_CONFIG}"

pdocker-restart: ${CLUSTER_HOSTS} ## restart dockers on cluster
	${PSSH} -h ${CLUSTER_HOSTS} -i "sudo service docker restart"

pdocker-stop: ${CLUSTER_HOSTS} ## stop dockers on cluster
	${PSSH} -h ${CLUSTER_HOSTS} -i "sudo service docker stop"

pdocker-start: ${CLUSTER_HOSTS} ## start dockers on cluster
	${PSSH} -h ${CLUSTER_HOSTS} -i "sudo service docker start"
|
||
.PHONY: pdocker-remove pdocker-check pdocker-clean-key

# NOTE(review): this deletes /var/lib/docker without stopping the daemon
# first; presumably pdocker-stop is expected to be run beforehand (confirm).
pdocker-remove: ${CLUSTER_HOSTS} ## remove docker directories
	${PSSH} -h ${CLUSTER_HOSTS} -i 'sudo rm -rf /var/lib/docker'

# Succeeds on a node only if a docker package is installed and a
# "docker daemon" process is running.
pdocker-check: ${CLUSTER_HOSTS} ## check docker version
	${PSSH} -h ${CLUSTER_HOSTS} -i "dpkg -l | grep docker && ps ax|grep 'docker daemon'|grep -v grep"

pdocker-clean-key: ${CLUSTER_HOSTS} ## remove docker key (should be different for different nodes, otherwise swarm doesn't fly)
	${PSSH} -h ${CLUSTER_HOSTS} -i 'sudo rm -f /etc/docker/key.json && sudo service docker restart'
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
tsdbHost = localhost:4242 | ||
stateFile = /data/bosun.state | ||
ledisBindAddr = 0.0.0.0:9565 | ||
smtpHost = mx.yandex-team.ru:25 | ||
emailFrom = [email protected] | ||
|
||
|
||
# Notification template referenced by every alert in this file. The subject shows the last status, the alert name and the host tag; the HTML body lists the alert's computations, a graph of the alert's $metric variable and all tags of the alert group. | ||
template test { | ||
subject = {{.Last.Status}}: {{.Alert.Name}} on {{.Group.host}} | ||
body = `<p>Alert: {{.Alert.Name}} triggered on {{.Group.host}} | ||
<hr> | ||
<p><strong>Computation</strong> | ||
<table> | ||
{{range .Computations}} | ||
<tr><td><a href="{{$.Expr .Text}}">{{.Text}}</a></td><td>{{.Value}}</td></tr> | ||
{{end}} | ||
</table> | ||
<hr> | ||
{{ .Graph .Alert.Vars.metric }} | ||
<hr> | ||
<p><strong>Relevant Tags</strong> | ||
<table> | ||
{{range $k, $v := .Group}} | ||
<tr><td>{{$k}}</td><td>{{$v}}</td></tr> | ||
{{end}} | ||
</table>` | ||
} | ||
|
||
|
||
# email the address below and repeat once a day (next = default, timeout = 1d) until the alert is acknowledged | ||
notification default { | ||
email = [email protected] | ||
next = default | ||
timeout = 1d | ||
} | ||
|
||
# Average of the summed os.cpu rate per host over the last hour.
# critNotification is set as well: with only warnNotification a host that
# reaches the critical threshold would not trigger any notification.
alert cpu.is.too.high {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("sum:rate{counter,,1}:os.cpu{host=*}", "1h", "")
    $avgcpu = avg($metric)
    crit = $avgcpu > 80
    warn = $avgcpu > 50
}
|
||
# Average number of used sockets per host over the last hour.
# critNotification is set as well: with only warnNotification a host that
# reaches the critical threshold would not trigger any notification.
alert sockets.is.too.high {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("sum:linux.net.sockets.used{host=*}", "1h", "")
    $avg_sockets = avg($metric)
    crit = $avg_sockets > 30000
    warn = $avg_sockets > 10000
}
|
||
# Largest value, over the last hour, of the "dev" aggregation of the active
# container count, downsampled to 3-minute averages.
# The metric name is spelled exactly as the collector script emits it.
# critNotification is set as well: with only warnNotification the critical
# threshold would not trigger any notification.
alert docker.containers.active.dev.high {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("dev:3m-avg:docker.contatiners.active", "1h", "")
    $maxdev = max($metric)
    warn = $maxdev > 2
    crit = $maxdev > 3
}
|
||
# Peak number of active containers per host over the last 30 minutes.
# The metric name is spelled exactly as the collector script emits it.
# critNotification is set as well: with only warnNotification the critical
# threshold would not trigger any notification.
alert docker.containers.more.than.cpus {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("max:docker.contatiners.active{host=*}", "30m", "")
    $max_containers = max($metric)
    warn = $max_containers > 12
    crit = $max_containers > 16
}
|
||
# Lowest percentage of free disk space per host over the last hour.
# critNotification is set as well: with only warnNotification the critical
# threshold would not trigger any notification.
alert disk.free.space.too.low {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("min:os.disk.fs.percent_free{host=*}", "1h", "")
    $min_free = min($metric)
    warn = $min_free < 50
    crit = $min_free < 10
}
|
||
# Lowest percentage of free memory per host over the last 10 minutes.
# critNotification is set as well: with only warnNotification the critical
# threshold would not trigger any notification.
alert mem.free.too.low {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("min:os.mem.percent_free{host=*}", "10m", "")
    $min_free = min($metric)
    warn = $min_free < 10
    crit = $min_free < 5
}
|
||
# Peak rate of incoming IP octets per host over the last hour.
# critNotification is set as well: with only warnNotification the critical
# threshold would not trigger any notification.
alert traffic.in.is.too.high {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("sum:rate:linux.net.stat.ip.inoctets{host=*}", "1h", "")
    $max_rate = max($metric)
    crit = $max_rate > 200000000
    warn = $max_rate > 10000000
}
|
||
# Peak rate of outgoing IP octets per host over the last hour.
# critNotification is set as well: with only warnNotification the critical
# threshold would not trigger any notification.
alert traffic.out.is.too.high {
    warnNotification = default
    critNotification = default
    template = test
    $metric = q("sum:rate:linux.net.stat.ip.outoctets{host=*}", "1h", "")
    $max_rate = max($metric)
    crit = $max_rate > 200000000
    warn = $max_rate > 10000000
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
w-1.haze.yandex.net | ||
w-2.haze.yandex.net | ||
w-3.haze.yandex.net |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ColDir = "#BASE#/scollector_metrics" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[program:scollector] | ||
command=#BASE#/scollector-linux -d -h #HEAD#:8070 | ||
autostart=true | ||
autorestart=true | ||
stderr_logfile=/var/log/scollector.err.log | ||
stdout_logfile=/var/log/scollector.out.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash

# Prints three "<metric> <unix time> <count>" lines: the number of images,
# of running containers and of all containers known to the local docker.
# The metric names are kept exactly as spelled, since the alert definitions
# query them under these names.
now=$(date +%s)

printf 'docker.images %s %s\n' "$now" "$(docker images -q | wc -l)"
printf 'docker.contatiners.active %s %s\n' "$now" "$(docker ps -q | wc -l)"
printf 'docker.contatiners.all %s %s\n' "$now" "$(docker ps -a -q | wc -l)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
|
||
SWARM_PORT = 4000
# Client options that point the local docker client at the swarm master.
SWARM_OPT = -H tcp://${HEADHOST}:${SWARM_PORT}
SWARM_IMAGE = swarm:1.2.3
CLUSTER_SWARM = etc/_cluster_swarm.txt
# The release tag is kept in one place; ETCD is the archive and directory name.
ETCD_VERSION = v2.2.5
ETCD = etcd-${ETCD_VERSION}-linux-amd64

.PHONY: etcd-start

# Downloads and unpacks etcd into $(SRVDIR) on first use, then starts it in
# the background from its own directory.
#  * curl fails on HTTP errors and tar only runs after a successful download,
#    so an error page is never unpacked as if it were the archive;
#  * stderr is redirected into etcd.log together with stdout so that the
#    tail below shows the start-up messages even without a terminal.
etcd-start: $(SRVDIR) ## download etcd if needed and start it in the background
	[ -d $(SRVDIR)/$(ETCD) ] || ( \
	    curl -fL https://github.com/coreos/etcd/releases/download/$(ETCD_VERSION)/$(ETCD).tar.gz \
	        -o $(SRVDIR)/$(ETCD).tar.gz && \
	    tar xzf $(SRVDIR)/$(ETCD).tar.gz -C $(SRVDIR) )
	cd $(SRVDIR)/$(ETCD) && \
	nohup ./etcd \
	    -initial-advertise-peer-urls http://${HEADHOST}:2380 \
	    -listen-peer-urls="http://0.0.0.0:2380,http://0.0.0.0:7001" \
	    -listen-client-urls="http://0.0.0.0:2379,http://0.0.0.0:4001" \
	    -advertise-client-urls="http://${HEADHOST}:2379" \
	    -initial-cluster-token etcd-01 \
	    -initial-cluster="default=http://${HEADHOST}:2380" \
	    -initial-cluster-state new > etcd.log 2>&1 &
	sleep 1
	tail $(SRVDIR)/$(ETCD)/etcd.log
|
||
.PHONY: etcd-stop etcd-check

# pkill exits with status 1 when no process matched. That is accepted here so
# that swarm-stop still succeeds when etcd is already down; any other pkill
# failure is still reported. The exact-name match keeps processes whose name
# merely contains "etcd" out of the kill list.
etcd-stop: ## kill the local etcd process
	pkill -9 -x etcd || [ $$? -eq 1 ]

# The URL is quoted because it contains a question mark, which the shell
# would otherwise treat as a glob character. curl fails on HTTP errors.
etcd-check: ## dump all etcd keys as pretty-printed JSON
	curl -fsSL -g 'http://${HEADHOST}:2379/v2/keys/?recursive=true' | json_pp
|
||
# Node list with the docker port appended to every host. The generating command is commented out, so this rule does not write the file; swarm-start-master still lists it as a prerequisite but starts the master with etcd discovery. | ||
# NOTE(review): presumably a leftover from file-based discovery; confirm before removing. | ||
${CLUSTER_SWARM}: ${CLUSTER_HOSTS} | ||
# cat ${CLUSTER_HOSTS} | sed 's/$$/:2375/' > ${CLUSTER_SWARM} | ||
|
||
.PHONY: swarm-check _swarm-check-master-stopped

swarm-check: etcd-check swarm-info ## check etcd and the swarm master
	@echo "OK"

# Guard used by swarm-start-master: fails when the local docker already runs
# a container whose listing mentions swarm. Written for plain sh, because
# make runs recipes with /bin/sh unless SHELL is set and the double-bracket
# test is a bash extension that other shells reject, which would silently
# disable the guard.
_swarm-check-master-stopped:
	@if ${DOCKER_CMD} ps | grep -q swarm ; then echo "swarm master is already running" ; exit 1; fi
|
||
.PHONY: swarm-start-master swarm-stop-master swarm-logs

# The master publishes its API on ${SWARM_PORT} and discovers nodes through
# the etcd instance on ${HEADHOST}.
swarm-start-master: _swarm-check-master-stopped ${CLUSTER_SWARM} ## start swarm master
	${DOCKER_CMD} run -d -p ${SWARM_PORT}:2375 --name=swarm_master ${SWARM_IMAGE} manage --strategy random etcd://${HEADHOST}:2379

# Removes the container only when one exists, so the target also succeeds
# when the master is not running.
swarm-stop-master: ## stop swarm master
	if ${DOCKER_CMD} ps -a | grep swarm_master ; then \
	    ${DOCKER_CMD} rm -f swarm_master ; \
	fi

swarm-logs: ## show the log of the swarm master container
	${DOCKER_CMD} logs swarm_master
|
||
.PHONY: swarm-restart-master swarm-stop swarm-start swarm-restart

# The steps of these targets must run strictly one after another (the master
# needs etcd, a restart must finish stopping before it starts). Prerequisite
# order gives no such guarantee under "make -j", so each step is a separate
# sub-make invocation of the top-level makefile.
swarm-restart-master: ## restart swarm master
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-stop-master
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-start-master

swarm-stop: ## unregister the nodes, stop the swarm master and etcd
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-unregister-nodes
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-stop-master
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) etcd-stop
	@echo Stop OK

swarm-start: ## start etcd and the swarm master, then register the nodes
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) etcd-start
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-start-master
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-register-nodes
	@echo Start OK

swarm-restart: ## stop and start the whole swarm setup
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-stop
	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) swarm-start
	@echo Restart OK
|
||
.PHONY: swarm-info swarm-ps swarm-psa

# These targets run the local docker client against the swarm master
# endpoint given by ${SWARM_OPT}.
swarm-info: ## check swarm
	${DOCKER_CMD} ${SWARM_OPT} info

swarm-ps: ## list containers running in swarm
	${DOCKER_CMD} ${SWARM_OPT} ps

swarm-psa: ## list all containers in swarm
	${DOCKER_CMD} ${SWARM_OPT} ps -a
|
||
.PHONY: swarm-register-nodes swarm-unregister-nodes

# Each node looks up its own address and advertises it in brackets, i.e. as
# an IPv6 literal. Only the first "IPv6 address" line of the host output is
# used: lines of other record types have no fifth field, and more than one
# address would otherwise produce a multi-line value.
swarm-register-nodes: ${CLUSTER_HOSTS} ## start a swarm join container on every node
	${PSSH} -h ${CLUSTER_HOSTS} -i 'MYIP=$$(host `hostname -f` | awk "/IPv6 address/ {print \$$5; exit}") ; docker run --name swarm_node -d ${SWARM_IMAGE} join --advertise=[$$MYIP]:2375 etcd://${HEADHOST}:2379'

# Removes the join container only where one exists, so the target also
# succeeds on nodes that were never registered.
swarm-unregister-nodes: ${CLUSTER_HOSTS} ## remove the swarm join container from every node
	${PSSH} -h ${CLUSTER_HOSTS} -i 'if docker ps -a|grep swarm_node ; then docker rm -f swarm_node ; fi'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
|
||
NAMESERVER = 2a02:6b8:0:3400::1023

.PHONY: install-dns

# On every node and on the head host: reconfigure resolvconf, disable its
# updates, then rewrite /etc/resolv.conf so that ${NAMESERVER} is the only
# nameserver entry (the sed script appends the new line after the last line
# and deletes every existing nameserver line).
# Afterwards an ip6tables MASQUERADE rule is inserted on each node unless a
# matching rule is already listed.
install-dns: ${CLUSTER_HOSTS} ## point the hosts at ${NAMESERVER} and enable IPv6 masquerading on the nodes
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'echo "debconf resolvconf/linkify-resolvconf select true" | \
	    sudo debconf-set-selections && sudo dpkg-reconfigure -f noninteractive resolvconf ; \
	    sudo resolvconf --disable-updates && (sudo resolvconf --updates-are-enabled && echo Hmm || echo OK) ; \
	    sudo sed -i -e "\$$ a nameserver ${NAMESERVER}" -e "/^nameserver/ d" /etc/resolv.conf \
	    '
	${PSSH} -h ${CLUSTER_HOSTS} -i "sudo ip6tables -t nat -L POSTROUTING | grep MASQ || sudo ip6tables -t nat -I POSTROUTING -j MASQUERADE"
|
||
.PHONY: install-nfs-server

# Runs locally. The mount check aborts the target unless /mnt/shared is a
# mounted file system. The export line is replaced rather than duplicated on
# repeated runs. exportfs re-reads /etc/exports, which "service start" alone
# does not do when the server is already running.
install-nfs-server: ## export /mnt/shared over NFS from this machine
	sudo apt-get install -y nfs-kernel-server
	mount | grep /mnt/shared
	sudo sed -i -e "\$$ a /mnt/shared *(rw,sync,no_root_squash)" -e "/^\/mnt\/shared/ d" /etc/exports
	sudo service nfs-kernel-server start
	sudo exportfs -ra
|
||
.PHONY: install-nfs-client

# On every node: replace the fstab entry for ${HEADHOST}, install the NFS
# client package, create the mount point and mount it. The mount is skipped
# when /mnt/shared is already a mount point, so the target can be re-run.
install-nfs-client: ${CLUSTER_HOSTS} ## mount /mnt/shared from the head host on the nodes
	${PSSH} -h ${CLUSTER_HOSTS} -i 'sudo sed -i -e "\$$ a ${HEADHOST}:/mnt/shared /mnt/shared nfs rsize=8192,wsize=8192,timeo=14,intr" \
	    -e "/^${HEADHOST}/ d" /etc/fstab ; \
	    sudo apt-get install -y nfs-common ; \
	    sudo mkdir -p /mnt/shared ; \
	    mountpoint -q /mnt/shared || sudo mount /mnt/shared'
|
||
.PHONY: install-supervisor system-check

install-supervisor: ${CLUSTER_HOSTS} ## install supervisor on the nodes and the head host
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i 'sudo apt-get install -y --force-yes supervisor'

# A host passes only if all four checks succeed: a MASQUERADE rule is listed,
# resolv.conf names ${NAMESERVER}, /mnt/shared/data exists and a supervisor
# package is listed by dpkg.
system-check: ${CLUSTER_HOSTS} ## verify masquerading, DNS, the shared mount and supervisor
	${PSSH} -h ${CLUSTER_HOSTS} -H ${HEADHOST} -i "sudo ip6tables -t nat -L POSTROUTING | grep MASQ && \
	    grep 'nameserver ${NAMESERVER}' /etc/resolv.conf && \
	    test -d /mnt/shared/data && \
	    dpkg -l | grep supervisor \
	    "
|