Skip to content

Commit 7a1b3d8

Browse files
authored
Merge pull request netdata#2857 from ktsaou/master
Add TCP ListenOverflows and ListenDrops chart and alarms
2 parents 934c7d2 + c8a80bb commit 7a1b3d8

File tree

5 files changed

+79
-3
lines changed

5 files changed

+79
-3
lines changed

conf.d/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ dist_healthconfig_DATA = \
102102
health.d/softnet.conf \
103103
health.d/squid.conf \
104104
health.d/swap.conf \
105+
health.d/tcp_listen.conf \
105106
health.d/tcp_resets.conf \
106107
health.d/udp_errors.conf \
107108
health.d/varnish.conf \

conf.d/health.d/tcp_listen.conf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# -----------------------------------------------------------------------------
2+
# tcp listen sockets issues
3+
4+
alarm: 1m_ipv4_tcp_listen_overflows
5+
on: ipv4.tcplistenissues
6+
os: linux
7+
hosts: *
8+
lookup: sum -60s unaligned absolute of ListenOverflows
9+
units: overflows
10+
every: 10s
11+
crit: $this > 0
12+
delay: up 0 down 5m multiplier 1.5 max 1h
13+
info: the number of TCP listen socket overflows during the last minute
14+
to: sysadmin
15+
16+
alarm: 1m_ipv4_tcp_listen_drops
17+
on: ipv4.tcplistenissues
18+
os: linux
19+
hosts: *
20+
lookup: sum -60s unaligned absolute of ListenDrops
21+
units: drops
22+
every: 10s
23+
crit: $this > 0
24+
delay: up 0 down 5m multiplier 1.5 max 1h
25+
info: the number of TCP listen socket drops during the last minute
26+
to: sysadmin
27+

conf.d/health.d/tcp_resets.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
every: 10s
3838
warn: $this > ((($1m_ipv4_tcp_resets_sent < 5)?(5):($1m_ipv4_tcp_resets_sent)) * (($status >= $WARNING) ? (1) : (20)))
3939
delay: up 0 down 60m multiplier 1.2 max 2h
40-
options: no-clear-notification
40+
options: no-clear-notification
4141
info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed; clear notification for this alarm will not be sent)
4242
to: sysadmin
4343

@@ -62,6 +62,6 @@ options: no-clear-notification
6262
every: 10s
6363
warn: $this > ((($1m_ipv4_tcp_resets_received < 5)?(5):($1m_ipv4_tcp_resets_received)) * (($status >= $WARNING) ? (1) : (10)))
6464
delay: up 0 down 60m multiplier 1.2 max 2h
65-
options: no-clear-notification
65+
options: no-clear-notification
6666
info: average TCP RESETS this host is receiving, over the last 10 seconds (this can be an indication that a service this host needs, has crashed; clear notification for this alarm will not be sent)
6767
to: sysadmin

configs.signatures

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
declare -A configs_signatures=(
22
['0056936ce99788ed9ae1c611c87aa6d8']='apps_groups.conf'
3+
['007fc019fb32e952b509d455c016a002']='health.d/tcp_resets.conf'
34
['0102351817595a85d01ebd54a5f2f36b']='python.d/ovpn_status_log.conf'
45
['01302e01162d465614276de43fad7546']='python.d.conf'
56
['01c54057e0ca55b5bb49df1662d6b8c3']='python.d/web_log.conf'
@@ -485,6 +486,7 @@ declare -A configs_signatures=(
485486
['f4c5d88c34d3fb853498124177cc77f1']='python.d.conf'
486487
['f5736e0b2945182cb659cb0713eff923']='apps_groups.conf'
487488
['f66e5236ba1245bb2e5fd99191f114c6']='charts.d/hddtemp.conf'
489+
['f68ac0fca6b4ffc96097779344cabac6']='health.d/tcp_listen.conf'
488490
['f6c6656f900ff52d159dca12d624016a']='python.d/postgres.conf'
489491
['f7401a6e7c7d4fe2e0e2be7f7f523275']='health.d/web_log.conf'
490492
['f7a99e94231beda85c6254912d8d31c1']='python.d/tomcat.conf'

src/proc_net_netstat.c

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
2222
(void)dt;
2323

2424
static int do_bandwidth = -1, do_inerrors = -1, do_mcast = -1, do_bcast = -1, do_mcast_p = -1, do_bcast_p = -1, do_ecn = -1, \
25-
do_tcpext_reorder = -1, do_tcpext_syscookies = -1, do_tcpext_ofo = -1, do_tcpext_connaborts = -1, do_tcpext_memory = -1;
25+
do_tcpext_reorder = -1, do_tcpext_syscookies = -1, do_tcpext_ofo = -1, do_tcpext_connaborts = -1, do_tcpext_memory = -1,
26+
do_tcpext_listen = -1;
27+
2628
static uint32_t hash_ipext = 0, hash_tcpext = 0;
2729
static procfile *ff = NULL;
2830

@@ -93,6 +95,10 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
9395
static unsigned long long tcpext_TCPAbortOnLinger = 0; // connections aborted after user close in linger timeout
9496
static unsigned long long tcpext_TCPAbortFailed = 0; // times unable to send RST due to no memory
9597

98+
// https://perfchron.com/2015/12/26/investigating-linux-network-issues-with-netstat-and-nstat/
99+
static unsigned long long tcpext_ListenOverflows = 0; // times the listen queue of a socket overflowed
100+
static unsigned long long tcpext_ListenDrops = 0; // SYNs to LISTEN sockets ignored
101+
96102
// IPv4 TCP memory pressures
97103
static unsigned long long tcpext_TCPMemoryPressures = 0;
98104

@@ -116,6 +122,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
116122
do_tcpext_ofo = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP out-of-order queue", CONFIG_BOOLEAN_AUTO);
117123
do_tcpext_connaborts = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP connection aborts", CONFIG_BOOLEAN_AUTO);
118124
do_tcpext_memory = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP memory pressures", CONFIG_BOOLEAN_AUTO);
125+
do_tcpext_listen = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP listen issues", CONFIG_BOOLEAN_AUTO);
119126

120127
arl_ipext = arl_create("netstat/ipext", NULL, 60);
121128
arl_tcpext = arl_create("netstat/tcpext", NULL, 60);
@@ -197,6 +204,11 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
197204
arl_expect(arl_tcpext, "TCPMemoryPressures", &tcpext_TCPMemoryPressures);
198205
}
199206

207+
if(do_tcpext_listen != CONFIG_BOOLEAN_NO) {
208+
arl_expect(arl_tcpext, "ListenOverflows", &tcpext_ListenOverflows);
209+
arl_expect(arl_tcpext, "ListenDrops", &tcpext_ListenDrops);
210+
}
211+
200212
// shared metrics
201213
arl_expect(arl_tcpext, "TCPSynRetrans", &tcpext_TCPSynRetrans);
202214
}
@@ -682,6 +694,40 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
682694
rrdset_done(st_syncookies);
683695
}
684696

697+
// --------------------------------------------------------------------
698+
699+
if(do_tcpext_listen == CONFIG_BOOLEAN_YES || (do_tcpext_listen == CONFIG_BOOLEAN_AUTO && (tcpext_ListenOverflows || tcpext_ListenDrops))) {
700+
do_tcpext_listen = CONFIG_BOOLEAN_YES;
701+
702+
static RRDSET *st_listen = NULL;
703+
static RRDDIM *rd_overflows = NULL, *rd_drops = NULL;
704+
705+
if(unlikely(!st_listen)) {
706+
707+
st_listen = rrdset_create_localhost(
708+
"ipv4"
709+
, "tcplistenissues"
710+
, NULL
711+
, "tcp"
712+
, NULL
713+
, "TCP Listen Socket Issues"
714+
, "packets/s"
715+
, 3015
716+
, update_every
717+
, RRDSET_TYPE_LINE
718+
);
719+
720+
rd_overflows = rrddim_add(st_listen, "ListenOverflows", "overflows", 1, 1, RRD_ALGORITHM_INCREMENTAL);
721+
rd_drops = rrddim_add(st_listen, "ListenDrops", "drops", 1, 1, RRD_ALGORITHM_INCREMENTAL);
722+
}
723+
else
724+
rrdset_next(st_listen);
725+
726+
rrddim_set_by_pointer(st_listen, rd_overflows, tcpext_ListenOverflows);
727+
rrddim_set_by_pointer(st_listen, rd_drops, tcpext_ListenDrops);
728+
729+
rrdset_done(st_listen);
730+
}
685731
}
686732
}
687733

0 commit comments

Comments
 (0)