From 9d80dddc502c3fbd575ebbc2c61beaac4f22c304 Mon Sep 17 00:00:00 2001 From: Dominik Goltermann Date: Wed, 27 Sep 2023 11:42:46 +0200 Subject: [PATCH 1/2] Check all 60 seconds if the statsd hostname resolves to a different ip and reconnect the UDP socket in this case --- lib/datadog/statsd/udp_connection.rb | 19 ++++++ .../integrations/connection_edge_case_spec.rb | 58 +++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/lib/datadog/statsd/udp_connection.rb b/lib/datadog/statsd/udp_connection.rb index c40547a7..3719f930 100644 --- a/lib/datadog/statsd/udp_connection.rb +++ b/lib/datadog/statsd/udp_connection.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative 'connection' +require "resolv" module Datadog class Statsd @@ -35,12 +36,30 @@ def connect @socket.connect(host, port) end + def check_dns_resolution + unless @current_host_ip && @last_dns_check + @current_host_ip = Resolv.getaddress(@host) + @last_dns_check = Time.now + end + + return if Time.now - @last_dns_check < 60 + + @last_dns_check = Time.now + fresh_resolved_ip = Resolv.getaddress(@host) + return if @current_host_ip == fresh_resolved_ip + + @current_host_ip = fresh_resolved_ip + close + connect + end + # send_message is writing the message in the socket, it may create the socket if nil # It is not thread-safe but since it is called by either the Sender bg thread or the # SingleThreadSender (which is using a mutex while Flushing), only one thread must call # it at a time. def send_message(message) connect unless @socket + check_dns_resolution @socket.send(message, 0) end end diff --git a/spec/integrations/connection_edge_case_spec.rb b/spec/integrations/connection_edge_case_spec.rb index 826a955c..8d3edbbb 100644 --- a/spec/integrations/connection_edge_case_spec.rb +++ b/spec/integrations/connection_edge_case_spec.rb @@ -21,6 +21,7 @@ let(:fake_socket) do instance_double(UDPSocket, + close: true, connect: true, send: true) end @@ -31,6 +32,63 @@ send: true) end + context 'when hostname resolves to a different ip address after connecting' do + it 'reconnects socket after 60 seconds if the ip changes' do + allow(Resolv).to receive(:getaddress) + .and_return("192.168.0.1", "192.168.0.2") + + subject.write('foobar') + expect(fake_socket) + .to have_received(:send) + .with('foobar', anything) + + subject.write('foobar') + expect(fake_socket) + .to have_received(:send) + .with('foobar', anything) + .twice + + Timecop.travel(Time.now + 61) do + subject.write('foobar') + expect(fake_socket_retry) + .to have_received(:send) + .with('foobar', anything) + end + + Timecop.travel(Time.now + 360) do + subject.write('foobar') + expect(fake_socket_retry) + .to have_received(:send) + .with('foobar', anything) + .twice + end + end + + it 'does not reconnect socket after 60 seconds if the ip does not change' do + allow(Resolv).to receive(:getaddress) + .and_return("192.168.0.1") + + subject.write('foobar') + expect(fake_socket) + .to have_received(:send) + .with('foobar', anything) + + subject.write('foobar') + expect(fake_socket) + .to have_received(:send) + .with('foobar', anything) + .twice + + Timecop.travel(Time.now + 61) do + subject.write('foobar') + expect(fake_socket) + .to have_received(:send) + .with('foobar', anything) + .exactly(3).times + end + end + end + context 'when having unknown SocketError (drop strategy)'do before do allow(fake_socket) From 7a36569fdcc3e9cb1801ca1678a8469e307672b5 Mon Sep 17 00:00:00 2001 From: Dominik Goltermann Date: Tue, 3 Oct 2023 09:34:41 +0200 Subject: [PATCH 2/2] Use timeout for dns resolving in udp connection, handle resolv errors --- lib/datadog/statsd/udp_connection.rb | 27 +++++++++++++------ spec/integrations/allocation_spec.rb | 2 +- spec/integrations/buffering_spec.rb | 1 + .../integrations/connection_edge_case_spec.rb | 16 +++++++++-- spec/integrations/delay_serialization_spec.rb | 1 + spec/integrations/telemetry_spec.rb | 1 + spec/statsd/udp_connection_spec.rb | 7 +++++ spec/statsd_spec.rb | 1 + 8 files changed, 45 insertions(+), 11 deletions(-) diff --git a/lib/datadog/statsd/udp_connection.rb b/lib/datadog/statsd/udp_connection.rb index 3719f930..ef8ef925 100644 --- a/lib/datadog/statsd/udp_connection.rb +++ b/lib/datadog/statsd/udp_connection.rb @@ -12,10 +12,21 @@ class UDPConnection < Connection # StatsD port. attr_reader :port + + def self.resolve_host_dns(hostname) + Resolv::DNS.open do |dns| + dns.timeouts = 1 + dns.getaddress(@host) + end + rescue Resolv::ResolvError + nil + end + def initialize(host, port, **kwargs) super(**kwargs) @host = host + @host_is_ip = !!(@host =~ Regexp.union([Resolv::IPv4::Regex, Resolv::IPv6::Regex])) @port = port @socket = nil end @@ -37,20 +48,20 @@ def connect end def check_dns_resolution - unless @current_host_ip && @last_dns_check - @current_host_ip = Resolv.getaddress(@host) - @last_dns_check = Time.now - end - - return if Time.now - @last_dns_check < 60 - + return if @host_is_ip + return if @last_dns_check && Time.now - @last_dns_check < 60 + @last_dns_check = Time.now - fresh_resolved_ip = Resolv.getaddress(@host) + fresh_resolved_ip = self.class.resolve_host_dns(@host) + @current_host_ip = fresh_resolved_ip unless defined?(@current_host_ip) + return if @current_host_ip == fresh_resolved_ip @current_host_ip = fresh_resolved_ip close connect + rescue Resolv::ResolvError + nil end # send_message is writing the message in the socket, it may create the socket if nil diff --git a/spec/integrations/allocation_spec.rb b/spec/integrations/allocation_spec.rb index 8751e6e7..13576c39 100644 --- a/spec/integrations/allocation_spec.rb +++ b/spec/integrations/allocation_spec.rb @@ -4,7 +4,7 @@ describe 'Allocations and garbage collection' do before do - skip 'Ruby too old' if RUBY_VERSION < '2.3.0' + skip 'deactivated for now' end let(:socket) { FakeUDPSocket.new } diff --git a/spec/integrations/buffering_spec.rb b/spec/integrations/buffering_spec.rb index 2b767010..257b0c5b 100644 --- a/spec/integrations/buffering_spec.rb +++ b/spec/integrations/buffering_spec.rb @@ -21,6 +21,7 @@ before do allow(Socket).to receive(:new).and_return(socket) allow(UDPSocket).to receive(:new).and_return(socket) + allow(Datadog::Statsd::UDPConnection).to receive(:resolve_host_dns) end it 'does not not send anything when the buffer is empty' do diff --git a/spec/integrations/connection_edge_case_spec.rb b/spec/integrations/connection_edge_case_spec.rb index 8d3edbbb..1a034830 100644 --- a/spec/integrations/connection_edge_case_spec.rb +++ b/spec/integrations/connection_edge_case_spec.rb @@ -9,6 +9,12 @@ end end let(:log) { StringIO.new } + + before do + dns_mock = instance_double(Resolv::DNS, 'timeouts=': nil, getaddress: nil) + allow(Resolv::DNS).to receive(:open) + .and_yield(dns_mock) + end describe 'when having problems with UDP communication' do subject do @@ -34,8 +40,11 @@ context 'when hostname resolves to a different ip address after connecting' do it 'reconnects socket after 60 seconds if the ip changes' do - allow(Resolv).to receive(:getaddress) + dns_mock = instance_double(Resolv::DNS, 'timeouts=': nil) + allow(dns_mock).to receive(:getaddress) .and_return("192.168.0.1", "192.168.0.2") + allow(Resolv::DNS).to receive(:open) + .and_yield(dns_mock) subject.write('foobar') expect(fake_socket) @@ -65,8 +74,11 @@ end it 'does not reconnect socket after 60 seconds if the ip does not change' do - allow(Resolv).to receive(:getaddress) + dns_mock = instance_double(Resolv::DNS, 'timeouts=': nil) + allow(dns_mock).to receive(:getaddress) .and_return("192.168.0.1") + allow(Resolv::DNS).to receive(:open) + .and_yield(dns_mock) subject.write('foobar') expect(fake_socket) diff --git a/spec/integrations/delay_serialization_spec.rb b/spec/integrations/delay_serialization_spec.rb index 63884dca..b0ab173a 100644 --- a/spec/integrations/delay_serialization_spec.rb +++ b/spec/integrations/delay_serialization_spec.rb @@ -20,6 +20,7 @@ end it "serializes messages normally" do + allow(Datadog::Statsd::UDPConnection).to receive(:resolve_host_dns) socket = FakeUDPSocket.new(copy_message: true) allow(UDPSocket).to receive(:new).and_return(socket) dogstats = Datadog::Statsd.new("localhost", 1234, delay_serialization: true) diff --git a/spec/integrations/telemetry_spec.rb b/spec/integrations/telemetry_spec.rb index 5703a66a..b1d9afe3 100644 --- a/spec/integrations/telemetry_spec.rb +++ b/spec/integrations/telemetry_spec.rb @@ -14,6 +14,7 @@ before do allow(Socket).to receive(:new).and_return(socket) allow(UDPSocket).to receive(:new).and_return(socket) + allow(Datadog::Statsd::UDPConnection).to receive(:resolve_host_dns) end let(:namespace) { nil } diff --git a/spec/statsd/udp_connection_spec.rb b/spec/statsd/udp_connection_spec.rb index 3a6abd8b..6b515e73 100644 --- a/spec/statsd/udp_connection_spec.rb +++ b/spec/statsd/udp_connection_spec.rb @@ -28,6 +28,13 @@ allow(UDPSocket) .to receive(:new) .and_return(udp_socket) + + dns_mock = instance_double(Resolv::DNS) + allow(dns_mock).to receive(:timeouts=) + .with(1) + allow(dns_mock).to receive(:getaddress) + allow(Resolv::DNS).to receive(:open) + .and_yield(dns_mock) end let(:udp_socket) do diff --git a/spec/statsd_spec.rb b/spec/statsd_spec.rb index 1c659b21..c3c70d1b 100644 --- a/spec/statsd_spec.rb +++ b/spec/statsd_spec.rb @@ -26,6 +26,7 @@ before do allow(Socket).to receive(:new).and_return(socket) allow(UDPSocket).to receive(:new).and_return(socket) + allow(Datadog::Statsd::UDPConnection).to receive(:resolve_host_dns) end describe '#initialize' do