Skip to content

Commit 2d191f0

Browse files
Avoid SQL connection flakiness in integration tests (#7854)
## Summary of changes We are getting [the following error](https://dev.azure.com/datadoghq/a51c4863-3eb4-4c5d-878a-58b41a049e4e/_apis/build/builds/191432/logs/22447) in our integration tests: ``` 2025-11-21T04:47:15.5891593Z 04:47:15 [DBG] Stack Trace: 2025-11-21T04:47:15.5892025Z 04:47:15 [DBG] at Datadog.Trace.TestHelpers.ExitCodeException.Throw(Int32 actualExitCode, Int32 expectedExitCode, String message) in /project/tracer/test/Datadog.Trace.TestHelpers/ExitCodeException.cs:line 44 2025-11-21T04:47:15.5892326Z 04:47:15 [DBG] at Datadog.Trace.TestHelpers.TestHelper.WaitForProcessResult(ProcessHelper helper, Int32 expectedExitCode, Boolean dumpChildProcesses) in /project/tracer/test/Datadog.Trace.TestHelpers.AutoInstrumentation/TestHelper.cs:line 212 2025-11-21T04:47:15.5892650Z 04:47:15 [DBG] at Datadog.Trace.TestHelpers.TestHelper.RunSampleAndWaitForExit(MockTracerAgent agent, String arguments, String packageVersion, String framework, Int32 aspNetCorePort, Boolean usePublishWithRID, String dotnetRuntimeArgs) in /project/tracer/test/Datadog.Trace.TestHelpers.AutoInstrumentation/TestHelper.cs:line 172 2025-11-21T04:47:15.5893224Z 04:47:15 [DBG] at Datadog.Trace.ClrProfiler.IntegrationTests.AdoNet.MicrosoftDataSqlClientTests.SubmitsTraces(String packageVersion, String metadataSchemaVersion, String propagation) in /project/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/AdoNet/MicrosoftDataSqlClientTests.cs:line 62 2025-11-21T04:47:15.5893396Z 04:47:15 [DBG] --- End of stack trace from previous location --- 2025-11-21T04:47:15.5893558Z 04:47:15 [DBG] Standard Output Messages: 2025-11-21T04:47:15.5893704Z 04:47:15 [DBG] Platform: Arm64 2025-11-21T04:47:15.5893857Z 04:47:15 [DBG] TargetPlatform: ARM64 2025-11-21T04:47:15.5894007Z 04:47:15 [DBG] Configuration: Release 2025-11-21T04:47:15.5894160Z 04:47:15 [DBG] TargetFramework: net8.0 2025-11-21T04:47:15.5894453Z 04:47:15 [DBG] .NET Core: True 2025-11-21T04:47:15.5894656Z 04:47:15 [DBG] Native Loader DLL: 
/project/shared/bin/monitoring-home/linux-arm64/Datadog.Trace.ClrProfiler.Native.so 2025-11-21T04:47:15.5894828Z 04:47:15 [DBG] Agent listener info: Traces at port 45159 2025-11-21T04:47:15.5895049Z 04:47:15 [DBG] Starting Application: /project/artifacts/publish/Samples.Microsoft.Data.SqlClient/release_net8.0_6.1.3/Samples.Microsoft.Data.SqlClient.dll 2025-11-21T04:47:15.5895201Z 04:47:15 [DBG] ProcessId: 6967 2025-11-21T04:47:15.5895347Z 04:47:15 [DBG] StandardOutput: 2025-11-21T04:47:15.5895700Z 04:47:15 [DBG] Microsoft.Data.SqlClient.SqlException (0x80131904): A network-related or instance-specific error occurred while establishing a connection to SQL Server. The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server is configured to allow remote connections. (provider: TCP Provider, error: 35 - An internal exception was caught) 2025-11-21T04:47:15.5895896Z 04:47:15 [DBG] ---> System.Net.Sockets.SocketException (00000005, 0xFFFDFFFF): Name or service not known 2025-11-21T04:47:15.5896124Z 04:47:15 [DBG] at System.Net.Dns.GetHostEntryOrAddressesCore(String hostName, Boolean justAddresses, AddressFamily addressFamily, Nullable`1 startingTimestamp) 2025-11-21T04:47:15.5896315Z 04:47:15 [DBG] at System.Net.Dns.GetHostAddresses(String hostNameOrAddress, AddressFamily family) 2025-11-21T04:47:15.5896553Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ManagedSni.SniTcpHandle.GetHostAddressesSortedByPreference(String serverName, SqlConnectionIPAddressPreference ipPreference)+MoveNext() 2025-11-21T04:47:15.5896817Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ManagedSni.SniTcpHandle.Connect(String serverName, Int32 port, TimeoutTimer timeout, SqlConnectionIPAddressPreference ipPreference, String cachedFQDN, SQLDNSInfo& pendingDNSInfo) 2025-11-21T04:47:15.5897128Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ManagedSni.SniTcpHandle..ctor(String serverName, Int32 port, TimeoutTimer timeout, Boolean parallel, 
SqlConnectionIPAddressPreference ipPreference, String cachedFQDN, SQLDNSInfo& pendingDNSInfo, Boolean tlsFirst, String hostNameInCertificate, String serverCertificateFilename) 2025-11-21T04:47:15.5897354Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlInternalConnection.OnError(SqlException exception, Boolean breakConnection, Action`1 wrapCloseInAction) 2025-11-21T04:47:15.5897591Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.TdsParser.ThrowExceptionAndWarning(TdsParserStateObject stateObj, SqlCommand command, Boolean callerHasConnectionLock, Boolean asyncClose) 2025-11-21T04:47:15.5897851Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.TdsParser.Connect(ServerInfo serverInfo, SqlInternalConnectionTds connHandler, TimeoutTimer timeout, SqlConnectionString connectionOptions, Boolean withFailover) 2025-11-21T04:47:15.5898098Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlInternalConnectionTds.AttemptOneLogin(ServerInfo serverInfo, String newPassword, SecureString newSecurePassword, TimeoutTimer timeout, Boolean withFailover) 2025-11-21T04:47:15.5898394Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlInternalConnectionTds.LoginNoFailover(ServerInfo serverInfo, String newPassword, SecureString newSecurePassword, Boolean redirectedUserInstance, SqlConnectionString connectionOptions, SqlCredential credential, TimeoutTimer timeout) 2025-11-21T04:47:15.5898801Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlInternalConnectionTds.OpenLoginEnlist(TimeoutTimer timeout, SqlConnectionString connectionOptions, SqlCredential credential, String newPassword, SecureString newSecurePassword, Boolean redirectedUserInstance) 2025-11-21T04:47:15.5899380Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlInternalConnectionTds..ctor(DbConnectionPoolIdentity identity, SqlConnectionString connectionOptions, SqlCredential credential, Object providerInfo, String newPassword, SecureString newSecurePassword, Boolean redirectedUserInstance, SqlConnectionString userConnectionOptions, SessionData 
reconnectSessionData, Boolean applyTransientFaultHandling, String access***, IDbConnectionPool pool, Func`3 access***Callback) 2025-11-21T04:47:15.5899800Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlConnectionFactory.CreateConnection(DbConnectionOptions options, DbConnectionPoolKey poolKey, DbConnectionPoolGroupProviderInfo poolGroupProviderInfo, IDbConnectionPool pool, DbConnection owningConnection, DbConnectionOptions userOptions) 2025-11-21T04:47:15.5900076Z 04:47:15 [DBG] at Microsoft.Data.ProviderBase.DbConnectionFactory.CreatePooledConnection(IDbConnectionPool pool, DbConnection owningObject, DbConnectionOptions options, DbConnectionPoolKey poolKey, DbConnectionOptions userOptions) 2025-11-21T04:47:15.5900319Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ConnectionPool.WaitHandleDbConnectionPool.CreateObject(DbConnection owningObject, DbConnectionOptions userOptions, DbConnectionInternal oldConnection) 2025-11-21T04:47:15.5900568Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ConnectionPool.WaitHandleDbConnectionPool.UserCreateRequest(DbConnection owningObject, DbConnectionOptions userOptions, DbConnectionInternal oldConnection) 2025-11-21T04:47:15.5900866Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ConnectionPool.WaitHandleDbConnectionPool.TryGetConnection(DbConnection owningObject, UInt32 waitForMultipleObjectsTimeout, Boolean allowCreate, Boolean onlyOneCheckConnection, DbConnectionOptions userOptions, DbConnectionInternal& connection) 2025-11-21T04:47:15.5901142Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.ConnectionPool.WaitHandleDbConnectionPool.TryGetConnection(DbConnection owningObject, TaskCompletionSource`1 taskCompletionSource, DbConnectionOptions userOptions, DbConnectionInternal& connection) 2025-11-21T04:47:15.5901419Z 04:47:15 [DBG] at Microsoft.Data.ProviderBase.DbConnectionFactory.TryGetConnection(DbConnection owningConnection, TaskCompletionSource`1 retry, DbConnectionOptions userOptions, DbConnectionInternal oldConnection, 
DbConnectionInternal& connection) 2025-11-21T04:47:15.5901688Z 04:47:15 [DBG] at Microsoft.Data.ProviderBase.DbConnectionInternal.TryOpenConnectionInternal(DbConnection outerConnection, DbConnectionFactory connectionFactory, TaskCompletionSource`1 retry, DbConnectionOptions userOptions) 2025-11-21T04:47:15.5902144Z 04:47:15 [DBG] at Microsoft.Data.ProviderBase.DbConnectionClosed.TryOpenConnection(DbConnection outerConnection, DbConnectionFactory connectionFactory, TaskCompletionSource`1 retry, DbConnectionOptions userOptions) 2025-11-21T04:47:15.5902394Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlConnection.TryOpen(TaskCompletionSource`1 retry, SqlConnectionOverrides overrides) 2025-11-21T04:47:15.5902583Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlConnection.Open(SqlConnectionOverrides overrides) 2025-11-21T04:47:15.5902754Z 04:47:15 [DBG] at Microsoft.Data.SqlClient.SqlConnection.Open() 2025-11-21T04:47:15.5903048Z 04:47:15 [DBG] at Samples.Microsoft.Data.SqlClient.Program.OpenConnection(Type connectionType) in D:\a\_work\1\s\tracer\test\test-applications\integrations\Samples.Microsoft.Data.SqlClient\Program.cs:line 49 ``` CI tests occasionally fail with DNS/network errors (Error 11001, Class 20) when SQL Server isn't immediately available. These are infrastructure issues, not test failures. By catching connection errors and returning exit code 13, the test framework can skip these tests instead of failing the build. SQL exceptions during test execution (after successful connection) will still fail the test as expected. 
### Changes - Catch `SqlException` during connection attempts in `Samples.Microsoft.Data.SqlClient` - Exit with code 13 (skip) when SQL Server is unavailable after 3 retry attempts - Refactor retry logic from `goto` to a clean `for` loop - Improve logging to show attempt progress and detailed error information - Add some sleep time between tries ## Reason for change ## Implementation details ## Test coverage ## Other details <!-- Fixes #{issue} --> <!-- ⚠️ Note: Where possible, please obtain 2 approvals prior to merging. Unless CODEOWNERS specifies otherwise, for external teams it is typically best to have one review from a team member, and one review from apm-dotnet. Trivial changes do not require 2 reviews. MergeQueue is NOT enabled in this repository. If you have write access to the repo, the PR has 1-2 approvals (see above), and all of the required checks have passed, you can use the Squash and Merge button to merge the PR. If you don't have write access, or you need help, reach out in the #apm-dotnet channel in Slack. -->
1 parent d36555f commit 2d191f0

File tree

1 file changed

+79
-16
lines changed
  • tracer/test/test-applications/integrations/Samples.Microsoft.Data.SqlClient

1 file changed

+79
-16
lines changed
Lines changed: 79 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Data.Common;
3+
using System.Net.Sockets;
34
using System.Threading;
45
using System.Threading.Tasks;
56
using Microsoft.Data.SqlClient;
@@ -17,15 +18,28 @@ private static async Task<int> Main()
1718

1819
using (var connection = OpenConnection(typeof(SqlConnection)))
1920
{
21+
if (connection is null)
22+
{
23+
Console.WriteLine("No connection could be established. Exiting with skip code (13)");
24+
return 13;
25+
}
26+
2027
await RelationalDatabaseTestHarness.RunAllAsync<SqlCommand>(connection, commandFactory, commandExecutor, cts.Token);
2128
}
2229

2330
// Test the result when the ADO.NET provider assembly is loaded through Assembly.LoadFile
2431
// On .NET Core this results in a new assembly being loaded whose types are not considered the same
2532
// as the types loaded through the default loading mechanism, potentially causing type casting issues in CallSite instrumentation
2633
var loadFileType = AssemblyHelpers.LoadFileAndRetrieveType(typeof(SqlConnection));
34+
2735
using (var connection = OpenConnection(loadFileType))
2836
{
37+
if (connection is null)
38+
{
39+
Console.WriteLine("No connection could be established. Exiting with skip code (13)");
40+
return 13;
41+
}
42+
2943
// Do not use the strongly typed SqlCommandExecutor because the type casts will fail
3044
await RelationalDatabaseTestHarness.RunBaseClassesAsync(connection, commandFactory, cts.Token);
3145
}
@@ -37,31 +51,80 @@ private static async Task<int> Main()
3751

3852
/// <summary>
/// Creates a connection of <paramref name="connectionType"/> and opens it, retrying when SQL Server
/// looks temporarily unreachable.
/// </summary>
/// <param name="connectionType">
/// Connection type to instantiate; it must expose a constructor taking the connection string
/// and derive from <see cref="DbConnection"/>.
/// </param>
/// <returns>
/// The opened connection, or <c>null</c> when every attempt failed with a retryable connection error.
/// Callers treat <c>null</c> as "server unavailable" and exit with the skip code.
/// </returns>
/// <remarks>
/// Non-retryable <see cref="SqlException"/>s and any other exception are logged and rethrown.
/// NOTE(review): callers also pass a type obtained through AssemblyHelpers.LoadFileAndRetrieveType.
/// If the exceptions thrown by that copy of the provider are a distinct SqlException type, they will
/// bypass the SqlException handlers below and reach the generic handler - confirm whether that is intended.
/// </remarks>
private static DbConnection OpenConnection(Type connectionType)
{
    const int maxAttempts = 3;
    var connectionString = Environment.GetEnvironmentVariable("SQLSERVER_CONNECTION_STRING") ??
                           @"Server=(localdb)\MSSQLLocalDB;Integrated Security=true;Connection Timeout=60";

    SqlException lastException = null;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        DbConnection connection = null;
        try
        {
            // Direct cast instead of "as": a type that is not a DbConnection fails here with a clear
            // InvalidCastException rather than a NullReferenceException on Open()
            connection = (DbConnection)Activator.CreateInstance(connectionType, connectionString);
            connection.Open();
            return connection;
        }
        catch (SqlException ex) when (IsRetryableConnectionError(ex))
        {
            lastException = ex;
            connection?.Dispose();

            if (attempt < maxAttempts)
            {
                Console.WriteLine($"Connection attempt {attempt}/{maxAttempts} failed. Retrying...");
                Console.WriteLine($"SqlException Number: {ex.Number}, State: {ex.State}, Class: {ex.Class}");
                Console.WriteLine($"Message: {ex.Message}");

                // Linear back-off: wait a little longer after each failed attempt
                Thread.Sleep(1000 * attempt);
            }
        }
        catch (SqlException ex)
        {
            // Only non-retryable SqlExceptions get here (the filtered handler above takes the rest)
            connection?.Dispose();
            Console.WriteLine($"Fatal SqlException Number: {ex.Number}, State: {ex.State}, Class: {ex.Class}");
            Console.WriteLine($"Message: {ex.Message}");
            throw;
        }
        catch (Exception ex)
        {
            // Other errors (reflection issues, etc.) should fail the test
            connection?.Dispose();
            Console.WriteLine($"Unexpected error opening connection: {ex}");
            throw;
        }
    }

    // After all retry attempts exhausted, return null to signal connection failure
    Console.WriteLine($"Unable to establish SQL connection after {maxAttempts} attempts.");
    if (lastException != null)
    {
        Console.WriteLine($"Final SqlException Number: {lastException.Number}, State: {lastException.State}, Class: {lastException.Class}");
        Console.WriteLine($"Message: {lastException.Message}");
    }

    return null;
}
61105

62-
// else
63-
throw;
106+
/// <summary>
/// Decides whether a <see cref="SqlException"/> raised while opening a connection should be
/// treated as a transient connectivity problem and therefore retried.
/// </summary>
/// <param name="ex">The exception raised by the connection attempt.</param>
/// <returns>
/// <c>true</c> when <c>ex.Number</c> is one of the listed connection error codes, or when it is 0
/// and the inner exception is a <see cref="SocketException"/>; otherwise <c>false</c>.
/// </returns>
static bool IsRetryableConnectionError(SqlException ex)
{
    switch (ex.Number)
    {
        // Known retryable error codes
        case -1:    // Generic network error
        case -2:    // Connection timeout
        case 53:    // SQL Server not found
        case 258:   // Connection timeout
        case 10053: // Connection aborted
        case 10054: // Connection reset
        case 10060: // Connection timeout
        case 11001: // DNS failure
            return true;

        // Number=0 with SocketException indicates network issue
        case 0:
            return ex.InnerException is SocketException;

        default:
            return false;
    }
}
66129
}
67130
}

0 commit comments

Comments
 (0)