From 88d2ad60bb1b6ea0162eb8fa3cd2d006b17c4c55 Mon Sep 17 00:00:00 2001 From: Rajkumar Rangaraj Date: Tue, 26 Nov 2024 11:10:03 -0800 Subject: [PATCH] [otlp] Grpc Status check and retry (#6000) Co-authored-by: Mikel Blanchard --- .../ExportClient/BaseOtlpGrpcExportClient.cs | 8 +- .../ExportClient/ExportClientGrpcResponse.cs | 12 +- .../ExportClient/ExportClientResponse.cs | 3 - .../ExportClient/Grpc/GrpcProtocolHelpers.cs | 26 ++-- .../ExportClient/Grpc/Status.cs | 7 + .../ExportClient/OtlpGrpcLogExportClient.cs | 2 +- .../OtlpGrpcMetricsExportClient.cs | 2 +- .../ExportClient/OtlpGrpcTraceExportClient.cs | 2 +- .../Implementation/ExportClient/OtlpRetry.cs | 62 ++++++++- .../ProtobufOtlpGrpcExportClient.cs | 128 ++++++++++++++++-- .../ProtobufOtlpHttpExportClient.cs | 2 +- ...penTelemetryProtocolExporterEventSource.cs | 84 ++++++++++++ .../OtlpExporterTransmissionHandler.cs | 2 +- ...ProtobufOtlpExporterTransmissionHandler.cs | 8 +- .../MockCollectorIntegrationTests.cs | 77 ++++++----- .../OtlpRetryTests.cs | 2 +- 16 files changed, 348 insertions(+), 79 deletions(-) diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/BaseOtlpGrpcExportClient.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/BaseOtlpGrpcExportClient.cs index b585f2fc088..2eab9778852 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/BaseOtlpGrpcExportClient.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/BaseOtlpGrpcExportClient.cs @@ -13,7 +13,13 @@ namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClie /// Type of export request. internal abstract class BaseOtlpGrpcExportClient : IExportClient { - protected static readonly ExportClientGrpcResponse SuccessExportResponse = new ExportClientGrpcResponse(success: true, deadlineUtc: default, exception: null); + protected static readonly ExportClientGrpcResponse SuccessExportResponse + = new( + success: true, + deadlineUtc: default, + exception: null, + status: null, + grpcStatusDetailsHeader: null); protected BaseOtlpGrpcExportClient(OtlpExporterOptions options) { diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientGrpcResponse.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientGrpcResponse.cs index 4a96a7ad7cb..339e0ab78ad 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientGrpcResponse.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientGrpcResponse.cs @@ -1,6 +1,8 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +using OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient.Grpc; + namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient; internal sealed class ExportClientGrpcResponse : ExportClientResponse @@ -8,8 +10,16 @@ internal sealed class ExportClientGrpcResponse : ExportClientResponse public ExportClientGrpcResponse( bool success, DateTime deadlineUtc, - Exception? exception) + Exception? exception, + Status? status, + string? grpcStatusDetailsHeader) : base(success, deadlineUtc, exception) { + this.Status = status; + this.GrpcStatusDetailsHeader = grpcStatusDetailsHeader; } + + public Status? Status { get; } + + public string? GrpcStatusDetailsHeader { get; } } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientResponse.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientResponse.cs index 3a14b537256..fcccd151920 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientResponse.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientResponse.cs @@ -1,8 +1,6 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -using System.Diagnostics.CodeAnalysis; - namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient; internal abstract class ExportClientResponse @@ -14,7 +12,6 @@ protected ExportClientResponse(bool success, DateTime deadlineUtc, Exception? ex this.DeadlineUtc = deadlineUtc; } - [MemberNotNullWhen(false, nameof(Exception))] public bool Success { get; } public Exception? Exception { get; } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/GrpcProtocolHelpers.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/GrpcProtocolHelpers.cs index 0a7db69c391..29a0a1b4d39 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/GrpcProtocolHelpers.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/GrpcProtocolHelpers.cs @@ -15,7 +15,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -using System.Diagnostics.CodeAnalysis; #if NET462 using System.Net.Http; #endif @@ -27,38 +26,30 @@ internal static class GrpcProtocolHelpers { internal const string StatusTrailer = "grpc-status"; internal const string MessageTrailer = "grpc-message"; - internal const string CancelledDetail = "No grpc-status found on response."; - public static Status? GetResponseStatus(HttpHeaders trailingHeaders, HttpResponseMessage httpResponse) + public static Status GetResponseStatus(HttpResponseMessage httpResponse, HttpHeaders trailingHeaders) { - Status? status; try { - var result = trailingHeaders.Any() ? TryGetStatusCore(trailingHeaders, out status) : TryGetStatusCore(httpResponse.Headers, out status); - - if (!result) - { - status = new Status(StatusCode.Cancelled, CancelledDetail); - } + return trailingHeaders.Any() + ? GetStatusCore(trailingHeaders) + : GetStatusCore(httpResponse.Headers); } catch (Exception ex) { // Handle error from parsing badly formed status - status = new Status(StatusCode.Cancelled, ex.Message, ex); + return new Status(StatusCode.Internal, ex.Message, ex); } - - return status; } - public static bool TryGetStatusCore(HttpHeaders headers, [NotNullWhen(true)] out Status? status) + public static Status GetStatusCore(HttpHeaders headers) { var grpcStatus = GetHeaderValue(headers, StatusTrailer); // grpc-status is a required trailer if (grpcStatus == null) { - status = null; - return false; + return Status.NoReply; } int statusValue; @@ -79,8 +70,7 @@ public static bool TryGetStatusCore(HttpHeaders headers, [NotNullWhen(true)] out grpcMessage = Uri.UnescapeDataString(grpcMessage); } - status = new Status((StatusCode)statusValue, grpcMessage ?? string.Empty); - return true; + return new Status((StatusCode)statusValue, grpcMessage ?? string.Empty); } public static string? GetHeaderValue(HttpHeaders? headers, string name, bool first = false) diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/Status.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/Status.cs index 477177b130d..89445891970 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/Status.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/Status.cs @@ -25,6 +25,8 @@ namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClie [DebuggerDisplay("{DebuggerToString(),nq}")] internal struct Status { + public const string NoReplyDetailMessage = "No grpc-status found on response."; + /// /// Default result of a successful RPC. StatusCode=OK, empty details message. /// @@ -35,6 +37,11 @@ internal struct Status /// public static readonly Status DefaultCancelled = new Status(StatusCode.Cancelled, string.Empty); + /// + /// Default result of a cancelled RPC with no grpc-status found on response. + /// + public static readonly Status NoReply = new Status(StatusCode.Internal, NoReplyDetailMessage); + /// /// Initializes a new instance of the struct. /// diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcLogExportClient.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcLogExportClient.cs index e90f05ff5d2..dc9d31feeb3 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcLogExportClient.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcLogExportClient.cs @@ -39,7 +39,7 @@ public override ExportClientResponse SendExportRequest(OtlpCollector.ExportLogsS { OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex); - return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex); + return new ExportClientGrpcResponse(success: false, deadlineUtc, ex, null, null); } } } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcMetricsExportClient.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcMetricsExportClient.cs index d3c498648e5..b67b0789da0 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcMetricsExportClient.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcMetricsExportClient.cs @@ -39,7 +39,7 @@ public override ExportClientResponse SendExportRequest(OtlpCollector.ExportMetri { OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex); - return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex); + return new ExportClientGrpcResponse(false, deadlineUtc, ex, null, null); } } } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcTraceExportClient.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcTraceExportClient.cs index b29c3f91275..b30efe6de18 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcTraceExportClient.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcTraceExportClient.cs @@ -39,7 +39,7 @@ public override ExportClientResponse SendExportRequest(OtlpCollector.ExportTrace { OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex); - return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex); + return new ExportClientGrpcResponse(false, deadlineUtc, ex, null, null); } } } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpRetry.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpRetry.cs index 984cc91c158..2639333bce7 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpRetry.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpRetry.cs @@ -83,12 +83,54 @@ public static bool ShouldHandleHttpRequestException(Exception? exception) public static bool TryGetGrpcRetryResult(ExportClientGrpcResponse response, int retryDelayMilliseconds, out RetryResult retryResult) { + retryResult = default; + if (response.Exception is RpcException rpcException) { return TryGetRetryResult(rpcException.StatusCode, IsGrpcStatusCodeRetryable, response.DeadlineUtc, rpcException.Trailers, TryGetGrpcRetryDelay, retryDelayMilliseconds, out retryResult); } + else if (response.Status != null) + { + var nextRetryDelayMilliseconds = retryDelayMilliseconds; + + if (IsDeadlineExceeded(response.DeadlineUtc)) + { + return false; + } + + var throttleDelay = Grpc.GrpcStatusDeserializer.TryGetGrpcRetryDelay(response.GrpcStatusDetailsHeader); + var retryable = IsGrpcStatusCodeRetryable(response.Status.Value.StatusCode, throttleDelay.HasValue); + + if (!retryable) + { + return false; + } + + var delayDuration = throttleDelay ?? TimeSpan.FromMilliseconds(GetRandomNumber(0, nextRetryDelayMilliseconds)); + + if (IsDeadlineExceeded(response.DeadlineUtc + delayDuration)) + { + return false; + } + + if (throttleDelay.HasValue) + { + try + { + // TODO: Consider making nextRetryDelayMilliseconds a double to avoid the need for convert/overflow handling + nextRetryDelayMilliseconds = Convert.ToInt32(throttleDelay.Value.TotalMilliseconds); + } + catch (OverflowException) + { + nextRetryDelayMilliseconds = MaxBackoffMilliseconds; + } + } + + nextRetryDelayMilliseconds = CalculateNextRetryDelay(nextRetryDelayMilliseconds); + retryResult = new RetryResult(throttleDelay.HasValue, delayDuration, nextRetryDelayMilliseconds); + return true; + } - retryResult = default; return false; } @@ -216,6 +258,24 @@ private static bool IsGrpcStatusCodeRetryable(StatusCode statusCode, bool hasRet } } + private static bool IsGrpcStatusCodeRetryable(Grpc.StatusCode statusCode, bool hasRetryDelay) + { + switch (statusCode) + { + case Grpc.StatusCode.Cancelled: + case Grpc.StatusCode.DeadlineExceeded: + case Grpc.StatusCode.Aborted: + case Grpc.StatusCode.OutOfRange: + case Grpc.StatusCode.Unavailable: + case Grpc.StatusCode.DataLoss: + return true; + case Grpc.StatusCode.ResourceExhausted: + return hasRetryDelay; + default: + return false; + } + } + private static bool IsHttpStatusCodeRetryable(HttpStatusCode statusCode, bool hasRetryDelay) { switch (statusCode) diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpGrpcExportClient.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpGrpcExportClient.cs index c4f5ab5fcc6..0cabbcb53ac 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpGrpcExportClient.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpGrpcExportClient.cs @@ -5,7 +5,7 @@ using System.Net.Http; #endif using System.Net.Http.Headers; -using Grpc.Core; +using OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient.Grpc; using OpenTelemetry.Internal; namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient; @@ -13,10 +13,19 @@ namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClie /// Base class for sending OTLP export request over gRPC. internal sealed class ProtobufOtlpGrpcExportClient : IProtobufExportClient { + public const string GrpcStatusDetailsHeader = "grpc-status-details-bin"; private static readonly ExportClientHttpResponse SuccessExportResponse = new(success: true, deadlineUtc: default, response: null, exception: null); private static readonly MediaTypeHeaderValue MediaHeaderValue = new("application/grpc"); private static readonly Version Http2RequestVersion = new(2, 0); + private static readonly ExportClientGrpcResponse DefaultExceptionExportClientGrpcResponse + = new( + success: false, + deadlineUtc: default, + exception: null, + status: null, + grpcStatusDetailsHeader: null); + public ProtobufOtlpGrpcExportClient(OtlpExporterOptions options, HttpClient httpClient, string signalPath) { Guard.ThrowIfNull(options); @@ -44,25 +53,120 @@ public ExportClientResponse SendExportRequest(byte[] buffer, int contentLength, try { using var httpRequest = this.CreateHttpRequest(buffer, contentLength); - using var httpResponse = this.SendHttpRequest(httpRequest, cancellationToken); try { httpResponse.EnsureSuccessStatusCode(); } - catch (HttpRequestException ex) + catch (HttpRequestException) + { + throw; + } + + var trailingHeaders = httpResponse.TrailingHeaders(); + Status status = GrpcProtocolHelpers.GetResponseStatus(httpResponse, trailingHeaders); + + if (status.Detail.Equals(Status.NoReplyDetailMessage)) + { + using var responseStream = httpResponse.Content.ReadAsStreamAsync().GetAwaiter().GetResult(); + int firstByte = responseStream.ReadByte(); + + if (firstByte == -1) + { + if (status.StatusCode == StatusCode.OK) + { + status = new Status(StatusCode.Internal, "Failed to deserialize response message."); + } + + OpenTelemetryProtocolExporterEventSource.Log.ResponseDeserializationFailed(this.Endpoint.ToString()); + + return new ExportClientGrpcResponse( + success: false, + deadlineUtc: deadlineUtc, + exception: null, + status: status, + grpcStatusDetailsHeader: null); + } + + // Note: Trailing headers might not be fully available until the + // response stream is consumed. gRPC often sends critical + // information like error details or final statuses in trailing + // headers which can only be reliably accessed after reading + // the response body. + trailingHeaders = httpResponse.TrailingHeaders(); + status = GrpcProtocolHelpers.GetResponseStatus(httpResponse, trailingHeaders); + } + + if (status.StatusCode == StatusCode.OK) + { + OpenTelemetryProtocolExporterEventSource.Log.ExportSuccess(this.Endpoint.ToString(), "Export completed successfully."); + return SuccessExportResponse; + } + + string? grpcStatusDetailsHeader = null; + if (status.StatusCode == StatusCode.ResourceExhausted || status.StatusCode == StatusCode.Unavailable) { - return new ExportClientHttpResponse(success: false, deadlineUtc: deadlineUtc, response: httpResponse, ex); + grpcStatusDetailsHeader = GrpcProtocolHelpers.GetHeaderValue(trailingHeaders, GrpcStatusDetailsHeader); } - // TODO: Hande retries & failures. - return SuccessExportResponse; + OpenTelemetryProtocolExporterEventSource.Log.ExportFailure(this.Endpoint.ToString(), "Export failed due to unexpected status code."); + + return new ExportClientGrpcResponse( + success: false, + deadlineUtc: deadlineUtc, + exception: null, + status: status, + grpcStatusDetailsHeader: grpcStatusDetailsHeader); + } + catch (HttpRequestException ex) when (ex.InnerException is TimeoutException || IsTransientNetworkError(ex)) + { + // Handle transient HTTP errors (retryable) + OpenTelemetryProtocolExporterEventSource.Log.TransientHttpError(this.Endpoint, ex); + return new ExportClientGrpcResponse( + success: false, + deadlineUtc: deadlineUtc, + exception: ex, + status: new Status(StatusCode.Unavailable, "Transient HTTP error - retryable"), + grpcStatusDetailsHeader: null); + } + catch (HttpRequestException ex) + { + // Handle non-retryable HTTP errors. + OpenTelemetryProtocolExporterEventSource.Log.HttpRequestFailed(this.Endpoint, ex); + return new ExportClientGrpcResponse( + success: false, + deadlineUtc: deadlineUtc, + exception: ex, + status: null, + grpcStatusDetailsHeader: null); + } + catch (OperationCanceledException ex) when (!cancellationToken.IsCancellationRequested) + { + // Handle unexpected cancellation. + OpenTelemetryProtocolExporterEventSource.Log.OperationUnexpectedlyCanceled(this.Endpoint, ex); + return new ExportClientGrpcResponse( + success: false, + deadlineUtc: deadlineUtc, + exception: ex, + status: new Status(StatusCode.Cancelled, "Operation was canceled unexpectedly."), + grpcStatusDetailsHeader: null); + } + catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException) + { + // Handle TaskCanceledException caused by TimeoutException. + OpenTelemetryProtocolExporterEventSource.Log.RequestTimedOut(this.Endpoint, ex); + return new ExportClientGrpcResponse( + success: false, + deadlineUtc: deadlineUtc, + exception: ex, + status: new Status(StatusCode.DeadlineExceeded, "Request timed out."), + grpcStatusDetailsHeader: null); } - catch (RpcException ex) + catch (Exception ex) { OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex); - return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex); + return DefaultExceptionExportClientGrpcResponse; } } @@ -99,4 +203,12 @@ public bool Shutdown(int timeoutMilliseconds) this.HttpClient.CancelPendingRequests(); return true; } + + private static bool IsTransientNetworkError(HttpRequestException ex) + { + return ex.InnerException is System.Net.Sockets.SocketException socketEx && + (socketEx.SocketErrorCode == System.Net.Sockets.SocketError.TimedOut || + socketEx.SocketErrorCode == System.Net.Sockets.SocketError.ConnectionReset || + socketEx.SocketErrorCode == System.Net.Sockets.SocketError.HostUnreachable); + } } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpHttpExportClient.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpHttpExportClient.cs index 41ae58d7b69..118d428dcb5 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpHttpExportClient.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ProtobufOtlpHttpExportClient.cs @@ -51,7 +51,6 @@ public ExportClientResponse SendExportRequest(byte[] buffer, int contentLength, try { using var httpRequest = this.CreateHttpRequest(buffer, contentLength); - using var httpResponse = this.SendHttpRequest(httpRequest, cancellationToken); try @@ -60,6 +59,7 @@ public ExportClientResponse SendExportRequest(byte[] buffer, int contentLength, } catch (HttpRequestException ex) { + OpenTelemetryProtocolExporterEventSource.Log.HttpRequestFailed(this.Endpoint, ex); return new ExportClientHttpResponse(success: false, deadlineUtc: deadlineUtc, response: httpResponse, ex); } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/OpenTelemetryProtocolExporterEventSource.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/OpenTelemetryProtocolExporterEventSource.cs index 721fc7359e2..e9544cf981e 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/OpenTelemetryProtocolExporterEventSource.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/OpenTelemetryProtocolExporterEventSource.cs @@ -49,6 +49,42 @@ public void RetryStoredRequestException(Exception ex) } } + [NonEvent] + public void TransientHttpError(Uri endpoint, Exception ex) + { + if (Log.IsEnabled(EventLevel.Warning, EventKeywords.All)) + { + this.TransientHttpError(endpoint.ToString(), ex.ToInvariantString()); + } + } + + [NonEvent] + public void HttpRequestFailed(Uri endpoint, Exception ex) + { + if (Log.IsEnabled(EventLevel.Error, EventKeywords.All)) + { + this.HttpRequestFailed(endpoint.ToString(), ex.ToInvariantString()); + } + } + + [NonEvent] + public void OperationUnexpectedlyCanceled(Uri endpoint, Exception ex) + { + if (Log.IsEnabled(EventLevel.Warning, EventKeywords.All)) + { + this.OperationUnexpectedlyCanceled(endpoint.ToString(), ex.ToInvariantString()); + } + } + + [NonEvent] + public void RequestTimedOut(Uri endpoint, Exception ex) + { + if (Log.IsEnabled(EventLevel.Warning, EventKeywords.All)) + { + this.RequestTimedOut(endpoint.ToString(), ex.ToInvariantString()); + } + } + [Event(2, Message = "Exporter failed send data to collector to {0} endpoint. Data will not be sent. Exception: {1}", Level = EventLevel.Error)] public void FailedToReachCollector(string rawCollectorUri, string ex) { @@ -121,6 +157,54 @@ public void BufferResizeFailedDueToMemory(string signalType) this.WriteEvent(15, signalType); } + [Event(16, Message = "Transient HTTP error occurred when communicating with {0}. Exception: {1}", Level = EventLevel.Warning)] + public void TransientHttpError(string endpoint, string exceptionMessage) + { + this.WriteEvent(16, endpoint, exceptionMessage); + } + + [Event(17, Message = "HTTP request to {0} failed. Exception: {1}", Level = EventLevel.Error)] + public void HttpRequestFailed(string endpoint, string exceptionMessage) + { + this.WriteEvent(17, endpoint, exceptionMessage); + } + + [Event(18, Message = "Operation unexpectedly canceled for endpoint {0}. Exception: {1}", Level = EventLevel.Warning)] + public void OperationUnexpectedlyCanceled(string endpoint, string exceptionMessage) + { + this.WriteEvent(18, endpoint, exceptionMessage); + } + + [Event(19, Message = "Request to endpoint {0} timed out. Exception: {1}", Level = EventLevel.Warning)] + public void RequestTimedOut(string endpoint, string exceptionMessage) + { + this.WriteEvent(19, endpoint, exceptionMessage); + } + + [Event(20, Message = "Failed to deserialize response from {0}.", Level = EventLevel.Error)] + public void ResponseDeserializationFailed(string endpoint) + { + this.WriteEvent(20, endpoint); + } + + [Event(21, Message = "Export succeeded for {0}. Message: {1}", Level = EventLevel.Informational)] + public void ExportSuccess(string endpoint, string message) + { + this.WriteEvent(21, endpoint, message); + } + + [Event(22, Message = "Export encountered GRPC status warning for {0}. Status code: {1}", Level = EventLevel.Warning)] + public void GrpcStatusWarning(string endpoint, string statusCode) + { + this.WriteEvent(22, endpoint, statusCode); + } + + [Event(23, Message = "Export failed for {0}. Message: {1}", Level = EventLevel.Error)] + public void ExportFailure(string endpoint, string message) + { + this.WriteEvent(23, endpoint, message); + } + void IConfigurationExtensionsLogger.LogInvalidConfigurationValue(string key, string value) { this.InvalidConfigurationValue(key, value); diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/OtlpExporterTransmissionHandler.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/OtlpExporterTransmissionHandler.cs index 12b76bdf83e..9ecb6c4785f 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/OtlpExporterTransmissionHandler.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/OtlpExporterTransmissionHandler.cs @@ -121,7 +121,7 @@ protected bool TryRetryRequest(TRequest request, DateTime deadlineUtc, out Expor response = this.ExportClient.SendExportRequest(request, deadlineUtc); if (!response.Success) { - OpenTelemetryProtocolExporterEventSource.Log.ExportMethodException(response.Exception, isRetry: true); + OpenTelemetryProtocolExporterEventSource.Log.ExportMethodException(response.Exception!, isRetry: true); return false; } diff --git a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/ProtobufOtlpExporterTransmissionHandler.cs b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/ProtobufOtlpExporterTransmissionHandler.cs index db7ef77f74c..70dad49f9cd 100644 --- a/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/ProtobufOtlpExporterTransmissionHandler.cs +++ b/src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/Transmission/ProtobufOtlpExporterTransmissionHandler.cs @@ -119,13 +119,7 @@ protected virtual void OnShutdown(int timeoutMilliseconds) protected bool TryRetryRequest(byte[] request, int contentLength, DateTime deadlineUtc, out ExportClientResponse response) { response = this.ExportClient.SendExportRequest(request, contentLength, deadlineUtc); - if (!response.Success) - { - OpenTelemetryProtocolExporterEventSource.Log.ExportMethodException(response.Exception, isRetry: true); - return false; - } - - return true; + return response.Success; } /// diff --git a/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/MockCollectorIntegrationTests.cs b/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/MockCollectorIntegrationTests.cs index 27b64e08f63..c9e2c19ae09 100644 --- a/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/MockCollectorIntegrationTests.cs +++ b/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/MockCollectorIntegrationTests.cs @@ -5,7 +5,6 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Net; -using Google.Protobuf; using Grpc.Core; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Hosting; @@ -75,7 +74,7 @@ public async Task TestRecoveryAfterFailedExport() await httpClient.GetAsync($"/MockCollector/SetResponseCodes/{string.Join(",", codes.Select(x => (int)x))}"); var exportResults = new List(); - using var otlpExporter = new OtlpTraceExporter(new OtlpExporterOptions() { Endpoint = new Uri($"http://localhost:{testGrpcPort}") }); + using var otlpExporter = new ProtobufOtlpTraceExporter(new OtlpExporterOptions() { Endpoint = new Uri($"http://localhost:{testGrpcPort}") }); var delegatingExporter = new DelegatingExporter { OnExportFunc = (batch) => @@ -180,10 +179,14 @@ public async Task GrpcRetryTests(bool useRetryTransmissionHandler, ExportResult var exporterOptions = new OtlpExporterOptions() { Endpoint = endpoint, TimeoutMilliseconds = 20000, Protocol = OtlpExportProtocol.Grpc }; var configuration = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary { [ExperimentalOptions.OtlpRetryEnvVar] = useRetryTransmissionHandler ? "in_memory" : null }) - .Build(); + .AddInMemoryCollection(new Dictionary + { + [ExperimentalOptions.OtlpRetryEnvVar] = useRetryTransmissionHandler ? "in_memory" : null, + [ExperimentalOptions.OtlpUseCustomSerializer] = "true", + }) + .Build(); - using var otlpExporter = new OtlpTraceExporter(exporterOptions, new SdkLimitOptions(), new ExperimentalOptions(configuration)); + using var otlpExporter = new ProtobufOtlpTraceExporter(exporterOptions, new SdkLimitOptions(), new ExperimentalOptions(configuration)); var activitySourceName = "otel.grpc.retry.test"; using var source = new ActivitySource(activitySourceName); @@ -265,10 +268,14 @@ public async Task HttpRetryTests(bool useRetryTransmissionHandler, ExportResult var exporterOptions = new OtlpExporterOptions() { Endpoint = endpoint, TimeoutMilliseconds = 20000, Protocol = OtlpExportProtocol.HttpProtobuf }; var configuration = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary { [ExperimentalOptions.OtlpRetryEnvVar] = useRetryTransmissionHandler ? "in_memory" : null }) - .Build(); + .AddInMemoryCollection(new Dictionary + { + [ExperimentalOptions.OtlpRetryEnvVar] = useRetryTransmissionHandler ? "in_memory" : null, + [ExperimentalOptions.OtlpUseCustomSerializer] = "true", + }) + .Build(); - using var otlpExporter = new OtlpTraceExporter(exporterOptions, new SdkLimitOptions(), new ExperimentalOptions(configuration)); + using var otlpExporter = new ProtobufOtlpTraceExporter(exporterOptions, new SdkLimitOptions(), new ExperimentalOptions(configuration)); var activitySourceName = "otel.http.retry.test"; using var source = new ActivitySource(activitySourceName); @@ -347,31 +354,32 @@ public async Task HttpPersistentStorageRetryTests(bool usePersistentStorageTrans var exporterOptions = new OtlpExporterOptions() { Endpoint = endpoint, TimeoutMilliseconds = 20000 }; - var exportClient = new OtlpHttpTraceExportClient(exporterOptions, new HttpClient()); + var exportClient = new ProtobufOtlpHttpExportClient(exporterOptions, new HttpClient(), "/v1/traces"); // TODO: update this to configure via experimental environment variable. - OtlpExporterTransmissionHandler transmissionHandler; + ProtobufOtlpExporterTransmissionHandler transmissionHandler; MockFileProvider? mockProvider = null; if (usePersistentStorageTransmissionHandler) { mockProvider = new MockFileProvider(); - transmissionHandler = new OtlpExporterPersistentStorageTransmissionHandler( + transmissionHandler = new ProtobufOtlpExporterPersistentStorageTransmissionHandler( mockProvider, exportClient, - exporterOptions.TimeoutMilliseconds, - (byte[] data) => - { - var request = new ExportTraceServiceRequest(); - request.MergeFrom(data); - return request; - }); + exporterOptions.TimeoutMilliseconds); } else { - transmissionHandler = new OtlpExporterTransmissionHandler(exportClient, exporterOptions.TimeoutMilliseconds); + transmissionHandler = new ProtobufOtlpExporterTransmissionHandler(exportClient, exporterOptions.TimeoutMilliseconds); } - using var otlpExporter = new OtlpTraceExporter(exporterOptions, new(), new(), transmissionHandler); + var configuration = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + [ExperimentalOptions.OtlpUseCustomSerializer] = "true", + }) + .Build(); + + using var otlpExporter = new ProtobufOtlpTraceExporter(exporterOptions, new(), new(configuration), transmissionHandler); var activitySourceName = "otel.http.persistent.storage.retry.test"; using var source = new ActivitySource(activitySourceName); @@ -397,7 +405,7 @@ public async Task HttpPersistentStorageRetryTests(bool usePersistentStorageTrans Assert.Single(mockProvider!.TryGetBlobs()); // Force Retry - Assert.True((transmissionHandler as OtlpExporterPersistentStorageTransmissionHandler)!.InitiateAndWaitForRetryProcess(-1)); + Assert.True((transmissionHandler as ProtobufOtlpExporterPersistentStorageTransmissionHandler)?.InitiateAndWaitForRetryProcess(-1)); Assert.False(mockProvider.TryGetBlob(out _)); } @@ -486,31 +494,32 @@ public async Task GrpcPersistentStorageRetryTests(bool usePersistentStorageTrans var exporterOptions = new OtlpExporterOptions() { Endpoint = endpoint, TimeoutMilliseconds = 20000 }; - var exportClient = new OtlpGrpcTraceExportClient(exporterOptions); + var exportClient = new ProtobufOtlpGrpcExportClient(exporterOptions, new HttpClient(), "opentelemetry.proto.collector.trace.v1.TraceService/Export"); // TODO: update this to configure via experimental environment variable. - OtlpExporterTransmissionHandler transmissionHandler; + ProtobufOtlpExporterTransmissionHandler transmissionHandler; MockFileProvider? mockProvider = null; if (usePersistentStorageTransmissionHandler) { mockProvider = new MockFileProvider(); - transmissionHandler = new OtlpExporterPersistentStorageTransmissionHandler( + transmissionHandler = new ProtobufOtlpExporterPersistentStorageTransmissionHandler( mockProvider, exportClient, - exporterOptions.TimeoutMilliseconds, - (byte[] data) => - { - var request = new ExportTraceServiceRequest(); - request.MergeFrom(data); - return request; - }); + exporterOptions.TimeoutMilliseconds); } else { - transmissionHandler = new OtlpExporterTransmissionHandler(exportClient, exporterOptions.TimeoutMilliseconds); + transmissionHandler = new ProtobufOtlpExporterTransmissionHandler(exportClient, exporterOptions.TimeoutMilliseconds); } - using var otlpExporter = new OtlpTraceExporter(exporterOptions, new(), new(), transmissionHandler); + var configuration = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + [ExperimentalOptions.OtlpUseCustomSerializer] = "true", + }) + .Build(); + + using var otlpExporter = new ProtobufOtlpTraceExporter(exporterOptions, new(), new(configuration), transmissionHandler); var activitySourceName = "otel.grpc.persistent.storage.retry.test"; using var source = new ActivitySource(activitySourceName); @@ -536,7 +545,7 @@ public async Task GrpcPersistentStorageRetryTests(bool usePersistentStorageTrans Assert.Single(mockProvider.TryGetBlobs()); // Force Retry - Assert.True((transmissionHandler as OtlpExporterPersistentStorageTransmissionHandler)!.InitiateAndWaitForRetryProcess(-1)); + Assert.True((transmissionHandler as ProtobufOtlpExporterPersistentStorageTransmissionHandler)?.InitiateAndWaitForRetryProcess(-1)); Assert.False(mockProvider.TryGetBlob(out _)); } diff --git a/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/OtlpRetryTests.cs b/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/OtlpRetryTests.cs index 23ba18d0f6e..e72faee429c 100644 --- a/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/OtlpRetryTests.cs +++ b/test/OpenTelemetry.Exporter.OpenTelemetryProtocol.Tests/OtlpRetryTests.cs @@ -234,7 +234,7 @@ public GrpcRetryAttempt( this.ThrottleDelay = throttleDelay != null ? throttleDelay.ToTimeSpan() : null; - this.Response = new ExportClientGrpcResponse(expectedSuccess, deadlineUtc, rpcException); + this.Response = new ExportClientGrpcResponse(expectedSuccess, deadlineUtc, rpcException, null, null); this.ExpectedNextRetryDelayMilliseconds = expectedNextRetryDelayMilliseconds;