From 52b1bed7ef091236096514e58eef7c52c08a6a53 Mon Sep 17 00:00:00 2001 From: Mark Fields Date: Fri, 10 Jan 2025 16:35:22 -0800 Subject: [PATCH] fix(BlobManager): Don't log getBlob errors if the runtime is disposed (#23521) There's a race condition where the application may be closed while the BlobManager is awaiting a blob to be downloaded from the service. When this happens, the error "Storage Service is disposed. Cannot retry" is thrown, and the BlobManager logs it as an error. But this isn't really an interesting error (because we don't care about the result at this point anyway) and shouldn't be logged as such. This change downgrades the `BlobManager:AttachmentReadBlob_cancel` log to "generic" category in this case, not "error". --- .../src/blobManager/blobManager.ts | 15 +++++-- .../src/test/blobManager.spec.ts | 39 ++++++++++++++++++- packages/utils/telemetry-utils/src/logger.ts | 11 ++++-- .../src/test/performanceEvent.spec.ts | 30 ++++++++++++++ 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/packages/runtime/container-runtime/src/blobManager/blobManager.ts b/packages/runtime/container-runtime/src/blobManager/blobManager.ts index a4f19fb72a2d..8f2b4252de86 100644 --- a/packages/runtime/container-runtime/src/blobManager/blobManager.ts +++ b/packages/runtime/container-runtime/src/blobManager/blobManager.ts @@ -96,7 +96,7 @@ export class BlobHandle extends FluidHandleBase { // the contract explicit and reduces the amount of mocking required for tests. export type IBlobManagerRuntime = Pick< IContainerRuntime, - "attachState" | "connected" | "baseLogger" | "clientDetails" + "attachState" | "connected" | "baseLogger" | "clientDetails" | "disposed" > & TypedEventEmitter; @@ -370,8 +370,17 @@ export class BlobManager extends TypedEventEmitter { return PerformanceEvent.timedExecAsync( this.mc.logger, { eventName: "AttachmentReadBlob", id: storageId }, - async () => { - return this.getStorage().readBlob(storageId); + async (event) => { + return this.getStorage() + .readBlob(storageId) + .catch((error) => { + if (this.runtime.disposed) { + // If the runtime is disposed, this is not an error we care to track, it's expected behavior. + event.cancel({ category: "generic" }); + } + + throw error; + }); }, { end: true, cancel: "error" }, ); diff --git a/packages/runtime/container-runtime/src/test/blobManager.spec.ts b/packages/runtime/container-runtime/src/test/blobManager.spec.ts index 90826809aab3..bae77a3c3020 100644 --- a/packages/runtime/container-runtime/src/test/blobManager.spec.ts +++ b/packages/runtime/container-runtime/src/test/blobManager.spec.ts @@ -26,11 +26,13 @@ import { IDocumentStorageService } from "@fluidframework/driver-definitions/inte import type { ISequencedMessageEnvelope } from "@fluidframework/runtime-definitions/internal"; import { LoggingError, + MockLogger, MonitoringContext, createChildLogger, mixinMonitoringContext, type ITelemetryLoggerExt, } from "@fluidframework/telemetry-utils/internal"; +import Sinon from "sinon"; import { v4 as uuid } from "uuid"; import { @@ -102,6 +104,8 @@ export class MockRuntime }); } + public disposed: boolean = false; + public get storage() { return (this.attachState === AttachState.Detached ? this.detachedStorage @@ -299,6 +303,7 @@ export const validateSummary = (runtime: MockRuntime) => { describe("BlobManager", () => { const handlePs: Promise>[] = []; + const mockLogger = new MockLogger(); let runtime: MockRuntime; let createBlob: (blob: ArrayBufferLike, signal?: AbortSignal) => Promise; let waitForBlob: (blob: ArrayBufferLike) => Promise; @@ -309,7 +314,10 @@ describe("BlobManager", () => { const configProvider = (settings: Record): IConfigProviderBase => ({ getRawConfig: (name: string): ConfigTypes => settings[name], }); - mc = mixinMonitoringContext(createChildLogger(), configProvider(injectedSettings)); + mc = mixinMonitoringContext( + createChildLogger({ logger: mockLogger }), + configProvider(injectedSettings), + ); runtime = new MockRuntime(mc); handlePs.length = 0; @@ -343,6 +351,7 @@ describe("BlobManager", () => { afterEach(async () => { assert((runtime.blobManager as any).pendingBlobs.size === 0); injectedSettings = {}; + mockLogger.clear(); }); it("empty snapshot", () => { @@ -694,6 +703,34 @@ describe("BlobManager", () => { assert.strictEqual(runtime.blobManager.allBlobsAttached, true); }); + it("runtime disposed during readBlob - log no error", async () => { + const someId = "someId"; + (runtime.blobManager as any).setRedirection(someId, undefined); // To appease an assert + + // Mock storage.readBlob to dispose the runtime and throw an error + Sinon.stub(runtime.storage, "readBlob").callsFake(async (_id: string) => { + runtime.disposed = true; + throw new Error("BOOM!"); + }); + + await assert.rejects( + async () => runtime.blobManager.getBlob(someId), + (e: Error) => e.message === "BOOM!", + "Expected getBlob to throw with test error message", + ); + assert(runtime.disposed, "Runtime should be disposed"); + mockLogger.assertMatchNone( + [{ category: "error" }], + "Should not have logged any errors", + undefined, + false /* clearEventsAfterCheck */, + ); + mockLogger.assertMatch( + [{ category: "generic", eventName: "BlobManager:AttachmentReadBlob_cancel" }], + "Expected the _cancel event to be logged with 'generic' category", + ); + }); + describe("Abort Signal", () => { it("abort before upload", async () => { await runtime.attach(); diff --git a/packages/utils/telemetry-utils/src/logger.ts b/packages/utils/telemetry-utils/src/logger.ts index 11133973ef6e..a1ad3354ae3d 100644 --- a/packages/utils/telemetry-utils/src/logger.ts +++ b/packages/utils/telemetry-utils/src/logger.ts @@ -763,12 +763,16 @@ export class PerformanceEvent { this.reportEvent("end"); } this.performanceEndMark(); + + // To prevent the event from being reported again later this.event = undefined; } public end(props?: ITelemetryPropertiesExt): void { this.reportEvent("end", props); this.performanceEndMark(); + + // To prevent the event from being reported again later this.event = undefined; } @@ -785,6 +789,8 @@ export class PerformanceEvent { if (this.markers.cancel !== undefined) { this.reportEvent("cancel", { category: this.markers.cancel, ...props }, error); } + + // To prevent the event from being reported again later this.event = undefined; } @@ -796,9 +802,8 @@ export class PerformanceEvent { props?: ITelemetryPropertiesExt, error?: unknown, ): void { - // There are strange sequences involving multiple Promise chains - // where the event can be cancelled and then later a callback is invoked - // and the caller attempts to end directly, e.g. issue #3936. Just return. + // If the caller invokes cancel or end directly inside the callback for timedExec[Async], + // then it's possible to come back through reportEvent twice. Only the first time counts. if (!this.event) { return; } diff --git a/packages/utils/telemetry-utils/src/test/performanceEvent.spec.ts b/packages/utils/telemetry-utils/src/test/performanceEvent.spec.ts index 30b18ee5e145..45a612951688 100644 --- a/packages/utils/telemetry-utils/src/test/performanceEvent.spec.ts +++ b/packages/utils/telemetry-utils/src/test/performanceEvent.spec.ts @@ -65,6 +65,36 @@ describe("PerformanceEvent", () => { assert.equal(logger.errorsLogged, 0, "Shouldn't have logged any errors"); }); + it("Cancel then throw (double cancel)", async () => { + assert.throws( + () => + PerformanceEvent.timedExec( + logger, + { eventName: "Testing" }, + (event) => { + callbackCalls++; + + // This is how you can use custom logic to override the "error" category for cancel (specified in the markers below) + event.cancel({ category: "generic" }); + throw new Error("Cancelled already"); + }, + { + start: true, + end: true, + cancel: "error", + }, + ), + (e: Error) => e.message === "Cancelled already", + "Should have thrown the error", + ); + assert.equal(logger.errorsLogged, 0, "Shouldn't have logged any errors"); + assert.equal( + logger.eventsLogged, + 2, + "Should have logged a start and cancel event (not with error category)", + ); + }); + describe("Event sampling", () => { it("Events are logged at least once", async () => { await PerformanceEvent.timedExecAsync(