From c33591eacaf649e801ddc72ee653f0aa64919570 Mon Sep 17 00:00:00 2001 From: abp Date: Tue, 18 Nov 2025 16:42:23 +0100 Subject: [PATCH] instantiate and import spam classifier lazily Co-authored-by: das --- buildSrc/RollupConfig.js | 4 +-- .../SpamMailProcessor.ts | 25 ++++++++----------- .../api/worker/offline/OfflineStorage.ts | 2 +- .../api/worker/rest/CacheStorageProxy.ts | 2 +- .../api/worker/rest/DefaultEntityRestCache.ts | 2 +- .../api/worker/rest/EphemeralCacheStorage.ts | 2 +- .../spamClassification/SpamClassifier.ts | 5 +--- src/mail-app/workerUtils/worker/WorkerImpl.ts | 2 +- .../workerUtils/worker/WorkerLocator.ts | 13 ++++++---- .../spamClassification/SpamClassifierTest.ts | 5 ++-- 10 files changed, 29 insertions(+), 33 deletions(-) diff --git a/buildSrc/RollupConfig.js b/buildSrc/RollupConfig.js index 37a5fbb45a..d4f77e7e1b 100644 --- a/buildSrc/RollupConfig.js +++ b/buildSrc/RollupConfig.js @@ -33,7 +33,7 @@ export const allowedImports = { wasm: ["wasm-fallback"], "common-min": ["polyfill-helpers"], boot: ["polyfill-helpers", "common-min"], - common: ["polyfill-helpers", "common-min", "spam-classifier"], + common: ["polyfill-helpers", "common-min"], "gui-base": ["polyfill-helpers", "common-min", "common", "boot"], main: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "date"], sanitizer: ["polyfill-helpers", "common-min", "common", "boot", "gui-base"], @@ -47,7 +47,7 @@ export const allowedImports = { "calendar-view": ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main", "date", "date-gui", "sharing", "contacts"], login: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main"], "spam-classifier": ["polyfill-helpers", "common", "common-min"], - worker: ["polyfill-helpers", "common-min", "common", "native-common", "native-worker", "wasm", "wasm-fallback", "spam-classifier"], + worker: ["polyfill-helpers", "common-min", "common", "native-common", "native-worker", "wasm", "wasm-fallback"], "pow-worker": [], settings: [ "polyfill-helpers", diff --git a/src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor.ts b/src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor.ts index 8873952903..8818c3bc91 100644 --- a/src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor.ts +++ b/src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor.ts @@ -1,4 +1,4 @@ -import { HashingVectorizer } from "../../../../../mail-app/workerUtils/spamClassification/HashingVectorizer" +import type { HashingVectorizer } from "../../../../../mail-app/workerUtils/spamClassification/HashingVectorizer" import { htmlToText } from "../IndexUtils" import { ML_BITCOIN_REGEX, @@ -19,11 +19,11 @@ import { ML_URL_TOKEN, } from "./PreprocessPatterns" import { SparseVectorCompressor } from "./SparseVectorCompressor" -import { ProgrammingError } from "../../error/ProgrammingError" -import { assertNotNull, tokenize } from "@tutao/tutanota-utils" +import { assertNotNull, lazyAsync, lazyMemoized, tokenize } from "@tutao/tutanota-utils" import { Mail, MailAddress, MailDetails } from "../../../entities/tutanota/TypeRefs" import { getMailBodyText } from "../../CommonMailUtils" import { MailAuthenticationStatus } from "../../TutanotaConstants" +import { ProgrammingError } from "../../error/ProgrammingError" export type PreprocessConfiguration = { isPreprocessMails: boolean @@ -69,15 +69,12 @@ export type PreprocessedMailContent = string export class SpamMailProcessor { constructor( private readonly preprocessConfiguration: PreprocessConfiguration = DEFAULT_PREPROCESS_CONFIGURATION, - readonly vectorizer: HashingVectorizer = new HashingVectorizer(), private readonly sparseVectorCompressor: SparseVectorCompressor = new SparseVectorCompressor(), - ) { - if (vectorizer.dimension !== sparseVectorCompressor.dimension) { - throw new ProgrammingError( - `a spam preprocessor was created with different dimensions. Vectorizer:${vectorizer.dimension} compressor: ${sparseVectorCompressor.dimension}`, - ) - } - } + private readonly vectorizer: lazyAsync = lazyMemoized(async () => { + const { HashingVectorizer } = await import("../../../../../mail-app/workerUtils/spamClassification/HashingVectorizer") + return new HashingVectorizer(this.sparseVectorCompressor.dimension) + }), + ) {} public async vectorizeAndCompress(spamMailDatum: SpamMailDatum): Promise { const vector = await this.vectorize(spamMailDatum) @@ -85,13 +82,13 @@ export class SpamMailProcessor { } public async vectorize(spamMailDatum: SpamMailDatum): Promise { + const vectorizer = await this.vectorizer() const preprocessedMail = this.preprocessMail(spamMailDatum) const tokenizedMail = spamClassifierTokenizer(preprocessedMail) - const vector = await this.vectorizer.vectorize(tokenizedMail) - return vector + return await vectorizer.vectorize(tokenizedMail) } - public async compress(uncompressedVector: number[]): Promise { + private async compress(uncompressedVector: number[]): Promise { return this.sparseVectorCompressor.vectorToBinary(uncompressedVector) } diff --git a/src/common/api/worker/offline/OfflineStorage.ts b/src/common/api/worker/offline/OfflineStorage.ts index 73dbef50dd..290b314f27 100644 --- a/src/common/api/worker/offline/OfflineStorage.ts +++ b/src/common/api/worker/offline/OfflineStorage.ts @@ -46,7 +46,7 @@ import { AttributeModel } from "../../common/AttributeModel" import { TypeModelResolver } from "../../common/EntityFunctions" import { collapseId, expandId } from "../rest/RestClientIdUtils" import { Category, syncMetrics } from "../utils/SyncMetrics" -import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" +import type { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" /** * this is the value of SQLITE_MAX_VARIABLE_NUMBER in sqlite3.c diff --git a/src/common/api/worker/rest/CacheStorageProxy.ts b/src/common/api/worker/rest/CacheStorageProxy.ts index b907f0363a..c2e3413aff 100644 --- a/src/common/api/worker/rest/CacheStorageProxy.ts +++ b/src/common/api/worker/rest/CacheStorageProxy.ts @@ -5,7 +5,7 @@ import { Nullable, TypeRef } from "@tutao/tutanota-utils" import { OfflineStorage, OfflineStorageInitArgs } from "../offline/OfflineStorage.js" import { EphemeralCacheStorage, EphemeralStorageInitArgs } from "./EphemeralCacheStorage" import { CustomCacheHandlerMap } from "./cacheHandler/CustomCacheHandler.js" -import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" +import type { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" export interface EphemeralStorageArgs extends EphemeralStorageInitArgs { type: "ephemeral" diff --git a/src/common/api/worker/rest/DefaultEntityRestCache.ts b/src/common/api/worker/rest/DefaultEntityRestCache.ts index 4507973630..a4a4ea9c9f 100644 --- a/src/common/api/worker/rest/DefaultEntityRestCache.ts +++ b/src/common/api/worker/rest/DefaultEntityRestCache.ts @@ -55,7 +55,7 @@ import { AttributeModel } from "../../common/AttributeModel" import { collapseId, expandId } from "./RestClientIdUtils" import { PatchMerger } from "../offline/PatchMerger" import { hasError, isExpectedErrorForSynchronization } from "../../common/utils/ErrorUtils" -import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" +import type { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" assertWorkerOrNode() diff --git a/src/common/api/worker/rest/EphemeralCacheStorage.ts b/src/common/api/worker/rest/EphemeralCacheStorage.ts index 1a24105ef6..ace4fdefe3 100644 --- a/src/common/api/worker/rest/EphemeralCacheStorage.ts +++ b/src/common/api/worker/rest/EphemeralCacheStorage.ts @@ -10,7 +10,7 @@ import { ModelMapper } from "../crypto/ModelMapper" import { ServerTypeModelResolver } from "../../common/EntityFunctions" import { expandId } from "./RestClientIdUtils" import { hasError } from "../../common/utils/ErrorUtils" -import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" +import type { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier" /** Cache for a single list. */ type ListCache = { diff --git a/src/mail-app/workerUtils/spamClassification/SpamClassifier.ts b/src/mail-app/workerUtils/spamClassification/SpamClassifier.ts index 6ce82ce8a9..a8fb16edb9 100644 --- a/src/mail-app/workerUtils/spamClassification/SpamClassifier.ts +++ b/src/mail-app/workerUtils/spamClassification/SpamClassifier.ts @@ -20,9 +20,6 @@ import type { Tensor } from "@tensorflow/tfjs-core" import { DEFAULT_PREPROCESS_CONFIGURATION, SpamMailDatum, SpamMailProcessor } from "../../../common/api/common/utils/spamClassificationUtils/SpamMailProcessor" import { SparseVectorCompressor } from "../../../common/api/common/utils/spamClassificationUtils/SparseVectorCompressor" import { SpamDecision } from "../../../common/api/common/TutanotaConstants" -import { HashingVectorizer } from "./HashingVectorizer" - -assertWorkerOrNode() export type SpamClassificationModel = { modelTopology: string @@ -61,7 +58,7 @@ export class SpamClassifier { enableProdMode() this.classifiers = new Map() this.sparseVectorCompressor = new SparseVectorCompressor() - this.spamMailProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, new HashingVectorizer(), this.sparseVectorCompressor) + this.spamMailProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, this.sparseVectorCompressor) } calculateThreshold(hamCount: number, spamCount: number) { diff --git a/src/mail-app/workerUtils/worker/WorkerImpl.ts b/src/mail-app/workerUtils/worker/WorkerImpl.ts index 6d5234a797..a87f10b230 100644 --- a/src/mail-app/workerUtils/worker/WorkerImpl.ts +++ b/src/mail-app/workerUtils/worker/WorkerImpl.ts @@ -308,7 +308,7 @@ export class WorkerImpl implements NativeInterface { return locator.autosaveFacade() }, async spamClassifier() { - return locator.spamClassifier + return locator.spamClassifier() }, } } diff --git a/src/mail-app/workerUtils/worker/WorkerLocator.ts b/src/mail-app/workerUtils/worker/WorkerLocator.ts index d362e60837..e09d896a4e 100644 --- a/src/mail-app/workerUtils/worker/WorkerLocator.ts +++ b/src/mail-app/workerUtils/worker/WorkerLocator.ts @@ -112,10 +112,9 @@ import { PublicKeySignatureFacade } from "../../../common/api/worker/facades/Pub import { AdminKeyLoaderFacade } from "../../../common/api/worker/facades/AdminKeyLoaderFacade" import { IdentityKeyCreator } from "../../../common/api/worker/facades/lazy/IdentityKeyCreator" import { PublicIdentityKeyProvider } from "../../../common/api/worker/facades/PublicIdentityKeyProvider" -import { SpamClassifier } from "../spamClassification/SpamClassifier" import { IdentityKeyTrustDatabase } from "../../../common/api/worker/facades/IdentityKeyTrustDatabase" import { AutosaveFacade } from "../../../common/api/worker/facades/lazy/AutosaveFacade" -import { SpamClassificationDataDealer } from "../spamClassification/SpamClassificationDataDealer" +import type { SpamClassifier } from "../spamClassification/SpamClassifier" assertWorkerOrNode() @@ -198,7 +197,7 @@ export type WorkerLocatorType = { contactFacade: lazyAsync //spam classification - spamClassifier: SpamClassifier + spamClassifier: lazyAsync } export const locator: WorkerLocatorType = {} as any @@ -740,8 +739,12 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData) ) }) - const spamClassificationDataDealer = new SpamClassificationDataDealer(locator.cachingEntityClient, locator.bulkMailLoader, locator.mail) - locator.spamClassifier = new SpamClassifier(locator.cacheStorage, spamClassificationDataDealer) + locator.spamClassifier = lazyMemoized(async () => { + const { SpamClassificationDataDealer } = await import("../spamClassification/SpamClassificationDataDealer") + const { SpamClassifier } = await import("../spamClassification/SpamClassifier") + const spamClassificationDataDealer = new SpamClassificationDataDealer(locator.cachingEntityClient, locator.bulkMailLoader, locator.mail) + return new SpamClassifier(locator.cacheStorage, spamClassificationDataDealer) + }) const nativePushFacade = new NativePushFacadeSendDispatcher(worker) locator.calendar = lazyMemoized(async () => { diff --git a/test/tests/api/worker/utils/spamClassification/SpamClassifierTest.ts b/test/tests/api/worker/utils/spamClassification/SpamClassifierTest.ts index cdcc4cf388..2e08d9be82 100644 --- a/test/tests/api/worker/utils/spamClassification/SpamClassifierTest.ts +++ b/test/tests/api/worker/utils/spamClassification/SpamClassifierTest.ts @@ -8,7 +8,6 @@ import { SpamClassificationDataDealer, TrainingDataset } from "../../../../../.. import { CacheStorage } from "../../../../../../src/common/api/worker/rest/DefaultEntityRestCache" import { mockAttribute } from "@tutao/tutanota-test-utils" import "@tensorflow/tfjs-backend-cpu" -import { HashingVectorizer } from "../../../../../../src/mail-app/workerUtils/spamClassification/HashingVectorizer" import { LayersModel, tensor1d } from "../../../../../../src/mail-app/workerUtils/spamClassification/tensorflow-custom" import { createTestEntity } from "../../../../TestUtils" import { ClientSpamTrainingDatum, ClientSpamTrainingDatumTypeRef, MailTypeRef } from "../../../../../../src/common/api/entities/tutanota/TypeRefs" @@ -111,7 +110,7 @@ o.spec("SpamClassifierTest", () => { const vectorLength = 512 compressor = new SparseVectorCompressor(vectorLength) - spamProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, new HashingVectorizer(vectorLength), compressor) + spamProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, compressor) spamClassifier = new SpamClassifier(mockCacheStorage, mockSpamClassificationDataDealer, true) spamClassifier.spamMailProcessor = spamProcessor spamClassifier.sparseVectorCompressor = compressor @@ -529,7 +528,7 @@ if (DO_RUN_PERFORMANCE_ANALYSIS) { mockSpamClassificationDataDealer.fetchAllTrainingData = async () => { return getTrainingDataset(dataSlice) } - spamProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, new HashingVectorizer(), compressor) + spamProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, compressor) spamClassifier = new SpamClassifier(mockOfflineStorageCache, mockSpamClassificationDataDealer, false) spamClassifier.spamMailProcessor = spamProcessor })