tutanota/test/tests/api/worker/rest/cacheHandler/CustomMailEventCacheHandlerTest.ts
das fd22294a18
[antispam] Add client-side local spam filtering
Implement a local machine learning model for client-side spam filtering.
The local model is implemented using tensorflow "LayersModel" to train
separate models in all available mailboxes, resulting in one model
per ownerGroup (i.e. mailbox).

Initially, the training data is aggregated from the last 30 days of
received mails, and the data is stored in a separate offline database
table named spam_classification_training_data. The trained model is
stored in the table spam_classification_model. The initial training
starts after indexing, with periodic training happening
every 30 minutes and on each subsequent login.

The model will predict on incoming mails once we have received the
entity event for said mail, moving it to either inbox or spam folder.
When users move mails, we update the training data labels accordingly,
by adjusting the isSpam classification and isSpamConfidence values in
the offline database. The MoveMailService now contains a moveReason,
which indicates that the mail has been moved by our spam filter.

Client-side spam filtering can be activated using the
SpamClientClassification feature flag, and is for now only
available on the desktop client.

Co-authored-by: sug <sug@tutao.de>
Co-authored-by: kib <104761667+kibibytium@users.noreply.github.com>
Co-authored-by: abp <abp@tutao.de>
Co-authored-by: map <mpfau@users.noreply.github.com>
Co-authored-by: jhm <17314077+jomapp@users.noreply.github.com>
Co-authored-by: frm <frm@tutao.de>
Co-authored-by: das <das@tutao.de>
Co-authored-by: nif <nif@tutao.de>
Co-authored-by: amm <amm@tutao.de>
2025-10-22 09:25:20 +02:00

320 lines
14 KiB
TypeScript

import o from "@tutao/otest"
import { func, matchers, object, verify, when } from "testdouble"
import { lazy, lazyAsync } from "@tutao/tutanota-utils"
import { MailIndexer } from "../../../../../../src/mail-app/workerUtils/index/MailIndexer"
import { MailFacade } from "../../../../../../src/common/api/worker/facades/lazy/MailFacade"
import { OfflineStoragePersistence } from "../../../../../../src/mail-app/workerUtils/index/OfflineStoragePersistence"
import { CacheStorage } from "../../../../../../src/common/api/worker/rest/DefaultEntityRestCache"
import { CustomMailEventCacheHandler } from "../../../../../../src/common/api/worker/rest/cacheHandler/CustomMailEventCacheHandler"
import { Body, Mail, MailDetails, MailFolderTypeRef, MailSetEntryTypeRef, MailTypeRef } from "../../../../../../src/common/api/entities/tutanota/TypeRefs"
import { MailSetKind } from "../../../../../../src/common/api/common/TutanotaConstants"
import { ClientClassifierType } from "../../../../../../src/common/api/common/ClientClassifierType"
import { EntityUpdateData } from "../../../../../../src/common/api/common/utils/EntityUpdateUtils"
import { SpamTrainMailDatum } from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
import { getMailBodyText } from "../../../../../../src/common/api/common/CommonMailUtils"
import { createTestEntity } from "../../../../TestUtils"
/**
 * These tests verify that the following rules are obeyed:
 * - All mails in Spam have an isSpamConfidence of 1 (on create).
 * - Moved mails have an isSpamConfidence of 1 (on update event).
 * - Read mails have an isSpamConfidence of 1 (on update event).
 * - Unread mails in Inbox have an isSpamConfidence of 0.
 */
o.spec("CustomMailEventCacheHandler", function () {
// Collaborators handed to every CustomMailEventCacheHandler built in this suite;
// each one is replaced by a fresh test double before every test.
let cacheStorageMock: CacheStorage
let offlineStorageMock: lazy<Promise<OfflineStoragePersistence>>
let indexerAndMailFacadeMock: lazyAsync<{ mailIndexer: MailIndexer; mailFacade: MailFacade }>

// Builds a folder fixture living in list "listId" with the given element id and folder kind.
const folderFixture = (elementId: string, folderType: MailSetKind) => createTestEntity(MailFolderTypeRef, { _id: ["listId", elementId], folderType })

const inboxFolder = folderFixture("inbox", MailSetKind.INBOX)
const trashFolder = folderFixture("trash", MailSetKind.TRASH)
const spamFolder = folderFixture("spam", MailSetKind.SPAM)
const allFolders = [inboxFolder, trashFolder, spamFolder]

o.beforeEach(() => {
	cacheStorageMock = object() as CacheStorage
	offlineStorageMock = func() as lazy<Promise<OfflineStoragePersistence>>
	indexerAndMailFacadeMock = func() as lazyAsync<{ mailIndexer: MailIndexer; mailFacade: MailFacade }>

	// Any folder-list lookup on the cache resolves to the three fixture folders.
	when(cacheStorageMock.getWholeList(MailFolderTypeRef, matchers.anything())).thenResolve(allFolders)
})
o.spec("onEntityEventCreate", function () {
// Collaborator doubles and mail fixtures for the create-event tests.
// Everything is rebuilt in beforeEach so that stubbings and recorded invocations
// of one test can never satisfy (or break) the verifications of another.
let mailIndexer: MailIndexer
let mailFacade: MailFacade
let body: Body
let mailDetails: MailDetails
let mail: Mail
o.beforeEach(function () {
	mailIndexer = object() as MailIndexer
	mailFacade = object() as MailFacade
	when(indexerAndMailFacadeMock()).thenResolve({ mailIndexer, mailFacade })

	body = object({ text: "Body Text" }) as Body
	mailDetails = object({ body }) as MailDetails
	// Default fixture: a read mail that sits in the spam folder.
	mail = createTestEntity(MailTypeRef, {
		_id: ["listId", "elementId"],
		sets: [spamFolder._id],
		subject: "subject",
		_ownerGroup: "owner",
		unread: false,
	})
	// By default downloading the new mail succeeds and yields the fixtures above.
	when(mailIndexer.downloadNewMailData(matchers.anything())).thenResolve({
		mail,
		mailDetails,
	})
})
o("does not process spam e-mails when it fails to download new mail", async function () {
	// Override the default stubbing: the download yields no mail data.
	when(mailIndexer.downloadNewMailData(matchers.anything())).thenResolve(null)
	const offlineStorage = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(offlineStorage)
	const cacheHandler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await cacheHandler.onEntityEventCreate(["listId", "elementId"], [])
	// Without mail data the folders must not be looked up ...
	verify(cacheStorageMock.getWholeList(MailFolderTypeRef, matchers.anything()), { times: 0 })
	// ... and no training datum may be written to the offline storage.
	verify(offlineStorage.storeSpamClassification(matchers.anything()), { times: 0 })
})
o("processSpam maintains server classification when client classification is not enabled", async () => {
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	// The facade yields no prediction (null) for this mail.
	when(mailFacade.predictSpamResult(mail)).thenResolve(null)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventCreate(["listId", "elementId"], [])

	// The default fixture mail is in the spam folder, so it is expected to be stored as spam.
	const expectedDatum: SpamTrainMailDatum = {
		ownerGroup: "owner",
		mailId: mail._id,
		subject: mail.subject,
		body: getMailBodyText(body),
		isSpam: true,
		isSpamConfidence: 1,
	}
	verify(persistence.storeSpamClassification(expectedDatum), { times: 1 })
})
o("processSpam uses client classification when enabled", async () => {
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	// The client-side prediction says "not spam" although the mail sits in the spam folder.
	when(mailFacade.predictSpamResult(mail)).thenResolve(false)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventCreate(["listId", "elementId"], [])

	// The stored datum is expected to follow the client prediction.
	const expectedDatum: SpamTrainMailDatum = {
		ownerGroup: "owner",
		mailId: mail._id,
		subject: mail.subject,
		body: getMailBodyText(body),
		isSpam: false,
		isSpamConfidence: 0,
	}
	verify(persistence.storeSpamClassification(expectedDatum), { times: 1 })
})
o("processSpam correctly verifies if email is stored in spam folder", async () => {
	// An unread mail located in the spam folder.
	mail.sets = [spamFolder._id]
	mail.unread = true
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	when(mailFacade.predictSpamResult(mail)).thenResolve(false)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventCreate(["listId", "elementId"], [])

	const expectedDatum: SpamTrainMailDatum = {
		ownerGroup: "owner",
		mailId: mail._id,
		subject: mail.subject,
		body: getMailBodyText(body),
		isSpam: false,
		isSpamConfidence: 0,
	}
	verify(persistence.storeSpamClassification(expectedDatum), { times: 1 })
})
o("getSpamConfidence is 0 for mail in trash folder ", async () => {
	// A read mail whose only set is the trash folder.
	mail.unread = false
	mail.sets = [["listId", "trash"]]
	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	const { confidence } = handler.getSpamConfidence(allFolders, mail)
	o(confidence).equals(0)
})
o("getSpamConfidence is 1 for mail in spam folder ", async () => {
	// An unread mail whose only set is the spam folder.
	mail.unread = true
	mail.sets = [spamFolder._id]
	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	const { confidence } = handler.getSpamConfidence(allFolders, mail)
	o(confidence).equals(1)
})
o("getSpamConfidence for inbox folder depends on read status", async () => {
	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	mail.sets = [inboxFolder._id]

	// Unread inbox mail: confidence 0.
	mail.unread = true
	const confidenceWhileUnread = handler.getSpamConfidence(allFolders, mail).confidence
	o(confidenceWhileUnread).equals(0)

	// Same mail once it has been read: confidence 1.
	mail.unread = false
	const confidenceOnceRead = handler.getSpamConfidence(allFolders, mail).confidence
	o(confidenceOnceRead).equals(1)
})
o("processSpam moves mail to spam when detected as such and its not already in spam", async () => {
	// The mail starts out in the inbox while the prediction says "spam".
	mail.sets = [inboxFolder._id]
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	when(mailFacade.predictSpamResult(mail)).thenResolve(true)
	when(mailFacade.isSpamClassificationEnabled("owner")).thenReturn(true)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventCreate(["listId", "elementId"], [])

	const expectedDatum: SpamTrainMailDatum = {
		ownerGroup: "owner",
		mailId: mail._id,
		subject: mail.subject,
		body: getMailBodyText(body),
		isSpam: true,
		isSpamConfidence: 1,
	}
	verify(persistence.storeSpamClassification(expectedDatum), { times: 1 })
	// The move must target the spam folder and be attributed to the client classifier.
	verify(mailFacade.simpleMoveMails([["listId", "elementId"]], MailSetKind.SPAM, ClientClassifierType.CLIENT_CLASSIFICATION))
})
o("processSpam moves mail to inbox when detected as such and its not already in inbox", async () => {
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	// The mail starts out in the spam folder while the prediction says "not spam".
	mail.sets = [spamFolder._id]
	when(mailFacade.predictSpamResult(mail)).thenResolve(false)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventCreate(["listId", "elementId"], [])

	const expectedDatum: SpamTrainMailDatum = {
		ownerGroup: "owner",
		mailId: mail._id,
		subject: mail.subject,
		body: getMailBodyText(body),
		isSpam: false,
		isSpamConfidence: 0,
	}
	verify(persistence.storeSpamClassification(expectedDatum), { times: 1 })
	// The move must target the inbox and be attributed to the client classifier.
	verify(mailFacade.simpleMoveMails([["listId", "elementId"]], MailSetKind.INBOX, ClientClassifierType.CLIENT_CLASSIFICATION))
})
})
o.spec("onEntityEventUpdate", function () {
// Collaborator doubles and mail fixtures for the update-event tests.
// Everything is rebuilt in beforeEach so that stubbings and recorded invocations
// of one test can never satisfy (or break) the verifications of another.
let mailIndexer: MailIndexer
let mailFacade: MailFacade
let mail: Mail
let body: Body
let mailDetails: MailDetails
o.beforeEach(function () {
	mailIndexer = object() as MailIndexer
	mailFacade = object() as MailFacade
	when(indexerAndMailFacadeMock()).thenResolve({ mailIndexer, mailFacade })

	body = object({ text: "Body Text" }) as Body
	mailDetails = object({ body }) as MailDetails
	// Default fixture: a mail that sits in the inbox.
	mail = createTestEntity(MailTypeRef, {
		_id: ["listId", "elementId"],
		subject: "subject",
		sets: [inboxFolder._id],
		_ownerGroup: "owner",
	})
	// By default downloading the mail succeeds and yields the fixtures above.
	when(mailIndexer.downloadNewMailData(matchers.anything())).thenResolve({
		mail,
		mailDetails,
	})
})
o("does nothing if mail has not been read and not moved or had label applied.", async () => {
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	// The cached mail is still unread and the update carries no events.
	mail.unread = true
	when(cacheStorageMock.get(MailTypeRef, "listId", "elementId")).thenResolve(mail)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventUpdate(["listId", "elementId"], [])

	// Stored classification data must stay untouched.
	verify(persistence.updateSpamClassificationData(matchers.anything(), matchers.anything(), matchers.anything()), { times: 0 })
})
o("does nothing if we delete a mail from spam folder", async () => {
	const persistence = object() as OfflineStoragePersistence
	when(offlineStorageMock()).thenResolve(persistence)
	when(cacheStorageMock.get(MailTypeRef, "listId", "elementId")).thenResolve(mail)
	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)

	// Step 1: the mail is created inside the spam folder, which stores one training datum.
	mail.sets = [spamFolder._id]
	await handler.onEntityEventCreate(["listId", "elementId"], [])
	verify(persistence.storeSpamClassification(matchers.anything()), { times: 1 })

	// Step 2: the mail ends up in trash; the stored classification must not be updated.
	mail.sets = [trashFolder._id]
	await handler.onEntityEventUpdate(["listId", "elementId"], [])
	verify(persistence.updateSpamClassificationData(matchers.anything(), matchers.anything(), matchers.anything()), { times: 0 })
})
o("does update spam classification data if mail has been read in inbox and not moved", async () => {
	mail.sets = [inboxFolder._id]
	const persistence = object() as OfflineStoragePersistence
	// A not-spam classification with confidence 0 is already stored for this mail.
	when(persistence.getStoredClassification(mail)).thenResolve({ isSpam: false, isSpamConfidence: 0 })
	when(offlineStorageMock()).thenResolve(persistence)
	// The mail has been read in the meantime.
	mail.unread = false
	when(cacheStorageMock.get(MailTypeRef, "listId", "elementId")).thenResolve(mail)

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventUpdate(["listId", "elementId"], [])

	// The confidence is raised to 1 while the label stays "not spam"; no prediction is requested.
	verify(persistence.updateSpamClassificationData(["listId", "elementId"], false, 1), { times: 1 })
	verify(mailFacade.predictSpamResult(mail), { times: 0 })
})
o("does update spam classification data if mail has not been read but moved", async () => {
	// The mail now sits in the spam folder ...
	mail.sets = [spamFolder._id]
	const persistence = object() as OfflineStoragePersistence
	// ... while the stored classification still says "not spam" with confidence 0.
	when(persistence.getStoredClassification(mail)).thenResolve({ isSpam: false, isSpamConfidence: 0 })
	when(offlineStorageMock()).thenResolve(persistence)
	mail.unread = true
	when(cacheStorageMock.get(MailTypeRef, "listId", "elementId")).thenResolve(mail)
	// The update is accompanied by an event of type MailSetEntry.
	const setEntryEvent = object({ typeRef: MailSetEntryTypeRef }) as unknown as EntityUpdateData

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventUpdate(["listId", "elementId"], [setEntryEvent])

	// The stored label flips to spam with confidence 1.
	verify(persistence.updateSpamClassificationData(["listId", "elementId"], true, 1), { times: 1 })
})
o("does update spam classification data if mail was not previously included", async () => {
	mail.sets = [inboxFolder._id]
	const persistence = object() as OfflineStoragePersistence
	// Nothing has been stored for this mail so far.
	when(persistence.getStoredClassification(mail)).thenResolve(null)
	when(offlineStorageMock()).thenResolve(persistence)
	mail.unread = true
	when(cacheStorageMock.get(MailTypeRef, "listId", "elementId")).thenResolve(mail)
	// The update is accompanied by an event of type MailSetEntry.
	const setEntryEvent = object({ typeRef: MailSetEntryTypeRef }) as unknown as EntityUpdateData

	const handler = new CustomMailEventCacheHandler(indexerAndMailFacadeMock, offlineStorageMock, cacheStorageMock)
	await handler.onEntityEventUpdate(["listId", "elementId"], [setEntryEvent])

	// A fresh datum is stored instead of updating an existing one: unread inbox mail, so not spam with confidence 0.
	const expectedDatum: SpamTrainMailDatum = {
		ownerGroup: "owner",
		mailId: mail._id,
		subject: mail.subject,
		body: getMailBodyText(body),
		isSpam: false,
		isSpamConfidence: 0,
	}
	verify(persistence.storeSpamClassification(expectedDatum), { times: 1 })
})
})
})