tutanota/test/tests/mail/ProcessInboxHandlerTest.ts
map 5293be6a4a
Implement spam training data sync and add TutanotaModelV98
We sync the spam training data encrypted through our server to make
sure that all clients for a specific user behave the same when
classifying mails. Additionally, this enables the spam classification
in the webApp. We compress the training data vectors
(see clientSpamTrainingDatum) before uploading to our server using
SparseVectorCompressor.ts. When a user has the ClientSpamClassification
enabled, the spam training data sync will happen for every mail
received.

ClientSpamTrainingDatum instances are not stored in the CacheStorage.
No entityEvents are emitted for this type.
However, we retrieve creations and updates of ClientSpamTrainingDatum
instances through the modifiedClientSpamTrainingDataIndex.

We calculate a threshold per classifier based on the dataset's ham-to-spam
ratio. We also subsample our training data to cap the ham-to-spam ratio
within a certain limit.

Co-authored-by: jomapp <17314077+jomapp@users.noreply.github.com>
Co-authored-by: das <das@tutao.de>
Co-authored-by: abp <abp@tutao.de>
Co-authored-by: Kinan <104761667+kibibytium@users.noreply.github.com>
Co-authored-by: sug <sug@tutao.de>
Co-authored-by: nif <nif@tutao.de>
Co-authored-by: map <mpfau@users.noreply.github.com>
2025-11-18 13:56:19 +01:00

224 lines
9.5 KiB
TypeScript

import o from "@tutao/otest"
import { matchers, object, verify, when } from "testdouble"
import {
Body,
BodyTypeRef,
ClientSpamClassifierResultTypeRef,
Mail,
MailDetails,
MailDetailsTypeRef,
MailFolderTypeRef,
MailTypeRef,
} from "../../../src/common/api/entities/tutanota/TypeRefs"
import { FeatureType, MailSetKind, ProcessingState, SpamDecision } from "../../../src/common/api/common/TutanotaConstants"
import { ClientClassifierType } from "../../../src/common/api/common/ClientClassifierType"
import { assertNotNull, delay } from "@tutao/tutanota-utils"
import { MailFacade } from "../../../src/common/api/worker/facades/lazy/MailFacade"
import { createTestEntity } from "../TestUtils"
import { SpamClassificationHandler } from "../../../src/mail-app/mail/model/SpamClassificationHandler"
import { FolderSystem } from "../../../src/common/api/common/mail/FolderSystem"
import { isSameId } from "../../../src/common/api/common/utils/EntityUtils"
import { InboxRuleHandler } from "../../../src/mail-app/mail/model/InboxRuleHandler"
import { ProcessInboxHandler, UnencryptedProcessInboxDatum } from "../../../src/mail-app/mail/model/ProcessInboxHandler"
import { MailboxDetail } from "../../../src/common/mailFunctionality/MailboxModel"
import { createSpamMailDatum, SpamMailProcessor } from "../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
import { LoginController } from "../../../src/common/api/main/LoginController"
// Shorthand for testdouble's wildcard argument matcher, used in the stubs and verifications below.
const { anything } = matchers
o.spec("ProcessInboxHandlerTest", function () {
// Mocked collaborators that are handed to the ProcessInboxHandler constructor.
let mailFacade = object<MailFacade>()
let logins = object<LoginController>()
// Per-test fixtures; each of these is assigned in beforeEach.
let body: Body
let mail: Mail
let spamHandler: SpamClassificationHandler
let folderSystem: FolderSystem
let mailboxDetail: MailboxDetail
let mailDetails: MailDetails
let inboxRuleHandler: InboxRuleHandler = object<InboxRuleHandler>()
// The handler under test.
let processInboxHandler: ProcessInboxHandler
// One folder per MailSetKind used by the tests; all of them share the list id "listId".
const inboxFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "inbox"], folderType: MailSetKind.INBOX })
const trashFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "trash"], folderType: MailSetKind.TRASH })
const spamFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "spam"], folderType: MailSetKind.SPAM })
o.beforeEach(function () {
	// Recreate every test double per test. testdouble records invocations and stubbings on the double
	// itself, so a double shared across tests would let a verify() pass on a call made by an earlier
	// test (several tests below verify processNewMails with structurally identical arguments).
	mailFacade = object<MailFacade>()
	logins = object<LoginController>()
	spamHandler = object<SpamClassificationHandler>()
	inboxRuleHandler = object<InboxRuleHandler>()
	folderSystem = object<FolderSystem>()
	mailboxDetail = object()

	// Default mail fixture: unread, not yet processed, located in the spam folder.
	// Individual tests overwrite `sets` / `processNeeded` as needed.
	body = createTestEntity(BodyTypeRef, { text: "Body Text" })
	mailDetails = createTestEntity(MailDetailsTypeRef, { _id: "mailDetail", body })
	mail = createTestEntity(MailTypeRef, {
		_id: ["listId", "elementId"],
		sets: [spamFolder._id],
		subject: "subject",
		_ownerGroup: "owner",
		mailDetails: ["detailsList", mailDetails._id],
		unread: true,
		processingState: ProcessingState.INBOX_RULE_NOT_PROCESSED,
		clientSpamClassifierResult: createTestEntity(ClientSpamClassifierResultTypeRef, { spamDecision: SpamDecision.NONE }),
		processNeeded: true,
	})

	when(mailFacade.moveMails(anything(), anything(), anything())).thenResolve([])
	// Only hand out the mail details when they are requested for the fixture mail.
	when(
		mailFacade.loadMailDetailsBlob(
			matchers.argThat((requestedMails: Mail) => {
				return isSameId(requestedMails._id, mail._id)
			}),
		),
	).thenDo(async () => mailDetails)

	processInboxHandler = new ProcessInboxHandler(
		logins,
		mailFacade,
		() => spamHandler,
		() => inboxRuleHandler,
		new Map(),
		0,
	)
	// Client-side spam classification is enabled by default; the "disabled" test overrides this stub.
	when(logins.isEnabled(FeatureType.SpamClientClassification)).thenReturn(true)
})
o("handleIncomingMail does NOT move mail if it has been processed already", async function () {
	// A mail in the inbox that is flagged as already processed.
	mail.sets = [inboxFolder._id]
	mail.processNeeded = false

	const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	o(targetFolder).deepEquals(inboxFolder)
	// The sibling tests wait one tick before inspecting processNewMails; do the same here so that a
	// deferred call would not slip past the "never called" check.
	await delay(0)
	// These verifications must run AFTER the call under test: a `times: 0` check placed before it
	// can never fail.
	verify(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything()), { times: 0 })
	verify(spamHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
	verify(mailFacade.processNewMails(anything(), anything()), { times: 0 })
})
o("handleIncomingMail does move mail from inbox to other folder if inbox rule applies", async function () {
	mail.sets = [inboxFolder._id]
	// The inbox rule handler reports a matching rule that targets the trash folder.
	const processInboxDatum: UnencryptedProcessInboxDatum = {
		classifierType: ClientClassifierType.CUSTOMER_INBOX_RULES,
		mailId: mail._id,
		targetMoveFolder: trashFolder._id,
		vector: new Uint8Array(),
	}
	when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve({
		targetFolder: trashFolder,
		processInboxDatum,
	})

	const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	o(targetFolder).deepEquals(trashFolder)
	await delay(0)
	// Checked after the call under test; a `times: 0` verification placed before it is always satisfied.
	verify(spamHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
	verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processInboxDatum]))
})
o("handleIncomingMail does move mail from inbox to spam folder if mail is spam", async function () {
	// Arrange: the mail is in the inbox, no inbox rule matches, and the spam handler picks the spam folder.
	const spamVerdict: UnencryptedProcessInboxDatum = {
		mailId: mail._id,
		targetMoveFolder: spamFolder._id,
		classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
		vector: new Uint8Array(),
	}
	mail.sets = [inboxFolder._id]
	when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
	when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
		targetFolder: spamFolder,
		processInboxDatum: spamVerdict,
	})

	// Act
	const resultFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	// Assert: the spam folder is returned and the datum is reported for the mail's owner group.
	o(resultFolder).deepEquals(spamFolder)
	// Yield once before verifying — presumably processNewMails is dispatched asynchronously; confirm against ProcessInboxHandler.
	await delay(0)
	const ownerGroup = assertNotNull(mail._ownerGroup)
	verify(mailFacade.processNewMails(ownerGroup, [spamVerdict]))
})
o("handleIncomingMail does NOT move mail from inbox to spam folder if mail is ham", async function () {
	// Arrange: the mail is in the inbox, no inbox rule matches, and the spam handler keeps it in the inbox.
	const hamVerdict: UnencryptedProcessInboxDatum = {
		mailId: mail._id,
		targetMoveFolder: inboxFolder._id,
		classifierType: null,
		vector: new Uint8Array(),
	}
	mail.sets = [inboxFolder._id]
	when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
	when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
		targetFolder: inboxFolder,
		processInboxDatum: hamVerdict,
	})

	// Act
	const resultFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	// Assert: the mail stays in the inbox, and the datum is still reported.
	o(resultFolder).deepEquals(inboxFolder)
	// Yield once before verifying — presumably processNewMails is dispatched asynchronously; confirm against ProcessInboxHandler.
	await delay(0)
	const ownerGroup = assertNotNull(mail._ownerGroup)
	verify(mailFacade.processNewMails(ownerGroup, [hamVerdict]))
})
o("handleIncomingMail does NOT move mail from spam to inbox folder if mail is spam", async function () {
	// Arrange: the mail is in the spam set, no inbox rule matches, and the spam handler picks the spam folder.
	// NOTE(review): the mail is placed in the spam set, yet inboxFolder is passed as the source folder
	// below — confirm whether spamFolder was intended.
	const spamVerdict: UnencryptedProcessInboxDatum = {
		mailId: mail._id,
		targetMoveFolder: spamFolder._id,
		classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
		vector: new Uint8Array(),
	}
	mail.sets = [spamFolder._id]
	when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
	when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
		targetFolder: spamFolder,
		processInboxDatum: spamVerdict,
	})

	// Act
	const resultFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	// Assert
	o(resultFolder).deepEquals(spamFolder)
	// Yield once before verifying — presumably processNewMails is dispatched asynchronously; confirm against ProcessInboxHandler.
	await delay(0)
	const ownerGroup = assertNotNull(mail._ownerGroup)
	verify(mailFacade.processNewMails(ownerGroup, [spamVerdict]))
})
o("handleIncomingMail moves mail from spam to inbox folder if mail is ham", async function () {
	// Arrange: the mail is in the spam set, no inbox rule matches, and the spam handler picks the inbox.
	// NOTE(review): the mail is placed in the spam set, yet inboxFolder is passed as the source folder
	// below — confirm whether spamFolder was intended.
	const hamVerdict: UnencryptedProcessInboxDatum = {
		mailId: mail._id,
		targetMoveFolder: inboxFolder._id,
		classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
		vector: new Uint8Array(),
	}
	mail.sets = [spamFolder._id]
	when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
	when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
		targetFolder: inboxFolder,
		processInboxDatum: hamVerdict,
	})

	// Act
	const resultFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	// Assert
	o(resultFolder).deepEquals(inboxFolder)
	// Yield once before verifying — presumably processNewMails is dispatched asynchronously; confirm against ProcessInboxHandler.
	await delay(0)
	const ownerGroup = assertNotNull(mail._ownerGroup)
	verify(mailFacade.processNewMails(ownerGroup, [hamVerdict]))
})
o("handleIncomingMail does NOT move mail from inbox to spam folder if spam classification is disabled", async function () {
	// Override the default stub: client-side spam classification is switched off for this test.
	when(logins.isEnabled(FeatureType.SpamClientClassification)).thenReturn(false)
	mail.sets = [inboxFolder._id]
	// The facade returns this vector when asked to vectorize and compress the fixture mail.
	const compressedVector = new Uint8Array([2, 4, 8, 16])
	when(mailFacade.vectorizeAndCompressMails({ mail, mailDetails })).thenResolve(compressedVector)
	// Handler rebuilt after the feature flag has been stubbed (kept as in the original test).
	processInboxHandler = new ProcessInboxHandler(
		logins,
		mailFacade,
		() => spamHandler,
		() => inboxRuleHandler,
		new Map(),
		0,
	)
	when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
	// Expected datum: no classifier, mail stays in the inbox, and the vector comes from the facade stub.
	const processedMail: UnencryptedProcessInboxDatum = {
		classifierType: null,
		mailId: mail._id,
		targetMoveFolder: inboxFolder._id,
		vector: compressedVector,
	}

	const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)

	o(targetFolder).deepEquals(inboxFolder)
	await delay(0)
	// Checked after the call under test; a `times: 0` verification placed before it is always satisfied.
	verify(spamHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
	verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processedMail]))
})
})