mirror of
https://github.com/tutao/tutanota.git
synced 2025-12-07 13:49:47 +00:00
Include header fields as tokens in the anti-spam
Add the header fields (sender, toRecipients, ccRecipients, bccRecipients, authStatus) to the anti-spam vectors. We also improve some of the preprocessing steps and add an offline migration that deletes the old spam tables. Co-authored-by: amm@tutao.de Co-authored-by: jhm <17314077+jomapp@users.noreply.github.com>
This commit is contained in:
parent
21ad4ce2c3
commit
f8bbd32695
13 changed files with 10918 additions and 10788 deletions
|
|
@ -46,7 +46,7 @@ export const allowedImports = {
|
|||
contacts: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main", "mail-view", "date", "date-gui", "mail-editor"],
|
||||
"calendar-view": ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main", "date", "date-gui", "sharing", "contacts"],
|
||||
login: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main"],
|
||||
"spam-classifier": ["polyfill-helpers", "common", "common-min"],
|
||||
"spam-classifier": ["polyfill-helpers", "common", "common-min", "main"],
|
||||
worker: ["polyfill-helpers", "common-min", "common", "native-common", "native-worker", "wasm", "wasm-fallback"],
|
||||
"pow-worker": [],
|
||||
settings: [
|
||||
|
|
|
|||
|
|
@ -46,9 +46,6 @@ import { AttributeModel } from "../../common/AttributeModel"
|
|||
import { TypeModelResolver } from "../../common/EntityFunctions"
|
||||
import { collapseId, expandId } from "../rest/RestClientIdUtils"
|
||||
import { Category, syncMetrics } from "../utils/SyncMetrics"
|
||||
import { hasError } from "../../common/utils/ErrorUtils"
|
||||
import { SpamClassificationModel, SpamTrainMailDatum } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { Mail } from "../../entities/tutanota/TypeRefs"
|
||||
|
||||
/**
|
||||
* this is the value of SQLITE_MAX_VARIABLE_NUMBER in sqlite3.c
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import { offline6 } from "./migrations/offline-v6"
|
|||
import { offline7 } from "./migrations/offline-v7"
|
||||
import { offline8 } from "./migrations/offline-v8"
|
||||
import { ProgrammingError } from "../../common/error/ProgrammingError"
|
||||
import { offline9 } from "./migrations/offline-v9"
|
||||
|
||||
export interface OfflineMigration {
|
||||
readonly version: number
|
||||
|
|
@ -20,11 +21,11 @@ export interface OfflineMigration {
|
|||
* Normally you should only add them to the end of the list but with offline ones it can be a bit tricky since they change the db structure itself so sometimes
|
||||
* they should rather be in the beginning.
|
||||
*/
|
||||
export const OFFLINE_STORAGE_MIGRATIONS: ReadonlyArray<OfflineMigration> = [offline5, offline6, offline7, offline8]
|
||||
export const OFFLINE_STORAGE_MIGRATIONS: ReadonlyArray<OfflineMigration> = [offline5, offline6, offline7, offline8, offline9]
|
||||
|
||||
// in cases where the actual migration is not there anymore (we clean up old migrations no client would apply anymore)
|
||||
// and we create a new offline database, we still need to set the offline version to the current value.
|
||||
export const CURRENT_OFFLINE_VERSION = 8
|
||||
export const CURRENT_OFFLINE_VERSION = 9
|
||||
|
||||
/**
|
||||
* Migrator for the offline storage between different versions of the model. It is tightly coupled to the versions of API entities: every time we make an
|
||||
|
|
|
|||
12
src/common/api/worker/offline/migrations/offline-v9.ts
Normal file
12
src/common/api/worker/offline/migrations/offline-v9.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import { OfflineMigration } from "../OfflineStorageMigrator.js"
|
||||
import { OfflineStorage } from "../OfflineStorage.js"
|
||||
import { SqlCipherFacade } from "../../../../native/common/generatedipc/SqlCipherFacade"
|
||||
|
||||
/**
 * Offline storage migration to version 9.
 *
 * Drops both spam classification tables (training data and model). The log
 * message gives the reason: the tables gained new fields.
 * NOTE(review): presumably the tables are re-created afterwards from the
 * updated table definitions; that step is not part of this migration.
 */
export const offline9: OfflineMigration = {
	version: 9,
	async migrate(storage: OfflineStorage, sqlCipherFacade: SqlCipherFacade) {
		console.log("dropping spam_classification_training_data and spam_classification_model, due to new fields")

		// IF EXISTS keeps the migration safe on databases that never created these tables.
		const dropStatements = ["DROP TABLE IF EXISTS spam_classification_training_data", "DROP TABLE IF EXISTS spam_classification_model"]
		for (const statement of dropStatements) {
			await sqlCipherFacade.run(statement, [])
		}
	},
}
|
||||
|
|
@ -1,13 +1,14 @@
|
|||
import { createMoveMailData, Mail, MailDetails, MailFolder, MoveMailData } from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import { createMoveMailData, Mail, MailAddress, MailDetails, MailFolder, MoveMailData } from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import {
|
||||
DEFAULT_IS_SPAM,
|
||||
DEFAULT_IS_SPAM_CONFIDENCE,
|
||||
getSpamConfidence,
|
||||
MailAuthenticationStatus,
|
||||
MailSetKind,
|
||||
ProcessingState,
|
||||
SpamDecision,
|
||||
} from "../../../common/api/common/TutanotaConstants"
|
||||
import type { SpamClassifier, SpamPredMailDatum, SpamTrainMailDatum } from "../../workerUtils/spamClassification/SpamClassifier"
|
||||
import { SpamClassifier, SpamPredMailDatum, SpamTrainMailDatum } from "../../workerUtils/spamClassification/SpamClassifier"
|
||||
import { getMailBodyText } from "../../../common/api/common/CommonMailUtils"
|
||||
import { assertNotNull, debounce, isNotNull, Nullable, ofClass } from "@tutao/tutanota-utils"
|
||||
import { MailFacade } from "../../../common/api/worker/facades/lazy/MailFacade"
|
||||
|
|
@ -75,6 +76,7 @@ export class SpamClassificationHandler {
|
|||
subject: mail.subject,
|
||||
body: getMailBodyText(mailDetails.body),
|
||||
ownerGroup: assertNotNull(mail._ownerGroup),
|
||||
...extractSpamHeaderFeatures(mail, mailDetails),
|
||||
}
|
||||
const isSpam = (await this.spamClassifier?.predict(spamPredMailDatum)) ?? null
|
||||
|
||||
|
|
@ -141,7 +143,44 @@ export class SpamClassificationHandler {
|
|||
isSpam: DEFAULT_IS_SPAM,
|
||||
isSpamConfidence: DEFAULT_IS_SPAM_CONFIDENCE,
|
||||
ownerGroup: assertNotNull(mail._ownerGroup),
|
||||
...extractSpamHeaderFeatures(mail, mailDetails),
|
||||
}
|
||||
await this.spamClassifier?.storeSpamClassification(spamTrainMailDatum)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the header-derived spam features of a mail.
 *
 * @param mail supplies the sender and the authentication status
 * @param mailDetails supplies the to/cc/bcc recipients
 * @returns an object with the string fields sender, toRecipients, ccRecipients, bccRecipients and authStatus
 */
export function extractSpamHeaderFeatures(mail: Mail, mailDetails: MailDetails) {
	return {
		sender: joinNamesAndMailAddresses([mail?.sender]),
		...extractRecipients(mailDetails),
		authStatus: convertAuthStatusToSpamCategorizationToken(mail.authStatus),
	}
}
|
||||
|
||||
/**
 * Flattens each recipient list of the mail details (to, cc, bcc) into a single string.
 * A missing `recipients` object is tolerated via optional chaining.
 */
function extractRecipients({ recipients }: MailDetails) {
	return {
		toRecipients: joinNamesAndMailAddresses(recipients?.toRecipients),
		ccRecipients: joinNamesAndMailAddresses(recipients?.ccRecipients),
		bccRecipients: joinNamesAndMailAddresses(recipients?.bccRecipients),
	}
}
|
||||
|
||||
/**
 * Joins the given addresses into one space-separated string of the form
 * "name address name address ...".
 *
 * Missing entries are skipped rather than stringified: callers pass values
 * obtained through optional chaining (e.g. `[mail?.sender]`), and a null or
 * undefined entry would otherwise put the literal text "undefined undefined"
 * into the spam feature string.
 *
 * @param recipients addresses to join; null/undefined (list or single entries) are tolerated
 * @returns the joined string, or "" when there is nothing to join
 */
function joinNamesAndMailAddresses(recipients: ReadonlyArray<MailAddress | null | undefined> | null | undefined): string {
	if (recipients == null) {
		return ""
	}

	return recipients
		.filter((recipient): recipient is MailAddress => recipient != null)
		.map((recipient) => `${recipient.name ?? ""} ${recipient.address ?? ""}`)
		.join(" ")
}
|
||||
|
||||
/**
 * Maps a mail authentication status to the fixed token used as a spam feature.
 *
 * @param authStatus a MailAuthenticationStatus value, or null
 * @returns the token for the five known statuses, or "" for null and any other value
 */
function convertAuthStatusToSpamCategorizationToken(authStatus: string | null): string {
	switch (authStatus) {
		case MailAuthenticationStatus.AUTHENTICATED:
			return "TAUTHENTICATED"
		case MailAuthenticationStatus.HARD_FAIL:
			return "THARDFAIL"
		case MailAuthenticationStatus.SOFT_FAIL:
			return "TSOFTFAIL"
		case MailAuthenticationStatus.INVALID_MAIL_FROM:
			return "TINVALIDMAILFROM"
		case MailAuthenticationStatus.MISSING_MAIL_FROM:
			return "TMISSINGMAILFROM"
		default:
			return ""
	}
}
|
||||
|
|
|
|||
|
|
@ -67,12 +67,14 @@ export const SearchTableDefinitions: Record<string, OfflineStorageTable> = Objec
|
|||
})
|
||||
|
||||
export const SpamClassificationDefinitions: Record<string, OfflineStorageTable> = Object.freeze({
|
||||
// Spam classification training data
|
||||
spam_classification_training_data: {
|
||||
definition:
|
||||
"CREATE TABLE IF NOT EXISTS spam_classification_training_data (listId TEXT NOT NULL, elementId TEXT NOT NULL," +
|
||||
" ownerGroup TEXT NOT NULL, subject TEXT NOT NULL, body TEXT NOT NULL, isSpam NUMBER, " +
|
||||
"lastModified NUMBER NOT NULL, isSpamConfidence NUMBER NOT NULL, PRIMARY KEY (listId, elementId))",
|
||||
"ownerGroup TEXT NOT NULL, subject TEXT NOT NULL, body TEXT NOT NULL, isSpam NUMBER," +
|
||||
"lastModified NUMBER NOT NULL, isSpamConfidence NUMBER NOT NULL, sender TEXT NOT NULL," +
|
||||
"toRecipients TEXT NOT NULL, ccRecipients TEXT NOT NULL, bccRecipients TEXT NOT NULL," +
|
||||
"authStatus TEXT NOT NULL, PRIMARY KEY (listId, elementId))",
|
||||
|
||||
purgedWithCache: true,
|
||||
},
|
||||
|
||||
|
|
@ -187,18 +189,24 @@ export class OfflineStoragePersistence {
|
|||
|
||||
async storeSpamClassification(spamTrainMailDatum: SpamTrainMailDatum): Promise<void> {
|
||||
const { query, params } = sql`
|
||||
INSERT
|
||||
OR REPLACE INTO spam_classification_training_data(listId, elementId, ownerGroup, subject, body, isSpam, lastModified, isSpamConfidence)
|
||||
INSERT
|
||||
OR REPLACE INTO spam_classification_training_data(listId, elementId, ownerGroup, subject, body, isSpam,
|
||||
lastModified, isSpamConfidence, sender, toRecipients, ccRecipients, bccRecipients, authStatus)
|
||||
VALUES (
|
||||
${listIdPart(spamTrainMailDatum.mailId)},
|
||||
${elementIdPart(spamTrainMailDatum.mailId)},
|
||||
${spamTrainMailDatum.ownerGroup},
|
||||
${spamTrainMailDatum.subject},
|
||||
${spamTrainMailDatum.body},
|
||||
${spamTrainMailDatum.isSpam ? 1 : 0},
|
||||
${Date.now()},
|
||||
${spamTrainMailDatum.isSpamConfidence}
|
||||
)`
|
||||
${listIdPart(spamTrainMailDatum.mailId)},
|
||||
${elementIdPart(spamTrainMailDatum.mailId)},
|
||||
${spamTrainMailDatum.ownerGroup},
|
||||
${spamTrainMailDatum.subject},
|
||||
${spamTrainMailDatum.body},
|
||||
${spamTrainMailDatum.isSpam ? 1 : 0},
|
||||
${Date.now()},
|
||||
${spamTrainMailDatum.isSpamConfidence},
|
||||
${spamTrainMailDatum.sender},
|
||||
${spamTrainMailDatum.toRecipients},
|
||||
${spamTrainMailDatum.ccRecipients},
|
||||
${spamTrainMailDatum.bccRecipients},
|
||||
${spamTrainMailDatum.authStatus}
|
||||
)`
|
||||
await this.sqlCipherFacade.run(query, params)
|
||||
}
|
||||
|
||||
|
|
@ -250,11 +258,21 @@ export class OfflineStoragePersistence {
|
|||
}
|
||||
|
||||
async getCertainSpamClassificationTrainingDataAfterCutoff(cutoffTimestamp: number, ownerGroupId: Id): Promise<SpamTrainMailDatum[]> {
|
||||
const { query, params } = sql`SELECT listId, elementId, subject, body, isSpam, isSpamConfidence
|
||||
FROM spam_classification_training_data
|
||||
WHERE lastModified > ${cutoffTimestamp}
|
||||
AND isSpamConfidence > 0
|
||||
AND ownerGroup = ${ownerGroupId}`
|
||||
const { query, params } = sql`SELECT listId,
|
||||
elementId,
|
||||
subject,
|
||||
body,
|
||||
isSpam,
|
||||
isSpamConfidence,
|
||||
sender,
|
||||
toRecipients,
|
||||
ccRecipients,
|
||||
bccRecipients,
|
||||
authStatus
|
||||
FROM spam_classification_training_data
|
||||
WHERE lastModified > ${cutoffTimestamp}
|
||||
AND isSpamConfidence > 0
|
||||
AND ownerGroup = ${ownerGroupId}`
|
||||
const resultRows = await this.sqlCipherFacade.all(query, params)
|
||||
return resultRows.map(untagSqlObject).map((row) => row as unknown as SpamTrainMailDatum)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,30 +9,30 @@ export const ML_DATE_REGEX = [
|
|||
/\b(?<!-)\d{4}(?:-\d{1,2}){2}(?!-)\b/g, // 2023-12-01 | 2023-12-1
|
||||
]
|
||||
|
||||
export const ML_DATE_TOKEN = " <DATE> "
|
||||
export const ML_DATE_TOKEN = " TDATE "
|
||||
|
||||
export const ML_URL_REGEX = /(?:http|https|ftp|sftp):\/\/([\w.-]+)(?:\/[^\s]*)?/g
|
||||
|
||||
export const ML_URL_TOKEN = " <URL-$1> "
|
||||
export const ML_URL_TOKEN = " TURL$1 "
|
||||
|
||||
export const ML_EMAIL_ADDR_REGEX = /(?:mailto:)?[A-Za-z0-9_+\-.]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g
|
||||
export const ML_EMAIL_ADDR_TOKEN = " <EMAIL> "
|
||||
export const ML_EMAIL_ADDR_TOKEN = " TEMAIL "
|
||||
|
||||
export const ML_BITCOIN_REGEX = /\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b/g
|
||||
|
||||
export const ML_BITCOIN_TOKEN = " <BITCOIN> "
|
||||
export const ML_BITCOIN_TOKEN = " TBITCOIN "
|
||||
|
||||
export const ML_CREDIT_CARD_REGEX = /\b(\d{4}\s?){4}\b|\b[0-9]\d{13,16}\b/g
|
||||
|
||||
export const ML_CREDIT_CARD_TOKEN = " <CREDIT-CARD> "
|
||||
export const ML_CREDIT_CARD_TOKEN = " TCREDITCARD "
|
||||
|
||||
export const ML_NUMBER_SEQUENCE_REGEX = /\b\d+\b/g
|
||||
|
||||
export const ML_NUMBER_SEQUENCE_TOKEN = " <NUMBER> "
|
||||
export const ML_NUMBER_SEQUENCE_TOKEN = " TNUMBER "
|
||||
|
||||
export const ML_SPECIAL_CHARACTER_REGEX = /([!@#$%^&*()+`_=\\{}"':;?/,.~]+)(?![^<]*>)|(?!\w)[-]+(?!\w)/g
|
||||
// Matches runs of special characters. The hyphen is escaped so it is read as a literal
// instead of forming the range `,-.` (which covers exactly `,`, `-` and `.`), so the matched set is unchanged.
export const ML_SPECIAL_CHARACTER_REGEX = /([!@#$%^&*()[\]<>+`_=\\{}"':;?/,\-.~]+)/g
|
||||
|
||||
export const ML_SPECIAL_CHARACTER_TOKEN = " <SPECIAL-CHAR> "
|
||||
export const ML_SPECIAL_CHARACTER_TOKEN = " TSPECIALCHAR "
|
||||
|
||||
export const ML_SPACE_BEFORE_NEW_LINE_REGEX = /\s+\n/g
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,16 @@
|
|||
import { EntityClient } from "../../../common/api/common/EntityClient"
|
||||
import { assertNotNull, isNotNull, lazyAsync } from "@tutao/tutanota-utils"
|
||||
import { MailBag, MailboxGroupRootTypeRef, MailBoxTypeRef, MailFolder, MailFolderTypeRef, MailTypeRef } from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import {
|
||||
MailAddress,
|
||||
MailBag,
|
||||
MailboxGroupRootTypeRef,
|
||||
MailBoxTypeRef,
|
||||
MailDetails,
|
||||
MailFolder,
|
||||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
Recipients,
|
||||
} from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import { getMailSetKind, getSpamConfidence, MailSetKind } from "../../../common/api/common/TutanotaConstants"
|
||||
import { elementIdPart, isSameId, listIdPart, timestampToGeneratedId } from "../../../common/api/common/utils/EntityUtils"
|
||||
import { OfflineStoragePersistence } from "../index/OfflineStoragePersistence"
|
||||
|
|
@ -8,6 +18,7 @@ import { getMailBodyText } from "../../../common/api/common/CommonMailUtils"
|
|||
import { BulkMailLoader, MailWithMailDetails } from "../index/BulkMailLoader"
|
||||
import { hasError } from "../../../common/api/common/utils/ErrorUtils"
|
||||
import { SpamTrainMailDatum } from "./SpamClassifier"
|
||||
import { extractSpamHeaderFeatures } from "../../mail/model/SpamClassificationHandler"
|
||||
|
||||
const INITIAL_SPAM_CLASSIFICATION_INDEX_INTERVAL_DAYS = 28
|
||||
|
||||
|
|
@ -30,7 +41,6 @@ export class SpamClassificationInitializer {
|
|||
// available in the current mail bag
|
||||
const data = await this.downloadMailAndMailDetailsByGroupMembership(ownerGroup)
|
||||
data.filter((datum) => datum.isSpamConfidence > 0)
|
||||
data.map((datum) => this.offlineStorage.storeSpamClassification(datum))
|
||||
|
||||
let spamMailsCount = 0
|
||||
let hamMailsCount = 0
|
||||
|
|
@ -53,7 +63,6 @@ export class SpamClassificationInitializer {
|
|||
const mailbox = await this.entityClient.load(MailBoxTypeRef, mailboxGroupRoot.mailbox)
|
||||
const mailSets = await this.entityClient.loadAll(MailFolderTypeRef, assertNotNull(mailbox.folders).folders)
|
||||
const spamFolder = mailSets.find((s) => getMailSetKind(s) === MailSetKind.SPAM)!
|
||||
const inboxFolder = mailSets.find((s) => getMailSetKind(s) === MailSetKind.INBOX)!
|
||||
|
||||
const downloadedMailClassificationDatas = new Array<SpamTrainMailDatum>()
|
||||
const allMailbags = [assertNotNull(mailbox.currentMailBag), ...mailbox.archivedMailBags].reverse() // sorted from latest to oldest
|
||||
|
|
@ -63,14 +72,14 @@ export class SpamClassificationInitializer {
|
|||
isNotNull(currentMailbag) && downloadedMailClassificationDatas.length < this.MIN_MAILS_COUNT;
|
||||
currentMailbag = allMailbags.pop()
|
||||
) {
|
||||
const mailsOfThisMailbag = await this.downloadMailAndMailDetailsByMailbag(currentMailbag, spamFolder, inboxFolder)
|
||||
const mailsOfThisMailbag = await this.downloadMailAndMailDetailsByMailbag(currentMailbag, spamFolder)
|
||||
downloadedMailClassificationDatas.push(...mailsOfThisMailbag)
|
||||
}
|
||||
|
||||
return downloadedMailClassificationDatas
|
||||
}
|
||||
|
||||
private async downloadMailAndMailDetailsByMailbag(mailbag: MailBag, spamFolder: MailFolder, inboxFolder: MailFolder): Promise<Array<SpamTrainMailDatum>> {
|
||||
private async downloadMailAndMailDetailsByMailbag(mailbag: MailBag, spamFolder: MailFolder): Promise<Array<SpamTrainMailDatum>> {
|
||||
const { LocalTimeDateProvider } = await import("../../../common/api/worker/DateProvider.js")
|
||||
const dateProvider = new LocalTimeDateProvider()
|
||||
const startTime = dateProvider.getStartOfDayShiftedBy(this.TIME_LIMIT).getTime()
|
||||
|
|
@ -84,11 +93,12 @@ export class SpamClassificationInitializer {
|
|||
// Download mail details
|
||||
.then((mails) => bulkMailLoader.loadMailDetails(mails))
|
||||
// Map to spam mail datum
|
||||
.then((mails) => mails.map((m) => this.mailWithDetailsToMailDatum(spamFolder, inboxFolder, m)))
|
||||
.then((mails) => mails.map((m) => this.mailWithDetailsToMailDatum(spamFolder, m)))
|
||||
}
|
||||
|
||||
private mailWithDetailsToMailDatum(spamFolder: MailFolder, inboxFolder: MailFolder, { mail, mailDetails }: MailWithMailDetails): SpamTrainMailDatum {
|
||||
private mailWithDetailsToMailDatum(spamFolder: MailFolder, { mail, mailDetails }: MailWithMailDetails): SpamTrainMailDatum {
|
||||
const isSpam = mail.sets.some((folderId) => isSameId(folderId, spamFolder._id))
|
||||
|
||||
return {
|
||||
mailId: mail._id,
|
||||
subject: mail.subject,
|
||||
|
|
@ -98,6 +108,7 @@ export class SpamClassificationInitializer {
|
|||
listId: listIdPart(mail._id),
|
||||
elementId: elementIdPart(mail._id),
|
||||
ownerGroup: assertNotNull(mail._ownerGroup),
|
||||
...extractSpamHeaderFeatures(mail, mailDetails),
|
||||
} as SpamTrainMailDatum
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,12 +54,22 @@ export type SpamTrainMailDatum = {
|
|||
isSpam: boolean
|
||||
isSpamConfidence: number
|
||||
ownerGroup: Id
|
||||
sender: string
|
||||
toRecipients: string
|
||||
ccRecipients: string
|
||||
bccRecipients: string
|
||||
authStatus: string
|
||||
}
|
||||
|
||||
export type SpamPredMailDatum = {
|
||||
subject: string
|
||||
body: string
|
||||
ownerGroup: Id
|
||||
sender: string
|
||||
toRecipients: string
|
||||
ccRecipients: string
|
||||
bccRecipients: string
|
||||
authStatus: string
|
||||
}
|
||||
|
||||
const PREDICTION_THRESHOLD = 0.55
|
||||
|
|
@ -223,9 +233,16 @@ export class SpamClassifier {
|
|||
preprocessedMail = preprocessedMail.replaceAll(ML_SPACE_BEFORE_NEW_LINE_REGEX, ML_SPACE_BEFORE_NEW_LINE_TOKEN)
|
||||
}
|
||||
|
||||
preprocessedMail += this.getHeaderFeatures(mail)
|
||||
|
||||
return preprocessedMail
|
||||
}
|
||||
|
||||
/**
 * Renders the header fields of a mail as text: sender, toRecipients, ccRecipients,
 * bccRecipients and authStatus, in that order, each preceded by a newline.
 */
private getHeaderFeatures(mail: SpamTrainMailDatum | SpamPredMailDatum): string {
	const headerFields = [mail.sender, mail.toRecipients, mail.ccRecipients, mail.bccRecipients, mail.authStatus]
	// Template concatenation (not Array.join) so that every field is stringified the same way as before.
	return headerFields.reduce((features, field) => `${features}\n${field}`, "")
}
|
||||
|
||||
public async initialTraining(mails: SpamTrainMailDatum[]): Promise<TrainingPerformance> {
|
||||
const preprocessingStart = performance.now()
|
||||
const tokenizedMails = await promiseMap(mails, (mail) => spamClassifierTokenizer(this.preprocessMail(mail)))
|
||||
|
|
@ -497,7 +514,8 @@ export class SpamClassifier {
|
|||
private concatSubjectAndBody(mail: SpamTrainMailDatum | SpamPredMailDatum) {
|
||||
const subject = mail.subject || ""
|
||||
const body = mail.body || ""
|
||||
const concatenated = `${subject} ${body}`.trim()
|
||||
const concatenated = `${subject}\n${body}`.trim()
|
||||
|
||||
return concatenated.length > 0 ? concatenated : " "
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -130,11 +130,11 @@ o.spec("PreprocessPatterns", () => {
|
|||
o.spec("Url patterns", () => {
|
||||
o.test("All recognized url patterns", async () => {
|
||||
const urlsMap = new Map([
|
||||
["https://tuta.com", "<URL-tuta.com>"],
|
||||
["https://microsoft.com/outlook/test", "<URL-microsoft.com>"],
|
||||
["https://subdomain.microsoft.com/outlook/test", "<URL-subdomain.microsoft.com>"],
|
||||
["https://subdomain.spam.com/this/is/not/cool/dsfalkfjd2309jlk234oi2k", "<URL-subdomain.spam.com>"],
|
||||
["https://subdomain.test.de/spam!", "<URL-subdomain.test.de>"],
|
||||
["https://tuta.com", "TURLtuta.com"],
|
||||
["https://microsoft.com/outlook/test", "TURLmicrosoft.com"],
|
||||
["https://subdomain.microsoft.com/outlook/test", "TURLsubdomain.microsoft.com"],
|
||||
["https://subdomain.spam.com/this/is/not/cool/dsfalkfjd2309jlk234oi2k", "TURLsubdomain.spam.com"],
|
||||
["https://subdomain.test.de/spam!", "TURLsubdomain.test.de"],
|
||||
])
|
||||
|
||||
for (const [domain, expectedToken] of urlsMap.entries()) {
|
||||
|
|
@ -297,6 +297,8 @@ o.spec("PreprocessPatterns", () => {
|
|||
["*", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["(", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
[")", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["<", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
[">", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["+", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["`", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["_", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
|
|
@ -318,6 +320,7 @@ o.spec("PreprocessPatterns", () => {
|
|||
["--", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["---", ML_SPECIAL_CHARACTER_TOKEN],
|
||||
["--- ---", `${ML_SPECIAL_CHARACTER_TOKEN} ${ML_SPECIAL_CHARACTER_TOKEN}`],
|
||||
["[ ]", `${ML_SPECIAL_CHARACTER_TOKEN} ${ML_SPECIAL_CHARACTER_TOKEN}`],
|
||||
])
|
||||
|
||||
for (const [specialCharSequence, expectedResult] of specialCharsMap) {
|
||||
|
|
@ -327,7 +330,7 @@ o.spec("PreprocessPatterns", () => {
|
|||
})
|
||||
|
||||
o.test("Not recognized special-character-like patterns", async () => {
|
||||
const notSpecialChars = ["[", "]", "<", ">", "test-test"]
|
||||
const notSpecialChars = ["§", "€"]
|
||||
|
||||
const notSpecialCharsText = notSpecialChars.join("\n")
|
||||
let resultNotSpecialCharsText = notSpecialCharsText
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import { parseCsv } from "../../../../../../src/common/misc/parsing/CsvParser"
|
|||
import {
|
||||
DEFAULT_PREPROCESS_CONFIGURATION,
|
||||
SpamClassifier,
|
||||
spamClassifierTokenizer as testTokenize,
|
||||
SpamTrainMailDatum,
|
||||
} from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { OfflineStoragePersistence } from "../../../../../../src/mail-app/workerUtils/index/OfflineStoragePersistence"
|
||||
|
|
@ -36,6 +35,11 @@ export async function readMailDataFromCSV(filePath: string): Promise<{
|
|||
const subject = row[8]
|
||||
const body = row[10]
|
||||
const label = row[11]
|
||||
const from = row[0]
|
||||
const to = row[1]
|
||||
const cc = row[2]
|
||||
const bcc = row[3]
|
||||
const authStatus = row[4]
|
||||
|
||||
let isSpam = label === "spam" ? true : label === "ham" ? false : null
|
||||
isSpam = assertNotNull(isSpam, "Unknown label detected: " + label)
|
||||
|
|
@ -47,6 +51,11 @@ export async function readMailDataFromCSV(filePath: string): Promise<{
|
|||
isSpam,
|
||||
isSpamConfidence: 1,
|
||||
ownerGroup: "owner",
|
||||
sender: from,
|
||||
toRecipients: to,
|
||||
ccRecipients: cc,
|
||||
bccRecipients: bcc,
|
||||
authStatus: authStatus,
|
||||
} as SpamTrainMailDatum)
|
||||
}
|
||||
|
||||
|
|
@ -99,6 +108,11 @@ o.spec("SpamClassifierTest", () => {
|
|||
isSpam: true,
|
||||
isSpamConfidence: 1,
|
||||
ownerGroup: "owner",
|
||||
sender: "",
|
||||
toRecipients: "",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "",
|
||||
}
|
||||
const layersModel = object<Sequential>()
|
||||
spamClassifier.addSpamClassifierForOwner(spamTrainMailDatum.ownerGroup, layersModel, false)
|
||||
|
|
@ -119,6 +133,11 @@ o.spec("SpamClassifierTest", () => {
|
|||
isSpam: false,
|
||||
isSpamConfidence: 0,
|
||||
ownerGroup: "owner",
|
||||
sender: "",
|
||||
toRecipients: "",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "",
|
||||
}
|
||||
|
||||
const layersModel = object<Sequential>()
|
||||
|
|
@ -165,6 +184,11 @@ o.spec("SpamClassifierTest", () => {
|
|||
const classifier = new SpamClassifier(object(), object(), object())
|
||||
const mail = {
|
||||
subject: `Sample Tokens and values`,
|
||||
sender: "sender",
|
||||
toRecipients: "toRecipients",
|
||||
ccRecipients: "ccRecipients",
|
||||
bccRecipients: "bccRecipients",
|
||||
authStatus: "authStatus",
|
||||
// prettier-ignore
|
||||
body: `Hello, these are my MAC Address
|
||||
FB-94-77-45-96-74
|
||||
|
|
@ -228,8 +252,8 @@ o.spec("SpamClassifierTest", () => {
|
|||
Special Characters
|
||||
!
|
||||
@
|
||||
Not Special Characters
|
||||
]
|
||||
Not Special Character
|
||||
§
|
||||
Number Sequences:
|
||||
26098375
|
||||
IBAN: DE91 1002 0370 0320 2239 82
|
||||
|
|
@ -252,84 +276,90 @@ this text is shown
|
|||
} as SpamTrainMailDatum
|
||||
const preprocessedMail = classifier.preprocessMail(mail)
|
||||
// prettier-ignore
|
||||
const expectedOutput = `Sample Tokens and values Hello <SPECIAL-CHAR> these are my MAC Address
|
||||
\t\t\t\tFB <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> -D5 <SPECIAL-CHAR> <NUMBER> -7C
|
||||
\t\t\t\tB4 <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> -2A-DE-D4
|
||||
const expectedOutput = `Sample Tokens and values
|
||||
Hello TSPECIALCHAR these are my MAC Address
|
||||
\t\t\t\tFB TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR D5 TSPECIALCHAR TNUMBER TSPECIALCHAR 7C
|
||||
\t\t\t\tB4 TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR 2A TSPECIALCHAR DE TSPECIALCHAR D4
|
||||
\t\t\t\talong with my ISBNs
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> -X
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR X
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\tSSN
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\tSHAs
|
||||
\t\t\t\t585eab9b3a5e4430e08f5096d636d0d475a8c69dae21a61c6f1b26c4bd8dd8c1
|
||||
\t\t\t\t7233d153f2e0725d3d212d1f27f30258fafd72b286d07b3b1d94e7e3c35dce67
|
||||
\t\t\t\t769f65bf44557df44fc5f99c014cbe98894107c9d7be0801f37c55b3776c3990
|
||||
\t\t\t\tPhone Numbers
|
||||
\t\t\t\t <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <SPECIAL-CHAR> <NUMBER> <NUMBER> <NUMBER> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\tVIN <SPECIAL-CHAR> Vehicle identification number <SPECIAL-CHAR>
|
||||
\t\t\t\t TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TSPECIALCHAR TNUMBER TNUMBER TNUMBER TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\tVIN TSPECIALCHAR Vehicle identification number TSPECIALCHAR
|
||||
\t\t\t\t3FADP4AJ3BM438397
|
||||
\t\t\t\tWAULT64B82N564937
|
||||
\t\t\t\tGUIDs
|
||||
\t\t\t\t781a9631 <SPECIAL-CHAR> <NUMBER> -4f9c-bb36-25c3364b754b
|
||||
\t\t\t\t325783d4-a64e-453b-85e6-ed4b2cd4c9bf
|
||||
\t\t\t\t781a9631 TSPECIALCHAR TNUMBER TSPECIALCHAR 4f9c TSPECIALCHAR bb36 TSPECIALCHAR 25c3364b754b
|
||||
\t\t\t\t325783d4 TSPECIALCHAR a64e TSPECIALCHAR 453b TSPECIALCHAR 85e6 TSPECIALCHAR ed4b2cd4c9bf
|
||||
\t\t\t\tHex Colors
|
||||
\t\t\t\t <SPECIAL-CHAR> 2016c1
|
||||
\t\t\t\t <SPECIAL-CHAR> c090a4
|
||||
\t\t\t\t <SPECIAL-CHAR> c855f5
|
||||
\t\t\t\t <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t TSPECIALCHAR 2016c1
|
||||
\t\t\t\t TSPECIALCHAR c090a4
|
||||
\t\t\t\t TSPECIALCHAR c855f5
|
||||
\t\t\t\t TSPECIALCHAR TNUMBER
|
||||
\t\t\t\tIPV4
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\tOn Date <SPECIAL-CHAR>
|
||||
\t\t\t\t <DATE>
|
||||
\t\t\t\t <DATE>
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\tOn Date TSPECIALCHAR
|
||||
\t\t\t\t TDATE
|
||||
\t\t\t\t TDATE
|
||||
\t\t\t\tNot Date
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\tURL
|
||||
\t\t\t\t <URL-tuta.com>
|
||||
\t\t\t\t <URL-subdomain.microsoft.com>
|
||||
\t\t\t\t TURLtuta TSPECIALCHAR com
|
||||
\t\t\t\t TURLsubdomain TSPECIALCHAR microsoft TSPECIALCHAR com
|
||||
\t\t\t\tNOT URL
|
||||
\t\t\t\t <URL-tuta>
|
||||
\t\t\t\t TURLtuta
|
||||
\t\t\t\tMAIL
|
||||
\t\t\t\t <EMAIL>
|
||||
\t\t\t\t <EMAIL>
|
||||
\t\t\t\t TEMAIL
|
||||
\t\t\t\t TEMAIL
|
||||
\t\t\t\tCredit Card
|
||||
\t\t\t\t <CREDIT-CARD>
|
||||
\t\t\t\t <CREDIT-CARD>
|
||||
\t\t\t\t TCREDITCARD
|
||||
\t\t\t\t TCREDITCARD
|
||||
\t\t\t\tNot Credit Card
|
||||
\t\t\t\t <NUMBER> <NUMBER>
|
||||
\t\t\t\t TNUMBER TNUMBER
|
||||
\t\t\t\tBit Coin Address
|
||||
\t\t\t\t <BITCOIN>
|
||||
\t\t\t\t <BITCOIN>
|
||||
\t\t\t\t TBITCOIN
|
||||
\t\t\t\t TBITCOIN
|
||||
\t\t\t\tNot BTC
|
||||
\t\t\t\t5213nYwhhGw2qpNijzfnKcbCG4z3hnrVA
|
||||
\t\t\t\t1OUm2eZK2ETeAo8v95WhZioQDy32YSerkD
|
||||
\t\t\t\tSpecial Characters
|
||||
\t\t\t\t <SPECIAL-CHAR>
|
||||
\t\t\t\t <SPECIAL-CHAR>
|
||||
\t\t\t\tNot Special Characters
|
||||
\t\t\t\t]
|
||||
\t\t\t\tNumber Sequences <SPECIAL-CHAR>
|
||||
\t\t\t\t <NUMBER>
|
||||
\t\t\t\tIBAN <SPECIAL-CHAR> DE91 <CREDIT-CARD> <NUMBER>
|
||||
\t\t\t\t TSPECIALCHAR
|
||||
\t\t\t\t TSPECIALCHAR
|
||||
\t\t\t\tNot Special Character
|
||||
\t\t\t\t§
|
||||
\t\t\t\tNumber Sequences TSPECIALCHAR
|
||||
\t\t\t\t TNUMBER
|
||||
\t\t\t\tIBAN TSPECIALCHAR DE91 TCREDITCARD TNUMBER
|
||||
\t\t\t\tNot Number Sequences
|
||||
\t\t\t\tSHLT116
|
||||
\t\t\t\tgb <SPECIAL-CHAR> 67ca4b
|
||||
\t\t\t\tgb TSPECIALCHAR 67ca4b
|
||||
\t\t\t\tOther values found in mails
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> € <NUMBER> m <NUMBER> Zi <NUMBER> <SPECIAL-CHAR>
|
||||
\t\t\t\tFax <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> <NUMBER> <NUMBER> <NUMBER>
|
||||
\t\t\t\tAugust <NUMBER> <SPECIAL-CHAR> <NUMBER>
|
||||
\t\t\t\t <NUMBER> <SPECIAL-CHAR> <NUMBER> PM <SPECIAL-CHAR> <NUMBER> <SPECIAL-CHAR> <NUMBER> PM
|
||||
\t\t\t\tand all text on other lines it seems <SPECIAL-CHAR>
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER € TNUMBER m TNUMBER Zi TNUMBER TSPECIALCHAR
|
||||
\t\t\t\tFax TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER TNUMBER TNUMBER TNUMBER
|
||||
\t\t\t\tAugust TNUMBER TSPECIALCHAR TNUMBER
|
||||
\t\t\t\t TNUMBER TSPECIALCHAR TNUMBER PM TSPECIALCHAR TNUMBER TSPECIALCHAR TNUMBER PM
|
||||
\t\t\t\tand all text on other lines it seems TSPECIALCHAR
|
||||
Button Text
|
||||
this text is shown`
|
||||
this text is shown
|
||||
sender
|
||||
toRecipients
|
||||
ccRecipients
|
||||
bccRecipients
|
||||
authStatus`
|
||||
o.check(preprocessedMail).equals(expectedOutput)
|
||||
})
|
||||
|
||||
|
|
@ -357,8 +387,24 @@ this text is shown`
|
|||
await spamClassifier.initialize("firstGroup")
|
||||
await spamClassifier.initialize("secondGroup")
|
||||
|
||||
const isSpamFirstMail = await spamClassifier.predict({ subject: "", body: "", ownerGroup: "firstGroup" })
|
||||
const isSpamSecondMail = await spamClassifier.predict({ subject: "", body: "", ownerGroup: "secondGroup" })
|
||||
const commonSpamFields = {
|
||||
subject: "",
|
||||
body: "",
|
||||
sender: "string",
|
||||
toRecipients: "string",
|
||||
ccRecipients: "string",
|
||||
bccRecipients: "string",
|
||||
authStatus: "",
|
||||
}
|
||||
|
||||
const isSpamFirstMail = await spamClassifier.predict({
|
||||
ownerGroup: "firstGroup",
|
||||
...commonSpamFields,
|
||||
})
|
||||
const isSpamSecondMail = await spamClassifier.predict({
|
||||
ownerGroup: "secondGroup",
|
||||
...commonSpamFields,
|
||||
})
|
||||
|
||||
o(isSpamFirstMail).equals(true)
|
||||
o(isSpamSecondMail).equals(false)
|
||||
|
|
@ -434,48 +480,6 @@ if (DO_RUN_PERFORMANCE_ANALYSIS) {
|
|||
let retrainCount = 0
|
||||
let predictedSpam = false
|
||||
while (!predictedSpam && retrainCount++ <= 3) {
|
||||
// await copiedClassifier.updateModel([{ ...sample, isSpam: false }])
|
||||
|
||||
/*
|
||||
isSpamConfidence: 2
|
||||
[
|
||||
3, 2, 1, 3, 1,
|
||||
1, 3, 2, 1, 5
|
||||
] = 22
|
||||
isSpamConfidence: 3
|
||||
[
|
||||
2, 5, 1, 2, 1,
|
||||
1, 1, 2, 1, 2
|
||||
] = 18
|
||||
|
||||
isSpamConfidence: 4
|
||||
[
|
||||
1, 1, 1, 2, 5,
|
||||
1, 1, 1, 1, 5
|
||||
] = 19
|
||||
Retraining finished. Took: 477ms
|
||||
Retraining finished. Took: 1259ms
|
||||
predicted new mail to be with probability 0.46 spam
|
||||
Retraining finished. Took: 560ms
|
||||
Retraining finished. Took: 1273ms
|
||||
|
||||
isSpamConfidence: 8
|
||||
Retraining finished. Took: 486ms
|
||||
Retraining finished. Took: 2289ms
|
||||
predicted new mail to be with probability 0.82 spam
|
||||
Retraining finished. Took: 580ms
|
||||
Retraining finished. Took: 2356ms
|
||||
predicted new mail to be with probability 1.00 spam
|
||||
Retraining finished. Took: 556ms
|
||||
Retraining finished. Took: 2357ms
|
||||
predicted new mail to be with probability 0.52 spam
|
||||
[
|
||||
1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1
|
||||
]
|
||||
|
||||
|
||||
*/
|
||||
await copiedClassifier.updateModel("owner", [{ ...sample, isSpam: true, isSpamConfidence: 1 }])
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(sample))
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -6,12 +6,14 @@ import {
|
|||
BodyTypeRef,
|
||||
ClientSpamClassifierResultTypeRef,
|
||||
Mail,
|
||||
MailAddressTypeRef,
|
||||
MailDetails,
|
||||
MailDetailsBlob,
|
||||
MailDetailsBlobTypeRef,
|
||||
MailDetailsTypeRef,
|
||||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
RecipientsTypeRef,
|
||||
} from "../../../src/common/api/entities/tutanota/TypeRefs.js"
|
||||
import { EntityClient } from "../../../src/common/api/common/EntityClient.js"
|
||||
import { EntityRestClientMock } from "../api/worker/rest/EntityRestClientMock.js"
|
||||
|
|
@ -140,6 +142,14 @@ o.spec("MailModelTest", function () {
|
|||
mailDetails = createTestEntity(MailDetailsTypeRef, {
|
||||
_id: "mailDetail",
|
||||
body: createTestEntity(BodyTypeRef, { text: "some text" }),
|
||||
recipients: createTestEntity(RecipientsTypeRef, {
|
||||
toRecipients: [
|
||||
createTestEntity(MailAddressTypeRef, {
|
||||
name: "Recipient",
|
||||
address: "recipient@tuta.com",
|
||||
}),
|
||||
],
|
||||
}),
|
||||
})
|
||||
mail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "mailId"],
|
||||
|
|
@ -147,7 +157,9 @@ o.spec("MailModelTest", function () {
|
|||
mailDetails: ["detailsList", mailDetails._id],
|
||||
subject: "subject",
|
||||
sets: [inboxFolder._id],
|
||||
sender: createTestEntity(MailAddressTypeRef, { name: "Sender", address: "sender@tuta.com" }),
|
||||
processingState: ProcessingState.INBOX_RULE_NOT_PROCESSED,
|
||||
authStatus: "0",
|
||||
})
|
||||
const mailDetailsBlob: MailDetailsBlob = createTestEntity(MailDetailsBlobTypeRef, {
|
||||
_id: mail.mailDetails!,
|
||||
|
|
@ -296,6 +308,11 @@ o.spec("MailModelTest", function () {
|
|||
subject: "subject",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 1,
|
||||
sender: "Sender sender@tuta.com",
|
||||
toRecipients: "Recipient recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "TAUTHENTICATED",
|
||||
}
|
||||
verify(spamClassifier.storeSpamClassification(expectedSpamTrainMailDatum), { times: 1 })
|
||||
verify(spamClassifier.predict(anything()), { times: 0 })
|
||||
|
|
@ -321,6 +338,11 @@ o.spec("MailModelTest", function () {
|
|||
subject: "subject",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 1,
|
||||
sender: "Sender sender@tuta.com",
|
||||
toRecipients: "Recipient recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "TAUTHENTICATED",
|
||||
}
|
||||
verify(spamClassifier.storeSpamClassification(expectedSpamTrainMailDatum), { times: 1 })
|
||||
verify(spamClassifier.predict(anything()), { times: 1 })
|
||||
|
|
@ -352,6 +374,11 @@ o.spec("MailModelTest", function () {
|
|||
subject: "subject",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 1,
|
||||
sender: "Sender sender@tuta.com",
|
||||
toRecipients: "Recipient recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "TAUTHENTICATED",
|
||||
}
|
||||
verify(spamClassifier.storeSpamClassification(expectedSpamTrainMailDatum), { times: 1 })
|
||||
verify(spamClassifier.predict(anything()), { times: 1 })
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue