Mirror of https://github.com/tutao/tutanota.git (synced 2025-12-07 05:39:56 +00:00)
Implement spam training data sync and add TutanotaModelV98
We sync the spam training data, encrypted, through our server to make sure that all clients of a given user behave the same when classifying mails. This also enables spam classification in the web app. We compress the training data vectors (see ClientSpamTrainingDatum) before uploading them to our server, using SparseVectorCompressor.ts. When a user has ClientSpamClassification enabled, the spam training data sync happens for every received mail.

ClientSpamTrainingDatum instances are not stored in the CacheStorage and no entityEvents are emitted for this type. Instead, creations and updates of ClientSpamTrainingData are retrieved through the modifiedClientSpamTrainingDataIndex.

We calculate a threshold per classifier based on the dataset's ham-to-spam ratio, and we also subsample our training data to cap the ham-to-spam ratio within a certain limit.

Co-authored-by: jomapp <17314077+jomapp@users.noreply.github.com>
Co-authored-by: das <das@tutao.de>
Co-authored-by: abp <abp@tutao.de>
Co-authored-by: Kinan <104761667+kibibytium@users.noreply.github.com>
Co-authored-by: sug <sug@tutao.de>
Co-authored-by: nif <nif@tutao.de>
Co-authored-by: map <mpfau@users.noreply.github.com>
parent f8bbd32695
commit 5293be6a4a

63 changed files with 3877 additions and 1963 deletions

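For orientation, a minimal sketch of the client-side upload path described in the commit message, assembled from the code in the diff below. It is illustrative only: the locator wiring and error handling are omitted, and the shape of UnencryptedPopulateClientSpamTrainingDatum is assumed from how MailFacade destructures it.

// Illustrative sketch, not part of the commit. Runs once per received mail when ClientSpamClassification is enabled.
async function syncSpamTrainingDatum(mailFacade: MailFacade, mailGroupId: Id, mailWithDetails: MailWithMailDetails, isSpam: boolean): Promise<void> {
	// preprocess + hash the mail into a 2048-dim term-frequency vector and pack it into a Uint8Array
	const vector = await mailFacade.vectorizeAndCompressMails(mailWithDetails)
	const datum = {
		// field names assumed from the destructuring in encryptUnencryptedPopulateClientSpamTrainingDatum()
		mailId: mailWithDetails.mail._id,
		isSpam,
		confidence: "1", // DEFAULT_IS_SPAM_CONFIDENCE
		vector,
	}
	// encrypts each vector with a fresh session key derived from the mail group key
	// and posts to PopulateClientSpamTrainingDataService in chunks
	await mailFacade.populateClientSpamTrainingData(mailGroupId, [datum])
}
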
@@ -33,7 +33,7 @@ export const allowedImports = {
wasm: ["wasm-fallback"],
"common-min": ["polyfill-helpers"],
boot: ["polyfill-helpers", "common-min"],
common: ["polyfill-helpers", "common-min"],
common: ["polyfill-helpers", "common-min", "spam-classifier"],
"gui-base": ["polyfill-helpers", "common-min", "common", "boot"],
main: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "date"],
sanitizer: ["polyfill-helpers", "common-min", "common", "boot", "gui-base"],
@@ -46,8 +46,8 @@ export const allowedImports = {
contacts: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main", "mail-view", "date", "date-gui", "mail-editor"],
"calendar-view": ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main", "date", "date-gui", "sharing", "contacts"],
login: ["polyfill-helpers", "common-min", "common", "boot", "gui-base", "main"],
"spam-classifier": ["polyfill-helpers", "common", "common-min", "main"],
worker: ["polyfill-helpers", "common-min", "common", "native-common", "native-worker", "wasm", "wasm-fallback"],
"spam-classifier": ["polyfill-helpers", "common", "common-min"],
worker: ["polyfill-helpers", "common-min", "common", "native-common", "native-worker", "wasm", "wasm-fallback", "spam-classifier"],
"pow-worker": [],
settings: [
"polyfill-helpers",

libs/tensorflow-stripped.js (vendored, 6 changed lines)

@@ -4089,6 +4089,10 @@ setOpHandler(opHandler);

function enableProdMode() {
env().set('PROD', true);
}

function engine() {
return ENGINE;
}
@@ -39156,4 +39160,4 @@ function dropout(args) {
return new Dropout(args);
}

export { LayersModel, dense, dropout, fromMemory, glorotUniform, loadLayersModelFromIOHandler, sequential, stringToHashBucketFast$1 as stringToHashBucketFast, tensor1d, tensor2d, withSaveHandler };
export { LayersModel, dense, dropout, enableProdMode, fromMemory, glorotUniform, loadLayersModelFromIOHandler, sequential, stringToHashBucketFast$1 as stringToHashBucketFast, tensor1d, tensor2d, withSaveHandler };

libs/tensorflow.js (vendored, 11 changed lines)

@@ -5387,6 +5387,15 @@ setOpHandler(opHandler);
* limitations under the License.
* =============================================================================
*/
/**
* Enables production mode which disables correctness checks in favor of
* performance.
*
* @doc {heading: 'Environment'}
*/
function enableProdMode() {
env().set('PROD', true);
}
/**
* It returns the global engine that keeps track of all tensors and backends.
*
@@ -55873,4 +55882,4 @@ function dropout(args) {
return new Dropout(args);
}

export { LayersModel, dense, dropout, fromMemory, glorotUniform, loadLayersModelFromIOHandler, sequential, stringToHashBucketFast$1 as stringToHashBucketFast, tensor1d, tensor2d, withSaveHandler };
export { LayersModel, dense, dropout, enableProdMode, fromMemory, glorotUniform, loadLayersModelFromIOHandler, sequential, stringToHashBucketFast$1 as stringToHashBucketFast, tensor1d, tensor2d, withSaveHandler };

@@ -244,7 +244,6 @@ export function debounce<F extends (...args: any) => void>(timeout: number, toTh
if (timeoutId) {
clearTimeout(timeoutId)
}

toInvoke = toThrottle.bind(null, ...args)
timeoutId = setTimeout(toInvoke, timeout)
})

@@ -518,6 +518,7 @@ export async function initLocator(worker: CalendarWorkerImpl, browserData: Brows
locator.user,
locator.cachingEntityClient,
locator.crypto,
locator.cryptoWrapper,
locator.serviceExecutor,
await locator.blob(),
fileApp,

@@ -8,7 +8,6 @@ import {
ContactCustomDate,
ContactRelationship,
ContactSocialId,
Mail,
MailFolder,
UserSettingsGroupRoot,
} from "../entities/tutanota/TypeRefs.js"
@@ -1418,9 +1417,3 @@ export enum ProcessingState {
}

export const PLAN_SELECTOR_SELECTED_BOX_SCALE = "1.03"
export const DEFAULT_IS_SPAM_CONFIDENCE = 1
export const DEFAULT_IS_SPAM = false

export function getSpamConfidence(mail: Mail): number {
return Number(mail.clientSpamClassifierResult?.confidence ?? DEFAULT_IS_SPAM_CONFIDENCE)
}

@@ -61,7 +61,20 @@ export const DELETE_MULTIPLE_LIMIT = 100
*/
export type Stripped<T extends Partial<SomeEntity>> = Omit<
T,
"_id" | "_area" | "_owner" | "_ownerGroup" | "_ownerEncSessionKey" | "_ownerKeyVersion" | "_permissions" | "_errors" | "_format" | "_type" | "_original"
| "_id"
| "_area"
| "_owner"
| "_ownerGroup"
| "_ownerEncSessionKey"
| "_ownerKeyVersion"
| "ownerGroup"
| "ownerEncSessionKey"
| "ownerKeyVersion"
| "_permissions"
| "_errors"
| "_format"
| "_type"
| "_original"
>

type OptionalEntity<T extends Entity> = T & {
@@ -76,6 +89,9 @@ export type StrippedEntity<T extends Entity> =
| "_ownerGroup"
| "_ownerEncSessionKey"
| "_ownerKeyVersion"
| "ownerGroup"
| "ownerEncSessionKey"
| "ownerKeyVersion"
| "_permissions"
| "_errors"
| "_format"

@@ -0,0 +1,229 @@
import { HashingVectorizer } from "../../../../../mail-app/workerUtils/spamClassification/HashingVectorizer"
import { htmlToText } from "../IndexUtils"
import {
ML_BITCOIN_REGEX,
ML_BITCOIN_TOKEN,
ML_CREDIT_CARD_REGEX,
ML_CREDIT_CARD_TOKEN,
ML_DATE_REGEX,
ML_DATE_TOKEN,
ML_EMAIL_ADDR_REGEX,
ML_EMAIL_ADDR_TOKEN,
ML_NUMBER_SEQUENCE_REGEX,
ML_NUMBER_SEQUENCE_TOKEN,
ML_SPACE_BEFORE_NEW_LINE_REGEX,
ML_SPACE_BEFORE_NEW_LINE_TOKEN,
ML_SPECIAL_CHARACTER_REGEX,
ML_SPECIAL_CHARACTER_TOKEN,
ML_URL_REGEX,
ML_URL_TOKEN,
} from "./PreprocessPatterns"
import { SparseVectorCompressor } from "./SparseVectorCompressor"
import { ProgrammingError } from "../../error/ProgrammingError"
import { assertNotNull, tokenize } from "@tutao/tutanota-utils"
import { Mail, MailAddress, MailDetails } from "../../../entities/tutanota/TypeRefs"
import { getMailBodyText } from "../../CommonMailUtils"
import { MailAuthenticationStatus } from "../../TutanotaConstants"

export type PreprocessConfiguration = {
isPreprocessMails: boolean
isRemoveHTML: boolean
isReplaceDates: boolean
isReplaceUrls: boolean
isReplaceMailAddresses: boolean
isReplaceBitcoinAddress: boolean
isReplaceCreditCards: boolean
isReplaceNumbers: boolean
isReplaceSpecialCharacters: boolean
isRemoveSpaceBeforeNewLine: boolean
}

export const spamClassifierTokenizer = (text: PreprocessedMailContent): string[] => tokenize(text)

export const DEFAULT_PREPROCESS_CONFIGURATION: PreprocessConfiguration = {
isPreprocessMails: true,
isRemoveHTML: true,
isReplaceDates: true,
isReplaceUrls: true,
isReplaceMailAddresses: true,
isReplaceBitcoinAddress: true,
isReplaceCreditCards: true,
isReplaceNumbers: true,
isReplaceSpecialCharacters: true,
isRemoveSpaceBeforeNewLine: true,
}

export type SpamMailDatum = {
subject: string
body: string
ownerGroup: Id
sender: string
toRecipients: string
ccRecipients: string
bccRecipients: string
authStatus: string
}

export type PreprocessedMailContent = string

export class SpamMailProcessor {
constructor(
private readonly preprocessConfiguration: PreprocessConfiguration = DEFAULT_PREPROCESS_CONFIGURATION,
readonly vectorizer: HashingVectorizer = new HashingVectorizer(),
private readonly sparseVectorCompressor: SparseVectorCompressor = new SparseVectorCompressor(),
) {
if (vectorizer.dimension !== sparseVectorCompressor.dimension) {
throw new ProgrammingError(
`a spam preprocessor was created with different dimensions. Vectorizer:${vectorizer.dimension} compressor: ${sparseVectorCompressor.dimension}`,
)
}
}

public async vectorizeAndCompress(spamMailDatum: SpamMailDatum): Promise<Uint8Array> {
const vector = await this.vectorize(spamMailDatum)
return this.compress(vector)
}

public async vectorize(spamMailDatum: SpamMailDatum): Promise<number[]> {
const preprocessedMail = this.preprocessMail(spamMailDatum)
const tokenizedMail = spamClassifierTokenizer(preprocessedMail)
const vector = await this.vectorizer.vectorize(tokenizedMail)
return vector
}

public async compress(uncompressedVector: number[]): Promise<Uint8Array> {
return this.sparseVectorCompressor.vectorToBinary(uncompressedVector)
}

// visibleForTesting
public preprocessMail(mail: SpamMailDatum): PreprocessedMailContent {
const mailText = this.concatSubjectAndBody(mail)

if (!this.preprocessConfiguration.isPreprocessMails) {
return mailText
}

let preprocessedMail = mailText

// 1. Remove HTML code
if (this.preprocessConfiguration.isRemoveHTML) {
preprocessedMail = htmlToText(preprocessedMail)
}

// 2. Replace dates
if (this.preprocessConfiguration.isReplaceDates) {
for (const datePattern of ML_DATE_REGEX) {
preprocessedMail = preprocessedMail.replaceAll(datePattern, ML_DATE_TOKEN)
}
}

// 3. Replace urls
if (this.preprocessConfiguration.isReplaceUrls) {
preprocessedMail = preprocessedMail.replaceAll(ML_URL_REGEX, ML_URL_TOKEN)
}

// 4. Replace email addresses
if (this.preprocessConfiguration.isReplaceMailAddresses) {
preprocessedMail = preprocessedMail.replaceAll(ML_EMAIL_ADDR_REGEX, ML_EMAIL_ADDR_TOKEN)
}

// 5. Replace Bitcoin addresses
if (this.preprocessConfiguration.isReplaceBitcoinAddress) {
preprocessedMail = preprocessedMail.replaceAll(ML_BITCOIN_REGEX, ML_BITCOIN_TOKEN)
}

// 6. Replace credit card numbers
if (this.preprocessConfiguration.isReplaceCreditCards) {
preprocessedMail = preprocessedMail.replaceAll(ML_CREDIT_CARD_REGEX, ML_CREDIT_CARD_TOKEN)
}

// 7. Replace remaining numbers
if (this.preprocessConfiguration.isReplaceNumbers) {
preprocessedMail = preprocessedMail.replaceAll(ML_NUMBER_SEQUENCE_REGEX, ML_NUMBER_SEQUENCE_TOKEN)
}

// 8. Remove special characters
if (this.preprocessConfiguration.isReplaceSpecialCharacters) {
preprocessedMail = preprocessedMail.replaceAll(ML_SPECIAL_CHARACTER_REGEX, ML_SPECIAL_CHARACTER_TOKEN)
}

// 9. Remove spaces at end of lines
if (this.preprocessConfiguration.isRemoveSpaceBeforeNewLine) {
preprocessedMail = preprocessedMail.replaceAll(ML_SPACE_BEFORE_NEW_LINE_REGEX, ML_SPACE_BEFORE_NEW_LINE_TOKEN)
}

preprocessedMail += this.getHeaderFeatures(mail)

return preprocessedMail
}

private concatSubjectAndBody(mail: SpamMailDatum) {
const subject = mail.subject || ""
const body = mail.body || ""
const concatenated = `${subject}\n${body}`.trim()

return concatenated.length > 0 ? concatenated : " "
}

private getHeaderFeatures(mail: SpamMailDatum): string {
const { sender, toRecipients, ccRecipients, bccRecipients, authStatus } = mail
return `\n${sender}\n${toRecipients}\n${ccRecipients}\n${bccRecipients}\n${authStatus}`
}
}
export function createSpamMailDatum(mail: Mail, mailDetails: MailDetails) {
const spamMailDatum: SpamMailDatum = {
subject: mail.subject,
body: getMailBodyText(mailDetails.body),
ownerGroup: assertNotNull(mail._ownerGroup),
...extractSpamHeaderFeatures(mail, mailDetails),
}
return spamMailDatum
}

export function extractSpamHeaderFeatures(mail: Mail, mailDetails: MailDetails) {
const sender = joinNamesAndMailAddresses([mail?.sender])
const { toRecipients, ccRecipients, bccRecipients } = extractRecipients(mailDetails)
const authStatus = convertAuthStatusToSpamCategorizationToken(mail.authStatus)

return { sender, toRecipients, ccRecipients, bccRecipients, authStatus }
}

function extractRecipients({ recipients }: MailDetails) {
const toRecipients = joinNamesAndMailAddresses(recipients?.toRecipients)
const ccRecipients = joinNamesAndMailAddresses(recipients?.ccRecipients)
const bccRecipients = joinNamesAndMailAddresses(recipients?.bccRecipients)

return { toRecipients, ccRecipients, bccRecipients }
}

function joinNamesAndMailAddresses(recipients: MailAddress[] | null) {
return recipients?.map((recipient) => `${recipient?.name} ${recipient?.address}`).join(" ") || ""
}

function convertAuthStatusToSpamCategorizationToken(authStatus: string | null): string {
if (authStatus === MailAuthenticationStatus.AUTHENTICATED) {
return "TAUTHENTICATED"
} else if (authStatus === MailAuthenticationStatus.HARD_FAIL) {
return "THARDFAIL"
} else if (authStatus === MailAuthenticationStatus.SOFT_FAIL) {
return "TSOFTFAIL"
} else if (authStatus === MailAuthenticationStatus.INVALID_MAIL_FROM) {
return "TINVALIDMAILFROM"
} else if (authStatus === MailAuthenticationStatus.MISSING_MAIL_FROM) {
return "TMISSINGMAILFROM"
}

return ""
}
export const DEFAULT_IS_SPAM_CONFIDENCE = "1"

export function getSpamConfidence(mail: Mail): string {
return mail.clientSpamClassifierResult?.confidence ?? DEFAULT_IS_SPAM_CONFIDENCE
}

/**
* We pick a max word frequency of 2^5 so that we can compress it together
* with the index (which is 2^11 =2048) into two bytes
*/
export const MAX_WORD_FREQUENCY = 31
export const DEFAULT_VECTOR_MAX_LENGTH = 2048

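A quick usage sketch of the processor above (illustrative only, not part of the commit; the field values are made up, and in the app the datum is built by createSpamMailDatum(mail, mailDetails)):

// Illustrative sketch. A SpamMailDatum is plain strings plus the owner group id.
const processor = new SpamMailProcessor() // default config; vectorizer and compressor share dimension 2048
const datum: SpamMailDatum = {
	subject: "You won!!!",
	body: "Claim your prize at https://example.com before 01/01/2026",
	ownerGroup: "hypotheticalMailGroupId",
	sender: "Prize Desk prizes@example.com",
	toRecipients: "Alice alice@example.com",
	ccRecipients: "",
	bccRecipients: "",
	authStatus: "THARDFAIL", // already converted by convertAuthStatusToSpamCategorizationToken()
}
// preprocessMail() strips HTML and replaces URLs, dates and numbers with the ML_* placeholder tokens,
// vectorize() hashes the tokens into a 2048-dim frequency vector,
// compress() packs the non-zero entries into a Uint8Array for upload.
const compressed: Uint8Array = await processor.vectorizeAndCompress(datum)
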
@@ -0,0 +1,74 @@
import { ProgrammingError } from "../../error/ProgrammingError"
import { DEFAULT_VECTOR_MAX_LENGTH, MAX_WORD_FREQUENCY } from "./SpamMailProcessor"

/**
* Example:
*
* const vector = [0,0,7,0,0,4,4,0,0]
*
* const compressedSparseVector = {
* indices: [2, 5, 6],
* values: [7, 4, 4]
* }
*/
export type CompressedSparseVector = {
indices: number[] // this can be UInt16 (max. 2048) (delta encoding still doesn't guarantee values would be below 256 so we cannot use it + UInt8?)
values: number[] // values: [val, val, ...] (values are limited to [0..32] range
}

/**
* Class for compressing and decompressing sparse numerical vectors using delta encoding
* and run-length encoding techniques. This allows efficient storage and manipulation of
* sparse data by reducing unnecessary memory usage.
*/
export class SparseVectorCompressor {
constructor(public readonly dimension: number = DEFAULT_VECTOR_MAX_LENGTH) {}

public vectorToBinary(vector: number[]): Uint8Array {
const compressedSparseVector = this.compressVector(vector)
const result: number[] = []
result.length = compressedSparseVector.indices.length

for (let i = 0; i < compressedSparseVector.indices.length; i++) {
const index = compressedSparseVector.indices[i]
const value = compressedSparseVector.values[i]

result[i] = ((index & 0x7ff) << 5) | (value & 0x1f)
}

return new Uint8Array(new Uint16Array(result).buffer)
}

public binaryToVector(binary: Uint8Array): number[] {
const vector = new Array(this.dimension).fill(0)
const array = new Uint16Array(binary.buffer)

for (let i = 0; i < array.length; i++) {
const packedValue = array[i]
const index = (packedValue >> 5) & 0x7ff // Extract 11 bits for index
const value = packedValue & 0x1f // Extract 5 bits for value

vector[index] = value
}
return vector
}

/**
* Converts a dense vector to flat sparse form: { indices, values }
*/
public compressVector(vector: number[]): CompressedSparseVector {
if (vector.length > this.dimension) {
throw new ProgrammingError("vector is too big for dimension")
}
const indices: number[] = []
const values: number[] = []
for (let i = 0; i < vector.length; i++) {
const val = vector[i]
if (val !== 0) {
indices.push(i)
values.push(Math.min(val, MAX_WORD_FREQUENCY))
}
}
return { indices, values }
}
}

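To make the packing above concrete, a small worked example (illustrative only, using the vector from the doc comment): each non-zero entry becomes one 16-bit word, 11 bits for the hashed token index (0..2047) and 5 bits for the frequency clamped to MAX_WORD_FREQUENCY = 31, which is why index and value together fit into two bytes.

// vector = [0, 0, 7, 0, 0, 4, 4, 0, 0]  ->  indices = [2, 5, 6], values = [7, 4, 4]
// packed word per entry: (index << 5) | value
//   (2 << 5) | 7 = 71
//   (5 << 5) | 4 = 164
//   (6 << 5) | 4 = 196
// Uint16Array [71, 164, 196] becomes 6 bytes on the wire instead of a 2048-entry dense vector.
// binaryToVector() reverses this: index = word >> 5, value = word & 0x1f.
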
@@ -1,5 +1,5 @@
const modelInfo = {
version: 97,
version: 98,
}

export default modelInfo

@@ -37,6 +37,8 @@ import { MoveMailDataTypeRef } from "./TypeRefs.js"
import { MoveMailPostOutTypeRef } from "./TypeRefs.js"
import { NewsOutTypeRef } from "./TypeRefs.js"
import { NewsInTypeRef } from "./TypeRefs.js"
import { PopulateClientSpamTrainingDataPostInTypeRef } from "./TypeRefs.js"
import { ProcessInboxPostInTypeRef } from "./TypeRefs.js"
import { ReceiveInfoServiceDataTypeRef } from "./TypeRefs.js"
import { ReceiveInfoServicePostOutTypeRef } from "./TypeRefs.js"
import { ReportMailPostDataTypeRef } from "./TypeRefs.js"
@@ -231,6 +233,24 @@ export const NewsService = Object.freeze({
delete: null,
} as const)

export const PopulateClientSpamTrainingDataService = Object.freeze({
app: "tutanota",
name: "PopulateClientSpamTrainingDataService",
get: null,
post: { data: PopulateClientSpamTrainingDataPostInTypeRef, return: null },
put: null,
delete: null,
} as const)

export const ProcessInboxService = Object.freeze({
app: "tutanota",
name: "ProcessInboxService",
get: null,
post: { data: ProcessInboxPostInTypeRef, return: null },
put: null,
delete: null,
} as const)

export const ReceiveInfoService = Object.freeze({
app: "tutanota",
name: "ReceiveInfoService",

File diff suppressed because it is too large

@@ -244,6 +244,7 @@ export type Mail = {
_ownerKeyVersion: null | NumberString;
keyVerificationState: null | NumberString;
processingState: NumberString;
processNeeded: boolean;

sender: MailAddress;
attachments: IdTuple[];
@@ -284,6 +285,8 @@ export type MailBox = {
importedAttachments: Id;
mailImportStates: Id;
extractedFeatures: null | Id;
clientSpamTrainingData: null | Id;
modifiedClientSpamTrainingDataIndex: null | Id;
}
export const CreateExternalUserGroupDataTypeRef: TypeRef<CreateExternalUserGroupData> = new TypeRef("tutanota", 138)

@@ -2609,3 +2612,108 @@ export type ClientClassifierResultPostIn = {

mails: IdTuple[];
}
export const ClientSpamTrainingDatumTypeRef: TypeRef<ClientSpamTrainingDatum> = new TypeRef("tutanota", 1736)

export function createClientSpamTrainingDatum(values: StrippedEntity<ClientSpamTrainingDatum>): ClientSpamTrainingDatum {
return Object.assign(create(typeModels[ClientSpamTrainingDatumTypeRef.typeId], ClientSpamTrainingDatumTypeRef), values)
}

export type ClientSpamTrainingDatum = {
_type: TypeRef<ClientSpamTrainingDatum>;
_errors: Object;
_original?: ClientSpamTrainingDatum

_id: IdTuple;
_permissions: Id;
_format: NumberString;
_ownerGroup: null | Id;
_ownerEncSessionKey: null | Uint8Array;
_ownerKeyVersion: null | NumberString;
confidence: NumberString;
spamDecision: NumberString;
vector: Uint8Array;
}
export const ClientSpamTrainingDatumIndexEntryTypeRef: TypeRef<ClientSpamTrainingDatumIndexEntry> = new TypeRef("tutanota", 1747)

export function createClientSpamTrainingDatumIndexEntry(values: StrippedEntity<ClientSpamTrainingDatumIndexEntry>): ClientSpamTrainingDatumIndexEntry {
return Object.assign(create(typeModels[ClientSpamTrainingDatumIndexEntryTypeRef.typeId], ClientSpamTrainingDatumIndexEntryTypeRef), values)
}

export type ClientSpamTrainingDatumIndexEntry = {
_type: TypeRef<ClientSpamTrainingDatumIndexEntry>;
_original?: ClientSpamTrainingDatumIndexEntry

_id: IdTuple;
_permissions: Id;
_format: NumberString;
_ownerGroup: null | Id;
clientSpamTrainingDatumElementId: Id;
}
export const ProcessInboxDatumTypeRef: TypeRef<ProcessInboxDatum> = new TypeRef("tutanota", 1756)

export function createProcessInboxDatum(values: StrippedEntity<ProcessInboxDatum>): ProcessInboxDatum {
return Object.assign(create(typeModels[ProcessInboxDatumTypeRef.typeId], ProcessInboxDatumTypeRef), values)
}

export type ProcessInboxDatum = {
_type: TypeRef<ProcessInboxDatum>;
_original?: ProcessInboxDatum

_id: Id;
ownerEncVectorSessionKey: Uint8Array;
ownerKeyVersion: NumberString;
classifierType: null | NumberString;
encVector: Uint8Array;

mailId: IdTuple;
targetMoveFolder: IdTuple;
}
export const ProcessInboxPostInTypeRef: TypeRef<ProcessInboxPostIn> = new TypeRef("tutanota", 1764)

export function createProcessInboxPostIn(values: StrippedEntity<ProcessInboxPostIn>): ProcessInboxPostIn {
return Object.assign(create(typeModels[ProcessInboxPostInTypeRef.typeId], ProcessInboxPostInTypeRef), values)
}

export type ProcessInboxPostIn = {
_type: TypeRef<ProcessInboxPostIn>;
_original?: ProcessInboxPostIn

_format: NumberString;
mailOwnerGroup: Id;

processInboxDatum: ProcessInboxDatum[];
}
export const PopulateClientSpamTrainingDatumTypeRef: TypeRef<PopulateClientSpamTrainingDatum> = new TypeRef("tutanota", 1770)

export function createPopulateClientSpamTrainingDatum(values: StrippedEntity<PopulateClientSpamTrainingDatum>): PopulateClientSpamTrainingDatum {
return Object.assign(create(typeModels[PopulateClientSpamTrainingDatumTypeRef.typeId], PopulateClientSpamTrainingDatumTypeRef), values)
}

export type PopulateClientSpamTrainingDatum = {
_type: TypeRef<PopulateClientSpamTrainingDatum>;
_original?: PopulateClientSpamTrainingDatum

_id: Id;
ownerEncVectorSessionKey: Uint8Array;
ownerKeyVersion: NumberString;
isSpam: boolean;
confidence: NumberString;
encVector: Uint8Array;

mailId: IdTuple;
}
export const PopulateClientSpamTrainingDataPostInTypeRef: TypeRef<PopulateClientSpamTrainingDataPostIn> = new TypeRef("tutanota", 1778)

export function createPopulateClientSpamTrainingDataPostIn(values: StrippedEntity<PopulateClientSpamTrainingDataPostIn>): PopulateClientSpamTrainingDataPostIn {
return Object.assign(create(typeModels[PopulateClientSpamTrainingDataPostInTypeRef.typeId], PopulateClientSpamTrainingDataPostInTypeRef), values)
}

export type PopulateClientSpamTrainingDataPostIn = {
_type: TypeRef<PopulateClientSpamTrainingDataPostIn>;
_original?: PopulateClientSpamTrainingDataPostIn

_format: NumberString;
mailOwnerGroup: Id;

populateClientSpamTrainingDatum: PopulateClientSpamTrainingDatum[];
}

@@ -45,9 +45,13 @@ export class EventController {
// the UserController must be notified first as other event receivers depend on it to be up-to-date
await this.logins.getUserController().entityEventsReceived(entityUpdates, eventOwnerGroupId)
}
// sequentially to prevent parallel loading of instances
for (const listener of this.entityListeners) {
await listener(entityUpdates, eventOwnerGroupId)
// run listeners async to speed up processing
// we ran it sequentially before to prevent parallel loading of instances
// this should not be a problem anymore as we prefetch now

// noinspection ES6MissingAwait
listener(entityUpdates, eventOwnerGroupId)
}
}

@@ -9,6 +9,8 @@ import {
MailService,
ManageLabelService,
MoveMailService,
PopulateClientSpamTrainingDataService,
ProcessInboxService,
ReportMailService,
ResolveConversationsService,
SendDraftService,
@@ -60,6 +62,10 @@ import {
createManageLabelServicePostIn,
createMoveMailData,
createNewDraftAttachment,
createPopulateClientSpamTrainingDataPostIn,
createPopulateClientSpamTrainingDatum,
createProcessInboxDatum,
createProcessInboxPostIn,
createReportMailPostData,
createResolveConversationsServiceGetIn,
createSecureExternalRecipientKeyData,
@@ -81,6 +87,8 @@ import {
MailFolder,
MailTypeRef,
MovedMails,
PopulateClientSpamTrainingDatum,
ProcessInboxDatum,
ReportedMailFieldMarker,
SendDraftData,
SymEncInternalRecipientKeyData,
@@ -114,7 +122,6 @@ import {
isNotNull,
isSameTypeRef,
noOp,
Nullable,
ofClass,
parseUrl,
promiseFilter,
@@ -132,6 +139,7 @@ import { UNCOMPRESSED_MAX_SIZE } from "../../Compression.js"
import {
Aes128Key,
aes256RandomKey,
aesEncrypt,
AesKey,
bitArrayToUint8Array,
createAuthVerifier,
@@ -155,13 +163,16 @@ import { LoginFacade } from "../LoginFacade.js"
import { ProgrammingError } from "../../../common/error/ProgrammingError.js"
import { OwnerEncSessionKeyProvider } from "../../rest/EntityRestClient.js"
import { KeyLoaderFacade, parseKeyVersion } from "../KeyLoaderFacade.js"
import { _encryptBytes, _encryptKeyWithVersionedKey, _encryptString, VersionedKey } from "../../crypto/CryptoWrapper.js"
import { CryptoWrapper, VersionedKey } from "../../crypto/CryptoWrapper.js"
import { PublicEncryptionKeyProvider } from "../PublicEncryptionKeyProvider.js"
import { EntityUpdateData, isUpdateForTypeRef } from "../../../common/utils/EntityUpdateUtils"
import { Entity } from "../../../common/EntityTypes"
import { KeyVerificationMismatchError } from "../../../common/error/KeyVerificationMismatchError"
import { VerifiedPublicEncryptionKey } from "./KeyVerificationFacade"
import { ClientClassifierType } from "../../../common/ClientClassifierType"
import { UnencryptedProcessInboxDatum } from "../../../../../mail-app/mail/model/ProcessInboxHandler"
import { UnencryptedPopulateClientSpamTrainingDatum } from "../../../../../mail-app/workerUtils/spamClassification/SpamClassificationDataDealer"
import { MailWithMailDetails } from "../../../../../mail-app/workerUtils/index/BulkMailLoader"
import { createSpamMailDatum, SpamMailProcessor } from "../../../common/utils/spamClassificationUtils/SpamMailProcessor"

assertWorkerOrNode()
type Attachments = ReadonlyArray<TutanotaFile | DataFile | FileReference>
@@ -199,11 +210,13 @@ export class MailFacade {
private phishingMarkers: Set<string> = new Set()
private deferredDraftId: IdTuple | null = null // the mail id of the draft that we are waiting for to be updated via websocket
private deferredDraftUpdate: Record<string, any> | null = null // this deferred promise is resolved as soon as the update of the draft is received
private spamMailProcessor: SpamMailProcessor = new SpamMailProcessor()

constructor(
private readonly userFacade: UserFacade,
private readonly entityClient: EntityClient,
private readonly crypto: CryptoFacade,
private readonly cryptoWrapper: CryptoWrapper,
private readonly serviceExecutor: IServiceExecutor,
private readonly blobFacade: BlobFacade,
private readonly fileApp: NativeFileApp,
@@ -216,7 +229,7 @@ export class MailFacade {
const mailGroupKey = await this.keyLoaderFacade.getCurrentSymGroupKey(ownerGroupId)

const sk = aes256RandomKey()
const ownerEncSessionKey = _encryptKeyWithVersionedKey(mailGroupKey, sk)
const ownerEncSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, sk)
const newFolder = createCreateMailFolderData({
folderName: name,
parentFolder: parent,
@@ -295,7 +308,7 @@ export class MailFacade {
const mailGroupKey = await this.keyLoaderFacade.getCurrentSymGroupKey(senderMailGroupId)

const sk = aes256RandomKey()
const ownerEncSessionKey = _encryptKeyWithVersionedKey(mailGroupKey, sk)
const ownerEncSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, sk)
const service = createDraftCreateData({
previousMessageId: previousMessageId,
conversationType: conversationType,
@@ -393,12 +406,7 @@ export class MailFacade {
/**
* Move mails from {@param targetFolder} except those that are in {@param excludeMailSet}.
*/
async moveMails(
mails: readonly IdTuple[],
targetFolder: IdTuple,
excludeMailSet: IdTuple | null,
moveReason: ClientClassifierType | null = null,
): Promise<MovedMails[]> {
async moveMails(mails: readonly IdTuple[], targetFolder: IdTuple, excludeMailSet: IdTuple | null): Promise<MovedMails[]> {
if (isEmpty(mails)) {
return []
}
@@ -415,7 +423,7 @@ export class MailFacade {
mails,
excludeMailSet,
targetFolder,
moveReason,
moveReason: null, // moveReason is not needed anymore from clients using TutanotaModel > 97
}),
)
movedMails.push(...moveMailPostOut.movedMails)
@@ -424,11 +432,7 @@ export class MailFacade {
return movedMails
}

async simpleMoveMails(
mails: readonly IdTuple[],
targetFolderKind: SimpleMoveMailTarget,
moveReason: Nullable<ClientClassifierType>,
): Promise<MovedMails[]> {
async simpleMoveMails(mails: readonly IdTuple[], targetFolderKind: SimpleMoveMailTarget): Promise<MovedMails[]> {
if (isEmpty(mails)) {
return []
}
@@ -441,7 +445,7 @@ export class MailFacade {
createSimpleMoveMailPostIn({
mails,
destinationSetType: targetFolderKind,
moveReason,
moveReason: null, // moveReason is not needed anymore from clients using TutanotaModel > 97
}),
)
movedMails.push(...simpleMove.movedMails)
@@ -541,7 +545,7 @@ export class MailFacade {
// forwarded attachment which was not in the draft before
return this.crypto.resolveSessionKey(providedFile).then((fileSessionKey) => {
const sessionKey = assertNotNull(fileSessionKey, "filesessionkey was not resolved")
const ownerEncFileSessionKey = _encryptKeyWithVersionedKey(mailGroupKey, sessionKey)
const ownerEncFileSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, sessionKey)
const attachment = createDraftAttachment({
existingFile: getLetId(providedFile),
ownerEncFileSessionKey: ownerEncFileSessionKey.key,
@@ -571,13 +575,13 @@ export class MailFacade {
providedFile: DataFile | FileReference,
mailGroupKey: VersionedKey,
): DraftAttachment {
const ownerEncFileSessionKey = _encryptKeyWithVersionedKey(mailGroupKey, fileSessionKey)
const ownerEncFileSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, fileSessionKey)
return createDraftAttachment({
newFile: createNewDraftAttachment({
encFileName: _encryptString(fileSessionKey, providedFile.name),
encMimeType: _encryptString(fileSessionKey, providedFile.mimeType),
encFileName: this.cryptoWrapper.encryptString(fileSessionKey, providedFile.name),
encMimeType: this.cryptoWrapper.encryptString(fileSessionKey, providedFile.mimeType),
referenceTokens: referenceTokens,
encCid: providedFile.cid == null ? null : _encryptString(fileSessionKey, providedFile.cid),
encCid: providedFile.cid == null ? null : this.cryptoWrapper.encryptString(fileSessionKey, providedFile.cid),
}),
ownerEncFileSessionKey: ownerEncFileSessionKey.key,
ownerKeyVersion: ownerEncFileSessionKey.encryptingKeyVersion.toString(),
@@ -640,7 +644,7 @@ export class MailFacade {

await this.addRecipientKeyData(bucketKey, sendDraftData, recipients, senderMailGroupId)
if (this.isTutaCryptMail(sendDraftData)) {
sendDraftData.sessionEncEncryptionAuthStatus = _encryptString(sk, EncryptionAuthStatus.TUTACRYPT_SENDER)
sendDraftData.sessionEncEncryptionAuthStatus = this.cryptoWrapper.encryptString(sk, EncryptionAuthStatus.TUTACRYPT_SENDER)
}
} else {
sendDraftData.mailSessionKey = bitArrayToUint8Array(sk)
@@ -788,7 +792,7 @@ export class MailFacade {
const passwordKey = await this.loginFacade.deriveUserPassphraseKey({ kdfType, passphrase, salt })
const passwordVerifier = createAuthVerifier(passwordKey)
const externalGroupKeys = await this.getExternalGroupKeys(recipient.address, kdfType, passwordKey, passwordVerifier)
const ownerEncBucketKey = _encryptKeyWithVersionedKey(externalGroupKeys.currentExternalMailGroupKey, bucketKey)
const ownerEncBucketKey = this.cryptoWrapper.encryptKeyWithVersionedKey(externalGroupKeys.currentExternalMailGroupKey, bucketKey)
const data = createSecureExternalRecipientKeyData({
mailAddress: recipient.address,
kdfVersion: kdfType,
@@ -968,9 +972,9 @@ export class MailFacade {
const externalMailGroupInfoSessionKey = aes256RandomKey()
const tutanotaPropertiesSessionKey = aes256RandomKey()
const mailboxSessionKey = aes256RandomKey()
const externalUserEncEntropy = _encryptBytes(currentExternalUserGroupKey.object, random.generateRandomData(32))
const externalUserEncEntropy = this.cryptoWrapper.encryptBytes(currentExternalUserGroupKey.object, random.generateRandomData(32))

const internalUserEncGroupKey = _encryptKeyWithVersionedKey(internalUserGroupKey, currentExternalUserGroupKey.object)
const internalUserEncGroupKey = this.cryptoWrapper.encryptKeyWithVersionedKey(internalUserGroupKey, currentExternalUserGroupKey.object)
const userGroupData = createCreateExternalUserGroupData({
mailAddress: cleanedMailAddress,
externalPwEncUserGroupKey: encryptKey(externalUserPwKey, currentExternalUserGroupKey.object),
@@ -978,15 +982,24 @@ export class MailFacade {
internalUserGroupKeyVersion: internalUserEncGroupKey.encryptingKeyVersion.toString(),
})

const externalUserEncUserGroupInfoSessionKey = _encryptKeyWithVersionedKey(currentExternalUserGroupKey, externalUserGroupInfoSessionKey)
const externalUserEncMailGroupKey = _encryptKeyWithVersionedKey(currentExternalUserGroupKey, currentExternalMailGroupKey.object)
const externalUserEncTutanotaPropertiesSessionKey = _encryptKeyWithVersionedKey(currentExternalUserGroupKey, tutanotaPropertiesSessionKey)
const externalUserEncUserGroupInfoSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(
currentExternalUserGroupKey,
externalUserGroupInfoSessionKey,
)
const externalUserEncMailGroupKey = this.cryptoWrapper.encryptKeyWithVersionedKey(currentExternalUserGroupKey, currentExternalMailGroupKey.object)
const externalUserEncTutanotaPropertiesSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(
currentExternalUserGroupKey,
tutanotaPropertiesSessionKey,
)

const externalMailEncMailGroupInfoSessionKey = _encryptKeyWithVersionedKey(currentExternalMailGroupKey, externalMailGroupInfoSessionKey)
const externalMailEncMailBoxSessionKey = _encryptKeyWithVersionedKey(currentExternalMailGroupKey, mailboxSessionKey)
const externalMailEncMailGroupInfoSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(
currentExternalMailGroupKey,
externalMailGroupInfoSessionKey,
)
const externalMailEncMailBoxSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(currentExternalMailGroupKey, mailboxSessionKey)

const internalMailEncUserGroupInfoSessionKey = _encryptKeyWithVersionedKey(internalMailGroupKey, externalUserGroupInfoSessionKey)
const internalMailEncMailGroupInfoSessionKey = _encryptKeyWithVersionedKey(internalMailGroupKey, externalMailGroupInfoSessionKey)
const internalMailEncUserGroupInfoSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(internalMailGroupKey, externalUserGroupInfoSessionKey)
const internalMailEncMailGroupInfoSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(internalMailGroupKey, externalMailGroupInfoSessionKey)

const externalUserData = createExternalUserData({
verifier,
@@ -1123,7 +1136,7 @@ export class MailFacade {
async createLabel(mailGroupId: Id, labelData: { name: string; color: string }) {
const mailGroupKey = await this.keyLoaderFacade.getCurrentSymGroupKey(mailGroupId)
const sk = aes256RandomKey()
const ownerEncSessionKey = _encryptKeyWithVersionedKey(mailGroupKey, sk)
const ownerEncSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, sk)

await this.serviceExecutor.post(
ManageLabelService,
@@ -1215,6 +1228,96 @@ export class MailFacade {
)
}

private async encryptUnencryptedProcessInboxData(
mailGroupId: Id,
unencryptedProcessInboxData: readonly UnencryptedProcessInboxDatum[],
): Promise<ProcessInboxDatum[]> {
const processInboxData: ProcessInboxDatum[] = []
for (const unencryptedProcessInboxDatum of unencryptedProcessInboxData) {
const mailGroupKey = await this.keyLoaderFacade.getCurrentSymGroupKey(mailGroupId)
const sk = aes256RandomKey()
const ownerEncSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, sk)
const { targetMoveFolder, classifierType, mailId } = unencryptedProcessInboxDatum
processInboxData.push(
createProcessInboxDatum({
ownerEncVectorSessionKey: ownerEncSessionKey.key,
ownerKeyVersion: ownerEncSessionKey.encryptingKeyVersion.toString(),
encVector: aesEncrypt(sk, unencryptedProcessInboxDatum.vector),
classifierType,
mailId,
targetMoveFolder,
}),
)
}
return processInboxData
}

async processNewMails(mailGroupId: Id, unencryptedProcessInboxData: readonly UnencryptedProcessInboxDatum[]) {
const processInboxData = await this.encryptUnencryptedProcessInboxData(mailGroupId, unencryptedProcessInboxData)
await promiseMap(
splitInChunks(MAX_NBR_OF_MAILS_SYNC_OPERATION, processInboxData),
async (inboxData) =>
this.serviceExecutor.post(
ProcessInboxService,
createProcessInboxPostIn({
mailOwnerGroup: mailGroupId,
processInboxDatum: inboxData,
}),
),
{ concurrency: 5 },
)
}

private async encryptUnencryptedPopulateClientSpamTrainingDatum(
mailGroupId: Id,
unencryptedPopulateClientSpamTrainingData: ReadonlyArray<UnencryptedPopulateClientSpamTrainingDatum>,
): Promise<Array<PopulateClientSpamTrainingDatum>> {
const populateClientSpamTrainingData: PopulateClientSpamTrainingDatum[] = []
for (const unencryptedProcessInboxDatum of unencryptedPopulateClientSpamTrainingData) {
const mailGroupKey = await this.keyLoaderFacade.getCurrentSymGroupKey(mailGroupId)
const sk = aes256RandomKey()
const ownerEncSessionKey = this.cryptoWrapper.encryptKeyWithVersionedKey(mailGroupKey, sk)
const { isSpam, confidence, mailId } = unencryptedProcessInboxDatum
populateClientSpamTrainingData.push(
createPopulateClientSpamTrainingDatum({
ownerEncVectorSessionKey: ownerEncSessionKey.key,
ownerKeyVersion: ownerEncSessionKey.encryptingKeyVersion.toString(),
encVector: aesEncrypt(sk, unencryptedProcessInboxDatum.vector),
isSpam,
mailId,
confidence,
}),
)
}
return populateClientSpamTrainingData
}

async populateClientSpamTrainingData(
mailGroupId: Id,
unencryptedPopulateClientSpamTrainingData: ReadonlyArray<UnencryptedPopulateClientSpamTrainingDatum>,
) {
const populateClientSpamTrainingData = await this.encryptUnencryptedPopulateClientSpamTrainingDatum(
mailGroupId,
unencryptedPopulateClientSpamTrainingData,
)
await promiseMap(
splitInChunks(MAX_NBR_OF_MAILS_SYNC_OPERATION, populateClientSpamTrainingData),
async (clientSpamTrainingData) =>
this.serviceExecutor.post(
PopulateClientSpamTrainingDataService,
createPopulateClientSpamTrainingDataPostIn({
mailOwnerGroup: mailGroupId,
populateClientSpamTrainingDatum: clientSpamTrainingData,
}),
),
{ concurrency: 5 },
)
}

async vectorizeAndCompressMails(mailWithDetails: MailWithMailDetails) {
return this.spamMailProcessor.vectorizeAndCompress(createSpamMailDatum(mailWithDetails.mail, mailWithDetails.mailDetails))
}

/** Resolve conversation list ids to the IDs of mails in those conversations. */
async resolveConversations(conversationListIds: readonly Id[]): Promise<IdTuple[]> {
const result = await promiseMap(

@@ -46,6 +46,7 @@ import { AttributeModel } from "../../common/AttributeModel"
import { TypeModelResolver } from "../../common/EntityFunctions"
import { collapseId, expandId } from "../rest/RestClientIdUtils"
import { Category, syncMetrics } from "../utils/SyncMetrics"
import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier"

/**
* this is the value of SQLITE_MAX_VARIABLE_NUMBER in sqlite3.c
@@ -102,6 +103,7 @@ export interface OfflineDbMeta {
"offline-version": number
lastTrainedTime: number
lastTrainedFromScratchTime: number
lastTrainingDataId: Id
}

export const TableDefinitions = Object.freeze({
@@ -140,6 +142,11 @@ export const TableDefinitions = Object.freeze({
"CREATE TABLE IF NOT EXISTS blob_element_entities (type TEXT NOT NULL, listId TEXT NOT NULL, elementId TEXT NOT NULL, ownerGroup TEXT, entity BLOB NOT NULL, PRIMARY KEY (type, listId, elementId))",
purgedWithCache: true,
},
spam_classification_model: {
definition:
"CREATE TABLE IF NOT EXISTS spam_classification_model (version NUMBER NOT NULL, ownerGroup TEXT NOT NULL, modelTopology TEXT NOT NULL, weightSpecs TEXT NOT NULL, weightData BLOB NOT NULL, hamCount NUMBER NOT NULL, spamCount NUMBER NOT NULL, PRIMARY KEY(version, ownerGroup))",
purgedWithCache: true,
},
} as const) satisfies Record<string, OfflineStorageTable>

type Range = { lower: Id; upper: Id }
@@ -711,12 +718,12 @@ export class OfflineStorage implements CacheStorage {
await this.putMetadata("lastUpdateTime", ms)
}

async getLastTrainedTime(): Promise<number> {
return (await this.getMetadata("lastTrainedTime")) ?? 0
async getLastTrainingDataIndexId(): Promise<Id> {
return (await this.getMetadata("lastTrainingDataId")) ?? GENERATED_MIN_ID
}

async setLastTrainedTime(ms: number): Promise<void> {
await this.putMetadata("lastTrainedTime", ms)
async setLastTrainingDataIndexId(id: Id): Promise<void> {
await this.putMetadata("lastTrainingDataId", id)
}

async getLastTrainedFromScratchTime(): Promise<number> {
@@ -727,6 +734,41 @@ export class OfflineStorage implements CacheStorage {
await this.putMetadata("lastTrainedFromScratchTime", ms)
}

async setSpamClassificationModel(model: SpamClassificationModel) {
const { query, params } = sql`INSERT
OR REPLACE INTO
spam_classification_model VALUES (
${1},
${model.ownerGroup},
${model.modelTopology},
${model.weightSpecs},
${model.weightData},
${model.hamCount},
${model.spamCount}
)`
await this.sqlCipherFacade.run(query, params)
}

async getSpamClassificationModel(ownerGroup: Id): Promise<Nullable<SpamClassificationModel>> {
const { query, params } = sql`SELECT modelTopology, weightSpecs, weightData, ownerGroup, hamCount, spamCount
FROM spam_classification_model
WHERE version = ${1}
AND ownerGroup = ${ownerGroup}`
const resultRows = await this.sqlCipherFacade.get(query, params)
if (resultRows !== null) {
const untaggedValue = untagSqlObject(resultRows)
return {
modelTopology: untaggedValue.modelTopology,
weightSpecs: untaggedValue.weightSpecs,
weightData: untaggedValue.weightData,
ownerGroup: untaggedValue.ownerGroup,
hamCount: untaggedValue.hamCount,
spamCount: untaggedValue.spamCount,
} as SpamClassificationModel
}
return null
}

async purgeStorage(): Promise<void> {
if (this.userId == null || this.databaseKey == null) {
console.warn("not purging storage since we don't have an open db")

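For illustration, a round trip through the new persistence methods above. The SpamClassificationModel type itself lives in SpamClassifier.ts and is not part of this diff, so the field names below are taken from the SQL mapping and the values are made up:

// Illustrative sketch, not part of the commit.
const model = {
	ownerGroup: "hypotheticalMailGroupId",
	modelTopology: "{}", // serialized model topology (TEXT column)
	weightSpecs: "[]", // serialized weight specs (TEXT column)
	weightData: new Uint8Array(0), // raw weight buffer (BLOB column)
	hamCount: 800, // ham/spam counts back the per-classifier threshold and subsampling mentioned in the commit message
	spamCount: 200,
}
await offlineStorage.setSpamClassificationModel(model)
const restored = await offlineStorage.getSpamClassificationModel("hypotheticalMailGroupId") // null if nothing was stored yet
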
@@ -5,6 +5,7 @@ import { Nullable, TypeRef } from "@tutao/tutanota-utils"
import { OfflineStorage, OfflineStorageInitArgs } from "../offline/OfflineStorage.js"
import { EphemeralCacheStorage, EphemeralStorageInitArgs } from "./EphemeralCacheStorage"
import { CustomCacheHandlerMap } from "./cacheHandler/CustomCacheHandler.js"
import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier"

export interface EphemeralStorageArgs extends EphemeralStorageInitArgs {
type: "ephemeral"
@@ -185,12 +186,12 @@ export class LateInitializedCacheStorageImpl implements CacheStorageLateInitiali
return this.inner.putLastUpdateTime(value)
}

setLastTrainedTime(value: number): Promise<void> {
return this.inner.setLastTrainedTime(value)
setLastTrainingDataIndexId(id: Id): Promise<void> {
return this.inner.setLastTrainingDataIndexId(id)
}

getLastTrainedTime(): Promise<number> {
return this.inner.getLastTrainedTime()
getLastTrainingDataIndexId(): Promise<Id> {
return this.inner.getLastTrainingDataIndexId()
}

setLastTrainedFromScratchTime(ms: number): Promise<void> {
@@ -201,6 +202,14 @@ export class LateInitializedCacheStorageImpl implements CacheStorageLateInitiali
return this.inner.getLastTrainedFromScratchTime() ?? Date.now()
}

setSpamClassificationModel(model: SpamClassificationModel): Promise<void> {
return this.inner.setSpamClassificationModel(model)
}

getSpamClassificationModel(ownerGroup: Id): Promise<Nullable<SpamClassificationModel>> {
return this.inner.getSpamClassificationModel(ownerGroup)
}

setLowerRangeForList<T extends ListElementEntity>(typeRef: TypeRef<T>, listId: Id, id: Id): Promise<void> {
return this.inner.setLowerRangeForList(typeRef, listId, id)
}

@@ -25,7 +25,14 @@ import {
UserGroupRootTypeRef,
} from "../../entities/sys/TypeRefs.js"
import { ValueType } from "../../common/EntityConstants.js"
import { CalendarEventUidIndexTypeRef, MailDetailsBlobTypeRef, MailSetEntryTypeRef, MailTypeRef } from "../../entities/tutanota/TypeRefs.js"
import {
CalendarEventUidIndexTypeRef,
ClientSpamTrainingDatumIndexEntryTypeRef,
ClientSpamTrainingDatumTypeRef,
MailDetailsBlobTypeRef,
MailSetEntryTypeRef,
MailTypeRef,
} from "../../entities/tutanota/TypeRefs.js"
import {
CUSTOM_MAX_ID,
CUSTOM_MIN_ID,
@@ -48,6 +55,7 @@ import { AttributeModel } from "../../common/AttributeModel"
import { collapseId, expandId } from "./RestClientIdUtils"
import { PatchMerger } from "../offline/PatchMerger"
import { hasError, isExpectedErrorForSynchronization } from "../../common/utils/ErrorUtils"
import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier"

assertWorkerOrNode()

@@ -74,6 +82,8 @@ const IGNORED_TYPES = [
UserGroupRootTypeRef,
UserGroupKeyDistributionTypeRef,
AuditLogEntryTypeRef, // Should not be part of cached data because there are errors inside entity event processing after rotating the admin group key
ClientSpamTrainingDatumTypeRef,
ClientSpamTrainingDatumIndexEntryTypeRef,
] as const

/**
@@ -253,14 +263,18 @@ export interface CacheStorage extends ExposedCacheStorage {

putLastUpdateTime(value: number): Promise<void>

getLastTrainedTime(): Promise<number>
getLastTrainingDataIndexId(): Promise<Id>

setLastTrainedTime(value: number): Promise<void>
setLastTrainingDataIndexId(id: Id): Promise<void>

getLastTrainedFromScratchTime(): Promise<number>

setLastTrainedFromScratchTime(value: number): Promise<void>

getSpamClassificationModel(ownerGroup: Id): Promise<Nullable<SpamClassificationModel>>

setSpamClassificationModel(model: SpamClassificationModel): Promise<void>

getUserId(): Id

deleteAllOwnedBy(owner: Id): Promise<void>

@@ -1,7 +1,7 @@
import { BlobElementEntity, Entity, ListElementEntity, ServerModelParsedInstance, SomeEntity, TypeModel } from "../../common/EntityTypes.js"
import { customIdToBase64Url, ensureBase64Ext, firstBiggerThanSecond } from "../../common/utils/EntityUtils.js"
import { customIdToBase64Url, ensureBase64Ext, firstBiggerThanSecond, GENERATED_MIN_ID } from "../../common/utils/EntityUtils.js"
import { CacheStorage, LastUpdateTime } from "./DefaultEntityRestCache.js"
import { assertNotNull, clone, filterNull, getFromMap, getTypeString, Nullable, parseTypeString, remove, TypeRef } from "@tutao/tutanota-utils"
import { assertNotNull, clone, filterNull, getFromMap, getTypeString, newPromise, Nullable, parseTypeString, remove, TypeRef } from "@tutao/tutanota-utils"
import { CustomCacheHandlerMap } from "./cacheHandler/CustomCacheHandler.js"
import { Type as TypeId } from "../../common/EntityConstants.js"
import { ProgrammingError } from "../../common/error/ProgrammingError.js"
@@ -10,6 +10,7 @@ import { ModelMapper } from "../crypto/ModelMapper"
import { ServerTypeModelResolver } from "../../common/EntityFunctions"
import { expandId } from "./RestClientIdUtils"
import { hasError } from "../../common/utils/ErrorUtils"
import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier"

/** Cache for a single list. */
type ListCache = {
@@ -41,8 +42,9 @@ export class EphemeralCacheStorage implements CacheStorage {
private readonly entities: Map<string, Map<Id, ServerModelParsedInstance>> = new Map()
private readonly lists: Map<string, ListTypeCache> = new Map()
private readonly blobEntities: Map<string, BlobElementTypeCache> = new Map()
private readonly spamClassificationModelCache: Map<Id, SpamClassificationModel> = new Map()
private lastUpdateTime: number | null = null
private lastTrainedTime: number | null = null
private lastTrainingDataId: Id = GENERATED_MIN_ID
private lastTrainedFromScratchTime: number | null = null
private userId: Id | null = null
private lastBatchIdPerGroup = new Map<Id, Id>()
@@ -419,12 +421,12 @@ export class EphemeralCacheStorage implements CacheStorage {
this.lastUpdateTime = value
}

async getLastTrainedTime(): Promise<number> {
return this.lastTrainedTime ?? 0
async getLastTrainingDataIndexId(): Promise<Id> {
return this.lastTrainingDataId
}

async setLastTrainedTime(value: number): Promise<void> {
this.lastTrainedTime = value
async setLastTrainingDataIndexId(id: Id): Promise<void> {
this.lastTrainingDataId = id
}

async getLastTrainedFromScratchTime(): Promise<number> {
@@ -435,6 +437,14 @@ export class EphemeralCacheStorage implements CacheStorage {
this.lastTrainedFromScratchTime = ms
}

async setSpamClassificationModel(model: SpamClassificationModel): Promise<void> {
this.spamClassificationModelCache.set(model.ownerGroup, model)
}

async getSpamClassificationModel(ownerGroup: Id): Promise<Nullable<SpamClassificationModel>> {
return this.spamClassificationModelCache.get(ownerGroup) ?? null
}

async getWholeList<T extends ListElementEntity>(typeRef: TypeRef<T>, listId: Id): Promise<Array<T>> {
const parsedInstances = await this.getWholeListParsed(typeRef, listId)
return await this.modelMapper.mapToInstances(typeRef, parsedInstances)

@@ -97,6 +97,7 @@ export class CustomColorEditorPreview implements Component {
sets: [],
processingState: ProcessingState.INBOX_RULE_NOT_PROCESSED,
clientSpamClassifierResult: null,
processNeeded: true,
} satisfies Partial<Mail>
const mail = createMail({
sender: createMailAddress({

@ -246,17 +246,17 @@ import("./translations/en.js")
|
|||
),
|
||||
)
|
||||
mailLocator.logins.addPostLoginAction(async () => {
|
||||
const { MailIndexAndSpamClassificationPostLoginAction } = await import("./search/model/MailIndexAndSpamClassificationPostLoginAction")
|
||||
const { MailIndexerPostLoginAction } = await import("./search/model/MailIndexerPostLoginAction")
|
||||
const offlineStorageSettings = await mailLocator.offlineStorageSettingsModel()
|
||||
return new MailIndexAndSpamClassificationPostLoginAction(
|
||||
assertNotNull(offlineStorageSettings),
|
||||
mailLocator.indexerFacade,
|
||||
mailLocator.spamClassifier,
|
||||
mailLocator.customerFacade,
|
||||
)
|
||||
return new MailIndexerPostLoginAction(assertNotNull(offlineStorageSettings), mailLocator.indexerFacade)
|
||||
})
|
||||
}
|
||||
|
||||
mailLocator.logins.addPostLoginAction(async () => {
|
||||
const { SpamClassificationPostLoginAction } = await import("./mail/model/SpamClassificationPostLoginAction")
|
||||
return new SpamClassificationPostLoginAction(mailLocator.spamClassifier, mailLocator.customerFacade)
|
||||
})
|
||||
|
||||
mailLocator.logins.addPostLoginAction(async () => {
|
||||
const { OpenLocallySavedDraftAction } = await import("./mail/editor/OpenLocallySavedDraftAction.js")
|
||||
const { newMailEditorFromLocalDraftData } = await import("./mail/editor/MailEditor.js")
@ -1,4 +1,4 @@
|
|||
import { applyInboxRulesToEntries, LoadedMail, MailSetListModel, resolveMailSetEntries } from "./MailSetListModel"
|
||||
import { applyInboxRulesAndSpamPrediction, LoadedMail, MailSetListModel, resolveMailSetEntries } from "./MailSetListModel"
|
||||
import { ListLoadingState, ListState } from "../../../common/gui/base/List"
|
||||
import { Mail, MailFolder, MailFolderTypeRef, MailSetEntry, MailSetEntryTypeRef, MailTypeRef } from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import { EntityUpdateData, isUpdateForTypeRef } from "../../../common/api/common/utils/EntityUpdateUtils"
|
||||
|
|
@ -7,7 +7,6 @@ import Stream from "mithril/stream"
|
|||
import { ConversationPrefProvider } from "../view/ConversationViewModel"
|
||||
import { EntityClient } from "../../../common/api/common/EntityClient"
|
||||
import { MailModel } from "./MailModel"
|
||||
import { InboxRuleHandler } from "./InboxRuleHandler"
|
||||
import { ExposedCacheStorage } from "../../../common/api/worker/rest/DefaultEntityRestCache"
|
||||
import {
|
||||
CUSTOM_MAX_ID,
|
||||
|
|
@ -34,6 +33,7 @@ import {
|
|||
import { ListFetchResult } from "../../../common/gui/base/ListUtils"
|
||||
import { isOfflineError } from "../../../common/api/common/utils/ErrorUtils"
|
||||
import { OperationType } from "../../../common/api/common/TutanotaConstants"
|
||||
import { ProcessInboxHandler } from "./ProcessInboxHandler"
|
||||
|
||||
/**
|
||||
* Organizes mails into conversations and handles state upkeep.
|
||||
|
|
@ -67,7 +67,7 @@ export class ConversationListModel implements MailSetListModel {
|
|||
private readonly conversationPrefProvider: ConversationPrefProvider,
|
||||
private readonly entityClient: EntityClient,
|
||||
private readonly mailModel: MailModel,
|
||||
private readonly inboxRuleHandler: InboxRuleHandler,
|
||||
private readonly processInboxHandler: ProcessInboxHandler,
|
||||
private readonly cacheStorage: ExposedCacheStorage,
|
||||
) {
|
||||
this.listModel = new ListModel({
|
||||
|
|
@ -467,7 +467,7 @@ export class ConversationListModel implements MailSetListModel {
|
|||
if (mailSetEntries.length > 0) {
|
||||
this.lastFetchedMailSetEntryId = getElementId(lastThrow(mailSetEntries))
|
||||
items = await this.resolveMailSetEntries(mailSetEntries, this.defaultMailProvider)
|
||||
items = await this.applyInboxRulesToEntries(items)
|
||||
items = await this.applyInboxRulesAndSpamPrediction(items)
|
||||
}
|
||||
} catch (e) {
|
||||
if (isOfflineError(e)) {
|
||||
|
|
@ -496,8 +496,8 @@ export class ConversationListModel implements MailSetListModel {
|
|||
}
|
||||
}
|
||||
|
||||
private async applyInboxRulesToEntries(entries: LoadedMail[]): Promise<LoadedMail[]> {
|
||||
return applyInboxRulesToEntries(entries, this.mailSet, this.mailModel, this.inboxRuleHandler)
|
||||
private async applyInboxRulesAndSpamPrediction(entries: LoadedMail[]): Promise<LoadedMail[]> {
|
||||
return applyInboxRulesAndSpamPrediction(entries, this.mailSet, this.mailModel, this.processInboxHandler)
|
||||
}
|
||||
|
||||
// @VisibleForTesting
|
||||
|
|
|
|||
|
|
@ -1,72 +1,21 @@
|
|||
import { createMoveMailData, InboxRule, Mail, MailFolder, MoveMailData } from "../../../common/api/entities/tutanota/TypeRefs.js"
|
||||
import { FeatureType, InboxRuleType, MailSetKind, MAX_NBR_OF_MAILS_SYNC_OPERATION, ProcessingState } from "../../../common/api/common/TutanotaConstants"
|
||||
import { InboxRule, Mail, MailFolder } from "../../../common/api/entities/tutanota/TypeRefs.js"
|
||||
import { InboxRuleType, MailSetKind, ProcessingState } from "../../../common/api/common/TutanotaConstants"
|
||||
import { isDomainName, isRegularExpression } from "../../../common/misc/FormatValidator"
|
||||
import { assertNotNull, asyncFind, debounce, ofClass, promiseMap, splitInChunks, throttleStart } from "@tutao/tutanota-utils"
|
||||
import { assertNotNull, asyncFind, Nullable } from "@tutao/tutanota-utils"
|
||||
import { lang } from "../../../common/misc/LanguageViewModel"
|
||||
import type { MailboxDetail } from "../../../common/mailFunctionality/MailboxModel.js"
|
||||
import { LockedError, PreconditionFailedError } from "../../../common/api/common/error/RestError"
|
||||
import type { SelectorItemList } from "../../../common/gui/base/DropDownSelector.js"
|
||||
import { elementIdPart, isSameId } from "../../../common/api/common/utils/EntityUtils"
|
||||
import { assertMainOrNode, isWebClient } from "../../../common/api/common/Env"
|
||||
import { elementIdPart } from "../../../common/api/common/utils/EntityUtils"
|
||||
import { assertMainOrNode } from "../../../common/api/common/Env"
|
||||
import { MailFacade } from "../../../common/api/worker/facades/lazy/MailFacade.js"
|
||||
import { LoginController } from "../../../common/api/main/LoginController.js"
|
||||
import { getMailHeaders } from "./MailUtils.js"
|
||||
import { MailModel } from "./MailModel"
|
||||
import { UnencryptedProcessInboxDatum } from "./ProcessInboxHandler"
|
||||
import { ClientClassifierType } from "../../../common/api/common/ClientClassifierType"
|
||||
|
||||
assertMainOrNode()
|
||||
|
||||
const moveMailDataPerFolder: MoveMailData[] = []
|
||||
let noRuleMatchMailIds: IdTuple[] = []
|
||||
|
||||
const THROTTLE_MOVE_MAIL_SERVICE_REQUESTS_MS = 200
|
||||
const DEBOUNCE_CLIENT_CLASSIFIER_RESULT_SERVICE_REQUESTS_MS = 1000
|
||||
|
||||
async function sendMoveMailRequest(mailFacade: MailFacade): Promise<void> {
|
||||
if (moveMailDataPerFolder.length) {
|
||||
const moveToTargetFolder = assertNotNull(moveMailDataPerFolder.shift())
|
||||
const mailChunks = splitInChunks(MAX_NBR_OF_MAILS_SYNC_OPERATION, moveToTargetFolder.mails)
|
||||
await promiseMap(mailChunks, (mailChunk) => {
|
||||
moveToTargetFolder.mails = mailChunk
|
||||
return mailFacade.moveMails(mailChunk, moveToTargetFolder.targetFolder, null, ClientClassifierType.CUSTOMER_INBOX_RULES)
|
||||
})
|
||||
.catch(
|
||||
ofClass(LockedError, (e) => {
|
||||
//LockedError should no longer be thrown!?!
|
||||
console.log("moving mail failed", e, moveToTargetFolder)
|
||||
}),
|
||||
)
|
||||
.catch(
|
||||
ofClass(PreconditionFailedError, (e) => {
|
||||
// move mail operation may have been locked by other process
|
||||
console.log("moving mail failed", e, moveToTargetFolder)
|
||||
}),
|
||||
)
|
||||
.finally(() => {
|
||||
return processMatchingRules(mailFacade)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const processMatchingRules = throttleStart(THROTTLE_MOVE_MAIL_SERVICE_REQUESTS_MS, async (mailFacade: MailFacade) => {
|
||||
// Each target folder requires one request,
|
||||
// We debounce the requests to a rate of THROTTLE_MOVE_MAIL_SERVICE_REQUESTS_MS
|
||||
return sendMoveMailRequest(mailFacade)
|
||||
})
|
||||
|
||||
const processNotMatchingRules = debounce(
|
||||
DEBOUNCE_CLIENT_CLASSIFIER_RESULT_SERVICE_REQUESTS_MS,
|
||||
async (mailFacade: MailFacade, processingState: ProcessingState) => {
|
||||
// Each update to ClientClassifierResultService (for mails that did not move) requires one request
|
||||
// We debounce the requests to a rate of DEBOUNCE_CLIENT_CLASSIFIER_RESULT_SERVICE_REQUESTS_MS
|
||||
if (noRuleMatchMailIds.length) {
|
||||
const mailIds = noRuleMatchMailIds
|
||||
noRuleMatchMailIds = []
|
||||
return mailFacade.updateMailPredictionState(mailIds, processingState)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
export function getInboxRuleTypeNameMapping(): SelectorItemList<string> {
|
||||
return [
|
||||
{
|
||||
|
|
@ -112,10 +61,15 @@ export class InboxRuleHandler {
|
|||
* Checks the mail for an existing inbox rule and prepares the move of the mail to the rule's target folder.
* @returns the matching rule's target folder together with the prepared process-inbox datum, or null if no rule matched,
* the rule's target folder was deleted, or this client must not apply the rule on the server
|
||||
*/
|
||||
async findAndApplyMatchingRule(mailboxDetail: MailboxDetail, mail: Readonly<Mail>, applyRulesOnServer: boolean): Promise<MailFolder | null> {
|
||||
async findAndApplyMatchingRule(
|
||||
mailboxDetail: MailboxDetail,
|
||||
mail: Readonly<Mail>,
|
||||
applyRulesOnServer: boolean,
|
||||
): Promise<Nullable<{ targetFolder: MailFolder; processInboxDatum: UnencryptedProcessInboxDatum }>> {
|
||||
const shouldApply =
|
||||
mail.processingState === ProcessingState.INBOX_RULE_NOT_PROCESSED ||
|
||||
mail.processingState === ProcessingState.INBOX_RULE_NOT_PROCESSED_AND_DO_NOT_RUN_SPAM_PREDICTION
|
||||
(mail.processingState === ProcessingState.INBOX_RULE_NOT_PROCESSED ||
|
||||
mail.processingState === ProcessingState.INBOX_RULE_NOT_PROCESSED_AND_DO_NOT_RUN_SPAM_PREDICTION) &&
|
||||
mail.processNeeded
|
||||
|
||||
if (
|
||||
mail._errors ||
|
||||
|
|
@ -128,46 +82,30 @@ export class InboxRuleHandler {
|
|||
}
|
||||
|
||||
const inboxRule = await _findMatchingRule(this.mailFacade, mail, this.logins.getUserController().props.inboxRules)
|
||||
const mailDetails = await this.mailFacade.loadMailDetailsBlob(mail)
|
||||
if (inboxRule) {
|
||||
const folders = await this.mailModel.getMailboxFoldersForId(mailboxDetail.mailbox.folders._id)
|
||||
const targetFolder = folders.getFolderById(elementIdPart(inboxRule.targetFolder))
|
||||
|
||||
if (targetFolder && targetFolder.folderType !== MailSetKind.INBOX) {
|
||||
if (applyRulesOnServer) {
|
||||
let moveMailData = moveMailDataPerFolder.find((folderMoveMailData) => isSameId(folderMoveMailData.targetFolder, inboxRule.targetFolder))
|
||||
|
||||
if (moveMailData) {
|
||||
moveMailData.mails.push(mail._id)
|
||||
} else {
|
||||
moveMailData = createMoveMailData({
|
||||
targetFolder: inboxRule.targetFolder,
|
||||
mails: [mail._id],
|
||||
excludeMailSet: null,
|
||||
moveReason: ClientClassifierType.CUSTOMER_INBOX_RULES,
|
||||
})
|
||||
moveMailDataPerFolder.push(moveMailData)
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: targetFolder._id,
|
||||
classifierType: ClientClassifierType.CUSTOMER_INBOX_RULES,
|
||||
vector: await this.mailFacade.vectorizeAndCompressMails({ mail, mailDetails }),
|
||||
}
|
||||
return { targetFolder, processInboxDatum }
|
||||
} else {
|
||||
// non-leader client, so the rule is not applied on the server from here
|
||||
return null
|
||||
}
|
||||
|
||||
processMatchingRules(this.mailFacade)
|
||||
|
||||
return targetFolder
|
||||
} else {
|
||||
// target folder of inbox rule was deleted
|
||||
return null
|
||||
}
|
||||
} else {
|
||||
await this.logins.loadCustomizations()
|
||||
const isSpamClassificationFeatureEnabled = this.logins.isEnabled(FeatureType.SpamClientClassification)
|
||||
|
||||
// we set the processing state to a final state in case the feature is not enabled,
|
||||
// to not re-classify when the feature gets enabled for the user
|
||||
let processingState = isSpamClassificationFeatureEnabled
|
||||
? ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_PENDING
|
||||
: ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE
|
||||
|
||||
noRuleMatchMailIds.push(mail._id)
|
||||
processNotMatchingRules(this.mailFacade, processingState)
|
||||
|
||||
// no inbox rule applies to the mail
|
||||
return null
|
||||
}
|
||||
}
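Since the return type of findAndApplyMatchingRule changed from a plain folder to an object (or null), a short illustrative sketch of how a caller consumes the result (ProcessInboxHandler further below does essentially this; the variable names are placeholders):

// Illustrative caller of the new findAndApplyMatchingRule result shape:
const result = await inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)
if (result != null) {
	// a rule matched and this client is allowed to apply it on the server
	const { targetFolder, processInboxDatum } = result
	console.log("rule moves mail", processInboxDatum.mailId, "to folder", targetFolder._id)
} else {
	// no matching rule, the rule's target folder was deleted, or this is a non-leader client
}
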
@ -17,12 +17,12 @@ import { ListLoadingState, ListState } from "../../../common/gui/base/List"
|
|||
import Stream from "mithril/stream"
|
||||
import { EntityUpdateData, isUpdateForTypeRef } from "../../../common/api/common/utils/EntityUpdateUtils"
|
||||
import { OperationType } from "../../../common/api/common/TutanotaConstants"
|
||||
import { InboxRuleHandler } from "./InboxRuleHandler"
|
||||
import { MailModel } from "./MailModel"
|
||||
import { ListFetchResult } from "../../../common/gui/base/ListUtils"
|
||||
import { isOfflineError } from "../../../common/api/common/utils/ErrorUtils"
|
||||
import { ExposedCacheStorage } from "../../../common/api/worker/rest/DefaultEntityRestCache"
|
||||
import { applyInboxRulesToEntries, LoadedMail, MailSetListModel, resolveMailSetEntries } from "./MailSetListModel"
|
||||
import { applyInboxRulesAndSpamPrediction, LoadedMail, MailSetListModel, resolveMailSetEntries } from "./MailSetListModel"
|
||||
import { ProcessInboxHandler } from "./ProcessInboxHandler"
|
||||
|
||||
assertMainOrNode()
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ export class MailListModel implements MailSetListModel {
|
|||
private readonly conversationPrefProvider: ConversationPrefProvider,
|
||||
private readonly entityClient: EntityClient,
|
||||
private readonly mailModel: MailModel,
|
||||
private readonly inboxRuleHandler: InboxRuleHandler,
|
||||
private readonly processInboxHandler: ProcessInboxHandler,
|
||||
private readonly cacheStorage: ExposedCacheStorage,
|
||||
) {
|
||||
this.listModel = new ListModel({
|
||||
|
|
@ -304,7 +304,7 @@ export class MailListModel implements MailSetListModel {
|
|||
complete = mailSetEntries.length < count
|
||||
if (mailSetEntries.length > 0) {
|
||||
items = await this.resolveMailSetEntries(mailSetEntries, this.defaultMailProvider)
|
||||
items = await this.applyInboxRulesToEntries(items)
|
||||
items = await this.applyInboxRulesAndSpamPrediction(items)
|
||||
}
|
||||
} catch (e) {
|
||||
if (isOfflineError(e)) {
|
||||
|
|
@ -345,11 +345,8 @@ export class MailListModel implements MailSetListModel {
|
|||
return await this.resolveMailSetEntries(mailSetEntries, (list, elements) => this.cacheStorage.provideMultiple(MailTypeRef, list, elements))
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply inbox rules to an array of mails, returning all mails that were not moved
|
||||
*/
|
||||
private async applyInboxRulesToEntries(entries: LoadedMail[]): Promise<LoadedMail[]> {
|
||||
return applyInboxRulesToEntries(entries, this.mailSet, this.mailModel, this.inboxRuleHandler)
|
||||
private async applyInboxRulesAndSpamPrediction(entries: LoadedMail[]): Promise<LoadedMail[]> {
|
||||
return applyInboxRulesAndSpamPrediction(entries, this.mailSet, this.mailModel, this.processInboxHandler)
|
||||
}
|
||||
|
||||
private async loadSingleMail(id: IdTuple): Promise<LoadedMail> {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import { FolderSystem } from "../../../common/api/common/mail/FolderSystem.js"
|
|||
import {
|
||||
assertNotNull,
|
||||
collectToMap,
|
||||
downcast,
|
||||
getFirstOrThrow,
|
||||
groupBy,
|
||||
groupByAndMap,
|
||||
|
|
@ -34,7 +33,6 @@ import {
|
|||
MailSetKind,
|
||||
MAX_NBR_OF_MAILS_SYNC_OPERATION,
|
||||
OperationType,
|
||||
ProcessingState,
|
||||
ReportMovedMailsType,
|
||||
SimpleMoveMailTarget,
|
||||
SystemFolderType,
|
||||
|
|
@ -49,16 +47,15 @@ import { ProgrammingError } from "../../../common/api/common/error/ProgrammingEr
|
|||
import { NotAuthorizedError, NotFoundError, PreconditionFailedError } from "../../../common/api/common/error/RestError.js"
|
||||
import { UserError } from "../../../common/api/main/UserError.js"
|
||||
import { EventController } from "../../../common/api/main/EventController.js"
|
||||
import { InboxRuleHandler } from "./InboxRuleHandler.js"
|
||||
import { WebsocketConnectivityModel } from "../../../common/misc/WebsocketConnectivityModel.js"
|
||||
import { EntityClient } from "../../../common/api/common/EntityClient.js"
|
||||
import { LoginController } from "../../../common/api/main/LoginController.js"
|
||||
import { MailFacade } from "../../../common/api/worker/facades/lazy/MailFacade.js"
|
||||
import { assertSystemFolderOfType } from "./MailUtils.js"
|
||||
import { TutanotaError } from "@tutao/tutanota-error"
|
||||
import { SpamClassificationHandler } from "./SpamClassificationHandler"
|
||||
import { isWebClient } from "../../../common/api/common/Env"
|
||||
import { isExpectedErrorForSynchronization } from "../../../common/api/common/utils/ErrorUtils"
|
||||
import { ProcessInboxHandler } from "./ProcessInboxHandler"
|
||||
import { isWebClient } from "../../../common/api/common/Env"
|
||||
|
||||
interface MailboxSets {
|
||||
folders: FolderSystem
|
||||
|
|
@ -95,8 +92,7 @@ export class MailModel {
|
|||
private readonly logins: LoginController,
|
||||
private readonly mailFacade: MailFacade,
|
||||
private readonly connectivityModel: WebsocketConnectivityModel | null,
|
||||
private spamHandler: () => SpamClassificationHandler | null,
|
||||
private readonly inboxRuleHandler: () => InboxRuleHandler | null,
|
||||
private readonly processInboxHandler: () => ProcessInboxHandler,
|
||||
) {}
|
||||
|
||||
// only init listeners once
|
||||
|
|
@ -116,12 +112,6 @@ export class MailModel {
|
|||
async init(): Promise<void> {
|
||||
this.initListeners()
|
||||
this.mailSets = await this.loadMailSets()
|
||||
|
||||
await this.logins.loadCustomizations()
|
||||
const isSpamClassificationFeatureEnabled = this.logins.isEnabled(FeatureType.SpamClientClassification)
|
||||
if (!isSpamClassificationFeatureEnabled) {
|
||||
this.spamHandler = () => null
|
||||
}
|
||||
}
|
||||
|
||||
private async loadMailSets(): Promise<Map<Id, MailboxSets>> {
|
||||
|
|
@ -194,119 +184,57 @@ export class MailModel {
|
|||
}
|
||||
|
||||
// visibleForTesting
|
||||
async entityEventsReceived(updates: ReadonlyArray<EntityUpdateData>): Promise<{ processingDone: Promise<void> }> {
|
||||
async entityEventsReceived(updates: ReadonlyArray<EntityUpdateData>): Promise<void> {
|
||||
for (const update of updates) {
|
||||
if (isUpdateForTypeRef(MailFolderTypeRef, update)) {
|
||||
await this.init()
|
||||
m.redraw()
|
||||
} else if (isUpdateForTypeRef(MailTypeRef, update) && update.operation === OperationType.UPDATE) {
|
||||
const mailId: IdTuple = [update.instanceListId, update.instanceId]
|
||||
const mail = await this.loadMail(mailId)
|
||||
if (mail == null) {
|
||||
return { processingDone: Promise.resolve() }
|
||||
}
|
||||
const spamHandler = this.spamHandler()
|
||||
await spamHandler?.updateSpamClassificationData(mail)
|
||||
} else if (isUpdateForTypeRef(MailTypeRef, update) && update.operation === OperationType.CREATE) {
|
||||
const mailId: IdTuple = [update.instanceListId, update.instanceId]
|
||||
const mail = await this.loadMail(mailId)
|
||||
if (mail == null) {
|
||||
return { processingDone: Promise.resolve() }
|
||||
return
|
||||
}
|
||||
|
||||
// If an inbox rule has been applied or a spam prediction has been made
|
||||
// we can return, because those are the two final processing states
|
||||
if (
|
||||
mail.processingState === ProcessingState.INBOX_RULE_APPLIED ||
|
||||
mail.processingState === ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE
|
||||
) {
|
||||
return { processingDone: Promise.resolve() }
|
||||
}
|
||||
|
||||
// The webapp currently does not support spam prediction, and the inbox rule has been processed
|
||||
if (isWebClient() && mail.processingState === ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_PENDING) {
|
||||
return { processingDone: Promise.resolve() }
|
||||
if (!mail.processNeeded) {
|
||||
return
|
||||
}
|
||||
|
||||
const sourceMailFolder = this.getMailFolderForMail(mail)
|
||||
if (sourceMailFolder == null) {
|
||||
return { processingDone: Promise.resolve() }
|
||||
return
|
||||
}
|
||||
|
||||
const isLeaderClient = this.connectivityModel?.isLeader() ?? false
|
||||
if (sourceMailFolder.folderType === MailSetKind.INBOX) {
|
||||
const isInboxRuleTargetFolder = await this.getMailboxDetailsForMail(mail).then((mailboxDetail) => {
|
||||
// We only apply rules on server if we are the leader in case of incoming messages
|
||||
return mailboxDetail && this.inboxRuleHandler()?.findAndApplyMatchingRule(mailboxDetail, mail, isLeaderClient)
|
||||
})
|
||||
const mailboxDetail = await this.getMailboxDetailsForMail(mail)
|
||||
const folderSystem = this.getFolderSystemByGroupId(assertNotNull(mail._ownerGroup))
|
||||
|
||||
if (isWebClient()) {
|
||||
// we only need to show notifications explicitly on the webapp
|
||||
this._showNotification(isInboxRuleTargetFolder ?? sourceMailFolder, mail)
|
||||
} else if (this.spamHandler() != null) {
|
||||
const mailDetails = await this.mailFacade.loadMailDetailsBlob(mail)
|
||||
this.spamHandler()?.storeTrainingDatum(mail, mailDetails)
|
||||
|
||||
if (isInboxRuleTargetFolder) {
|
||||
return { processingDone: Promise.resolve() }
|
||||
} else if (
|
||||
(isLeaderClient && mail.processingState === ProcessingState.INBOX_RULE_NOT_PROCESSED) ||
|
||||
(mail.processingState === ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_PENDING && mail.unread)
|
||||
) {
|
||||
const folderSystem = this.getFolderSystemByGroupId(assertNotNull(mail._ownerGroup))
|
||||
if (sourceMailFolder && folderSystem) {
|
||||
const predictPromise = this.spamHandler()?.predictSpamForNewMail(mail, mailDetails, sourceMailFolder, folderSystem)
|
||||
return { processingDone: downcast(predictPromise) }
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (sourceMailFolder.folderType === MailSetKind.SPAM) {
|
||||
const mailDetails = await this.mailFacade.loadMailDetailsBlob(mail)
|
||||
this.spamHandler()?.storeTrainingDatum(mail, mailDetails)
|
||||
|
||||
if (
|
||||
(isLeaderClient && mail.processingState === ProcessingState.INBOX_RULE_NOT_PROCESSED) ||
|
||||
mail.processingState === ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_PENDING
|
||||
) {
|
||||
const folderSystem = this.getFolderSystemByGroupId(assertNotNull(mail._ownerGroup))
|
||||
if (sourceMailFolder && folderSystem) {
|
||||
const predictPromise = this.spamHandler()?.predictSpamForNewMail(mail, mailDetails, sourceMailFolder, folderSystem)
|
||||
return { processingDone: downcast(predictPromise) }
|
||||
}
|
||||
}
|
||||
let targetFolder = sourceMailFolder
|
||||
const isInternalUser = this.logins.getUserController().isInternalUser()
|
||||
if (isLeaderClient && isInternalUser && mailboxDetail && folderSystem) {
|
||||
targetFolder = await this.processInboxHandler().handleIncomingMail(mail, sourceMailFolder, mailboxDetail, folderSystem)
|
||||
}
|
||||
if (isWebClient()) {
|
||||
this._showNotification(targetFolder, mail)
|
||||
}
|
||||
} else if (isUpdateForTypeRef(MailTypeRef, update) && update.operation === OperationType.DELETE) {
|
||||
const mailId: IdTuple = [update.instanceListId, update.instanceId]
|
||||
await this.spamHandler()?.dropClassificationData(mailId)
|
||||
}
|
||||
}
|
||||
return { processingDone: Promise.resolve() }
|
||||
}
|
||||
|
||||
public async loadMail(mailId: IdTuple): Promise<Nullable<Mail>> {
|
||||
return await this.entityClient.load(MailTypeRef, mailId).catch((e) => {
|
||||
if (isExpectedErrorForSynchronization(e)) {
|
||||
console.log(`Could not find mail ${JSON.stringify(mailId)}`)
|
||||
console.log(`could not find mail ${JSON.stringify(mailId)}`)
|
||||
return null
|
||||
}
|
||||
throw e
|
||||
})
|
||||
}
|
||||
|
||||
async applyInboxRuleToMail(mail: Mail) {
|
||||
const inboxRuleHandler = this.inboxRuleHandler()
|
||||
if (inboxRuleHandler) {
|
||||
const mailboxDetail = await this.getMailboxDetailsForMail(mail)
|
||||
if (mailboxDetail) {
|
||||
return inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async getMailboxDetailsForMail(mail: Mail): Promise<MailboxDetail | null> {
|
||||
const detail = await this.mailboxModel.getMailboxDetailsForMailGroup(assertNotNull(mail._ownerGroup))
|
||||
if (detail == null) {
|
||||
console.warn("Mailbox detail for mail does not exist", mail)
|
||||
console.warn("mailboxDetail for mail does not exist", mail)
|
||||
}
|
||||
return detail
|
||||
}
|
||||
|
|
@ -314,7 +242,7 @@ export class MailModel {
|
|||
async getMailboxDetailsForMailFolder(mailFolder: MailFolder): Promise<MailboxDetail | null> {
|
||||
const detail = await this.mailboxModel.getMailboxDetailsForMailGroup(assertNotNull(mailFolder._ownerGroup))
|
||||
if (detail == null) {
|
||||
console.warn("Mailbox detail for mail folder does not exist", mailFolder)
|
||||
console.warn("mailbox detail for mail folder does not exist", mailFolder)
|
||||
}
|
||||
return detail
|
||||
}
|
||||
|
|
@ -411,7 +339,7 @@ export class MailModel {
|
|||
* @param targetMailFolderKind
|
||||
*/
|
||||
async simpleMoveMails(mails: readonly IdTuple[], targetMailFolderKind: SimpleMoveMailTarget): Promise<MovedMails[]> {
|
||||
return await this.mailFacade.simpleMoveMails(mails, targetMailFolderKind, null)
|
||||
return await this.mailFacade.simpleMoveMails(mails, targetMailFolderKind)
|
||||
}
|
||||
|
||||
getFolderExcludedFromMove(moveMode: MoveMode): SystemFolderType | null {
@ -6,8 +6,8 @@ import Stream from "mithril/stream"
|
|||
import { MailModel } from "./MailModel"
|
||||
import { elementIdPart, getElementId, listIdPart } from "../../../common/api/common/utils/EntityUtils"
|
||||
import { MailSetKind } from "../../../common/api/common/TutanotaConstants"
|
||||
import { groupByAndMap, promiseFilter } from "@tutao/tutanota-utils"
|
||||
import { InboxRuleHandler } from "./InboxRuleHandler"
|
||||
import { groupByAndMap, isEmpty, promiseFilter } from "@tutao/tutanota-utils"
|
||||
import { ProcessInboxHandler } from "./ProcessInboxHandler"
|
||||
|
||||
/**
|
||||
* Interface for retrieving and listing mails
|
||||
|
|
@ -274,23 +274,30 @@ export async function provideAllMails(ids: IdTuple[], mailProvider: (listId: Id,
|
|||
}
|
||||
|
||||
/**
|
||||
* Apply inbox rules to an array of mails, returning all mails that were not moved
|
||||
* Apply inbox rules and run spam prediction on an array of mails, returning all mails that were not moved
|
||||
*/
|
||||
export async function applyInboxRulesToEntries(
|
||||
export async function applyInboxRulesAndSpamPrediction(
|
||||
entries: LoadedMail[],
|
||||
mailSet: MailFolder,
|
||||
sourceFolder: MailFolder,
|
||||
mailModel: MailModel,
|
||||
inboxRuleHandler: InboxRuleHandler,
|
||||
processInboxHandler: ProcessInboxHandler,
|
||||
): Promise<LoadedMail[]> {
|
||||
if (mailSet.folderType !== MailSetKind.INBOX || entries.length === 0) {
|
||||
if (isEmpty(entries)) {
|
||||
return entries
|
||||
}
|
||||
const mailboxDetail = await mailModel.getMailboxDetailsForMailFolder(mailSet)
|
||||
if (!(sourceFolder.folderType === MailSetKind.SPAM || sourceFolder.folderType === MailSetKind.INBOX)) {
|
||||
return entries
|
||||
}
|
||||
const mailboxDetail = await mailModel.getMailboxDetailsForMailFolder(sourceFolder)
|
||||
if (!mailboxDetail) {
|
||||
return entries
|
||||
}
|
||||
const folderSystem = mailModel.getFolderSystemByGroupId(mailboxDetail.mailGroup._id)
|
||||
if (!folderSystem) {
|
||||
return entries
|
||||
}
|
||||
return await promiseFilter(entries, async (entry) => {
|
||||
const ruleApplied = await inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, entry.mail, true)
|
||||
return ruleApplied == null
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(entry.mail, sourceFolder, mailboxDetail, folderSystem)
|
||||
return sourceFolder.folderType === targetFolder.folderType
|
||||
})
|
||||
}
src/mail-app/mail/model/ProcessInboxHandler.ts (new file, 93 lines)

@ -0,0 +1,93 @@
import { SpamClassificationHandler } from "./SpamClassificationHandler"
import { InboxRuleHandler } from "./InboxRuleHandler"
import { Mail, MailFolder, ProcessInboxDatum } from "../../../common/api/entities/tutanota/TypeRefs"
import { FeatureType, MailSetKind } from "../../../common/api/common/TutanotaConstants"
import { assertNotNull, debounce, Nullable } from "@tutao/tutanota-utils"
import { MailFacade } from "../../../common/api/worker/facades/lazy/MailFacade"
import { MailboxDetail } from "../../../common/mailFunctionality/MailboxModel"
import { FolderSystem } from "../../../common/api/common/mail/FolderSystem"
import { assertMainOrNode } from "../../../common/api/common/Env"
import { StrippedEntity } from "../../../common/api/common/utils/EntityUtils"
import { LoginController } from "../../../common/api/main/LoginController"

assertMainOrNode()

export type UnencryptedProcessInboxDatum = Omit<StrippedEntity<ProcessInboxDatum>, "encVector" | "ownerEncVectorSessionKey"> & {
	vector: Uint8Array
}

const DEFAULT_DEBOUNCE_PROCESS_INBOX_SERVICE_REQUESTS_MS = 1000

export class ProcessInboxHandler {
	sendProcessInboxServiceRequest: (mailFacade: MailFacade) => Promise<void>

	constructor(
		private readonly logins: LoginController,
		private readonly mailFacade: MailFacade,
		private spamHandler: () => SpamClassificationHandler,
		private readonly inboxRuleHandler: () => InboxRuleHandler,
		private processedMailsByMailGroup: Map<Id, UnencryptedProcessInboxDatum[]> = new Map(),
		private readonly debounceTimeout: number = DEFAULT_DEBOUNCE_PROCESS_INBOX_SERVICE_REQUESTS_MS,
	) {
		this.sendProcessInboxServiceRequest = debounce(this.debounceTimeout, async (mailFacade: MailFacade) => {
			// we debounce the requests to a rate of DEFAULT_DEBOUNCE_PROCESS_INBOX_SERVICE_REQUESTS_MS
			if (this.processedMailsByMailGroup.size > 0) {
				// copy map to prevent inserting into map while we await the server
				const map = this.processedMailsByMailGroup
				this.processedMailsByMailGroup = new Map()
				for (const [mailGroup, processedMails] of map) {
					// send request to server
					await mailFacade.processNewMails(mailGroup, processedMails)
				}
			}
		})
	}

	public async handleIncomingMail(mail: Mail, sourceFolder: MailFolder, mailboxDetail: MailboxDetail, folderSystem: FolderSystem): Promise<MailFolder> {
		await this.logins.loadCustomizations()
		const isSpamClassificationFeatureEnabled = this.logins.isEnabled(FeatureType.SpamClientClassification)
		if (!mail.processNeeded) {
			return sourceFolder
		}

		const mailDetails = await this.mailFacade.loadMailDetailsBlob(mail)

		let finalProcessInboxDatum: Nullable<UnencryptedProcessInboxDatum> = null
		let moveToFolder: MailFolder = sourceFolder

		if (sourceFolder.folderType === MailSetKind.INBOX) {
			const result = await this.inboxRuleHandler()?.findAndApplyMatchingRule(mailboxDetail, mail, true)
			if (result) {
				const { targetFolder, processInboxDatum } = result
				finalProcessInboxDatum = processInboxDatum
				moveToFolder = targetFolder
			}
		}

		if (finalProcessInboxDatum === null) {
			if (isSpamClassificationFeatureEnabled) {
				const { targetFolder, processInboxDatum } = await this.spamHandler().predictSpamForNewMail(mail, mailDetails, sourceFolder, folderSystem)
				moveToFolder = targetFolder
				finalProcessInboxDatum = processInboxDatum
			} else {
				finalProcessInboxDatum = {
					mailId: mail._id,
					targetMoveFolder: moveToFolder._id,
					classifierType: null,
					vector: await this.mailFacade.vectorizeAndCompressMails({ mail, mailDetails }),
				}
			}
		}

		const mailGroupId = assertNotNull(mail._ownerGroup)
		if (this.processedMailsByMailGroup.has(mailGroupId)) {
			this.processedMailsByMailGroup.get(mailGroupId)?.push(finalProcessInboxDatum)
		} else {
			this.processedMailsByMailGroup.set(mailGroupId, [finalProcessInboxDatum])
		}

		// noinspection ES6MissingAwait
		this.sendProcessInboxServiceRequest(this.mailFacade)
		return moveToFolder
	}
}

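A usage sketch of the handler above, assuming it is wired up as in mailLocator further below; the mail, folder, and mailbox values are placeholders. Several mails handled within the one-second debounce window are flushed as one request per mail group:

// Sketch only: batching behaviour of ProcessInboxHandler (all values are placeholders).
const processInboxHandler = new ProcessInboxHandler(logins, mailFacade, () => spamClassificationHandler, () => inboxRuleHandler)
for (const mail of incomingInboxMails) {
	// each call queues one UnencryptedProcessInboxDatum under the mail's owner group
	// and returns the folder the mail should end up in
	await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
}
// after the debounce window the queued data is sent: one mailFacade.processNewMails() call per mail group
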
@ -1,186 +1,42 @@
|
|||
import { createMoveMailData, Mail, MailAddress, MailDetails, MailFolder, MoveMailData } from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import {
|
||||
DEFAULT_IS_SPAM,
|
||||
DEFAULT_IS_SPAM_CONFIDENCE,
|
||||
getSpamConfidence,
|
||||
MailAuthenticationStatus,
|
||||
MailSetKind,
|
||||
ProcessingState,
|
||||
SpamDecision,
|
||||
} from "../../../common/api/common/TutanotaConstants"
|
||||
import { SpamClassifier, SpamPredMailDatum, SpamTrainMailDatum } from "../../workerUtils/spamClassification/SpamClassifier"
|
||||
import { getMailBodyText } from "../../../common/api/common/CommonMailUtils"
|
||||
import { assertNotNull, debounce, isNotNull, Nullable, ofClass } from "@tutao/tutanota-utils"
|
||||
import { MailFacade } from "../../../common/api/worker/facades/lazy/MailFacade"
|
||||
import { ClientClassifierType } from "../../../common/api/common/ClientClassifierType"
|
||||
import { Mail, MailDetails, MailFolder } from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import { MailSetKind } from "../../../common/api/common/TutanotaConstants"
|
||||
import { SpamClassifier } from "../../workerUtils/spamClassification/SpamClassifier"
|
||||
import { assertNotNull } from "@tutao/tutanota-utils"
|
||||
import { FolderSystem } from "../../../common/api/common/mail/FolderSystem"
|
||||
import { LockedError, PreconditionFailedError } from "../../../common/api/common/error/RestError"
|
||||
import { assertMainOrNode } from "../../../common/api/common/Env"
|
||||
import { UnencryptedProcessInboxDatum } from "./ProcessInboxHandler"
|
||||
import { ClientClassifierType } from "../../../common/api/common/ClientClassifierType"
|
||||
import { createSpamMailDatum } from "../../../common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
import { isSameId } from "../../../common/api/common/utils/EntityUtils"
|
||||
|
||||
const DEBOUNCE_MOVE_MAIL_SERVICE_REQUESTS_MS = 500
|
||||
const DEBOUNCE_CLIENT_CLASSIFIER_RESULT_SERVICE_REQUESTS_MS = 1000
|
||||
assertMainOrNode()
|
||||
|
||||
export class SpamClassificationHandler {
|
||||
public constructor(
|
||||
private readonly mailFacade: MailFacade,
|
||||
private readonly spamClassifier: Nullable<SpamClassifier>,
|
||||
) {}
|
||||
public constructor(private readonly spamClassifier: SpamClassifier) {}
|
||||
|
||||
hamMoveMailData: MoveMailData | null = null
|
||||
spamMoveMailData: MoveMailData | null = null
|
||||
classifierResultServiceMailIds: IdTuple[] = []
|
||||
public async predictSpamForNewMail(
|
||||
mail: Mail,
|
||||
mailDetails: MailDetails,
|
||||
sourceFolder: MailFolder,
|
||||
folderSystem: FolderSystem,
|
||||
): Promise<{ targetFolder: MailFolder; processInboxDatum: UnencryptedProcessInboxDatum }> {
|
||||
const spamMailDatum = createSpamMailDatum(mail, mailDetails)
|
||||
|
||||
sendClassifierResultServiceRequest = debounce(DEBOUNCE_CLIENT_CLASSIFIER_RESULT_SERVICE_REQUESTS_MS, async (mailFacade: MailFacade) => {
|
||||
// Each update to ClientClassifierResultService (for mails that did not move) requires one request
|
||||
// We debounce the requests to a rate of DEBOUNCE_CLIENT_CLASSIFIER_RESULT_SERVICE_REQUESTS_MS
|
||||
if (this.classifierResultServiceMailIds.length) {
|
||||
const mailIds = this.classifierResultServiceMailIds
|
||||
this.classifierResultServiceMailIds = []
|
||||
return mailFacade.updateMailPredictionState(mailIds, ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE)
|
||||
}
|
||||
})
|
||||
|
||||
sendMoveMailServiceRequest = debounce(DEBOUNCE_MOVE_MAIL_SERVICE_REQUESTS_MS, async (mailFacade: MailFacade) => {
|
||||
// Each update to MoveMailService (for ham or spam mails that did move) requires one request
|
||||
// We debounce the requests to a rate of DEBOUNCE_MOVE_MAIL_SERVICE_REQUESTS_MS
|
||||
if (this.hamMoveMailData) {
|
||||
const moveMailData = this.hamMoveMailData
|
||||
this.hamMoveMailData = null
|
||||
await this.sendMoveMailRequest(mailFacade, moveMailData)
|
||||
}
|
||||
if (this.spamMoveMailData) {
|
||||
const moveMailData = this.spamMoveMailData
|
||||
this.spamMoveMailData = null
|
||||
await this.sendMoveMailRequest(mailFacade, moveMailData)
|
||||
}
|
||||
})
|
||||
|
||||
async sendMoveMailRequest(mailFacade: MailFacade, moveMailData: MoveMailData): Promise<void> {
|
||||
mailFacade
|
||||
.moveMails(moveMailData.mails, moveMailData.targetFolder, null, ClientClassifierType.CLIENT_CLASSIFICATION)
|
||||
.catch(
|
||||
ofClass(LockedError, (e) => {
|
||||
// LockedError should no longer be thrown!?!
|
||||
console.log("moving mails failed", e, moveMailData.targetFolder)
|
||||
}),
|
||||
)
|
||||
.catch(
|
||||
ofClass(PreconditionFailedError, (e) => {
|
||||
// move mail operation may have been locked by other process
|
||||
console.log("moving mails failed", e, moveMailData.targetFolder)
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
public async predictSpamForNewMail(mail: Mail, mailDetails: MailDetails, sourceFolder: MailFolder, folderSystem: FolderSystem): Promise<MailFolder> {
|
||||
const spamPredMailDatum: SpamPredMailDatum = {
|
||||
subject: mail.subject,
|
||||
body: getMailBodyText(mailDetails.body),
|
||||
ownerGroup: assertNotNull(mail._ownerGroup),
|
||||
...extractSpamHeaderFeatures(mail, mailDetails),
|
||||
}
|
||||
const isSpam = (await this.spamClassifier?.predict(spamPredMailDatum)) ?? null
|
||||
const vectorizedMail = await this.spamClassifier.vectorize(spamMailDatum)
|
||||
const isSpam = (await this.spamClassifier.predict(vectorizedMail, spamMailDatum.ownerGroup)) ?? null
|
||||
|
||||
let targetFolder = sourceFolder
|
||||
if (isSpam && sourceFolder.folderType === MailSetKind.INBOX) {
|
||||
const spamFolder = assertNotNull(folderSystem.getSystemFolderByType(MailSetKind.SPAM))
|
||||
if (this.spamMoveMailData) {
|
||||
this.spamMoveMailData.mails.push(mail._id)
|
||||
} else {
|
||||
this.spamMoveMailData = createMoveMailData({
|
||||
targetFolder: spamFolder?._id,
|
||||
mails: [mail._id],
|
||||
excludeMailSet: null,
|
||||
moveReason: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
})
|
||||
}
|
||||
await this.sendMoveMailServiceRequest(this.mailFacade)
|
||||
return spamFolder
|
||||
targetFolder = assertNotNull(folderSystem.getSystemFolderByType(MailSetKind.SPAM))
|
||||
} else if (!isSpam && sourceFolder.folderType === MailSetKind.SPAM) {
|
||||
const hamFolder = assertNotNull(folderSystem.getSystemFolderByType(MailSetKind.INBOX))
|
||||
if (this.hamMoveMailData) {
|
||||
this.hamMoveMailData.mails.push(mail._id)
|
||||
} else {
|
||||
this.hamMoveMailData = createMoveMailData({
|
||||
targetFolder: hamFolder?._id,
|
||||
mails: [mail._id],
|
||||
excludeMailSet: null,
|
||||
moveReason: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
})
|
||||
}
|
||||
await this.sendMoveMailServiceRequest(this.mailFacade)
|
||||
return hamFolder
|
||||
} else if (mail.processingState !== ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE) {
|
||||
this.classifierResultServiceMailIds.push(mail._id)
|
||||
await this.sendClassifierResultServiceRequest(this.mailFacade)
|
||||
return sourceFolder
|
||||
} else {
|
||||
return sourceFolder
|
||||
targetFolder = assertNotNull(folderSystem.getSystemFolderByType(MailSetKind.INBOX))
|
||||
}
|
||||
}
|
||||
|
||||
public async updateSpamClassificationData(mail: Mail) {
|
||||
if (this.spamClassifier == null || mail.clientSpamClassifierResult == null) {
|
||||
return
|
||||
}
|
||||
const storedClassification = await this.spamClassifier.getSpamClassification(mail._id)
|
||||
const isSpam = mail.clientSpamClassifierResult.spamDecision === SpamDecision.BLACKLIST
|
||||
const isSpamConfidence = getSpamConfidence(mail)
|
||||
|
||||
if (isNotNull(storedClassification) && (isSpam !== storedClassification.isSpam || isSpamConfidence !== storedClassification.isSpamConfidence)) {
|
||||
// the model has trained on the mail but the spamFlag was wrong so we refit with higher isSpamConfidence
|
||||
await this.spamClassifier.updateSpamClassification(mail._id, isSpam, isSpamConfidence)
|
||||
}
|
||||
}
|
||||
|
||||
public async dropClassificationData(mailId: IdTuple) {
|
||||
await this.spamClassifier?.deleteSpamClassification(mailId)
|
||||
}
|
||||
|
||||
public async storeTrainingDatum(mail: Mail, mailDetails: MailDetails) {
|
||||
const spamTrainMailDatum: SpamTrainMailDatum = {
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
mailId: mail._id,
|
||||
subject: mail.subject,
|
||||
body: getMailBodyText(mailDetails.body),
|
||||
isSpam: DEFAULT_IS_SPAM,
|
||||
isSpamConfidence: DEFAULT_IS_SPAM_CONFIDENCE,
|
||||
ownerGroup: assertNotNull(mail._ownerGroup),
|
||||
...extractSpamHeaderFeatures(mail, mailDetails),
|
||||
targetMoveFolder: targetFolder._id,
|
||||
classifierType: isSameId(targetFolder._id, sourceFolder._id) ? null : ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
vector: await this.spamClassifier.vectorizeAndCompress(spamMailDatum),
|
||||
}
|
||||
await this.spamClassifier?.storeSpamClassification(spamTrainMailDatum)
|
||||
return { targetFolder, processInboxDatum: processInboxDatum }
|
||||
}
|
||||
}
|
||||
|
||||
export function extractSpamHeaderFeatures(mail: Mail, mailDetails: MailDetails) {
|
||||
const sender = joinNamesAndMailAddresses([mail?.sender])
|
||||
const { toRecipients, ccRecipients, bccRecipients } = extractRecipients(mailDetails)
|
||||
const authStatus = convertAuthStatusToSpamCategorizationToken(mail.authStatus)
|
||||
|
||||
return { sender, toRecipients, ccRecipients, bccRecipients, authStatus }
|
||||
}
|
||||
|
||||
function extractRecipients({ recipients }: MailDetails) {
|
||||
const toRecipients = joinNamesAndMailAddresses(recipients?.toRecipients)
|
||||
const ccRecipients = joinNamesAndMailAddresses(recipients?.ccRecipients)
|
||||
const bccRecipients = joinNamesAndMailAddresses(recipients?.bccRecipients)
|
||||
|
||||
return { toRecipients, ccRecipients, bccRecipients }
|
||||
}
|
||||
|
||||
function joinNamesAndMailAddresses(recipients: MailAddress[] | null) {
|
||||
return recipients?.map((recipient) => `${recipient?.name} ${recipient?.address}`).join(" ") || ""
|
||||
}
|
||||
|
||||
function convertAuthStatusToSpamCategorizationToken(authStatus: string | null): string {
|
||||
if (authStatus === MailAuthenticationStatus.AUTHENTICATED) {
|
||||
return "TAUTHENTICATED"
|
||||
} else if (authStatus === MailAuthenticationStatus.HARD_FAIL) {
|
||||
return "THARDFAIL"
|
||||
} else if (authStatus === MailAuthenticationStatus.SOFT_FAIL) {
|
||||
return "TSOFTFAIL"
|
||||
} else if (authStatus === MailAuthenticationStatus.INVALID_MAIL_FROM) {
|
||||
return "TINVALIDMAILFROM"
|
||||
} else if (authStatus === MailAuthenticationStatus.MISSING_MAIL_FROM) {
|
||||
return "TMISSINGMAILFROM"
|
||||
}
|
||||
|
||||
return ""
|
||||
}
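To make the header-feature helpers above easier to follow, an illustrative input/output sketch (all values invented):

// Hypothetical example of extractSpamHeaderFeatures output:
const features = extractSpamHeaderFeatures(mail, mailDetails)
// e.g. {
//   sender: "Alice alice@example.com",
//   toRecipients: "Bob bob@example.com Carol carol@example.com",
//   ccRecipients: "",
//   bccRecipients: "",
//   authStatus: "TAUTHENTICATED", // convertAuthStatusToSpamCategorizationToken(MailAuthenticationStatus.AUTHENTICATED)
// }
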
src/mail-app/mail/model/SpamClassificationPostLoginAction.ts (new file, 33 lines)

@ -0,0 +1,33 @@
import { LoggedInEvent, PostLoginAction } from "../../../common/api/main/LoginController"
import { SpamClassifier } from "../../workerUtils/spamClassification/SpamClassifier"
import { FeatureType } from "../../../common/api/common/TutanotaConstants"
import { CustomerFacade } from "../../../common/api/worker/facades/lazy/CustomerFacade"
import { filterMailMemberships } from "../../../common/api/common/utils/IndexUtils"
import { assertNotNull } from "@tutao/tutanota-utils"
import { isInternalUser } from "../../../common/api/common/utils/UserUtils"

/**
 * Initialize SpamClassifier if FeatureType.SpamClientClassification feature is enabled for the customer.
 */
export class SpamClassificationPostLoginAction implements PostLoginAction {
	constructor(
		private readonly spamClassifier: SpamClassifier,
		private readonly customerFacade: CustomerFacade,
	) {}

	async onPartialLoginSuccess(_: LoggedInEvent): Promise<void> {}

	async onFullLoginSuccess(_: LoggedInEvent): Promise<void> {
		await this.customerFacade.loadCustomizations()
		const isSpamClassificationEnabled = await this.customerFacade.isEnabled(FeatureType.SpamClientClassification)
		const user = assertNotNull(await this.customerFacade.getUser())
		if (isSpamClassificationEnabled && isInternalUser(user) && this.spamClassifier) {
			const ownerGroups = filterMailMemberships(user)
			for (const ownerGroup of ownerGroups) {
				this.spamClassifier.initialize(ownerGroup.group).catch((e) => {
					console.log(`failed to initialize spam classification model for group: ${ownerGroup.group}`, e)
				})
			}
		}
	}
}

@ -26,7 +26,6 @@ import { NotAuthorizedError, NotFoundError, PreconditionFailedError } from "../.
|
|||
import { UserError } from "../../../common/api/main/UserError.js"
|
||||
import { ProgrammingError } from "../../../common/api/common/error/ProgrammingError.js"
|
||||
import Stream from "mithril/stream"
|
||||
import { InboxRuleHandler } from "../model/InboxRuleHandler.js"
|
||||
import { Router } from "../../../common/gui/ScopedRouter.js"
|
||||
import { EntityUpdateData, isUpdateForTypeRef, PrefetchStatus } from "../../../common/api/common/utils/EntityUpdateUtils.js"
|
||||
import { EventController } from "../../../common/api/main/EventController.js"
|
||||
|
|
@ -40,6 +39,7 @@ import { MailSetListModel } from "../model/MailSetListModel"
|
|||
import { ConversationListModel } from "../model/ConversationListModel"
|
||||
import { MailListDisplayMode } from "../../../common/misc/DeviceConfig"
|
||||
import { client } from "../../../common/misc/ClientDetector"
|
||||
import { ProcessInboxHandler } from "../model/ProcessInboxHandler"
|
||||
|
||||
export interface MailOpenedListener {
|
||||
onEmailOpened(mail: Mail): unknown
|
||||
|
|
@ -98,7 +98,7 @@ export class MailViewModel {
|
|||
private readonly conversationViewModelFactory: ConversationViewModelFactory,
|
||||
private readonly mailOpenedListener: MailOpenedListener,
|
||||
private readonly conversationPrefProvider: ConversationPrefProvider,
|
||||
private readonly inboxRuleHandler: InboxRuleHandler,
|
||||
private readonly processInboxHandler: ProcessInboxHandler,
|
||||
private readonly router: Router,
|
||||
private readonly updateUi: () => unknown,
|
||||
) {}
|
||||
|
|
@ -258,8 +258,6 @@ export class MailViewModel {
|
|||
return
|
||||
}
|
||||
if (cached) {
|
||||
// Mails opened through the notification were not getting the inbox rule applied to them, so we apply it here
|
||||
this.mailModel.applyInboxRuleToMail(cached)
|
||||
console.log(TAG, "displaying cached mail", mailId)
|
||||
await this.displayExplicitMailTarget(cached)
|
||||
}
|
||||
|
|
@ -526,7 +524,7 @@ export class MailViewModel {
|
|||
this.conversationPrefProvider,
|
||||
this.entityClient,
|
||||
this.mailModel,
|
||||
this.inboxRuleHandler,
|
||||
this.processInboxHandler,
|
||||
this.cacheStorage,
|
||||
)
|
||||
} else {
|
||||
|
|
@ -535,7 +533,7 @@ export class MailViewModel {
|
|||
this.conversationPrefProvider,
|
||||
this.entityClient,
|
||||
this.mailModel,
|
||||
this.inboxRuleHandler,
|
||||
this.processInboxHandler,
|
||||
this.cacheStorage,
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -156,6 +156,7 @@ import { AutosaveFacade } from "../common/api/worker/facades/lazy/AutosaveFacade
|
|||
import { lang } from "../common/misc/LanguageViewModel.js"
|
||||
import { SpamClassificationHandler } from "./mail/model/SpamClassificationHandler"
|
||||
import { SpamClassifier } from "./workerUtils/spamClassification/SpamClassifier"
|
||||
import { ProcessInboxHandler } from "./mail/model/ProcessInboxHandler"
|
||||
import type { QuickActionsModel } from "../common/misc/quickactions/QuickActionsModel"
|
||||
|
||||
assertMainOrNode()
|
||||
|
|
@ -223,7 +224,7 @@ class MailLocator implements CommonLocator {
|
|||
bulkMailLoader!: BulkMailLoader
|
||||
mailExportFacade!: MailExportFacade
|
||||
syncTracker!: SyncTracker
|
||||
spamClassifier: SpamClassifier | null = null
|
||||
spamClassifier!: SpamClassifier
|
||||
whitelabelThemeGenerator!: WhitelabelThemeGenerator
|
||||
autosaveFacade!: AutosaveFacade
|
||||
|
||||
|
|
@ -287,7 +288,7 @@ class MailLocator implements CommonLocator {
|
|||
conversationViewModelFactory,
|
||||
this.mailOpenedListener,
|
||||
deviceConfig,
|
||||
this.inboxRuleHandler(),
|
||||
this.processInboxHandler(),
|
||||
router,
|
||||
await this.redraw(),
|
||||
)
|
||||
|
|
@ -303,7 +304,11 @@ class MailLocator implements CommonLocator {
|
|||
})
|
||||
|
||||
readonly spamClassificationHandler = lazyMemoized(() => {
|
||||
return new SpamClassificationHandler(this.mailFacade, this.spamClassifier)
|
||||
return new SpamClassificationHandler(this.spamClassifier)
|
||||
})
|
||||
|
||||
readonly processInboxHandler = lazyMemoized(() => {
|
||||
return new ProcessInboxHandler(this.logins, this.mailFacade, this.spamClassificationHandler, this.inboxRuleHandler)
|
||||
})
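Both factories rely on lazyMemoized from @tutao/tutanota-utils, which evaluates the wrapped function once and returns the cached result afterwards, so all call sites share a single handler instance; a tiny illustration:

// Illustration only: repeated calls hand out the same ProcessInboxHandler instance.
const first = mailLocator.processInboxHandler()
const second = mailLocator.processInboxHandler()
console.log(first === second) // true, the factory ran exactly once
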
|
||||
|
||||
async searchViewModelFactory(): Promise<() => SearchViewModel> {
|
||||
|
|
@ -847,8 +852,7 @@ class MailLocator implements CommonLocator {
|
|||
this.logins,
|
||||
this.mailFacade,
|
||||
this.connectivityModel,
|
||||
this.spamClassificationHandler,
|
||||
this.inboxRuleHandler,
|
||||
this.processInboxHandler,
|
||||
)
|
||||
this.operationProgressTracker = new OperationProgressTracker()
|
||||
this.infoMessageHandler = new InfoMessageHandler((state: SearchIndexStateInfo) => {
|
||||
|
|
@ -879,6 +883,7 @@ class MailLocator implements CommonLocator {
|
|||
this.usageTestController = new UsageTestController(this.usageTestModel)
|
||||
this.Const = Const
|
||||
this.whitelabelThemeGenerator = new WhitelabelThemeGenerator()
|
||||
this.spamClassifier = spamClassifier
|
||||
if (!isBrowser()) {
|
||||
const { WebDesktopFacade } = await import("../common/native/main/WebDesktopFacade")
|
||||
const { WebMobileFacade } = await import("../common/native/main/WebMobileFacade.js")
|
||||
|
|
@ -895,10 +900,9 @@ class MailLocator implements CommonLocator {
|
|||
return await this.calendarEventModel(mode, getEventWithDefaultTimes(setNextHalfHour(new Date(date))), mailboxDetail, mailboxProperties, null)
|
||||
})
|
||||
const { OpenSettingsHandler } = await import("../common/native/main/OpenSettingsHandler.js")
|
||||
const openSettingsHandler = new OpenSettingsHandler(this.logins)
|
||||
|
||||
const openSettingsHandler = new OpenSettingsHandler(this.logins)
|
||||
this.webMobileFacade = new WebMobileFacade(this.connectivityModel, MAIL_PREFIX)
|
||||
this.spamClassifier = spamClassifier
|
||||
|
||||
this.nativeInterfaces = createNativeInterfaces(
|
||||
this.webMobileFacade,
@ -1,47 +0,0 @@
|
|||
import { LoggedInEvent, PostLoginAction } from "../../../common/api/main/LoginController"
|
||||
import { OfflineStorageSettingsModel } from "../../../common/offline/OfflineStorageSettingsModel"
|
||||
import { Indexer } from "../../workerUtils/index/Indexer"
|
||||
import { SessionType } from "../../../common/api/common/SessionType"
|
||||
import { SpamClassifier } from "../../workerUtils/spamClassification/SpamClassifier"
|
||||
import { FeatureType } from "../../../common/api/common/TutanotaConstants"
|
||||
import { CustomerFacade } from "../../../common/api/worker/facades/lazy/CustomerFacade"
|
||||
import { filterMailMemberships } from "../../../common/api/common/utils/IndexUtils"
|
||||
import { assertNotNull } from "@tutao/tutanota-utils"
|
||||
|
||||
/**
|
||||
* The search range is tied to the offline storage settings.
|
||||
* This updates the mail index on full login.
|
||||
* And also initialize spamClassification if enabled
|
||||
*/
|
||||
export class MailIndexAndSpamClassificationPostLoginAction implements PostLoginAction {
|
||||
constructor(
|
||||
private readonly offlineStorageSettings: OfflineStorageSettingsModel,
|
||||
private readonly indexer: Indexer,
|
||||
private readonly spamClassifier: SpamClassifier | null,
|
||||
private readonly customerFacade: CustomerFacade,
|
||||
) {}
|
||||
|
||||
async onPartialLoginSuccess(event: LoggedInEvent): Promise<void> {
|
||||
if (event.sessionType === SessionType.Persistent) {
|
||||
await this.offlineStorageSettings.init()
|
||||
// noinspection ES6MissingAwait
|
||||
this.indexer.resizeMailIndex(this.offlineStorageSettings.getTimeRange().getTime()).then(async () => {
|
||||
// spamClassification
|
||||
// Wait until indexing is done, as its populate offlineDb
|
||||
|
||||
await this.customerFacade.loadCustomizations()
|
||||
if (this.spamClassifier && (await this.customerFacade.isEnabled(FeatureType.SpamClientClassification))) {
|
||||
const ownerGroups = filterMailMemberships(assertNotNull(await this.customerFacade.getUser()))
|
||||
for (const ownerGroup of ownerGroups) {
|
||||
this.spamClassifier.initialize(ownerGroup.group).catch((e) => {
|
||||
console.log(`Failed to initialize spam classification model for group: ${ownerGroup._id}::${ownerGroup.group}. With reason:`)
|
||||
console.log(e)
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async onFullLoginSuccess(_: LoggedInEvent): Promise<void> {}
|
||||
}
src/mail-app/search/model/MailIndexerPostLoginAction.ts (new file, 25 lines)

@ -0,0 +1,25 @@
import { LoggedInEvent, PostLoginAction } from "../../../common/api/main/LoginController"
import { OfflineStorageSettingsModel } from "../../../common/offline/OfflineStorageSettingsModel"
import { Indexer } from "../../workerUtils/index/Indexer"
import { SessionType } from "../../../common/api/common/SessionType"

/**
 * The search range is tied to the offline storage settings.
 * This updates the mail index on full login.
 */
export class MailIndexerPostLoginAction implements PostLoginAction {
	constructor(
		private readonly offlineStorageSettings: OfflineStorageSettingsModel,
		private readonly indexer: Indexer,
	) {}

	async onPartialLoginSuccess(event: LoggedInEvent): Promise<void> {
		if (event.sessionType === SessionType.Persistent) {
			await this.offlineStorageSettings.init()
			// noinspection ES6MissingAwait
			this.indexer.resizeMailIndex(this.offlineStorageSettings.getTimeRange().getTime())
		}
	}

	async onFullLoginSuccess(_: LoggedInEvent): Promise<void> {}
}

@ -10,8 +10,6 @@ import { htmlToText } from "../../../common/api/common/utils/IndexUtils"
|
|||
import { getMailBodyText } from "../../../common/api/common/CommonMailUtils"
|
||||
import { ListElementEntity } from "../../../common/api/common/EntityTypes"
|
||||
import type { OfflineStorageTable } from "../../../common/api/worker/offline/OfflineStorage"
|
||||
import { SpamClassificationModel, SpamTrainMailDatum } from "../spamClassification/SpamClassifier"
|
||||
import { Nullable } from "@tutao/tutanota-utils/dist/Utils"
|
||||
|
||||
export const SearchTableDefinitions: Record<string, OfflineStorageTable> = Object.freeze({
|
||||
search_group_data: {
|
||||
|
|
@ -66,26 +64,6 @@ export const SearchTableDefinitions: Record<string, OfflineStorageTable> = Objec
|
|||
},
|
||||
})
|
||||
|
||||
export const SpamClassificationDefinitions: Record<string, OfflineStorageTable> = Object.freeze({
|
||||
spam_classification_training_data: {
|
||||
definition:
|
||||
"CREATE TABLE IF NOT EXISTS spam_classification_training_data (listId TEXT NOT NULL, elementId TEXT NOT NULL," +
|
||||
"ownerGroup TEXT NOT NULL, subject TEXT NOT NULL, body TEXT NOT NULL, isSpam NUMBER," +
|
||||
"lastModified NUMBER NOT NULL, isSpamConfidence NUMBER NOT NULL, sender TEXT NOT NULL," +
|
||||
"toRecipients TEXT NOT NULL, ccRecipients TEXT NOT NULL, bccRecipients TEXT NOT NULL," +
|
||||
"authStatus TEXT NOT NULL, PRIMARY KEY (listId, elementId))",
|
||||
|
||||
purgedWithCache: true,
|
||||
},
|
||||
|
||||
// TODO add test for new table
|
||||
spam_classification_model: {
|
||||
definition:
|
||||
"CREATE TABLE IF NOT EXISTS spam_classification_model (version NUMBER NOT NULL, ownerGroup TEXT NOT NULL, modelTopology TEXT NOT NULL, weightSpecs TEXT NOT NULL, weightData BLOB NOT NULL, PRIMARY KEY(version, ownerGroup))",
|
||||
purgedWithCache: true,
|
||||
},
|
||||
})
|
||||
|
||||
export interface IndexedGroupData {
|
||||
groupId: Id
|
||||
type: GroupType
|
||||
|
|
@ -187,127 +165,6 @@ export class OfflineStoragePersistence {
|
|||
}
|
||||
}
|
||||
|
||||
async storeSpamClassification(spamTrainMailDatum: SpamTrainMailDatum): Promise<void> {
|
||||
const { query, params } = sql`
|
||||
INSERT
|
||||
OR REPLACE INTO spam_classification_training_data(listId, elementId, ownerGroup, subject, body, isSpam,
|
||||
lastModified, isSpamConfidence, sender, toRecipients, ccRecipients, bccRecipients, authStatus)
|
||||
VALUES (
|
||||
${listIdPart(spamTrainMailDatum.mailId)},
|
||||
${elementIdPart(spamTrainMailDatum.mailId)},
|
||||
${spamTrainMailDatum.ownerGroup},
|
||||
${spamTrainMailDatum.subject},
|
||||
${spamTrainMailDatum.body},
|
||||
${spamTrainMailDatum.isSpam ? 1 : 0},
|
||||
${Date.now()},
|
||||
${spamTrainMailDatum.isSpamConfidence},
|
||||
${spamTrainMailDatum.sender},
|
||||
${spamTrainMailDatum.toRecipients},
|
||||
${spamTrainMailDatum.ccRecipients},
|
||||
${spamTrainMailDatum.bccRecipients},
|
||||
${spamTrainMailDatum.authStatus}
|
||||
)`
|
||||
await this.sqlCipherFacade.run(query, params)
|
||||
}
|
||||
|
||||
async deleteSpamClassification(mailId: IdTuple): Promise<void> {
|
||||
const mailListId = listIdPart(mailId)
|
||||
const mailElementId = elementIdPart(mailId)
|
||||
const { query, params } = sql`
|
||||
DELETE
|
||||
FROM spam_classification_training_data
|
||||
where listId = ${mailListId}
|
||||
AND elementId = ${mailElementId}`
|
||||
await this.sqlCipherFacade.run(query, params)
|
||||
}
|
||||
|
||||
async deleteSpamClassificationTrainingDataBeforeCutoff(cutoffTimestamp: number, ownerGroupId: Id): Promise<void> {
|
||||
const { query, params } = sql`DELETE
|
||||
FROM spam_classification_training_data
|
||||
WHERE lastModified < ${cutoffTimestamp}
|
||||
AND ownerGroup = ${ownerGroupId}`
|
||||
await this.sqlCipherFacade.run(query, params)
|
||||
}
|
||||
|
||||
async updateSpamClassification(mailId: IdTuple, isSpam: boolean, isSpamConfidence: number): Promise<void> {
|
||||
const { query, params } = sql`
|
||||
UPDATE spam_classification_training_data
|
||||
SET lastModified=${Date.now()},
|
||||
isSpamConfidence=${isSpamConfidence},
|
||||
isSpam=${isSpam ? 1 : 0}
|
||||
WHERE listId = ${listIdPart(mailId)}
|
||||
AND elementId = ${elementIdPart(mailId)}
|
||||
`
|
||||
await this.sqlCipherFacade.run(query, params)
|
||||
}
|
||||
|
||||
async getSpamClassification(mailId: IdTuple): Promise<Nullable<{ isSpam: boolean; isSpamConfidence: number }>> {
|
||||
const { query, params } = sql`
|
||||
SELECT isSpam, isSpamConfidence
|
||||
FROM spam_classification_training_data
|
||||
where listId = ${listIdPart(mailId)}
|
||||
AND elementId = ${elementIdPart(mailId)} `
|
||||
const result = await this.sqlCipherFacade.get(query, params)
|
||||
if (!result) {
|
||||
return null
|
||||
} else {
|
||||
const isSpam = untagSqlObject(result).isSpam === 1
|
||||
const isSpamConfidence = untagSqlObject(result).isSpamConfidence as number
|
||||
return { isSpam, isSpamConfidence }
|
||||
}
|
||||
}
|
||||
|
||||
async getCertainSpamClassificationTrainingDataAfterCutoff(cutoffTimestamp: number, ownerGroupId: Id): Promise<SpamTrainMailDatum[]> {
|
||||
const { query, params } = sql`SELECT listId,
|
||||
elementId,
|
||||
subject,
|
||||
body,
|
||||
isSpam,
|
||||
isSpamConfidence,
|
||||
sender,
|
||||
toRecipients,
|
||||
ccRecipients,
|
||||
bccRecipients,
|
||||
authStatus
|
||||
FROM spam_classification_training_data
|
||||
WHERE lastModified > ${cutoffTimestamp}
|
||||
AND isSpamConfidence > 0
|
||||
AND ownerGroup = ${ownerGroupId}`
|
||||
const resultRows = await this.sqlCipherFacade.all(query, params)
|
||||
return resultRows.map(untagSqlObject).map((row) => row as unknown as SpamTrainMailDatum)
|
||||
}
|
||||
|
||||
async putSpamClassificationModel(model: SpamClassificationModel) {
|
||||
const { query, params } = sql`INSERT
|
||||
OR REPLACE INTO
|
||||
spam_classification_model VALUES (
|
||||
${1},
|
||||
${model.ownerGroup},
|
||||
${model.modelTopology},
|
||||
${model.weightSpecs},
|
||||
${model.weightData}
|
||||
)`
|
||||
await this.sqlCipherFacade.run(query, params)
|
||||
}
|
||||
|
||||
async getSpamClassificationModel(ownerGroup: Id): Promise<Nullable<SpamClassificationModel>> {
|
||||
const { query, params } = sql`SELECT modelTopology, weightSpecs, weightData, ownerGroup
|
||||
FROM spam_classification_model
|
||||
WHERE version = ${1}
|
||||
AND ownerGroup = ${ownerGroup}`
|
||||
const resultRows = await this.sqlCipherFacade.get(query, params)
|
||||
if (resultRows !== null) {
|
||||
const untaggedValue = untagSqlObject(resultRows)
|
||||
return {
|
||||
modelTopology: untaggedValue.modelTopology,
|
||||
weightSpecs: untaggedValue.weightSpecs,
|
||||
weightData: untaggedValue.weightData,
|
||||
ownerGroup: untaggedValue.ownerGroup,
|
||||
} as SpamClassificationModel
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
async updateMailLocation(mail: Mail) {
|
||||
const rowid = await this.getRowid(MailTypeRef, mail._id)
|
||||
if (rowid == null) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { arrayHashUnsigned, downcast, promiseMap, stringToUtf8Uint8Array } from "@tutao/tutanota-utils"
|
||||
import { stringToHashBucketFast, tensor1d } from "./tensorflow-custom"
|
||||
import { MAX_WORD_FREQUENCY } from "../../../common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
|
||||
export class HashingVectorizer {
|
||||
private readonly hasher: (tokens: Array<string>) => Promise<Array<number>> = this.tensorHash
|
||||
|
|
@ -11,7 +12,9 @@ export class HashingVectorizer {
|
|||
|
||||
const indexes = await this.hasher(downcast<Array<string>>(tokens))
|
||||
for (const index of indexes) {
|
||||
vector[index] += 1
|
||||
if (vector[index] < MAX_WORD_FREQUENCY) {
|
||||
vector[index] += 1
|
||||
}
|
||||
}
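// note: with the cap above, a token's count saturates at MAX_WORD_FREQUENCY, so a single heavily
// repeated token cannot dominate the resulting feature vector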
|
||||
|
||||
return vector
|
||||
|
|
|
|||
|
|
@ -0,0 +1,241 @@
|
|||
import { EntityClient } from "../../../common/api/common/EntityClient"
|
||||
import { assertNotNull, isEmpty, isNotNull, last, lazyAsync, promiseMap } from "@tutao/tutanota-utils"
|
||||
import {
|
||||
ClientSpamTrainingDatum,
|
||||
ClientSpamTrainingDatumIndexEntryTypeRef,
|
||||
ClientSpamTrainingDatumTypeRef,
|
||||
MailBag,
|
||||
MailBox,
|
||||
MailboxGroupRootTypeRef,
|
||||
MailBoxTypeRef,
|
||||
MailFolder,
|
||||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
PopulateClientSpamTrainingDatum,
|
||||
} from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import { getMailSetKind, isFolder, MailSetKind, SpamDecision } from "../../../common/api/common/TutanotaConstants"
|
||||
import { GENERATED_MIN_ID, getElementId, isSameId, StrippedEntity, timestampToGeneratedId } from "../../../common/api/common/utils/EntityUtils"
|
||||
import { BulkMailLoader, MailWithMailDetails } from "../index/BulkMailLoader"
|
||||
import { hasError } from "../../../common/api/common/utils/ErrorUtils"
|
||||
import { getSpamConfidence } from "../../../common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
import { MailFacade } from "../../../common/api/worker/facades/lazy/MailFacade"
|
||||
|
||||
// Visible for testing
|
||||
export const SINGLE_TRAIN_INTERVAL_TRAINING_DATA_LIMIT = 1000
|
||||
const INITIAL_SPAM_CLASSIFICATION_INDEX_INTERVAL_DAYS = 90
|
||||
const TRAINING_DATA_TIME_LIMIT: number = INITIAL_SPAM_CLASSIFICATION_INDEX_INTERVAL_DAYS * -1 // negative day offset, used with getStartOfDayShiftedBy()
|
||||
|
||||
export type TrainingDataset = {
|
||||
trainingData: ClientSpamTrainingDatum[]
|
||||
lastTrainingDataIndexId: Id
|
||||
hamCount: number
|
||||
spamCount: number
|
||||
}
|
||||
|
||||
export type UnencryptedPopulateClientSpamTrainingDatum = Omit<StrippedEntity<PopulateClientSpamTrainingDatum>, "encVector" | "ownerEncVectorSessionKey"> & {
|
||||
vector: Uint8Array
|
||||
}
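// note: the plaintext compressed vector stands in for encVector/ownerEncVectorSessionKey here;
// encryption is presumably applied later (in the MailFacade) before the datum is uploaded via the
// PopulateClientSpamTrainingDataService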
|
||||
|
||||
export class SpamClassificationDataDealer {
|
||||
constructor(
|
||||
private readonly entityClient: EntityClient,
|
||||
private readonly bulkMailLoader: lazyAsync<BulkMailLoader>,
|
||||
private readonly mailFacade: lazyAsync<MailFacade>,
|
||||
) {}
|
||||
|
||||
public async fetchAllTrainingData(ownerGroup: Id): Promise<TrainingDataset> {
|
||||
const mailboxGroupRoot = await this.entityClient.load(MailboxGroupRootTypeRef, ownerGroup)
|
||||
const mailbox = await this.entityClient.load(MailBoxTypeRef, mailboxGroupRoot.mailbox)
|
||||
const mailSets = await this.entityClient.loadAll(MailFolderTypeRef, assertNotNull(mailbox.folders).folders)
|
||||
|
||||
if (mailbox.clientSpamTrainingData == null || mailbox.modifiedClientSpamTrainingDataIndex == null) {
|
||||
return { trainingData: [], lastTrainingDataIndexId: GENERATED_MIN_ID, hamCount: 0, spamCount: 0 }
|
||||
}
|
||||
|
||||
// clientSpamTrainingData is NOT cached
|
||||
let clientSpamTrainingData = await this.entityClient.loadAll(ClientSpamTrainingDatumTypeRef, mailbox.clientSpamTrainingData)
|
||||
|
||||
// if the training data is empty for this mailbox, we are aggregating
|
||||
// the last INITIAL_SPAM_CLASSIFICATION_INDEX_INTERVAL_DAYS of mails and uploading the training data
|
||||
if (isEmpty(clientSpamTrainingData)) {
|
||||
console.log("building and uploading initial training data for mailbox: " + mailbox._id)
|
||||
const mailsWithMailDetails = await this.fetchMailAndMailDetailsForMailbox(mailbox, mailSets)
|
||||
console.log(`mailbox has ${mailsWithMailDetails.length} mails suitable for encrypted training vector data upload`)
|
||||
console.log(`vectorizing, compressing and encrypting those ${mailsWithMailDetails.length} mails...`)
|
||||
await this.uploadTrainingDataForMails(mailsWithMailDetails, mailbox, mailSets)
|
||||
clientSpamTrainingData = await this.entityClient.loadAll(ClientSpamTrainingDatumTypeRef, mailbox.clientSpamTrainingData)
|
||||
console.log(`clientSpamTrainingData list on the mailbox has ${clientSpamTrainingData.length} members.`)
|
||||
}
|
||||
|
||||
const { subsampledTrainingData, hamCount, spamCount } = this.subsampleHamAndSpamMails(clientSpamTrainingData)
|
||||
|
||||
const modifiedClientSpamTrainingDataIndices = await this.entityClient.loadAll(
|
||||
ClientSpamTrainingDatumIndexEntryTypeRef,
|
||||
mailbox.modifiedClientSpamTrainingDataIndex,
|
||||
)
|
||||
const lastModifiedClientSpamTrainingDataIndexElementId = isEmpty(modifiedClientSpamTrainingDataIndices)
|
||||
? GENERATED_MIN_ID
|
||||
: getElementId(assertNotNull(last(modifiedClientSpamTrainingDataIndices)))
|
||||
|
||||
return {
|
||||
trainingData: subsampledTrainingData,
|
||||
lastTrainingDataIndexId: lastModifiedClientSpamTrainingDataIndexElementId,
|
||||
hamCount,
|
||||
spamCount,
|
||||
}
|
||||
}
|
||||
|
||||
async fetchPartialTrainingDataFromIndexStartId(indexStartId: Id, ownerGroup: Id): Promise<TrainingDataset> {
|
||||
const mailboxGroupRoot = await this.entityClient.load(MailboxGroupRootTypeRef, ownerGroup)
|
||||
const mailbox = await this.entityClient.load(MailBoxTypeRef, mailboxGroupRoot.mailbox)
|
||||
|
||||
const emptyResult = { trainingData: [], lastTrainingDataIndexId: indexStartId, hamCount: 0, spamCount: 0 }
|
||||
if (mailbox.clientSpamTrainingData == null || mailbox.modifiedClientSpamTrainingDataIndex == null) {
|
||||
return emptyResult
|
||||
}
|
||||
|
||||
const modifiedClientSpamTrainingDataIndicesSinceStart = await this.entityClient.loadRange(
|
||||
ClientSpamTrainingDatumIndexEntryTypeRef,
|
||||
mailbox.modifiedClientSpamTrainingDataIndex,
|
||||
indexStartId,
|
||||
SINGLE_TRAIN_INTERVAL_TRAINING_DATA_LIMIT,
|
||||
false,
|
||||
)
|
||||
|
||||
if (isEmpty(modifiedClientSpamTrainingDataIndicesSinceStart)) {
|
||||
return emptyResult
|
||||
}
|
||||
|
||||
const clientSpamTrainingData = await this.entityClient.loadMultiple(
|
||||
ClientSpamTrainingDatumTypeRef,
|
||||
mailbox.clientSpamTrainingData,
|
||||
modifiedClientSpamTrainingDataIndicesSinceStart.map((index) => index.clientSpamTrainingDatumElementId),
|
||||
)
|
||||
|
||||
const { subsampledTrainingData, hamCount, spamCount } = this.subsampleHamAndSpamMails(clientSpamTrainingData)
|
||||
|
||||
return {
|
||||
trainingData: subsampledTrainingData,
|
||||
lastTrainingDataIndexId: getElementId(assertNotNull(last(modifiedClientSpamTrainingDataIndicesSinceStart))),
|
||||
hamCount,
|
||||
spamCount,
|
||||
}
|
||||
}
|
||||
|
||||
// Visible for testing
|
||||
subsampleHamAndSpamMails(clientSpamTrainingData: ClientSpamTrainingDatum[]): {
|
||||
subsampledTrainingData: ClientSpamTrainingDatum[]
|
||||
hamCount: number
|
||||
spamCount: number
|
||||
} {
|
||||
// we always want to include clientSpamTrainingData with high confidence (usually 4), because these mails have been moved explicitly by the user
|
||||
const hamDataHighConfidence = clientSpamTrainingData.filter((d) => Number(d.confidence) > 1 && d.spamDecision === SpamDecision.WHITELIST)
|
||||
const spamDataHighConfidence = clientSpamTrainingData.filter((d) => Number(d.confidence) > 1 && d.spamDecision === SpamDecision.BLACKLIST)
|
||||
|
||||
const hamDataLowConfidence = clientSpamTrainingData.filter((d) => Number(d.confidence) === 1 && d.spamDecision === SpamDecision.WHITELIST)
|
||||
const spamDataLowConfidence = clientSpamTrainingData.filter((d) => Number(d.confidence) === 1 && d.spamDecision === SpamDecision.BLACKLIST)
|
||||
|
||||
const hamCount = hamDataHighConfidence.length + hamDataLowConfidence.length
|
||||
const spamCount = spamDataHighConfidence.length + spamDataLowConfidence.length
|
||||
|
||||
if (hamCount === 0 || spamCount === 0) {
|
||||
return { subsampledTrainingData: clientSpamTrainingData, hamCount, spamCount }
|
||||
}
|
||||
|
||||
const ratio = hamCount / spamCount
|
||||
const MAX_RATIO = 10
|
||||
const MIN_RATIO = 1 / 10
|
||||
|
||||
let sampledHamLowConfidence = hamDataLowConfidence
|
||||
let sampledSpamLowConfidence = spamDataLowConfidence
|
||||
|
||||
if (ratio > MAX_RATIO) {
|
||||
const targetHamCount = Math.floor(spamCount * MAX_RATIO)
|
||||
sampledHamLowConfidence = this.sampleEntriesFromArray(hamDataLowConfidence, targetHamCount)
|
||||
} else if (ratio < MIN_RATIO) {
|
||||
const targetSpamCount = Math.floor(hamCount * MAX_RATIO)
|
||||
sampledSpamLowConfidence = this.sampleEntriesFromArray(spamDataLowConfidence, targetSpamCount)
|
||||
}
|
||||
|
||||
const finalHam = hamDataHighConfidence.concat(sampledHamLowConfidence)
|
||||
const finalSpam = spamDataHighConfidence.concat(sampledSpamLowConfidence)
|
||||
|
||||
const balanced = [...finalHam, ...finalSpam]
|
||||
console.log(
|
||||
`Subsampled training data to ${finalHam.length} ham (${hamDataHighConfidence.length} are confidence > 1) and ${finalSpam.length} spam (${spamDataHighConfidence.length} are confidence > 1) (ratio ${(finalHam.length / finalSpam.length).toFixed(2)}).`,
|
||||
)
|
||||
|
||||
return { subsampledTrainingData: balanced, hamCount: finalHam.length, spamCount: finalSpam.length }
|
||||
}
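// Illustrative example (not part of the change): with 200 high-confidence ham, 4800 low-confidence ham
// and 300 spam, the ratio is 5000 / 300 ≈ 16.7 > MAX_RATIO, so targetHamCount = 300 * 10 = 3000
// low-confidence ham entries are sampled. The result is 200 + 3000 = 3200 ham vs. 300 spam (≈ 10.7:1);
// the cap is approximate because high-confidence data is always kept in full.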
|
||||
|
||||
// Visible for testing
|
||||
async fetchMailsByMailbagAfterDate(mailbag: MailBag, mailSets: MailFolder[], startDate: Date): Promise<Array<MailWithMailDetails>> {
|
||||
const bulkMailLoader = await this.bulkMailLoader()
|
||||
const mails = await this.entityClient.loadAll(MailTypeRef, mailbag.mails, timestampToGeneratedId(startDate.getTime()))
|
||||
const filteredMails = mails.filter((mail) => {
|
||||
const trashFolder = assertNotNull(mailSets.find((set) => getMailSetKind(set) === MailSetKind.TRASH))
|
||||
const isMailTrashed = mail.sets.some((setId) => isSameId(setId, trashFolder._id))
|
||||
return isNotNull(mail.mailDetails) && !hasError(mail) && mail.receivedDate > startDate && !isMailTrashed
|
||||
})
|
||||
const mailsWithMailDetails = await bulkMailLoader.loadMailDetails(filteredMails)
|
||||
return mailsWithMailDetails ?? []
|
||||
}
|
||||
|
||||
private async fetchMailAndMailDetailsForMailbox(mailbox: MailBox, mailSets: MailFolder[]): Promise<Array<MailWithMailDetails>> {
|
||||
const downloadedMailClassificationData = new Array<MailWithMailDetails>()
|
||||
|
||||
const { LocalTimeDateProvider } = await import("../../../common/api/worker/DateProvider")
|
||||
const startDate = new LocalTimeDateProvider().getStartOfDayShiftedBy(TRAINING_DATA_TIME_LIMIT)
|
||||
|
||||
// sorted from latest to oldest
|
||||
const mailbagsToFetch = [assertNotNull(mailbox.currentMailBag), ...mailbox.archivedMailBags.reverse()]
|
||||
for (let currentMailbag = mailbagsToFetch.shift(); isNotNull(currentMailbag); currentMailbag = mailbagsToFetch.shift()) {
|
||||
const mailsOfThisMailbag = await this.fetchMailsByMailbagAfterDate(currentMailbag, mailSets, startDate)
|
||||
if (isEmpty(mailsOfThisMailbag)) {
|
||||
// the list is empty if none of the mails in the mailbag were recent enough,
// so there is no point in requesting the remaining (older) mailbags
|
||||
break
|
||||
}
|
||||
downloadedMailClassificationData.push(...mailsOfThisMailbag)
|
||||
}
|
||||
return downloadedMailClassificationData
|
||||
}
|
||||
|
||||
private async uploadTrainingDataForMails(mails: MailWithMailDetails[], mailBox: MailBox, mailSets: MailFolder[]): Promise<void> {
|
||||
const clientSpamTrainingDataListId = mailBox.clientSpamTrainingData
|
||||
if (clientSpamTrainingDataListId == null) {
|
||||
return
|
||||
}
|
||||
|
||||
const unencryptedPopulateClientSpamTrainingData: UnencryptedPopulateClientSpamTrainingDatum[] = await promiseMap(
|
||||
mails,
|
||||
async (mailWithDetail) => {
|
||||
const { mail, mailDetails } = mailWithDetail
|
||||
const allMailFolders = mailSets.filter((mailSet) => isFolder(mailSet)).map((mailFolder) => mailFolder._id)
|
||||
const sourceMailFolderId = assertNotNull(mail.sets.find((setId) => allMailFolders.find((folderId) => isSameId(setId, folderId))))
|
||||
const sourceMailFolder = assertNotNull(mailSets.find((set) => isSameId(set._id, sourceMailFolderId)))
|
||||
const isSpam = getMailSetKind(sourceMailFolder) === MailSetKind.SPAM
|
||||
const unencryptedPopulateClientSpamTrainingData: UnencryptedPopulateClientSpamTrainingDatum = {
|
||||
mailId: mail._id,
|
||||
isSpam,
|
||||
confidence: getSpamConfidence(mail),
|
||||
vector: await (await this.mailFacade()).vectorizeAndCompressMails({ mail, mailDetails }),
|
||||
}
|
||||
return unencryptedPopulateClientSpamTrainingData
|
||||
},
|
||||
{
|
||||
concurrency: 5,
|
||||
},
|
||||
)
|
||||
// we are uploading the initial spam training data using the PopulateClientSpamTrainingDataService
|
||||
return (await this.mailFacade()).populateClientSpamTrainingData(assertNotNull(mailBox._ownerGroup), unencryptedPopulateClientSpamTrainingData)
|
||||
}
|
||||
|
||||
private sampleEntriesFromArray<T>(arr: T[], numberOfEntries: number): T[] {
|
||||
if (numberOfEntries >= arr.length) {
|
||||
return arr
|
||||
}
|
||||
// cheap, slightly biased shuffle; sufficient for subsampling training data
const shuffled = arr.slice().sort(() => Math.random() - 0.5)
|
||||
return shuffled.slice(0, numberOfEntries)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
import { EntityClient } from "../../../common/api/common/EntityClient"
|
||||
import { assertNotNull, isNotNull, lazyAsync } from "@tutao/tutanota-utils"
|
||||
import {
|
||||
MailAddress,
|
||||
MailBag,
|
||||
MailboxGroupRootTypeRef,
|
||||
MailBoxTypeRef,
|
||||
MailDetails,
|
||||
MailFolder,
|
||||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
Recipients,
|
||||
} from "../../../common/api/entities/tutanota/TypeRefs"
|
||||
import { getMailSetKind, getSpamConfidence, MailSetKind } from "../../../common/api/common/TutanotaConstants"
|
||||
import { elementIdPart, isSameId, listIdPart, timestampToGeneratedId } from "../../../common/api/common/utils/EntityUtils"
|
||||
import { OfflineStoragePersistence } from "../index/OfflineStoragePersistence"
|
||||
import { getMailBodyText } from "../../../common/api/common/CommonMailUtils"
|
||||
import { BulkMailLoader, MailWithMailDetails } from "../index/BulkMailLoader"
|
||||
import { hasError } from "../../../common/api/common/utils/ErrorUtils"
|
||||
import { SpamTrainMailDatum } from "./SpamClassifier"
|
||||
import { extractSpamHeaderFeatures } from "../../mail/model/SpamClassificationHandler"
|
||||
|
||||
const INITIAL_SPAM_CLASSIFICATION_INDEX_INTERVAL_DAYS = 28
|
||||
|
||||
export class SpamClassificationInitializer {
|
||||
/*
|
||||
* While downloading mails, we start from current mailbag, but it might be that current mailbag is too new,
|
||||
* If there are less than this mail in current mailbag, we will also try to fetch previous one
|
||||
*/
|
||||
public readonly MIN_MAILS_COUNT: number = 300
|
||||
public readonly TIME_LIMIT: number = INITIAL_SPAM_CLASSIFICATION_INDEX_INTERVAL_DAYS * -1
|
||||
|
||||
constructor(
|
||||
private readonly entityClient: EntityClient,
|
||||
private readonly offlineStorage: OfflineStoragePersistence,
|
||||
private readonly bulkMailLoader: lazyAsync<BulkMailLoader>,
|
||||
) {}
|
||||
|
||||
public async init(ownerGroup: Id): Promise<SpamTrainMailDatum[]> {
|
||||
// populate the spam classification data with the last 28 days of mails if they are
|
||||
// available in the current mail bag
|
||||
const data = await this.downloadMailAndMailDetailsByGroupMembership(ownerGroup)
|
||||
data.filter((datum) => datum.isSpamConfidence > 0)
|
||||
|
||||
let spamMailsCount = 0
|
||||
let hamMailsCount = 0
|
||||
for (const spamTrainMailDatum of data) {
|
||||
await this.offlineStorage.storeSpamClassification(spamTrainMailDatum)
|
||||
|
||||
if (spamTrainMailDatum.isSpam) spamMailsCount += 1
|
||||
else hamMailsCount += 1
|
||||
}
|
||||
|
||||
console.log(
|
||||
`Downloaded ${spamMailsCount} spam mails and ${hamMailsCount} ham mails for group: ${ownerGroup}. Spam:Ham ratio is: ${(spamMailsCount / hamMailsCount).toFixed(2)}`,
|
||||
)
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
private async downloadMailAndMailDetailsByGroupMembership(mailGroupId: Id): Promise<Array<SpamTrainMailDatum>> {
|
||||
const mailboxGroupRoot = await this.entityClient.load(MailboxGroupRootTypeRef, mailGroupId)
|
||||
const mailbox = await this.entityClient.load(MailBoxTypeRef, mailboxGroupRoot.mailbox)
|
||||
const mailSets = await this.entityClient.loadAll(MailFolderTypeRef, assertNotNull(mailbox.folders).folders)
|
||||
const spamFolder = mailSets.find((s) => getMailSetKind(s) === MailSetKind.SPAM)!
|
||||
|
||||
const downloadedMailClassificationDatas = new Array<SpamTrainMailDatum>()
|
||||
const allMailbags = [assertNotNull(mailbox.currentMailBag), ...mailbox.archivedMailBags].reverse() // sorted from latest to oldest
|
||||
|
||||
for (
|
||||
let currentMailbag = allMailbags.pop();
|
||||
isNotNull(currentMailbag) && downloadedMailClassificationDatas.length < this.MIN_MAILS_COUNT;
|
||||
currentMailbag = allMailbags.pop()
|
||||
) {
|
||||
const mailsOfThisMailbag = await this.downloadMailAndMailDetailsByMailbag(currentMailbag, spamFolder)
|
||||
downloadedMailClassificationDatas.push(...mailsOfThisMailbag)
|
||||
}
|
||||
|
||||
return downloadedMailClassificationDatas
|
||||
}
|
||||
|
||||
private async downloadMailAndMailDetailsByMailbag(mailbag: MailBag, spamFolder: MailFolder): Promise<Array<SpamTrainMailDatum>> {
|
||||
const { LocalTimeDateProvider } = await import("../../../common/api/worker/DateProvider.js")
|
||||
const dateProvider = new LocalTimeDateProvider()
|
||||
const startTime = dateProvider.getStartOfDayShiftedBy(this.TIME_LIMIT).getTime()
|
||||
const bulkMailLoader = await this.bulkMailLoader()
|
||||
return await this.entityClient
|
||||
.loadAll(MailTypeRef, mailbag.mails, timestampToGeneratedId(startTime))
|
||||
// Filter out draft mails and mails with error
|
||||
.then((mails) => {
|
||||
return mails.filter((m) => isNotNull(m.mailDetails) && !hasError(m))
|
||||
})
|
||||
// Download mail details
|
||||
.then((mails) => bulkMailLoader.loadMailDetails(mails))
|
||||
// Map to spam mail datum
|
||||
.then((mails) => mails.map((m) => this.mailWithDetailsToMailDatum(spamFolder, m)))
|
||||
}
|
||||
|
||||
private mailWithDetailsToMailDatum(spamFolder: MailFolder, { mail, mailDetails }: MailWithMailDetails): SpamTrainMailDatum {
|
||||
const isSpam = mail.sets.some((folderId) => isSameId(folderId, spamFolder._id))
|
||||
|
||||
return {
|
||||
mailId: mail._id,
|
||||
subject: mail.subject,
|
||||
body: getMailBodyText(mailDetails.body),
|
||||
isSpam: isSpam,
|
||||
isSpamConfidence: getSpamConfidence(mail),
|
||||
listId: listIdPart(mail._id),
|
||||
elementId: elementIdPart(mail._id),
|
||||
ownerGroup: assertNotNull(mail._ownerGroup),
|
||||
...extractSpamHeaderFeatures(mail, mailDetails),
|
||||
} as SpamTrainMailDatum
|
||||
}
|
||||
}
|
||||
|
|
@ -1,29 +1,10 @@
|
|||
import { assertWorkerOrNode } from "../../../common/api/common/Env"
|
||||
import { assertNotNull, defer, groupByAndMap, isNotNull, Nullable, promiseMap, tokenize } from "@tutao/tutanota-utils"
|
||||
import { HashingVectorizer } from "./HashingVectorizer"
|
||||
import {
|
||||
ML_BITCOIN_REGEX,
|
||||
ML_BITCOIN_TOKEN,
|
||||
ML_CREDIT_CARD_REGEX,
|
||||
ML_CREDIT_CARD_TOKEN,
|
||||
ML_DATE_REGEX,
|
||||
ML_DATE_TOKEN,
|
||||
ML_EMAIL_ADDR_REGEX,
|
||||
ML_EMAIL_ADDR_TOKEN,
|
||||
ML_NUMBER_SEQUENCE_REGEX,
|
||||
ML_NUMBER_SEQUENCE_TOKEN,
|
||||
ML_SPACE_BEFORE_NEW_LINE_REGEX,
|
||||
ML_SPACE_BEFORE_NEW_LINE_TOKEN,
|
||||
ML_SPECIAL_CHARACTER_REGEX,
|
||||
ML_SPECIAL_CHARACTER_TOKEN,
|
||||
ML_URL_REGEX,
|
||||
ML_URL_TOKEN,
|
||||
} from "./PreprocessPatterns"
|
||||
import { SpamClassificationInitializer } from "./SpamClassificationInitializer"
|
||||
import { assertNotNull, groupByAndMap, isEmpty, Nullable, promiseMap } from "@tutao/tutanota-utils"
|
||||
import { SpamClassificationDataDealer, TrainingDataset } from "./SpamClassificationDataDealer"
|
||||
import { CacheStorage } from "../../../common/api/worker/rest/DefaultEntityRestCache"
|
||||
import { htmlToText } from "../../../common/api/common/utils/IndexUtils"
|
||||
import {
|
||||
dense,
|
||||
enableProdMode,
|
||||
fromMemory,
|
||||
glorotUniform,
|
||||
LayersModel,
|
||||
|
|
@ -33,10 +14,13 @@ import {
|
|||
tensor2d,
|
||||
withSaveHandler,
|
||||
} from "./tensorflow-custom"
|
||||
import type { Tensor } from "@tensorflow/tfjs-core"
|
||||
import type { ModelArtifacts } from "@tensorflow/tfjs-core/dist/io/types"
|
||||
import type { ModelFitArgs } from "@tensorflow/tfjs-layers"
|
||||
import { OfflineStoragePersistence } from "../index/OfflineStoragePersistence"
|
||||
import type { Tensor } from "@tensorflow/tfjs-core"
|
||||
import { DEFAULT_PREPROCESS_CONFIGURATION, SpamMailDatum, SpamMailProcessor } from "../../../common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
import { SparseVectorCompressor } from "../../../common/api/common/utils/spamClassificationUtils/SparseVectorCompressor"
|
||||
import { SpamDecision } from "../../../common/api/common/TutanotaConstants"
|
||||
import { HashingVectorizer } from "./HashingVectorizer"
|
||||
|
||||
assertWorkerOrNode()
|
||||
|
||||
|
|
@ -45,222 +29,110 @@ export type SpamClassificationModel = {
|
|||
weightSpecs: string
|
||||
weightData: Uint8Array
|
||||
ownerGroup: Id
|
||||
hamCount: number
|
||||
spamCount: number
|
||||
}
|
||||
|
||||
export type SpamTrainMailDatum = {
|
||||
mailId: IdTuple
|
||||
subject: string
|
||||
body: string
|
||||
isSpam: boolean
|
||||
isSpamConfidence: number
|
||||
ownerGroup: Id
|
||||
sender: string
|
||||
toRecipients: string
|
||||
ccRecipients: string
|
||||
bccRecipients: string
|
||||
authStatus: string
|
||||
}
|
||||
|
||||
export type SpamPredMailDatum = {
|
||||
subject: string
|
||||
body: string
|
||||
ownerGroup: Id
|
||||
sender: string
|
||||
toRecipients: string
|
||||
ccRecipients: string
|
||||
bccRecipients: string
|
||||
authStatus: string
|
||||
}
|
||||
|
||||
const PREDICTION_THRESHOLD = 0.55
|
||||
|
||||
export type PreprocessConfiguration = {
|
||||
isPreprocessMails: boolean
|
||||
isRemoveHTML: boolean
|
||||
isReplaceDates: boolean
|
||||
isReplaceUrls: boolean
|
||||
isReplaceMailAddresses: boolean
|
||||
isReplaceBitcoinAddress: boolean
|
||||
isReplaceCreditCards: boolean
|
||||
isReplaceNumbers: boolean
|
||||
isReplaceSpecialCharacters: boolean
|
||||
isRemoveSpaceBeforeNewLine: boolean
|
||||
}
|
||||
|
||||
export const DEFAULT_PREPROCESS_CONFIGURATION: PreprocessConfiguration = {
|
||||
isPreprocessMails: true,
|
||||
isRemoveHTML: true,
|
||||
isReplaceDates: true,
|
||||
isReplaceUrls: true,
|
||||
isReplaceMailAddresses: true,
|
||||
isReplaceBitcoinAddress: true,
|
||||
isReplaceCreditCards: true,
|
||||
isReplaceNumbers: true,
|
||||
isReplaceSpecialCharacters: true,
|
||||
isRemoveSpaceBeforeNewLine: true,
|
||||
}
|
||||
export const DEFAULT_PREDICTION_THRESHOLD = 0.55
|
||||
|
||||
const TRAINING_INTERVAL = 1000 * 60 * 10 // 10 minutes
|
||||
const FULL_RETRAINING_INTERVAL = 1000 * 60 * 60 * 24 * 7 // 1 week
|
||||
|
||||
type TrainingPerformance = {
|
||||
trainingTime: number
|
||||
vectorizationTime: number
|
||||
export type Classifier = {
|
||||
isEnabled: boolean
|
||||
layersModel: LayersModel
|
||||
threshold: number
|
||||
hamCount: number
|
||||
spamCount: number
|
||||
}
|
||||
|
||||
export const spamClassifierTokenizer = (text: string): string[] => tokenize(text)
|
||||
|
||||
export class SpamClassifier {
|
||||
private readonly classifier: Map<Id, { model: LayersModel; isEnabled: boolean }>
|
||||
// Visible for testing
|
||||
readonly classifiers: Map<Id, Classifier>
|
||||
sparseVectorCompressor: SparseVectorCompressor
|
||||
spamMailProcessor: SpamMailProcessor
|
||||
|
||||
constructor(
|
||||
private readonly offlineStorage: OfflineStoragePersistence,
|
||||
private readonly offlineStorageCache: CacheStorage,
|
||||
private readonly initializer: SpamClassificationInitializer,
|
||||
private readonly cacheStorage: CacheStorage,
|
||||
private readonly initializer: SpamClassificationDataDealer,
|
||||
private readonly deterministic: boolean = false,
|
||||
private readonly preprocessConfiguration: PreprocessConfiguration = DEFAULT_PREPROCESS_CONFIGURATION,
|
||||
private readonly vectorizer: HashingVectorizer = new HashingVectorizer(),
|
||||
) {
|
||||
this.classifier = new Map()
|
||||
// enable tensorflow production mode (disables tfjs correctness/debug checks in favor of performance)
enableProdMode()
|
||||
this.classifiers = new Map()
|
||||
this.sparseVectorCompressor = new SparseVectorCompressor()
|
||||
this.spamMailProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, new HashingVectorizer(), this.sparseVectorCompressor)
|
||||
}
|
||||
|
||||
calculateThreshold(hamCount: number, spamCount: number) {
|
||||
const hamToSpamRatio = hamCount / spamCount
|
||||
let threshold = -0.1 * Math.log10(hamToSpamRatio * 10) + 0.65
|
||||
if (threshold < DEFAULT_PREDICTION_THRESHOLD) {
|
||||
threshold = DEFAULT_PREDICTION_THRESHOLD
|
||||
} else if (threshold > 0.75) {
|
||||
threshold = 0.75
|
||||
}
|
||||
return threshold
|
||||
}
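// Worked example (illustrative): threshold = -0.1 * log10((ham/spam) * 10) + 0.65, clamped to [0.55, 0.75]:
//   ham:spam = 1:100 (ratio 0.01) -> 0.75 (upper bound)
//   ham:spam = 1:10  (ratio 0.1)  -> 0.65
//   ham:spam = 1:1   (ratio 1)    -> 0.55
//   ham:spam = 10:1  (ratio 10)   -> raw 0.45, clamped up to DEFAULT_PREDICTION_THRESHOLD (0.55)
// i.e. spam-heavy datasets require a higher predicted probability before a mail is flagged as spam.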
|
||||
|
||||
public async initialize(ownerGroup: Id): Promise<void> {
|
||||
const loadedModel = await this.loadModel(ownerGroup)
|
||||
const classifier = await this.loadClassifier(ownerGroup)
|
||||
|
||||
const storage = assertNotNull(this.offlineStorageCache)
|
||||
setInterval(async () => {
|
||||
const cutoffDate = Date.now() - FULL_RETRAINING_INTERVAL
|
||||
const lastFullTrainingTime = await storage.getLastTrainedFromScratchTime()
|
||||
|
||||
if (cutoffDate > lastFullTrainingTime) {
|
||||
await this.retrainModelFromScratch(storage, ownerGroup, cutoffDate)
|
||||
if (classifier) {
|
||||
const timeSinceLastFullTraining = Date.now() - FULL_RETRAINING_INTERVAL
|
||||
const lastFullTrainingTime = await this.cacheStorage.getLastTrainedFromScratchTime()
|
||||
if (timeSinceLastFullTraining > lastFullTrainingTime) {
|
||||
console.log(`Retraining from scratch as last train (${new Date(lastFullTrainingTime)}) was more than a week ago`)
|
||||
await this.trainFromScratch(this.cacheStorage, ownerGroup)
|
||||
} else {
|
||||
console.log("loaded existing spam classification model from database")
|
||||
this.classifiers.set(ownerGroup, classifier)
|
||||
await this.updateAndSaveModel(this.cacheStorage, ownerGroup)
|
||||
}
|
||||
}, FULL_RETRAINING_INTERVAL)
|
||||
if (isNotNull(loadedModel)) {
|
||||
console.log("Loaded existing spam classification model from database")
|
||||
|
||||
this.classifier.set(ownerGroup, { model: loadedModel, isEnabled: true })
|
||||
|
||||
await this.updateAndSaveModel(storage, ownerGroup)
|
||||
setInterval(async () => {
|
||||
await this.updateAndSaveModel(storage, ownerGroup)
|
||||
await this.updateAndSaveModel(this.cacheStorage, ownerGroup)
|
||||
}, TRAINING_INTERVAL)
|
||||
} else {
|
||||
console.log("no existing model found. Training from scratch ...")
|
||||
await this.trainFromScratch(this.cacheStorage, ownerGroup)
|
||||
setInterval(async () => {
|
||||
await this.updateAndSaveModel(this.cacheStorage, ownerGroup)
|
||||
}, TRAINING_INTERVAL)
|
||||
return
|
||||
}
|
||||
|
||||
console.log("No existing model found. Training from scratch...")
|
||||
await this.trainFromScratch(storage, ownerGroup)
|
||||
setInterval(async () => {
|
||||
await this.updateAndSaveModel(storage, ownerGroup)
|
||||
}, TRAINING_INTERVAL)
|
||||
}
|
||||
|
||||
private async trainFromScratch(storage: CacheStorage, ownerGroup: string) {
|
||||
const data = await this.initializer.init(ownerGroup)
|
||||
if (data.length === 0) {
|
||||
console.log("No training data found. Training from scratch aborted.")
|
||||
return
|
||||
}
|
||||
await this.initialTraining(data)
|
||||
await this.saveModel(ownerGroup)
|
||||
await storage.setLastTrainedFromScratchTime(Date.now())
|
||||
await storage.setLastTrainedTime(Date.now())
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public async updateAndSaveModel(storage: CacheStorage, ownerGroup: Id) {
|
||||
const isModelUpdated = await this.updateModelFromCutoff(await storage.getLastTrainedTime(), ownerGroup)
|
||||
if (isModelUpdated) {
|
||||
await this.saveModel(ownerGroup)
|
||||
await storage.setLastTrainedTime(Date.now())
|
||||
}
|
||||
}
|
||||
|
||||
// visibleForTesting
|
||||
public preprocessMail(mail: SpamTrainMailDatum | SpamPredMailDatum): string {
|
||||
const mailText = this.concatSubjectAndBody(mail)
|
||||
|
||||
if (!this.preprocessConfiguration.isPreprocessMails) {
|
||||
return mailText
|
||||
public async updateAndSaveModel(storage: CacheStorage, ownerGroup: Id) {
|
||||
const isModelUpdated = await this.updateModelFromIndexStartId(await storage.getLastTrainingDataIndexId(), ownerGroup)
|
||||
if (isModelUpdated) {
|
||||
console.log(`Model updated successfully at ${Date.now()}`)
|
||||
}
|
||||
|
||||
let preprocessedMail = mailText
|
||||
|
||||
// 1. Remove HTML code
|
||||
if (this.preprocessConfiguration.isRemoveHTML) {
|
||||
preprocessedMail = htmlToText(preprocessedMail)
|
||||
}
|
||||
|
||||
// 2. Replace dates
|
||||
if (this.preprocessConfiguration.isReplaceDates) {
|
||||
for (const datePattern of ML_DATE_REGEX) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(datePattern, ML_DATE_TOKEN)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Replace urls
|
||||
if (this.preprocessConfiguration.isReplaceUrls) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_URL_REGEX, ML_URL_TOKEN)
|
||||
}
|
||||
|
||||
// 4. Replace email addresses
|
||||
if (this.preprocessConfiguration.isReplaceMailAddresses) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_EMAIL_ADDR_REGEX, ML_EMAIL_ADDR_TOKEN)
|
||||
}
|
||||
|
||||
// 5. Replace Bitcoin addresses
|
||||
if (this.preprocessConfiguration.isReplaceBitcoinAddress) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_BITCOIN_REGEX, ML_BITCOIN_TOKEN)
|
||||
}
|
||||
|
||||
// 6. Replace credit card numbers
|
||||
if (this.preprocessConfiguration.isReplaceCreditCards) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_CREDIT_CARD_REGEX, ML_CREDIT_CARD_TOKEN)
|
||||
}
|
||||
|
||||
// 7. Replace remaining numbers
|
||||
if (this.preprocessConfiguration.isReplaceNumbers) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_NUMBER_SEQUENCE_REGEX, ML_NUMBER_SEQUENCE_TOKEN)
|
||||
}
|
||||
|
||||
// 8. Remove special characters
|
||||
if (this.preprocessConfiguration.isReplaceSpecialCharacters) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_SPECIAL_CHARACTER_REGEX, ML_SPECIAL_CHARACTER_TOKEN)
|
||||
}
|
||||
|
||||
// 9. Remove spaces at end of lines
|
||||
if (this.preprocessConfiguration.isRemoveSpaceBeforeNewLine) {
|
||||
preprocessedMail = preprocessedMail.replaceAll(ML_SPACE_BEFORE_NEW_LINE_REGEX, ML_SPACE_BEFORE_NEW_LINE_TOKEN)
|
||||
}
|
||||
|
||||
preprocessedMail += this.getHeaderFeatures(mail)
|
||||
|
||||
return preprocessedMail
|
||||
}
|
||||
|
||||
private getHeaderFeatures(mail: SpamTrainMailDatum | SpamPredMailDatum): string {
|
||||
const { sender, toRecipients, ccRecipients, bccRecipients, authStatus } = mail
|
||||
return `\n${sender}\n${toRecipients}\n${ccRecipients}\n${bccRecipients}\n${authStatus}`
|
||||
}
|
||||
public async initialTraining(ownerGroup: Id, trainingDataset: TrainingDataset): Promise<void> {
|
||||
const { trainingData: clientSpamTrainingData, hamCount, spamCount } = trainingDataset
|
||||
const trainingInput = await promiseMap(
|
||||
clientSpamTrainingData,
|
||||
(d) => {
|
||||
const vector = this.sparseVectorCompressor.binaryToVector(d.vector)
|
||||
const label = d.spamDecision === SpamDecision.BLACKLIST ? 1 : 0
|
||||
return { vector, label }
|
||||
},
|
||||
{
|
||||
concurrency: 5,
|
||||
},
|
||||
)
|
||||
const vectors = trainingInput.map((input) => input.vector)
|
||||
const labels = trainingInput.map((input) => input.label)
|
||||
|
||||
public async initialTraining(mails: SpamTrainMailDatum[]): Promise<TrainingPerformance> {
|
||||
const preprocessingStart = performance.now()
|
||||
const tokenizedMails = await promiseMap(mails, (mail) => spamClassifierTokenizer(this.preprocessMail(mail)))
|
||||
const preprocessingTime = performance.now() - preprocessingStart
|
||||
|
||||
const vectorizationStart = performance.now()
|
||||
|
||||
const vectors = await this.vectorizer.transform(tokenizedMails)
|
||||
const labels = mails.map((mail) => (mail.isSpam ? 1 : 0))
|
||||
const vectorizationTime = performance.now() - vectorizationStart
|
||||
|
||||
const xs = tensor2d(vectors, [vectors.length, this.vectorizer.dimension], undefined)
|
||||
const xs = tensor2d(vectors, [trainingInput.length, this.sparseVectorCompressor.dimension], undefined)
|
||||
const ys = tensor1d(labels, undefined)
|
||||
|
||||
const classifier = this.buildModel(this.vectorizer.dimension)
|
||||
const layersModel = this.buildModel(this.sparseVectorCompressor.dimension)
|
||||
|
||||
const trainingStart = performance.now()
|
||||
await classifier.fit(xs, ys, {
|
||||
await layersModel.fit(xs, ys, {
|
||||
epochs: 16,
|
||||
batchSize: 32,
|
||||
shuffle: !this.deterministic,
|
||||
|
|
@ -271,80 +143,100 @@ export class SpamClassifier {
|
|||
// }
|
||||
// },
|
||||
// },
|
||||
yieldEvery: 15,
|
||||
})
|
||||
const trainingTime = performance.now() - trainingStart
|
||||
|
||||
// When using the webgl backend we need to manually dispose @tensorflow tensors
|
||||
// when using the webgl backend we need to manually dispose @tensorflow tensors
|
||||
xs.dispose()
|
||||
ys.dispose()
|
||||
|
||||
this.classifier.set(mails[0].ownerGroup, { model: classifier, isEnabled: true })
|
||||
const threshold = this.calculateThreshold(trainingDataset.hamCount, trainingDataset.spamCount)
|
||||
const classifier = {
|
||||
layersModel: layersModel,
|
||||
isEnabled: true,
|
||||
hamCount,
|
||||
spamCount,
|
||||
threshold,
|
||||
}
|
||||
this.classifiers.set(ownerGroup, classifier)
|
||||
|
||||
console.log(
|
||||
`### Finished Initial Training ### (total trained mails: ${mails.length}, preprocessing time: ${preprocessingTime}, vectorization time: ${vectorizationTime}ms, training time: ${trainingTime})`,
|
||||
`### Finished Initial Spam Classification Model Training ### (total trained mails: ${clientSpamTrainingData.length} (ham:spam ${hamCount}:${spamCount} => threshold:${threshold}), training time: ${trainingTime})`,
|
||||
)
|
||||
|
||||
return { vectorizationTime, trainingTime }
|
||||
}
|
||||
|
||||
public async updateModelFromCutoff(cutoffTimestamp: number, ownerGroup: Id): Promise<boolean> {
|
||||
public async updateModelFromIndexStartId(indexStartId: Id, ownerGroup: Id): Promise<boolean> {
|
||||
try {
|
||||
const modelNotEnabled = this.classifier.get(ownerGroup) === undefined || this.classifier.get(ownerGroup)?.isEnabled === false
|
||||
const modelNotEnabled = this.classifiers.get(ownerGroup) === undefined || this.classifiers.get(ownerGroup)?.isEnabled === false
|
||||
if (modelNotEnabled) {
|
||||
console.warn("Client spam classification is not enabled or there were errors during training")
|
||||
console.warn("client spam classification is not enabled or there were errors during training")
|
||||
return false
|
||||
}
|
||||
|
||||
const newTrainingMails = await assertNotNull(this.offlineStorage).getCertainSpamClassificationTrainingDataAfterCutoff(cutoffTimestamp, ownerGroup)
|
||||
if (newTrainingMails.length === 0) {
|
||||
console.log("No new training data since last update.")
|
||||
const trainingDataset = await this.initializer.fetchPartialTrainingDataFromIndexStartId(indexStartId, ownerGroup)
|
||||
if (isEmpty(trainingDataset.trainingData)) {
|
||||
console.log("no new spam classification training data since last update")
|
||||
return false
|
||||
}
|
||||
console.log(`Retraining model with ${newTrainingMails.length} new mails (lastModified > ${new Date(cutoffTimestamp).toString()})`)
|
||||
|
||||
return await this.updateModel(ownerGroup, newTrainingMails)
|
||||
console.log(
|
||||
`retraining spam classification model with ${trainingDataset.trainingData.length} new mails (ham:spam ${trainingDataset.hamCount}:${trainingDataset.spamCount}) (lastTrainingDataIndexId > ${indexStartId})`,
|
||||
)
|
||||
|
||||
return await this.updateModel(ownerGroup, trainingDataset)
|
||||
} catch (e) {
|
||||
console.error("Failed trying to update the model: ", e)
|
||||
console.error("failed to update the model", e)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
async updateModel(ownerGroup: Id, newTrainingMails: SpamTrainMailDatum[]) {
|
||||
// visibleForTesting
|
||||
async updateModel(ownerGroup: Id, trainingDataset: TrainingDataset): Promise<boolean> {
|
||||
const retrainingStart = performance.now()
|
||||
|
||||
const modelToUpdate = assertNotNull(this.classifier.get(ownerGroup))
|
||||
const tokenizedMailsArray = await promiseMap(newTrainingMails, async (mail) => {
|
||||
const preprocessedMail = this.preprocessMail(mail)
|
||||
const tokenizedMail = spamClassifierTokenizer(preprocessedMail)
|
||||
return { tokenizedMail, isSpamConfidence: mail.isSpamConfidence, isSpam: mail.isSpam ? 1 : 0 }
|
||||
})
|
||||
if (isEmpty(trainingDataset.trainingData)) {
|
||||
console.log("no new spam classification training data since last update")
|
||||
return false
|
||||
}
|
||||
|
||||
const tokenizedMailsByConfidence = groupByAndMap(
|
||||
tokenizedMailsArray,
|
||||
({ isSpamConfidence }) => isSpamConfidence,
|
||||
({ isSpam, tokenizedMail }) => {
|
||||
return { isSpam, tokenizedMail }
|
||||
const modelToUpdate = assertNotNull(this.classifiers.get(ownerGroup))
|
||||
const trainingInput = await promiseMap(
|
||||
trainingDataset.trainingData,
|
||||
(d) => {
|
||||
const vector = this.sparseVectorCompressor.binaryToVector(d.vector)
|
||||
const label = d.spamDecision === SpamDecision.BLACKLIST ? 1 : 0
|
||||
const isSpamConfidence = Number(d.confidence)
|
||||
return { vector, label, isSpamConfidence }
|
||||
},
|
||||
{
|
||||
concurrency: 5,
|
||||
},
|
||||
)
|
||||
|
||||
const trainingInputByConfidence = groupByAndMap(
|
||||
trainingInput,
|
||||
({ isSpamConfidence }) => isSpamConfidence,
|
||||
({ vector, label }) => {
|
||||
return { vector, label }
|
||||
},
|
||||
)
|
||||
|
||||
modelToUpdate.isEnabled = false
|
||||
|
||||
try {
|
||||
for (const [isSpamConfidence, tokenizedMails] of tokenizedMailsByConfidence) {
|
||||
const vectors = await this.vectorizer.transform(tokenizedMails.map(({ tokenizedMail }) => tokenizedMail))
|
||||
const xs = tensor2d(vectors, [vectors.length, this.vectorizer.dimension], undefined)
|
||||
const ys = tensor1d(
|
||||
tokenizedMails.map(({ isSpam }) => isSpam),
|
||||
undefined,
|
||||
)
|
||||
for (const [isSpamConfidence, trainingInput] of trainingInputByConfidence) {
|
||||
const vectors = trainingInput.map((input) => input.vector)
|
||||
const labels = trainingInput.map((input) => input.label)
|
||||
|
||||
const xs = tensor2d(vectors, [vectors.length, this.sparseVectorCompressor.dimension], "int32")
|
||||
const ys = tensor1d(labels, "int32")
|
||||
|
||||
// We need a way to put weight on a specific mail. The ideal way would be to pass sampleWeight to modelFitArgs,
// but that is not yet implemented: https://github.com/tensorflow/tfjs/blob/0fc04d958ea592f3b8db79a8b3b497b5c8904097/tfjs-layers/src/engine/training.ts#L1487
//
// Previously tried approaches:
// 1) increasing the value in the vectorizer by isSpamConfidence instead of 1
// 2) duplicating the mails with higher isSpamConfidence and calling .fit once
// For now, we use the following workaround:
// re-fit the vectors multiple times corresponding to `isSpamConfidence`
|
||||
const modelFitArgs: ModelFitArgs = {
|
||||
epochs: 8,
|
||||
batchSize: 32,
|
||||
|
|
@ -354,62 +246,51 @@ export class SpamClassifier {
|
|||
// console.log(`Epoch ${epoch + 1} - Loss: ${logs!.loss.toFixed(4)}`)
|
||||
// },
|
||||
// },
|
||||
yieldEvery: 15,
|
||||
}
|
||||
for (let i = 0; i <= isSpamConfidence; i++) {
|
||||
await modelToUpdate.model.fit(xs, ys, modelFitArgs)
|
||||
await modelToUpdate.layersModel.fit(xs, ys, modelFitArgs)
|
||||
}
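// e.g. a datum with isSpamConfidence 0 is fitted once, one with isSpamConfidence 4 is fitted five
// times, giving mails that were moved explicitly by the user roughly five times the training weight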
|
||||
|
||||
// When using the webgl backend we need to manually dispose @tensorflow tensors
|
||||
// when using the webgl backend we need to manually dispose @tensorflow tensors
|
||||
xs.dispose()
|
||||
ys.dispose()
|
||||
}
|
||||
} finally {
|
||||
modelToUpdate.hamCount += trainingDataset.hamCount
|
||||
modelToUpdate.spamCount += trainingDataset.spamCount
|
||||
modelToUpdate.threshold = this.calculateThreshold(modelToUpdate.hamCount, modelToUpdate.spamCount)
|
||||
modelToUpdate.isEnabled = true
|
||||
}
|
||||
|
||||
console.log(`Retraining finished. Took: ${performance.now() - retrainingStart}ms`)
|
||||
const trainingMetadata = `Total Ham: ${modelToUpdate.hamCount} Spam: ${modelToUpdate.spamCount} threshold: ${modelToUpdate.threshold}`
|
||||
console.log(`retraining spam classification model finished, took: ${performance.now() - retrainingStart}ms ${trainingMetadata}`)
|
||||
await this.saveModel(ownerGroup)
|
||||
await this.cacheStorage.setLastTrainingDataIndexId(trainingDataset.lastTrainingDataIndexId)
|
||||
return true
|
||||
}
|
||||
|
||||
// visibleForTesting
|
||||
public async predict(spamPredMailDatum: SpamPredMailDatum): Promise<Nullable<boolean>> {
|
||||
const classifier = this.classifier.get(spamPredMailDatum.ownerGroup)
|
||||
public async predict(vector: number[], ownerGroup: Id): Promise<Nullable<boolean>> {
|
||||
const classifier = this.classifiers.get(ownerGroup)
|
||||
if (classifier == null || !classifier.isEnabled) {
|
||||
return null
|
||||
}
|
||||
|
||||
const preprocessedMail = this.preprocessMail(spamPredMailDatum)
|
||||
const tokenizedMail = spamClassifierTokenizer(preprocessedMail)
|
||||
const vectors = await assertNotNull(this.vectorizer).transform([tokenizedMail])
|
||||
const vectors = [vector]
|
||||
const xs = tensor2d(vectors, [vectors.length, this.sparseVectorCompressor.dimension], "int32")
|
||||
|
||||
const xs = tensor2d(vectors, [vectors.length, assertNotNull(this.vectorizer).dimension], undefined)
|
||||
const predictionTensor = classifier.model.predict(xs) as Tensor
|
||||
const predictionTensor = classifier.layersModel.predict(xs) as Tensor
|
||||
const predictionData = await predictionTensor.data()
|
||||
const prediction = predictionData[0]
|
||||
|
||||
console.log(`predicted new mail to be with probability ${prediction.toFixed(2)} spam. Owner Group: ${spamPredMailDatum.ownerGroup}`)
|
||||
console.log(`predicted new mail to be with probability ${prediction.toFixed(2)} spam. Owner Group: ${ownerGroup}`)
|
||||
|
||||
// When using the webgl backend we need to manually dispose @tensorflow tensors
|
||||
// when using the webgl backend we need to manually dispose @tensorflow tensors
|
||||
xs.dispose()
|
||||
predictionTensor.dispose()
|
||||
|
||||
return prediction > PREDICTION_THRESHOLD
|
||||
}
|
||||
|
||||
public getSpamClassification(mailId: IdTuple) {
|
||||
return this.offlineStorage.getSpamClassification(mailId)
|
||||
}
|
||||
|
||||
public updateSpamClassification(mailId: IdTuple, isSpam: boolean, isSpamConfidence: number) {
|
||||
return this.offlineStorage.updateSpamClassification(mailId, isSpam, isSpamConfidence)
|
||||
}
|
||||
|
||||
public storeSpamClassification(spamTrainMailDatum: SpamTrainMailDatum) {
|
||||
return this.offlineStorage.storeSpamClassification(spamTrainMailDatum)
|
||||
}
|
||||
|
||||
public deleteSpamClassification(mailId: IdTuple) {
|
||||
return this.offlineStorage.deleteSpamClassification(mailId)
|
||||
return prediction > classifier.threshold
|
||||
}
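// Usage sketch (hedged, the caller is not part of this hunk): the mail is vectorized first and the raw
// vector is passed in, roughly:
//   const vector = await spamClassifier.vectorize(spamMailDatum)
//   const isSpam = await spamClassifier.predict(vector, ownerGroup) // null when no enabled classifier exists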
|
||||
|
||||
// visibleForTesting
|
||||
|
|
@ -451,30 +332,126 @@ export class SpamClassifier {
|
|||
}
|
||||
|
||||
public async saveModel(ownerGroup: Id): Promise<void> {
|
||||
const modelArtifacts = await this.getModelArtifacts(ownerGroup)
|
||||
if (modelArtifacts == null) {
|
||||
throw new Error("Model is not available, and therefore can not be saved")
|
||||
const spamClassificationModel = await this.getSpamClassificationModel(ownerGroup)
|
||||
if (spamClassificationModel == null) {
|
||||
throw new Error("spam classification model is not available, and therefore can not be saved")
|
||||
}
|
||||
await this.cacheStorage.setSpamClassificationModel(spamClassificationModel)
|
||||
}
|
||||
|
||||
async vectorizeAndCompress(mailDatum: SpamMailDatum) {
|
||||
return await this.spamMailProcessor.vectorizeAndCompress(mailDatum)
|
||||
}
|
||||
|
||||
async vectorize(mailDatum: SpamMailDatum) {
|
||||
return await this.spamMailProcessor.vectorize(mailDatum)
|
||||
}
|
||||
|
||||
// visibleForTesting
|
||||
public async loadClassifier(ownerGroup: Id): Promise<Nullable<Classifier>> {
|
||||
const spamClassificationModel = await assertNotNull(this.cacheStorage).getSpamClassificationModel(ownerGroup)
|
||||
if (spamClassificationModel) {
|
||||
const modelTopology = JSON.parse(spamClassificationModel.modelTopology)
|
||||
const weightSpecs = JSON.parse(spamClassificationModel.weightSpecs)
|
||||
const weightData = spamClassificationModel.weightData.buffer.slice(
|
||||
spamClassificationModel.weightData.byteOffset,
|
||||
spamClassificationModel.weightData.byteOffset + spamClassificationModel.weightData.byteLength,
|
||||
)
|
||||
const modelArtifacts = { modelTopology, weightSpecs, weightData }
|
||||
const layersModel = await loadLayersModelFromIOHandler(fromMemory(modelArtifacts), undefined, undefined)
|
||||
layersModel.compile({
|
||||
optimizer: "adam",
|
||||
loss: "binaryCrossentropy",
|
||||
metrics: ["accuracy"],
|
||||
})
|
||||
const threshold = this.calculateThreshold(spamClassificationModel.hamCount, spamClassificationModel.spamCount)
|
||||
return {
|
||||
isEnabled: true,
|
||||
layersModel: layersModel,
|
||||
threshold,
|
||||
hamCount: spamClassificationModel.hamCount,
|
||||
spamCount: spamClassificationModel.spamCount,
|
||||
}
|
||||
} else {
|
||||
console.log("loading the spam classification spamClassificationModel from offline db failed ... ")
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
// visibleForTesting
|
||||
public async cloneClassifier(): Promise<SpamClassifier> {
|
||||
const newClassifier = new SpamClassifier(this.cacheStorage, this.initializer, this.deterministic)
|
||||
newClassifier.spamMailProcessor = this.spamMailProcessor
|
||||
newClassifier.sparseVectorCompressor = this.sparseVectorCompressor
|
||||
for (const [ownerGroup, { layersModel: _, isEnabled, threshold, hamCount, spamCount }] of this.classifiers) {
|
||||
const modelArtifacts = assertNotNull(await this.getModelArtifacts(ownerGroup))
|
||||
const newModel = await loadLayersModelFromIOHandler(fromMemory(modelArtifacts), undefined, undefined)
|
||||
newModel.compile({
|
||||
optimizer: "adam",
|
||||
loss: "binaryCrossentropy",
|
||||
metrics: ["accuracy"],
|
||||
})
|
||||
newClassifier.classifiers.set(ownerGroup, {
|
||||
layersModel: newModel,
|
||||
isEnabled,
|
||||
threshold,
|
||||
hamCount,
|
||||
spamCount,
|
||||
})
|
||||
}
|
||||
|
||||
return newClassifier
|
||||
}
|
||||
|
||||
// visibleForTesting
|
||||
public addSpamClassifierForOwner(ownerGroup: Id, classifier: Classifier) {
|
||||
this.classifiers.set(ownerGroup, classifier)
|
||||
}
|
||||
|
||||
private async trainFromScratch(storage: CacheStorage, ownerGroup: string) {
|
||||
const trainingDataset = await this.initializer.fetchAllTrainingData(ownerGroup)
|
||||
const { trainingData, lastTrainingDataIndexId } = trainingDataset
|
||||
if (isEmpty(trainingData)) {
|
||||
console.log("No training trainingData found. Training from scratch aborted.")
|
||||
return
|
||||
}
|
||||
await this.initialTraining(ownerGroup, trainingDataset)
|
||||
await this.saveModel(ownerGroup)
|
||||
await storage.setLastTrainedFromScratchTime(Date.now())
|
||||
await storage.setLastTrainingDataIndexId(lastTrainingDataIndexId)
|
||||
}
|
||||
|
||||
private async getSpamClassificationModel(ownerGroup: Id): Promise<SpamClassificationModel | null> {
|
||||
const classifier = this.classifiers.get(ownerGroup)
|
||||
if (!classifier) {
|
||||
return null
|
||||
}
|
||||
const modelArtifacts = await this.getModelArtifacts(ownerGroup)
|
||||
if (!modelArtifacts) {
|
||||
return null
|
||||
}
|
||||
const modelTopology = JSON.stringify(modelArtifacts.modelTopology)
|
||||
const weightSpecs = JSON.stringify(modelArtifacts.weightSpecs)
|
||||
const weightData = new Uint8Array(modelArtifacts.weightData as ArrayBuffer)
|
||||
return {
|
||||
modelTopology,
|
||||
weightSpecs,
|
||||
weightData,
|
||||
ownerGroup,
|
||||
hamCount: classifier.hamCount,
|
||||
spamCount: classifier.spamCount,
|
||||
}
|
||||
await assertNotNull(this.offlineStorage).putSpamClassificationModel(modelArtifacts.spamClassificationModel)
|
||||
}
|
||||
|
||||
private async getModelArtifacts(ownerGroup: Id) {
|
||||
const classifier = this.classifier.get(ownerGroup)?.model ?? null
|
||||
if (!classifier) return null
|
||||
const spamClassificationModel = defer<SpamClassificationModel>()
|
||||
const modelArtificats = new Promise<ModelArtifacts>((resolve) => {
|
||||
const classifier = this.classifiers.get(ownerGroup)
|
||||
if (!classifier) {
|
||||
return null
|
||||
}
|
||||
|
||||
return await new Promise<ModelArtifacts>((resolve) => {
|
||||
const saveInfo = withSaveHandler(async (artifacts: any) => {
|
||||
resolve(artifacts)
|
||||
const modelTopology = JSON.stringify(artifacts.modelTopology)
|
||||
const weightSpecs = JSON.stringify(artifacts.weightSpecs)
|
||||
const weightData = new Uint8Array(artifacts.weightData as ArrayBuffer)
|
||||
|
||||
spamClassificationModel.resolve({
|
||||
modelTopology,
|
||||
weightSpecs,
|
||||
weightData,
|
||||
ownerGroup,
|
||||
})
|
||||
return {
|
||||
modelArtifactsInfo: {
|
||||
dateSaved: new Date(),
|
||||
|
|
@ -482,80 +459,7 @@ export class SpamClassifier {
|
|||
},
|
||||
}
|
||||
})
|
||||
classifier.save(saveInfo, undefined)
|
||||
classifier.layersModel.save(saveInfo, undefined)
|
||||
})
|
||||
|
||||
return {
|
||||
modelArtifacts: await modelArtificats,
|
||||
spamClassificationModel: await spamClassificationModel.promise,
|
||||
}
|
||||
}

	// visibleForTesting
	public async loadModel(ownerGroup: Id): Promise<Nullable<LayersModel>> {
		const model = await assertNotNull(this.offlineStorage).getSpamClassificationModel(ownerGroup)
		if (model) {
			const modelTopology = JSON.parse(model.modelTopology)
			const weightSpecs = JSON.parse(model.weightSpecs)
			const weightData = model.weightData.buffer.slice(model.weightData.byteOffset, model.weightData.byteOffset + model.weightData.byteLength)
			const classifier = await loadLayersModelFromIOHandler(fromMemory(modelTopology, weightSpecs, weightData, undefined), undefined)
			classifier.compile({
				optimizer: "adam",
				loss: "binaryCrossentropy",
				metrics: ["accuracy"],
			})
			return classifier
		} else {
			console.error("Loading the model from offline db failed")
			return null
		}
	}

	private concatSubjectAndBody(mail: SpamTrainMailDatum | SpamPredMailDatum) {
		const subject = mail.subject || ""
		const body = mail.body || ""
		const concatenated = `${subject}\n${body}`.trim()

		return concatenated.length > 0 ? concatenated : " "
	}

	private async retrainModelFromScratch(storage: CacheStorage, ownerGroup: Id, cutoffTimestamp: number) {
		console.log("Model is being re-trained from scratch, deleting old data")
		try {
			await assertNotNull(this.offlineStorage).deleteSpamClassificationTrainingDataBeforeCutoff(cutoffTimestamp, ownerGroup)
		} catch (e) {
			console.error("Failed to delete old training data: ", e)
			return
		}

		await this.trainFromScratch(storage, ownerGroup)
	}

	// visibleForTesting
	public async cloneClassifier(): Promise<SpamClassifier> {
		const newClassifier = new SpamClassifier(
			this.offlineStorage,
			this.offlineStorageCache,
			this.initializer,
			this.deterministic,
			this.preprocessConfiguration,
		)
		for (const [ownerGroup, { model: _, isEnabled }] of this.classifier) {
			const { modelArtifacts } = assertNotNull(await this.getModelArtifacts(ownerGroup))
			const newModel = await loadLayersModelFromIOHandler(fromMemory(modelArtifacts, undefined, undefined, undefined), undefined)
			newModel.compile({
				optimizer: "adam",
				loss: "binaryCrossentropy",
				metrics: ["accuracy"],
			})
			newClassifier.classifier.set(ownerGroup, { model: newModel, isEnabled })
		}

		return newClassifier
	}

	// visibleForTesting
	public addSpamClassifierForOwner(ownerGroup: Id, model: LayersModel, isEnabled: boolean) {
		this.classifier.set(ownerGroup, { model, isEnabled })
	}
}
@@ -11,6 +11,7 @@ import { glorotUniform } from "@tensorflow/tfjs-layers/dist/exports_initializers
// Core tensor ops
import { tensor2d } from "@tensorflow/tfjs-core"
import { tensor1d } from "@tensorflow/tfjs-core"
import { enableProdMode } from "@tensorflow/tfjs-core"
import { stringToHashBucketFast } from "@tensorflow/tfjs-core/dist/ops/string/string_to_hash_bucket_fast"

// IO handlers

@@ -30,4 +31,5 @@ export {
	withSaveHandler,
	fromMemory,
	stringToHashBucketFast,
	enableProdMode,
}

@@ -93,7 +93,7 @@ export interface WorkerInterface {
	readonly bulkMailLoader: BulkMailLoader
	readonly applicationTypesFacade: ApplicationTypesFacade
	readonly identityKeyCreator: IdentityKeyCreator
	readonly spamClassifier: SpamClassifier | null
	readonly spamClassifier: SpamClassifier
	readonly autosaveFacade: AutosaveFacade
}

@@ -112,9 +112,10 @@ import { PublicKeySignatureFacade } from "../../../common/api/worker/facades/Pub
import { AdminKeyLoaderFacade } from "../../../common/api/worker/facades/AdminKeyLoaderFacade"
import { IdentityKeyCreator } from "../../../common/api/worker/facades/lazy/IdentityKeyCreator"
import { PublicIdentityKeyProvider } from "../../../common/api/worker/facades/PublicIdentityKeyProvider"
import type { SpamClassifier } from "../spamClassification/SpamClassifier"
import { SpamClassifier } from "../spamClassification/SpamClassifier"
import { IdentityKeyTrustDatabase } from "../../../common/api/worker/facades/IdentityKeyTrustDatabase"
import { AutosaveFacade } from "../../../common/api/worker/facades/lazy/AutosaveFacade"
import { SpamClassificationDataDealer } from "../spamClassification/SpamClassificationDataDealer"

assertWorkerOrNode()

@@ -197,7 +198,7 @@ export type WorkerLocatorType = {
	contactFacade: lazyAsync<ContactFacade>

	// spam classification
	spamClassifier: SpamClassifier | null
	spamClassifier: SpamClassifier
}
export const locator: WorkerLocatorType = {} as any

@@ -328,14 +329,8 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData)
	if (isOfflineStorageAvailable() && !isAdminClient()) {
		locator.sqlCipherFacade = new SqlCipherFacadeSendDispatcher(locator.native)
		offlineStorageProvider = async () => {
			const { SpamClassifier } = await import("../spamClassification/SpamClassifier")
			const { SpamClassificationInitializer } = await import("../spamClassification/SpamClassificationInitializer")
			const offlineStorage = await offlineStorageIndexerPersistence()
			const spamClassifierInitializer = new SpamClassificationInitializer(locator.cachingEntityClient, offlineStorage, locator.bulkMailLoader)
			locator.spamClassifier = new SpamClassifier(offlineStorage, locator.cacheStorage, spamClassifierInitializer)

			const { KeyVerificationTableDefinitions } = await import("../../../common/api/worker/facades/IdentityKeyTrustDatabase.js")
			const { SearchTableDefinitions, SpamClassificationDefinitions } = await import("../index/OfflineStoragePersistence.js")
			const { SearchTableDefinitions } = await import("../index/OfflineStoragePersistence.js")
			const { AutosaveDraftsTableDefinitions } = await import("../../../common/api/worker/facades/lazy/OfflineStorageAutosaveFacade.js")

			const customCacheHandler = new CustomCacheHandlerMap(

@@ -358,12 +353,11 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData)
				locator.instancePipeline.modelMapper,
				typeModelResolver,
				customCacheHandler,
				Object.assign({}, KeyVerificationTableDefinitions, SearchTableDefinitions, AutosaveDraftsTableDefinitions, SpamClassificationDefinitions),
				Object.assign({}, KeyVerificationTableDefinitions, SearchTableDefinitions, AutosaveDraftsTableDefinitions),
			)
		}
	} else {
		offlineStorageProvider = async () => null
		locator.spamClassifier = null
	}
	const ephemeralStorageProvider = async () => {
		const customCacheHandler = new CustomCacheHandlerMap({

@@ -385,19 +379,18 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData)
		const { PdfWriter } = await import("../../../common/api/worker/pdf/PdfWriter.js")
		return new PdfWriter(new TextEncoder(), undefined)
	}

	locator.patchMerger = new PatchMerger(locator.cacheStorage, locator.instancePipeline, typeModelResolver, () => locator.crypto)

	// We don't want to cache within the admin client

	let cache: DefaultEntityRestCache | null = null
	if (!isAdminClient()) {
		cache = new DefaultEntityRestCache(entityRestClient, maybeUninitializedStorage, typeModelResolver, locator.patchMerger)
	}

	locator.cache = cache ?? entityRestClient

	locator.cachingEntityClient = new EntityClient(locator.cache, typeModelResolver)
	const nonCachingEntityClient = new EntityClient(entityRestClient, typeModelResolver)

	locator.cacheManagement = lazyMemoized(async () => {
		const { CacheManagementFacade } = await import("../../../common/api/worker/facades/lazy/CacheManagementFacade.js")
		return new CacheManagementFacade(locator.user, locator.cachingEntityClient, assertNotNull(cache))
@@ -607,7 +600,7 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData)
	if (!isTest() && sessionType !== SessionType.Temporary && !isAdminClient()) {
		// index new items in background
		console.log("initIndexer and SpamClassifier after log in")
		const indexingDone = fullLoginIndexerInit(worker)
		fullLoginIndexerInit(worker)
	}

	return mainInterface.loginListener.onFullLoginSuccess(sessionType, cacheInfo, credentials)
@@ -737,6 +730,7 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData)
			locator.user,
			locator.cachingEntityClient,
			locator.crypto,
			locator.cryptoWrapper,
			locator.serviceExecutor,
			await locator.blob(),
			fileApp,

@@ -745,6 +739,10 @@ export async function initLocator(worker: WorkerImpl, browserData: BrowserData)
			locator.publicEncryptionKeyProvider,
		)
	})

	const spamClassificationDataDealer = new SpamClassificationDataDealer(locator.cachingEntityClient, locator.bulkMailLoader, locator.mail)
	locator.spamClassifier = new SpamClassifier(locator.cacheStorage, spamClassificationDataDealer)

	const nativePushFacade = new NativePushFacadeSendDispatcher(worker)
	locator.calendar = lazyMemoized(async () => {
		const { CalendarFacade } = await import("../../../common/api/worker/facades/lazy/CalendarFacade.js")

@@ -83,6 +83,7 @@ import "./serviceworker/SwTest.js"
import "./api/worker/facades/KeyVerificationFacadeTest.js"
import "./api/worker/utils/SleepDetectorTest.js"
import "./api/worker/utils/spamClassification/HashingVectorizerTest.js"
import "./api/worker/utils/spamClassification/SpamClassificationDataDealerTest.js"
import "./api/worker/utils/spamClassification/PreprocessPatternsTest.js"
import "./calendar/AlarmSchedulerTest.js"
import "./calendar/CalendarAgendaViewTest.js"

@@ -115,6 +116,7 @@ import "./gui/base/WizardDialogNTest.js"
import "./login/LoginViewModelTest.js"
import "./login/PostLoginUtilsTest.js"
import "./mail/InboxRuleHandlerTest.js"
import "./mail/ProcessInboxHandlerTest.js"
import "./mail/KnowledgeBaseSearchFilterTest.js"
import "./mail/MailModelTest.js"
import "./mail/MailUtilsSignatureTest.js"

@@ -211,6 +213,7 @@ async function setupSuite({ integration }: { integration?: boolean }) {
	if (typeof process !== "undefined") {
		// setup the Entropy for all testcases
		await random.addEntropy([{ data: 36, entropy: 256, source: "key" }])
		await import("./api/worker/utils/spamClassification/SparseVectorCompressorTest.js")
		await import("./api/worker/utils/spamClassification/SpamClassifierTest.js")
		await import("./api/worker/offline/OfflineStorageMigratorTest.js")
		await import("./api/worker/offline/OfflineStorageTest.js")
|
|
|
|||
|
|
@ -578,7 +578,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
senderIdentityKeyPair.publicKey,
|
||||
senderKeyVersion,
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
const sessionKey = neverNull(await crypto.resolveSessionKey(mail))
|
||||
|
||||
|
|
@ -614,7 +617,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
testData.senderIdentityKeyPair.publicKey,
|
||||
anything(),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
await crypto.enforceSessionKeyUpdateIfNeeded(testData.mail, files)
|
||||
verify(ownerEncSessionKeysUpdateQueue.postUpdateSessionKeysService(anything()), { times: 1 })
|
||||
|
|
@ -962,7 +968,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
testData.senderIdentityKeyPair.publicKey,
|
||||
parseKeyVersion(senderKeyVersion),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
const sessionKey: AesKey = neverNull(await crypto.resolveSessionKey(testData.mail))
|
||||
|
||||
|
|
@ -995,7 +1004,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
testData.senderIdentityKeyPair.publicKey,
|
||||
parseKeyVersion(senderKeyVersion),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
const sessionKey: AesKey = neverNull(await crypto.resolveSessionKey(testData.mail))
|
||||
|
||||
|
|
@ -1029,7 +1041,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
testData.senderIdentityKeyPair.publicKey,
|
||||
parseKeyVersion(senderKeyVersion),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_FAILED, verificationState: PresentableKeyVerificationState.ALERT })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_FAILED,
|
||||
verificationState: PresentableKeyVerificationState.ALERT,
|
||||
})
|
||||
|
||||
const sessionKey = neverNull(await crypto.resolveSessionKey(testData.mail))
|
||||
|
||||
|
|
@ -1258,7 +1273,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
anything(),
|
||||
anything(),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
const sessionKey = neverNull(await crypto.resolveSessionKey(testData.mail))
|
||||
|
||||
|
|
@ -1281,7 +1299,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
anything(),
|
||||
anything(),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
// do not use testdouble here because it's hard to not break the function itself and then verify invocations
|
||||
const decryptAndMapToInstance = (instancePipeline.cryptoMapper.decryptParsedInstance = spy(instancePipeline.cryptoMapper.decryptParsedInstance))
|
||||
|
|
@ -1310,7 +1331,10 @@ o.spec("CryptoFacadeTest", function () {
|
|||
anything(),
|
||||
anything(),
|
||||
),
|
||||
).thenResolve({ authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED, verificationState: PresentableKeyVerificationState.SECURE })
|
||||
).thenResolve({
|
||||
authStatus: EncryptionAuthStatus.TUTACRYPT_AUTHENTICATION_SUCCEEDED,
|
||||
verificationState: PresentableKeyVerificationState.SECURE,
|
||||
})
|
||||
|
||||
const mailSessionKey = neverNull(await crypto.resolveSessionKey(testData.mail))
|
||||
const bucketKey = assertNotNull(testData.mail.bucketKey)
|
||||
|
|
@ -1860,6 +1884,7 @@ o.spec("CryptoFacadeTest", function () {
|
|||
keyVerificationState: null,
|
||||
processingState: ProcessingState.INBOX_RULE_APPLIED,
|
||||
clientSpamClassifierResult: null,
|
||||
processNeeded: false,
|
||||
})
|
||||
|
||||
// casting here is fine, since we just want to mimic server response data
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import { UnreadMailStateService } from "../../../../../src/common/api/entities/t
|
|||
import { BucketKeyTypeRef, InstanceSessionKey, InstanceSessionKeyTypeRef } from "../../../../../src/common/api/entities/sys/TypeRefs"
|
||||
import { OwnerEncSessionKeyProvider } from "../../../../../src/common/api/worker/rest/EntityRestClient"
|
||||
import { elementIdPart, getElementId } from "../../../../../src/common/api/common/utils/EntityUtils"
|
||||
import { VersionedEncryptedKey } from "../../../../../src/common/api/worker/crypto/CryptoWrapper"
|
||||
import { CryptoWrapper, VersionedEncryptedKey } from "../../../../../src/common/api/worker/crypto/CryptoWrapper"
|
||||
import { Recipient } from "../../../../../src/common/api/common/recipients/Recipient"
|
||||
import { AesKey } from "@tutao/tutanota-crypto"
|
||||
import { RecipientsNotFoundError } from "../../../../../src/common/api/common/error/RecipientsNotFoundError"
|
||||
|
|
@ -52,6 +52,7 @@ o.spec("MailFacade test", function () {
|
|||
let facade: MailFacade
|
||||
let userFacade: UserFacade
|
||||
let cryptoFacade: CryptoFacade
|
||||
let cryptoWrapper: CryptoWrapper
|
||||
let serviceExecutor: IServiceExecutor
|
||||
let entityClient: EntityClient
|
||||
let blobFacade: BlobFacade
|
||||
|
|
@ -67,6 +68,7 @@ o.spec("MailFacade test", function () {
|
|||
blobFacade = object()
|
||||
entityClient = object()
|
||||
cryptoFacade = object()
|
||||
cryptoWrapper = object()
|
||||
serviceExecutor = object()
|
||||
fileApp = object()
|
||||
loginFacade = object()
|
||||
|
|
@ -76,6 +78,7 @@ o.spec("MailFacade test", function () {
|
|||
userFacade,
|
||||
entityClient,
|
||||
cryptoFacade,
|
||||
cryptoWrapper,
|
||||
serviceExecutor,
|
||||
blobFacade,
|
||||
fileApp,
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,7 @@
import o from "@tutao/otest"
import { HashingVectorizer } from "../../../../../../src/mail-app/workerUtils/spamClassification/HashingVectorizer"
import { arrayEquals } from "@tutao/tutanota-utils"
import { spamClassifierTokenizer } from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
import { spamClassifierTokenizer } from "../../../../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"

o.spec("HashingVectorizer", () => {
	const rawDocuments = [

@@ -14,7 +14,7 @@ import {
	ML_SPECIAL_CHARACTER_TOKEN,
	ML_URL_REGEX,
	ML_URL_TOKEN,
} from "../../../../../../src/mail-app/workerUtils/spamClassification/PreprocessPatterns"
} from "../../../../../../src/common/api/common/utils/spamClassificationUtils/PreprocessPatterns"
import { isMailAddress } from "../../../../../../src/common/misc/FormatValidator"

o.spec("PreprocessPatterns", () => {
|
|
|
|||
|
|
@ -0,0 +1,374 @@
|
|||
import o from "@tutao/otest"
|
||||
import {
|
||||
SINGLE_TRAIN_INTERVAL_TRAINING_DATA_LIMIT,
|
||||
SpamClassificationDataDealer,
|
||||
UnencryptedPopulateClientSpamTrainingDatum,
|
||||
} from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassificationDataDealer"
|
||||
import {
|
||||
ClientSpamTrainingDatum,
|
||||
ClientSpamTrainingDatumIndexEntryTypeRef,
|
||||
ClientSpamTrainingDatumTypeRef,
|
||||
MailBagTypeRef,
|
||||
MailBox,
|
||||
MailboxGroupRoot,
|
||||
MailboxGroupRootTypeRef,
|
||||
MailBoxTypeRef,
|
||||
MailDetails,
|
||||
MailDetailsTypeRef,
|
||||
MailFolderRefTypeRef,
|
||||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
} from "../../../../../../src/common/api/entities/tutanota/TypeRefs"
|
||||
import { MailSetKind, SpamDecision } from "../../../../../../src/common/api/common/TutanotaConstants"
|
||||
import { matchers, object, verify, when } from "testdouble"
|
||||
import { EntityClient } from "../../../../../../src/common/api/common/EntityClient"
|
||||
import { BulkMailLoader } from "../../../../../../src/mail-app/workerUtils/index/BulkMailLoader"
|
||||
import { MailFacade } from "../../../../../../src/common/api/worker/facades/lazy/MailFacade"
|
||||
import { createTestEntity } from "../../../../TestUtils"
|
||||
import { GENERATED_MIN_ID, getElementId, isSameId } from "../../../../../../src/common/api/common/utils/EntityUtils"
|
||||
import { DEFAULT_IS_SPAM_CONFIDENCE } from "../../../../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
import { last } from "@tutao/tutanota-utils"
|
||||
|
||||
const { anything } = matchers
|
||||
|
||||
function createMailByFolderAndReceivedDate(mailId: IdTuple, mailSet: IdTuple, receivedDate: Date, mailDetailsId: Id) {
|
||||
return createTestEntity(MailTypeRef, {
|
||||
_id: mailId,
|
||||
sets: [mailSet],
|
||||
receivedDate: receivedDate,
|
||||
mailDetails: ["detailsListId", mailDetailsId],
|
||||
})
|
||||
}
|
||||
|
||||
function createSpamTrainingDatumByConfidenceAndDecision(confidence: string, spamDecision: SpamDecision): ClientSpamTrainingDatum {
|
||||
return createTestEntity(ClientSpamTrainingDatumTypeRef, {
|
||||
_ownerGroup: "group",
|
||||
confidence,
|
||||
spamDecision,
|
||||
vector: new Uint8Array(),
|
||||
})
|
||||
}
|
||||
|
||||
function createClientSpamTrainingDatumIndexEntryByClientSpamTrainingDatumElementId(clientSpamTrainingDatumElementId: Id) {
|
||||
return createTestEntity(ClientSpamTrainingDatumIndexEntryTypeRef, { clientSpamTrainingDatumElementId })
|
||||
}
|
||||
|
||||
o.spec("SpamClassificationDataDealer", () => {
|
||||
const entityClientMock = object<EntityClient>()
|
||||
const bulkMailLoaderMock = object<BulkMailLoader>()
|
||||
const mailFacadeMock = object<MailFacade>()
|
||||
let mailDetails: MailDetails
|
||||
let spamClassificationDataDealer: SpamClassificationDataDealer
|
||||
let mailboxGroupRoot: MailboxGroupRoot
|
||||
let mailBox: MailBox
|
||||
|
||||
const inboxFolder = createTestEntity(MailFolderTypeRef, {
|
||||
_id: ["folderListId", "inbox"],
|
||||
_ownerGroup: "owner",
|
||||
folderType: MailSetKind.INBOX,
|
||||
})
|
||||
const trashFolder = createTestEntity(MailFolderTypeRef, {
|
||||
_id: ["folderListId", "trash"],
|
||||
_ownerGroup: "owner",
|
||||
folderType: MailSetKind.TRASH,
|
||||
})
|
||||
const spamFolder = createTestEntity(MailFolderTypeRef, {
|
||||
_id: ["folderListId", "spam"],
|
||||
_ownerGroup: "owner",
|
||||
folderType: MailSetKind.SPAM,
|
||||
})
|
||||
|
||||
o.beforeEach(function () {
|
||||
mailboxGroupRoot = createTestEntity(MailboxGroupRootTypeRef, {
|
||||
_ownerGroup: "owner",
|
||||
mailbox: "mailbox",
|
||||
})
|
||||
mailBox = createTestEntity(MailBoxTypeRef, {
|
||||
_id: "mailbox",
|
||||
_ownerGroup: "owner",
|
||||
folders: createTestEntity(MailFolderRefTypeRef, { folders: "folderListId" }),
|
||||
currentMailBag: createTestEntity(MailBagTypeRef, { mails: "mailListId" }),
|
||||
archivedMailBags: [createTestEntity(MailBagTypeRef, { mails: "oldMailListId" })],
|
||||
clientSpamTrainingData: "clientSpamTrainingData",
|
||||
modifiedClientSpamTrainingDataIndex: "modifiedClientSpamTrainingDataIndex",
|
||||
})
|
||||
mailDetails = createTestEntity(MailDetailsTypeRef, { _id: "mailDetail" })
|
||||
when(mailFacadeMock.vectorizeAndCompressMails(anything())).thenResolve(new Uint8Array(1))
|
||||
spamClassificationDataDealer = new SpamClassificationDataDealer(
|
||||
entityClientMock,
|
||||
() => Promise.resolve(bulkMailLoaderMock),
|
||||
() => Promise.resolve(mailFacadeMock),
|
||||
)
|
||||
})
|
||||
|
||||
o.spec("subsampleHamAndSpamMails", () => {
|
||||
o("does not subsample if ratio is balanced", () => {
|
||||
const data = [
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST),
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST),
|
||||
]
|
||||
const { subsampledTrainingData, hamCount, spamCount } = spamClassificationDataDealer.subsampleHamAndSpamMails(data)
|
||||
o(subsampledTrainingData.length).equals(2)
|
||||
o(hamCount).equals(1)
|
||||
o(spamCount).equals(1)
|
||||
})
|
||||
|
||||
o("limits ham when ratio > MAX_RATIO", () => {
|
||||
const hamData = Array.from({ length: 50 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST))
|
||||
const spamData = Array.from({ length: 1 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST))
|
||||
const { subsampledTrainingData, hamCount, spamCount } = spamClassificationDataDealer.subsampleHamAndSpamMails([...hamData, ...spamData])
|
||||
o(hamCount).equals(10)
|
||||
o(spamCount).equals(1)
|
||||
o(subsampledTrainingData.length).equals(11)
|
||||
})
|
||||
|
||||
o("limits spam when ratio < MIN_RATIO", () => {
|
||||
const hamData = Array.from({ length: 1 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST))
|
||||
const spamData = Array.from({ length: 50 }, () =>
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST),
|
||||
)
|
||||
|
||||
const { subsampledTrainingData, hamCount, spamCount } = spamClassificationDataDealer.subsampleHamAndSpamMails([...hamData, ...spamData])
|
||||
o(hamCount).equals(1)
|
||||
o(spamCount).equals(10)
|
||||
o(subsampledTrainingData.length).equals(11)
|
||||
})
|
||||
})
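
// A minimal sketch of the ratio cap that the expectations above imply, assuming a hypothetical
// MAX_HAM_TO_SPAM_RATIO of 10 (the real constant lives in SpamClassificationDataDealer and may differ):
function capHamToSpamRatioSketch(ham: ClientSpamTrainingDatum[], spam: ClientSpamTrainingDatum[]) {
	const MAX_HAM_TO_SPAM_RATIO = 10 // assumed value, derived from the hamCount/spamCount expectations above
	const cappedHam = ham.slice(0, Math.max(1, spam.length * MAX_HAM_TO_SPAM_RATIO))
	const cappedSpam = spam.slice(0, Math.max(1, ham.length * MAX_HAM_TO_SPAM_RATIO))
	return { subsampledTrainingData: [...cappedHam, ...cappedSpam], hamCount: cappedHam.length, spamCount: cappedSpam.length }
}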
|
||||
|
||||
o.spec("fetchAllTrainingData", () => {
|
||||
o("returns empty training data when index or training data is null", async () => {
|
||||
mailBox.clientSpamTrainingData = null
|
||||
mailBox.modifiedClientSpamTrainingDataIndex = null
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
|
||||
const trainingDataset = await spamClassificationDataDealer.fetchAllTrainingData("owner")
|
||||
|
||||
o(trainingDataset.trainingData.length).equals(0)
|
||||
o(trainingDataset.hamCount).equals(0)
|
||||
o(trainingDataset.spamCount).equals(0)
|
||||
o(trainingDataset.lastTrainingDataIndexId).equals(GENERATED_MIN_ID)
|
||||
})
|
||||
|
||||
o("uploads training data when clientSpamTrainingData is empty", async () => {
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
const spamTrainingData = Array.from({ length: 10 }, () =>
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST),
|
||||
).concat(Array.from({ length: 10 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST)))
|
||||
const mails = Array.from({ length: 10 }, () =>
|
||||
createMailByFolderAndReceivedDate([mailBox.currentMailBag!.mails, "inboxMailId"], inboxFolder._id, new Date(), mailDetails._id),
|
||||
).concat(
|
||||
Array.from({ length: 10 }, () =>
|
||||
createMailByFolderAndReceivedDate([mailBox.currentMailBag!.mails, "spamMailId"], spamFolder._id, new Date(), mailDetails._id),
|
||||
),
|
||||
)
|
||||
const modifiedIndicesSinceStart = spamTrainingData.map((data) =>
|
||||
createClientSpamTrainingDatumIndexEntryByClientSpamTrainingDatumElementId(getElementId(data)),
|
||||
)
|
||||
when(entityClientMock.loadAll(ClientSpamTrainingDatumTypeRef, mailBox.clientSpamTrainingData!)).thenResolve([], spamTrainingData)
|
||||
when(entityClientMock.loadAll(MailTypeRef, mailBox.currentMailBag!.mails, anything())).thenResolve(mails)
|
||||
when(entityClientMock.loadAll(MailTypeRef, mailBox.archivedMailBags[0].mails, anything())).thenResolve([])
|
||||
when(entityClientMock.loadAll(MailFolderTypeRef, mailBox.folders!.folders)).thenResolve([inboxFolder, spamFolder, trashFolder])
|
||||
when(entityClientMock.loadAll(ClientSpamTrainingDatumIndexEntryTypeRef, mailBox.modifiedClientSpamTrainingDataIndex!)).thenResolve(
|
||||
modifiedIndicesSinceStart,
|
||||
)
|
||||
|
||||
when(bulkMailLoaderMock.loadMailDetails(mails)).thenResolve(
|
||||
mails.map((mail) => {
|
||||
return { mail, mailDetails }
|
||||
}),
|
||||
)
|
||||
|
||||
const trainingDataset = await spamClassificationDataDealer.fetchAllTrainingData("owner")
|
||||
|
||||
// first load: empty, second load: fetch uploaded data
|
||||
verify(entityClientMock.loadAll(ClientSpamTrainingDatumTypeRef, mailBox.clientSpamTrainingData!), { times: 2 })
|
||||
verify(entityClientMock.loadAll(ClientSpamTrainingDatumIndexEntryTypeRef, mailBox.modifiedClientSpamTrainingDataIndex!), { times: 1 })
|
||||
const unencryptedPayload = mails.map((mail) => {
|
||||
return {
|
||||
mailId: mail._id,
|
||||
isSpam: isSameId(mail.sets[0], spamFolder._id),
|
||||
confidence: DEFAULT_IS_SPAM_CONFIDENCE,
|
||||
vector: new Uint8Array(1),
|
||||
} as UnencryptedPopulateClientSpamTrainingDatum
|
||||
})
|
||||
verify(mailFacadeMock.populateClientSpamTrainingData("owner", unencryptedPayload), { times: 1 })
|
||||
|
||||
o(trainingDataset).deepEquals({
|
||||
trainingData: spamTrainingData,
|
||||
lastTrainingDataIndexId: getElementId(last(modifiedIndicesSinceStart)!),
|
||||
hamCount: 10,
|
||||
spamCount: 10,
|
||||
})
|
||||
})
|
||||
|
||||
o("successfully returns training data with mixed ham/spam data", async () => {
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
const spamTrainingData = Array.from({ length: 10 }, () =>
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST),
|
||||
).concat(Array.from({ length: 10 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST)))
|
||||
|
||||
const modifiedIndicesSinceStart = spamTrainingData.map((data) =>
|
||||
createClientSpamTrainingDatumIndexEntryByClientSpamTrainingDatumElementId(getElementId(data)),
|
||||
)
|
||||
when(entityClientMock.loadAll(ClientSpamTrainingDatumTypeRef, mailBox.clientSpamTrainingData!)).thenResolve(spamTrainingData)
|
||||
when(entityClientMock.loadAll(MailTypeRef, mailBox.archivedMailBags[0].mails, anything())).thenResolve([])
|
||||
when(entityClientMock.loadAll(MailFolderTypeRef, mailBox.folders!.folders)).thenResolve([inboxFolder, spamFolder, trashFolder])
|
||||
when(entityClientMock.loadAll(ClientSpamTrainingDatumIndexEntryTypeRef, mailBox.modifiedClientSpamTrainingDataIndex!)).thenResolve(
|
||||
modifiedIndicesSinceStart,
|
||||
)
|
||||
|
||||
const trainingDataset = await spamClassificationDataDealer.fetchAllTrainingData("owner")
|
||||
|
||||
// only one load as the list is already populated
|
||||
verify(entityClientMock.loadAll(ClientSpamTrainingDatumTypeRef, mailBox.clientSpamTrainingData!), { times: 1 })
|
||||
verify(entityClientMock.loadAll(ClientSpamTrainingDatumIndexEntryTypeRef, mailBox.modifiedClientSpamTrainingDataIndex!), { times: 1 })
|
||||
|
||||
o(trainingDataset).deepEquals({
|
||||
trainingData: spamTrainingData,
|
||||
lastTrainingDataIndexId: getElementId(last(modifiedIndicesSinceStart)!),
|
||||
hamCount: 10,
|
||||
spamCount: 10,
|
||||
})
|
||||
})
|
||||
|
||||
o("filters out training data with confidence=0 or spamDecision NONE", async () => {
|
||||
const noneDecisionData = createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.NONE)
|
||||
const zeroConfData = createSpamTrainingDatumByConfidenceAndDecision("0", SpamDecision.WHITELIST)
|
||||
const validHamData = createSpamTrainingDatumByConfidenceAndDecision("1", SpamDecision.WHITELIST)
|
||||
const validSpamData = createSpamTrainingDatumByConfidenceAndDecision("4", SpamDecision.BLACKLIST)
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
|
||||
const spamTrainingData = [noneDecisionData, zeroConfData, validSpamData, validHamData]
|
||||
const modifiedIndicesSinceStart = spamTrainingData.map((data) =>
|
||||
createClientSpamTrainingDatumIndexEntryByClientSpamTrainingDatumElementId(getElementId(data)),
|
||||
)
|
||||
when(entityClientMock.loadAll(ClientSpamTrainingDatumTypeRef, mailBox.clientSpamTrainingData!)).thenResolve(spamTrainingData)
|
||||
|
||||
when(entityClientMock.loadAll(ClientSpamTrainingDatumIndexEntryTypeRef, mailBox.modifiedClientSpamTrainingDataIndex!)).thenResolve(
|
||||
modifiedIndicesSinceStart,
|
||||
)
|
||||
|
||||
when(entityClientMock.loadAll(MailFolderTypeRef, mailBox.folders!.folders)).thenResolve([inboxFolder, spamFolder, trashFolder])
|
||||
|
||||
const result = await spamClassificationDataDealer.fetchAllTrainingData("owner")
|
||||
|
||||
o(result.trainingData.length).equals(2)
|
||||
o(result.spamCount).equals(1)
|
||||
o(result.hamCount).equals(1)
|
||||
o(new Set(result.trainingData)).deepEquals(new Set([validSpamData, validHamData]))
|
||||
})
|
||||
})
|
||||
|
||||
o.spec("fetchPartialTrainingDataFromIndexStartId", () => {
|
||||
o("returns empty training data when index or training data is null", async () => {
|
||||
mailBox.clientSpamTrainingData = null
|
||||
mailBox.modifiedClientSpamTrainingDataIndex = null
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
|
||||
const trainingDataset = await spamClassificationDataDealer.fetchPartialTrainingDataFromIndexStartId("startId", "owner")
|
||||
|
||||
o(trainingDataset.trainingData.length).equals(0)
|
||||
o(trainingDataset.hamCount).equals(0)
|
||||
o(trainingDataset.spamCount).equals(0)
|
||||
o(trainingDataset.lastTrainingDataIndexId).equals("startId")
|
||||
})
|
||||
|
||||
o("returns empty training data when modifiedClientSpamTrainingDataIndicesSinceStart are null", async () => {
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
when(
|
||||
entityClientMock.loadRange(
|
||||
ClientSpamTrainingDatumIndexEntryTypeRef,
|
||||
mailBox.modifiedClientSpamTrainingDataIndex!,
|
||||
"startId",
|
||||
SINGLE_TRAIN_INTERVAL_TRAINING_DATA_LIMIT,
|
||||
false,
|
||||
),
|
||||
).thenResolve([])
|
||||
|
||||
const trainingDataset = await spamClassificationDataDealer.fetchPartialTrainingDataFromIndexStartId("startId", "owner")
|
||||
|
||||
o(trainingDataset.trainingData.length).equals(0)
|
||||
o(trainingDataset.hamCount).equals(0)
|
||||
o(trainingDataset.spamCount).equals(0)
|
||||
o(trainingDataset.lastTrainingDataIndexId).equals("startId")
|
||||
})
|
||||
|
||||
o("returns new training data when index or training data is there", async () => {
|
||||
when(entityClientMock.load(MailboxGroupRootTypeRef, "owner")).thenResolve(mailboxGroupRoot)
|
||||
when(entityClientMock.load(MailBoxTypeRef, "mailbox")).thenResolve(mailBox)
|
||||
|
||||
const oldSpamTrainingData = Array.from({ length: 50 }, () =>
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST),
|
||||
).concat(Array.from({ length: 50 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST)))
|
||||
|
||||
oldSpamTrainingData.map((data) => (data._id = [mailBox.clientSpamTrainingData!, GENERATED_MIN_ID]))
|
||||
|
||||
const newSpamTrainingData = Array.from({ length: 10 }, () =>
|
||||
createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.WHITELIST),
|
||||
).concat(Array.from({ length: 10 }, () => createSpamTrainingDatumByConfidenceAndDecision(DEFAULT_IS_SPAM_CONFIDENCE, SpamDecision.BLACKLIST)))
|
||||
|
||||
newSpamTrainingData.map((data) => (data._id = [mailBox.clientSpamTrainingData!, GENERATED_MIN_ID]))
|
||||
|
||||
const modifiedIndicesSinceStart = newSpamTrainingData.map((data) =>
|
||||
createClientSpamTrainingDatumIndexEntryByClientSpamTrainingDatumElementId(getElementId(data)),
|
||||
)
|
||||
|
||||
when(
|
||||
entityClientMock.loadRange(
|
||||
ClientSpamTrainingDatumIndexEntryTypeRef,
|
||||
mailBox.modifiedClientSpamTrainingDataIndex!,
|
||||
"startId",
|
||||
anything(),
|
||||
false,
|
||||
),
|
||||
).thenResolve(modifiedIndicesSinceStart)
|
||||
|
||||
when(
|
||||
entityClientMock.loadMultiple(
|
||||
ClientSpamTrainingDatumTypeRef,
|
||||
mailBox.clientSpamTrainingData,
|
||||
modifiedIndicesSinceStart.map((index) => index.clientSpamTrainingDatumElementId),
|
||||
),
|
||||
).thenResolve(newSpamTrainingData)
|
||||
|
||||
const trainingDataset = await spamClassificationDataDealer.fetchPartialTrainingDataFromIndexStartId("startId", "owner")
|
||||
|
||||
o(trainingDataset.trainingData.length).equals(20)
|
||||
o(trainingDataset.hamCount).equals(10)
|
||||
o(trainingDataset.spamCount).equals(10)
|
||||
o(trainingDataset.lastTrainingDataIndexId).equals(getElementId(last(modifiedIndicesSinceStart)!))
|
||||
})
|
||||
})
|
||||
|
||||
o.spec("fetchMailsByMailbagAfterDate", () => {
|
||||
o("correctly filters mails with received date greater than start date", async () => {
|
||||
const startDate = new Date(2020, 11, 30)
|
||||
const dayBeforeStart = new Date(2020, 11, 29)
|
||||
const recentMails = Array.from({ length: 10 }, () =>
|
||||
createMailByFolderAndReceivedDate([mailBox.currentMailBag!.mails, "inboxMailId"], inboxFolder._id, new Date(2025, 11, 17), mailDetails._id),
|
||||
)
|
||||
const oldMails = Array.from({ length: 10 }, () =>
|
||||
createMailByFolderAndReceivedDate([mailBox.currentMailBag!.mails, "inboxMailId"], inboxFolder._id, dayBeforeStart, mailDetails._id),
|
||||
)
|
||||
const mails = recentMails.concat(oldMails)
|
||||
when(entityClientMock.loadAll(MailTypeRef, mailBox.currentMailBag!.mails, anything())).thenResolve(mails)
|
||||
when(bulkMailLoaderMock.loadMailDetails(recentMails)).thenResolve(
|
||||
recentMails.map((mail) => {
|
||||
return { mail, mailDetails }
|
||||
}),
|
||||
)
|
||||
const result = await spamClassificationDataDealer.fetchMailsByMailbagAfterDate(
|
||||
mailBox.currentMailBag!,
|
||||
[inboxFolder, spamFolder, trashFolder],
|
||||
startDate,
|
||||
)
|
||||
o(result.length).equals(10)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
@ -1,36 +1,41 @@
|
|||
import o from "@tutao/otest"
|
||||
import fs from "node:fs"
|
||||
import { parseCsv } from "../../../../../../src/common/misc/parsing/CsvParser"
|
||||
import {
|
||||
DEFAULT_PREPROCESS_CONFIGURATION,
|
||||
SpamClassifier,
|
||||
SpamTrainMailDatum,
|
||||
} from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { OfflineStoragePersistence } from "../../../../../../src/mail-app/workerUtils/index/OfflineStoragePersistence"
|
||||
import { Classifier, DEFAULT_PREDICTION_THRESHOLD, SpamClassifier } from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { matchers, object, when } from "testdouble"
|
||||
import { assertNotNull, promiseMap } from "@tutao/tutanota-utils"
|
||||
import { SpamClassificationInitializer } from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassificationInitializer"
|
||||
import { assertNotNull } from "@tutao/tutanota-utils"
|
||||
import { SpamClassificationDataDealer, TrainingDataset } from "../../../../../../src/mail-app/workerUtils/spamClassification/SpamClassificationDataDealer"
|
||||
import { CacheStorage } from "../../../../../../src/common/api/worker/rest/DefaultEntityRestCache"
|
||||
import { mockAttribute } from "@tutao/tutanota-test-utils"
|
||||
import "@tensorflow/tfjs-backend-cpu"
|
||||
import { HashingVectorizer } from "../../../../../../src/mail-app/workerUtils/spamClassification/HashingVectorizer"
|
||||
import { LayersModel, tensor1d } from "../../../../../../src/mail-app/workerUtils/spamClassification/tensorflow-custom"
|
||||
import { createTestEntity } from "../../../../TestUtils"
|
||||
import { MailTypeRef } from "../../../../../../src/common/api/entities/tutanota/TypeRefs"
|
||||
import { ClientSpamTrainingDatum, ClientSpamTrainingDatumTypeRef, MailTypeRef } from "../../../../../../src/common/api/entities/tutanota/TypeRefs"
|
||||
import { Sequential } from "@tensorflow/tfjs-layers"
|
||||
import { SparseVectorCompressor } from "../../../../../../src/common/api/common/utils/spamClassificationUtils/SparseVectorCompressor"
|
||||
import {
|
||||
DEFAULT_IS_SPAM_CONFIDENCE,
|
||||
DEFAULT_PREPROCESS_CONFIGURATION,
|
||||
SpamMailDatum,
|
||||
SpamMailProcessor,
|
||||
} from "../../../../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
import { SpamDecision } from "../../../../../../src/common/api/common/TutanotaConstants"
|
||||
import { GENERATED_MIN_ID } from "../../../../../../src/common/api/common/utils/EntityUtils"
|
||||
|
||||
const { anything } = matchers
|
||||
export const DATASET_FILE_PATH: string = "./tests/api/worker/utils/spamClassification/spam_classification_test_mails.csv"
|
||||
const TEST_OWNER_GROUP = "owner"
|
||||
|
||||
export async function readMailDataFromCSV(filePath: string): Promise<{
|
||||
spamData: SpamTrainMailDatum[]
|
||||
hamData: SpamTrainMailDatum[]
|
||||
spamData: SpamMailDatum[]
|
||||
hamData: SpamMailDatum[]
|
||||
}> {
|
||||
const file = await fs.promises.readFile(filePath)
|
||||
const csv = parseCsv(file.toString())
|
||||
|
||||
let spamData: SpamTrainMailDatum[] = []
|
||||
let hamData: SpamTrainMailDatum[] = []
|
||||
let spamData: SpamMailDatum[] = []
|
||||
let hamData: SpamMailDatum[] = []
|
||||
for (const row of csv.rows.slice(1, csv.rows.length - 1)) {
|
||||
const subject = row[8]
|
||||
const body = row[10]
|
||||
|
|
@ -43,57 +48,77 @@ export async function readMailDataFromCSV(filePath: string): Promise<{
|
|||
|
||||
let isSpam = label === "spam" ? true : label === "ham" ? false : null
|
||||
isSpam = assertNotNull(isSpam, "Unknown label detected: " + label)
|
||||
const targetData = isSpam ? spamData : hamData
|
||||
targetData.push({
|
||||
mailId: ["mailListId", "mailElementId"],
|
||||
const spamMailDatum = {
|
||||
subject,
|
||||
body,
|
||||
isSpam,
|
||||
isSpamConfidence: 1,
|
||||
ownerGroup: "owner",
|
||||
ownerGroup: TEST_OWNER_GROUP,
|
||||
sender: from,
|
||||
toRecipients: to,
|
||||
ccRecipients: cc,
|
||||
bccRecipients: bcc,
|
||||
authStatus: authStatus,
|
||||
} as SpamTrainMailDatum)
|
||||
} as SpamMailDatum
|
||||
|
||||
const targetData = isSpam ? spamData : hamData
|
||||
targetData.push(spamMailDatum)
|
||||
}
|
||||
|
||||
return { spamData, hamData }
|
||||
}
|
||||
|
||||
async function convertToClientTrainingDatum(spamData: SpamMailDatum[], spamProcessor: SpamMailProcessor, isSpam: boolean): Promise<ClientSpamTrainingDatum[]> {
|
||||
let result: ClientSpamTrainingDatum[] = []
|
||||
for (const spamDatum of spamData) {
|
||||
const clientSpamTrainingDatum = createTestEntity(ClientSpamTrainingDatumTypeRef, {
|
||||
confidence: DEFAULT_IS_SPAM_CONFIDENCE.toString(),
|
||||
spamDecision: isSpam ? SpamDecision.BLACKLIST : SpamDecision.WHITELIST,
|
||||
vector: await spamProcessor.vectorizeAndCompress(spamDatum),
|
||||
})
|
||||
|
||||
result.push(clientSpamTrainingDatum)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
function getTrainingDataset(trainSet: ClientSpamTrainingDatum[]) {
|
||||
return {
|
||||
trainingData: trainSet,
|
||||
hamCount: trainSet.filter((item) => item.spamDecision === SpamDecision.WHITELIST).length,
|
||||
spamCount: trainSet.filter((item) => item.spamDecision === SpamDecision.BLACKLIST).length,
|
||||
lastTrainingDataIndexId: GENERATED_MIN_ID,
|
||||
}
|
||||
}
|
||||
|
||||
// Initial training (cutoff by day or amount)
|
||||
o.spec("SpamClassifierTest", () => {
|
||||
const mockOfflineStorageCache = object<CacheStorage>()
|
||||
const mockOfflineStorage = object<OfflineStoragePersistence>()
|
||||
const mockSpamClassificationInitializer = object<SpamClassificationInitializer>()
|
||||
let nonEfficientSmallVectorizer: HashingVectorizer
|
||||
const mockCacheStorage = object<CacheStorage>()
|
||||
const mockSpamClassificationDataDealer = object<SpamClassificationDataDealer>()
|
||||
let spamClassifier: SpamClassifier
|
||||
let spamProcessor: SpamMailProcessor
|
||||
let compressor: SparseVectorCompressor
|
||||
|
||||
let spamData: SpamTrainMailDatum[]
|
||||
let hamData: SpamTrainMailDatum[]
|
||||
let dataSlice: SpamTrainMailDatum[]
|
||||
let spamData: ClientSpamTrainingDatum[]
|
||||
let hamData: ClientSpamTrainingDatum[]
|
||||
let dataSlice: ClientSpamTrainingDatum[]
|
||||
|
||||
o.beforeEach(async () => {
|
||||
const spamHamData = await readMailDataFromCSV(DATASET_FILE_PATH)
|
||||
spamData = spamHamData.spamData
|
||||
hamData = spamHamData.hamData
|
||||
|
||||
mockSpamClassificationDataDealer.fetchAllTrainingData = async () => {
|
||||
return getTrainingDataset(dataSlice)
|
||||
}
|
||||
const vectorLength = 512
|
||||
|
||||
compressor = new SparseVectorCompressor(vectorLength)
|
||||
spamProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, new HashingVectorizer(vectorLength), compressor)
|
||||
spamClassifier = new SpamClassifier(mockCacheStorage, mockSpamClassificationDataDealer, true)
|
||||
spamClassifier.spamMailProcessor = spamProcessor
|
||||
spamClassifier.sparseVectorCompressor = compressor
|
||||
spamData = await convertToClientTrainingDatum(spamHamData.spamData, spamProcessor, true)
|
||||
hamData = await convertToClientTrainingDatum(spamHamData.hamData, spamProcessor, false)
|
||||
dataSlice = spamData.concat(hamData)
|
||||
seededShuffle(dataSlice, 42)
|
||||
|
||||
mockSpamClassificationInitializer.init = async () => {
|
||||
return dataSlice
|
||||
}
|
||||
|
||||
nonEfficientSmallVectorizer = new HashingVectorizer(512)
|
||||
spamClassifier = new SpamClassifier(
|
||||
mockOfflineStorage,
|
||||
mockOfflineStorageCache,
|
||||
mockSpamClassificationInitializer,
|
||||
true,
|
||||
DEFAULT_PREPROCESS_CONFIGURATION,
|
||||
nonEfficientSmallVectorizer,
|
||||
)
|
||||
})
|
||||
|
||||
o("processSpam maintains server classification when client classification is not enabled", async function () {
|
||||
|
|
@ -101,23 +126,27 @@ o.spec("SpamClassifierTest", () => {
|
|||
_id: ["mailListId", "mailId"],
|
||||
sets: [["folderList", "serverFolder"]],
|
||||
})
|
||||
const spamTrainMailDatum: SpamTrainMailDatum = {
|
||||
mailId: mail._id,
|
||||
const spamMailDatum: SpamMailDatum = {
|
||||
ownerGroup: TEST_OWNER_GROUP,
|
||||
subject: mail.subject,
|
||||
body: "some body",
|
||||
isSpam: true,
|
||||
isSpamConfidence: 1,
|
||||
ownerGroup: "owner",
|
||||
sender: "",
|
||||
toRecipients: "",
|
||||
sender: "sender@tuta.com",
|
||||
toRecipients: "recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "",
|
||||
authStatus: "0",
|
||||
}
|
||||
const layersModel = object<Sequential>()
|
||||
spamClassifier.addSpamClassifierForOwner(spamTrainMailDatum.ownerGroup, layersModel, false)
|
||||
|
||||
const predictedSpam = await spamClassifier.predict(spamTrainMailDatum)
|
||||
// convert to vector
|
||||
const layersModel = object<Sequential>()
|
||||
const classifier = object<Classifier>()
|
||||
classifier.layersModel = layersModel
|
||||
classifier.isEnabled = false
|
||||
classifier.threshold = DEFAULT_PREDICTION_THRESHOLD
|
||||
spamClassifier.addSpamClassifierForOwner(spamMailDatum.ownerGroup, classifier)
|
||||
|
||||
const vector = await spamProcessor.vectorize(spamMailDatum)
|
||||
const predictedSpam = await spamClassifier.predict(vector, spamMailDatum.ownerGroup)
|
||||
o(predictedSpam).equals(null)
|
||||
})
|
||||
|
||||
|
|
@ -126,37 +155,73 @@ o.spec("SpamClassifierTest", () => {
|
|||
_id: ["mailListId", "mailId"],
|
||||
sets: [["folderList", "serverFolder"]],
|
||||
})
|
||||
const spamTrainMailDatum: SpamTrainMailDatum = {
|
||||
mailId: mail._id,
|
||||
const spamMailDatum: SpamMailDatum = {
|
||||
ownerGroup: TEST_OWNER_GROUP,
|
||||
subject: mail.subject,
|
||||
body: "some body",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 0,
|
||||
ownerGroup: "owner",
|
||||
sender: "",
|
||||
toRecipients: "",
|
||||
sender: "sender@tuta.com",
|
||||
toRecipients: "recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "",
|
||||
authStatus: "0",
|
||||
}
|
||||
|
||||
const layersModel = object<Sequential>()
|
||||
when(layersModel.predict(anything())).thenReturn(tensor1d([1]))
|
||||
spamClassifier.addSpamClassifierForOwner(spamTrainMailDatum.ownerGroup, layersModel, true)
|
||||
const classifier = object<Classifier>()
|
||||
classifier.layersModel = layersModel
|
||||
classifier.isEnabled = true
|
||||
classifier.threshold = DEFAULT_PREDICTION_THRESHOLD
|
||||
spamClassifier.addSpamClassifierForOwner(spamMailDatum.ownerGroup, classifier)
|
||||
|
||||
const predictedSpam = await spamClassifier.predict(spamTrainMailDatum)
|
||||
const vector = await spamProcessor.vectorize(spamMailDatum)
|
||||
const predictedSpam = await spamClassifier.predict(vector, spamMailDatum.ownerGroup)
|
||||
o(predictedSpam).equals(true)
|
||||
})
|
||||
|
||||
o("processSpam respects the classifier threshold", async function () {
|
||||
const mail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "mailId"],
|
||||
sets: [["folderList", "serverFolder"]],
|
||||
})
|
||||
const spamMailDatum: SpamMailDatum = {
|
||||
ownerGroup: TEST_OWNER_GROUP,
|
||||
subject: mail.subject,
|
||||
body: "some body",
|
||||
sender: "sender@tuta.com",
|
||||
toRecipients: "recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "0",
|
||||
}
|
||||
|
||||
const layersModel = object<Sequential>()
|
||||
when(layersModel.predict(anything())).thenReturn(tensor1d([0.7]))
|
||||
const classifier = object<Classifier>()
|
||||
classifier.layersModel = layersModel
|
||||
classifier.isEnabled = true
|
||||
classifier.threshold = 0.9
|
||||
spamClassifier.addSpamClassifierForOwner(spamMailDatum.ownerGroup, classifier)
|
||||
|
||||
const vector = await spamProcessor.vectorize(spamMailDatum)
|
||||
const predictedSpam = await spamClassifier.predict(vector, spamMailDatum.ownerGroup)
|
||||
o(predictedSpam).equals(false)
|
||||
})
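
// Taken together with the previous test, these expectations suggest a decision rule along the lines of
// predictionScore > classifier.threshold (a score of 0.7 with a threshold of 0.9 is reported as not spam);
// the exact comparison lives in SpamClassifier.predict and may differ (e.g. >=).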
|
||||
|
||||
o("Initial training only", async () => {
|
||||
o.timeout(20_000)
|
||||
|
||||
const trainTestSplit = dataSlice.length * 0.8
|
||||
const trainSet = dataSlice.slice(0, trainTestSplit)
|
||||
const testSet = dataSlice.slice(trainTestSplit)
|
||||
const trainingDataset: TrainingDataset = getTrainingDataset(trainSet)
|
||||
await spamClassifier.initialTraining(TEST_OWNER_GROUP, trainingDataset)
|
||||
await testClassifier(spamClassifier, testSet, compressor)
|
||||
|
||||
await spamClassifier.initialTraining(trainSet)
|
||||
await testClassifier(spamClassifier, testSet)
|
||||
const classifier = spamClassifier.classifiers.get(TEST_OWNER_GROUP)
|
||||
o(classifier?.hamCount).equals(trainingDataset.hamCount)
|
||||
o(classifier?.spamCount).equals(trainingDataset.spamCount)
|
||||
o(classifier?.threshold).equals(spamClassifier.calculateThreshold(trainingDataset.hamCount, trainingDataset.spamCount))
|
||||
})
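
// The assertions above rely on the classifier tracking cumulative hamCount/spamCount per owner group
// and deriving its decision threshold from them via calculateThreshold; the multi-step test below
// additionally checks that those counts keep accumulating across initialTraining and later updateModel calls.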
|
||||
|
||||
o("Initial training and refitting in multi step", async () => {
|
||||
|
|
@ -170,18 +235,26 @@ o.spec("SpamClassifierTest", () => {
|
|||
const trainSetSecondHalf = trainSet.slice(trainSet.length / 2, trainSet.length)
|
||||
|
||||
dataSlice = trainSetFirstHalf
|
||||
o(await mockSpamClassificationInitializer.init("owner")).deepEquals(trainSetFirstHalf)
|
||||
await spamClassifier.initialTraining(dataSlice)
|
||||
o((await mockSpamClassificationDataDealer.fetchAllTrainingData(TEST_OWNER_GROUP)).trainingData).deepEquals(dataSlice)
|
||||
const initialTrainingDataset = getTrainingDataset(dataSlice)
|
||||
await spamClassifier.initialTraining(TEST_OWNER_GROUP, initialTrainingDataset)
|
||||
console.log(`==> Result when testing with mails in two steps (first step).`)
|
||||
await testClassifier(spamClassifier, testSet)
|
||||
await testClassifier(spamClassifier, testSet, compressor)
|
||||
|
||||
await spamClassifier.updateModel("owner", trainSetSecondHalf)
|
||||
const trainingDatasetSecondHalf = getTrainingDataset(trainSetSecondHalf)
|
||||
await spamClassifier.updateModel(TEST_OWNER_GROUP, trainingDatasetSecondHalf)
|
||||
console.log(`==> Result when testing with mails in two steps (second step).`)
|
||||
await testClassifier(spamClassifier, testSet)
|
||||
await testClassifier(spamClassifier, testSet, compressor)
|
||||
|
||||
const classifier = spamClassifier.classifiers.get(TEST_OWNER_GROUP)
|
||||
const finalHamCount = initialTrainingDataset.hamCount + trainingDatasetSecondHalf.hamCount
|
||||
const finalSpamCount = initialTrainingDataset.spamCount + trainingDatasetSecondHalf.spamCount
|
||||
o(classifier?.hamCount).equals(finalHamCount)
|
||||
o(classifier?.spamCount).equals(finalSpamCount)
|
||||
o(classifier?.threshold).equals(spamClassifier.calculateThreshold(finalHamCount, finalSpamCount))
|
||||
})
|
||||
|
||||
o("preprocessMail outputs expected tokens for mail content", async () => {
|
||||
const classifier = new SpamClassifier(object(), object(), object())
|
||||
const mail = {
|
||||
subject: `Sample Tokens and values`,
|
||||
sender: "sender",
|
||||
|
|
@ -273,8 +346,8 @@ o.spec("SpamClassifierTest", () => {
|
|||
<table cellpadding="0" cellspacing="0" border="0" role="presentation" width="100%"><tbody><tr><td align="center"><a href="https://mail.abc-web.de/optiext/optiextension.dll?ID=someid" rel="noopener noreferrer" target="_blank" style="text-decoration:none"><img id="OWATemporaryImageDivContainer1" src="https://mail.some-domain.de/images/SMC/grafik/image.png" alt="" border="0" class="" width="100%" style="max-width:100%;display:block;width:100%"></a></td></tr></tbody></table>
|
||||
this text is shown
|
||||
`,
|
||||
} as SpamTrainMailDatum
|
||||
const preprocessedMail = classifier.preprocessMail(mail)
|
||||
} as SpamMailDatum
|
||||
const preprocessedMail = spamProcessor.preprocessMail(mail)
|
||||
// prettier-ignore
|
||||
const expectedOutput = `Sample Tokens and values
|
||||
Hello TSPECIALCHAR these are my MAC Address
|
||||
|
|
@ -364,13 +437,17 @@ authStatus`
|
|||
})
|
||||
|
||||
o("predict uses different models for different owner groups", async () => {
|
||||
const firstGroupModel = object<LayersModel>()
|
||||
const secondGroupModel = object<LayersModel>()
|
||||
mockAttribute(spamClassifier, spamClassifier.loadModel, (ownerGroup) => {
|
||||
const firstGroupClassifier = object<Classifier>()
|
||||
firstGroupClassifier.layersModel = object<LayersModel>()
|
||||
firstGroupClassifier.threshold = DEFAULT_PREDICTION_THRESHOLD
|
||||
const secondGroupClassifier = object<Classifier>()
|
||||
secondGroupClassifier.threshold = DEFAULT_PREDICTION_THRESHOLD
|
||||
secondGroupClassifier.layersModel = object<LayersModel>()
|
||||
mockAttribute(spamClassifier, spamClassifier.loadClassifier, (ownerGroup) => {
|
||||
if (ownerGroup === "firstGroup") {
|
||||
return Promise.resolve(firstGroupModel)
|
||||
return Promise.resolve(firstGroupClassifier)
|
||||
} else if (ownerGroup === "secondGroup") {
|
||||
return Promise.resolve(secondGroupModel)
|
||||
return Promise.resolve(secondGroupClassifier)
|
||||
}
|
||||
return null
|
||||
})
|
||||
|
|
@ -380,9 +457,9 @@ authStatus`
|
|||
})
|
||||
|
||||
const firstGroupReturnTensor = tensor1d([1.0], undefined)
|
||||
when(firstGroupModel.predict(matchers.anything())).thenReturn(firstGroupReturnTensor)
|
||||
when(firstGroupClassifier.layersModel.predict(matchers.anything())).thenReturn(firstGroupReturnTensor)
|
||||
const secondGroupReturnTensor = tensor1d([0.0], undefined)
|
||||
when(secondGroupModel.predict(matchers.anything())).thenReturn(secondGroupReturnTensor)
|
||||
when(secondGroupClassifier.layersModel.predict(matchers.anything())).thenReturn(secondGroupReturnTensor)
|
||||
|
||||
await spamClassifier.initialize("firstGroup")
|
||||
await spamClassifier.initialize("secondGroup")
|
||||
|
|
@ -397,14 +474,16 @@ authStatus`
|
|||
authStatus: "",
|
||||
}
|
||||
|
||||
const isSpamFirstMail = await spamClassifier.predict({
|
||||
const firstMailVector = await spamProcessor.vectorize({
|
||||
ownerGroup: "firstGroup",
|
||||
...commonSpamFields,
|
||||
})
|
||||
const isSpamSecondMail = await spamClassifier.predict({
|
||||
const isSpamFirstMail = await spamClassifier.predict(firstMailVector, "firstGroup")
|
||||
const secondMailVector = await spamProcessor.vectorize({
|
||||
ownerGroup: "secondGroup",
|
||||
...commonSpamFields,
|
||||
})
|
||||
const isSpamSecondMail = await spamClassifier.predict(secondMailVector, "secondGroup")
|
||||
|
||||
o(isSpamFirstMail).equals(true)
|
||||
o(isSpamSecondMail).equals(false)
|
||||
|
|
@@ -419,39 +498,60 @@ authStatus`
|
|||
// They run in loop hence do take more time to finish and is not necessary to include in CI test suite
|
||||
//
|
||||
// To enable running this, change following constant to true
|
||||
const DO_RUN_PERFORMANCE_ANALYSIS = false
|
||||
const DO_RUN_PERFORMANCE_ANALYSIS = true
|
||||
if (DO_RUN_PERFORMANCE_ANALYSIS) {
|
||||
async function filterForMisclassifiedClientSpamTrainingData(
|
||||
classifier: SpamClassifier,
|
||||
compressor: SparseVectorCompressor,
|
||||
dataSlice: ClientSpamTrainingDatum[],
|
||||
desiredSlice: number,
|
||||
) {
|
||||
return dataSlice
|
||||
.slice(desiredSlice)
|
||||
.filter(async (datum) => {
|
||||
const currentClassificationIsSpam = datum.spamDecision === SpamDecision.BLACKLIST
|
||||
const actualPrediction = await classifier.predict(compressor.binaryToVector(datum.vector), datum._ownerGroup || TEST_OWNER_GROUP)
|
||||
return currentClassificationIsSpam !== actualPrediction
|
||||
})
|
||||
.sort()
|
||||
.slice(0, desiredSlice)
|
||||
}
|
||||
|
||||
o.spec("SpamClassifier - Performance Analysis", () => {
|
||||
const mockOfflineStorageCache = object<CacheStorage>()
|
||||
const mockOfflineStorage = object<OfflineStoragePersistence>()
|
||||
const compressor = new SparseVectorCompressor()
|
||||
let spamClassifier = object<SpamClassifier>()
|
||||
let dataSlice: SpamTrainMailDatum[]
|
||||
o.beforeEach(() => {
|
||||
const mockSpamClassificationInitializer = object<SpamClassificationInitializer>()
|
||||
mockSpamClassificationInitializer.init = async () => {
|
||||
return dataSlice
|
||||
let dataSlice: ClientSpamTrainingDatum[]
|
||||
let spamProcessor: SpamMailProcessor
|
||||
|
||||
o.beforeEach(async () => {
|
||||
const mockSpamClassificationDataDealer = object<SpamClassificationDataDealer>()
|
||||
mockSpamClassificationDataDealer.fetchAllTrainingData = async () => {
|
||||
return getTrainingDataset(dataSlice)
|
||||
}
|
||||
spamClassifier = new SpamClassifier(mockOfflineStorage, mockOfflineStorageCache, mockSpamClassificationInitializer)
|
||||
spamProcessor = new SpamMailProcessor(DEFAULT_PREPROCESS_CONFIGURATION, new HashingVectorizer(), compressor)
|
||||
spamClassifier = new SpamClassifier(mockOfflineStorageCache, mockSpamClassificationDataDealer, false)
|
||||
spamClassifier.spamMailProcessor = spamProcessor
|
||||
})
|
||||
|
||||
o("time to refit", async () => {
|
||||
o.timeout(20_000_000)
|
||||
const { spamData, hamData } = await readMailDataFromCSV(DATASET_FILE_PATH)
|
||||
const hamSlice = hamData.slice(0, 1000)
|
||||
const spamSlice = spamData.slice(0, 400)
|
||||
const hamSlice = await convertToClientTrainingDatum(hamData.slice(0, 1000), spamProcessor, false)
|
||||
const spamSlice = await convertToClientTrainingDatum(spamData.slice(0, 400), spamProcessor, true)
|
||||
dataSlice = hamSlice.concat(spamSlice)
|
||||
seededShuffle(dataSlice, 42)
|
||||
|
||||
const start = performance.now()
|
||||
await spamClassifier.initialTraining(dataSlice)
|
||||
await spamClassifier.initialTraining(TEST_OWNER_GROUP, getTrainingDataset(dataSlice))
|
||||
const initialTrainingDuration = performance.now() - start
|
||||
console.log(`initial training time ${initialTrainingDuration}ms`)
|
||||
|
||||
for (let i = 0; i < 20; i++) {
|
||||
const nowSpam = [hamSlice[0]]
|
||||
nowSpam.map((formerHam) => (formerHam.isSpam = true))
|
||||
nowSpam.map((formerHam) => (formerHam.spamDecision = "1"))
|
||||
const retrainingStart = performance.now()
|
||||
await spamClassifier.updateModel("owner", nowSpam)
|
||||
await spamClassifier.updateModel(TEST_OWNER_GROUP, getTrainingDataset(nowSpam))
|
||||
const retrainingDuration = performance.now() - retrainingStart
|
||||
console.log(`retraining time ${retrainingDuration}ms`)
|
||||
}
|
||||
|
|
@@ -460,17 +560,13 @@ if (DO_RUN_PERFORMANCE_ANALYSIS) {
|
|||
o("refit after moving a false negative classification multiple times", async () => {
|
||||
o.timeout(20_000_000)
|
||||
const { spamData, hamData } = await readMailDataFromCSV(DATASET_FILE_PATH)
|
||||
const hamSlice = hamData.slice(0, 100)
|
||||
const spamSlice = spamData.slice(0, 10)
|
||||
const hamSlice = await convertToClientTrainingDatum(hamData.slice(0, 100), spamProcessor, false)
|
||||
const spamSlice = await convertToClientTrainingDatum(spamData.slice(0, 10), spamProcessor, true)
|
||||
dataSlice = hamSlice.concat(spamSlice)
|
||||
// seededShuffle(dataSlice, 42)
|
||||
seededShuffle(dataSlice, 42)
|
||||
|
||||
await spamClassifier.initialTraining(dataSlice)
|
||||
const falseNegatives = spamData
|
||||
.slice(10)
|
||||
.filter(async (mailDatum) => mailDatum.isSpam !== (await spamClassifier.predict(mailDatum)))
|
||||
.sort()
|
||||
.slice(0, 10)
|
||||
await spamClassifier.initialTraining(TEST_OWNER_GROUP, getTrainingDataset(dataSlice))
|
||||
const falseNegatives = await filterForMisclassifiedClientSpamTrainingData(spamClassifier, compressor, spamSlice, 10)
|
||||
|
||||
let retrainingNeeded = new Array<number>(falseNegatives.length).fill(0)
|
||||
for (let i = 0; i < falseNegatives.length; i++) {
|
||||
|
|
@@ -479,32 +575,39 @@ if (DO_RUN_PERFORMANCE_ANALYSIS) {
|
|||
|
||||
let retrainCount = 0
|
||||
let predictedSpam = false
|
||||
while (!predictedSpam && retrainCount++ <= 3) {
|
||||
await copiedClassifier.updateModel("owner", [{ ...sample, isSpam: true, isSpamConfidence: 1 }])
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(sample))
|
||||
while (!predictedSpam && retrainCount++ <= 10) {
|
||||
await copiedClassifier.updateModel(
|
||||
TEST_OWNER_GROUP,
|
||||
getTrainingDataset([
|
||||
{
|
||||
...sample,
|
||||
spamDecision: SpamDecision.BLACKLIST,
|
||||
confidence: "4",
|
||||
},
|
||||
]),
|
||||
)
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(compressor.binaryToVector(sample.vector), TEST_OWNER_GROUP))
|
||||
}
|
||||
retrainingNeeded[i] = retrainCount
|
||||
}
|
||||
|
||||
console.log(retrainingNeeded)
|
||||
const maxRetrain = Math.max(...retrainingNeeded)
|
||||
o.check(retrainingNeeded.length >= 10).equals(true)
|
||||
o.check(retrainingNeeded.length >= 10).equals(false)
|
||||
o.check(maxRetrain < 3).equals(true)
|
||||
})
|
||||
|
||||
o("refit after moving a false positive classification multiple times", async () => {
|
||||
o.timeout(20_000_000)
|
||||
const { spamData, hamData } = await readMailDataFromCSV(DATASET_FILE_PATH)
|
||||
const hamSlice = hamData.slice(0, 10)
|
||||
const spamSlice = spamData.slice(0, 100)
|
||||
const hamSlice = await convertToClientTrainingDatum(hamData.slice(0, 10), spamProcessor, false)
|
||||
const spamSlice = await convertToClientTrainingDatum(spamData.slice(0, 100), spamProcessor, true)
|
||||
dataSlice = hamSlice.concat(spamSlice)
|
||||
// seededShuffle(dataSlice, 42)
|
||||
seededShuffle(dataSlice, 42)
|
||||
|
||||
await spamClassifier.initialTraining(dataSlice)
|
||||
const falsePositive = hamData
|
||||
.slice(10)
|
||||
.filter(async (mailDatum) => mailDatum.isSpam !== (await spamClassifier.predict(mailDatum)))
|
||||
.slice(0, 10)
|
||||
await spamClassifier.initialTraining(TEST_OWNER_GROUP, getTrainingDataset(dataSlice))
|
||||
|
||||
const falsePositive = await filterForMisclassifiedClientSpamTrainingData(spamClassifier, compressor, hamSlice, 10)
|
||||
let retrainingNeeded = new Array<number>(falsePositive.length).fill(0)
|
||||
for (let i = 0; i < falsePositive.length; i++) {
|
||||
const sample = falsePositive[i]
|
||||
|
|
@@ -513,32 +616,31 @@ if (DO_RUN_PERFORMANCE_ANALYSIS) {
|
|||
let retrainCount = 0
|
||||
let predictedSpam = false
|
||||
while (!predictedSpam && retrainCount++ <= 10) {
|
||||
await copiedClassifier.updateModel("owner", [{ ...sample, isSpam: true }])
|
||||
await copiedClassifier.updateModel("owner", [{ ...sample, isSpam: false }])
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(sample))
|
||||
await copiedClassifier.updateModel(
|
||||
TEST_OWNER_GROUP,
|
||||
getTrainingDataset([{ ...sample, spamDecision: SpamDecision.WHITELIST, confidence: "4" }]),
|
||||
)
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(compressor.binaryToVector(sample.vector), TEST_OWNER_GROUP))
|
||||
}
|
||||
retrainingNeeded[i] = retrainCount
|
||||
}
|
||||
|
||||
console.log(retrainingNeeded)
|
||||
const maxRetrain = Math.max(...retrainingNeeded)
|
||||
o.check(retrainingNeeded.length >= 10).equals(true)
|
||||
o.check(retrainingNeeded.length >= 10).equals(false)
|
||||
o.check(maxRetrain < 3).equals(true)
|
||||
})
|
||||
|
||||
o("retrain after moving a false negative classification multiple times", async () => {
|
||||
o("retrain from scratch after moving a false negative classification multiple times", async () => {
|
||||
o.timeout(20_000_000)
|
||||
const { spamData, hamData } = await readMailDataFromCSV(DATASET_FILE_PATH)
|
||||
const hamSlice = hamData.slice(0, 100)
|
||||
const spamSlice = spamData.slice(0, 10)
|
||||
const hamSlice = await convertToClientTrainingDatum(hamData.slice(0, 100), spamProcessor, false)
|
||||
const spamSlice = await convertToClientTrainingDatum(spamData.slice(0, 10), spamProcessor, true)
|
||||
dataSlice = hamSlice.concat(spamSlice)
|
||||
seededShuffle(dataSlice, 42)
|
||||
|
||||
await spamClassifier.initialTraining(dataSlice)
|
||||
const falseNegatives = spamData
|
||||
.slice(10)
|
||||
.filter(async (mailDatum) => mailDatum.isSpam !== (await spamClassifier.predict(mailDatum)))
|
||||
.slice(0, 10)
|
||||
await spamClassifier.initialTraining(TEST_OWNER_GROUP, getTrainingDataset(dataSlice))
|
||||
const falseNegatives = await filterForMisclassifiedClientSpamTrainingData(spamClassifier, compressor, spamSlice, 10)
|
||||
|
||||
let retrainingNeeded = new Array<number>(falseNegatives.length).fill(0)
|
||||
for (let i = 0; i < falseNegatives.length; i++) {
|
||||
|
|
@@ -548,68 +650,30 @@ if (DO_RUN_PERFORMANCE_ANALYSIS) {
|
|||
let retrainCount = 0
|
||||
let predictedSpam = false
|
||||
while (!predictedSpam && retrainCount++ <= 10) {
|
||||
await copiedClassifier.initialTraining([...dataSlice, sample])
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(sample))
|
||||
await copiedClassifier.initialTraining(
|
||||
TEST_OWNER_GROUP,
|
||||
getTrainingDataset([...dataSlice, { ...sample, spamDecision: SpamDecision.BLACKLIST, confidence: "4" }]),
|
||||
)
|
||||
predictedSpam = assertNotNull(await copiedClassifier.predict(compressor.binaryToVector(sample.vector), TEST_OWNER_GROUP))
|
||||
}
|
||||
retrainingNeeded[i] = retrainCount
|
||||
}
|
||||
|
||||
console.log(retrainingNeeded)
|
||||
const maxRetrain = Math.max(...retrainingNeeded)
|
||||
o.check(retrainingNeeded.length >= 10).equals(true)
|
||||
o.check(retrainingNeeded.length >= 10).equals(false)
|
||||
o.check(maxRetrain < 3).equals(true)
|
||||
})
|
||||
|
||||
o("Time spent in vectorization during initial training", async () => {
|
||||
o.timeout(2_000_000)
|
||||
|
||||
const ITERATION_COUNT: number = 1
|
||||
const { spamData, hamData } = await readMailDataFromCSV(DATASET_FILE_PATH)
|
||||
dataSlice = spamData.concat(hamData)
|
||||
|
||||
let trainingTimes = new Array<number>()
|
||||
let vectorizationTimes = new Array<number>()
|
||||
let trainingWithoutVectorization = new Array<number>()
|
||||
|
||||
await promiseMap(
|
||||
new Array<number>(ITERATION_COUNT).fill(0),
|
||||
async () => {
|
||||
const { vectorizationTime, trainingTime } = await spamClassifier.initialTraining(dataSlice)
|
||||
trainingTimes.push(trainingTime)
|
||||
vectorizationTimes.push(vectorizationTime)
|
||||
trainingWithoutVectorization.push(trainingTime - vectorizationTime)
|
||||
},
|
||||
{ concurrency: ITERATION_COUNT },
|
||||
)
|
||||
|
||||
trainingTimes = trainingTimes.sort()
|
||||
vectorizationTimes = vectorizationTimes.sort()
|
||||
trainingWithoutVectorization = trainingWithoutVectorization.sort()
|
||||
const avgTrainingTime = trainingTimes.reduce((a, b) => a + b, 0) / trainingTimes.length
|
||||
const avgVectorizationTime = vectorizationTimes.reduce((a, b) => a + b, 0) / vectorizationTimes.length
|
||||
const avgTrainingWithoutVectorization = trainingWithoutVectorization.reduce((a, b) => a + b, 0) / trainingWithoutVectorization.length
|
||||
|
||||
console.log("For vectorization:")
|
||||
console.log({ min: vectorizationTimes.at(0), max: vectorizationTimes.at(-1), avg: avgVectorizationTime })
|
||||
console.log("For whole training:")
|
||||
console.log({ min: trainingTimes.at(0), max: trainingTimes.at(-1), avg: avgTrainingTime })
|
||||
console.log("For training without vectorization:")
|
||||
console.log({
|
||||
min: trainingWithoutVectorization.at(0),
|
||||
max: trainingWithoutVectorization.at(-1),
|
||||
avg: avgTrainingWithoutVectorization,
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
async function testClassifier(classifier: SpamClassifier, mails: SpamTrainMailDatum[]): Promise<void> {
|
||||
async function testClassifier(classifier: SpamClassifier, mails: ClientSpamTrainingDatum[], compressor: SparseVectorCompressor): Promise<void> {
|
||||
let predictionArray: number[] = []
|
||||
for (let mail of mails) {
|
||||
const prediction = await classifier.predict(mail)
|
||||
const prediction = await classifier.predict(compressor.binaryToVector(mail.vector), TEST_OWNER_GROUP)
|
||||
predictionArray.push(prediction ? 1 : 0)
|
||||
}
|
||||
const ysArray = mails.map((mail) => mail.isSpam)
|
||||
const ysArray = mails.map((mail) => mail.spamDecision === SpamDecision.BLACKLIST)
|
||||
|
||||
let tp = 0,
|
||||
tn = 0,
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,38 @@
import o from "@tutao/otest"
import { promiseMap } from "@tutao/tutanota-utils"
import { SparseVectorCompressor } from "../../../../../../src/common/api/common/utils/spamClassificationUtils/SparseVectorCompressor"
import { HashingVectorizer } from "../../../../../../src/mail-app/workerUtils/spamClassification/HashingVectorizer"
import { DATASET_FILE_PATH, readMailDataFromCSV } from "./SpamClassifierTest"
import { spamClassifierTokenizer, SpamMailProcessor } from "../../../../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"

o.spec("SparseVectorCompressorTest", () => {
	o("sparse compress vectors", async () => {
		o.timeout(20_000)
		const spamHamData = await readMailDataFromCSV(DATASET_FILE_PATH)
		const spamData = spamHamData.spamData
		const hamData = spamHamData.hamData
		const dataSlice = spamData.concat(hamData)
		const tokenizedMails = await promiseMap(dataSlice, (mail) => spamClassifierTokenizer(new SpamMailProcessor().preprocessMail(mail)))
		const vectorizer = new HashingVectorizer()
		const vectors = (await vectorizer.transform(tokenizedMails)).slice(0, 1)

		const compressor = new SparseVectorCompressor()
		const BYTES_PER_NUMBER = 2
		console.log("Byte size of a number: ", BYTES_PER_NUMBER)
		const compressedVectors = vectors.map((v) => compressor.vectorToBinary(v))
		const decompressedVectors = compressedVectors.map((v) => compressor.binaryToVector(v))

		const decompressedVectorByteSizes: number[] = []
		const compressedVectorByteSizes: number[] = []
		for (let i = 0; i < compressedVectors.length; i++) {
			compressedVectorByteSizes.push(compressedVectors[i].values.length + compressedVectors[i].length)
			decompressedVectorByteSizes.push(decompressedVectors[i].length)
		}
		const averageCompressedVectorByteSize = compressedVectorByteSizes.reduce((a, b) => a + b, 0) / compressedVectorByteSizes.length
		const averageDecompressedVectorByteSize = decompressedVectorByteSizes.reduce((a, b) => a + b, 0) / decompressedVectorByteSizes.length
		console.log(`Average compressed vector byte size (Custom): ${averageCompressedVectorByteSize.toFixed(2)}B`)
		console.log(`Average decompressed vector byte size (Custom): ${averageDecompressedVectorByteSize.toFixed(2)}B`)

		o.check(decompressedVectors).deepEquals(vectors)
	})
})
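The round-trip check above is the only constraint this test places on the format; the actual binary layout used by SparseVectorCompressor is not shown in the diff. As a rough sketch of what a lossless sparse round-trip looks like in general, with field names and structure that are assumptions rather than the real wire format:

// Hypothetical sketch of a lossless sparse round-trip; NOT the real SparseVectorCompressor binary layout
interface SparseVectorSketch {
	indices: number[] // positions of the non-zero entries
	values: number[] // the non-zero entries themselves
	length: number // length of the original dense vector
}

function compressSketch(dense: readonly number[]): SparseVectorSketch {
	const indices: number[] = []
	const values: number[] = []
	for (let i = 0; i < dense.length; i++) {
		if (dense[i] !== 0) {
			indices.push(i)
			values.push(dense[i])
		}
	}
	return { indices, values, length: dense.length }
}

function decompressSketch(sparse: SparseVectorSketch): number[] {
	const dense = new Array<number>(sparse.length).fill(0)
	for (let i = 0; i < sparse.indices.length; i++) {
		dense[sparse.indices[i]] = sparse.values[i]
	}
	return dense
}

Hashed mail feature vectors are mostly zeros, so storing only the non-zero positions is what makes the average compressed size logged by the test so much smaller than the dense size.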
|
|
@@ -4,7 +4,6 @@ import { mock, Spy, spy, verify } from "@tutao/tutanota-test-utils"
|
|||
import { MailSetKind, OperationType, ProcessingState } from "../../../src/common/api/common/TutanotaConstants.js"
|
||||
import {
|
||||
BodyTypeRef,
|
||||
ClientSpamClassifierResultTypeRef,
|
||||
Mail,
|
||||
MailAddressTypeRef,
|
||||
MailDetails,
|
||||
|
|
@@ -24,17 +23,15 @@ import { UserController } from "../../../src/common/api/main/UserController.js"
|
|||
import { createTestEntity } from "../TestUtils.js"
|
||||
import { EntityUpdateData, PrefetchStatus } from "../../../src/common/api/common/utils/EntityUpdateUtils.js"
|
||||
import { MailboxDetail, MailboxModel } from "../../../src/common/mailFunctionality/MailboxModel.js"
|
||||
import { getElementId, getListId } from "../../../src/common/api/common/utils/EntityUtils.js"
|
||||
import { MailModel } from "../../../src/mail-app/mail/model/MailModel.js"
|
||||
import { EventController } from "../../../src/common/api/main/EventController.js"
|
||||
import { MailFacade } from "../../../src/common/api/worker/facades/lazy/MailFacade.js"
|
||||
import { ClientModelInfo } from "../../../src/common/api/common/EntityFunctions"
|
||||
import { InboxRuleHandler } from "../../../src/mail-app/mail/model/InboxRuleHandler"
|
||||
import { SpamClassificationHandler } from "../../../src/mail-app/mail/model/SpamClassificationHandler"
|
||||
import { SpamClassifier, SpamTrainMailDatum } from "../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { WebsocketConnectivityModel } from "../../../src/common/misc/WebsocketConnectivityModel"
|
||||
import { FolderSystem } from "../../../src/common/api/common/mail/FolderSystem"
|
||||
import { NotAuthorizedError, NotFoundError } from "../../../src/common/api/common/error/RestError"
|
||||
import { NotAuthorizedError } from "../../../src/common/api/common/error/RestError"
|
||||
import { ProcessInboxHandler } from "../../../src/mail-app/mail/model/ProcessInboxHandler"
|
||||
|
||||
const { anything } = matchers
|
||||
|
||||
|
|
@@ -68,6 +65,7 @@ o.spec("MailModelTest", function () {
|
|||
logins = object()
|
||||
let userController = object<UserController>()
|
||||
when(userController.isUpdateForLoggedInUserInstance(matchers.anything(), matchers.anything())).thenReturn(false)
|
||||
when(userController.isInternalUser()).thenReturn(true)
|
||||
when(logins.getUserController()).thenReturn(userController)
|
||||
|
||||
connectivityModel = object<WebsocketConnectivityModel>()
|
||||
|
|
@@ -82,39 +80,9 @@ o.spec("MailModelTest", function () {
|
|||
mailFacade,
|
||||
connectivityModel,
|
||||
() => object(),
|
||||
() => null,
|
||||
)
|
||||
})
|
||||
|
||||
o("doesn't send notification for another folder", async function () {
|
||||
const mail = createTestEntity(MailTypeRef, { _id: ["mailBagListId", "mailId"], sets: [] })
|
||||
restClient.addListInstances(mail)
|
||||
await model.entityEventsReceived([
|
||||
makeUpdate({
|
||||
instanceListId: getListId(mail) as NonEmptyString,
|
||||
instanceId: getElementId(mail),
|
||||
operation: OperationType.CREATE,
|
||||
}),
|
||||
])
|
||||
o(showSpy.invocations.length).equals(0)
|
||||
})
|
||||
o("doesn't send notification for move operation", async function () {
|
||||
const mail = createTestEntity(MailTypeRef, { _id: ["mailBagListId", "mailId"], sets: [] })
|
||||
restClient.addListInstances(mail)
|
||||
await model.entityEventsReceived([
|
||||
makeUpdate({
|
||||
instanceListId: getListId(mail) as NonEmptyString,
|
||||
instanceId: getElementId(mail),
|
||||
operation: OperationType.DELETE,
|
||||
}),
|
||||
makeUpdate({
|
||||
instanceListId: getListId(mail) as NonEmptyString,
|
||||
instanceId: getElementId(mail),
|
||||
operation: OperationType.CREATE,
|
||||
}),
|
||||
])
|
||||
o(showSpy.invocations.length).equals(0)
|
||||
})
|
||||
o("markMails", async function () {
|
||||
const mailId1: IdTuple = ["mailbag id1", "mail id1"]
|
||||
const mailId2: IdTuple = ["mailbag id2", "mail id2"]
|
||||
|
|
@@ -125,19 +93,15 @@ o.spec("MailModelTest", function () {
|
|||
|
||||
o.spec("Inbox rule processing and spam prediction", () => {
|
||||
let inboxRuleHandler: InboxRuleHandler
|
||||
let spamClassificationHandler: SpamClassificationHandler
|
||||
let spamClassifier: SpamClassifier
|
||||
let mailboxModel: MailboxModel
|
||||
let modelWithSpamAndInboxRule: MailModel
|
||||
let mail: Mail
|
||||
let mailDetails: MailDetails
|
||||
|
||||
let processInboxHandler: ProcessInboxHandler = object<ProcessInboxHandler>()
|
||||
o.beforeEach(async () => {
|
||||
const entityClient = new EntityClient(restClient, ClientModelInfo.getNewInstanceForTestsOnly())
|
||||
mailboxModel = instance(MailboxModel)
|
||||
inboxRuleHandler = object<InboxRuleHandler>()
|
||||
spamClassifier = object<SpamClassifier>()
|
||||
spamClassificationHandler = new SpamClassificationHandler(mailFacade, spamClassifier)
|
||||
|
||||
mailDetails = createTestEntity(MailDetailsTypeRef, {
|
||||
_id: "mailDetail",
|
||||
|
|
@@ -159,6 +123,7 @@ o.spec("MailModelTest", function () {
|
|||
sets: [inboxFolder._id],
|
||||
sender: createTestEntity(MailAddressTypeRef, { name: "Sender", address: "sender@tuta.com" }),
|
||||
processingState: ProcessingState.INBOX_RULE_NOT_PROCESSED,
|
||||
processNeeded: true,
|
||||
authStatus: "0",
|
||||
})
|
||||
const mailDetailsBlob: MailDetailsBlob = createTestEntity(MailDetailsBlobTypeRef, {
|
||||
|
|
@@ -180,8 +145,7 @@ o.spec("MailModelTest", function () {
|
|||
logins,
|
||||
mailFacade,
|
||||
connectivityModel,
|
||||
() => spamClassificationHandler,
|
||||
() => inboxRuleHandler,
|
||||
() => processInboxHandler,
|
||||
),
|
||||
(m: MailModel) => {
|
||||
m.getFolderSystemByGroupId = (groupId) => {
|
||||
|
|
@@ -193,162 +157,51 @@ o.spec("MailModelTest", function () {
|
|||
)
|
||||
})
|
||||
|
||||
o("does not re-apply inbox rules or re-classify mail if the mail is in a final processingState", async function () {
|
||||
const alreadyClassifiedMail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "maildIdWithFinalProcessingState"],
|
||||
o("invokes ProcessInboxHandler if the mail is not processed", async function () {
|
||||
const notProcessedMail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "notProcessedMailId"],
|
||||
_ownerGroup: "mailGroup",
|
||||
mailDetails: ["detailsList", mailDetails._id],
|
||||
sets: [inboxFolder._id],
|
||||
processingState: ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE,
|
||||
clientSpamClassifierResult: createTestEntity(ClientSpamClassifierResultTypeRef),
|
||||
processNeeded: true,
|
||||
})
|
||||
restClient.addListInstances(alreadyClassifiedMail)
|
||||
when(mailFacade.loadMailDetailsBlob(alreadyClassifiedMail)).thenResolve(mailDetails)
|
||||
restClient.addListInstances(notProcessedMail)
|
||||
when(mailFacade.loadMailDetailsBlob(notProcessedMail)).thenResolve(mailDetails)
|
||||
|
||||
const alreadyClassifiedMailCreateEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
instanceId: "maildIdWithFinalProcessingState",
|
||||
instanceId: "notProcessedMailId",
|
||||
operation: OperationType.CREATE,
|
||||
})
|
||||
|
||||
const { processingDone } = await modelWithSpamAndInboxRule.entityEventsReceived([alreadyClassifiedMailCreateEvent])
|
||||
await processingDone
|
||||
await modelWithSpamAndInboxRule.entityEventsReceived([alreadyClassifiedMailCreateEvent])
|
||||
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamClassificationHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamClassifier.storeSpamClassification(anything()), { times: 0 })
|
||||
verify(spamClassifier.predict(anything()), { times: 0 })
|
||||
verify(processInboxHandler.handleIncomingMail(anything(), anything(), anything(), anything()), { times: 1 })
|
||||
})
|
||||
|
||||
o("don't classify mail if the mail is read and in INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_PENDING state", async function () {
|
||||
const alreadyClassifiedMail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "maildIdWithFinalProcessingState"],
|
||||
o("does not invoke ProcessInboxHandler if the mail is already processed", async function () {
|
||||
const alreadyProcessedMail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "processedMailId"],
|
||||
_ownerGroup: "mailGroup",
|
||||
mailDetails: ["detailsList", mailDetails._id],
|
||||
sets: [inboxFolder._id],
|
||||
processingState: ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_PENDING,
|
||||
clientSpamClassifierResult: createTestEntity(ClientSpamClassifierResultTypeRef),
|
||||
unread: false,
|
||||
processNeeded: false,
|
||||
})
|
||||
restClient.addListInstances(alreadyClassifiedMail)
|
||||
when(mailFacade.loadMailDetailsBlob(alreadyClassifiedMail)).thenResolve(mailDetails)
|
||||
restClient.addListInstances(alreadyProcessedMail)
|
||||
when(mailFacade.loadMailDetailsBlob(alreadyProcessedMail)).thenResolve(mailDetails)
|
||||
|
||||
const alreadyClassifiedMailCreateEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
instanceId: "mailIdDoNotRunPredictionState",
|
||||
instanceId: "processedMailId",
|
||||
operation: OperationType.CREATE,
|
||||
})
|
||||
|
||||
const { processingDone } = await modelWithSpamAndInboxRule.entityEventsReceived([alreadyClassifiedMailCreateEvent])
|
||||
await processingDone
|
||||
await modelWithSpamAndInboxRule.entityEventsReceived([alreadyClassifiedMailCreateEvent])
|
||||
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamClassificationHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamClassifier.storeSpamClassification(anything()), { times: 0 })
|
||||
verify(spamClassifier.predict(anything()), { times: 0 })
|
||||
verify(processInboxHandler.handleIncomingMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
})
|
||||
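Taken together, the two tests above pin down the gating behaviour: handleIncomingMail runs exactly once for a mail whose processNeeded flag is still set and not at all once the flag is cleared. A sketch of that gate, assuming a helper of this shape (the real check sits inside MailModel's create-event handling and may look different):

// Hypothetical helper illustrating the gate the tests exercise; not copied from MailModel
async function maybeProcessIncomingMail(
	handler: ProcessInboxHandler,
	mail: Mail,
	folder: MailFolder,
	mailboxDetail: MailboxDetail,
	folderSystem: FolderSystem,
): Promise<MailFolder | null> {
	if (!mail.processNeeded) {
		return null // already processed by another client, leave the mail where it is
	}
	return handler.handleIncomingMail(mail, folder, mailboxDetail, folderSystem)
}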
|
||||
o("don't classify mail if the mail is in INBOX_RULE_NOT_PROCESSED_AND_DO_NOT_RUN_SPAM_PREDICTION state", async function () {
|
||||
const alreadyClassifiedMail = createTestEntity(MailTypeRef, {
|
||||
_id: ["mailListId", "maildIdWithFinalProcessingState"],
|
||||
_ownerGroup: "mailGroup",
|
||||
mailDetails: ["detailsList", mailDetails._id],
|
||||
sets: [inboxFolder._id],
|
||||
processingState: ProcessingState.INBOX_RULE_NOT_PROCESSED_AND_DO_NOT_RUN_SPAM_PREDICTION,
|
||||
clientSpamClassifierResult: createTestEntity(ClientSpamClassifierResultTypeRef),
|
||||
})
|
||||
restClient.addListInstances(alreadyClassifiedMail)
|
||||
when(mailFacade.loadMailDetailsBlob(alreadyClassifiedMail)).thenResolve(mailDetails)
|
||||
|
||||
const alreadyClassifiedMailCreateEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
instanceId: "mailIdDoNotRunPredictionState",
|
||||
operation: OperationType.CREATE,
|
||||
})
|
||||
|
||||
const { processingDone } = await modelWithSpamAndInboxRule.entityEventsReceived([alreadyClassifiedMailCreateEvent])
|
||||
await processingDone
|
||||
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamClassificationHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamClassifier.storeSpamClassification(anything()), { times: 0 })
|
||||
verify(spamClassifier.predict(anything()), { times: 0 })
|
||||
})
|
||||
|
||||
o("does not try to apply inbox rule when downloading of mail fails on create mail event", async function () {
|
||||
restClient.setListElementException(mail._id, new NotFoundError("Mail not found"))
|
||||
|
||||
const mailCreateEvent = makeUpdate({
|
||||
instanceListId: getListId(mail) as NonEmptyString,
|
||||
instanceId: getElementId(mail),
|
||||
operation: OperationType.CREATE,
|
||||
})
|
||||
await modelWithSpamAndInboxRule.entityEventsReceived([mailCreateEvent])
|
||||
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything()), { times: 0 })
|
||||
})
|
||||
|
||||
o("spam prediction does not happen when inbox rule is applied", async () => {
|
||||
when(spamClassifier.predict(anything())).thenResolve(false)
|
||||
|
||||
const mailCreateEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
instanceId: "mailId",
|
||||
operation: OperationType.CREATE,
|
||||
})
|
||||
|
||||
// when inbox rule is applied
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything())).thenResolve(inboxFolder)
|
||||
const { processingDone } = await modelWithSpamAndInboxRule.entityEventsReceived([mailCreateEvent])
|
||||
await processingDone
|
||||
const expectedSpamTrainMailDatum: SpamTrainMailDatum = {
|
||||
mailId: ["mailListId", "mailId"],
|
||||
ownerGroup: "mailGroup",
|
||||
body: "some text",
|
||||
subject: "subject",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 1,
|
||||
sender: "Sender sender@tuta.com",
|
||||
toRecipients: "Recipient recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "TAUTHENTICATED",
|
||||
}
|
||||
verify(spamClassifier.storeSpamClassification(expectedSpamTrainMailDatum), { times: 1 })
|
||||
verify(spamClassifier.predict(anything()), { times: 0 })
|
||||
})
|
||||
|
||||
o("spam prediction happens when inbox rule is not applied", async () => {
|
||||
when(spamClassifier.predict(anything())).thenResolve(false)
|
||||
|
||||
const mailCreateEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
instanceId: "mailId",
|
||||
operation: OperationType.CREATE,
|
||||
})
|
||||
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything())).thenResolve(null)
|
||||
const { processingDone } = await modelWithSpamAndInboxRule.entityEventsReceived([mailCreateEvent])
|
||||
await processingDone
|
||||
|
||||
const expectedSpamTrainMailDatum: SpamTrainMailDatum = {
|
||||
mailId: ["mailListId", "mailId"],
|
||||
ownerGroup: "mailGroup",
|
||||
body: "some text",
|
||||
subject: "subject",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 1,
|
||||
sender: "Sender sender@tuta.com",
|
||||
toRecipients: "Recipient recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "TAUTHENTICATED",
|
||||
}
|
||||
verify(spamClassifier.storeSpamClassification(expectedSpamTrainMailDatum), { times: 1 })
|
||||
verify(spamClassifier.predict(anything()), { times: 1 })
|
||||
})
|
||||
|
||||
o("does not try to do spam classification when downloading of mail fails on create mail event", async function () {
|
||||
o("does not invoke ProcessInboxHandler when downloading of mail fails on create mail event", async function () {
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything())).thenResolve(null)
|
||||
const mailCreateEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
|
|
@@ -358,42 +211,8 @@ o.spec("MailModelTest", function () {
|
|||
|
||||
// mail not being there
|
||||
restClient.setListElementException(mail._id, new NotAuthorizedError("blah"))
|
||||
const { processingDone: inboxRuleProcessedMailNotThere } = await modelWithSpamAndInboxRule.entityEventsReceived([mailCreateEvent])
|
||||
await inboxRuleProcessedMailNotThere
|
||||
verify(spamClassifier.storeSpamClassification(anything()), { times: 0 })
|
||||
verify(spamClassifier.predict(anything()), { times: 0 })
|
||||
|
||||
// mail being there
|
||||
restClient.addListInstances(mail)
|
||||
const { processingDone: inboxRuleProcessedMailIsThere } = await modelWithSpamAndInboxRule.entityEventsReceived([mailCreateEvent])
|
||||
await inboxRuleProcessedMailIsThere
|
||||
const expectedSpamTrainMailDatum: SpamTrainMailDatum = {
|
||||
mailId: ["mailListId", "mailId"],
|
||||
ownerGroup: "mailGroup",
|
||||
body: "some text",
|
||||
subject: "subject",
|
||||
isSpam: false,
|
||||
isSpamConfidence: 1,
|
||||
sender: "Sender sender@tuta.com",
|
||||
toRecipients: "Recipient recipient@tuta.com",
|
||||
ccRecipients: "",
|
||||
bccRecipients: "",
|
||||
authStatus: "TAUTHENTICATED",
|
||||
}
|
||||
verify(spamClassifier.storeSpamClassification(expectedSpamTrainMailDatum), { times: 1 })
|
||||
verify(spamClassifier.predict(anything()), { times: 1 })
|
||||
})
|
||||
|
||||
o("deletes a training datum for deleted mail event", async () => {
|
||||
const mailDeleteEvent = makeUpdate({
|
||||
instanceListId: "mailListId",
|
||||
instanceId: "mailId",
|
||||
operation: OperationType.DELETE,
|
||||
})
|
||||
const { processingDone } = await modelWithSpamAndInboxRule.entityEventsReceived([mailDeleteEvent])
|
||||
await processingDone
|
||||
|
||||
verify(spamClassifier.deleteSpamClassification(mail._id), { times: 1 })
|
||||
await modelWithSpamAndInboxRule.entityEventsReceived([mailCreateEvent])
|
||||
verify(processInboxHandler.handleIncomingMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
})
|
||||
})
|
||||
|
||||
|
|
|
|||
224
test/tests/mail/ProcessInboxHandlerTest.ts
Normal file
|
|
@@ -0,0 +1,224 @@
|
|||
import o from "@tutao/otest"
|
||||
import { matchers, object, verify, when } from "testdouble"
|
||||
import {
|
||||
Body,
|
||||
BodyTypeRef,
|
||||
ClientSpamClassifierResultTypeRef,
|
||||
Mail,
|
||||
MailDetails,
|
||||
MailDetailsTypeRef,
|
||||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
} from "../../../src/common/api/entities/tutanota/TypeRefs"
|
||||
import { FeatureType, MailSetKind, ProcessingState, SpamDecision } from "../../../src/common/api/common/TutanotaConstants"
|
||||
import { ClientClassifierType } from "../../../src/common/api/common/ClientClassifierType"
|
||||
import { assertNotNull, delay } from "@tutao/tutanota-utils"
|
||||
import { MailFacade } from "../../../src/common/api/worker/facades/lazy/MailFacade"
|
||||
import { createTestEntity } from "../TestUtils"
|
||||
import { SpamClassificationHandler } from "../../../src/mail-app/mail/model/SpamClassificationHandler"
|
||||
import { FolderSystem } from "../../../src/common/api/common/mail/FolderSystem"
|
||||
import { isSameId } from "../../../src/common/api/common/utils/EntityUtils"
|
||||
import { InboxRuleHandler } from "../../../src/mail-app/mail/model/InboxRuleHandler"
|
||||
import { ProcessInboxHandler, UnencryptedProcessInboxDatum } from "../../../src/mail-app/mail/model/ProcessInboxHandler"
|
||||
import { MailboxDetail } from "../../../src/common/mailFunctionality/MailboxModel"
|
||||
import { createSpamMailDatum, SpamMailProcessor } from "../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
import { LoginController } from "../../../src/common/api/main/LoginController"
|
||||
|
||||
const { anything } = matchers
|
||||
|
||||
o.spec("ProcessInboxHandlerTest", function () {
|
||||
let mailFacade = object<MailFacade>()
|
||||
let logins = object<LoginController>()
|
||||
let body: Body
|
||||
let mail: Mail
|
||||
let spamHandler: SpamClassificationHandler
|
||||
let folderSystem: FolderSystem
|
||||
let mailboxDetail: MailboxDetail
|
||||
let mailDetails: MailDetails
|
||||
let inboxRuleHandler: InboxRuleHandler = object<InboxRuleHandler>()
|
||||
let processInboxHandler: ProcessInboxHandler
|
||||
|
||||
const inboxFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "inbox"], folderType: MailSetKind.INBOX })
|
||||
const trashFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "trash"], folderType: MailSetKind.TRASH })
|
||||
const spamFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "spam"], folderType: MailSetKind.SPAM })
|
||||
|
||||
o.beforeEach(function () {
|
||||
spamHandler = object<SpamClassificationHandler>()
|
||||
inboxRuleHandler = object<InboxRuleHandler>()
|
||||
|
||||
body = createTestEntity(BodyTypeRef, { text: "Body Text" })
|
||||
mailDetails = createTestEntity(MailDetailsTypeRef, { _id: "mailDetail", body })
|
||||
mail = createTestEntity(MailTypeRef, {
|
||||
_id: ["listId", "elementId"],
|
||||
sets: [spamFolder._id],
|
||||
subject: "subject",
|
||||
_ownerGroup: "owner",
|
||||
mailDetails: ["detailsList", mailDetails._id],
|
||||
unread: true,
|
||||
processingState: ProcessingState.INBOX_RULE_NOT_PROCESSED,
|
||||
clientSpamClassifierResult: createTestEntity(ClientSpamClassifierResultTypeRef, { spamDecision: SpamDecision.NONE }),
|
||||
processNeeded: true,
|
||||
})
|
||||
folderSystem = object<FolderSystem>()
|
||||
mailboxDetail = object()
|
||||
|
||||
when(mailFacade.moveMails(anything(), anything(), anything())).thenResolve([])
|
||||
when(
|
||||
mailFacade.loadMailDetailsBlob(
|
||||
matchers.argThat((requestedMails: Mail) => {
|
||||
return isSameId(requestedMails._id, mail._id)
|
||||
}),
|
||||
),
|
||||
).thenDo(async () => mailDetails)
|
||||
processInboxHandler = new ProcessInboxHandler(
|
||||
logins,
|
||||
mailFacade,
|
||||
() => spamHandler,
|
||||
() => inboxRuleHandler,
|
||||
new Map(),
|
||||
0,
|
||||
)
|
||||
when(logins.isEnabled(FeatureType.SpamClientClassification)).thenReturn(true)
|
||||
})
|
||||
|
||||
o("handleIncomingMail does move mail if it has been processed already", async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
mail.processNeeded = false
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(anything(), anything(), anything()), { times: 0 })
|
||||
verify(spamHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(inboxFolder)
|
||||
verify(mailFacade.processNewMails(anything(), anything()), { times: 0 })
|
||||
})
|
||||
|
||||
o("handleIncomingMail does move mail from inbox to other folder if inbox rule applies", async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
classifierType: ClientClassifierType.CUSTOMER_INBOX_RULES,
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: trashFolder._id,
|
||||
vector: new Uint8Array(),
|
||||
}
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve({
|
||||
targetFolder: trashFolder,
|
||||
processInboxDatum,
|
||||
})
|
||||
verify(spamHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(trashFolder)
|
||||
await delay(0)
|
||||
verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processInboxDatum]))
|
||||
})
|
||||
|
||||
o("handleIncomingMail does move mail from inbox to spam folder if mail is spam", async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: spamFolder._id,
|
||||
vector: new Uint8Array(),
|
||||
}
|
||||
when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
|
||||
targetFolder: spamFolder,
|
||||
processInboxDatum,
|
||||
})
|
||||
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(spamFolder)
|
||||
await delay(0)
|
||||
verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processInboxDatum]))
|
||||
})
|
||||
|
||||
o("handleIncomingMail does NOT move mail from inbox to spam folder if mail is ham", async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
classifierType: null,
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: inboxFolder._id,
|
||||
vector: new Uint8Array(),
|
||||
}
|
||||
when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
|
||||
targetFolder: inboxFolder,
|
||||
processInboxDatum,
|
||||
})
|
||||
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(inboxFolder)
|
||||
await delay(0)
|
||||
verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processInboxDatum]))
|
||||
})
|
||||
|
||||
o("handleIncomingMail does NOT move mail from spam to inbox folder if mail is spam", async function () {
|
||||
mail.sets = [spamFolder._id]
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: spamFolder._id,
|
||||
vector: new Uint8Array(),
|
||||
}
|
||||
when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
|
||||
targetFolder: spamFolder,
|
||||
processInboxDatum,
|
||||
})
|
||||
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(spamFolder)
|
||||
await delay(0)
|
||||
verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processInboxDatum]))
|
||||
})
|
||||
|
||||
o("handleIncomingMail moves mail from spam to inbox folder if mail is ham", async function () {
|
||||
mail.sets = [spamFolder._id]
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
|
||||
const processInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: inboxFolder._id,
|
||||
vector: new Uint8Array(),
|
||||
}
|
||||
when(spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)).thenResolve({
|
||||
targetFolder: inboxFolder,
|
||||
processInboxDatum,
|
||||
})
|
||||
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(inboxFolder)
|
||||
await delay(0)
|
||||
verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processInboxDatum]))
|
||||
})
|
||||
|
||||
o("handleIncomingMail does NOT move mail from inbox to spam folder if spam classification is disabled", async function () {
|
||||
when(logins.isEnabled(FeatureType.SpamClientClassification)).thenReturn(false)
|
||||
|
||||
mail.sets = [inboxFolder._id]
|
||||
const compressedVector = new Uint8Array([2, 4, 8, 16])
|
||||
|
||||
const datum = createSpamMailDatum(mail, mailDetails)
|
||||
when(mailFacade.vectorizeAndCompressMails({ mail, mailDetails })).thenResolve(compressedVector)
|
||||
processInboxHandler = new ProcessInboxHandler(
|
||||
logins,
|
||||
mailFacade,
|
||||
() => spamHandler,
|
||||
() => inboxRuleHandler,
|
||||
new Map(),
|
||||
0,
|
||||
)
|
||||
when(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, mail, true)).thenResolve(null)
|
||||
const processedMail: UnencryptedProcessInboxDatum = {
|
||||
classifierType: null,
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: inboxFolder._id,
|
||||
vector: compressedVector,
|
||||
}
|
||||
verify(spamHandler.predictSpamForNewMail(anything(), anything(), anything(), anything()), { times: 0 })
|
||||
|
||||
const targetFolder = await processInboxHandler.handleIncomingMail(mail, inboxFolder, mailboxDetail, folderSystem)
|
||||
o(targetFolder).deepEquals(inboxFolder)
|
||||
await delay(0)
|
||||
verify(mailFacade.processNewMails(assertNotNull(mail._ownerGroup), [processedMail]))
|
||||
})
|
||||
})
|
||||
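Every expectation in this new spec builds its UnencryptedProcessInboxDatum by hand, so the shape the handler passes to mailFacade.processNewMails can be read straight off the fixtures. For orientation only, with the caveat that the authoritative declaration lives in ProcessInboxHandler.ts and its exact types may differ:

// Field shape as exercised by the fixtures above; illustrative, not copied from ProcessInboxHandler.ts
interface UnencryptedProcessInboxDatumSketch {
	classifierType: ClientClassifierType | null // CUSTOMER_INBOX_RULES, CLIENT_CLASSIFICATION, or null when nothing moved the mail
	mailId: IdTuple // id of the mail that was processed
	targetMoveFolder: IdTuple // folder the mail is supposed to end up in
	vector: Uint8Array // compressed feature vector included in the processNewMails payload
}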
|
|
@@ -1,5 +1,5 @@
|
|||
import o from "@tutao/otest"
|
||||
import { matchers, object, verify, when } from "testdouble"
|
||||
import { matchers, object, when } from "testdouble"
|
||||
import {
|
||||
Body,
|
||||
BodyTypeRef,
|
||||
|
|
@@ -10,8 +10,7 @@ import {
|
|||
MailFolderTypeRef,
|
||||
MailTypeRef,
|
||||
} from "../../../src/common/api/entities/tutanota/TypeRefs"
|
||||
import { SpamClassifier, SpamTrainMailDatum } from "../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { getMailBodyText } from "../../../src/common/api/common/CommonMailUtils"
|
||||
import { SpamClassifier } from "../../../src/mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
import { MailSetKind, ProcessingState, SpamDecision } from "../../../src/common/api/common/TutanotaConstants"
|
||||
import { ClientClassifierType } from "../../../src/common/api/common/ClientClassifierType"
|
||||
import { assert, assertNotNull } from "@tutao/tutanota-utils"
|
||||
|
|
@@ -20,7 +19,8 @@ import { createTestEntity } from "../TestUtils"
|
|||
import { SpamClassificationHandler } from "../../../src/mail-app/mail/model/SpamClassificationHandler"
|
||||
import { FolderSystem } from "../../../src/common/api/common/mail/FolderSystem"
|
||||
import { isSameId } from "../../../src/common/api/common/utils/EntityUtils"
|
||||
import { any } from "@tensorflow/tfjs-core"
|
||||
import { UnencryptedProcessInboxDatum } from "../../../src/mail-app/mail/model/ProcessInboxHandler"
|
||||
import { createSpamMailDatum, SpamMailProcessor } from "../../../src/common/api/common/utils/spamClassificationUtils/SpamMailProcessor"
|
||||
|
||||
const { anything } = matchers
|
||||
|
||||
|
|
@@ -30,6 +30,7 @@ o.spec("SpamClassificationHandlerTest", function () {
|
|||
let mail: Mail
|
||||
let spamClassifier: SpamClassifier
|
||||
let spamHandler: SpamClassificationHandler
|
||||
let spamMailProcessor: SpamMailProcessor = new SpamMailProcessor()
|
||||
let folderSystem: FolderSystem
|
||||
let mailDetails: MailDetails
|
||||
|
||||
|
|
@@ -37,7 +38,7 @@ o.spec("SpamClassificationHandlerTest", function () {
|
|||
const trashFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "trash"], folderType: MailSetKind.TRASH })
|
||||
const spamFolder = createTestEntity(MailFolderTypeRef, { _id: ["listId", "spam"], folderType: MailSetKind.SPAM })
|
||||
|
||||
o.beforeEach(function () {
|
||||
o.beforeEach(async function () {
|
||||
spamClassifier = object<SpamClassifier>()
|
||||
|
||||
body = createTestEntity(BodyTypeRef, { text: "Body Text" })
|
||||
|
|
@@ -54,7 +55,7 @@ o.spec("SpamClassificationHandlerTest", function () {
|
|||
})
|
||||
folderSystem = object<FolderSystem>()
|
||||
|
||||
when(mailFacade.moveMails(anything(), anything(), anything(), ClientClassifierType.CLIENT_CLASSIFICATION)).thenResolve([])
|
||||
when(mailFacade.moveMails(anything(), anything(), anything())).thenResolve([])
|
||||
when(folderSystem.getSystemFolderByType(MailSetKind.SPAM)).thenReturn(spamFolder)
|
||||
when(folderSystem.getSystemFolderByType(MailSetKind.INBOX)).thenReturn(inboxFolder)
|
||||
when(folderSystem.getSystemFolderByType(MailSetKind.TRASH)).thenReturn(trashFolder)
|
||||
|
|
@@ -75,87 +76,77 @@ o.spec("SpamClassificationHandlerTest", function () {
|
|||
),
|
||||
anything(),
|
||||
).thenDo(async () => [{ mail, mailDetails }])
|
||||
spamHandler = new SpamClassificationHandler(mailFacade, spamClassifier)
|
||||
when(spamClassifier.vectorizeAndCompress(createSpamMailDatum(mail, mailDetails))).thenResolve(
|
||||
await spamMailProcessor.vectorizeAndCompress(createSpamMailDatum(mail, mailDetails)),
|
||||
)
|
||||
spamHandler = new SpamClassificationHandler(spamClassifier)
|
||||
})
|
||||
|
||||
o("predictSpamForNewMail does move mail from inbox to spam folder if mail is spam", async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
when(spamClassifier.predict(anything())).thenResolve(true)
|
||||
when(spamClassifier.predict(anything(), anything())).thenResolve(true)
|
||||
|
||||
const finalResult = await spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)
|
||||
o(spamHandler.hamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.spamMoveMailData?.mails).deepEquals([mail._id])
|
||||
o(spamHandler.classifierResultServiceMailIds).deepEquals([])
|
||||
o(finalResult).deepEquals(spamFolder)
|
||||
|
||||
const expectedProcessInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: spamFolder._id,
|
||||
classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
vector: await spamMailProcessor.vectorizeAndCompress(createSpamMailDatum(mail, mailDetails)),
|
||||
}
|
||||
|
||||
o(finalResult.targetFolder).deepEquals(spamFolder)
|
||||
o(finalResult.processInboxDatum).deepEquals(expectedProcessInboxDatum)
|
||||
})
|
||||
|
||||
o("predictSpamForNewMail does NOT move mail from inbox to spam folder if mail is ham", async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
when(spamClassifier.predict(anything())).thenResolve(false)
|
||||
when(spamClassifier.predict(anything(), anything())).thenResolve(false)
|
||||
|
||||
const finalResult = await spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)
|
||||
o(spamHandler.hamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.spamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.classifierResultServiceMailIds).deepEquals([mail._id])
|
||||
o(finalResult).deepEquals(inboxFolder)
|
||||
|
||||
const expectedProcessInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: inboxFolder._id,
|
||||
classifierType: null,
|
||||
vector: await spamMailProcessor.vectorizeAndCompress(createSpamMailDatum(mail, mailDetails)),
|
||||
}
|
||||
|
||||
o(finalResult.targetFolder).deepEquals(inboxFolder)
|
||||
o(finalResult.processInboxDatum).deepEquals(expectedProcessInboxDatum)
|
||||
})
|
||||
|
||||
o("predictSpamForNewMail does NOT move mail from spam to inbox folder if mail is spam", async function () {
|
||||
mail.sets = [spamFolder._id]
|
||||
when(spamClassifier.predict(anything())).thenResolve(true)
|
||||
when(spamClassifier.predict(anything(), anything())).thenResolve(true)
|
||||
|
||||
const finalResult = await spamHandler.predictSpamForNewMail(mail, mailDetails, spamFolder, folderSystem)
|
||||
o(spamHandler.hamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.spamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.classifierResultServiceMailIds).deepEquals([mail._id])
|
||||
o(finalResult).deepEquals(spamFolder)
|
||||
|
||||
const expectedProcessInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: spamFolder._id,
|
||||
classifierType: null,
|
||||
vector: await spamMailProcessor.vectorizeAndCompress(createSpamMailDatum(mail, mailDetails)),
|
||||
}
|
||||
|
||||
o(finalResult.targetFolder).deepEquals(spamFolder)
|
||||
o(finalResult.processInboxDatum).deepEquals(expectedProcessInboxDatum)
|
||||
})
|
||||
|
||||
o("predictSpamForNewMail moves mail from spam to inbox folder if mail is ham", async function () {
|
||||
mail.sets = [spamFolder._id]
|
||||
when(spamClassifier.predict(anything())).thenResolve(false)
|
||||
when(spamClassifier.predict(anything(), anything())).thenResolve(false)
|
||||
|
||||
const finalResult = await spamHandler.predictSpamForNewMail(mail, mailDetails, spamFolder, folderSystem)
|
||||
o(spamHandler.hamMoveMailData?.mails).deepEquals([mail._id])
|
||||
o(spamHandler.spamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.classifierResultServiceMailIds).deepEquals([])
|
||||
o(finalResult).deepEquals(inboxFolder)
|
||||
})
|
||||
|
||||
o("predictSpamForNewMail does NOT move mail from spam to spam folder if mail is spam", async function () {
|
||||
mail.sets = [spamFolder._id]
|
||||
when(spamClassifier.predict(anything())).thenResolve(true)
|
||||
const expectedProcessInboxDatum: UnencryptedProcessInboxDatum = {
|
||||
mailId: mail._id,
|
||||
targetMoveFolder: inboxFolder._id,
|
||||
classifierType: ClientClassifierType.CLIENT_CLASSIFICATION,
|
||||
vector: await spamMailProcessor.vectorizeAndCompress(createSpamMailDatum(mail, mailDetails)),
|
||||
}
|
||||
|
||||
const finalResult = await spamHandler.predictSpamForNewMail(mail, mailDetails, spamFolder, folderSystem)
|
||||
o(spamHandler.hamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.spamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.classifierResultServiceMailIds).deepEquals([mail._id])
|
||||
o(finalResult).deepEquals(spamFolder)
|
||||
})
|
||||
|
||||
o(
|
||||
"predictSpamForNewMail does NOT send classifierResultService request if processingState is INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE",
|
||||
async function () {
|
||||
mail.sets = [inboxFolder._id]
|
||||
mail.processingState = ProcessingState.INBOX_RULE_PROCESSED_AND_SPAM_PREDICTION_MADE
|
||||
when(spamClassifier.predict(anything())).thenResolve(false)
|
||||
|
||||
const finalResult = await spamHandler.predictSpamForNewMail(mail, mailDetails, inboxFolder, folderSystem)
|
||||
o(spamHandler.hamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.spamMoveMailData).deepEquals(null)
|
||||
o(spamHandler.classifierResultServiceMailIds).deepEquals([])
|
||||
o(finalResult).deepEquals(inboxFolder)
|
||||
},
|
||||
)
|
||||
|
||||
o("update spam classification data on every mail update", async function () {
|
||||
when(spamClassifier.getSpamClassification(anything())).thenResolve({ isSpam: false, isSpamConfidence: 0 })
|
||||
mail.clientSpamClassifierResult = createTestEntity(ClientSpamClassifierResultTypeRef, {
|
||||
spamDecision: SpamDecision.BLACKLIST,
|
||||
confidence: "1",
|
||||
})
|
||||
|
||||
await spamHandler.updateSpamClassificationData(mail)
|
||||
verify(spamClassifier.updateSpamClassification(["listId", "elementId"], true, 1), { times: 1 })
|
||||
o(finalResult.targetFolder).deepEquals(inboxFolder)
|
||||
o(finalResult.processInboxDatum).deepEquals(expectedProcessInboxDatum)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@@ -41,6 +41,8 @@ import { ConversationListModel } from "../../../../src/mail-app/mail/model/Conve
|
|||
import { theme } from "../../../../src/common/gui/theme.js"
|
||||
import { ListLoadingState } from "../../../../src/common/gui/base/List"
|
||||
import { getMailFilterForType, MailFilterType } from "../../../../src/mail-app/mail/view/MailViewerUtils"
|
||||
import { ProcessInboxHandler } from "../../../../src/mail-app/mail/model/ProcessInboxHandler"
|
||||
import { FolderSystem } from "../../../../src/common/api/common/mail/FolderSystem"
|
||||
|
||||
o.spec("ConversationListModel", () => {
|
||||
let model: ConversationListModel
|
||||
|
|
@@ -80,7 +82,7 @@ o.spec("ConversationListModel", () => {
|
|||
let conversationPrefProvider: ConversationPrefProvider
|
||||
let entityClient: EntityClient
|
||||
let mailModel: MailModel
|
||||
let inboxRuleHandler: InboxRuleHandler
|
||||
let processInboxHandler: ProcessInboxHandler
|
||||
let cacheStorage: ExposedCacheStorage
|
||||
|
||||
o.beforeEach(() => {
|
||||
|
|
@@ -95,10 +97,12 @@ o.spec("ConversationListModel", () => {
|
|||
conversationPrefProvider = object()
|
||||
entityClient = object()
|
||||
mailModel = object()
|
||||
inboxRuleHandler = object()
|
||||
processInboxHandler = object()
|
||||
cacheStorage = object()
|
||||
model = new ConversationListModel(mailSet, conversationPrefProvider, entityClient, mailModel, inboxRuleHandler, cacheStorage)
|
||||
model = new ConversationListModel(mailSet, conversationPrefProvider, entityClient, mailModel, processInboxHandler, cacheStorage)
|
||||
when(mailModel.getMailboxDetailsForMailFolder(mailSet)).thenResolve(mailboxDetail)
|
||||
const folderSystem: FolderSystem = object()
|
||||
when(mailModel.getFolderSystemByGroupId(matchers.anything())).thenReturn(folderSystem)
|
||||
})
|
||||
|
||||
// Care has to be ensured for generating mail set entry IDs as we depend on real mail set ID decoding, thus we have
|
||||
|
|
@@ -209,7 +213,7 @@ o.spec("ConversationListModel", () => {
|
|||
verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
|
||||
times: 0,
|
||||
})
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {
|
||||
verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), mailboxDetail, matchers.anything()), {
|
||||
times: 0,
|
||||
})
|
||||
})
|
||||
|
|
@ -227,7 +231,7 @@ o.spec("ConversationListModel", () => {
|
|||
verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
|
||||
times: 0,
|
||||
})
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {
|
||||
verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), mailboxDetail, matchers.anything()), {
|
||||
times: 0,
|
||||
})
|
||||
})
|
||||
|
|
@ -246,7 +250,7 @@ o.spec("ConversationListModel", () => {
|
|||
verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
|
||||
times: 0,
|
||||
})
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {
|
||||
verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), mailboxDetail, matchers.anything()), {
|
||||
times: 0,
|
||||
})
|
||||
o.check(model.loadingStatus).equals(ListLoadingState.Idle)
|
||||
|
|
@ -262,15 +266,27 @@ o.spec("ConversationListModel", () => {
|
|||
|
||||
// make one item have a rule
|
||||
when(
|
||||
inboxRuleHandler.findAndApplyMatchingRule(
|
||||
mailboxDetail,
|
||||
processInboxHandler.handleIncomingMail(
|
||||
matchers.argThat((mail: Mail) => isSameId(mail._id, makeMailId(25))),
|
||||
true,
|
||||
matchers.anything(),
|
||||
matchers.anything(),
|
||||
matchers.anything(),
|
||||
),
|
||||
).thenResolve({})
|
||||
).thenResolve({ folderType: MailSetKind.SPAM })
|
||||
|
||||
when(
|
||||
processInboxHandler.handleIncomingMail(
|
||||
matchers.argThat((mail: Mail) => !isSameId(mail._id, makeMailId(25))),
|
||||
matchers.anything(),
|
||||
matchers.anything(),
|
||||
matchers.anything(),
|
||||
),
|
||||
).thenResolve({ folderType: MailSetKind.INBOX })
|
||||
|
||||
await setUpTestData(PageSize, labels, false, 1)
|
||||
|
||||
await model.loadInitial()
|
||||
|
||||
o.check(model.mails.length).equals(PageSize - 1)
|
||||
for (const mail of model.mails) {
|
||||
o.check(model.getLabelsForMail(mail)).deepEquals(labels)
|
||||
|
|
@ -281,7 +297,7 @@ o.spec("ConversationListModel", () => {
|
|||
verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
|
||||
times: 1,
|
||||
})
|
||||
verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {
|
||||
verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), mailboxDetail, matchers.anything()), {
|
||||
times: 100,
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,6 @@ import { matchers, object, verify, when } from "testdouble"
import { ConversationPrefProvider } from "../../../../src/mail-app/mail/view/ConversationViewModel"
import { EntityClient } from "../../../../src/common/api/common/EntityClient"
import { MailModel } from "../../../../src/mail-app/mail/model/MailModel"
import { InboxRuleHandler } from "../../../../src/mail-app/mail/model/InboxRuleHandler"
import { ExposedCacheStorage } from "../../../../src/common/api/worker/rest/DefaultEntityRestCache"
import { MailSetKind, OperationType } from "../../../../src/common/api/common/TutanotaConstants"
import {

@@ -39,6 +38,8 @@ import { clamp, pad } from "@tutao/tutanota-utils"
import { LoadedMail } from "../../../../src/mail-app/mail/model/MailSetListModel"
import { getMailFilterForType, MailFilterType } from "../../../../src/mail-app/mail/view/MailViewerUtils"
import { theme } from "../../../../src/common/gui/theme.js"
import { ProcessInboxHandler } from "../../../../src/mail-app/mail/model/ProcessInboxHandler"
import { FolderSystem } from "../../../../src/common/api/common/mail/FolderSystem"

o.spec("MailListModel", () => {
	let model: MailListModel

@@ -78,7 +79,7 @@ o.spec("MailListModel", () => {
	let conversationPrefProvider: ConversationPrefProvider
	let entityClient: EntityClient
	let mailModel: MailModel
	let inboxRuleHandler: InboxRuleHandler
	let processInboxHandler: ProcessInboxHandler
	let cacheStorage: ExposedCacheStorage

	o.beforeEach(() => {

@@ -93,10 +94,12 @@ o.spec("MailListModel", () => {
		conversationPrefProvider = object()
		entityClient = object()
		mailModel = object()
		inboxRuleHandler = object()
		processInboxHandler = object()
		cacheStorage = object()
		model = new MailListModel(mailSet, conversationPrefProvider, entityClient, mailModel, inboxRuleHandler, cacheStorage)
		model = new MailListModel(mailSet, conversationPrefProvider, entityClient, mailModel, processInboxHandler, cacheStorage)
		when(mailModel.getMailboxDetailsForMailFolder(mailSet)).thenResolve(mailboxDetail)
		const folderSystem: FolderSystem = object()
		when(mailModel.getFolderSystemByGroupId(matchers.anything())).thenReturn(folderSystem)
	})

	// Care has to be ensured for generating mail set entry IDs as we depend on real mail set ID decoding, thus we have

@@ -204,7 +207,8 @@ o.spec("MailListModel", () => {
		verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
			times: 0,
		})
		verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {

		verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), matchers.anything(), matchers.anything()), {
			times: 0,
		})
	})

@@ -222,7 +226,7 @@ o.spec("MailListModel", () => {
		verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
			times: 0,
		})
		verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {
		verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), matchers.anything(), matchers.anything()), {
			times: 0,
		})
	})

@@ -230,14 +234,23 @@ o.spec("MailListModel", () => {
	o.test("applies inbox rules if inbox", async () => {
		mailSet.folderType = MailSetKind.INBOX

		// make one item have a rule
		when(
			inboxRuleHandler.findAndApplyMatchingRule(
				mailboxDetail,
			processInboxHandler.handleIncomingMail(
				matchers.argThat((mail: Mail) => isSameId(mail._id, makeMailId(25))),
				true,
				matchers.anything(),
				matchers.anything(),
				matchers.anything(),
			),
		).thenResolve({})
		).thenResolve({ folderType: MailSetKind.SPAM })

		when(
			processInboxHandler.handleIncomingMail(
				matchers.argThat((mail: Mail) => !isSameId(mail._id, makeMailId(25))),
				matchers.anything(),
				matchers.anything(),
				matchers.anything(),
			),
		).thenResolve({ folderType: MailSetKind.INBOX })

		await setUpTestData(PageSize, labels, false)
		await model.loadInitial()

@@ -251,7 +264,8 @@ o.spec("MailListModel", () => {
		verify(mailModel.getMailboxDetailsForMailFolder(matchers.anything()), {
			times: 1,
		})
		verify(inboxRuleHandler.findAndApplyMatchingRule(mailboxDetail, matchers.anything(), true), {

		verify(processInboxHandler.handleIncomingMail(matchers.anything(), matchers.anything(), matchers.anything(), matchers.anything()), {
			times: 100,
		})
	})

@@ -1821,6 +1821,9 @@ mod tests {
			"1729" => JsonElement::Array(
				vec![],
			),
			"1769" => JsonElement::String(
				"0".to_string()
			)
		}
	}

@@ -401,6 +401,8 @@ pub struct Mail {
	pub keyVerificationState: Option<i64>,
	#[serde(rename = "1728")]
	pub processingState: i64,
	#[serde(rename = "1769")]
	pub processNeeded: bool,
	#[serde(rename = "111")]
	pub sender: MailAddress,
	#[serde(rename = "115")]

@@ -473,6 +475,10 @@ pub struct MailBox {
	pub mailImportStates: GeneratedId,
	#[serde(rename = "1710")]
	pub extractedFeatures: Option<GeneratedId>,
	#[serde(rename = "1754")]
	pub clientSpamTrainingData: Option<GeneratedId>,
	#[serde(rename = "1755")]
	pub modifiedClientSpamTrainingDataIndex: Option<GeneratedId>,

	#[serde(default)]
	pub _errors: Errors,

@@ -4146,3 +4152,166 @@ impl Entity for ClientClassifierResultPostIn {
		}
	}
}

#[derive(uniffi::Record, Clone, Serialize, Deserialize)]
#[cfg_attr(any(test, feature = "testing"), derive(PartialEq, Debug))]
pub struct ClientSpamTrainingDatum {
	#[serde(rename = "1738")]
	pub _id: Option<IdTupleGenerated>,
	#[serde(rename = "1739")]
	pub _permissions: GeneratedId,
	#[serde(rename = "1740")]
	pub _format: i64,
	#[serde(rename = "1741")]
	pub _ownerGroup: Option<GeneratedId>,
	#[serde(rename = "1742")]
	#[serde(with = "serde_bytes")]
	pub _ownerEncSessionKey: Option<Vec<u8>>,
	#[serde(rename = "1743")]
	pub _ownerKeyVersion: Option<i64>,
	#[serde(rename = "1744")]
	pub confidence: i64,
	#[serde(rename = "1745")]
	pub spamDecision: i64,
	#[serde(rename = "1746")]
	#[serde(with = "serde_bytes")]
	pub vector: Vec<u8>,

	#[serde(default)]
	pub _errors: Errors,
	#[serde(default)]
	pub _finalIvs: HashMap<String, Option<FinalIv>>,
}

impl Entity for ClientSpamTrainingDatum {
	fn type_ref() -> TypeRef {
		TypeRef {
			app: AppName::Tutanota,
			type_id: TypeId::from(1736),
		}
	}
}
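Note (editor's illustration, not part of this commit): the generated entity above maps each Rust field to a numeric attribute ID through #[serde(rename = "...")]. The following minimal, self-contained Rust sketch shows that wire mapping with a simplified stand-in struct; it assumes the serde (with derive) and serde_json crates and deliberately omits the serde_bytes handling the real entity uses for the vector field.

// Simplified stand-in, NOT the generated SDK entity: only illustrates how
// numeric attribute IDs appear as keys in the serialized form.
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct TrainingDatumSketch {
    #[serde(rename = "1744")]
    confidence: i64,
    #[serde(rename = "1745")]
    spam_decision: i64,
    // The real entity wraps this field with #[serde(with = "serde_bytes")];
    // a plain Vec<u8> is enough for this sketch.
    #[serde(rename = "1746")]
    vector: Vec<u8>,
}

fn main() {
    let datum = TrainingDatumSketch {
        confidence: 85,
        spam_decision: 1,
        vector: vec![0x01, 0x7f],
    };
    // Keys are the numeric IDs, e.g. {"1744":85,"1745":1,"1746":[1,127]}
    let json = serde_json::to_string(&datum).expect("serialize");
    println!("{json}");
    let back: TrainingDatumSketch = serde_json::from_str(&json).expect("round trip");
    assert_eq!(back, datum);
}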
|
||||

#[derive(uniffi::Record, Clone, Serialize, Deserialize)]
#[cfg_attr(any(test, feature = "testing"), derive(PartialEq, Debug))]
pub struct ClientSpamTrainingDatumIndexEntry {
	#[serde(rename = "1749")]
	pub _id: Option<IdTupleGenerated>,
	#[serde(rename = "1750")]
	pub _permissions: GeneratedId,
	#[serde(rename = "1751")]
	pub _format: i64,
	#[serde(rename = "1752")]
	pub _ownerGroup: Option<GeneratedId>,
	#[serde(rename = "1753")]
	pub clientSpamTrainingDatumElementId: GeneratedId,
}

impl Entity for ClientSpamTrainingDatumIndexEntry {
	fn type_ref() -> TypeRef {
		TypeRef {
			app: AppName::Tutanota,
			type_id: TypeId::from(1747),
		}
	}
}
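Note (editor's illustration, not part of this commit): ClientSpamTrainingDatumIndexEntry carries only the element ID of the training datum it refers to, so consuming the modifiedClientSpamTrainingDataIndex means resolving each entry against the locally known training data by element ID. A minimal, self-contained sketch of that lookup shape follows; the types are simplified stand-ins (plain strings instead of GeneratedId / IdTupleGenerated), and the field names are hypothetical.

// Simplified stand-in types, NOT the SDK entities.
use std::collections::HashMap;

struct IndexEntrySketch {
    client_spam_training_datum_element_id: String,
}

struct TrainingDatumSketch {
    spam_decision: i64,
    confidence: i64,
}

/// Resolve index entries against the training-data list, skipping entries
/// whose target datum is not (yet) present locally.
fn resolve<'a>(
    entries: &'a [IndexEntrySketch],
    data_by_element_id: &'a HashMap<String, TrainingDatumSketch>,
) -> Vec<&'a TrainingDatumSketch> {
    entries
        .iter()
        .filter_map(|entry| data_by_element_id.get(&entry.client_spam_training_datum_element_id))
        .collect()
}

fn main() {
    let mut data = HashMap::new();
    data.insert(
        "datumA".to_string(),
        TrainingDatumSketch { spam_decision: 1, confidence: 90 },
    );
    let entries = vec![IndexEntrySketch {
        client_spam_training_datum_element_id: "datumA".into(),
    }];
    let resolved = resolve(&entries, &data);
    assert_eq!(resolved.len(), 1);
    println!(
        "resolved {} training datum(s), first: decision={} confidence={}",
        resolved.len(),
        resolved[0].spam_decision,
        resolved[0].confidence
    );
}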
|
||||

#[derive(uniffi::Record, Clone, Serialize, Deserialize)]
#[cfg_attr(any(test, feature = "testing"), derive(PartialEq, Debug))]
pub struct ProcessInboxDatum {
	#[serde(rename = "1757")]
	pub _id: Option<CustomId>,
	#[serde(rename = "1758")]
	#[serde(with = "serde_bytes")]
	pub ownerEncVectorSessionKey: Vec<u8>,
	#[serde(rename = "1759")]
	pub ownerKeyVersion: i64,
	#[serde(rename = "1762")]
	pub classifierType: Option<i64>,
	#[serde(rename = "1763")]
	#[serde(with = "serde_bytes")]
	pub encVector: Vec<u8>,
	#[serde(rename = "1760")]
	pub mailId: IdTupleGenerated,
	#[serde(rename = "1761")]
	pub targetMoveFolder: IdTupleGenerated,
}

impl Entity for ProcessInboxDatum {
	fn type_ref() -> TypeRef {
		TypeRef {
			app: AppName::Tutanota,
			type_id: TypeId::from(1756),
		}
	}
}

#[derive(uniffi::Record, Clone, Serialize, Deserialize)]
#[cfg_attr(any(test, feature = "testing"), derive(PartialEq, Debug))]
pub struct ProcessInboxPostIn {
	#[serde(rename = "1765")]
	pub _format: i64,
	#[serde(rename = "1766")]
	pub mailOwnerGroup: GeneratedId,
	#[serde(rename = "1767")]
	pub processInboxDatum: Vec<ProcessInboxDatum>,
}

impl Entity for ProcessInboxPostIn {
	fn type_ref() -> TypeRef {
		TypeRef {
			app: AppName::Tutanota,
			type_id: TypeId::from(1764),
		}
	}
}
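Note (editor's illustration, not part of this commit): ProcessInboxPostIn batches one ProcessInboxDatum per mail under a single mailOwnerGroup, each datum naming the mail, the folder it should move to, and an encrypted vector; the PopulateClientSpamTrainingDatum types below follow the same batching shape. The minimal, self-contained sketch below only illustrates assembling such a batch; all types, field names, and values are simplified stand-ins (strings and plain bytes instead of GeneratedId / IdTupleGenerated and real encryption).

// Simplified stand-in types, NOT the SDK entities or the real service call.
struct ProcessInboxDatumSketch {
    mail_id: (String, String),            // (list id, element id)
    target_move_folder: (String, String), // (list id, element id)
    enc_vector: Vec<u8>,                  // stands in for the encrypted vector bytes
}

struct ProcessInboxPostInSketch {
    mail_owner_group: String,
    process_inbox_datum: Vec<ProcessInboxDatumSketch>,
}

fn build_payload(owner_group: &str, data: Vec<ProcessInboxDatumSketch>) -> ProcessInboxPostInSketch {
    ProcessInboxPostInSketch {
        mail_owner_group: owner_group.to_string(),
        process_inbox_datum: data,
    }
}

fn main() {
    let datum = ProcessInboxDatumSketch {
        mail_id: ("mailListId".into(), "mailElementId".into()),
        target_move_folder: ("folderListId".into(), "spamFolderId".into()),
        enc_vector: vec![0u8; 16],
    };
    let payload = build_payload("mailOwnerGroupId", vec![datum]);
    let first = &payload.process_inbox_datum[0];
    println!(
        "group {}: mail {:?} -> folder {:?} ({} encrypted bytes)",
        payload.mail_owner_group,
        first.mail_id,
        first.target_move_folder,
        first.enc_vector.len()
    );
}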

#[derive(uniffi::Record, Clone, Serialize, Deserialize)]
#[cfg_attr(any(test, feature = "testing"), derive(PartialEq, Debug))]
pub struct PopulateClientSpamTrainingDatum {
	#[serde(rename = "1771")]
	pub _id: Option<CustomId>,
	#[serde(rename = "1772")]
	#[serde(with = "serde_bytes")]
	pub ownerEncVectorSessionKey: Vec<u8>,
	#[serde(rename = "1773")]
	pub ownerKeyVersion: i64,
	#[serde(rename = "1775")]
	pub isSpam: bool,
	#[serde(rename = "1776")]
	pub confidence: i64,
	#[serde(rename = "1777")]
	#[serde(with = "serde_bytes")]
	pub encVector: Vec<u8>,
	#[serde(rename = "1774")]
	pub mailId: IdTupleGenerated,
}

impl Entity for PopulateClientSpamTrainingDatum {
	fn type_ref() -> TypeRef {
		TypeRef {
			app: AppName::Tutanota,
			type_id: TypeId::from(1770),
		}
	}
}

#[derive(uniffi::Record, Clone, Serialize, Deserialize)]
#[cfg_attr(any(test, feature = "testing"), derive(PartialEq, Debug))]
pub struct PopulateClientSpamTrainingDataPostIn {
	#[serde(rename = "1779")]
	pub _format: i64,
	#[serde(rename = "1780")]
	pub mailOwnerGroup: GeneratedId,
	#[serde(rename = "1781")]
	pub populateClientSpamTrainingDatum: Vec<PopulateClientSpamTrainingDatum>,
}

impl Entity for PopulateClientSpamTrainingDataPostIn {
	fn type_ref() -> TypeRef {
		TypeRef {
			app: AppName::Tutanota,
			type_id: TypeId::from(1778),
		}
	}
}

@@ -44,6 +44,8 @@ use crate::entities::generated::tutanota::MoveMailData;
use crate::entities::generated::tutanota::MoveMailPostOut;
use crate::entities::generated::tutanota::NewsIn;
use crate::entities::generated::tutanota::NewsOut;
use crate::entities::generated::tutanota::PopulateClientSpamTrainingDataPostIn;
use crate::entities::generated::tutanota::ProcessInboxPostIn;
use crate::entities::generated::tutanota::ReceiveInfoServiceData;
use crate::entities::generated::tutanota::ReceiveInfoServicePostOut;
use crate::entities::generated::tutanota::ReportMailPostData;

@@ -59,70 +61,70 @@ use crate::entities::generated::tutanota::UserAccountCreateData;
use crate::entities::generated::tutanota::UserAccountPostOut;
pub struct ApplyLabelService;

crate::service_impl!(declare, ApplyLabelService, "tutanota/applylabelservice", 97);
crate::service_impl!(declare, ApplyLabelService, "tutanota/applylabelservice", 98);
crate::service_impl!(POST, ApplyLabelService, ApplyLabelServicePostIn, ());

pub struct CalendarService;

crate::service_impl!(declare, CalendarService, "tutanota/calendarservice", 97);
crate::service_impl!(declare, CalendarService, "tutanota/calendarservice", 98);
crate::service_impl!(POST, CalendarService, UserAreaGroupPostData, CreateGroupPostReturn);
crate::service_impl!(DELETE, CalendarService, CalendarDeleteData, ());

pub struct ChangePrimaryAddressService;

crate::service_impl!(declare, ChangePrimaryAddressService, "tutanota/changeprimaryaddressservice", 97);
crate::service_impl!(declare, ChangePrimaryAddressService, "tutanota/changeprimaryaddressservice", 98);
crate::service_impl!(PUT, ChangePrimaryAddressService, ChangePrimaryAddressServicePutIn, ());

pub struct ClientClassifierResultService;

crate::service_impl!(declare, ClientClassifierResultService, "tutanota/clientclassifierresultservice", 97);
crate::service_impl!(declare, ClientClassifierResultService, "tutanota/clientclassifierresultservice", 98);
crate::service_impl!(POST, ClientClassifierResultService, ClientClassifierResultPostIn, ());

pub struct ContactListGroupService;

crate::service_impl!(declare, ContactListGroupService, "tutanota/contactlistgroupservice", 97);
crate::service_impl!(declare, ContactListGroupService, "tutanota/contactlistgroupservice", 98);
crate::service_impl!(POST, ContactListGroupService, UserAreaGroupPostData, CreateGroupPostReturn);
crate::service_impl!(DELETE, ContactListGroupService, UserAreaGroupDeleteData, ());

pub struct CustomerAccountService;

crate::service_impl!(declare, CustomerAccountService, "tutanota/customeraccountservice", 97);
crate::service_impl!(declare, CustomerAccountService, "tutanota/customeraccountservice", 98);
crate::service_impl!(POST, CustomerAccountService, CustomerAccountCreateData, ());

pub struct DraftService;

crate::service_impl!(declare, DraftService, "tutanota/draftservice", 97);
crate::service_impl!(declare, DraftService, "tutanota/draftservice", 98);
crate::service_impl!(POST, DraftService, DraftCreateData, DraftCreateReturn);
crate::service_impl!(PUT, DraftService, DraftUpdateData, DraftUpdateReturn);

pub struct EncryptTutanotaPropertiesService;

crate::service_impl!(declare, EncryptTutanotaPropertiesService, "tutanota/encrypttutanotapropertiesservice", 97);
crate::service_impl!(declare, EncryptTutanotaPropertiesService, "tutanota/encrypttutanotapropertiesservice", 98);
crate::service_impl!(POST, EncryptTutanotaPropertiesService, EncryptTutanotaPropertiesData, ());

pub struct EntropyService;

crate::service_impl!(declare, EntropyService, "tutanota/entropyservice", 97);
crate::service_impl!(declare, EntropyService, "tutanota/entropyservice", 98);
crate::service_impl!(PUT, EntropyService, EntropyData, ());

pub struct ExternalUserService;

crate::service_impl!(declare, ExternalUserService, "tutanota/externaluserservice", 97);
crate::service_impl!(declare, ExternalUserService, "tutanota/externaluserservice", 98);
crate::service_impl!(POST, ExternalUserService, ExternalUserData, ());

pub struct GroupInvitationService;

crate::service_impl!(declare, GroupInvitationService, "tutanota/groupinvitationservice", 97);
crate::service_impl!(declare, GroupInvitationService, "tutanota/groupinvitationservice", 98);
crate::service_impl!(POST, GroupInvitationService, GroupInvitationPostData, GroupInvitationPostReturn);
crate::service_impl!(PUT, GroupInvitationService, GroupInvitationPutData, ());
crate::service_impl!(DELETE, GroupInvitationService, GroupInvitationDeleteData, ());

@@ -130,26 +132,26 @@ crate::service_impl!(DELETE, GroupInvitationService, GroupInvitationDeleteData,

pub struct ImportMailService;

crate::service_impl!(declare, ImportMailService, "tutanota/importmailservice", 97);
crate::service_impl!(declare, ImportMailService, "tutanota/importmailservice", 98);
crate::service_impl!(POST, ImportMailService, ImportMailPostIn, ImportMailPostOut);
crate::service_impl!(GET, ImportMailService, ImportMailGetIn, ImportMailGetOut);

pub struct ListUnsubscribeService;

crate::service_impl!(declare, ListUnsubscribeService, "tutanota/listunsubscribeservice", 97);
crate::service_impl!(declare, ListUnsubscribeService, "tutanota/listunsubscribeservice", 98);
crate::service_impl!(POST, ListUnsubscribeService, ListUnsubscribeData, ());

pub struct MailExportTokenService;

crate::service_impl!(declare, MailExportTokenService, "tutanota/mailexporttokenservice", 97);
crate::service_impl!(declare, MailExportTokenService, "tutanota/mailexporttokenservice", 98);
crate::service_impl!(POST, MailExportTokenService, (), MailExportTokenServicePostOut);

pub struct MailFolderService;

crate::service_impl!(declare, MailFolderService, "tutanota/mailfolderservice", 97);
crate::service_impl!(declare, MailFolderService, "tutanota/mailfolderservice", 98);
crate::service_impl!(POST, MailFolderService, CreateMailFolderData, CreateMailFolderReturn);
crate::service_impl!(PUT, MailFolderService, UpdateMailFolderData, ());
crate::service_impl!(DELETE, MailFolderService, DeleteMailFolderData, ());

@@ -157,87 +159,99 @@ crate::service_impl!(DELETE, MailFolderService, DeleteMailFolderData, ());

pub struct MailGroupService;

crate::service_impl!(declare, MailGroupService, "tutanota/mailgroupservice", 97);
crate::service_impl!(declare, MailGroupService, "tutanota/mailgroupservice", 98);
crate::service_impl!(POST, MailGroupService, CreateMailGroupData, MailGroupPostOut);
crate::service_impl!(DELETE, MailGroupService, DeleteGroupData, ());

pub struct MailService;

crate::service_impl!(declare, MailService, "tutanota/mailservice", 97);
crate::service_impl!(declare, MailService, "tutanota/mailservice", 98);
crate::service_impl!(DELETE, MailService, DeleteMailData, ());

pub struct ManageLabelService;

crate::service_impl!(declare, ManageLabelService, "tutanota/managelabelservice", 97);
crate::service_impl!(declare, ManageLabelService, "tutanota/managelabelservice", 98);
crate::service_impl!(POST, ManageLabelService, ManageLabelServicePostIn, ());
crate::service_impl!(DELETE, ManageLabelService, ManageLabelServiceDeleteIn, ());

pub struct MoveMailService;

crate::service_impl!(declare, MoveMailService, "tutanota/movemailservice", 97);
crate::service_impl!(declare, MoveMailService, "tutanota/movemailservice", 98);
crate::service_impl!(POST, MoveMailService, MoveMailData, MoveMailPostOut);

pub struct NewsService;

crate::service_impl!(declare, NewsService, "tutanota/newsservice", 97);
crate::service_impl!(declare, NewsService, "tutanota/newsservice", 98);
crate::service_impl!(POST, NewsService, NewsIn, ());
crate::service_impl!(GET, NewsService, (), NewsOut);

pub struct PopulateClientSpamTrainingDataService;

crate::service_impl!(declare, PopulateClientSpamTrainingDataService, "tutanota/populateclientspamtrainingdataservice", 98);
crate::service_impl!(POST, PopulateClientSpamTrainingDataService, PopulateClientSpamTrainingDataPostIn, ());

pub struct ProcessInboxService;

crate::service_impl!(declare, ProcessInboxService, "tutanota/processinboxservice", 98);
crate::service_impl!(POST, ProcessInboxService, ProcessInboxPostIn, ());

pub struct ReceiveInfoService;

crate::service_impl!(declare, ReceiveInfoService, "tutanota/receiveinfoservice", 97);
crate::service_impl!(declare, ReceiveInfoService, "tutanota/receiveinfoservice", 98);
crate::service_impl!(POST, ReceiveInfoService, ReceiveInfoServiceData, ReceiveInfoServicePostOut);

pub struct ReportMailService;

crate::service_impl!(declare, ReportMailService, "tutanota/reportmailservice", 97);
crate::service_impl!(declare, ReportMailService, "tutanota/reportmailservice", 98);
crate::service_impl!(POST, ReportMailService, ReportMailPostData, ());

pub struct ResolveConversationsService;

crate::service_impl!(declare, ResolveConversationsService, "tutanota/resolveconversationsservice", 97);
crate::service_impl!(declare, ResolveConversationsService, "tutanota/resolveconversationsservice", 98);
crate::service_impl!(GET, ResolveConversationsService, ResolveConversationsServiceGetIn, ResolveConversationsServiceGetOut);

pub struct SendDraftService;

crate::service_impl!(declare, SendDraftService, "tutanota/senddraftservice", 97);
crate::service_impl!(declare, SendDraftService, "tutanota/senddraftservice", 98);
crate::service_impl!(POST, SendDraftService, SendDraftData, SendDraftReturn);

pub struct SimpleMoveMailService;

crate::service_impl!(declare, SimpleMoveMailService, "tutanota/simplemovemailservice", 97);
crate::service_impl!(declare, SimpleMoveMailService, "tutanota/simplemovemailservice", 98);
crate::service_impl!(POST, SimpleMoveMailService, SimpleMoveMailPostIn, MoveMailPostOut);

pub struct TemplateGroupService;

crate::service_impl!(declare, TemplateGroupService, "tutanota/templategroupservice", 97);
crate::service_impl!(declare, TemplateGroupService, "tutanota/templategroupservice", 98);
crate::service_impl!(POST, TemplateGroupService, UserAreaGroupPostData, CreateGroupPostReturn);
crate::service_impl!(DELETE, TemplateGroupService, UserAreaGroupDeleteData, ());

pub struct TranslationService;

crate::service_impl!(declare, TranslationService, "tutanota/translationservice", 97);
crate::service_impl!(declare, TranslationService, "tutanota/translationservice", 98);
crate::service_impl!(GET, TranslationService, TranslationGetIn, TranslationGetOut);

pub struct UnreadMailStateService;

crate::service_impl!(declare, UnreadMailStateService, "tutanota/unreadmailstateservice", 97);
crate::service_impl!(declare, UnreadMailStateService, "tutanota/unreadmailstateservice", 98);
crate::service_impl!(POST, UnreadMailStateService, UnreadMailStatePostIn, ());

pub struct UserAccountService;

crate::service_impl!(declare, UserAccountService, "tutanota/useraccountservice", 97);
crate::service_impl!(declare, UserAccountService, "tutanota/useraccountservice", 98);
crate::service_impl!(POST, UserAccountService, UserAccountCreateData, UserAccountPostOut);

File diff suppressed because it is too large

@@ -43,5 +43,6 @@
	"1465": [],
	"1677": null,
	"1728": "1",
	"1729": []
	"1729": [],
	"1769": "0"
}

@@ -43,5 +43,6 @@
	"896": "1723113273034",
	"1677": null,
	"1728": "1",
	"1729": []
	"1729": [],
	"1769": "0"
}

@@ -43,5 +43,6 @@
	"1465": [],
	"1677": null,
	"1728": "1",
	"1729": []
	"1729": [],
	"1769": "0"
}

@@ -43,5 +43,6 @@
	"466": "",
	"1677": null,
	"1728": "1",
	"1729": []
	"1729": [],
	"1769": "1"
}