mirror of
https://github.com/tutao/tutanota.git
synced 2025-12-08 06:09:50 +00:00
Implement spam training data sync and add TutanotaModelV98
We sync the encrypted spam training data through our server to make sure that all clients of a specific user behave the same when classifying mails. Additionally, this enables spam classification in the webApp. We compress the training data vectors (see clientSpamTrainingDatum) using SparseVectorCompressor.ts before uploading them to our server. When a user has ClientSpamClassification enabled, the spam training data sync happens for every mail received. ClientSpamTrainingDatum instances are not stored in the CacheStorage, and no entityEvents are emitted for this type. However, we retrieve creations and updates of ClientSpamTrainingData through the modifiedClientSpamTrainingDataIndex. We calculate a threshold per classifier based on the dataset's ham-to-spam ratio; we also subsample our training data to cap the ham-to-spam ratio within a certain limit. Co-authored-by: jomapp <17314077+jomapp@users.noreply.github.com> Co-authored-by: das <das@tutao.de> Co-authored-by: abp <abp@tutao.de> Co-authored-by: Kinan <104761667+kibibytium@users.noreply.github.com> Co-authored-by: sug <sug@tutao.de> Co-authored-by: nif <nif@tutao.de> Co-authored-by: map <mpfau@users.noreply.github.com>
This commit is contained in:
parent
f8bbd32695
commit
5293be6a4a
63 changed files with 3877 additions and 1963 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import { BlobElementEntity, Entity, ListElementEntity, ServerModelParsedInstance, SomeEntity, TypeModel } from "../../common/EntityTypes.js"
|
||||
import { customIdToBase64Url, ensureBase64Ext, firstBiggerThanSecond } from "../../common/utils/EntityUtils.js"
|
||||
import { customIdToBase64Url, ensureBase64Ext, firstBiggerThanSecond, GENERATED_MIN_ID } from "../../common/utils/EntityUtils.js"
|
||||
import { CacheStorage, LastUpdateTime } from "./DefaultEntityRestCache.js"
|
||||
import { assertNotNull, clone, filterNull, getFromMap, getTypeString, Nullable, parseTypeString, remove, TypeRef } from "@tutao/tutanota-utils"
|
||||
import { assertNotNull, clone, filterNull, getFromMap, getTypeString, newPromise, Nullable, parseTypeString, remove, TypeRef } from "@tutao/tutanota-utils"
|
||||
import { CustomCacheHandlerMap } from "./cacheHandler/CustomCacheHandler.js"
|
||||
import { Type as TypeId } from "../../common/EntityConstants.js"
|
||||
import { ProgrammingError } from "../../common/error/ProgrammingError.js"
|
||||
|
|
@ -10,6 +10,7 @@ import { ModelMapper } from "../crypto/ModelMapper"
|
|||
import { ServerTypeModelResolver } from "../../common/EntityFunctions"
|
||||
import { expandId } from "./RestClientIdUtils"
|
||||
import { hasError } from "../../common/utils/ErrorUtils"
|
||||
import { SpamClassificationModel } from "../../../../mail-app/workerUtils/spamClassification/SpamClassifier"
|
||||
|
||||
/** Cache for a single list. */
|
||||
type ListCache = {
|
||||
|
|
@ -41,8 +42,9 @@ export class EphemeralCacheStorage implements CacheStorage {
|
|||
private readonly entities: Map<string, Map<Id, ServerModelParsedInstance>> = new Map()
|
||||
private readonly lists: Map<string, ListTypeCache> = new Map()
|
||||
private readonly blobEntities: Map<string, BlobElementTypeCache> = new Map()
|
||||
private readonly spamClassificationModelCache: Map<Id, SpamClassificationModel> = new Map()
|
||||
private lastUpdateTime: number | null = null
|
||||
private lastTrainedTime: number | null = null
|
||||
private lastTrainingDataId: Id = GENERATED_MIN_ID
|
||||
private lastTrainedFromScratchTime: number | null = null
|
||||
private userId: Id | null = null
|
||||
private lastBatchIdPerGroup = new Map<Id, Id>()
|
||||
|
|
@ -419,12 +421,12 @@ export class EphemeralCacheStorage implements CacheStorage {
|
|||
this.lastUpdateTime = value
|
||||
}
|
||||
|
||||
async getLastTrainedTime(): Promise<number> {
|
||||
return this.lastTrainedTime ?? 0
|
||||
async getLastTrainingDataIndexId(): Promise<Id> {
|
||||
return this.lastTrainingDataId
|
||||
}
|
||||
|
||||
async setLastTrainedTime(value: number): Promise<void> {
|
||||
this.lastTrainedTime = value
|
||||
async setLastTrainingDataIndexId(id: Id): Promise<void> {
|
||||
this.lastTrainingDataId = id
|
||||
}
|
||||
|
||||
async getLastTrainedFromScratchTime(): Promise<number> {
|
||||
|
|
@ -435,6 +437,14 @@ export class EphemeralCacheStorage implements CacheStorage {
|
|||
this.lastTrainedFromScratchTime = ms
|
||||
}
|
||||
|
||||
/**
 * Stores the given spam classification model in the in-memory `spamClassificationModelCache`,
 * keyed by the model's `ownerGroup`. A model previously stored under the same owner group is replaced.
 *
 * @param model the model to cache; its `ownerGroup` is used as the lookup key
 */
async setSpamClassificationModel(model: SpamClassificationModel): Promise<void> {
|
||||
this.spamClassificationModelCache.set(model.ownerGroup, model)
|
||||
}
|
||||
|
||||
/**
 * Looks up the spam classification model cached for the given owner group.
 *
 * @param ownerGroup id of the group whose model is requested
 * @returns the cached model, or `null` when nothing is stored for that owner group
 */
async getSpamClassificationModel(ownerGroup: Id): Promise<Nullable<SpamClassificationModel>> {
|
||||
// Map.get yields undefined for a missing key; normalize that to null to match the Nullable return type.
return this.spamClassificationModelCache.get(ownerGroup) ?? null
|
||||
}
|
||||
|
||||
async getWholeList<T extends ListElementEntity>(typeRef: TypeRef<T>, listId: Id): Promise<Array<T>> {
|
||||
const parsedInstances = await this.getWholeListParsed(typeRef, listId)
|
||||
return await this.modelMapper.mapToInstances(typeRef, parsedInstances)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue