mirror of
https://github.com/tutao/tutanota.git
synced 2025-10-19 07:53:47 +00:00
1839 lines
62 KiB
JavaScript
Vendored
1839 lines
62 KiB
JavaScript
Vendored
// THIS FILE IS AUTOMATICALLY GENERATED DO NOT EDIT DIRECTLY
|
||
// See update-tlds.js for encoding/decoding format
|
||
// https://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
||
const encodedTlds = 'aaa1rp3bb0ott3vie4c1le2ogado5udhabi7c0ademy5centure6ountant0s9o1tor4d0s1ult4e0g1ro2tna4f0l1rica5g0akhan5ency5i0g1rbus3force5tel5kdn3l0ibaba4pay4lfinanz6state5y2sace3tom5m0azon4ericanexpress7family11x2fam3ica3sterdam8nalytics7droid5quan4z2o0l2partments8p0le4q0uarelle8r0ab1mco4chi3my2pa2t0e3s0da2ia2sociates9t0hleta5torney7u0ction5di0ble3o3spost5thor3o0s4w0s2x0a2z0ure5ba0by2idu3namex4d1k2r0celona5laycard4s5efoot5gains6seball5ketball8uhaus5yern5b0c1t1va3cg1n2d1e0ats2uty4er2rlin4st0buy5t2f1g1h0arti5i0ble3d1ke2ng0o3o1z2j1lack0friday9ockbuster8g1omberg7ue3m0s1w2n0pparibas9o0ats3ehringer8fa2m1nd2o0k0ing5sch2tik2on4t1utique6x2r0adesco6idgestone9oadway5ker3ther5ussels7s1t1uild0ers6siness6y1zz3v1w1y1z0h3ca0b1fe2l0l1vinklein9m0era3p2non3petown5ital0one8r0avan4ds2e0er0s4s2sa1e1h1ino4t0ering5holic7ba1n1re3c1d1enter4o1rn3f0a1d2g1h0anel2nel4rity4se2t2eap3intai5ristmas6ome4urch5i0priani6rcle4sco3tadel4i0c2y3k1l0aims4eaning6ick2nic1que6othing5ud3ub0med6m1n1o0ach3des3ffee4llege4ogne5m0mbank4unity6pany2re3uter5sec4ndos3struction8ulting7tact3ractors9oking4l1p2rsica5untry4pon0s4rses6pa2r0edit0card4union9icket5own3s1uise0s6u0isinella9v1w1x1y0mru3ou3z2dad1nce3ta1e1ing3sun4y2clk3ds2e0al0er2s3gree4livery5l1oitte5ta3mocrat6ntal2ist5si0gn4v2hl2iamonds6et2gital5rect0ory7scount3ver5h2y2j1k1m1np2o0cs1tor4g1mains5t1wnload7rive4tv2ubai3nlop4pont4rban5vag2r2z2earth3t2c0o2deka3u0cation8e1g1mail3erck5nergy4gineer0ing9terprises10pson4quipment8r0icsson6ni3s0q1tate5t1u0rovision8s2vents5xchange6pert3osed4ress5traspace10fage2il1rwinds6th3mily4n0s2rm0ers5shion4t3edex3edback6rrari3ero6i0delity5o2lm2nal1nce1ial7re0stone6mdale6sh0ing5t0ness6j1k1lickr3ghts4r2orist4wers5y2m1o0o0d1tball6rd1ex2sale4um3undation8x2r0ee1senius7l1ogans4ntier7tr2ujitsu5n0d2rniture7tbol5yi3ga0l0lery3o1up4me0s3p1rden4y2b0iz3d0n2e0a1nt0ing5orge5f1g0ee3h1i0ft0s3ves2ing5l0ass3e1obal2o4m0ail3bh2o1x2n1odaddy5ld0point6f2o0dyear5g0le4p1t1v2p1q1r0ainger5phics5tis4een3ipe3ocery4up4s1t1u0cci3ge2ide2tars5ru3w1y2hair2mburg5ngout5us3bo2dfc0bank7ealth0care8lp1sinki6re1mes5iphop4samitsu7tachi5v2k0t2m1n1ockey4ldings5iday5medepot5goods5s0ense7nda3rse3spital5t0ing5t0els3mail5use3w2r1sbc3t1u0ghes5yatt3undai7ibm2cbc2e1u2d1e0ee3fm2kano4l1m0amat4db2mo0bilien9n0c1dustries8finiti5o2g1k1stitute6urance4e4t0ernational10uit4vestments10o1piranga7q1r0ish4s0maili5t0anbul7t0au2v3jaguar4va3cb2e0ep2tzt3welry6io2ll2m0p2nj2o0bs1urg4t1y2p0morgan6rs3uegos4niper7kaufen5ddi3e0rryhotels6properties14fh2g1h1i0a1ds2m1ndle4tchen5wi3m1n1oeln3matsu5sher5p0mg2n2r0d1ed3uokgroup8w1y0oto4z2la0caixa5mborghini8er3nd0rover6xess5salle5t0ino3robe5w0yer5b1c1ds2ease3clerc5frak4gal2o2xus4gbt3i0dl2fe0insurance9style7ghting6ke2lly3mited4o2ncoln4k2ve1ing5k1lc1p2oan0s3cker3us3l1ndon4tte1o3ve3pl0financial11r1s1t0d0a3u0ndbeck6xe1ury5v1y2ma0drid4if1son4keup4n0agement7go3p1rket0ing3s4riott5shalls7ttel5ba2c0kinsey7d1e0d0ia3et2lbourne7me1orial6n0u2rckmsd7g1h1iami3crosoft7l1ni1t2t0subishi9k1l0b1s2m0a2n1o0bi0le4da2e1i1m1nash3ey2ster5rmon3tgage6scow4to0rcycles9v0ie4p1q1r1s0d2t0n1r2u0seum3ic4v1w1x1y1z2na0b1goya4me2vy3ba2c1e0c1t0bank4flix4work5ustar5w0s2xt0direct7us4f0l2g0o2hk2i0co2ke1on3nja3ssan1y5l1o0kia3rton4w0ruz3tv4p1r0a1w2tt2u1yc2z2obi1server7ffice5kinawa6layan0group9lo3m0ega4ne1g1l0ine5oo2pen3racle3nge4g0anic5igins6saka4tsuka4t2vh3pa0ge2nasonic7ris2s1tners4s1y3y2ccw3e0t2f0izer5g1h0armacy6d1ilips5one2to0graphy6s4ysio5ics1tet2ures6d1n0g1k2oneer5zza4k1l0ace2y0station9umbing5s3m1n0c2ohl2ker3litie5rn2st3r0axi3ess3ime3o0d0uctions8f1gressive8mo2perties3y5tection8u0dential9s1t1ub2w0c2y2qa1pon3uebec3st5racing4dio4e0ad1lestate6tor2y4cipes5d0stone5umbrella9hab3ise0n3t2liance6n0t0als5pair3ort3ublican8st0aurant8view0s5xroth6ich0ardli6oh3l1o1p2o0cks3deo3gers4om3s0vp3u0gby3hr2n2w0e2yukyu6sa0arland6fe0ty4kura4le1on3msclub4ung5ndvik0coromant12ofi4p1rl2s1ve2xo3b0i1s2c0b1haeffler7midt4olarships8ol3ule3warz5ience5ot3d1e0arch3t2cure1ity6ek2lect4ner3rvices6ven3w1x0y3fr2g1h0angrila6rp3ell3ia1ksha5oes2p0ping5uji3w3i0lk2na1gles5te3j1k0i0n2y0pe4l0ing4m0art3ile4n0cf3o0ccer3ial4ftbank4ware6hu2lar2utions7ng1y2y2pa0ce3ort2t3r0l2s1t0ada2ples4r1tebank4farm7c0group6ockholm6rage3e3ream4udio2y3yle4u0cks3pplies3y2ort5rf1gery5zuki5v1watch4iss4x1y0dney4stems6z2tab1ipei4lk2obao4rget4tamotors6r2too4x0i3c0i2d0k2eam2ch0nology8l1masek5nnis4va3f1g1h0d1eater2re6iaa2ckets5enda4ps2res2ol4j0maxx4x2k0maxx5l1m0all4n1o0day3kyo3ols3p1ray3shiba5tal3urs3wn2yota3s3r0ade1ing4ining5vel0ers0insurance16ust3v2t1ube2i1nes3shu4v0s2w1z2ua1bank3s2g1k1nicom3versity8o2ol2ps2s1y1z2va0cations7na1guard7c1e0gas3ntures6risign5mögensberater2ung14sicherung10t2g1i0ajes4deo3g1king4llas4n1p1rgin4sa1ion4va1o3laanderen9n1odka3lvo3te1ing3o2yage5u2wales2mart4ter4ng0gou5tch0es6eather0channel12bcam3er2site5d0ding5ibo2r3f1hoswho6ien2ki2lliamhill9n0dows4e1ners6me2olterskluwer11odside6rk0s2ld3w2s1tc1f3xbox3erox4ihuan4n2xx2yz3yachts4hoo3maxun5ndex5e1odobashi7ga2kohama6u0tube6t1un3za0ppos4ra3ero3ip2m1one3uerich6w2';
|
||
// Internationalized domain names containing non-ASCII
|
||
const encodedUtlds = 'ελ1υ2бг1ел3дети4ею2католик6ом3мкд2он1сква6онлайн5рг3рус2ф2сайт3рб3укр3қаз3հայ3ישראל5קום3ابوظبي5رامكو5لاردن4بحرين5جزائر5سعودية6عليان5مغرب5مارات5یران5بارت2زار4يتك3ھارت5تونس4سودان3رية5شبكة4عراق2ب2مان4فلسطين6قطر3كاثوليك6وم3مصر2ليسيا5وريتانيا7قع4همراه5پاکستان7ڀارت4कॉम3नेट3भारत0म्3ोत5संगठन5বাংলা5ভারত2ৰত4ਭਾਰਤ4ભારત4ଭାରତ4இந்தியா6லங்கை6சிங்கப்பூர்11భారత్5ಭಾರತ4ഭാരതം5ලංකා4คอม3ไทย3ລາວ3გე2みんな3アマゾン4クラウド4グーグル4コム2ストア3セール3ファッション6ポイント4世界2中信1国1國1文网3亚马逊3企业2佛山2信息2健康2八卦2公司1益2台湾1灣2商城1店1标2嘉里0大酒店5在线2大拿2天主教3娱乐2家電2广东2微博2慈善2我爱你3手机2招聘2政务1府2新加坡2闻2时尚2書籍2机构2淡马锡3游戏2澳門2点看2移动2组织机构4网址1店1站1络2联通2谷歌2购物2通販2集团2電訊盈科4飞利浦3食品2餐厅2香格里拉3港2닷넷1컴2삼성2한국2';
|
||
|
||
/**
|
||
* Finite State Machine generation utilities
|
||
*/
|
||
|
||
/**
|
||
* @template T
|
||
* @typedef {{ [group: string]: T[] }} Collections
|
||
*/
|
||
|
||
/**
|
||
* @typedef {{ [group: string]: true }} Flags
|
||
*/
|
||
|
||
// Keys in scanner Collections instances
|
||
const numeric = 'numeric';
|
||
const ascii = 'ascii';
|
||
const alpha = 'alpha';
|
||
const asciinumeric = 'asciinumeric';
|
||
const alphanumeric = 'alphanumeric';
|
||
const domain = 'domain';
|
||
const emoji = 'emoji';
|
||
const scheme = 'scheme';
|
||
const slashscheme = 'slashscheme';
|
||
const whitespace = 'whitespace';
|
||
|
||
/**
|
||
* @template T
|
||
* @param {string} name
|
||
* @param {Collections<T>} groups to register in
|
||
* @returns {T[]} Current list of tokens in the given collection
|
||
*/
|
||
function registerGroup(name, groups) {
|
||
if (!(name in groups)) {
|
||
groups[name] = [];
|
||
}
|
||
return groups[name];
|
||
}
|
||
|
||
/**
|
||
* @template T
|
||
* @param {T} t token to add
|
||
* @param {Collections<T>} groups
|
||
* @param {Flags} flags
|
||
*/
|
||
function addToGroups(t, flags, groups) {
|
||
if (flags[numeric]) {
|
||
flags[asciinumeric] = true;
|
||
flags[alphanumeric] = true;
|
||
}
|
||
if (flags[ascii]) {
|
||
flags[asciinumeric] = true;
|
||
flags[alpha] = true;
|
||
}
|
||
if (flags[asciinumeric]) {
|
||
flags[alphanumeric] = true;
|
||
}
|
||
if (flags[alpha]) {
|
||
flags[alphanumeric] = true;
|
||
}
|
||
if (flags[alphanumeric]) {
|
||
flags[domain] = true;
|
||
}
|
||
if (flags[emoji]) {
|
||
flags[domain] = true;
|
||
}
|
||
for (const k in flags) {
|
||
const group = registerGroup(k, groups);
|
||
if (group.indexOf(t) < 0) {
|
||
group.push(t);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* @template T
|
||
* @param {T} t token to check
|
||
* @param {Collections<T>} groups
|
||
* @returns {Flags} group flags that contain this token
|
||
*/
|
||
function flagsForToken(t, groups) {
|
||
const result = {};
|
||
for (const c in groups) {
|
||
if (groups[c].indexOf(t) >= 0) {
|
||
result[c] = true;
|
||
}
|
||
}
|
||
return result;
|
||
}
|
||
|
||
/**
|
||
* @template T
|
||
* @typedef {null | T } Transition
|
||
*/
|
||
|
||
/**
|
||
* Define a basic state machine state. j is the list of character transitions,
|
||
* jr is the list of regex-match transitions, jd is the default state to
|
||
* transition to t is the accepting token type, if any. If this is the terminal
|
||
* state, then it does not emit a token.
|
||
*
|
||
* The template type T represents the type of the token this state accepts. This
|
||
* should be a string (such as of the token exports in `text.js`) or a
|
||
* MultiToken subclass (from `multi.js`)
|
||
*
|
||
* @template T
|
||
* @param {T} [token] Token that this state emits
|
||
*/
|
||
function State(token = null) {
|
||
// this.n = null; // DEBUG: State name
|
||
/** @type {{ [input: string]: State<T> }} j */
|
||
this.j = {}; // IMPLEMENTATION 1
|
||
// this.j = []; // IMPLEMENTATION 2
|
||
/** @type {[RegExp, State<T>][]} jr */
|
||
this.jr = [];
|
||
/** @type {?State<T>} jd */
|
||
this.jd = null;
|
||
/** @type {?T} t */
|
||
this.t = token;
|
||
}
|
||
|
||
/**
|
||
* Scanner token groups
|
||
* @type Collections<string>
|
||
*/
|
||
State.groups = {};
|
||
State.prototype = {
|
||
accepts() {
|
||
return !!this.t;
|
||
},
|
||
/**
|
||
* Follow an existing transition from the given input to the next state.
|
||
* Does not mutate.
|
||
* @param {string} input character or token type to transition on
|
||
* @returns {?State<T>} the next state, if any
|
||
*/
|
||
go(input) {
|
||
const state = this;
|
||
const nextState = state.j[input];
|
||
if (nextState) {
|
||
return nextState;
|
||
}
|
||
for (let i = 0; i < state.jr.length; i++) {
|
||
const regex = state.jr[i][0];
|
||
const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
|
||
if (nextState && regex.test(input)) {
|
||
return nextState;
|
||
}
|
||
}
|
||
// Nowhere left to jump! Return default, if any
|
||
return state.jd;
|
||
},
|
||
/**
|
||
* Whether the state has a transition for the given input. Set the second
|
||
* argument to true to only look for an exact match (and not a default or
|
||
* regular-expression-based transition)
|
||
* @param {string} input
|
||
* @param {boolean} exactOnly
|
||
*/
|
||
has(input, exactOnly = false) {
|
||
return exactOnly ? input in this.j : !!this.go(input);
|
||
},
|
||
/**
|
||
* Short for "transition all"; create a transition from the array of items
|
||
* in the given list to the same final resulting state.
|
||
* @param {string | string[]} inputs Group of inputs to transition on
|
||
* @param {Transition<T> | State<T>} [next] Transition options
|
||
* @param {Flags} [flags] Collections flags to add token to
|
||
* @param {Collections<T>} [groups] Master list of token groups
|
||
*/
|
||
ta(inputs, next, flags, groups) {
|
||
for (let i = 0; i < inputs.length; i++) {
|
||
this.tt(inputs[i], next, flags, groups);
|
||
}
|
||
},
|
||
/**
|
||
* Short for "take regexp transition"; defines a transition for this state
|
||
* when it encounters a token which matches the given regular expression
|
||
* @param {RegExp} regexp Regular expression transition (populate first)
|
||
* @param {T | State<T>} [next] Transition options
|
||
* @param {Flags} [flags] Collections flags to add token to
|
||
* @param {Collections<T>} [groups] Master list of token groups
|
||
* @returns {State<T>} taken after the given input
|
||
*/
|
||
tr(regexp, next, flags, groups) {
|
||
groups = groups || State.groups;
|
||
let nextState;
|
||
if (next && next.j) {
|
||
nextState = next;
|
||
} else {
|
||
// Token with maybe token groups
|
||
nextState = new State(next);
|
||
if (flags && groups) {
|
||
addToGroups(next, flags, groups);
|
||
}
|
||
}
|
||
this.jr.push([regexp, nextState]);
|
||
return nextState;
|
||
},
|
||
/**
|
||
* Short for "take transitions", will take as many sequential transitions as
|
||
* the length of the given input and returns the
|
||
* resulting final state.
|
||
* @param {string | string[]} input
|
||
* @param {T | State<T>} [next] Transition options
|
||
* @param {Flags} [flags] Collections flags to add token to
|
||
* @param {Collections<T>} [groups] Master list of token groups
|
||
* @returns {State<T>} taken after the given input
|
||
*/
|
||
ts(input, next, flags, groups) {
|
||
let state = this;
|
||
const len = input.length;
|
||
if (!len) {
|
||
return state;
|
||
}
|
||
for (let i = 0; i < len - 1; i++) {
|
||
state = state.tt(input[i]);
|
||
}
|
||
return state.tt(input[len - 1], next, flags, groups);
|
||
},
|
||
/**
|
||
* Short for "take transition", this is a method for building/working with
|
||
* state machines.
|
||
*
|
||
* If a state already exists for the given input, returns it.
|
||
*
|
||
* If a token is specified, that state will emit that token when reached by
|
||
* the linkify engine.
|
||
*
|
||
* If no state exists, it will be initialized with some default transitions
|
||
* that resemble existing default transitions.
|
||
*
|
||
* If a state is given for the second argument, that state will be
|
||
* transitioned to on the given input regardless of what that input
|
||
* previously did.
|
||
*
|
||
* Specify a token group flags to define groups that this token belongs to.
|
||
* The token will be added to corresponding entires in the given groups
|
||
* object.
|
||
*
|
||
* @param {string} input character, token type to transition on
|
||
* @param {T | State<T>} [next] Transition options
|
||
* @param {Flags} [flags] Collections flags to add token to
|
||
* @param {Collections<T>} [groups] Master list of groups
|
||
* @returns {State<T>} taken after the given input
|
||
*/
|
||
tt(input, next, flags, groups) {
|
||
groups = groups || State.groups;
|
||
const state = this;
|
||
|
||
// Check if existing state given, just a basic transition
|
||
if (next && next.j) {
|
||
state.j[input] = next;
|
||
return next;
|
||
}
|
||
const t = next;
|
||
|
||
// Take the transition with the usual default mechanisms and use that as
|
||
// a template for creating the next state
|
||
let nextState,
|
||
templateState = state.go(input);
|
||
if (templateState) {
|
||
nextState = new State();
|
||
Object.assign(nextState.j, templateState.j);
|
||
nextState.jr.push.apply(nextState.jr, templateState.jr);
|
||
nextState.jd = templateState.jd;
|
||
nextState.t = templateState.t;
|
||
} else {
|
||
nextState = new State();
|
||
}
|
||
if (t) {
|
||
// Ensure newly token is in the same groups as the old token
|
||
if (groups) {
|
||
if (nextState.t && typeof nextState.t === 'string') {
|
||
const allFlags = Object.assign(flagsForToken(nextState.t, groups), flags);
|
||
addToGroups(t, allFlags, groups);
|
||
} else if (flags) {
|
||
addToGroups(t, flags, groups);
|
||
}
|
||
}
|
||
nextState.t = t; // overwrite anything that was previously there
|
||
}
|
||
state.j[input] = nextState;
|
||
return nextState;
|
||
}
|
||
};
|
||
|
||
// Helper functions to improve minification (not exported outside linkifyjs module)
|
||
|
||
/**
|
||
* @template T
|
||
* @param {State<T>} state
|
||
* @param {string | string[]} input
|
||
* @param {Flags} [flags]
|
||
* @param {Collections<T>} [groups]
|
||
*/
|
||
const ta = (state, input, next, flags, groups) => state.ta(input, next, flags, groups);
|
||
|
||
/**
|
||
* @template T
|
||
* @param {State<T>} state
|
||
* @param {RegExp} regexp
|
||
* @param {T | State<T>} [next]
|
||
* @param {Flags} [flags]
|
||
* @param {Collections<T>} [groups]
|
||
*/
|
||
const tr = (state, regexp, next, flags, groups) => state.tr(regexp, next, flags, groups);
|
||
|
||
/**
|
||
* @template T
|
||
* @param {State<T>} state
|
||
* @param {string | string[]} input
|
||
* @param {T | State<T>} [next]
|
||
* @param {Flags} [flags]
|
||
* @param {Collections<T>} [groups]
|
||
*/
|
||
const ts = (state, input, next, flags, groups) => state.ts(input, next, flags, groups);
|
||
|
||
/**
|
||
* @template T
|
||
* @param {State<T>} state
|
||
* @param {string} input
|
||
* @param {T | State<T>} [next]
|
||
* @param {Collections<T>} [groups]
|
||
* @param {Flags} [flags]
|
||
*/
|
||
const tt = (state, input, next, flags, groups) => state.tt(input, next, flags, groups);
|
||
|
||
/******************************************************************************
|
||
Text Tokens
|
||
Identifiers for token outputs from the regexp scanner
|
||
******************************************************************************/
|
||
|
||
// A valid web domain token
|
||
const WORD = 'WORD'; // only contains a-z
|
||
const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN
|
||
const ASCIINUMERICAL = 'ASCIINUMERICAL'; // contains a-z, 0-9
|
||
const ALPHANUMERICAL = 'ALPHANUMERICAL'; // contains numbers and letters other than a-z, used for IDN
|
||
|
||
// Special case of word
|
||
const LOCALHOST = 'LOCALHOST';
|
||
|
||
// Valid top-level domain, special case of WORD (see tlds.js)
|
||
const TLD = 'TLD';
|
||
|
||
// Valid IDN TLD, special case of UWORD (see tlds.js)
|
||
const UTLD = 'UTLD';
|
||
|
||
// The scheme portion of a web URI protocol. Supported types include: `mailto`,
|
||
// `file`, and user-defined custom protocols. Limited to schemes that contain
|
||
// only letters
|
||
const SCHEME = 'SCHEME';
|
||
|
||
// Similar to SCHEME, except makes distinction for schemes that must always be
|
||
// followed by `://`, not just `:`. Supported types include `http`, `https`,
|
||
// `ftp`, `ftps`
|
||
const SLASH_SCHEME = 'SLASH_SCHEME';
|
||
|
||
// Any sequence of digits 0-9
|
||
const NUM = 'NUM';
|
||
|
||
// Any number of consecutive whitespace characters that are not newline
|
||
const WS = 'WS';
|
||
|
||
// New line (unix style)
|
||
const NL = 'NL'; // \n
|
||
|
||
// Opening/closing bracket classes
|
||
// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names
|
||
// Also rename angle brackes to LESSTHAN and GREATER THAN
|
||
const OPENBRACE = 'OPENBRACE'; // {
|
||
const CLOSEBRACE = 'CLOSEBRACE'; // }
|
||
const OPENBRACKET = 'OPENBRACKET'; // [
|
||
const CLOSEBRACKET = 'CLOSEBRACKET'; // ]
|
||
const OPENPAREN = 'OPENPAREN'; // (
|
||
const CLOSEPAREN = 'CLOSEPAREN'; // )
|
||
const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
|
||
const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
|
||
const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; // (
|
||
const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; // )
|
||
const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「
|
||
const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」
|
||
const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『
|
||
const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』
|
||
const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; // <
|
||
const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; // >
|
||
|
||
// Various symbols
|
||
const AMPERSAND = 'AMPERSAND'; // &
|
||
const APOSTROPHE = 'APOSTROPHE'; // '
|
||
const ASTERISK = 'ASTERISK'; // *
|
||
const AT = 'AT'; // @
|
||
const BACKSLASH = 'BACKSLASH'; // \
|
||
const BACKTICK = 'BACKTICK'; // `
|
||
const CARET = 'CARET'; // ^
|
||
const COLON = 'COLON'; // :
|
||
const COMMA = 'COMMA'; // ,
|
||
const DOLLAR = 'DOLLAR'; // $
|
||
const DOT = 'DOT'; // .
|
||
const EQUALS = 'EQUALS'; // =
|
||
const EXCLAMATION = 'EXCLAMATION'; // !
|
||
const HYPHEN = 'HYPHEN'; // -
|
||
const PERCENT = 'PERCENT'; // %
|
||
const PIPE = 'PIPE'; // |
|
||
const PLUS = 'PLUS'; // +
|
||
const POUND = 'POUND'; // #
|
||
const QUERY = 'QUERY'; // ?
|
||
const QUOTE = 'QUOTE'; // "
|
||
const FULLWIDTHMIDDLEDOT = 'FULLWIDTHMIDDLEDOT'; // ・
|
||
|
||
const SEMI = 'SEMI'; // ;
|
||
const SLASH = 'SLASH'; // /
|
||
const TILDE = 'TILDE'; // ~
|
||
const UNDERSCORE = 'UNDERSCORE'; // _
|
||
|
||
// Emoji symbol
|
||
const EMOJI$1 = 'EMOJI';
|
||
|
||
// Default token - anything that is not one of the above
|
||
const SYM = 'SYM';
|
||
|
||
var tk = /*#__PURE__*/Object.freeze({
|
||
__proto__: null,
|
||
ALPHANUMERICAL: ALPHANUMERICAL,
|
||
AMPERSAND: AMPERSAND,
|
||
APOSTROPHE: APOSTROPHE,
|
||
ASCIINUMERICAL: ASCIINUMERICAL,
|
||
ASTERISK: ASTERISK,
|
||
AT: AT,
|
||
BACKSLASH: BACKSLASH,
|
||
BACKTICK: BACKTICK,
|
||
CARET: CARET,
|
||
CLOSEANGLEBRACKET: CLOSEANGLEBRACKET,
|
||
CLOSEBRACE: CLOSEBRACE,
|
||
CLOSEBRACKET: CLOSEBRACKET,
|
||
CLOSEPAREN: CLOSEPAREN,
|
||
COLON: COLON,
|
||
COMMA: COMMA,
|
||
DOLLAR: DOLLAR,
|
||
DOT: DOT,
|
||
EMOJI: EMOJI$1,
|
||
EQUALS: EQUALS,
|
||
EXCLAMATION: EXCLAMATION,
|
||
FULLWIDTHGREATERTHAN: FULLWIDTHGREATERTHAN,
|
||
FULLWIDTHLEFTPAREN: FULLWIDTHLEFTPAREN,
|
||
FULLWIDTHLESSTHAN: FULLWIDTHLESSTHAN,
|
||
FULLWIDTHMIDDLEDOT: FULLWIDTHMIDDLEDOT,
|
||
FULLWIDTHRIGHTPAREN: FULLWIDTHRIGHTPAREN,
|
||
HYPHEN: HYPHEN,
|
||
LEFTCORNERBRACKET: LEFTCORNERBRACKET,
|
||
LEFTWHITECORNERBRACKET: LEFTWHITECORNERBRACKET,
|
||
LOCALHOST: LOCALHOST,
|
||
NL: NL,
|
||
NUM: NUM,
|
||
OPENANGLEBRACKET: OPENANGLEBRACKET,
|
||
OPENBRACE: OPENBRACE,
|
||
OPENBRACKET: OPENBRACKET,
|
||
OPENPAREN: OPENPAREN,
|
||
PERCENT: PERCENT,
|
||
PIPE: PIPE,
|
||
PLUS: PLUS,
|
||
POUND: POUND,
|
||
QUERY: QUERY,
|
||
QUOTE: QUOTE,
|
||
RIGHTCORNERBRACKET: RIGHTCORNERBRACKET,
|
||
RIGHTWHITECORNERBRACKET: RIGHTWHITECORNERBRACKET,
|
||
SCHEME: SCHEME,
|
||
SEMI: SEMI,
|
||
SLASH: SLASH,
|
||
SLASH_SCHEME: SLASH_SCHEME,
|
||
SYM: SYM,
|
||
TILDE: TILDE,
|
||
TLD: TLD,
|
||
UNDERSCORE: UNDERSCORE,
|
||
UTLD: UTLD,
|
||
UWORD: UWORD,
|
||
WORD: WORD,
|
||
WS: WS
|
||
});
|
||
|
||
// Note that these two Unicode ones expand into a really big one with Babel
|
||
const ASCII_LETTER = /[a-z]/;
|
||
const LETTER = /\p{L}/u; // Any Unicode character with letter data type
|
||
const EMOJI = /\p{Emoji}/u; // Any Unicode emoji character
|
||
const EMOJI_VARIATION$1 = /\ufe0f/;
|
||
const DIGIT = /\d/;
|
||
const SPACE = /\s/;
|
||
|
||
var regexp = /*#__PURE__*/Object.freeze({
|
||
__proto__: null,
|
||
ASCII_LETTER: ASCII_LETTER,
|
||
DIGIT: DIGIT,
|
||
EMOJI: EMOJI,
|
||
EMOJI_VARIATION: EMOJI_VARIATION$1,
|
||
LETTER: LETTER,
|
||
SPACE: SPACE
|
||
});
|
||
|
||
/**
|
||
The scanner provides an interface that takes a string of text as input, and
|
||
outputs an array of tokens instances that can be used for easy URL parsing.
|
||
*/
|
||
|
||
const CR = '\r'; // carriage-return character
|
||
const LF = '\n'; // line-feed character
|
||
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
|
||
const EMOJI_JOINER = '\u200d'; // zero-width joiner
|
||
const OBJECT_REPLACEMENT = '\ufffc'; // whitespace placeholder that sometimes appears in rich text editors
|
||
|
||
let tlds = null,
|
||
utlds = null; // don't change so only have to be computed once
|
||
|
||
/**
|
||
* Scanner output token:
|
||
* - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
|
||
* - `v` is the value of the token (e.g., '123', '❤️', 'com')
|
||
* - `s` is the start index of the token in the original string
|
||
* - `e` is the end index of the token in the original string
|
||
* @typedef {{t: string, v: string, s: number, e: number}} Token
|
||
*/
|
||
|
||
/**
|
||
* @template T
|
||
* @typedef {{ [collection: string]: T[] }} Collections
|
||
*/
|
||
|
||
/**
|
||
* Initialize the scanner character-based state machine for the given start
|
||
* state
|
||
* @param {[string, boolean][]} customSchemes List of custom schemes, where each
|
||
* item is a length-2 tuple with the first element set to the string scheme, and
|
||
* the second element set to `true` if the `://` after the scheme is optional
|
||
*/
|
||
function init$2(customSchemes = []) {
|
||
// Frequently used states (name argument removed during minification)
|
||
/** @type Collections<string> */
|
||
const groups = {}; // of tokens
|
||
State.groups = groups;
|
||
/** @type State<string> */
|
||
const Start = new State();
|
||
if (tlds == null) {
|
||
tlds = decodeTlds(encodedTlds);
|
||
}
|
||
if (utlds == null) {
|
||
utlds = decodeTlds(encodedUtlds);
|
||
}
|
||
|
||
// States for special URL symbols that accept immediately after start
|
||
tt(Start, "'", APOSTROPHE);
|
||
tt(Start, '{', OPENBRACE);
|
||
tt(Start, '}', CLOSEBRACE);
|
||
tt(Start, '[', OPENBRACKET);
|
||
tt(Start, ']', CLOSEBRACKET);
|
||
tt(Start, '(', OPENPAREN);
|
||
tt(Start, ')', CLOSEPAREN);
|
||
tt(Start, '<', OPENANGLEBRACKET);
|
||
tt(Start, '>', CLOSEANGLEBRACKET);
|
||
tt(Start, '(', FULLWIDTHLEFTPAREN);
|
||
tt(Start, ')', FULLWIDTHRIGHTPAREN);
|
||
tt(Start, '「', LEFTCORNERBRACKET);
|
||
tt(Start, '」', RIGHTCORNERBRACKET);
|
||
tt(Start, '『', LEFTWHITECORNERBRACKET);
|
||
tt(Start, '』', RIGHTWHITECORNERBRACKET);
|
||
tt(Start, '<', FULLWIDTHLESSTHAN);
|
||
tt(Start, '>', FULLWIDTHGREATERTHAN);
|
||
tt(Start, '&', AMPERSAND);
|
||
tt(Start, '*', ASTERISK);
|
||
tt(Start, '@', AT);
|
||
tt(Start, '`', BACKTICK);
|
||
tt(Start, '^', CARET);
|
||
tt(Start, ':', COLON);
|
||
tt(Start, ',', COMMA);
|
||
tt(Start, '$', DOLLAR);
|
||
tt(Start, '.', DOT);
|
||
tt(Start, '=', EQUALS);
|
||
tt(Start, '!', EXCLAMATION);
|
||
tt(Start, '-', HYPHEN);
|
||
tt(Start, '%', PERCENT);
|
||
tt(Start, '|', PIPE);
|
||
tt(Start, '+', PLUS);
|
||
tt(Start, '#', POUND);
|
||
tt(Start, '?', QUERY);
|
||
tt(Start, '"', QUOTE);
|
||
tt(Start, '/', SLASH);
|
||
tt(Start, ';', SEMI);
|
||
tt(Start, '~', TILDE);
|
||
tt(Start, '_', UNDERSCORE);
|
||
tt(Start, '\\', BACKSLASH);
|
||
tt(Start, '・', FULLWIDTHMIDDLEDOT);
|
||
const Num = tr(Start, DIGIT, NUM, {
|
||
[numeric]: true
|
||
});
|
||
tr(Num, DIGIT, Num);
|
||
const Asciinumeric = tr(Num, ASCII_LETTER, ASCIINUMERICAL, {
|
||
[asciinumeric]: true
|
||
});
|
||
const Alphanumeric = tr(Num, LETTER, ALPHANUMERICAL, {
|
||
[alphanumeric]: true
|
||
});
|
||
|
||
// State which emits a word token
|
||
const Word = tr(Start, ASCII_LETTER, WORD, {
|
||
[ascii]: true
|
||
});
|
||
tr(Word, DIGIT, Asciinumeric);
|
||
tr(Word, ASCII_LETTER, Word);
|
||
tr(Asciinumeric, DIGIT, Asciinumeric);
|
||
tr(Asciinumeric, ASCII_LETTER, Asciinumeric);
|
||
|
||
// Same as previous, but specific to non-fsm.ascii alphabet words
|
||
const UWord = tr(Start, LETTER, UWORD, {
|
||
[alpha]: true
|
||
});
|
||
tr(UWord, ASCII_LETTER); // Non-accepting
|
||
tr(UWord, DIGIT, Alphanumeric);
|
||
tr(UWord, LETTER, UWord);
|
||
tr(Alphanumeric, DIGIT, Alphanumeric);
|
||
tr(Alphanumeric, ASCII_LETTER); // Non-accepting
|
||
tr(Alphanumeric, LETTER, Alphanumeric); // Non-accepting
|
||
|
||
// Whitespace jumps
|
||
// Tokens of only non-newline whitespace are arbitrarily long
|
||
// If any whitespace except newline, more whitespace!
|
||
const Nl = tt(Start, LF, NL, {
|
||
[whitespace]: true
|
||
});
|
||
const Cr = tt(Start, CR, WS, {
|
||
[whitespace]: true
|
||
});
|
||
const Ws = tr(Start, SPACE, WS, {
|
||
[whitespace]: true
|
||
});
|
||
tt(Start, OBJECT_REPLACEMENT, Ws);
|
||
tt(Cr, LF, Nl); // \r\n
|
||
tt(Cr, OBJECT_REPLACEMENT, Ws);
|
||
tr(Cr, SPACE, Ws);
|
||
tt(Ws, CR); // non-accepting state to avoid mixing whitespaces
|
||
tt(Ws, LF); // non-accepting state to avoid mixing whitespaces
|
||
tr(Ws, SPACE, Ws);
|
||
tt(Ws, OBJECT_REPLACEMENT, Ws);
|
||
|
||
// Emoji tokens. They are not grouped by the scanner except in cases where a
|
||
// zero-width joiner is present
|
||
const Emoji = tr(Start, EMOJI, EMOJI$1, {
|
||
[emoji]: true
|
||
});
|
||
tt(Emoji, '#'); // no transition, emoji regex seems to match #
|
||
tr(Emoji, EMOJI, Emoji);
|
||
tt(Emoji, EMOJI_VARIATION, Emoji);
|
||
// tt(Start, EMOJI_VARIATION, Emoji); // This one is sketchy
|
||
|
||
const EmojiJoiner = tt(Emoji, EMOJI_JOINER);
|
||
tt(EmojiJoiner, '#');
|
||
tr(EmojiJoiner, EMOJI, Emoji);
|
||
// tt(EmojiJoiner, EMOJI_VARIATION, Emoji); // also sketchy
|
||
|
||
// Generates states for top-level domains
|
||
// Note that this is most accurate when tlds are in alphabetical order
|
||
const wordjr = [[ASCII_LETTER, Word], [DIGIT, Asciinumeric]];
|
||
const uwordjr = [[ASCII_LETTER, null], [LETTER, UWord], [DIGIT, Alphanumeric]];
|
||
for (let i = 0; i < tlds.length; i++) {
|
||
fastts(Start, tlds[i], TLD, WORD, wordjr);
|
||
}
|
||
for (let i = 0; i < utlds.length; i++) {
|
||
fastts(Start, utlds[i], UTLD, UWORD, uwordjr);
|
||
}
|
||
addToGroups(TLD, {
|
||
tld: true,
|
||
ascii: true
|
||
}, groups);
|
||
addToGroups(UTLD, {
|
||
utld: true,
|
||
alpha: true
|
||
}, groups);
|
||
|
||
// Collect the states generated by different protocols. NOTE: If any new TLDs
|
||
// get added that are also protocols, set the token to be the same as the
|
||
// protocol to ensure parsing works as expected.
|
||
fastts(Start, 'file', SCHEME, WORD, wordjr);
|
||
fastts(Start, 'mailto', SCHEME, WORD, wordjr);
|
||
fastts(Start, 'http', SLASH_SCHEME, WORD, wordjr);
|
||
fastts(Start, 'https', SLASH_SCHEME, WORD, wordjr);
|
||
fastts(Start, 'ftp', SLASH_SCHEME, WORD, wordjr);
|
||
fastts(Start, 'ftps', SLASH_SCHEME, WORD, wordjr);
|
||
addToGroups(SCHEME, {
|
||
scheme: true,
|
||
ascii: true
|
||
}, groups);
|
||
addToGroups(SLASH_SCHEME, {
|
||
slashscheme: true,
|
||
ascii: true
|
||
}, groups);
|
||
|
||
// Register custom schemes. Assumes each scheme is asciinumeric with hyphens
|
||
customSchemes = customSchemes.sort((a, b) => a[0] > b[0] ? 1 : -1);
|
||
for (let i = 0; i < customSchemes.length; i++) {
|
||
const sch = customSchemes[i][0];
|
||
const optionalSlashSlash = customSchemes[i][1];
|
||
const flags = optionalSlashSlash ? {
|
||
[scheme]: true
|
||
} : {
|
||
[slashscheme]: true
|
||
};
|
||
if (sch.indexOf('-') >= 0) {
|
||
flags[domain] = true;
|
||
} else if (!ASCII_LETTER.test(sch)) {
|
||
flags[numeric] = true; // numbers only
|
||
} else if (DIGIT.test(sch)) {
|
||
flags[asciinumeric] = true;
|
||
} else {
|
||
flags[ascii] = true;
|
||
}
|
||
ts(Start, sch, sch, flags);
|
||
}
|
||
|
||
// Localhost token
|
||
ts(Start, 'localhost', LOCALHOST, {
|
||
ascii: true
|
||
});
|
||
|
||
// Set default transition for start state (some symbol)
|
||
Start.jd = new State(SYM);
|
||
return {
|
||
start: Start,
|
||
tokens: Object.assign({
|
||
groups
|
||
}, tk)
|
||
};
|
||
}
|
||
|
||
/**
|
||
Given a string, returns an array of TOKEN instances representing the
|
||
composition of that string.
|
||
|
||
@method run
|
||
@param {State<string>} start scanner starting state
|
||
@param {string} str input string to scan
|
||
@return {Token[]} list of tokens, each with a type and value
|
||
*/
|
||
function run$1(start, str) {
|
||
// State machine is not case sensitive, so input is tokenized in lowercased
|
||
// form (still returns regular case). Uses selective `toLowerCase` because
|
||
// lowercasing the entire string causes the length and character position to
|
||
// vary in some non-English strings with V8-based runtimes.
|
||
const iterable = stringToArray(str.replace(/[A-Z]/g, c => c.toLowerCase()));
|
||
const charCount = iterable.length; // <= len if there are emojis, etc
|
||
const tokens = []; // return value
|
||
|
||
// cursor through the string itself, accounting for characters that have
|
||
// width with length 2 such as emojis
|
||
let cursor = 0;
|
||
|
||
// Cursor through the array-representation of the string
|
||
let charCursor = 0;
|
||
|
||
// Tokenize the string
|
||
while (charCursor < charCount) {
|
||
let state = start;
|
||
let nextState = null;
|
||
let tokenLength = 0;
|
||
let latestAccepting = null;
|
||
let sinceAccepts = -1;
|
||
let charsSinceAccepts = -1;
|
||
while (charCursor < charCount && (nextState = state.go(iterable[charCursor]))) {
|
||
state = nextState;
|
||
|
||
// Keep track of the latest accepting state
|
||
if (state.accepts()) {
|
||
sinceAccepts = 0;
|
||
charsSinceAccepts = 0;
|
||
latestAccepting = state;
|
||
} else if (sinceAccepts >= 0) {
|
||
sinceAccepts += iterable[charCursor].length;
|
||
charsSinceAccepts++;
|
||
}
|
||
tokenLength += iterable[charCursor].length;
|
||
cursor += iterable[charCursor].length;
|
||
charCursor++;
|
||
}
|
||
|
||
// Roll back to the latest accepting state
|
||
cursor -= sinceAccepts;
|
||
charCursor -= charsSinceAccepts;
|
||
tokenLength -= sinceAccepts;
|
||
|
||
// No more jumps, just make a new token from the last accepting one
|
||
tokens.push({
|
||
t: latestAccepting.t,
|
||
// token type/name
|
||
v: str.slice(cursor - tokenLength, cursor),
|
||
// string value
|
||
s: cursor - tokenLength,
|
||
// start index
|
||
e: cursor // end index (excluding)
|
||
});
|
||
}
|
||
return tokens;
|
||
}
|
||
|
||
/**
|
||
* Convert a String to an Array of characters, taking into account that some
|
||
* characters like emojis take up two string indexes.
|
||
*
|
||
* Adapted from core-js (MIT license)
|
||
* https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
|
||
*
|
||
* @function stringToArray
|
||
* @param {string} str
|
||
* @returns {string[]}
|
||
*/
|
||
function stringToArray(str) {
|
||
const result = [];
|
||
const len = str.length;
|
||
let index = 0;
|
||
while (index < len) {
|
||
let first = str.charCodeAt(index);
|
||
let second;
|
||
let char = first < 0xd800 || first > 0xdbff || index + 1 === len || (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff ? str[index] // single character
|
||
: str.slice(index, index + 2); // two-index characters
|
||
result.push(char);
|
||
index += char.length;
|
||
}
|
||
return result;
|
||
}
|
||
|
||
/**
|
||
* Fast version of ts function for when transition defaults are well known
|
||
* @param {State<string>} state
|
||
* @param {string} input
|
||
* @param {string} t
|
||
* @param {string} defaultt
|
||
* @param {[RegExp, State<string>][]} jr
|
||
* @returns {State<string>}
|
||
*/
|
||
function fastts(state, input, t, defaultt, jr) {
|
||
let next;
|
||
const len = input.length;
|
||
for (let i = 0; i < len - 1; i++) {
|
||
const char = input[i];
|
||
if (state.j[char]) {
|
||
next = state.j[char];
|
||
} else {
|
||
next = new State(defaultt);
|
||
next.jr = jr.slice();
|
||
state.j[char] = next;
|
||
}
|
||
state = next;
|
||
}
|
||
next = new State(t);
|
||
next.jr = jr.slice();
|
||
state.j[input[len - 1]] = next;
|
||
return next;
|
||
}
|
||
|
||
/**
|
||
* Converts a string of Top-Level Domain names encoded in update-tlds.js back
|
||
* into a list of strings.
|
||
* @param {str} encoded encoded TLDs string
|
||
* @returns {str[]} original TLDs list
|
||
*/
|
||
function decodeTlds(encoded) {
|
||
const words = [];
|
||
const stack = [];
|
||
let i = 0;
|
||
let digits = '0123456789';
|
||
while (i < encoded.length) {
|
||
let popDigitCount = 0;
|
||
while (digits.indexOf(encoded[i + popDigitCount]) >= 0) {
|
||
popDigitCount++; // encountered some digits, have to pop to go one level up trie
|
||
}
|
||
if (popDigitCount > 0) {
|
||
words.push(stack.join('')); // whatever preceded the pop digits must be a word
|
||
for (let popCount = parseInt(encoded.substring(i, i + popDigitCount), 10); popCount > 0; popCount--) {
|
||
stack.pop();
|
||
}
|
||
i += popDigitCount;
|
||
} else {
|
||
stack.push(encoded[i]); // drop down a level into the trie
|
||
i++;
|
||
}
|
||
}
|
||
return words;
|
||
}
|
||
|
||
/**
|
||
* An object where each key is a valid DOM Event Name such as `click` or `focus`
|
||
* and each value is an event handler function.
|
||
*
|
||
* https://developer.mozilla.org/en-US/docs/Web/API/Element#events
|
||
* @typedef {?{ [event: string]: Function }} EventListeners
|
||
*/
|
||
|
||
/**
|
||
* All formatted properties required to render a link, including `tagName`,
|
||
* `attributes`, `content` and `eventListeners`.
|
||
* @typedef {{ tagName: any, attributes: {[attr: string]: any}, content: string,
|
||
* eventListeners: EventListeners }} IntermediateRepresentation
|
||
*/
|
||
|
||
/**
|
||
* Specify either an object described by the template type `O` or a function.
|
||
*
|
||
* The function takes a string value (usually the link's href attribute), the
|
||
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
|
||
* of the link. It should return an object of the template type `O`
|
||
* @template O
|
||
* @typedef {O | ((value: string, type: string, token: MultiToken) => O)} OptObj
|
||
*/
|
||
|
||
/**
|
||
* Specify either a function described by template type `F` or an object.
|
||
*
|
||
* Each key in the object should be a link type (`'url'`, `'hashtag`', etc.). Each
|
||
* value should be a function with template type `F` that is called when the
|
||
* corresponding link type is encountered.
|
||
* @template F
|
||
* @typedef {F | { [type: string]: F}} OptFn
|
||
*/
|
||
|
||
/**
|
||
* Specify either a value with template type `V`, a function that returns `V` or
|
||
* an object where each value resolves to `V`.
|
||
*
|
||
* The function takes a string value (usually the link's href attribute), the
|
||
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
|
||
* of the link. It should return an object of the template type `V`
|
||
*
|
||
* For the object, each key should be a link type (`'url'`, `'hashtag`', etc.).
|
||
* Each value should either have type `V` or a function that returns V. This
|
||
* function similarly takes a string value and a token.
|
||
*
|
||
* Example valid types for `Opt<string>`:
|
||
*
|
||
* ```js
|
||
* 'hello'
|
||
* (value, type, token) => 'world'
|
||
* { url: 'hello', email: (value, token) => 'world'}
|
||
* ```
|
||
* @template V
|
||
* @typedef {V | ((value: string, type: string, token: MultiToken) => V) | { [type: string]: V | ((value: string, token: MultiToken) => V) }} Opt
|
||
*/
|
||
|
||
/**
|
||
* See available options: https://linkify.js.org/docs/options.html
|
||
* @typedef {{
|
||
* defaultProtocol?: string,
|
||
* events?: OptObj<EventListeners>,
|
||
* format?: Opt<string>,
|
||
* formatHref?: Opt<string>,
|
||
* nl2br?: boolean,
|
||
* tagName?: Opt<any>,
|
||
* target?: Opt<string>,
|
||
* rel?: Opt<string>,
|
||
* validate?: Opt<boolean>,
|
||
* truncate?: Opt<number>,
|
||
* className?: Opt<string>,
|
||
* attributes?: OptObj<({ [attr: string]: any })>,
|
||
* ignoreTags?: string[],
|
||
* render?: OptFn<((ir: IntermediateRepresentation) => any)>
|
||
* }} Opts
|
||
*/
|
||
|
||
/**
|
||
* @type Required<Opts>
|
||
*/
|
||
const defaults = {
|
||
defaultProtocol: 'http',
|
||
events: null,
|
||
format: noop,
|
||
formatHref: noop,
|
||
nl2br: false,
|
||
tagName: 'a',
|
||
target: null,
|
||
rel: null,
|
||
validate: true,
|
||
truncate: Infinity,
|
||
className: null,
|
||
attributes: null,
|
||
ignoreTags: [],
|
||
render: null
|
||
};
|
||
|
||
/**
|
||
* Utility class for linkify interfaces to apply specified
|
||
* {@link Opts formatting and rendering options}.
|
||
*
|
||
* @param {Opts | Options} [opts] Option value overrides.
|
||
* @param {(ir: IntermediateRepresentation) => any} [defaultRender] (For
|
||
* internal use) default render function that determines how to generate an
|
||
* HTML element based on a link token's derived tagName, attributes and HTML.
|
||
* Similar to render option
|
||
*/
|
||
function Options(opts, defaultRender = null) {
|
||
let o = Object.assign({}, defaults);
|
||
if (opts) {
|
||
o = Object.assign(o, opts instanceof Options ? opts.o : opts);
|
||
}
|
||
|
||
// Ensure all ignored tags are uppercase
|
||
const ignoredTags = o.ignoreTags;
|
||
const uppercaseIgnoredTags = [];
|
||
for (let i = 0; i < ignoredTags.length; i++) {
|
||
uppercaseIgnoredTags.push(ignoredTags[i].toUpperCase());
|
||
}
|
||
/** @protected */
|
||
this.o = o;
|
||
if (defaultRender) {
|
||
this.defaultRender = defaultRender;
|
||
}
|
||
this.ignoreTags = uppercaseIgnoredTags;
|
||
}
|
||
Options.prototype = {
|
||
o: defaults,
|
||
/**
|
||
* @type string[]
|
||
*/
|
||
ignoreTags: [],
|
||
/**
|
||
* @param {IntermediateRepresentation} ir
|
||
* @returns {any}
|
||
*/
|
||
defaultRender(ir) {
|
||
return ir;
|
||
},
|
||
/**
|
||
* Returns true or false based on whether a token should be displayed as a
|
||
* link based on the user options.
|
||
* @param {MultiToken} token
|
||
* @returns {boolean}
|
||
*/
|
||
check(token) {
|
||
return this.get('validate', token.toString(), token);
|
||
},
|
||
// Private methods
|
||
|
||
/**
|
||
* Resolve an option's value based on the value of the option and the given
|
||
* params. If operator and token are specified and the target option is
|
||
* callable, automatically calls the function with the given argument.
|
||
* @template {keyof Opts} K
|
||
* @param {K} key Name of option to use
|
||
* @param {string} [operator] will be passed to the target option if it's a
|
||
* function. If not specified, RAW function value gets returned
|
||
* @param {MultiToken} [token] The token from linkify.tokenize
|
||
* @returns {Opts[K] | any}
|
||
*/
|
||
get(key, operator, token) {
|
||
const isCallable = operator != null;
|
||
let option = this.o[key];
|
||
if (!option) {
|
||
return option;
|
||
}
|
||
if (typeof option === 'object') {
|
||
option = token.t in option ? option[token.t] : defaults[key];
|
||
if (typeof option === 'function' && isCallable) {
|
||
option = option(operator, token);
|
||
}
|
||
} else if (typeof option === 'function' && isCallable) {
|
||
option = option(operator, token.t, token);
|
||
}
|
||
return option;
|
||
},
|
||
/**
|
||
* @template {keyof Opts} L
|
||
* @param {L} key Name of options object to use
|
||
* @param {string} [operator]
|
||
* @param {MultiToken} [token]
|
||
* @returns {Opts[L] | any}
|
||
*/
|
||
getObj(key, operator, token) {
|
||
let obj = this.o[key];
|
||
if (typeof obj === 'function' && operator != null) {
|
||
obj = obj(operator, token.t, token);
|
||
}
|
||
return obj;
|
||
},
|
||
/**
|
||
* Convert the given token to a rendered element that may be added to the
|
||
* calling-interface's DOM
|
||
* @param {MultiToken} token Token to render to an HTML element
|
||
* @returns {any} Render result; e.g., HTML string, DOM element, React
|
||
* Component, etc.
|
||
*/
|
||
render(token) {
|
||
const ir = token.render(this); // intermediate representation
|
||
const renderFn = this.get('render', null, token) || this.defaultRender;
|
||
return renderFn(ir, token.t, token);
|
||
}
|
||
};
|
||
function noop(val) {
|
||
return val;
|
||
}
|
||
|
||
var options = /*#__PURE__*/Object.freeze({
|
||
__proto__: null,
|
||
Options: Options,
|
||
defaults: defaults
|
||
});
|
||
|
||
/******************************************************************************
|
||
Multi-Tokens
|
||
Tokens composed of arrays of TextTokens
|
||
******************************************************************************/
|
||
|
||
/**
|
||
* @param {string} value
|
||
* @param {Token[]} tokens
|
||
*/
|
||
function MultiToken(value, tokens) {
|
||
this.t = 'token';
|
||
this.v = value;
|
||
this.tk = tokens;
|
||
}
|
||
|
||
/**
|
||
* Abstract class used for manufacturing tokens of text tokens. That is rather
|
||
* than the value for a token being a small string of text, it's value an array
|
||
* of text tokens.
|
||
*
|
||
* Used for grouping together URLs, emails, hashtags, and other potential
|
||
* creations.
|
||
* @class MultiToken
|
||
* @property {string} t
|
||
* @property {string} v
|
||
* @property {Token[]} tk
|
||
* @abstract
|
||
*/
|
||
MultiToken.prototype = {
|
||
isLink: false,
|
||
/**
|
||
* Return the string this token represents.
|
||
* @return {string}
|
||
*/
|
||
toString() {
|
||
return this.v;
|
||
},
|
||
/**
|
||
* What should the value for this token be in the `href` HTML attribute?
|
||
* Returns the `.toString` value by default.
|
||
* @param {string} [scheme]
|
||
* @return {string}
|
||
*/
|
||
toHref(scheme) {
|
||
return this.toString();
|
||
},
|
||
/**
|
||
* @param {Options} options Formatting options
|
||
* @returns {string}
|
||
*/
|
||
toFormattedString(options) {
|
||
const val = this.toString();
|
||
const truncate = options.get('truncate', val, this);
|
||
const formatted = options.get('format', val, this);
|
||
return truncate && formatted.length > truncate ? formatted.substring(0, truncate) + '…' : formatted;
|
||
},
|
||
/**
|
||
*
|
||
* @param {Options} options
|
||
* @returns {string}
|
||
*/
|
||
toFormattedHref(options) {
|
||
return options.get('formatHref', this.toHref(options.get('defaultProtocol')), this);
|
||
},
|
||
/**
|
||
* The start index of this token in the original input string
|
||
* @returns {number}
|
||
*/
|
||
startIndex() {
|
||
return this.tk[0].s;
|
||
},
|
||
/**
|
||
* The end index of this token in the original input string (up to this
|
||
* index but not including it)
|
||
* @returns {number}
|
||
*/
|
||
endIndex() {
|
||
return this.tk[this.tk.length - 1].e;
|
||
},
|
||
/**
|
||
Returns an object of relevant values for this token, which includes keys
|
||
* type - Kind of token ('url', 'email', etc.)
|
||
* value - Original text
|
||
* href - The value that should be added to the anchor tag's href
|
||
attribute
|
||
@method toObject
|
||
@param {string} [protocol] `'http'` by default
|
||
*/
|
||
toObject(protocol = defaults.defaultProtocol) {
|
||
return {
|
||
type: this.t,
|
||
value: this.toString(),
|
||
isLink: this.isLink,
|
||
href: this.toHref(protocol),
|
||
start: this.startIndex(),
|
||
end: this.endIndex()
|
||
};
|
||
},
|
||
/**
|
||
*
|
||
* @param {Options} options Formatting option
|
||
*/
|
||
toFormattedObject(options) {
|
||
return {
|
||
type: this.t,
|
||
value: this.toFormattedString(options),
|
||
isLink: this.isLink,
|
||
href: this.toFormattedHref(options),
|
||
start: this.startIndex(),
|
||
end: this.endIndex()
|
||
};
|
||
},
|
||
/**
|
||
* Whether this token should be rendered as a link according to the given options
|
||
* @param {Options} options
|
||
* @returns {boolean}
|
||
*/
|
||
validate(options) {
|
||
return options.get('validate', this.toString(), this);
|
||
},
|
||
/**
|
||
* Return an object that represents how this link should be rendered.
|
||
* @param {Options} options Formattinng options
|
||
*/
|
||
render(options) {
|
||
const token = this;
|
||
const href = this.toHref(options.get('defaultProtocol'));
|
||
const formattedHref = options.get('formatHref', href, this);
|
||
const tagName = options.get('tagName', href, token);
|
||
const content = this.toFormattedString(options);
|
||
const attributes = {};
|
||
const className = options.get('className', href, token);
|
||
const target = options.get('target', href, token);
|
||
const rel = options.get('rel', href, token);
|
||
const attrs = options.getObj('attributes', href, token);
|
||
const eventListeners = options.getObj('events', href, token);
|
||
attributes.href = formattedHref;
|
||
if (className) {
|
||
attributes.class = className;
|
||
}
|
||
if (target) {
|
||
attributes.target = target;
|
||
}
|
||
if (rel) {
|
||
attributes.rel = rel;
|
||
}
|
||
if (attrs) {
|
||
Object.assign(attributes, attrs);
|
||
}
|
||
return {
|
||
tagName,
|
||
attributes,
|
||
content,
|
||
eventListeners
|
||
};
|
||
}
|
||
};
|
||
|
||
/**
|
||
* Create a new token that can be emitted by the parser state machine
|
||
* @param {string} type readable type of the token
|
||
* @param {object} props properties to assign or override, including isLink = true or false
|
||
* @returns {new (value: string, tokens: Token[]) => MultiToken} new token class
|
||
*/
|
||
function createTokenClass(type, props) {
|
||
class Token extends MultiToken {
|
||
constructor(value, tokens) {
|
||
super(value, tokens);
|
||
this.t = type;
|
||
}
|
||
}
|
||
for (const p in props) {
|
||
Token.prototype[p] = props[p];
|
||
}
|
||
Token.t = type;
|
||
return Token;
|
||
}
|
||
|
||
/**
|
||
Represents a list of tokens making up a valid email address
|
||
*/
|
||
const Email = createTokenClass('email', {
|
||
isLink: true,
|
||
toHref() {
|
||
return 'mailto:' + this.toString();
|
||
}
|
||
});
|
||
|
||
/**
|
||
Represents some plain text
|
||
*/
|
||
const Text = createTokenClass('text');
|
||
|
||
/**
|
||
Multi-linebreak token - represents a line break
|
||
@class Nl
|
||
*/
|
||
const Nl = createTokenClass('nl');
|
||
|
||
/**
|
||
Represents a list of text tokens making up a valid URL
|
||
@class Url
|
||
*/
|
||
const Url = createTokenClass('url', {
|
||
isLink: true,
|
||
/**
|
||
Lowercases relevant parts of the domain and adds the protocol if
|
||
required. Note that this will not escape unsafe HTML characters in the
|
||
URL.
|
||
@param {string} [scheme] default scheme (e.g., 'https')
|
||
@return {string} the full href
|
||
*/
|
||
toHref(scheme = defaults.defaultProtocol) {
|
||
// Check if already has a prefix scheme
|
||
return this.hasProtocol() ? this.v : `${scheme}://${this.v}`;
|
||
},
|
||
/**
|
||
* Check whether this URL token has a protocol
|
||
* @return {boolean}
|
||
*/
|
||
hasProtocol() {
|
||
const tokens = this.tk;
|
||
return tokens.length >= 2 && tokens[0].t !== LOCALHOST && tokens[1].t === COLON;
|
||
}
|
||
});
|
||
|
||
var multi = /*#__PURE__*/Object.freeze({
|
||
__proto__: null,
|
||
Base: MultiToken,
|
||
Email: Email,
|
||
MultiToken: MultiToken,
|
||
Nl: Nl,
|
||
Text: Text,
|
||
Url: Url,
|
||
createTokenClass: createTokenClass
|
||
});
|
||
|
||
/**
|
||
Not exactly parser, more like the second-stage scanner (although we can
|
||
theoretically hotswap the code here with a real parser in the future... but
|
||
for a little URL-finding utility abstract syntax trees may be a little
|
||
overkill).
|
||
|
||
URL format: http://en.wikipedia.org/wiki/URI_scheme
|
||
Email format: http://en.wikipedia.org/wiki/EmailAddress (links to RFC in
|
||
reference)
|
||
|
||
@module linkify
|
||
@submodule parser
|
||
@main run
|
||
*/
|
||
|
||
const makeState = arg => new State(arg);
|
||
|
||
/**
|
||
* Generate the parser multi token-based state machine
|
||
* @param {{ groups: Collections<string> }} tokens
|
||
*/
|
||
function init$1({
|
||
groups
|
||
}) {
|
||
// Types of characters the URL can definitely end in
|
||
const qsAccepting = groups.domain.concat([AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, NUM, PERCENT, PIPE, PLUS, POUND, SLASH, SYM, TILDE, UNDERSCORE]);
|
||
|
||
// Types of tokens that can follow a URL and be part of the query string
|
||
// but cannot be the very last characters
|
||
// Characters that cannot appear in the URL at all should be excluded
|
||
const qsNonAccepting = [APOSTROPHE, COLON, COMMA, DOT, EXCLAMATION, PERCENT, QUERY, QUOTE, SEMI, OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENBRACE, CLOSEBRACE, CLOSEBRACKET, OPENBRACKET, OPENPAREN, CLOSEPAREN, FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN, LEFTCORNERBRACKET, RIGHTCORNERBRACKET, LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET, FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN];
|
||
|
||
// For addresses without the mailto prefix
|
||
// Tokens allowed in the localpart of the email
|
||
const localpartAccepting = [AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, OPENBRACE, CLOSEBRACE, PERCENT, PIPE, PLUS, POUND, QUERY, SLASH, SYM, TILDE, UNDERSCORE];
|
||
|
||
// The universal starting state.
|
||
/**
|
||
* @type State<Token>
|
||
*/
|
||
const Start = makeState();
|
||
const Localpart = tt(Start, TILDE); // Local part of the email address
|
||
ta(Localpart, localpartAccepting, Localpart);
|
||
ta(Localpart, groups.domain, Localpart);
|
||
const Domain = makeState(),
|
||
Scheme = makeState(),
|
||
SlashScheme = makeState();
|
||
ta(Start, groups.domain, Domain); // parsed string ends with a potential domain name (A)
|
||
ta(Start, groups.scheme, Scheme); // e.g., 'mailto'
|
||
ta(Start, groups.slashscheme, SlashScheme); // e.g., 'http'
|
||
|
||
ta(Domain, localpartAccepting, Localpart);
|
||
ta(Domain, groups.domain, Domain);
|
||
const LocalpartAt = tt(Domain, AT); // Local part of the email address plus @
|
||
|
||
tt(Localpart, AT, LocalpartAt); // close to an email address now
|
||
|
||
// Local part of an email address can be e.g. 'http' or 'mailto'
|
||
tt(Scheme, AT, LocalpartAt);
|
||
tt(SlashScheme, AT, LocalpartAt);
|
||
const LocalpartDot = tt(Localpart, DOT); // Local part of the email address plus '.' (localpart cannot end in .)
|
||
ta(LocalpartDot, localpartAccepting, Localpart);
|
||
ta(LocalpartDot, groups.domain, Localpart);
|
||
const EmailDomain = makeState();
|
||
ta(LocalpartAt, groups.domain, EmailDomain); // parsed string starts with local email info + @ with a potential domain name
|
||
ta(EmailDomain, groups.domain, EmailDomain);
|
||
const EmailDomainDot = tt(EmailDomain, DOT); // domain followed by DOT
|
||
ta(EmailDomainDot, groups.domain, EmailDomain);
|
||
const Email$1 = makeState(Email); // Possible email address (could have more tlds)
|
||
ta(EmailDomainDot, groups.tld, Email$1);
|
||
ta(EmailDomainDot, groups.utld, Email$1);
|
||
tt(LocalpartAt, LOCALHOST, Email$1);
|
||
|
||
// Hyphen can jump back to a domain name
|
||
const EmailDomainHyphen = tt(EmailDomain, HYPHEN); // parsed string starts with local email info + @ with a potential domain name
|
||
tt(EmailDomainHyphen, HYPHEN, EmailDomainHyphen);
|
||
ta(EmailDomainHyphen, groups.domain, EmailDomain);
|
||
ta(Email$1, groups.domain, EmailDomain);
|
||
tt(Email$1, DOT, EmailDomainDot);
|
||
tt(Email$1, HYPHEN, EmailDomainHyphen);
|
||
|
||
// Final possible email states
|
||
const EmailColon = tt(Email$1, COLON); // URL followed by colon (potential port number here)
|
||
/*const EmailColonPort = */
|
||
ta(EmailColon, groups.numeric, Email); // URL followed by colon and port number
|
||
|
||
// Account for dots and hyphens. Hyphens are usually parts of domain names
|
||
// (but not TLDs)
|
||
const DomainHyphen = tt(Domain, HYPHEN); // domain followed by hyphen
|
||
const DomainDot = tt(Domain, DOT); // domain followed by DOT
|
||
tt(DomainHyphen, HYPHEN, DomainHyphen);
|
||
ta(DomainHyphen, groups.domain, Domain);
|
||
ta(DomainDot, localpartAccepting, Localpart);
|
||
ta(DomainDot, groups.domain, Domain);
|
||
const DomainDotTld = makeState(Url); // Simplest possible URL with no query string
|
||
ta(DomainDot, groups.tld, DomainDotTld);
|
||
ta(DomainDot, groups.utld, DomainDotTld);
|
||
ta(DomainDotTld, groups.domain, Domain);
|
||
ta(DomainDotTld, localpartAccepting, Localpart);
|
||
tt(DomainDotTld, DOT, DomainDot);
|
||
tt(DomainDotTld, HYPHEN, DomainHyphen);
|
||
tt(DomainDotTld, AT, LocalpartAt);
|
||
const DomainDotTldColon = tt(DomainDotTld, COLON); // URL followed by colon (potential port number here)
|
||
const DomainDotTldColonPort = makeState(Url); // TLD followed by a port number
|
||
ta(DomainDotTldColon, groups.numeric, DomainDotTldColonPort);
|
||
|
||
// Long URL with optional port and maybe query string
|
||
const Url$1 = makeState(Url);
|
||
|
||
// URL with extra symbols at the end, followed by an opening bracket
|
||
const UrlNonaccept = makeState(); // URL followed by some symbols (will not be part of the final URL)
|
||
|
||
// Query strings
|
||
ta(Url$1, qsAccepting, Url$1);
|
||
ta(Url$1, qsNonAccepting, UrlNonaccept);
|
||
ta(UrlNonaccept, qsAccepting, Url$1);
|
||
ta(UrlNonaccept, qsNonAccepting, UrlNonaccept);
|
||
|
||
// Become real URLs after `SLASH` or `COLON NUM SLASH`
|
||
// Here works with or without scheme:// prefix
|
||
tt(DomainDotTld, SLASH, Url$1);
|
||
tt(DomainDotTldColonPort, SLASH, Url$1);
|
||
|
||
// Note that domains that begin with schemes are treated slighly differently
|
||
const SchemeColon = tt(Scheme, COLON); // e.g., 'mailto:'
|
||
const SlashSchemeColon = tt(SlashScheme, COLON); // e.g., 'http:'
|
||
const SlashSchemeColonSlash = tt(SlashSchemeColon, SLASH); // e.g., 'http:/'
|
||
|
||
const UriPrefix = tt(SlashSchemeColonSlash, SLASH); // e.g., 'http://'
|
||
|
||
// Scheme states can transition to domain states
|
||
ta(Scheme, groups.domain, Domain);
|
||
tt(Scheme, DOT, DomainDot);
|
||
tt(Scheme, HYPHEN, DomainHyphen);
|
||
ta(SlashScheme, groups.domain, Domain);
|
||
tt(SlashScheme, DOT, DomainDot);
|
||
tt(SlashScheme, HYPHEN, DomainHyphen);
|
||
|
||
// Force URL with scheme prefix followed by anything sane
|
||
ta(SchemeColon, groups.domain, Url$1);
|
||
tt(SchemeColon, SLASH, Url$1);
|
||
tt(SchemeColon, QUERY, Url$1);
|
||
ta(UriPrefix, groups.domain, Url$1);
|
||
ta(UriPrefix, qsAccepting, Url$1);
|
||
tt(UriPrefix, SLASH, Url$1);
|
||
const bracketPairs = [[OPENBRACE, CLOSEBRACE],
|
||
// {}
|
||
[OPENBRACKET, CLOSEBRACKET],
|
||
// []
|
||
[OPENPAREN, CLOSEPAREN],
|
||
// ()
|
||
[OPENANGLEBRACKET, CLOSEANGLEBRACKET],
|
||
// <>
|
||
[FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN],
|
||
// ()
|
||
[LEFTCORNERBRACKET, RIGHTCORNERBRACKET],
|
||
// 「」
|
||
[LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET],
|
||
// 『』
|
||
[FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN] // <>
|
||
];
|
||
for (let i = 0; i < bracketPairs.length; i++) {
|
||
const [OPEN, CLOSE] = bracketPairs[i];
|
||
const UrlOpen = tt(Url$1, OPEN); // URL followed by open bracket
|
||
|
||
// Continue not accepting for open brackets
|
||
tt(UrlNonaccept, OPEN, UrlOpen);
|
||
|
||
// Closing bracket component. This character WILL be included in the URL
|
||
tt(UrlOpen, CLOSE, Url$1);
|
||
|
||
// URL that beings with an opening bracket, followed by a symbols.
|
||
// Note that the final state can still be `UrlOpen` (if the URL has a
|
||
// single opening bracket for some reason).
|
||
const UrlOpenQ = makeState(Url);
|
||
ta(UrlOpen, qsAccepting, UrlOpenQ);
|
||
const UrlOpenSyms = makeState(); // UrlOpen followed by some symbols it cannot end it
|
||
ta(UrlOpen, qsNonAccepting);
|
||
|
||
// URL that begins with an opening bracket, followed by some symbols
|
||
ta(UrlOpenQ, qsAccepting, UrlOpenQ);
|
||
ta(UrlOpenQ, qsNonAccepting, UrlOpenSyms);
|
||
ta(UrlOpenSyms, qsAccepting, UrlOpenQ);
|
||
ta(UrlOpenSyms, qsNonAccepting, UrlOpenSyms);
|
||
|
||
// Close brace/bracket to become regular URL
|
||
tt(UrlOpenQ, CLOSE, Url$1);
|
||
tt(UrlOpenSyms, CLOSE, Url$1);
|
||
}
|
||
tt(Start, LOCALHOST, DomainDotTld); // localhost is a valid URL state
|
||
tt(Start, NL, Nl); // single new line
|
||
|
||
return {
|
||
start: Start,
|
||
tokens: tk
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Run the parser state machine on a list of scanned string-based tokens to
|
||
* create a list of multi tokens, each of which represents a URL, email address,
|
||
* plain text, etc.
|
||
*
|
||
* @param {State<MultiToken>} start parser start state
|
||
* @param {string} input the original input used to generate the given tokens
|
||
* @param {Token[]} tokens list of scanned tokens
|
||
* @returns {MultiToken[]}
|
||
*/
|
||
function run(start, input, tokens) {
|
||
let len = tokens.length;
|
||
let cursor = 0;
|
||
let multis = [];
|
||
let textTokens = [];
|
||
while (cursor < len) {
|
||
let state = start;
|
||
let secondState = null;
|
||
let nextState = null;
|
||
let multiLength = 0;
|
||
let latestAccepting = null;
|
||
let sinceAccepts = -1;
|
||
while (cursor < len && !(secondState = state.go(tokens[cursor].t))) {
|
||
// Starting tokens with nowhere to jump to.
|
||
// Consider these to be just plain text
|
||
textTokens.push(tokens[cursor++]);
|
||
}
|
||
while (cursor < len && (nextState = secondState || state.go(tokens[cursor].t))) {
|
||
// Get the next state
|
||
secondState = null;
|
||
state = nextState;
|
||
|
||
// Keep track of the latest accepting state
|
||
if (state.accepts()) {
|
||
sinceAccepts = 0;
|
||
latestAccepting = state;
|
||
} else if (sinceAccepts >= 0) {
|
||
sinceAccepts++;
|
||
}
|
||
cursor++;
|
||
multiLength++;
|
||
}
|
||
if (sinceAccepts < 0) {
|
||
// No accepting state was found, part of a regular text token add
|
||
// the first text token to the text tokens array and try again from
|
||
// the next
|
||
cursor -= multiLength;
|
||
if (cursor < len) {
|
||
textTokens.push(tokens[cursor]);
|
||
cursor++;
|
||
}
|
||
} else {
|
||
// Accepting state!
|
||
// First close off the textTokens (if available)
|
||
if (textTokens.length > 0) {
|
||
multis.push(initMultiToken(Text, input, textTokens));
|
||
textTokens = [];
|
||
}
|
||
|
||
// Roll back to the latest accepting state
|
||
cursor -= sinceAccepts;
|
||
multiLength -= sinceAccepts;
|
||
|
||
// Create a new multitoken
|
||
const Multi = latestAccepting.t;
|
||
const subtokens = tokens.slice(cursor - multiLength, cursor);
|
||
multis.push(initMultiToken(Multi, input, subtokens));
|
||
}
|
||
}
|
||
|
||
// Finally close off the textTokens (if available)
|
||
if (textTokens.length > 0) {
|
||
multis.push(initMultiToken(Text, input, textTokens));
|
||
}
|
||
return multis;
|
||
}
|
||
|
||
/**
|
||
* Utility function for instantiating a new multitoken with all the relevant
|
||
* fields during parsing.
|
||
* @param {new (value: string, tokens: Token[]) => MultiToken} Multi class to instantiate
|
||
* @param {string} input original input string
|
||
* @param {Token[]} tokens consecutive tokens scanned from input string
|
||
* @returns {MultiToken}
|
||
*/
|
||
function initMultiToken(Multi, input, tokens) {
|
||
const startIdx = tokens[0].s;
|
||
const endIdx = tokens[tokens.length - 1].e;
|
||
const value = input.slice(startIdx, endIdx);
|
||
return new Multi(value, tokens);
|
||
}
|
||
|
||
const warn = typeof console !== 'undefined' && console && console.warn || (() => {});
|
||
const warnAdvice = 'until manual call of linkify.init(). Register all schemes and plugins before invoking linkify the first time.';
|
||
|
||
// Side-effect initialization state
|
||
const INIT = {
|
||
scanner: null,
|
||
parser: null,
|
||
tokenQueue: [],
|
||
pluginQueue: [],
|
||
customSchemes: [],
|
||
initialized: false
|
||
};
|
||
|
||
/**
|
||
* @typedef {{
|
||
* start: State<string>,
|
||
* tokens: { groups: Collections<string> } & typeof tk
|
||
* }} ScannerInit
|
||
*/
|
||
|
||
/**
|
||
* @typedef {{
|
||
* start: State<MultiToken>,
|
||
* tokens: typeof multi
|
||
* }} ParserInit
|
||
*/
|
||
|
||
/**
|
||
* @typedef {(arg: { scanner: ScannerInit }) => void} TokenPlugin
|
||
*/
|
||
|
||
/**
|
||
* @typedef {(arg: { scanner: ScannerInit, parser: ParserInit }) => void} Plugin
|
||
*/
|
||
|
||
/**
|
||
* De-register all plugins and reset the internal state-machine. Used for
|
||
* testing; not required in practice.
|
||
* @private
|
||
*/
|
||
function reset() {
|
||
State.groups = {};
|
||
INIT.scanner = null;
|
||
INIT.parser = null;
|
||
INIT.tokenQueue = [];
|
||
INIT.pluginQueue = [];
|
||
INIT.customSchemes = [];
|
||
INIT.initialized = false;
|
||
return INIT;
|
||
}
|
||
|
||
/**
|
||
* Register a token plugin to allow the scanner to recognize additional token
|
||
* types before the parser state machine is constructed from the results.
|
||
* @param {string} name of plugin to register
|
||
* @param {TokenPlugin} plugin function that accepts the scanner state machine
|
||
* and available scanner tokens and collections and extends the state machine to
|
||
* recognize additional tokens or groups.
|
||
*/
|
||
function registerTokenPlugin(name, plugin) {
|
||
if (typeof plugin !== 'function') {
|
||
throw new Error(`linkifyjs: Invalid token plugin ${plugin} (expects function)`);
|
||
}
|
||
for (let i = 0; i < INIT.tokenQueue.length; i++) {
|
||
if (name === INIT.tokenQueue[i][0]) {
|
||
warn(`linkifyjs: token plugin "${name}" already registered - will be overwritten`);
|
||
INIT.tokenQueue[i] = [name, plugin];
|
||
return;
|
||
}
|
||
}
|
||
INIT.tokenQueue.push([name, plugin]);
|
||
if (INIT.initialized) {
|
||
warn(`linkifyjs: already initialized - will not register token plugin "${name}" ${warnAdvice}`);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Register a linkify plugin
|
||
* @param {string} name of plugin to register
|
||
* @param {Plugin} plugin function that accepts the parser state machine and
|
||
* extends the parser to recognize additional link types
|
||
*/
|
||
function registerPlugin(name, plugin) {
|
||
if (typeof plugin !== 'function') {
|
||
throw new Error(`linkifyjs: Invalid plugin ${plugin} (expects function)`);
|
||
}
|
||
for (let i = 0; i < INIT.pluginQueue.length; i++) {
|
||
if (name === INIT.pluginQueue[i][0]) {
|
||
warn(`linkifyjs: plugin "${name}" already registered - will be overwritten`);
|
||
INIT.pluginQueue[i] = [name, plugin];
|
||
return;
|
||
}
|
||
}
|
||
INIT.pluginQueue.push([name, plugin]);
|
||
if (INIT.initialized) {
|
||
warn(`linkifyjs: already initialized - will not register plugin "${name}" ${warnAdvice}`);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Detect URLs with the following additional protocol. Anything with format
|
||
* "protocol://..." will be considered a link. If `optionalSlashSlash` is set to
|
||
* `true`, anything with format "protocol:..." will be considered a link.
|
||
* @param {string} scheme
|
||
* @param {boolean} [optionalSlashSlash]
|
||
*/
|
||
function registerCustomProtocol(scheme, optionalSlashSlash = false) {
|
||
if (INIT.initialized) {
|
||
warn(`linkifyjs: already initialized - will not register custom scheme "${scheme}" ${warnAdvice}`);
|
||
}
|
||
if (!/^[0-9a-z]+(-[0-9a-z]+)*$/.test(scheme)) {
|
||
throw new Error(`linkifyjs: incorrect scheme format.
|
||
1. Must only contain digits, lowercase ASCII letters or "-"
|
||
2. Cannot start or end with "-"
|
||
3. "-" cannot repeat`);
|
||
}
|
||
INIT.customSchemes.push([scheme, optionalSlashSlash]);
|
||
}
|
||
|
||
/**
|
||
* Initialize the linkify state machine. Called automatically the first time
|
||
* linkify is called on a string, but may be called manually as well.
|
||
*/
|
||
function init() {
|
||
// Initialize scanner state machine and plugins
|
||
INIT.scanner = init$2(INIT.customSchemes);
|
||
for (let i = 0; i < INIT.tokenQueue.length; i++) {
|
||
INIT.tokenQueue[i][1]({
|
||
scanner: INIT.scanner
|
||
});
|
||
}
|
||
|
||
// Initialize parser state machine and plugins
|
||
INIT.parser = init$1(INIT.scanner.tokens);
|
||
for (let i = 0; i < INIT.pluginQueue.length; i++) {
|
||
INIT.pluginQueue[i][1]({
|
||
scanner: INIT.scanner,
|
||
parser: INIT.parser
|
||
});
|
||
}
|
||
INIT.initialized = true;
|
||
return INIT;
|
||
}
|
||
|
||
/**
|
||
* Parse a string into tokens that represent linkable and non-linkable sub-components
|
||
* @param {string} str
|
||
* @return {MultiToken[]} tokens
|
||
*/
|
||
function tokenize(str) {
|
||
if (!INIT.initialized) {
|
||
init();
|
||
}
|
||
return run(INIT.parser.start, str, run$1(INIT.scanner.start, str));
|
||
}
|
||
tokenize.scan = run$1; // for testing
|
||
|
||
/**
|
||
* Find a list of linkable items in the given string.
|
||
* @param {string} str string to find links in
|
||
* @param {string | Opts} [type] either formatting options or specific type of
|
||
* links to find, e.g., 'url' or 'email'
|
||
* @param {Opts} [opts] formatting options for final output. Cannot be specified
|
||
* if opts already provided in `type` argument
|
||
*/
|
||
function find(str, type = null, opts = null) {
|
||
if (type && typeof type === 'object') {
|
||
if (opts) {
|
||
throw Error(`linkifyjs: Invalid link type ${type}; must be a string`);
|
||
}
|
||
opts = type;
|
||
type = null;
|
||
}
|
||
const options = new Options(opts);
|
||
const tokens = tokenize(str);
|
||
const filtered = [];
|
||
for (let i = 0; i < tokens.length; i++) {
|
||
const token = tokens[i];
|
||
if (token.isLink && (!type || token.t === type) && options.check(token)) {
|
||
filtered.push(token.toFormattedObject(options));
|
||
}
|
||
}
|
||
return filtered;
|
||
}
|
||
|
||
/**
|
||
* Is the given string valid linkable text of some sort. Note that this does not
|
||
* trim the text for you.
|
||
*
|
||
* Optionally pass in a second `type` param, which is the type of link to test
|
||
* for.
|
||
*
|
||
* For example,
|
||
*
|
||
* linkify.test(str, 'email');
|
||
*
|
||
* Returns `true` if str is a valid email.
|
||
* @param {string} str string to test for links
|
||
* @param {string} [type] optional specific link type to look for
|
||
* @returns boolean true/false
|
||
*/
|
||
function test(str, type = null) {
|
||
const tokens = tokenize(str);
|
||
return tokens.length === 1 && tokens[0].isLink && (!type || tokens[0].t === type);
|
||
}
|
||
|
||
export { MultiToken, Options, State, createTokenClass, find, init, multi, options, regexp, registerCustomProtocol, registerPlugin, registerTokenPlugin, reset, stringToArray, test, multi as text, tokenize };
|