tutanota/libs/linkify.js
2025-08-04 13:34:21 +02:00

1839 lines
62 KiB
JavaScript
Vendored
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// THIS FILE IS AUTOMATICALLY GENERATED DO NOT EDIT DIRECTLY
// See update-tlds.js for encoding/decoding format
// https://data.iana.org/TLD/tlds-alpha-by-domain.txt
const encodedTlds = 'aaa1rp3bb0ott3vie4c1le2ogado5udhabi7c0ademy5centure6ountant0s9o1tor4d0s1ult4e0g1ro2tna4f0l1rica5g0akhan5ency5i0g1rbus3force5tel5kdn3l0ibaba4pay4lfinanz6state5y2sace3tom5m0azon4ericanexpress7family11x2fam3ica3sterdam8nalytics7droid5quan4z2o0l2partments8p0le4q0uarelle8r0ab1mco4chi3my2pa2t0e3s0da2ia2sociates9t0hleta5torney7u0ction5di0ble3o3spost5thor3o0s4w0s2x0a2z0ure5ba0by2idu3namex4d1k2r0celona5laycard4s5efoot5gains6seball5ketball8uhaus5yern5b0c1t1va3cg1n2d1e0ats2uty4er2rlin4st0buy5t2f1g1h0arti5i0ble3d1ke2ng0o3o1z2j1lack0friday9ockbuster8g1omberg7ue3m0s1w2n0pparibas9o0ats3ehringer8fa2m1nd2o0k0ing5sch2tik2on4t1utique6x2r0adesco6idgestone9oadway5ker3ther5ussels7s1t1uild0ers6siness6y1zz3v1w1y1z0h3ca0b1fe2l0l1vinklein9m0era3p2non3petown5ital0one8r0avan4ds2e0er0s4s2sa1e1h1ino4t0ering5holic7ba1n1re3c1d1enter4o1rn3f0a1d2g1h0anel2nel4rity4se2t2eap3intai5ristmas6ome4urch5i0priani6rcle4sco3tadel4i0c2y3k1l0aims4eaning6ick2nic1que6othing5ud3ub0med6m1n1o0ach3des3ffee4llege4ogne5m0mbank4unity6pany2re3uter5sec4ndos3struction8ulting7tact3ractors9oking4l1p2rsica5untry4pon0s4rses6pa2r0edit0card4union9icket5own3s1uise0s6u0isinella9v1w1x1y0mru3ou3z2dad1nce3ta1e1ing3sun4y2clk3ds2e0al0er2s3gree4livery5l1oitte5ta3mocrat6ntal2ist5si0gn4v2hl2iamonds6et2gital5rect0ory7scount3ver5h2y2j1k1m1np2o0cs1tor4g1mains5t1wnload7rive4tv2ubai3nlop4pont4rban5vag2r2z2earth3t2c0o2deka3u0cation8e1g1mail3erck5nergy4gineer0ing9terprises10pson4quipment8r0icsson6ni3s0q1tate5t1u0rovision8s2vents5xchange6pert3osed4ress5traspace10fage2il1rwinds6th3mily4n0s2rm0ers5shion4t3edex3edback6rrari3ero6i0delity5o2lm2nal1nce1ial7re0stone6mdale6sh0ing5t0ness6j1k1lickr3ghts4r2orist4wers5y2m1o0o0d1tball6rd1ex2sale4um3undation8x2r0ee1senius7l1ogans4ntier7tr2ujitsu5n0d2rniture7tbol5yi3ga0l0lery3o1up4me0s3p1rden4y2b0iz3d0n2e0a1nt0ing5orge5f1g0ee3h1i0ft0s3ves2ing5l0ass3e1obal2o4m0ail3bh2o1x2n1odaddy5ld0point6f2o0dyear5g0le4p1t1v2p1q1r0ainger5phics5tis4een3ipe3ocery4up4s1t1u0cci3ge2ide2tars5ru3w1y2hair2mburg5ngout5us3bo2dfc0bank7ealth0care8lp1sinki6re1mes5iphop4samitsu7tachi5v2k0t2m1n1ockey4ldings5iday5medepot5goods5s0ense7nda3rse3spital5t0ing5t0els3mail5use3w2r1sbc3t1u0ghes5yatt3undai7ibm2cbc2e1u2d1e0ee3fm2kano4l1m0amat4db2mo0bilien9n0c1dustries8finiti5o2g1k1stitute6urance4e4t0ernational10uit4vestments10o1piranga7q1r0ish4s0maili5t0anbul7t0au2v3jaguar4va3cb2e0ep2tzt3welry6io2ll2m0p2nj2o0bs1urg4t1y2p0morgan6rs3uegos4niper7kaufen5ddi3e0rryhotels6properties14fh2g1h1i0a1ds2m1ndle4tchen5wi3m1n1oeln3matsu5sher5p0mg2n2r0d1ed3uokgroup8w1y0oto4z2la0caixa5mborghini8er3nd0rover6xess5salle5t0ino3robe5w0yer5b1c1ds2ease3clerc5frak4gal2o2xus4gbt3i0dl2fe0insurance9style7ghting6ke2lly3mited4o2ncoln4k2ve1ing5k1lc1p2oan0s3cker3us3l1ndon4tte1o3ve3pl0financial11r1s1t0d0a3u0ndbeck6xe1ury5v1y2ma0drid4if1son4keup4n0agement7go3p1rket0ing3s4riott5shalls7ttel5ba2c0kinsey7d1e0d0ia3et2lbourne7me1orial6n0u2rckmsd7g1h1iami3crosoft7l1ni1t2t0subishi9k1l0b1s2m0a2n1o0bi0le4da2e1i1m1nash3ey2ster5rmon3tgage6scow4to0rcycles9v0ie4p1q1r1s0d2t0n1r2u0seum3ic4v1w1x1y1z2na0b1goya4me2vy3ba2c1e0c1t0bank4flix4work5ustar5w0s2xt0direct7us4f0l2g0o2hk2i0co2ke1on3nja3ssan1y5l1o0kia3rton4w0ruz3tv4p1r0a1w2tt2u1yc2z2obi1server7ffice5kinawa6layan0group9lo3m0ega4ne1g1l0ine5oo2pen3racle3nge4g0anic5igins6saka4tsuka4t2vh3pa0ge2nasonic7ris2s1tners4s1y3y2ccw3e0t2f0izer5g1h0armacy6d1ilips5one2to0graphy6s4ysio5ics1tet2ures6d1n0g1k2oneer5zza4k1l0ace2y0station9umbing5s3m1n0c2ohl2ker3litie5rn2st3r0axi3ess3ime3o0d0uctions8f1gressive8mo2perties3y5tection8u0dential9s1t1ub2w0c2y2qa1pon3uebec3st5racing4dio4e0ad1lestate6tor2y4cipes5d0stone5umbrella9hab3ise0n3t2liance6n0t0als5pair3ort3ublican8st0aurant8view0s5xroth6ich0ardli6oh3l1o1p2o0cks3deo3gers4om3s0vp3u0gby3hr2n2w0e2yukyu6sa0arland6fe0ty4kura4le1on3msclub4ung5ndvik0coromant12ofi4p1rl2s1ve2xo3b0i1s2c0b1haeffler7midt4olarships8ol3ule3warz5ience5ot3d1e0arch3t2cure1ity6ek2lect4ner3rvices6ven3w1x0y3fr2g1h0angrila6rp3ell3ia1ksha5oes2p0ping5uji3w3i0lk2na1gles5te3j1k0i0n2y0pe4l0ing4m0art3ile4n0cf3o0ccer3ial4ftbank4ware6hu2lar2utions7ng1y2y2pa0ce3ort2t3r0l2s1t0ada2ples4r1tebank4farm7c0group6ockholm6rage3e3ream4udio2y3yle4u0cks3pplies3y2ort5rf1gery5zuki5v1watch4iss4x1y0dney4stems6z2tab1ipei4lk2obao4rget4tamotors6r2too4x0i3c0i2d0k2eam2ch0nology8l1masek5nnis4va3f1g1h0d1eater2re6iaa2ckets5enda4ps2res2ol4j0maxx4x2k0maxx5l1m0all4n1o0day3kyo3ols3p1ray3shiba5tal3urs3wn2yota3s3r0ade1ing4ining5vel0ers0insurance16ust3v2t1ube2i1nes3shu4v0s2w1z2ua1bank3s2g1k1nicom3versity8o2ol2ps2s1y1z2va0cations7na1guard7c1e0gas3ntures6risign5mögensberater2ung14sicherung10t2g1i0ajes4deo3g1king4llas4n1p1rgin4sa1ion4va1o3laanderen9n1odka3lvo3te1ing3o2yage5u2wales2mart4ter4ng0gou5tch0es6eather0channel12bcam3er2site5d0ding5ibo2r3f1hoswho6ien2ki2lliamhill9n0dows4e1ners6me2olterskluwer11odside6rk0s2ld3w2s1tc1f3xbox3erox4ihuan4n2xx2yz3yachts4hoo3maxun5ndex5e1odobashi7ga2kohama6u0tube6t1un3za0ppos4ra3ero3ip2m1one3uerich6w2';
// Internationalized domain names containing non-ASCII
const encodedUtlds = 'ελ1υ2бг1ел3дети4ею2католик6ом3мкд2он1сква6онлайн5рг3рус2ф2сайт3рб3украз3հայ3ישראל5קום3ابوظبي5رامكو5لاردن4بحرين5جزائر5سعودية6عليان5مغرب5مارات5یران5بارت2زار4يتك3ھارت5تونس4سودان3رية5شبكة4عراق2ب2مان4فلسطين6قطر3كاثوليك6وم3مصر2ليسيا5وريتانيا7قع4همراهاکستان7ڀارت4कॉम3नेट3भारत0म्3ोत5संगठन5বাংলা5ভারত2ৰত4ਭਾਰਤ4ભારત4ଭାରତ4இந்தியா6லங்கை6சிங்கப்பூர்11భారత్5ಭಾರತ4ഭാരത5ලකා4คอม3ไทย3ລາວ3გე2みんな3アマゾン4クラウド4グーグル4コム2ストア3セール3ファッション6ポイント4世界2中信1国1國1文网3亚马逊3企业2佛山2信息2健康2八卦2公司1益2台湾1灣2商城1店1标2嘉里0大酒店5在线2大拿2天主教3娱乐2家電2广东2微博2慈善2我爱你3手机2招聘2政务1府2新加坡2闻2时尚2書籍2机构2淡马锡3游戏2澳門2点看2移动2组织机构4网址1店1站1络2联通2谷歌2购物2通販2集团2電訊盈科4飞利浦3食品2餐厅2香格里拉3港2닷넷1컴2삼성2한국2';
/**
* Finite State Machine generation utilities
*/
/**
* @template T
* @typedef {{ [group: string]: T[] }} Collections
*/
/**
* @typedef {{ [group: string]: true }} Flags
*/
// Keys in scanner Collections instances
const numeric = 'numeric';
const ascii = 'ascii';
const alpha = 'alpha';
const asciinumeric = 'asciinumeric';
const alphanumeric = 'alphanumeric';
const domain = 'domain';
const emoji = 'emoji';
const scheme = 'scheme';
const slashscheme = 'slashscheme';
const whitespace = 'whitespace';
/**
* @template T
* @param {string} name
* @param {Collections<T>} groups to register in
* @returns {T[]} Current list of tokens in the given collection
*/
function registerGroup(name, groups) {
if (!(name in groups)) {
groups[name] = [];
}
return groups[name];
}
/**
* @template T
* @param {T} t token to add
* @param {Collections<T>} groups
* @param {Flags} flags
*/
function addToGroups(t, flags, groups) {
if (flags[numeric]) {
flags[asciinumeric] = true;
flags[alphanumeric] = true;
}
if (flags[ascii]) {
flags[asciinumeric] = true;
flags[alpha] = true;
}
if (flags[asciinumeric]) {
flags[alphanumeric] = true;
}
if (flags[alpha]) {
flags[alphanumeric] = true;
}
if (flags[alphanumeric]) {
flags[domain] = true;
}
if (flags[emoji]) {
flags[domain] = true;
}
for (const k in flags) {
const group = registerGroup(k, groups);
if (group.indexOf(t) < 0) {
group.push(t);
}
}
}
/**
* @template T
* @param {T} t token to check
* @param {Collections<T>} groups
* @returns {Flags} group flags that contain this token
*/
function flagsForToken(t, groups) {
const result = {};
for (const c in groups) {
if (groups[c].indexOf(t) >= 0) {
result[c] = true;
}
}
return result;
}
/**
* @template T
* @typedef {null | T } Transition
*/
/**
* Define a basic state machine state. j is the list of character transitions,
* jr is the list of regex-match transitions, jd is the default state to
* transition to t is the accepting token type, if any. If this is the terminal
* state, then it does not emit a token.
*
* The template type T represents the type of the token this state accepts. This
* should be a string (such as of the token exports in `text.js`) or a
* MultiToken subclass (from `multi.js`)
*
* @template T
* @param {T} [token] Token that this state emits
*/
function State(token = null) {
// this.n = null; // DEBUG: State name
/** @type {{ [input: string]: State<T> }} j */
this.j = {}; // IMPLEMENTATION 1
// this.j = []; // IMPLEMENTATION 2
/** @type {[RegExp, State<T>][]} jr */
this.jr = [];
/** @type {?State<T>} jd */
this.jd = null;
/** @type {?T} t */
this.t = token;
}
/**
* Scanner token groups
* @type Collections<string>
*/
State.groups = {};
State.prototype = {
accepts() {
return !!this.t;
},
/**
* Follow an existing transition from the given input to the next state.
* Does not mutate.
* @param {string} input character or token type to transition on
* @returns {?State<T>} the next state, if any
*/
go(input) {
const state = this;
const nextState = state.j[input];
if (nextState) {
return nextState;
}
for (let i = 0; i < state.jr.length; i++) {
const regex = state.jr[i][0];
const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
if (nextState && regex.test(input)) {
return nextState;
}
}
// Nowhere left to jump! Return default, if any
return state.jd;
},
/**
* Whether the state has a transition for the given input. Set the second
* argument to true to only look for an exact match (and not a default or
* regular-expression-based transition)
* @param {string} input
* @param {boolean} exactOnly
*/
has(input, exactOnly = false) {
return exactOnly ? input in this.j : !!this.go(input);
},
/**
* Short for "transition all"; create a transition from the array of items
* in the given list to the same final resulting state.
* @param {string | string[]} inputs Group of inputs to transition on
* @param {Transition<T> | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
*/
ta(inputs, next, flags, groups) {
for (let i = 0; i < inputs.length; i++) {
this.tt(inputs[i], next, flags, groups);
}
},
/**
* Short for "take regexp transition"; defines a transition for this state
* when it encounters a token which matches the given regular expression
* @param {RegExp} regexp Regular expression transition (populate first)
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
* @returns {State<T>} taken after the given input
*/
tr(regexp, next, flags, groups) {
groups = groups || State.groups;
let nextState;
if (next && next.j) {
nextState = next;
} else {
// Token with maybe token groups
nextState = new State(next);
if (flags && groups) {
addToGroups(next, flags, groups);
}
}
this.jr.push([regexp, nextState]);
return nextState;
},
/**
* Short for "take transitions", will take as many sequential transitions as
* the length of the given input and returns the
* resulting final state.
* @param {string | string[]} input
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
* @returns {State<T>} taken after the given input
*/
ts(input, next, flags, groups) {
let state = this;
const len = input.length;
if (!len) {
return state;
}
for (let i = 0; i < len - 1; i++) {
state = state.tt(input[i]);
}
return state.tt(input[len - 1], next, flags, groups);
},
/**
* Short for "take transition", this is a method for building/working with
* state machines.
*
* If a state already exists for the given input, returns it.
*
* If a token is specified, that state will emit that token when reached by
* the linkify engine.
*
* If no state exists, it will be initialized with some default transitions
* that resemble existing default transitions.
*
* If a state is given for the second argument, that state will be
* transitioned to on the given input regardless of what that input
* previously did.
*
* Specify a token group flags to define groups that this token belongs to.
* The token will be added to corresponding entires in the given groups
* object.
*
* @param {string} input character, token type to transition on
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of groups
* @returns {State<T>} taken after the given input
*/
tt(input, next, flags, groups) {
groups = groups || State.groups;
const state = this;
// Check if existing state given, just a basic transition
if (next && next.j) {
state.j[input] = next;
return next;
}
const t = next;
// Take the transition with the usual default mechanisms and use that as
// a template for creating the next state
let nextState,
templateState = state.go(input);
if (templateState) {
nextState = new State();
Object.assign(nextState.j, templateState.j);
nextState.jr.push.apply(nextState.jr, templateState.jr);
nextState.jd = templateState.jd;
nextState.t = templateState.t;
} else {
nextState = new State();
}
if (t) {
// Ensure newly token is in the same groups as the old token
if (groups) {
if (nextState.t && typeof nextState.t === 'string') {
const allFlags = Object.assign(flagsForToken(nextState.t, groups), flags);
addToGroups(t, allFlags, groups);
} else if (flags) {
addToGroups(t, flags, groups);
}
}
nextState.t = t; // overwrite anything that was previously there
}
state.j[input] = nextState;
return nextState;
}
};
// Helper functions to improve minification (not exported outside linkifyjs module)
/**
* @template T
* @param {State<T>} state
* @param {string | string[]} input
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const ta = (state, input, next, flags, groups) => state.ta(input, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {RegExp} regexp
* @param {T | State<T>} [next]
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const tr = (state, regexp, next, flags, groups) => state.tr(regexp, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {string | string[]} input
* @param {T | State<T>} [next]
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const ts = (state, input, next, flags, groups) => state.ts(input, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {string} input
* @param {T | State<T>} [next]
* @param {Collections<T>} [groups]
* @param {Flags} [flags]
*/
const tt = (state, input, next, flags, groups) => state.tt(input, next, flags, groups);
/******************************************************************************
Text Tokens
Identifiers for token outputs from the regexp scanner
******************************************************************************/
// A valid web domain token
const WORD = 'WORD'; // only contains a-z
const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN
const ASCIINUMERICAL = 'ASCIINUMERICAL'; // contains a-z, 0-9
const ALPHANUMERICAL = 'ALPHANUMERICAL'; // contains numbers and letters other than a-z, used for IDN
// Special case of word
const LOCALHOST = 'LOCALHOST';
// Valid top-level domain, special case of WORD (see tlds.js)
const TLD = 'TLD';
// Valid IDN TLD, special case of UWORD (see tlds.js)
const UTLD = 'UTLD';
// The scheme portion of a web URI protocol. Supported types include: `mailto`,
// `file`, and user-defined custom protocols. Limited to schemes that contain
// only letters
const SCHEME = 'SCHEME';
// Similar to SCHEME, except makes distinction for schemes that must always be
// followed by `://`, not just `:`. Supported types include `http`, `https`,
// `ftp`, `ftps`
const SLASH_SCHEME = 'SLASH_SCHEME';
// Any sequence of digits 0-9
const NUM = 'NUM';
// Any number of consecutive whitespace characters that are not newline
const WS = 'WS';
// New line (unix style)
const NL = 'NL'; // \n
// Opening/closing bracket classes
// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names
// Also rename angle brackes to LESSTHAN and GREATER THAN
const OPENBRACE = 'OPENBRACE'; // {
const CLOSEBRACE = 'CLOSEBRACE'; // }
const OPENBRACKET = 'OPENBRACKET'; // [
const CLOSEBRACKET = 'CLOSEBRACKET'; // ]
const OPENPAREN = 'OPENPAREN'; // (
const CLOSEPAREN = 'CLOSEPAREN'; // )
const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; //
const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; //
const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「
const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」
const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『
const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』
const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; //
const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; //
// Various symbols
const AMPERSAND = 'AMPERSAND'; // &
const APOSTROPHE = 'APOSTROPHE'; // '
const ASTERISK = 'ASTERISK'; // *
const AT = 'AT'; // @
const BACKSLASH = 'BACKSLASH'; // \
const BACKTICK = 'BACKTICK'; // `
const CARET = 'CARET'; // ^
const COLON = 'COLON'; // :
const COMMA = 'COMMA'; // ,
const DOLLAR = 'DOLLAR'; // $
const DOT = 'DOT'; // .
const EQUALS = 'EQUALS'; // =
const EXCLAMATION = 'EXCLAMATION'; // !
const HYPHEN = 'HYPHEN'; // -
const PERCENT = 'PERCENT'; // %
const PIPE = 'PIPE'; // |
const PLUS = 'PLUS'; // +
const POUND = 'POUND'; // #
const QUERY = 'QUERY'; // ?
const QUOTE = 'QUOTE'; // "
const FULLWIDTHMIDDLEDOT = 'FULLWIDTHMIDDLEDOT'; // ・
const SEMI = 'SEMI'; // ;
const SLASH = 'SLASH'; // /
const TILDE = 'TILDE'; // ~
const UNDERSCORE = 'UNDERSCORE'; // _
// Emoji symbol
const EMOJI$1 = 'EMOJI';
// Default token - anything that is not one of the above
const SYM = 'SYM';
var tk = /*#__PURE__*/Object.freeze({
__proto__: null,
ALPHANUMERICAL: ALPHANUMERICAL,
AMPERSAND: AMPERSAND,
APOSTROPHE: APOSTROPHE,
ASCIINUMERICAL: ASCIINUMERICAL,
ASTERISK: ASTERISK,
AT: AT,
BACKSLASH: BACKSLASH,
BACKTICK: BACKTICK,
CARET: CARET,
CLOSEANGLEBRACKET: CLOSEANGLEBRACKET,
CLOSEBRACE: CLOSEBRACE,
CLOSEBRACKET: CLOSEBRACKET,
CLOSEPAREN: CLOSEPAREN,
COLON: COLON,
COMMA: COMMA,
DOLLAR: DOLLAR,
DOT: DOT,
EMOJI: EMOJI$1,
EQUALS: EQUALS,
EXCLAMATION: EXCLAMATION,
FULLWIDTHGREATERTHAN: FULLWIDTHGREATERTHAN,
FULLWIDTHLEFTPAREN: FULLWIDTHLEFTPAREN,
FULLWIDTHLESSTHAN: FULLWIDTHLESSTHAN,
FULLWIDTHMIDDLEDOT: FULLWIDTHMIDDLEDOT,
FULLWIDTHRIGHTPAREN: FULLWIDTHRIGHTPAREN,
HYPHEN: HYPHEN,
LEFTCORNERBRACKET: LEFTCORNERBRACKET,
LEFTWHITECORNERBRACKET: LEFTWHITECORNERBRACKET,
LOCALHOST: LOCALHOST,
NL: NL,
NUM: NUM,
OPENANGLEBRACKET: OPENANGLEBRACKET,
OPENBRACE: OPENBRACE,
OPENBRACKET: OPENBRACKET,
OPENPAREN: OPENPAREN,
PERCENT: PERCENT,
PIPE: PIPE,
PLUS: PLUS,
POUND: POUND,
QUERY: QUERY,
QUOTE: QUOTE,
RIGHTCORNERBRACKET: RIGHTCORNERBRACKET,
RIGHTWHITECORNERBRACKET: RIGHTWHITECORNERBRACKET,
SCHEME: SCHEME,
SEMI: SEMI,
SLASH: SLASH,
SLASH_SCHEME: SLASH_SCHEME,
SYM: SYM,
TILDE: TILDE,
TLD: TLD,
UNDERSCORE: UNDERSCORE,
UTLD: UTLD,
UWORD: UWORD,
WORD: WORD,
WS: WS
});
// Note that these two Unicode ones expand into a really big one with Babel
const ASCII_LETTER = /[a-z]/;
const LETTER = /\p{L}/u; // Any Unicode character with letter data type
const EMOJI = /\p{Emoji}/u; // Any Unicode emoji character
const EMOJI_VARIATION$1 = /\ufe0f/;
const DIGIT = /\d/;
const SPACE = /\s/;
var regexp = /*#__PURE__*/Object.freeze({
__proto__: null,
ASCII_LETTER: ASCII_LETTER,
DIGIT: DIGIT,
EMOJI: EMOJI,
EMOJI_VARIATION: EMOJI_VARIATION$1,
LETTER: LETTER,
SPACE: SPACE
});
/**
The scanner provides an interface that takes a string of text as input, and
outputs an array of tokens instances that can be used for easy URL parsing.
*/
const CR = '\r'; // carriage-return character
const LF = '\n'; // line-feed character
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
const EMOJI_JOINER = '\u200d'; // zero-width joiner
const OBJECT_REPLACEMENT = '\ufffc'; // whitespace placeholder that sometimes appears in rich text editors
let tlds = null,
utlds = null; // don't change so only have to be computed once
/**
* Scanner output token:
* - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
* - `v` is the value of the token (e.g., '123', '❤️', 'com')
* - `s` is the start index of the token in the original string
* - `e` is the end index of the token in the original string
* @typedef {{t: string, v: string, s: number, e: number}} Token
*/
/**
* @template T
* @typedef {{ [collection: string]: T[] }} Collections
*/
/**
* Initialize the scanner character-based state machine for the given start
* state
* @param {[string, boolean][]} customSchemes List of custom schemes, where each
* item is a length-2 tuple with the first element set to the string scheme, and
* the second element set to `true` if the `://` after the scheme is optional
*/
function init$2(customSchemes = []) {
// Frequently used states (name argument removed during minification)
/** @type Collections<string> */
const groups = {}; // of tokens
State.groups = groups;
/** @type State<string> */
const Start = new State();
if (tlds == null) {
tlds = decodeTlds(encodedTlds);
}
if (utlds == null) {
utlds = decodeTlds(encodedUtlds);
}
// States for special URL symbols that accept immediately after start
tt(Start, "'", APOSTROPHE);
tt(Start, '{', OPENBRACE);
tt(Start, '}', CLOSEBRACE);
tt(Start, '[', OPENBRACKET);
tt(Start, ']', CLOSEBRACKET);
tt(Start, '(', OPENPAREN);
tt(Start, ')', CLOSEPAREN);
tt(Start, '<', OPENANGLEBRACKET);
tt(Start, '>', CLOSEANGLEBRACKET);
tt(Start, '', FULLWIDTHLEFTPAREN);
tt(Start, '', FULLWIDTHRIGHTPAREN);
tt(Start, '「', LEFTCORNERBRACKET);
tt(Start, '」', RIGHTCORNERBRACKET);
tt(Start, '『', LEFTWHITECORNERBRACKET);
tt(Start, '』', RIGHTWHITECORNERBRACKET);
tt(Start, '', FULLWIDTHLESSTHAN);
tt(Start, '', FULLWIDTHGREATERTHAN);
tt(Start, '&', AMPERSAND);
tt(Start, '*', ASTERISK);
tt(Start, '@', AT);
tt(Start, '`', BACKTICK);
tt(Start, '^', CARET);
tt(Start, ':', COLON);
tt(Start, ',', COMMA);
tt(Start, '$', DOLLAR);
tt(Start, '.', DOT);
tt(Start, '=', EQUALS);
tt(Start, '!', EXCLAMATION);
tt(Start, '-', HYPHEN);
tt(Start, '%', PERCENT);
tt(Start, '|', PIPE);
tt(Start, '+', PLUS);
tt(Start, '#', POUND);
tt(Start, '?', QUERY);
tt(Start, '"', QUOTE);
tt(Start, '/', SLASH);
tt(Start, ';', SEMI);
tt(Start, '~', TILDE);
tt(Start, '_', UNDERSCORE);
tt(Start, '\\', BACKSLASH);
tt(Start, '・', FULLWIDTHMIDDLEDOT);
const Num = tr(Start, DIGIT, NUM, {
[numeric]: true
});
tr(Num, DIGIT, Num);
const Asciinumeric = tr(Num, ASCII_LETTER, ASCIINUMERICAL, {
[asciinumeric]: true
});
const Alphanumeric = tr(Num, LETTER, ALPHANUMERICAL, {
[alphanumeric]: true
});
// State which emits a word token
const Word = tr(Start, ASCII_LETTER, WORD, {
[ascii]: true
});
tr(Word, DIGIT, Asciinumeric);
tr(Word, ASCII_LETTER, Word);
tr(Asciinumeric, DIGIT, Asciinumeric);
tr(Asciinumeric, ASCII_LETTER, Asciinumeric);
// Same as previous, but specific to non-fsm.ascii alphabet words
const UWord = tr(Start, LETTER, UWORD, {
[alpha]: true
});
tr(UWord, ASCII_LETTER); // Non-accepting
tr(UWord, DIGIT, Alphanumeric);
tr(UWord, LETTER, UWord);
tr(Alphanumeric, DIGIT, Alphanumeric);
tr(Alphanumeric, ASCII_LETTER); // Non-accepting
tr(Alphanumeric, LETTER, Alphanumeric); // Non-accepting
// Whitespace jumps
// Tokens of only non-newline whitespace are arbitrarily long
// If any whitespace except newline, more whitespace!
const Nl = tt(Start, LF, NL, {
[whitespace]: true
});
const Cr = tt(Start, CR, WS, {
[whitespace]: true
});
const Ws = tr(Start, SPACE, WS, {
[whitespace]: true
});
tt(Start, OBJECT_REPLACEMENT, Ws);
tt(Cr, LF, Nl); // \r\n
tt(Cr, OBJECT_REPLACEMENT, Ws);
tr(Cr, SPACE, Ws);
tt(Ws, CR); // non-accepting state to avoid mixing whitespaces
tt(Ws, LF); // non-accepting state to avoid mixing whitespaces
tr(Ws, SPACE, Ws);
tt(Ws, OBJECT_REPLACEMENT, Ws);
// Emoji tokens. They are not grouped by the scanner except in cases where a
// zero-width joiner is present
const Emoji = tr(Start, EMOJI, EMOJI$1, {
[emoji]: true
});
tt(Emoji, '#'); // no transition, emoji regex seems to match #
tr(Emoji, EMOJI, Emoji);
tt(Emoji, EMOJI_VARIATION, Emoji);
// tt(Start, EMOJI_VARIATION, Emoji); // This one is sketchy
const EmojiJoiner = tt(Emoji, EMOJI_JOINER);
tt(EmojiJoiner, '#');
tr(EmojiJoiner, EMOJI, Emoji);
// tt(EmojiJoiner, EMOJI_VARIATION, Emoji); // also sketchy
// Generates states for top-level domains
// Note that this is most accurate when tlds are in alphabetical order
const wordjr = [[ASCII_LETTER, Word], [DIGIT, Asciinumeric]];
const uwordjr = [[ASCII_LETTER, null], [LETTER, UWord], [DIGIT, Alphanumeric]];
for (let i = 0; i < tlds.length; i++) {
fastts(Start, tlds[i], TLD, WORD, wordjr);
}
for (let i = 0; i < utlds.length; i++) {
fastts(Start, utlds[i], UTLD, UWORD, uwordjr);
}
addToGroups(TLD, {
tld: true,
ascii: true
}, groups);
addToGroups(UTLD, {
utld: true,
alpha: true
}, groups);
// Collect the states generated by different protocols. NOTE: If any new TLDs
// get added that are also protocols, set the token to be the same as the
// protocol to ensure parsing works as expected.
fastts(Start, 'file', SCHEME, WORD, wordjr);
fastts(Start, 'mailto', SCHEME, WORD, wordjr);
fastts(Start, 'http', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'https', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'ftp', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'ftps', SLASH_SCHEME, WORD, wordjr);
addToGroups(SCHEME, {
scheme: true,
ascii: true
}, groups);
addToGroups(SLASH_SCHEME, {
slashscheme: true,
ascii: true
}, groups);
// Register custom schemes. Assumes each scheme is asciinumeric with hyphens
customSchemes = customSchemes.sort((a, b) => a[0] > b[0] ? 1 : -1);
for (let i = 0; i < customSchemes.length; i++) {
const sch = customSchemes[i][0];
const optionalSlashSlash = customSchemes[i][1];
const flags = optionalSlashSlash ? {
[scheme]: true
} : {
[slashscheme]: true
};
if (sch.indexOf('-') >= 0) {
flags[domain] = true;
} else if (!ASCII_LETTER.test(sch)) {
flags[numeric] = true; // numbers only
} else if (DIGIT.test(sch)) {
flags[asciinumeric] = true;
} else {
flags[ascii] = true;
}
ts(Start, sch, sch, flags);
}
// Localhost token
ts(Start, 'localhost', LOCALHOST, {
ascii: true
});
// Set default transition for start state (some symbol)
Start.jd = new State(SYM);
return {
start: Start,
tokens: Object.assign({
groups
}, tk)
};
}
/**
Given a string, returns an array of TOKEN instances representing the
composition of that string.
@method run
@param {State<string>} start scanner starting state
@param {string} str input string to scan
@return {Token[]} list of tokens, each with a type and value
*/
function run$1(start, str) {
// State machine is not case sensitive, so input is tokenized in lowercased
// form (still returns regular case). Uses selective `toLowerCase` because
// lowercasing the entire string causes the length and character position to
// vary in some non-English strings with V8-based runtimes.
const iterable = stringToArray(str.replace(/[A-Z]/g, c => c.toLowerCase()));
const charCount = iterable.length; // <= len if there are emojis, etc
const tokens = []; // return value
// cursor through the string itself, accounting for characters that have
// width with length 2 such as emojis
let cursor = 0;
// Cursor through the array-representation of the string
let charCursor = 0;
// Tokenize the string
while (charCursor < charCount) {
let state = start;
let nextState = null;
let tokenLength = 0;
let latestAccepting = null;
let sinceAccepts = -1;
let charsSinceAccepts = -1;
while (charCursor < charCount && (nextState = state.go(iterable[charCursor]))) {
state = nextState;
// Keep track of the latest accepting state
if (state.accepts()) {
sinceAccepts = 0;
charsSinceAccepts = 0;
latestAccepting = state;
} else if (sinceAccepts >= 0) {
sinceAccepts += iterable[charCursor].length;
charsSinceAccepts++;
}
tokenLength += iterable[charCursor].length;
cursor += iterable[charCursor].length;
charCursor++;
}
// Roll back to the latest accepting state
cursor -= sinceAccepts;
charCursor -= charsSinceAccepts;
tokenLength -= sinceAccepts;
// No more jumps, just make a new token from the last accepting one
tokens.push({
t: latestAccepting.t,
// token type/name
v: str.slice(cursor - tokenLength, cursor),
// string value
s: cursor - tokenLength,
// start index
e: cursor // end index (excluding)
});
}
return tokens;
}
/**
* Convert a String to an Array of characters, taking into account that some
* characters like emojis take up two string indexes.
*
* Adapted from core-js (MIT license)
* https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
*
* @function stringToArray
* @param {string} str
* @returns {string[]}
*/
function stringToArray(str) {
const result = [];
const len = str.length;
let index = 0;
while (index < len) {
let first = str.charCodeAt(index);
let second;
let char = first < 0xd800 || first > 0xdbff || index + 1 === len || (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff ? str[index] // single character
: str.slice(index, index + 2); // two-index characters
result.push(char);
index += char.length;
}
return result;
}
/**
* Fast version of ts function for when transition defaults are well known
* @param {State<string>} state
* @param {string} input
* @param {string} t
* @param {string} defaultt
* @param {[RegExp, State<string>][]} jr
* @returns {State<string>}
*/
function fastts(state, input, t, defaultt, jr) {
let next;
const len = input.length;
for (let i = 0; i < len - 1; i++) {
const char = input[i];
if (state.j[char]) {
next = state.j[char];
} else {
next = new State(defaultt);
next.jr = jr.slice();
state.j[char] = next;
}
state = next;
}
next = new State(t);
next.jr = jr.slice();
state.j[input[len - 1]] = next;
return next;
}
/**
* Converts a string of Top-Level Domain names encoded in update-tlds.js back
* into a list of strings.
* @param {str} encoded encoded TLDs string
* @returns {str[]} original TLDs list
*/
function decodeTlds(encoded) {
const words = [];
const stack = [];
let i = 0;
let digits = '0123456789';
while (i < encoded.length) {
let popDigitCount = 0;
while (digits.indexOf(encoded[i + popDigitCount]) >= 0) {
popDigitCount++; // encountered some digits, have to pop to go one level up trie
}
if (popDigitCount > 0) {
words.push(stack.join('')); // whatever preceded the pop digits must be a word
for (let popCount = parseInt(encoded.substring(i, i + popDigitCount), 10); popCount > 0; popCount--) {
stack.pop();
}
i += popDigitCount;
} else {
stack.push(encoded[i]); // drop down a level into the trie
i++;
}
}
return words;
}
/**
* An object where each key is a valid DOM Event Name such as `click` or `focus`
* and each value is an event handler function.
*
* https://developer.mozilla.org/en-US/docs/Web/API/Element#events
* @typedef {?{ [event: string]: Function }} EventListeners
*/
/**
* All formatted properties required to render a link, including `tagName`,
* `attributes`, `content` and `eventListeners`.
* @typedef {{ tagName: any, attributes: {[attr: string]: any}, content: string,
* eventListeners: EventListeners }} IntermediateRepresentation
*/
/**
* Specify either an object described by the template type `O` or a function.
*
* The function takes a string value (usually the link's href attribute), the
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
* of the link. It should return an object of the template type `O`
* @template O
* @typedef {O | ((value: string, type: string, token: MultiToken) => O)} OptObj
*/
/**
* Specify either a function described by template type `F` or an object.
*
* Each key in the object should be a link type (`'url'`, `'hashtag`', etc.). Each
* value should be a function with template type `F` that is called when the
* corresponding link type is encountered.
* @template F
* @typedef {F | { [type: string]: F}} OptFn
*/
/**
* Specify either a value with template type `V`, a function that returns `V` or
* an object where each value resolves to `V`.
*
* The function takes a string value (usually the link's href attribute), the
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
* of the link. It should return an object of the template type `V`
*
* For the object, each key should be a link type (`'url'`, `'hashtag`', etc.).
* Each value should either have type `V` or a function that returns V. This
* function similarly takes a string value and a token.
*
* Example valid types for `Opt<string>`:
*
* ```js
* 'hello'
* (value, type, token) => 'world'
* { url: 'hello', email: (value, token) => 'world'}
* ```
* @template V
* @typedef {V | ((value: string, type: string, token: MultiToken) => V) | { [type: string]: V | ((value: string, token: MultiToken) => V) }} Opt
*/
/**
* See available options: https://linkify.js.org/docs/options.html
* @typedef {{
* defaultProtocol?: string,
* events?: OptObj<EventListeners>,
* format?: Opt<string>,
* formatHref?: Opt<string>,
* nl2br?: boolean,
* tagName?: Opt<any>,
* target?: Opt<string>,
* rel?: Opt<string>,
* validate?: Opt<boolean>,
* truncate?: Opt<number>,
* className?: Opt<string>,
* attributes?: OptObj<({ [attr: string]: any })>,
* ignoreTags?: string[],
* render?: OptFn<((ir: IntermediateRepresentation) => any)>
* }} Opts
*/
/**
* @type Required<Opts>
*/
const defaults = {
defaultProtocol: 'http',
events: null,
format: noop,
formatHref: noop,
nl2br: false,
tagName: 'a',
target: null,
rel: null,
validate: true,
truncate: Infinity,
className: null,
attributes: null,
ignoreTags: [],
render: null
};
/**
* Utility class for linkify interfaces to apply specified
* {@link Opts formatting and rendering options}.
*
* @param {Opts | Options} [opts] Option value overrides.
* @param {(ir: IntermediateRepresentation) => any} [defaultRender] (For
* internal use) default render function that determines how to generate an
* HTML element based on a link token's derived tagName, attributes and HTML.
* Similar to render option
*/
function Options(opts, defaultRender = null) {
let o = Object.assign({}, defaults);
if (opts) {
o = Object.assign(o, opts instanceof Options ? opts.o : opts);
}
// Ensure all ignored tags are uppercase
const ignoredTags = o.ignoreTags;
const uppercaseIgnoredTags = [];
for (let i = 0; i < ignoredTags.length; i++) {
uppercaseIgnoredTags.push(ignoredTags[i].toUpperCase());
}
/** @protected */
this.o = o;
if (defaultRender) {
this.defaultRender = defaultRender;
}
this.ignoreTags = uppercaseIgnoredTags;
}
Options.prototype = {
o: defaults,
/**
* @type string[]
*/
ignoreTags: [],
/**
* @param {IntermediateRepresentation} ir
* @returns {any}
*/
defaultRender(ir) {
return ir;
},
/**
* Returns true or false based on whether a token should be displayed as a
* link based on the user options.
* @param {MultiToken} token
* @returns {boolean}
*/
check(token) {
return this.get('validate', token.toString(), token);
},
// Private methods
/**
* Resolve an option's value based on the value of the option and the given
* params. If operator and token are specified and the target option is
* callable, automatically calls the function with the given argument.
* @template {keyof Opts} K
* @param {K} key Name of option to use
* @param {string} [operator] will be passed to the target option if it's a
* function. If not specified, RAW function value gets returned
* @param {MultiToken} [token] The token from linkify.tokenize
* @returns {Opts[K] | any}
*/
get(key, operator, token) {
const isCallable = operator != null;
let option = this.o[key];
if (!option) {
return option;
}
if (typeof option === 'object') {
option = token.t in option ? option[token.t] : defaults[key];
if (typeof option === 'function' && isCallable) {
option = option(operator, token);
}
} else if (typeof option === 'function' && isCallable) {
option = option(operator, token.t, token);
}
return option;
},
/**
* @template {keyof Opts} L
* @param {L} key Name of options object to use
* @param {string} [operator]
* @param {MultiToken} [token]
* @returns {Opts[L] | any}
*/
getObj(key, operator, token) {
let obj = this.o[key];
if (typeof obj === 'function' && operator != null) {
obj = obj(operator, token.t, token);
}
return obj;
},
/**
* Convert the given token to a rendered element that may be added to the
* calling-interface's DOM
* @param {MultiToken} token Token to render to an HTML element
* @returns {any} Render result; e.g., HTML string, DOM element, React
* Component, etc.
*/
render(token) {
const ir = token.render(this); // intermediate representation
const renderFn = this.get('render', null, token) || this.defaultRender;
return renderFn(ir, token.t, token);
}
};
function noop(val) {
return val;
}
var options = /*#__PURE__*/Object.freeze({
__proto__: null,
Options: Options,
defaults: defaults
});
/******************************************************************************
Multi-Tokens
Tokens composed of arrays of TextTokens
******************************************************************************/
/**
* @param {string} value
* @param {Token[]} tokens
*/
function MultiToken(value, tokens) {
this.t = 'token';
this.v = value;
this.tk = tokens;
}
/**
* Abstract class used for manufacturing tokens of text tokens. That is rather
* than the value for a token being a small string of text, it's value an array
* of text tokens.
*
* Used for grouping together URLs, emails, hashtags, and other potential
* creations.
* @class MultiToken
* @property {string} t
* @property {string} v
* @property {Token[]} tk
* @abstract
*/
MultiToken.prototype = {
isLink: false,
/**
* Return the string this token represents.
* @return {string}
*/
toString() {
return this.v;
},
/**
* What should the value for this token be in the `href` HTML attribute?
* Returns the `.toString` value by default.
* @param {string} [scheme]
* @return {string}
*/
toHref(scheme) {
return this.toString();
},
/**
* @param {Options} options Formatting options
* @returns {string}
*/
toFormattedString(options) {
const val = this.toString();
const truncate = options.get('truncate', val, this);
const formatted = options.get('format', val, this);
return truncate && formatted.length > truncate ? formatted.substring(0, truncate) + '…' : formatted;
},
/**
*
* @param {Options} options
* @returns {string}
*/
toFormattedHref(options) {
return options.get('formatHref', this.toHref(options.get('defaultProtocol')), this);
},
/**
* The start index of this token in the original input string
* @returns {number}
*/
startIndex() {
return this.tk[0].s;
},
/**
* The end index of this token in the original input string (up to this
* index but not including it)
* @returns {number}
*/
endIndex() {
return this.tk[this.tk.length - 1].e;
},
/**
Returns an object of relevant values for this token, which includes keys
* type - Kind of token ('url', 'email', etc.)
* value - Original text
* href - The value that should be added to the anchor tag's href
attribute
@method toObject
@param {string} [protocol] `'http'` by default
*/
toObject(protocol = defaults.defaultProtocol) {
return {
type: this.t,
value: this.toString(),
isLink: this.isLink,
href: this.toHref(protocol),
start: this.startIndex(),
end: this.endIndex()
};
},
/**
*
* @param {Options} options Formatting option
*/
toFormattedObject(options) {
return {
type: this.t,
value: this.toFormattedString(options),
isLink: this.isLink,
href: this.toFormattedHref(options),
start: this.startIndex(),
end: this.endIndex()
};
},
/**
* Whether this token should be rendered as a link according to the given options
* @param {Options} options
* @returns {boolean}
*/
validate(options) {
return options.get('validate', this.toString(), this);
},
/**
* Return an object that represents how this link should be rendered.
* @param {Options} options Formattinng options
*/
render(options) {
const token = this;
const href = this.toHref(options.get('defaultProtocol'));
const formattedHref = options.get('formatHref', href, this);
const tagName = options.get('tagName', href, token);
const content = this.toFormattedString(options);
const attributes = {};
const className = options.get('className', href, token);
const target = options.get('target', href, token);
const rel = options.get('rel', href, token);
const attrs = options.getObj('attributes', href, token);
const eventListeners = options.getObj('events', href, token);
attributes.href = formattedHref;
if (className) {
attributes.class = className;
}
if (target) {
attributes.target = target;
}
if (rel) {
attributes.rel = rel;
}
if (attrs) {
Object.assign(attributes, attrs);
}
return {
tagName,
attributes,
content,
eventListeners
};
}
};
/**
* Create a new token that can be emitted by the parser state machine
* @param {string} type readable type of the token
* @param {object} props properties to assign or override, including isLink = true or false
* @returns {new (value: string, tokens: Token[]) => MultiToken} new token class
*/
function createTokenClass(type, props) {
class Token extends MultiToken {
constructor(value, tokens) {
super(value, tokens);
this.t = type;
}
}
for (const p in props) {
Token.prototype[p] = props[p];
}
Token.t = type;
return Token;
}
/**
Represents a list of tokens making up a valid email address
*/
const Email = createTokenClass('email', {
isLink: true,
toHref() {
return 'mailto:' + this.toString();
}
});
/**
Represents some plain text
*/
const Text = createTokenClass('text');
/**
Multi-linebreak token - represents a line break
@class Nl
*/
const Nl = createTokenClass('nl');
/**
Represents a list of text tokens making up a valid URL
@class Url
*/
const Url = createTokenClass('url', {
isLink: true,
/**
Lowercases relevant parts of the domain and adds the protocol if
required. Note that this will not escape unsafe HTML characters in the
URL.
@param {string} [scheme] default scheme (e.g., 'https')
@return {string} the full href
*/
toHref(scheme = defaults.defaultProtocol) {
// Check if already has a prefix scheme
return this.hasProtocol() ? this.v : `${scheme}://${this.v}`;
},
/**
* Check whether this URL token has a protocol
* @return {boolean}
*/
hasProtocol() {
const tokens = this.tk;
return tokens.length >= 2 && tokens[0].t !== LOCALHOST && tokens[1].t === COLON;
}
});
var multi = /*#__PURE__*/Object.freeze({
__proto__: null,
Base: MultiToken,
Email: Email,
MultiToken: MultiToken,
Nl: Nl,
Text: Text,
Url: Url,
createTokenClass: createTokenClass
});
/**
Not exactly parser, more like the second-stage scanner (although we can
theoretically hotswap the code here with a real parser in the future... but
for a little URL-finding utility abstract syntax trees may be a little
overkill).
URL format: http://en.wikipedia.org/wiki/URI_scheme
Email format: http://en.wikipedia.org/wiki/EmailAddress (links to RFC in
reference)
@module linkify
@submodule parser
@main run
*/
const makeState = arg => new State(arg);
/**
* Generate the parser multi token-based state machine
* @param {{ groups: Collections<string> }} tokens
*/
function init$1({
groups
}) {
// Types of characters the URL can definitely end in
const qsAccepting = groups.domain.concat([AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, NUM, PERCENT, PIPE, PLUS, POUND, SLASH, SYM, TILDE, UNDERSCORE]);
// Types of tokens that can follow a URL and be part of the query string
// but cannot be the very last characters
// Characters that cannot appear in the URL at all should be excluded
const qsNonAccepting = [APOSTROPHE, COLON, COMMA, DOT, EXCLAMATION, PERCENT, QUERY, QUOTE, SEMI, OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENBRACE, CLOSEBRACE, CLOSEBRACKET, OPENBRACKET, OPENPAREN, CLOSEPAREN, FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN, LEFTCORNERBRACKET, RIGHTCORNERBRACKET, LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET, FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN];
// For addresses without the mailto prefix
// Tokens allowed in the localpart of the email
const localpartAccepting = [AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, OPENBRACE, CLOSEBRACE, PERCENT, PIPE, PLUS, POUND, QUERY, SLASH, SYM, TILDE, UNDERSCORE];
// The universal starting state.
/**
* @type State<Token>
*/
const Start = makeState();
const Localpart = tt(Start, TILDE); // Local part of the email address
ta(Localpart, localpartAccepting, Localpart);
ta(Localpart, groups.domain, Localpart);
const Domain = makeState(),
Scheme = makeState(),
SlashScheme = makeState();
ta(Start, groups.domain, Domain); // parsed string ends with a potential domain name (A)
ta(Start, groups.scheme, Scheme); // e.g., 'mailto'
ta(Start, groups.slashscheme, SlashScheme); // e.g., 'http'
ta(Domain, localpartAccepting, Localpart);
ta(Domain, groups.domain, Domain);
const LocalpartAt = tt(Domain, AT); // Local part of the email address plus @
tt(Localpart, AT, LocalpartAt); // close to an email address now
// Local part of an email address can be e.g. 'http' or 'mailto'
tt(Scheme, AT, LocalpartAt);
tt(SlashScheme, AT, LocalpartAt);
const LocalpartDot = tt(Localpart, DOT); // Local part of the email address plus '.' (localpart cannot end in .)
ta(LocalpartDot, localpartAccepting, Localpart);
ta(LocalpartDot, groups.domain, Localpart);
const EmailDomain = makeState();
ta(LocalpartAt, groups.domain, EmailDomain); // parsed string starts with local email info + @ with a potential domain name
ta(EmailDomain, groups.domain, EmailDomain);
const EmailDomainDot = tt(EmailDomain, DOT); // domain followed by DOT
ta(EmailDomainDot, groups.domain, EmailDomain);
const Email$1 = makeState(Email); // Possible email address (could have more tlds)
ta(EmailDomainDot, groups.tld, Email$1);
ta(EmailDomainDot, groups.utld, Email$1);
tt(LocalpartAt, LOCALHOST, Email$1);
// Hyphen can jump back to a domain name
const EmailDomainHyphen = tt(EmailDomain, HYPHEN); // parsed string starts with local email info + @ with a potential domain name
tt(EmailDomainHyphen, HYPHEN, EmailDomainHyphen);
ta(EmailDomainHyphen, groups.domain, EmailDomain);
ta(Email$1, groups.domain, EmailDomain);
tt(Email$1, DOT, EmailDomainDot);
tt(Email$1, HYPHEN, EmailDomainHyphen);
// Final possible email states
const EmailColon = tt(Email$1, COLON); // URL followed by colon (potential port number here)
/*const EmailColonPort = */
ta(EmailColon, groups.numeric, Email); // URL followed by colon and port number
// Account for dots and hyphens. Hyphens are usually parts of domain names
// (but not TLDs)
const DomainHyphen = tt(Domain, HYPHEN); // domain followed by hyphen
const DomainDot = tt(Domain, DOT); // domain followed by DOT
tt(DomainHyphen, HYPHEN, DomainHyphen);
ta(DomainHyphen, groups.domain, Domain);
ta(DomainDot, localpartAccepting, Localpart);
ta(DomainDot, groups.domain, Domain);
const DomainDotTld = makeState(Url); // Simplest possible URL with no query string
ta(DomainDot, groups.tld, DomainDotTld);
ta(DomainDot, groups.utld, DomainDotTld);
ta(DomainDotTld, groups.domain, Domain);
ta(DomainDotTld, localpartAccepting, Localpart);
tt(DomainDotTld, DOT, DomainDot);
tt(DomainDotTld, HYPHEN, DomainHyphen);
tt(DomainDotTld, AT, LocalpartAt);
const DomainDotTldColon = tt(DomainDotTld, COLON); // URL followed by colon (potential port number here)
const DomainDotTldColonPort = makeState(Url); // TLD followed by a port number
ta(DomainDotTldColon, groups.numeric, DomainDotTldColonPort);
// Long URL with optional port and maybe query string
const Url$1 = makeState(Url);
// URL with extra symbols at the end, followed by an opening bracket
const UrlNonaccept = makeState(); // URL followed by some symbols (will not be part of the final URL)
// Query strings
ta(Url$1, qsAccepting, Url$1);
ta(Url$1, qsNonAccepting, UrlNonaccept);
ta(UrlNonaccept, qsAccepting, Url$1);
ta(UrlNonaccept, qsNonAccepting, UrlNonaccept);
// Become real URLs after `SLASH` or `COLON NUM SLASH`
// Here works with or without scheme:// prefix
tt(DomainDotTld, SLASH, Url$1);
tt(DomainDotTldColonPort, SLASH, Url$1);
// Note that domains that begin with schemes are treated slighly differently
const SchemeColon = tt(Scheme, COLON); // e.g., 'mailto:'
const SlashSchemeColon = tt(SlashScheme, COLON); // e.g., 'http:'
const SlashSchemeColonSlash = tt(SlashSchemeColon, SLASH); // e.g., 'http:/'
const UriPrefix = tt(SlashSchemeColonSlash, SLASH); // e.g., 'http://'
// Scheme states can transition to domain states
ta(Scheme, groups.domain, Domain);
tt(Scheme, DOT, DomainDot);
tt(Scheme, HYPHEN, DomainHyphen);
ta(SlashScheme, groups.domain, Domain);
tt(SlashScheme, DOT, DomainDot);
tt(SlashScheme, HYPHEN, DomainHyphen);
// Force URL with scheme prefix followed by anything sane
ta(SchemeColon, groups.domain, Url$1);
tt(SchemeColon, SLASH, Url$1);
tt(SchemeColon, QUERY, Url$1);
ta(UriPrefix, groups.domain, Url$1);
ta(UriPrefix, qsAccepting, Url$1);
tt(UriPrefix, SLASH, Url$1);
const bracketPairs = [[OPENBRACE, CLOSEBRACE],
// {}
[OPENBRACKET, CLOSEBRACKET],
// []
[OPENPAREN, CLOSEPAREN],
// ()
[OPENANGLEBRACKET, CLOSEANGLEBRACKET],
// <>
[FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN],
//
[LEFTCORNERBRACKET, RIGHTCORNERBRACKET],
// 「」
[LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET],
// 『』
[FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN] //
];
for (let i = 0; i < bracketPairs.length; i++) {
const [OPEN, CLOSE] = bracketPairs[i];
const UrlOpen = tt(Url$1, OPEN); // URL followed by open bracket
// Continue not accepting for open brackets
tt(UrlNonaccept, OPEN, UrlOpen);
// Closing bracket component. This character WILL be included in the URL
tt(UrlOpen, CLOSE, Url$1);
// URL that beings with an opening bracket, followed by a symbols.
// Note that the final state can still be `UrlOpen` (if the URL has a
// single opening bracket for some reason).
const UrlOpenQ = makeState(Url);
ta(UrlOpen, qsAccepting, UrlOpenQ);
const UrlOpenSyms = makeState(); // UrlOpen followed by some symbols it cannot end it
ta(UrlOpen, qsNonAccepting);
// URL that begins with an opening bracket, followed by some symbols
ta(UrlOpenQ, qsAccepting, UrlOpenQ);
ta(UrlOpenQ, qsNonAccepting, UrlOpenSyms);
ta(UrlOpenSyms, qsAccepting, UrlOpenQ);
ta(UrlOpenSyms, qsNonAccepting, UrlOpenSyms);
// Close brace/bracket to become regular URL
tt(UrlOpenQ, CLOSE, Url$1);
tt(UrlOpenSyms, CLOSE, Url$1);
}
tt(Start, LOCALHOST, DomainDotTld); // localhost is a valid URL state
tt(Start, NL, Nl); // single new line
return {
start: Start,
tokens: tk
};
}
/**
* Run the parser state machine on a list of scanned string-based tokens to
* create a list of multi tokens, each of which represents a URL, email address,
* plain text, etc.
*
* @param {State<MultiToken>} start parser start state
* @param {string} input the original input used to generate the given tokens
* @param {Token[]} tokens list of scanned tokens
* @returns {MultiToken[]}
*/
function run(start, input, tokens) {
let len = tokens.length;
let cursor = 0;
let multis = [];
let textTokens = [];
while (cursor < len) {
let state = start;
let secondState = null;
let nextState = null;
let multiLength = 0;
let latestAccepting = null;
let sinceAccepts = -1;
while (cursor < len && !(secondState = state.go(tokens[cursor].t))) {
// Starting tokens with nowhere to jump to.
// Consider these to be just plain text
textTokens.push(tokens[cursor++]);
}
while (cursor < len && (nextState = secondState || state.go(tokens[cursor].t))) {
// Get the next state
secondState = null;
state = nextState;
// Keep track of the latest accepting state
if (state.accepts()) {
sinceAccepts = 0;
latestAccepting = state;
} else if (sinceAccepts >= 0) {
sinceAccepts++;
}
cursor++;
multiLength++;
}
if (sinceAccepts < 0) {
// No accepting state was found, part of a regular text token add
// the first text token to the text tokens array and try again from
// the next
cursor -= multiLength;
if (cursor < len) {
textTokens.push(tokens[cursor]);
cursor++;
}
} else {
// Accepting state!
// First close off the textTokens (if available)
if (textTokens.length > 0) {
multis.push(initMultiToken(Text, input, textTokens));
textTokens = [];
}
// Roll back to the latest accepting state
cursor -= sinceAccepts;
multiLength -= sinceAccepts;
// Create a new multitoken
const Multi = latestAccepting.t;
const subtokens = tokens.slice(cursor - multiLength, cursor);
multis.push(initMultiToken(Multi, input, subtokens));
}
}
// Finally close off the textTokens (if available)
if (textTokens.length > 0) {
multis.push(initMultiToken(Text, input, textTokens));
}
return multis;
}
/**
* Utility function for instantiating a new multitoken with all the relevant
* fields during parsing.
* @param {new (value: string, tokens: Token[]) => MultiToken} Multi class to instantiate
* @param {string} input original input string
* @param {Token[]} tokens consecutive tokens scanned from input string
* @returns {MultiToken}
*/
function initMultiToken(Multi, input, tokens) {
const startIdx = tokens[0].s;
const endIdx = tokens[tokens.length - 1].e;
const value = input.slice(startIdx, endIdx);
return new Multi(value, tokens);
}
const warn = typeof console !== 'undefined' && console && console.warn || (() => {});
const warnAdvice = 'until manual call of linkify.init(). Register all schemes and plugins before invoking linkify the first time.';
// Side-effect initialization state
const INIT = {
scanner: null,
parser: null,
tokenQueue: [],
pluginQueue: [],
customSchemes: [],
initialized: false
};
/**
* @typedef {{
* start: State<string>,
* tokens: { groups: Collections<string> } & typeof tk
* }} ScannerInit
*/
/**
* @typedef {{
* start: State<MultiToken>,
* tokens: typeof multi
* }} ParserInit
*/
/**
* @typedef {(arg: { scanner: ScannerInit }) => void} TokenPlugin
*/
/**
* @typedef {(arg: { scanner: ScannerInit, parser: ParserInit }) => void} Plugin
*/
/**
* De-register all plugins and reset the internal state-machine. Used for
* testing; not required in practice.
* @private
*/
function reset() {
State.groups = {};
INIT.scanner = null;
INIT.parser = null;
INIT.tokenQueue = [];
INIT.pluginQueue = [];
INIT.customSchemes = [];
INIT.initialized = false;
return INIT;
}
/**
* Register a token plugin to allow the scanner to recognize additional token
* types before the parser state machine is constructed from the results.
* @param {string} name of plugin to register
* @param {TokenPlugin} plugin function that accepts the scanner state machine
* and available scanner tokens and collections and extends the state machine to
* recognize additional tokens or groups.
*/
function registerTokenPlugin(name, plugin) {
if (typeof plugin !== 'function') {
throw new Error(`linkifyjs: Invalid token plugin ${plugin} (expects function)`);
}
for (let i = 0; i < INIT.tokenQueue.length; i++) {
if (name === INIT.tokenQueue[i][0]) {
warn(`linkifyjs: token plugin "${name}" already registered - will be overwritten`);
INIT.tokenQueue[i] = [name, plugin];
return;
}
}
INIT.tokenQueue.push([name, plugin]);
if (INIT.initialized) {
warn(`linkifyjs: already initialized - will not register token plugin "${name}" ${warnAdvice}`);
}
}
/**
* Register a linkify plugin
* @param {string} name of plugin to register
* @param {Plugin} plugin function that accepts the parser state machine and
* extends the parser to recognize additional link types
*/
function registerPlugin(name, plugin) {
if (typeof plugin !== 'function') {
throw new Error(`linkifyjs: Invalid plugin ${plugin} (expects function)`);
}
for (let i = 0; i < INIT.pluginQueue.length; i++) {
if (name === INIT.pluginQueue[i][0]) {
warn(`linkifyjs: plugin "${name}" already registered - will be overwritten`);
INIT.pluginQueue[i] = [name, plugin];
return;
}
}
INIT.pluginQueue.push([name, plugin]);
if (INIT.initialized) {
warn(`linkifyjs: already initialized - will not register plugin "${name}" ${warnAdvice}`);
}
}
/**
* Detect URLs with the following additional protocol. Anything with format
* "protocol://..." will be considered a link. If `optionalSlashSlash` is set to
* `true`, anything with format "protocol:..." will be considered a link.
* @param {string} scheme
* @param {boolean} [optionalSlashSlash]
*/
function registerCustomProtocol(scheme, optionalSlashSlash = false) {
if (INIT.initialized) {
warn(`linkifyjs: already initialized - will not register custom scheme "${scheme}" ${warnAdvice}`);
}
if (!/^[0-9a-z]+(-[0-9a-z]+)*$/.test(scheme)) {
throw new Error(`linkifyjs: incorrect scheme format.
1. Must only contain digits, lowercase ASCII letters or "-"
2. Cannot start or end with "-"
3. "-" cannot repeat`);
}
INIT.customSchemes.push([scheme, optionalSlashSlash]);
}
/**
* Initialize the linkify state machine. Called automatically the first time
* linkify is called on a string, but may be called manually as well.
*/
function init() {
// Initialize scanner state machine and plugins
INIT.scanner = init$2(INIT.customSchemes);
for (let i = 0; i < INIT.tokenQueue.length; i++) {
INIT.tokenQueue[i][1]({
scanner: INIT.scanner
});
}
// Initialize parser state machine and plugins
INIT.parser = init$1(INIT.scanner.tokens);
for (let i = 0; i < INIT.pluginQueue.length; i++) {
INIT.pluginQueue[i][1]({
scanner: INIT.scanner,
parser: INIT.parser
});
}
INIT.initialized = true;
return INIT;
}
/**
* Parse a string into tokens that represent linkable and non-linkable sub-components
* @param {string} str
* @return {MultiToken[]} tokens
*/
function tokenize(str) {
if (!INIT.initialized) {
init();
}
return run(INIT.parser.start, str, run$1(INIT.scanner.start, str));
}
tokenize.scan = run$1; // for testing
/**
* Find a list of linkable items in the given string.
* @param {string} str string to find links in
* @param {string | Opts} [type] either formatting options or specific type of
* links to find, e.g., 'url' or 'email'
* @param {Opts} [opts] formatting options for final output. Cannot be specified
* if opts already provided in `type` argument
*/
function find(str, type = null, opts = null) {
if (type && typeof type === 'object') {
if (opts) {
throw Error(`linkifyjs: Invalid link type ${type}; must be a string`);
}
opts = type;
type = null;
}
const options = new Options(opts);
const tokens = tokenize(str);
const filtered = [];
for (let i = 0; i < tokens.length; i++) {
const token = tokens[i];
if (token.isLink && (!type || token.t === type) && options.check(token)) {
filtered.push(token.toFormattedObject(options));
}
}
return filtered;
}
/**
* Is the given string valid linkable text of some sort. Note that this does not
* trim the text for you.
*
* Optionally pass in a second `type` param, which is the type of link to test
* for.
*
* For example,
*
* linkify.test(str, 'email');
*
* Returns `true` if str is a valid email.
* @param {string} str string to test for links
* @param {string} [type] optional specific link type to look for
* @returns boolean true/false
*/
function test(str, type = null) {
const tokens = tokenize(str);
return tokens.length === 1 && tokens[0].isLink && (!type || tokens[0].t === type);
}
export { MultiToken, Options, State, createTokenClass, find, init, multi, options, regexp, registerCustomProtocol, registerPlugin, registerTokenPlugin, reset, stringToArray, test, multi as text, tokenize };