2023-12-13 11:23:54 +01:00
|
|
|
|
// THIS FILE IS AUTOMATICALLY GENERATED DO NOT EDIT DIRECTLY
|
|
|
|
|
|
// See update-tlds.js for encoding/decoding format
|
|
|
|
|
|
// https://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
2025-07-30 10:32:43 +02:00
|
|
|
|
const encodedTlds = 'aaa1rp3bb0ott3vie4c1le2ogado5udhabi7c0ademy5centure6ountant0s9o1tor4d0s1ult4e0g1ro2tna4f0l1rica5g0akhan5ency5i0g1rbus3force5tel5kdn3l0ibaba4pay4lfinanz6state5y2sace3tom5m0azon4ericanexpress7family11x2fam3ica3sterdam8nalytics7droid5quan4z2o0l2partments8p0le4q0uarelle8r0ab1mco4chi3my2pa2t0e3s0da2ia2sociates9t0hleta5torney7u0ction5di0ble3o3spost5thor3o0s4w0s2x0a2z0ure5ba0by2idu3namex4d1k2r0celona5laycard4s5efoot5gains6seball5ketball8uhaus5yern5b0c1t1va3cg1n2d1e0ats2uty4er2rlin4st0buy5t2f1g1h0arti5i0ble3d1ke2ng0o3o1z2j1lack0friday9ockbuster8g1omberg7ue3m0s1w2n0pparibas9o0ats3ehringer8fa2m1nd2o0k0ing5sch2tik2on4t1utique6x2r0adesco6idgestone9oadway5ker3ther5ussels7s1t1uild0ers6siness6y1zz3v1w1y1z0h3ca0b1fe2l0l1vinklein9m0era3p2non3petown5ital0one8r0avan4ds2e0er0s4s2sa1e1h1ino4t0ering5holic7ba1n1re3c1d1enter4o1rn3f0a1d2g1h0anel2nel4rity4se2t2eap3intai5ristmas6ome4urch5i0priani6rcle4sco3tadel4i0c2y3k1l0aims4eaning6ick2nic1que6othing5ud3ub0med6m1n1o0ach3des3ffee4llege4ogne5m0mbank4unity6pany2re3uter5sec4ndos3struction8ulting7tact3ractors9oking4l1p2rsica5untry4pon0s4rses6pa2r0edit0card4union9icket5own3s1uise0s6u0isinella9v1w1x1y0mru3ou3z2dad1nce3ta1e1ing3sun4y2clk3ds2e0al0er2s3gree4livery5l1oitte5ta3mocrat6ntal2ist5si0gn4v2hl2iamonds6et2gital5rect0ory7scount3ver5h2y2j1k1m1np2o0cs1tor4g1mains5t1wnload7rive4tv2ubai3nlop4pont4rban5vag2r2z2earth3t2c0o2deka3u0cation8e1g1mail3erck5nergy4gineer0ing9terprises10pson4quipment8r0icsson6ni3s0q1tate5t1u0rovision8s2vents5xchange6pert3osed4ress5traspace10fage2il1rwinds6th3mily4n0s2rm0ers5shion4t3edex3edback6rrari3ero6i0delity5o2lm2nal1nce1ial7re0stone6mdale6sh0ing5t0ness6j1k1lickr3ghts4r2orist4wers5y2m1o0o0d1tball6rd1ex2sale4um3undation8x2r0ee1senius7l1ogans4ntier7tr2ujitsu5n0d2rniture7tbol5yi3ga0l0lery3o1up4me0s3p1rden4y2b0iz3d0n2e0a1nt0ing5orge5f1g0ee3h1i0ft0s3ves2ing5l0ass3e1obal2o4m0ail3bh2o1x2n1odaddy5ld0point6f2o0dyear5g0le4p1t1v2p1q1r0ainger5phics5tis4een3ipe3ocery4up4s1t1u0cci3ge2ide2tars5ru3w1y2hair2mburg5ngout
5us3bo2dfc0bank7ealth0care8lp1sinki6re1mes5iphop4samitsu7tachi5v2k0t2m1n1ockey4ldings5iday5medepot5goods5s0ense7nda3rse3spital5t0ing5t0els3mail5use3w2r1sbc3t1u0ghes5yatt3undai7ibm2cbc2e1u2d1e0ee3fm2kano4l1m0amat4db2mo0bilien9n0c1dustries8finiti5o2g1k1stitute6urance4e4t0ernational10uit4vestments10o1piranga7q1r0ish4s0maili5t0anbul7t0au2v3jaguar4va3cb2e0ep2tzt3welry6io2ll2m0p2nj2o0bs1urg4t1y2p0morgan6rs3uegos4niper7kaufen5ddi3e0rryhotels6properties14fh2g1h1i0a1ds2m1ndle4tchen5wi3m1n1oeln3matsu5sher5p0mg2n2r0d1ed3uokgroup8w1y0oto4z2la0caixa5mborghini8er3nd0rover6xess5salle5t0ino3robe5w0yer5b1c1ds2ease3clerc5frak4gal2o2xus4gbt3i0dl2fe0insurance9style7ghting6ke2lly3mited4o2ncoln4k2ve1ing5k1lc1p2oan0s3cker3us3l1ndon4tte1o3ve3pl0financial11r1s1t0d0a3u0ndbeck6xe1ury5v1y2ma0drid4if1son4keup4n0agement7go3p1rket0ing3s4riott5shalls7ttel5ba2c0kinsey7d1e0d0ia3et2lbourne7me1orial6n0u2rckmsd7g1h1iami3crosoft7l1ni1t2t0subishi9k1l0b1s2m0a2n1o0bi0le4da2e1i1m1nash3ey2ster5rmon3tgage6scow4to0rcycles9v0ie4p1q1r1s0d2t0n1r2u0seum3ic4v1w1x1y1z2na0b1goya4me2vy3ba2c1e0c1t0bank4flix4work5ustar5w0s2xt0direct7us4f0l2g0o2hk2i0co2ke1on3nja3ssan1y5l1o0kia3rton4w0ruz3tv4p1r0a1w2tt2u1yc2z2obi1server7ffice5kinawa6layan0group9lo3m0ega4ne1g1l0ine5oo2pen3racle3nge4g0anic5igins6saka4tsuka4t2vh3pa0ge2nasonic7ris2s1tners4s1y3y2ccw3e0t2f0izer5g1h0armacy6d1ilips5one2to0graphy6s4ysio5ics1tet2ures6d1n0g1k2oneer5zza4k1l0ace2y0station9umbing5s3m1n0c2ohl2ker3litie5rn2st3r0axi3ess3ime3o0d0uctions8f1gressive8mo2perties3y5tection8u0dential9s1t1ub2w0c2y2qa1pon3uebec3st5racing4dio4e0ad1lestate6tor2y4cipes5d0stone5umbrella9hab3ise0n3t2liance6n0t0als5pair3ort3ublican8st0aurant8view0s5xroth6ich0ardli6oh3l1o1p2o0cks3deo3gers4om3s0vp3u0gby3hr2n2w0e2yukyu6sa0arland6fe0ty4kura4le1on3msclub4ung5ndvik0coromant12ofi4p1rl2s1ve2xo3b0i1s2c0b1haeffler7midt4olarships8ol3ule3warz5ience5ot3d1e0arch3t2cure1ity6ek2lect4ner3rvices6ven3w1x0y3fr2g1h0angrila6rp3ell3ia1ksha5oes2p0ping5uji3w3i0lk2na1gles5te3j1k0i0n2y0pe4l0ing4m0art3ile4n0cf3o0c
cer3ial4ftbank4ware6hu2lar2utions7ng1y2y2pa0ce3ort2t3r0l2s1t0ada2ples4r1tebank4farm7c0group6ock
|
2023-12-13 11:23:54 +01:00
|
|
|
|
// Internationalized domain names containing non-ASCII
|
|
|
|
|
|
const encodedUtlds = 'ελ1υ2бг1ел3дети4ею2католик6ом3мкд2он1сква6онлайн5рг3рус2ф2сайт3рб3укр3қаз3հայ3ישראל5קום3ابوظبي5رامكو5لاردن4بحرين5جزائر5سعودية6عليان5مغرب5مارات5یران5بارت2زار4يتك3ھارت5تونس4سودان3رية5شبكة4عراق2ب2مان4فلسطين6قطر3كاثوليك6وم3مصر2ليسيا5وريتانيا7قع4همراه5پاکستان7ڀارت4कॉम3नेट3भारत0म्3ोत5संगठन5বাংলা5ভারত2ৰত4ਭਾਰਤ4ભારત4ଭାରତ4இந்தியா6லங்கை6சிங்கப்பூர்11భారత్5ಭಾರತ4ഭാരതം5ලංකා4คอม3ไทย3ລາວ3გე2みんな3アマゾン4クラウド4グーグル4コム2ストア3セール3ファッション6ポイント4世界2中信1国1國1文网3亚马逊3企业2佛山2信息2健康2八卦2公司1益2台湾1灣2商城1店1标2嘉里0大酒店5在线2大拿2天主教3娱乐2家電2广东2微博2慈善2我爱你3手机2招聘2政务1府2新加坡2闻2时尚2書籍2机构2淡马锡3游戏2澳門2点看2移动2组织机构4网址1店1站1络2联通2谷歌2购物2通販2集团2電訊盈科4飞利浦3食品2餐厅2香格里拉3港2닷넷1컴2삼성2한국2';
|
|
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
|
|
|
|
|
|
* Finite State Machine generation utilities
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
|
|
|
|
|
|
* @template T
|
|
|
|
|
|
* @typedef {{ [group: string]: T[] }} Collections
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* @typedef {{ [group: string]: true }} Flags
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
// Keys in scanner Collections instances.
// Each constant is the name of one token group; the same names are used as
// keys of the Flags objects handed to addToGroups().
const numeric = 'numeric';
const ascii = 'ascii';
const alpha = 'alpha';
const asciinumeric = 'asciinumeric';
const alphanumeric = 'alphanumeric';
const domain = 'domain';
const emoji = 'emoji';
const scheme = 'scheme';
const slashscheme = 'slashscheme';
const whitespace = 'whitespace';
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Return the token list stored under `name`, creating an empty list first when
 * the collection has no entry of its own for that name.
 *
 * @template T
 * @param {string} name Group name to look up or create
 * @param {Collections<T>} groups Collection to register in (mutated when the
 * group does not exist yet)
 * @returns {T[]} Current list of tokens in the given collection
 */
function registerGroup(name, groups) {
  // Own-property check on purpose: `name in groups` also matches members
  // inherited from Object.prototype (e.g. "constructor", "toString"), which
  // would make this function return a non-array.
  if (!Object.prototype.hasOwnProperty.call(groups, name)) {
    groups[name] = [];
  }
  return groups[name];
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Register a token in every group named by `flags`, after expanding the flags
 * with the groups they imply (for example a `numeric` token is also
 * `asciinumeric`, `alphanumeric` and `domain`).
 *
 * Note that `flags` is expanded in place, so the caller's object gains the
 * implied keys.
 *
 * @template T
 * @param {T} t token to add
 * @param {Flags} flags groups the token belongs to (mutated)
 * @param {Collections<T>} groups collection that receives the token (mutated)
 */
function addToGroups(t, flags, groups) {
  // Expand implied flags. The order matters: later checks read keys that
  // earlier checks may just have set.
  if (flags[numeric]) {
    flags[asciinumeric] = true;
    flags[alphanumeric] = true;
  }
  if (flags[ascii]) {
    flags[asciinumeric] = true;
    flags[alpha] = true;
  }
  if (flags[asciinumeric]) {
    flags[alphanumeric] = true;
  }
  if (flags[alpha]) {
    flags[alphanumeric] = true;
  }
  if (flags[alphanumeric]) {
    flags[domain] = true;
  }
  if (flags[emoji]) {
    flags[domain] = true;
  }

  // Add the token once to each flagged group, creating groups on demand
  for (const groupName in flags) {
    const members = registerGroup(groupName, groups);
    if (!members.includes(t)) {
      members.push(t);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build the flags object for a token: one `true` entry for every group whose
 * list contains the token.
 *
 * @template T
 * @param {T} t token to check
 * @param {Collections<T>} groups
 * @returns {Flags} group flags that contain this token
 */
function flagsForToken(t, groups) {
  const matching = {};
  for (const groupName in groups) {
    if (groups[groupName].includes(t)) {
      matching[groupName] = true;
    }
  }
  return matching;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* @template T
|
|
|
|
|
|
* @typedef {null | T } Transition
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Define a basic state machine state. `j` is the list of character
 * transitions, `jr` is the list of regex-match transitions, `jd` is the default
 * state to transition to, and `t` is the accepting token type, if any. If this
 * is the terminal state, then it does not emit a token.
 *
 * The template type T represents the type of the token this state accepts. This
 * should be a string (such as of the token exports in `text.js`) or a
 * MultiToken subclass (from `multi.js`)
 *
 * @template T
 * @param {T} [token] Token that this state emits
 */
function State(token = null) {
  // this.n = null; // DEBUG: State name
  /** @type {{ [input: string]: State<T> }} j */
  this.j = {};
  /** @type {[RegExp, State<T>][]} jr */
  this.jr = [];
  /** @type {?State<T>} jd */
  this.jd = null;
  /** @type {?T} t */
  this.t = token;
}

/**
 * Scanner token groups
 * @type Collections<string>
 */
State.groups = {};

State.prototype = {
  /**
   * @returns {boolean} whether this state emits a token
   */
  accepts() {
    return !!this.t;
  },

  /**
   * Follow an existing transition from the given input to the next state.
   * Does not mutate.
   * @param {string} input character or token type to transition on
   * @returns {?State<T>} the next state, if any
   */
  go(input) {
    const state = this;

    // Exact transitions first. `j` is a plain object, so only own properties
    // count: a bare `state.j[input]` would resolve inherited members such as
    // "constructor" and hand back a function instead of a State.
    if (Object.prototype.hasOwnProperty.call(state.j, input)) {
      const exact = state.j[input];
      if (exact) {
        return exact;
      }
    }

    // Then regex transitions, in the order they were registered
    for (let i = 0; i < state.jr.length; i++) {
      const regex = state.jr[i][0];
      const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
      if (nextState && regex.test(input)) {
        return nextState;
      }
    }

    // Nowhere left to jump! Return default, if any
    return state.jd;
  },

  /**
   * Whether the state has a transition for the given input. Set the second
   * argument to true to only look for an exact match (and not a default or
   * regular-expression-based transition)
   * @param {string} input
   * @param {boolean} exactOnly
   * @returns {boolean}
   */
  has(input, exactOnly = false) {
    // Own-property check for the same reason as in go(): inherited
    // Object.prototype members are not transitions.
    return exactOnly ? Object.prototype.hasOwnProperty.call(this.j, input) : !!this.go(input);
  },

  /**
   * Short for "transition all"; create a transition from the array of items
   * in the given list to the same final resulting state.
   * @param {string | string[]} inputs Group of inputs to transition on
   * @param {Transition<T> | State<T>} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections<T>} [groups] Master list of token groups
   */
  ta(inputs, next, flags, groups) {
    for (let i = 0; i < inputs.length; i++) {
      this.tt(inputs[i], next, flags, groups);
    }
  },

  /**
   * Short for "take regexp transition"; defines a transition for this state
   * when it encounters a token which matches the given regular expression
   * @param {RegExp} regexp Regular expression transition (populate first)
   * @param {T | State<T>} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections<T>} [groups] Master list of token groups
   * @returns {State<T>} taken after the given input
   */
  tr(regexp, next, flags, groups) {
    const tokenGroups = groups || State.groups;
    let nextState;
    if (next && next.j) {
      // An existing state was given: link to it directly
      nextState = next;
    } else {
      // Token with maybe token groups
      nextState = new State(next);
      if (flags && tokenGroups) {
        addToGroups(next, flags, tokenGroups);
      }
    }
    this.jr.push([regexp, nextState]);
    return nextState;
  },

  /**
   * Short for "take transitions", will take as many sequential transitions as
   * the length of the given input and returns the
   * resulting final state.
   * @param {string | string[]} input
   * @param {T | State<T>} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections<T>} [groups] Master list of token groups
   * @returns {State<T>} taken after the given input
   */
  ts(input, next, flags, groups) {
    let state = this;
    const len = input.length;
    if (!len) {
      return state;
    }
    // Intermediate steps carry no token; only the last step receives `next`
    for (let i = 0; i < len - 1; i++) {
      state = state.tt(input[i]);
    }
    return state.tt(input[len - 1], next, flags, groups);
  },

  /**
   * Short for "take transition", this is a method for building/working with
   * state machines.
   *
   * If a state is given for the second argument, that state will be
   * transitioned to on the given input regardless of what that input
   * previously did.
   *
   * Otherwise a new state is created for the input. If the input already
   * leads somewhere (through an exact, regex or default transition), the new
   * state starts as a copy of that target's transitions and token, so it
   * resembles the existing behaviour.
   *
   * If a token is specified, that state will emit that token when reached by
   * the linkify engine.
   *
   * Specify a token group flags to define groups that this token belongs to.
   * The token will be added to corresponding entries in the given groups
   * object.
   *
   * @param {string} input character, token type to transition on
   * @param {T | State<T>} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections<T>} [groups] Master list of groups
   * @returns {State<T>} taken after the given input
   */
  tt(input, next, flags, groups) {
    const tokenGroups = groups || State.groups;
    const state = this;

    // Check if existing state given, just a basic transition
    if (next && next.j) {
      state.j[input] = next;
      return next;
    }
    const t = next;

    // Take the transition with the usual default mechanisms and use that as
    // a template for creating the next state
    const nextState = new State();
    const templateState = state.go(input);
    if (templateState) {
      Object.assign(nextState.j, templateState.j);
      nextState.jr.push(...templateState.jr);
      nextState.jd = templateState.jd;
      nextState.t = templateState.t;
    }

    if (t) {
      // Ensure the new token is in the same groups as the old token
      if (tokenGroups) {
        if (nextState.t && typeof nextState.t === 'string') {
          const allFlags = Object.assign(flagsForToken(nextState.t, tokenGroups), flags);
          addToGroups(t, allFlags, tokenGroups);
        } else if (flags) {
          addToGroups(t, flags, tokenGroups);
        }
      }
      nextState.t = t; // overwrite anything that was previously there
    }

    state.j[input] = nextState;
    return nextState;
  }
};
|
|
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
// Helper functions to improve minification (not exported outside linkifyjs module)
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * Function form of `state.ta(...)`: forwards all arguments to the `ta` method
 * of the given state.
 *
 * @template T
 * @param {State<T>} state
 * @param {string | string[]} input
 * @param {T | State<T>} [next]
 * @param {Flags} [flags]
 * @param {Collections<T>} [groups]
 */
const ta = (state, input, next, flags, groups) => {
  return state.ta(input, next, flags, groups);
};
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * Function form of `state.tr(...)`: forwards all arguments to the `tr` method
 * of the given state and returns its result.
 *
 * @template T
 * @param {State<T>} state
 * @param {RegExp} regexp
 * @param {T | State<T>} [next]
 * @param {Flags} [flags]
 * @param {Collections<T>} [groups]
 */
const tr = (state, regexp, next, flags, groups) => {
  return state.tr(regexp, next, flags, groups);
};
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * Function form of `state.ts(...)`: forwards all arguments to the `ts` method
 * of the given state and returns its result.
 *
 * @template T
 * @param {State<T>} state
 * @param {string | string[]} input
 * @param {T | State<T>} [next]
 * @param {Flags} [flags]
 * @param {Collections<T>} [groups]
 */
const ts = (state, input, next, flags, groups) => {
  return state.ts(input, next, flags, groups);
};
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * Function form of `state.tt(...)`: forwards all arguments to the `tt` method
 * of the given state and returns its result.
 *
 * @template T
 * @param {State<T>} state
 * @param {string} input
 * @param {T | State<T>} [next]
 * @param {Flags} [flags]
 * @param {Collections<T>} [groups]
 */
const tt = (state, input, next, flags, groups) => {
  return state.tt(input, next, flags, groups);
};
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/******************************************************************************
|
2023-12-13 11:23:54 +01:00
|
|
|
|
Text Tokens
|
|
|
|
|
|
Identifiers for token outputs from the regexp scanner
|
2024-07-24 13:06:03 +02:00
|
|
|
|
******************************************************************************/
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
// A valid web domain token
const WORD = 'WORD'; // only contains a-z
const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN
const ASCIINUMERICAL = 'ASCIINUMERICAL'; // contains a-z, 0-9
const ALPHANUMERICAL = 'ALPHANUMERICAL'; // contains numbers and letters other than a-z, used for IDN

// Special case of word
const LOCALHOST = 'LOCALHOST';

// Valid top-level domain, special case of WORD (see tlds.js)
const TLD = 'TLD';

// Valid IDN TLD, special case of UWORD (see tlds.js)
const UTLD = 'UTLD';

// The scheme portion of a web URI protocol. Supported types include: `mailto`,
// `file`, and user-defined custom protocols. Limited to schemes that contain
// only letters
const SCHEME = 'SCHEME';

// Similar to SCHEME, except makes distinction for schemes that must always be
// followed by `://`, not just `:`. Supported types include `http`, `https`,
// `ftp`, `ftps`
const SLASH_SCHEME = 'SLASH_SCHEME';

// Any sequence of digits 0-9
const NUM = 'NUM';

// Any number of consecutive whitespace characters that are not newline
const WS = 'WS';

// New line (unix style)
const NL = 'NL'; // \n

// Opening/closing bracket classes
// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names
// Also rename angle brackets to LESSTHAN and GREATERTHAN
const OPENBRACE = 'OPENBRACE'; // {
const CLOSEBRACE = 'CLOSEBRACE'; // }
const OPENBRACKET = 'OPENBRACKET'; // [
const CLOSEBRACKET = 'CLOSEBRACKET'; // ]
const OPENPAREN = 'OPENPAREN'; // (
const CLOSEPAREN = 'CLOSEPAREN'; // )
const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
// NOTE(review): the four FULLWIDTH* bracket tokens below presumably stand for
// the full-width forms (U+FF08, U+FF09, U+FF1C, U+FF1E), not the ASCII
// look-alikes - confirm against the scanner's transition table.
const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; // （
const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; // ）
const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「
const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」
const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『
const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』
const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; // ＜
const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; // ＞

// Various symbols
const AMPERSAND = 'AMPERSAND'; // &
const APOSTROPHE = 'APOSTROPHE'; // '
const ASTERISK = 'ASTERISK'; // *
const AT = 'AT'; // @
const BACKSLASH = 'BACKSLASH'; // \ (backslash)
const BACKTICK = 'BACKTICK'; // `
const CARET = 'CARET'; // ^
const COLON = 'COLON'; // :
const COMMA = 'COMMA'; // ,
const DOLLAR = 'DOLLAR'; // $
const DOT = 'DOT'; // .
const EQUALS = 'EQUALS'; // =
const EXCLAMATION = 'EXCLAMATION'; // !
const HYPHEN = 'HYPHEN'; // -
const PERCENT = 'PERCENT'; // %
const PIPE = 'PIPE'; // |
const PLUS = 'PLUS'; // +
const POUND = 'POUND'; // #
const QUERY = 'QUERY'; // ?
const QUOTE = 'QUOTE'; // "
const FULLWIDTHMIDDLEDOT = 'FULLWIDTHMIDDLEDOT'; // ・

const SEMI = 'SEMI'; // ;
const SLASH = 'SLASH'; // /
const TILDE = 'TILDE'; // ~
const UNDERSCORE = 'UNDERSCORE'; // _

// Emoji symbol
const EMOJI$1 = 'EMOJI';

// Default token - anything that is not one of the above
const SYM = 'SYM';

// Frozen, prototype-less namespace object exposing every token name above.
// Keys are kept in alphabetical order; EMOJI maps to the EMOJI$1 binding.
var tk = /*#__PURE__*/Object.freeze({
  __proto__: null,
  ALPHANUMERICAL,
  AMPERSAND,
  APOSTROPHE,
  ASCIINUMERICAL,
  ASTERISK,
  AT,
  BACKSLASH,
  BACKTICK,
  CARET,
  CLOSEANGLEBRACKET,
  CLOSEBRACE,
  CLOSEBRACKET,
  CLOSEPAREN,
  COLON,
  COMMA,
  DOLLAR,
  DOT,
  EMOJI: EMOJI$1,
  EQUALS,
  EXCLAMATION,
  FULLWIDTHGREATERTHAN,
  FULLWIDTHLEFTPAREN,
  FULLWIDTHLESSTHAN,
  FULLWIDTHMIDDLEDOT,
  FULLWIDTHRIGHTPAREN,
  HYPHEN,
  LEFTCORNERBRACKET,
  LEFTWHITECORNERBRACKET,
  LOCALHOST,
  NL,
  NUM,
  OPENANGLEBRACKET,
  OPENBRACE,
  OPENBRACKET,
  OPENPAREN,
  PERCENT,
  PIPE,
  PLUS,
  POUND,
  QUERY,
  QUOTE,
  RIGHTCORNERBRACKET,
  RIGHTWHITECORNERBRACKET,
  SCHEME,
  SEMI,
  SLASH,
  SLASH_SCHEME,
  SYM,
  TILDE,
  TLD,
  UNDERSCORE,
  UTLD,
  UWORD,
  WORD,
  WS
});
|
|
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
// Single-character matchers used by the scanner.
// Note that the two Unicode ones (LETTER and EMOJI) expand into a really big
// one with Babel
const ASCII_LETTER = /[a-z]/;
const LETTER = /\p{L}/u; // Any Unicode character with letter data type
const EMOJI = /\p{Emoji}/u; // Any Unicode emoji character
const EMOJI_VARIATION$1 = /\ufe0f/;
const DIGIT = /\d/;
const SPACE = /\s/;

// Frozen, prototype-less namespace object exposing the matchers above.
// EMOJI_VARIATION maps to the EMOJI_VARIATION$1 binding.
var regexp = /*#__PURE__*/Object.freeze({
  __proto__: null,
  ASCII_LETTER,
  DIGIT,
  EMOJI,
  EMOJI_VARIATION: EMOJI_VARIATION$1,
  LETTER,
  SPACE
});
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
The scanner provides an interface that takes a string of text as input, and
|
|
|
|
|
|
outputs an array of tokens instances that can be used for easy URL parsing.
|
2024-07-24 13:06:03 +02:00
|
|
|
|
*/
|
2025-05-07 10:40:03 +02:00
|
|
|
|
|
|
|
|
|
|
// Individual characters the scanner treats specially
const CR = '\r'; // carriage-return character
const LF = '\n'; // line-feed character
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
const EMOJI_JOINER = '\u200d'; // zero-width joiner
const OBJECT_REPLACEMENT = '\ufffc'; // whitespace placeholder that sometimes appears in rich text editors

// Decoded TLD lists. They don't change, so they only have to be computed once;
// both stay null until they are first filled in.
let tlds = null;
let utlds = null;
|
2022-01-19 15:03:45 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
|
2023-12-13 11:23:54 +01:00
|
|
|
|
* Scanner output token:
|
|
|
|
|
|
* - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
|
|
|
|
|
|
* - `v` is the value of the token (e.g., '123', '❤️', 'com')
|
|
|
|
|
|
* - `s` is the start index of the token in the original string
|
|
|
|
|
|
* - `e` is the end index of the token in the original string
|
|
|
|
|
|
* @typedef {{t: string, v: string, s: number, e: number}} Token
|
2021-05-28 16:46:29 +02:00
|
|
|
|
*/
|
|
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
|
|
|
|
|
|
* @template T
|
|
|
|
|
|
* @typedef {{ [collection: string]: T[] }} Collections
|
|
|
|
|
|
*/
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
|
|
|
|
|
|
* Initialize the scanner character-based state machine for the given start
|
|
|
|
|
|
* state
|
|
|
|
|
|
* @param {[string, boolean][]} customSchemes List of custom schemes, where each
|
|
|
|
|
|
* item is a length-2 tuple with the first element set to the string scheme, and
|
|
|
|
|
|
* the second element set to `true` if the `://` after the scheme is optional
|
|
|
|
|
|
*/
|
2025-05-07 10:40:03 +02:00
|
|
|
|
function init$2(customSchemes = []) {
|
2024-07-24 13:06:03 +02:00
|
|
|
|
// Frequently used states (name argument removed during minification)
|
|
|
|
|
|
/** @type Collections<string> */
|
|
|
|
|
|
const groups = {}; // of tokens
|
|
|
|
|
|
State.groups = groups;
|
|
|
|
|
|
/** @type State<string> */
|
|
|
|
|
|
const Start = new State();
|
|
|
|
|
|
if (tlds == null) {
|
|
|
|
|
|
tlds = decodeTlds(encodedTlds);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (utlds == null) {
|
|
|
|
|
|
utlds = decodeTlds(encodedUtlds);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// States for special URL symbols that accept immediately after start
|
|
|
|
|
|
tt(Start, "'", APOSTROPHE);
|
|
|
|
|
|
tt(Start, '{', OPENBRACE);
|
|
|
|
|
|
tt(Start, '}', CLOSEBRACE);
|
|
|
|
|
|
tt(Start, '[', OPENBRACKET);
|
|
|
|
|
|
tt(Start, ']', CLOSEBRACKET);
|
|
|
|
|
|
tt(Start, '(', OPENPAREN);
|
|
|
|
|
|
tt(Start, ')', CLOSEPAREN);
|
|
|
|
|
|
tt(Start, '<', OPENANGLEBRACKET);
|
|
|
|
|
|
tt(Start, '>', CLOSEANGLEBRACKET);
|
|
|
|
|
|
tt(Start, '(', FULLWIDTHLEFTPAREN);
|
|
|
|
|
|
tt(Start, ')', FULLWIDTHRIGHTPAREN);
|
|
|
|
|
|
tt(Start, '「', LEFTCORNERBRACKET);
|
|
|
|
|
|
tt(Start, '」', RIGHTCORNERBRACKET);
|
|
|
|
|
|
tt(Start, '『', LEFTWHITECORNERBRACKET);
|
|
|
|
|
|
tt(Start, '』', RIGHTWHITECORNERBRACKET);
|
|
|
|
|
|
tt(Start, '<', FULLWIDTHLESSTHAN);
|
|
|
|
|
|
tt(Start, '>', FULLWIDTHGREATERTHAN);
|
|
|
|
|
|
tt(Start, '&', AMPERSAND);
|
|
|
|
|
|
tt(Start, '*', ASTERISK);
|
|
|
|
|
|
tt(Start, '@', AT);
|
|
|
|
|
|
tt(Start, '`', BACKTICK);
|
|
|
|
|
|
tt(Start, '^', CARET);
|
|
|
|
|
|
tt(Start, ':', COLON);
|
|
|
|
|
|
tt(Start, ',', COMMA);
|
|
|
|
|
|
tt(Start, '$', DOLLAR);
|
|
|
|
|
|
tt(Start, '.', DOT);
|
|
|
|
|
|
tt(Start, '=', EQUALS);
|
|
|
|
|
|
tt(Start, '!', EXCLAMATION);
|
|
|
|
|
|
tt(Start, '-', HYPHEN);
|
|
|
|
|
|
tt(Start, '%', PERCENT);
|
|
|
|
|
|
tt(Start, '|', PIPE);
|
|
|
|
|
|
tt(Start, '+', PLUS);
|
|
|
|
|
|
tt(Start, '#', POUND);
|
|
|
|
|
|
tt(Start, '?', QUERY);
|
|
|
|
|
|
tt(Start, '"', QUOTE);
|
|
|
|
|
|
tt(Start, '/', SLASH);
|
|
|
|
|
|
tt(Start, ';', SEMI);
|
|
|
|
|
|
tt(Start, '~', TILDE);
|
|
|
|
|
|
tt(Start, '_', UNDERSCORE);
|
|
|
|
|
|
tt(Start, '\\', BACKSLASH);
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tt(Start, '・', FULLWIDTHMIDDLEDOT);
|
2024-07-24 13:06:03 +02:00
|
|
|
|
const Num = tr(Start, DIGIT, NUM, {
|
|
|
|
|
|
[numeric]: true
|
|
|
|
|
|
});
|
|
|
|
|
|
tr(Num, DIGIT, Num);
|
2025-05-07 10:40:03 +02:00
|
|
|
|
const Asciinumeric = tr(Num, ASCII_LETTER, ASCIINUMERICAL, {
|
|
|
|
|
|
[asciinumeric]: true
|
|
|
|
|
|
});
|
|
|
|
|
|
const Alphanumeric = tr(Num, LETTER, ALPHANUMERICAL, {
|
|
|
|
|
|
[alphanumeric]: true
|
|
|
|
|
|
});
|
2024-07-24 13:06:03 +02:00
|
|
|
|
|
|
|
|
|
|
// State which emits a word token
|
|
|
|
|
|
const Word = tr(Start, ASCII_LETTER, WORD, {
|
|
|
|
|
|
[ascii]: true
|
|
|
|
|
|
});
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tr(Word, DIGIT, Asciinumeric);
|
2024-07-24 13:06:03 +02:00
|
|
|
|
tr(Word, ASCII_LETTER, Word);
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tr(Asciinumeric, DIGIT, Asciinumeric);
|
|
|
|
|
|
tr(Asciinumeric, ASCII_LETTER, Asciinumeric);
|
2024-07-24 13:06:03 +02:00
|
|
|
|
|
|
|
|
|
|
// Same as previous, but specific to non-fsm.ascii alphabet words
|
|
|
|
|
|
const UWord = tr(Start, LETTER, UWORD, {
|
|
|
|
|
|
[alpha]: true
|
|
|
|
|
|
});
|
|
|
|
|
|
tr(UWord, ASCII_LETTER); // Non-accepting
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tr(UWord, DIGIT, Alphanumeric);
|
2024-07-24 13:06:03 +02:00
|
|
|
|
tr(UWord, LETTER, UWord);
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tr(Alphanumeric, DIGIT, Alphanumeric);
|
|
|
|
|
|
tr(Alphanumeric, ASCII_LETTER); // Non-accepting
|
|
|
|
|
|
tr(Alphanumeric, LETTER, Alphanumeric); // Non-accepting
|
2024-07-24 13:06:03 +02:00
|
|
|
|
|
|
|
|
|
|
// Whitespace jumps
|
|
|
|
|
|
// Tokens of only non-newline whitespace are arbitrarily long
|
|
|
|
|
|
// If any whitespace except newline, more whitespace!
|
2025-05-07 10:40:03 +02:00
|
|
|
|
const Nl = tt(Start, LF, NL, {
|
|
|
|
|
|
[whitespace]: true
|
|
|
|
|
|
});
|
|
|
|
|
|
const Cr = tt(Start, CR, WS, {
|
2024-07-24 13:06:03 +02:00
|
|
|
|
[whitespace]: true
|
|
|
|
|
|
});
|
2025-05-07 10:40:03 +02:00
|
|
|
|
const Ws = tr(Start, SPACE, WS, {
|
2024-07-24 13:06:03 +02:00
|
|
|
|
[whitespace]: true
|
|
|
|
|
|
});
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tt(Start, OBJECT_REPLACEMENT, Ws);
|
|
|
|
|
|
tt(Cr, LF, Nl); // \r\n
|
|
|
|
|
|
tt(Cr, OBJECT_REPLACEMENT, Ws);
|
|
|
|
|
|
tr(Cr, SPACE, Ws);
|
|
|
|
|
|
tt(Ws, CR); // non-accepting state to avoid mixing whitespaces
|
|
|
|
|
|
tt(Ws, LF); // non-accepting state to avoid mixing whitespaces
|
2024-07-24 13:06:03 +02:00
|
|
|
|
tr(Ws, SPACE, Ws);
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tt(Ws, OBJECT_REPLACEMENT, Ws);
|
2024-07-24 13:06:03 +02:00
|
|
|
|
|
|
|
|
|
|
// Emoji tokens. They are not grouped by the scanner except in cases where a
|
|
|
|
|
|
// zero-width joiner is present
|
|
|
|
|
|
const Emoji = tr(Start, EMOJI, EMOJI$1, {
|
|
|
|
|
|
[emoji]: true
|
|
|
|
|
|
});
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tt(Emoji, '#'); // no transition, emoji regex seems to match #
|
2024-07-24 13:06:03 +02:00
|
|
|
|
tr(Emoji, EMOJI, Emoji);
|
|
|
|
|
|
tt(Emoji, EMOJI_VARIATION, Emoji);
|
|
|
|
|
|
// tt(Start, EMOJI_VARIATION, Emoji); // This one is sketchy
|
|
|
|
|
|
|
|
|
|
|
|
const EmojiJoiner = tt(Emoji, EMOJI_JOINER);
|
2025-05-07 10:40:03 +02:00
|
|
|
|
tt(EmojiJoiner, '#');
|
2024-07-24 13:06:03 +02:00
|
|
|
|
tr(EmojiJoiner, EMOJI, Emoji);
|
|
|
|
|
|
// tt(EmojiJoiner, EMOJI_VARIATION, Emoji); // also sketchy
|
|
|
|
|
|
|
|
|
|
|
|
// Generates states for top-level domains
|
|
|
|
|
|
// Note that this is most accurate when tlds are in alphabetical order
|
2025-05-07 10:40:03 +02:00
|
|
|
|
const wordjr = [[ASCII_LETTER, Word], [DIGIT, Asciinumeric]];
|
|
|
|
|
|
const uwordjr = [[ASCII_LETTER, null], [LETTER, UWord], [DIGIT, Alphanumeric]];
|
2024-07-24 13:06:03 +02:00
|
|
|
|
for (let i = 0; i < tlds.length; i++) {
|
|
|
|
|
|
fastts(Start, tlds[i], TLD, WORD, wordjr);
|
|
|
|
|
|
}
|
|
|
|
|
|
for (let i = 0; i < utlds.length; i++) {
|
|
|
|
|
|
fastts(Start, utlds[i], UTLD, UWORD, uwordjr);
|
|
|
|
|
|
}
|
|
|
|
|
|
addToGroups(TLD, {
|
|
|
|
|
|
tld: true,
|
|
|
|
|
|
ascii: true
|
|
|
|
|
|
}, groups);
|
|
|
|
|
|
addToGroups(UTLD, {
|
|
|
|
|
|
utld: true,
|
|
|
|
|
|
alpha: true
|
|
|
|
|
|
}, groups);
|
|
|
|
|
|
|
|
|
|
|
|
// Collect the states generated by different protocols. NOTE: If any new TLDs
|
|
|
|
|
|
// get added that are also protocols, set the token to be the same as the
|
|
|
|
|
|
// protocol to ensure parsing works as expected.
|
|
|
|
|
|
fastts(Start, 'file', SCHEME, WORD, wordjr);
|
|
|
|
|
|
fastts(Start, 'mailto', SCHEME, WORD, wordjr);
|
|
|
|
|
|
fastts(Start, 'http', SLASH_SCHEME, WORD, wordjr);
|
|
|
|
|
|
fastts(Start, 'https', SLASH_SCHEME, WORD, wordjr);
|
|
|
|
|
|
fastts(Start, 'ftp', SLASH_SCHEME, WORD, wordjr);
|
|
|
|
|
|
fastts(Start, 'ftps', SLASH_SCHEME, WORD, wordjr);
|
|
|
|
|
|
addToGroups(SCHEME, {
|
|
|
|
|
|
scheme: true,
|
|
|
|
|
|
ascii: true
|
|
|
|
|
|
}, groups);
|
|
|
|
|
|
addToGroups(SLASH_SCHEME, {
|
|
|
|
|
|
slashscheme: true,
|
|
|
|
|
|
ascii: true
|
|
|
|
|
|
}, groups);
|
|
|
|
|
|
|
|
|
|
|
|
// Register custom schemes. Assumes each scheme is asciinumeric with hyphens
|
|
|
|
|
|
customSchemes = customSchemes.sort((a, b) => a[0] > b[0] ? 1 : -1);
|
|
|
|
|
|
for (let i = 0; i < customSchemes.length; i++) {
|
|
|
|
|
|
const sch = customSchemes[i][0];
|
|
|
|
|
|
const optionalSlashSlash = customSchemes[i][1];
|
|
|
|
|
|
const flags = optionalSlashSlash ? {
|
|
|
|
|
|
[scheme]: true
|
|
|
|
|
|
} : {
|
|
|
|
|
|
[slashscheme]: true
|
|
|
|
|
|
};
|
|
|
|
|
|
if (sch.indexOf('-') >= 0) {
|
|
|
|
|
|
flags[domain] = true;
|
|
|
|
|
|
} else if (!ASCII_LETTER.test(sch)) {
|
|
|
|
|
|
flags[numeric] = true; // numbers only
|
|
|
|
|
|
} else if (DIGIT.test(sch)) {
|
|
|
|
|
|
flags[asciinumeric] = true;
|
|
|
|
|
|
} else {
|
|
|
|
|
|
flags[ascii] = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
ts(Start, sch, sch, flags);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Localhost token
|
|
|
|
|
|
ts(Start, 'localhost', LOCALHOST, {
|
|
|
|
|
|
ascii: true
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
// Set default transition for start state (some symbol)
|
|
|
|
|
|
Start.jd = new State(SYM);
|
|
|
|
|
|
return {
|
|
|
|
|
|
start: Start,
|
2025-07-30 10:32:43 +02:00
|
|
|
|
tokens: Object.assign({
|
2024-07-24 13:06:03 +02:00
|
|
|
|
groups
|
|
|
|
|
|
}, tk)
|
|
|
|
|
|
};
|
2021-05-28 16:46:29 +02:00
|
|
|
|
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Given a string, returns an array of token objects representing the
 * composition of that string.
 *
 * The state machine is walked greedily from `start`. Each emitted token ends
 * at the most recent accepting state reached before no further transition was
 * possible; any characters consumed after that state are handed back to the
 * next token.
 *
 * @method run
 * @param {State<string>} start scanner starting state
 * @param {string} str input string to scan
 * @return {Token[]} list of tokens, each with a type (`t`), the original-case
 * value (`v`), a start index (`s`) and an exclusive end index (`e`) into `str`
 */
function run$1(start, str) {
  // State machine is not case sensitive, so input is tokenized in lowercased
  // form (still returns regular case). Uses selective `toLowerCase` because
  // lowercasing the entire string causes the length and character position to
  // vary in some non-English strings with V8-based runtimes.
  const iterable = stringToArray(str.replace(/[A-Z]/g, c => c.toLowerCase()));
  const charCount = iterable.length; // <= str.length if there are emojis, etc
  const tokens = []; // return value

  // Cursor through the string itself, accounting for characters that have
  // width with length 2 such as emojis
  let cursor = 0;

  // Cursor through the array-representation of the string
  let charCursor = 0;

  // Tokenize the string
  while (charCursor < charCount) {
    let state = start;
    let nextState = null;
    let tokenLength = 0;
    let latestAccepting = null;
    // Both counters stay at -1 until the first accepting state is seen
    let sinceAccepts = -1; // string indexes consumed since the last accept
    let charsSinceAccepts = -1; // array entries consumed since the last accept

    while (charCursor < charCount && (nextState = state.go(iterable[charCursor]))) {
      const width = iterable[charCursor].length;
      state = nextState;

      // Keep track of the latest accepting state
      if (state.accepts()) {
        sinceAccepts = 0;
        charsSinceAccepts = 0;
        latestAccepting = state;
      } else if (sinceAccepts >= 0) {
        sinceAccepts += width;
        charsSinceAccepts++;
      }
      tokenLength += width;
      cursor += width;
      charCursor++;
    }

    // Roll back to the latest accepting state
    cursor -= sinceAccepts;
    charCursor -= charsSinceAccepts;
    tokenLength -= sinceAccepts;

    // No more jumps, just make a new token from the last accepting one.
    // NOTE(review): this reads `latestAccepting.t` unconditionally, so it
    // presumably relies on `start` always reaching an accepting state for any
    // first character — confirm against the scanner's start state.
    tokens.push({
      t: latestAccepting.t, // token type/name
      v: str.slice(cursor - tokenLength, cursor), // string value
      s: cursor - tokenLength, // start index
      e: cursor // end index (excluding)
    });
  }
  return tokens;
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2022-01-19 15:03:45 +01:00
|
|
|
|
/**
 * Convert a String to an Array of characters, taking into account that some
 * characters like emojis take up two string indexes.
 *
 * A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
 * (0xDC00-0xDFFF) is kept together as one two-index entry; every other code
 * unit, including an unpaired surrogate, becomes its own entry.
 *
 * Adapted from core-js (MIT license)
 * https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
 *
 * @function stringToArray
 * @param {string} str
 * @returns {string[]}
 */
function stringToArray(str) {
  const result = [];
  const len = str.length;
  let index = 0;
  while (index < len) {
    const first = str.charCodeAt(index);
    const isHighSurrogate = first >= 0xd800 && first <= 0xdbff;
    // NaN fails both range comparisons below, so "no second unit" means "no pair"
    const second = isHighSurrogate && index + 1 < len ? str.charCodeAt(index + 1) : NaN;
    const isPair = second >= 0xdc00 && second <= 0xdfff;
    const char = isPair
      ? str.slice(index, index + 2) // two-index characters
      : str[index]; // single character
    result.push(char);
    index += char.length;
  }
  return result;
}
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
 * Fast version of ts function for when transition defaults are well known.
 *
 * Follows (or creates) one state per character for all but the last character
 * of `input`, then always installs a brand-new state for the last character,
 * replacing any transition that was already registered for it.
 *
 * @param {State<string>} state state to start from
 * @param {string} input characters to register transitions for
 * @param {string} t token for the final state
 * @param {string} defaultt token for newly created intermediate states
 * @param {[RegExp, State<string>][]} jr regex transitions; each new state
 * receives its own shallow copy
 * @returns {State<string>} the newly created final state
 */
function fastts(state, input, t, defaultt, jr) {
  const len = input.length;
  let current = state;

  for (let i = 0; i < len - 1; i++) {
    const char = input[i];
    let next = current.j[char];
    if (!next) {
      next = new State(defaultt);
      next.jr = jr.slice();
      current.j[char] = next;
    }
    current = next;
  }

  const last = new State(t);
  last.jr = jr.slice();
  current.j[input[len - 1]] = last;
  return last;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a string of Top-Level Domain names encoded in update-tlds.js back
 * into a list of strings.
 *
 * The encoding is a flattened trie: a non-digit character descends one level,
 * and a run of digits marks the end of a word and gives the number of levels
 * to climb back up before continuing.
 *
 * @param {string} encoded encoded TLDs string
 * @returns {string[]} original TLDs list
 */
function decodeTlds(encoded) {
  const digits = '0123456789';
  const words = [];
  const stack = [];
  let i = 0;

  while (i < encoded.length) {
    // Measure the run of digits (if any) starting at the current position
    let popDigitCount = 0;
    while (i + popDigitCount < encoded.length && digits.includes(encoded[i + popDigitCount])) {
      popDigitCount++; // encountered some digits, have to pop to go one level up trie
    }

    if (popDigitCount > 0) {
      words.push(stack.join('')); // whatever preceded the pop digits must be a word
      const popCount = Number.parseInt(encoded.substring(i, i + popDigitCount), 10);
      // Climb `popCount` levels, never past the root
      stack.length = Math.max(0, stack.length - popCount);
      i += popDigitCount;
    } else {
      stack.push(encoded[i]); // drop down a level into the trie
      i++;
    }
  }
  return words;
}
|
|
|
|
|
|
|
2022-01-19 15:03:45 +01:00
|
|
|
|
/**
|
2023-12-13 11:23:54 +01:00
|
|
|
|
* An object where each key is a valid DOM Event Name such as `click` or `focus`
|
|
|
|
|
|
* and each value is an event handler function.
|
|
|
|
|
|
*
|
|
|
|
|
|
* https://developer.mozilla.org/en-US/docs/Web/API/Element#events
|
|
|
|
|
|
* @typedef {?{ [event: string]: Function }} EventListeners
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* All formatted properties required to render a link, including `tagName`,
|
|
|
|
|
|
* `attributes`, `content` and `eventListeners`.
|
|
|
|
|
|
* @typedef {{ tagName: any, attributes: {[attr: string]: any}, content: string,
|
|
|
|
|
|
* eventListeners: EventListeners }} IntermediateRepresentation
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Specify either an object described by the template type `O` or a function.
|
|
|
|
|
|
*
|
|
|
|
|
|
* The function takes a string value (usually the link's href attribute), the
|
|
|
|
|
|
 * link type (`'url'`, `'hashtag'`, etc.) and an internal token representation
|
|
|
|
|
|
* of the link. It should return an object of the template type `O`
|
|
|
|
|
|
* @template O
|
|
|
|
|
|
* @typedef {O | ((value: string, type: string, token: MultiToken) => O)} OptObj
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Specify either a function described by template type `F` or an object.
|
|
|
|
|
|
*
|
|
|
|
|
|
 * Each key in the object should be a link type (`'url'`, `'hashtag'`, etc.). Each
|
|
|
|
|
|
* value should be a function with template type `F` that is called when the
|
|
|
|
|
|
* corresponding link type is encountered.
|
|
|
|
|
|
* @template F
|
|
|
|
|
|
* @typedef {F | { [type: string]: F}} OptFn
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* Specify either a value with template type `V`, a function that returns `V` or
|
|
|
|
|
|
* an object where each value resolves to `V`.
|
|
|
|
|
|
*
|
|
|
|
|
|
* The function takes a string value (usually the link's href attribute), the
|
|
|
|
|
|
 * link type (`'url'`, `'hashtag'`, etc.) and an internal token representation
|
|
|
|
|
|
* of the link. It should return an object of the template type `V`
|
|
|
|
|
|
*
|
|
|
|
|
|
 * For the object, each key should be a link type (`'url'`, `'hashtag'`, etc.).
|
|
|
|
|
|
* Each value should either have type `V` or a function that returns V. This
|
|
|
|
|
|
* function similarly takes a string value and a token.
|
|
|
|
|
|
*
|
|
|
|
|
|
* Example valid types for `Opt<string>`:
|
|
|
|
|
|
*
|
|
|
|
|
|
* ```js
|
|
|
|
|
|
* 'hello'
|
|
|
|
|
|
* (value, type, token) => 'world'
|
|
|
|
|
|
* { url: 'hello', email: (value, token) => 'world'}
|
|
|
|
|
|
* ```
|
|
|
|
|
|
* @template V
|
|
|
|
|
|
* @typedef {V | ((value: string, type: string, token: MultiToken) => V) | { [type: string]: V | ((value: string, token: MultiToken) => V) }} Opt
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* See available options: https://linkify.js.org/docs/options.html
|
|
|
|
|
|
* @typedef {{
|
|
|
|
|
|
* defaultProtocol?: string,
|
|
|
|
|
|
* events?: OptObj<EventListeners>,
|
|
|
|
|
|
* format?: Opt<string>,
|
|
|
|
|
|
* formatHref?: Opt<string>,
|
|
|
|
|
|
* nl2br?: boolean,
|
|
|
|
|
|
* tagName?: Opt<any>,
|
|
|
|
|
|
* target?: Opt<string>,
|
|
|
|
|
|
* rel?: Opt<string>,
|
|
|
|
|
|
* validate?: Opt<boolean>,
|
|
|
|
|
|
* truncate?: Opt<number>,
|
|
|
|
|
|
* className?: Opt<string>,
|
|
|
|
|
|
* attributes?: OptObj<({ [attr: string]: any })>,
|
|
|
|
|
|
* ignoreTags?: string[],
|
|
|
|
|
|
* render?: OptFn<((ir: IntermediateRepresentation) => any)>
|
|
|
|
|
|
* }} Opts
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Default value for every option. Used as the base that user-supplied options
 * are merged over, and as the fallback when a per-type option object has no
 * entry for a token's type.
 * @type Required<Opts>
 */
const defaults = {
  defaultProtocol: 'http',
  events: null,
  format: noop,
  formatHref: noop,
  nl2br: false,
  tagName: 'a',
  target: null,
  rel: null,
  validate: true,
  truncate: Infinity,
  className: null,
  attributes: null,
  ignoreTags: [],
  render: null
};

/**
 * Utility class for linkify interfaces to apply specified
 * {@link Opts formatting and rendering options}.
 *
 * @param {Opts | Options} [opts] Option value overrides. When another
 * `Options` instance is given, its resolved option values are copied.
 * @param {(ir: IntermediateRepresentation) => any} [defaultRender] (For
 *   internal use) default render function that determines how to generate an
 *   HTML element based on a link token's derived tagName, attributes and HTML.
 *   Similar to render option
 */
function Options(opts, defaultRender = null) {
  let o = Object.assign({}, defaults);
  if (opts) {
    o = Object.assign(o, opts instanceof Options ? opts.o : opts);
  }

  // Ensure all ignored tags are uppercase
  const uppercaseIgnoredTags = o.ignoreTags.map(tag => tag.toUpperCase());

  /** @protected */
  this.o = o;
  if (defaultRender) {
    // Only shadow the prototype's identity renderer when one is supplied
    this.defaultRender = defaultRender;
  }
  this.ignoreTags = uppercaseIgnoredTags;
}

Options.prototype = {
  o: defaults,

  /**
   * @type string[]
   */
  ignoreTags: [],

  /**
   * Fallback renderer: returns the intermediate representation unchanged.
   * @param {IntermediateRepresentation} ir
   * @returns {any}
   */
  defaultRender(ir) {
    return ir;
  },

  /**
   * Returns true or false based on whether a token should be displayed as a
   * link based on the user options.
   * @param {MultiToken} token
   * @returns {boolean}
   */
  check(token) {
    return this.get('validate', token.toString(), token);
  },

  // Private methods

  /**
   * Resolve an option's value based on the value of the option and the given
   * params. If operator and token are specified and the target option is
   * callable, automatically calls the function with the given argument.
   *
   * A falsy option is returned as-is. An object option is looked up by
   * `token.t`, falling back to the default for `key`; a function found there
   * is called with `(operator, token)`. A top-level function option is called
   * with `(operator, token.t, token)`.
   *
   * @template {keyof Opts} K
   * @param {K} key Name of option to use
   * @param {string} [operator] will be passed to the target option if it's a
   * function. If not specified, RAW function value gets returned
   * @param {MultiToken} [token] The token from linkify.tokenize
   * @returns {Opts[K] | any}
   */
  get(key, operator, token) {
    const isCallable = operator != null;
    let option = this.o[key];
    if (!option) {
      return option;
    }
    if (typeof option === 'object') {
      option = token.t in option ? option[token.t] : defaults[key];
      if (typeof option === 'function' && isCallable) {
        option = option(operator, token);
      }
    } else if (typeof option === 'function' && isCallable) {
      option = option(operator, token.t, token);
    }
    return option;
  },

  /**
   * Resolve an object-valued option. Unlike `get`, an object value is returned
   * whole rather than being looked up by token type; a function value is
   * called with `(operator, token.t, token)` when `operator` is given.
   *
   * @template {keyof Opts} L
   * @param {L} key Name of options object to use
   * @param {string} [operator]
   * @param {MultiToken} [token]
   * @returns {Opts[L] | any}
   */
  getObj(key, operator, token) {
    let obj = this.o[key];
    if (typeof obj === 'function' && operator != null) {
      obj = obj(operator, token.t, token);
    }
    return obj;
  },

  /**
   * Convert the given token to a rendered element that may be added to the
   * calling-interface's DOM
   * @param {MultiToken} token Token to render to an HTML element
   * @returns {any} Render result; e.g., HTML string, DOM element, React
   *   Component, etc.
   */
  render(token) {
    const ir = token.render(this); // intermediate representation
    // `null` operator: fetch the raw render function instead of invoking it
    const renderFn = this.get('render', null, token) || this.defaultRender;
    return renderFn(ir, token.t, token);
  }
};

/**
 * Identity function; the default `format` and `formatHref` implementation.
 * @template T
 * @param {T} val
 * @returns {T}
 */
function noop(val) {
  return val;
}

// Frozen namespace object exposing this module's public members
var options = /*#__PURE__*/Object.freeze({
  __proto__: null,
  Options: Options,
  defaults: defaults
});
|
|
|
|
|
|
|
|
|
|
|
|
/******************************************************************************
|
|
|
|
|
|
Multi-Tokens
|
|
|
|
|
|
Tokens composed of arrays of TextTokens
|
2024-07-24 13:06:03 +02:00
|
|
|
|
******************************************************************************/
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * @param {string} value the text this token represents
 * @param {Token[]} tokens the scanner tokens that make up `value`
 */
function MultiToken(value, tokens) {
  this.t = 'token';
  this.v = value;
  this.tk = tokens;
}

/**
 * Abstract class used for manufacturing tokens of text tokens. That is, rather
 * than the value for a token being a small string of text, its value is an
 * array of text tokens.
 *
 * Used for grouping together URLs, emails, hashtags, and other potential
 * creations.
 * @class MultiToken
 * @property {string} t
 * @property {string} v
 * @property {Token[]} tk
 * @abstract
 */
MultiToken.prototype = {
  isLink: false,

  /**
   * Return the string this token represents.
   * @return {string}
   */
  toString() {
    return this.v;
  },

  /**
   * What should the value for this token be in the `href` HTML attribute?
   * Returns the `.toString` value by default.
   * @param {string} [scheme] unused here; accepted so overrides can use it
   * @return {string}
   */
  toHref(scheme) {
    return this.toString();
  },

  /**
   * Apply the `format` option to this token's text, then cut the result to
   * the `truncate` option's length (appending an ellipsis) if it is longer.
   * @param {Options} options Formatting options
   * @returns {string}
   */
  toFormattedString(options) {
    const val = this.toString();
    const truncate = options.get('truncate', val, this);
    const formatted = options.get('format', val, this);
    return truncate && formatted.length > truncate ? formatted.substring(0, truncate) + '…' : formatted;
  },

  /**
   * Apply the `formatHref` option to this token's href, built with the
   * `defaultProtocol` option.
   * @param {Options} options
   * @returns {string}
   */
  toFormattedHref(options) {
    return options.get('formatHref', this.toHref(options.get('defaultProtocol')), this);
  },

  /**
   * The start index of this token in the original input string
   * @returns {number}
   */
  startIndex() {
    return this.tk[0].s;
  },

  /**
   * The end index of this token in the original input string (up to this
   * index but not including it)
   * @returns {number}
   */
  endIndex() {
    return this.tk[this.tk.length - 1].e;
  },

  /**
   * Returns an object of relevant values for this token, which includes keys
   * * type - Kind of token ('url', 'email', etc.)
   * * value - Original text
   * * isLink - Whether the token is a link
   * * href - The value that should be added to the anchor tag's href
   *   attribute
   * * start, end - Indexes of the token in the original input string
   *
   * @method toObject
   * @param {string} [protocol] `'http'` by default
   */
  toObject(protocol = defaults.defaultProtocol) {
    return {
      type: this.t,
      value: this.toString(),
      isLink: this.isLink,
      href: this.toHref(protocol),
      start: this.startIndex(),
      end: this.endIndex()
    };
  },

  /**
   * Same shape as `toObject`, but with `value` and `href` passed through the
   * formatting options.
   * @param {Options} options Formatting option
   */
  toFormattedObject(options) {
    return {
      type: this.t,
      value: this.toFormattedString(options),
      isLink: this.isLink,
      href: this.toFormattedHref(options),
      start: this.startIndex(),
      end: this.endIndex()
    };
  },

  /**
   * Whether this token should be rendered as a link according to the given options
   * @param {Options} options
   * @returns {boolean}
   */
  validate(options) {
    return options.get('validate', this.toString(), this);
  },

  /**
   * Return an object that represents how this link should be rendered.
   * @param {Options} options Formatting options
   * @returns {IntermediateRepresentation}
   */
  render(options) {
    const token = this;
    const href = this.toHref(options.get('defaultProtocol'));
    const formattedHref = options.get('formatHref', href, this);
    const tagName = options.get('tagName', href, token);
    const content = this.toFormattedString(options);
    const attributes = {};
    const className = options.get('className', href, token);
    const target = options.get('target', href, token);
    const rel = options.get('rel', href, token);
    const attrs = options.getObj('attributes', href, token);
    const eventListeners = options.getObj('events', href, token);

    attributes.href = formattedHref;
    if (className) {
      attributes.class = className;
    }
    if (target) {
      attributes.target = target;
    }
    if (rel) {
      attributes.rel = rel;
    }
    if (attrs) {
      // Applied last, so user attributes override the derived ones above
      Object.assign(attributes, attrs);
    }
    return {
      tagName,
      attributes,
      content,
      eventListeners
    };
  }
};
|
|
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Create a new token that can be emitted by the parser state machine
 *
 * The returned class extends `MultiToken`, stamps `type` on every instance as
 * `t`, and also exposes it as the static `t` property of the class itself.
 *
 * @param {string} type readable type of the token
 * @param {object} [props] properties to assign or override on the new class's
 * prototype, including isLink = true or false
 * @returns {new (value: string, tokens: Token[]) => MultiToken} new token class
 */
function createTokenClass(type, props) {
  class Token extends MultiToken {
    constructor(value, tokens) {
      super(value, tokens);
      this.t = type;
    }
  }

  // Copy overrides onto the prototype; does nothing when `props` is omitted
  for (const p in props) {
    Token.prototype[p] = props[p];
  }
  Token.t = type;
  return Token;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Represents a list of tokens making up a valid email address.
 * Its href is the address prefixed with `mailto:`.
 */
const Email = createTokenClass('email', {
  isLink: true,
  toHref() {
    return `mailto:${this.toString()}`;
  }
});
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Represents some plain text
 */
const Text = createTokenClass('text');
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * Multi-linebreak token - represents a line break
 * @class Nl
 */
const Nl = createTokenClass('nl');
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
|
|
|
|
|
/**
 * Represents a list of text tokens making up a valid URL
 * @class Url
 */
const Url = createTokenClass('url', {
  isLink: true,

  /**
   * Returns the URL text as-is when it already has a protocol, otherwise
   * prefixes it with `scheme` followed by `://`. Note that this will not
   * escape unsafe HTML characters in the URL.
   * @param {string} [scheme] default scheme (e.g., 'https')
   * @return {string} the full href
   */
  toHref(scheme = defaults.defaultProtocol) {
    // Check if already has a prefix scheme
    return this.hasProtocol() ? this.v : `${scheme}://${this.v}`;
  },

  /**
   * Check whether this URL token has a protocol: its second scanner token is
   * a colon and its first is not the localhost token (which excludes
   * `localhost:<port>`).
   * @return {boolean}
   */
  hasProtocol() {
    const tokens = this.tk;
    return tokens.length >= 2 && tokens[0].t !== LOCALHOST && tokens[1].t === COLON;
  }
});
|
|
|
|
|
|
|
|
|
|
|
|
// Frozen namespace object exposing the multi-token classes and their factory.
// `Base` and `MultiToken` refer to the same constructor.
var multi = /*#__PURE__*/Object.freeze({
  __proto__: null,
  Base: MultiToken,
  Email: Email,
  MultiToken: MultiToken,
  Nl: Nl,
  Text: Text,
  Url: Url,
  createTokenClass: createTokenClass
});
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
Not exactly parser, more like the second-stage scanner (although we can
|
|
|
|
|
|
theoretically hotswap the code here with a real parser in the future... but
|
|
|
|
|
|
for a little URL-finding utility abstract syntax trees may be a little
|
|
|
|
|
|
overkill).
|
|
|
|
|
|
|
|
|
|
|
|
URL format: http://en.wikipedia.org/wiki/URI_scheme
|
2023-12-13 11:23:54 +01:00
|
|
|
|
Email format: http://en.wikipedia.org/wiki/EmailAddress (links to RFC in
|
2021-05-28 16:46:29 +02:00
|
|
|
|
reference)
|
|
|
|
|
|
|
|
|
|
|
|
@module linkify
|
|
|
|
|
|
@submodule parser
|
|
|
|
|
|
@main run
|
2024-07-24 13:06:03 +02:00
|
|
|
|
*/
|
2025-05-07 10:40:03 +02:00
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
 * Shorthand for constructing a new `State`.
 * @param {*} [arg] forwarded unchanged to the `State` constructor
 * @returns {State} the freshly constructed state
 */
const makeState = (arg) => {
  return new State(arg);
};
|
|
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Build the parser state machine, whose transitions are keyed by scanner
 * token types and whose accepting states carry a multi-token class
 * (`Url`, `Email`, `Nl`).
 *
 * The order of the `tt`/`ta` calls below is significant: it is kept exactly
 * as-is, because later registrations may build on or replace earlier ones.
 *
 * @param {{ groups: Collections<string> }} tokens scanner tokens; only the
 * `groups` collections are read here
 * @returns {{ start: State, tokens: typeof tk }} the start state plus the
 * scanner token table `tk`
 */
function init$1(tokens) {
  const { groups } = tokens;

  // Token types a URL may legitimately end with.
  // NOTE(review): PERCENT is listed here and in qsNonAccepting below; where
  // both lists are registered on the same state the later registration
  // presumably wins - confirm against tt/ta.
  const qsAccepting = groups.domain.concat([AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, NUM, PERCENT, PIPE, PLUS, POUND, SLASH, SYM, TILDE, UNDERSCORE]);

  // Token types that may appear inside a URL / query string but must not be
  // its very last characters. Characters that cannot appear in a URL at all
  // are deliberately absent from both lists.
  const qsNonAccepting = [APOSTROPHE, COLON, COMMA, DOT, EXCLAMATION, PERCENT, QUERY, QUOTE, SEMI, OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENBRACE, CLOSEBRACE, CLOSEBRACKET, OPENBRACKET, OPENPAREN, CLOSEPAREN, FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN, LEFTCORNERBRACKET, RIGHTCORNERBRACKET, LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET, FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN];

  // Email addresses written without a mailto: prefix:
  // token types allowed in the local part (before the @).
  const localpartAccepting = [AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS, HYPHEN, OPENBRACE, CLOSEBRACE, PERCENT, PIPE, PLUS, POUND, QUERY, SLASH, SYM, TILDE, UNDERSCORE];

  // The universal starting state.
  /**
   * @type State<Token>
   */
  const Start = makeState();

  // --- Email local part ---------------------------------------------------
  const Localpart = tt(Start, TILDE); // local part of an email address
  ta(Localpart, localpartAccepting, Localpart);
  ta(Localpart, groups.domain, Localpart);

  const Domain = makeState();
  const Scheme = makeState();
  const SlashScheme = makeState();
  ta(Start, groups.domain, Domain); // input so far looks like a domain name
  ta(Start, groups.scheme, Scheme); // e.g., 'mailto'
  ta(Start, groups.slashscheme, SlashScheme); // e.g., 'http'

  ta(Domain, localpartAccepting, Localpart);
  ta(Domain, groups.domain, Domain);
  const LocalpartAt = tt(Domain, AT); // local part followed by '@'

  tt(Localpart, AT, LocalpartAt); // getting close to an email address

  // A local part may itself look like a scheme, e.g. 'http' or 'mailto'
  tt(Scheme, AT, LocalpartAt);
  tt(SlashScheme, AT, LocalpartAt);

  // Local part followed by '.'; must be followed by more local part tokens
  const LocalpartDot = tt(Localpart, DOT);
  ta(LocalpartDot, localpartAccepting, Localpart);
  ta(LocalpartDot, groups.domain, Localpart);

  // --- Email domain -------------------------------------------------------
  const EmailDomain = makeState();
  ta(LocalpartAt, groups.domain, EmailDomain); // local part + '@' + start of a domain
  ta(EmailDomain, groups.domain, EmailDomain);
  const EmailDomainDot = tt(EmailDomain, DOT); // email domain followed by '.'
  ta(EmailDomainDot, groups.domain, EmailDomain);

  const Email$1 = makeState(Email); // accepting: possible email (more labels/TLDs may follow)
  ta(EmailDomainDot, groups.tld, Email$1);
  ta(EmailDomainDot, groups.utld, Email$1);
  tt(LocalpartAt, LOCALHOST, Email$1);

  // A hyphen (or run of hyphens) leads back into the email domain
  const EmailDomainHyphen = tt(EmailDomain, HYPHEN);
  tt(EmailDomainHyphen, HYPHEN, EmailDomainHyphen);
  ta(EmailDomainHyphen, groups.domain, EmailDomain);
  ta(Email$1, groups.domain, EmailDomain);
  tt(Email$1, DOT, EmailDomainDot);
  tt(Email$1, HYPHEN, EmailDomainHyphen);

  // Email followed by a colon and then numeric tokens (port-like suffix).
  // The third argument here is the `Email` token class, not the `Email$1` state.
  const EmailColon = tt(Email$1, COLON);
  ta(EmailColon, groups.numeric, Email);

  // --- Plain domains ------------------------------------------------------
  // Dots and hyphens: hyphens are usually part of domain names (but not TLDs)
  const DomainHyphen = tt(Domain, HYPHEN); // domain followed by '-'
  const DomainDot = tt(Domain, DOT); // domain followed by '.'
  tt(DomainHyphen, HYPHEN, DomainHyphen);
  ta(DomainHyphen, groups.domain, Domain);
  ta(DomainDot, localpartAccepting, Localpart);
  ta(DomainDot, groups.domain, Domain);

  const DomainDotTld = makeState(Url); // accepting: simplest URL, no path or query
  ta(DomainDot, groups.tld, DomainDotTld);
  ta(DomainDot, groups.utld, DomainDotTld);
  ta(DomainDotTld, groups.domain, Domain);
  ta(DomainDotTld, localpartAccepting, Localpart);
  tt(DomainDotTld, DOT, DomainDot);
  tt(DomainDotTld, HYPHEN, DomainHyphen);
  tt(DomainDotTld, AT, LocalpartAt);

  const DomainDotTldColon = tt(DomainDotTld, COLON); // URL followed by ':' (port may follow)
  const DomainDotTldColonPort = makeState(Url); // accepting: TLD followed by a port number
  ta(DomainDotTldColon, groups.numeric, DomainDotTldColonPort);

  // --- Long URLs ----------------------------------------------------------
  // Accepting: URL with optional port and possibly a path/query string
  const Url$1 = makeState(Url);

  // URL followed by symbols that may not end it (they are dropped from the
  // final URL unless more accepting tokens follow)
  const UrlNonaccept = makeState();

  // Query strings
  ta(Url$1, qsAccepting, Url$1);
  ta(Url$1, qsNonAccepting, UrlNonaccept);
  ta(UrlNonaccept, qsAccepting, Url$1);
  ta(UrlNonaccept, qsNonAccepting, UrlNonaccept);

  // Become a full URL after `SLASH` or `COLON NUM SLASH`;
  // this works with or without a scheme:// prefix
  tt(DomainDotTld, SLASH, Url$1);
  tt(DomainDotTldColonPort, SLASH, Url$1);

  // --- Schemes ------------------------------------------------------------
  // Note that domains that begin with schemes are treated slightly differently
  const SchemeColon = tt(Scheme, COLON); // e.g., 'mailto:'
  const SlashSchemeColon = tt(SlashScheme, COLON); // e.g., 'http:'
  const SlashSchemeColonSlash = tt(SlashSchemeColon, SLASH); // e.g., 'http:/'

  const UriPrefix = tt(SlashSchemeColonSlash, SLASH); // e.g., 'http://'

  // Scheme states can continue as ordinary domain states
  ta(Scheme, groups.domain, Domain);
  tt(Scheme, DOT, DomainDot);
  tt(Scheme, HYPHEN, DomainHyphen);
  ta(SlashScheme, groups.domain, Domain);
  tt(SlashScheme, DOT, DomainDot);
  tt(SlashScheme, HYPHEN, DomainHyphen);

  // Force a URL once a scheme prefix is followed by anything sane
  ta(SchemeColon, groups.domain, Url$1);
  tt(SchemeColon, SLASH, Url$1);
  tt(SchemeColon, QUERY, Url$1);
  ta(UriPrefix, groups.domain, Url$1);
  ta(UriPrefix, qsAccepting, Url$1);
  tt(UriPrefix, SLASH, Url$1);

  // --- Bracket pairs inside URLs ------------------------------------------
  const bracketPairs = [
    [OPENBRACE, CLOSEBRACE], // {}
    [OPENBRACKET, CLOSEBRACKET], // []
    [OPENPAREN, CLOSEPAREN], // ()
    [OPENANGLEBRACKET, CLOSEANGLEBRACKET], // <>
    [FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN], // （）
    [LEFTCORNERBRACKET, RIGHTCORNERBRACKET], // 「」
    [LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET], // 『』
    [FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN], // ＜＞
  ];
  for (const [OPEN, CLOSE] of bracketPairs) {
    const UrlOpen = tt(Url$1, OPEN); // URL followed by an opening bracket

    // Stay non-accepting when the opening bracket follows trailing symbols
    tt(UrlNonaccept, OPEN, UrlOpen);

    // The matching closing bracket WILL be included in the URL
    tt(UrlOpen, CLOSE, Url$1);

    // URL with an opening bracket followed by accepting tokens.
    // Note that the final state can still be `UrlOpen` (if the URL has a
    // single opening bracket for some reason).
    const UrlOpenQ = makeState(Url);
    ta(UrlOpen, qsAccepting, UrlOpenQ);

    const UrlOpenSyms = makeState(); // UrlOpen followed by symbols it cannot end in
    // NOTE(review): no target state is passed here even though UrlOpenSyms
    // was just created; what this registers depends on how `ta` treats a
    // missing target - confirm this is intended.
    ta(UrlOpen, qsNonAccepting);

    // URL with an opening bracket, followed by a mix of tokens
    ta(UrlOpenQ, qsAccepting, UrlOpenQ);
    ta(UrlOpenQ, qsNonAccepting, UrlOpenSyms);
    ta(UrlOpenSyms, qsAccepting, UrlOpenQ);
    ta(UrlOpenSyms, qsNonAccepting, UrlOpenSyms);

    // The closing bracket turns it back into a regular URL
    tt(UrlOpenQ, CLOSE, Url$1);
    tt(UrlOpenSyms, CLOSE, Url$1);
  }

  tt(Start, LOCALHOST, DomainDotTld); // localhost is a valid URL state
  tt(Start, NL, Nl); // single new line

  return {
    start: Start,
    tokens: tk,
  };
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Drive the parser state machine over the scanner's tokens and group them
 * into multi tokens: links where an accepting state was reached, plain text
 * everywhere else.
 *
 * @param {State<MultiToken>} start parser start state
 * @param {string} input the original input used to generate the given tokens
 * @param {Token[]} tokens list of scanned tokens
 * @returns {MultiToken[]}
 */
function run(start, input, tokens) {
  const total = tokens.length;
  const result = [];
  let pendingText = [];
  let pos = 0;

  while (pos < total) {
    let current = start;
    let firstHop = null;

    // Tokens the start state cannot consume are plain text
    while (pos < total) {
      firstHop = current.go(tokens[pos].t);
      if (firstHop) {
        break;
      }
      pendingText.push(tokens[pos]);
      pos += 1;
    }

    let consumed = 0; // tokens taken by the machine in this round
    let lastAccepting = null; // most recent accepting state seen
    let trailing = -1; // tokens consumed since lastAccepting (-1: none seen yet)

    // Follow transitions for as long as the machine can move
    while (pos < total) {
      const following = firstHop || current.go(tokens[pos].t);
      if (!following) {
        break;
      }
      firstHop = null;
      current = following;

      if (current.accepts()) {
        trailing = 0;
        lastAccepting = current;
      } else if (trailing >= 0) {
        trailing += 1;
      }
      pos += 1;
      consumed += 1;
    }

    if (trailing < 0) {
      // Never reached an accepting state: rewind, treat the first token of
      // this attempt as text and retry from the token after it
      pos -= consumed;
      if (pos < total) {
        pendingText.push(tokens[pos]);
        pos += 1;
      }
      continue;
    }

    // A link was recognized; flush any text collected before it
    if (pendingText.length > 0) {
      result.push(initMultiToken(Text, input, pendingText));
      pendingText = [];
    }

    // Give back the tokens consumed after the latest accepting state
    pos -= trailing;
    consumed -= trailing;

    const Multi = lastAccepting.t;
    result.push(initMultiToken(Multi, input, tokens.slice(pos - consumed, pos)));
  }

  // Flush whatever text remains at the end of the input
  if (pendingText.length > 0) {
    result.push(initMultiToken(Text, input, pendingText));
  }
  return result;
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Instantiate a multi token covering a run of consecutive scanner tokens.
 * The token's string value is the slice of `input` from the first token's
 * start index (`s`) to the last token's end index (`e`).
 *
 * @param {new (value: string, tokens: Token[]) => MultiToken} Multi class to instantiate
 * @param {string} input original input string
 * @param {Token[]} tokens consecutive tokens scanned from input string
 * @returns {MultiToken}
 */
function initMultiToken(Multi, input, tokens) {
  const first = tokens[0];
  const last = tokens[tokens.length - 1];
  return new Multi(input.slice(first.s, last.e), tokens);
}
|
|
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
// Use console.warn when a console with a warn method exists; otherwise fall
// back to a function that does nothing.
const warn = typeof console === 'undefined' || !console || !console.warn ? () => {} : console.warn;

// Common tail appended to the "already initialized" warnings.
const warnAdvice = 'until manual call of linkify.init(). Register all schemes and plugins before invoking linkify the first time.';

// Side-effect initialization state
const INIT = {
  // Scanner state machine, assigned by init()
  scanner: null,
  // Parser state machine, assigned by init()
  parser: null,
  // Registered token plugins as [name, plugin] pairs
  tokenQueue: [],
  // Registered plugins as [name, plugin] pairs
  pluginQueue: [],
  // Registered custom schemes as [scheme, optionalSlashSlash] pairs
  customSchemes: [],
  // Set to true once init() has run
  initialized: false,
};
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* @typedef {{
|
|
|
|
|
|
* start: State<string>,
|
|
|
|
|
|
* tokens: { groups: Collections<string> } & typeof tk
|
|
|
|
|
|
* }} ScannerInit
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* @typedef {{
|
|
|
|
|
|
* start: State<MultiToken>,
|
|
|
|
|
|
* tokens: typeof multi
|
|
|
|
|
|
* }} ParserInit
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* @typedef {(arg: { scanner: ScannerInit }) => void} TokenPlugin
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* @typedef {(arg: { scanner: ScannerInit, parser: ParserInit }) => void} Plugin
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Clear `State.groups` and restore every field of the shared `INIT` object
 * to its initial value, which drops all registered plugins and custom
 * schemes. Used for testing; not required in practice.
 * @returns {typeof INIT} the same (now reset) `INIT` object
 * @private
 */
function reset() {
  State.groups = {};
  Object.assign(INIT, {
    scanner: null,
    parser: null,
    tokenQueue: [],
    pluginQueue: [],
    customSchemes: [],
    initialized: false,
  });
  return INIT;
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Queue a token plugin, which lets the scanner recognize additional token
 * types before the parser state machine is constructed from the results.
 * Registering a name that is already queued replaces the earlier entry in
 * place (with a warning).
 *
 * @param {string} name of plugin to register
 * @param {TokenPlugin} plugin function that accepts the scanner state machine
 * and available scanner tokens and collections and extends the state machine to
 * recognize additional tokens or groups.
 * @throws {Error} if `plugin` is not a function
 */
function registerTokenPlugin(name, plugin) {
  if (typeof plugin !== 'function') {
    throw new Error(`linkifyjs: Invalid token plugin ${plugin} (expects function)`);
  }

  const queue = INIT.tokenQueue;
  const existing = queue.findIndex((entry) => entry[0] === name);
  if (existing >= 0) {
    warn(`linkifyjs: token plugin "${name}" already registered - will be overwritten`);
    queue[existing] = [name, plugin];
    return;
  }

  queue.push([name, plugin]);
  if (INIT.initialized) {
    // Queued, but it only takes effect on the next init()
    warn(`linkifyjs: already initialized - will not register token plugin "${name}" ${warnAdvice}`);
  }
}
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
 * Queue a linkify plugin. Registering a name that is already queued
 * replaces the earlier entry in place (with a warning).
 *
 * @param {string} name of plugin to register
 * @param {Plugin} plugin function that accepts the parser state machine and
 * extends the parser to recognize additional link types
 * @throws {Error} if `plugin` is not a function
 */
function registerPlugin(name, plugin) {
  if (typeof plugin !== 'function') {
    throw new Error(`linkifyjs: Invalid plugin ${plugin} (expects function)`);
  }

  const queue = INIT.pluginQueue;
  const existing = queue.findIndex((entry) => entry[0] === name);
  if (existing >= 0) {
    warn(`linkifyjs: plugin "${name}" already registered - will be overwritten`);
    queue[existing] = [name, plugin];
    return;
  }

  queue.push([name, plugin]);
  if (INIT.initialized) {
    // Queued, but it only takes effect on the next init()
    warn(`linkifyjs: already initialized - will not register plugin "${name}" ${warnAdvice}`);
  }
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Detect URLs with the following additional protocol. Anything with format
 * "protocol://..." will be considered a link. If `optionalSlashSlash` is set to
 * `true`, anything with format "protocol:..." will be considered a link.
 *
 * The scheme is appended to the custom-scheme queue; a warning is emitted
 * (before validation) when linkify has already been initialized.
 *
 * @param {string} scheme
 * @param {boolean} [optionalSlashSlash]
 * @throws {Error} if `scheme` is not made of digits and lowercase ASCII
 * letters, optionally separated by single hyphens
 */
function registerCustomProtocol(scheme, optionalSlashSlash = false) {
  if (INIT.initialized) {
    warn(`linkifyjs: already initialized - will not register custom scheme "${scheme}" ${warnAdvice}`);
  }

  const wellFormed = /^[0-9a-z]+(-[0-9a-z]+)*$/.test(scheme);
  if (!wellFormed) {
    const message = [
      'linkifyjs: incorrect scheme format.',
      '1. Must only contain digits, lowercase ASCII letters or "-"',
      '2. Cannot start or end with "-"',
      '3. "-" cannot repeat',
    ].join('\n');
    throw new Error(message);
  }

  INIT.customSchemes.push([scheme, optionalSlashSlash]);
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Initialize the linkify state machine. Called automatically the first time
 * linkify is called on a string, but may be called manually as well.
 *
 * Builds the scanner from the registered custom schemes, runs every queued
 * token plugin against it, builds the parser from the scanner's tokens, runs
 * every queued plugin, then marks `INIT` as initialized.
 *
 * @returns {typeof INIT} the shared initialization state
 */
function init() {
  // Scanner state machine first, then the token plugins that extend it
  INIT.scanner = init$2(INIT.customSchemes);
  for (let idx = 0; idx < INIT.tokenQueue.length; idx += 1) {
    const entry = INIT.tokenQueue[idx];
    entry[1]({ scanner: INIT.scanner });
  }

  // Parser state machine next, then the plugins that extend it
  INIT.parser = init$1(INIT.scanner.tokens);
  for (let idx = 0; idx < INIT.pluginQueue.length; idx += 1) {
    const entry = INIT.pluginQueue[idx];
    entry[1]({ scanner: INIT.scanner, parser: INIT.parser });
  }

  INIT.initialized = true;
  return INIT;
}
|
|
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
 * Parse a string into tokens that represent linkable and non-linkable
 * sub-components. Initializes linkify first if that has not happened yet.
 * @param {string} str
 * @return {MultiToken[]} tokens
 */
function tokenize(str) {
  if (!INIT.initialized) {
    init();
  }
  const parserStart = INIT.parser.start;
  const scanned = run$1(INIT.scanner.start, str);
  return run(parserStart, str, scanned);
}
tokenize.scan = run$1; // for testing
|
2021-05-28 16:46:29 +02:00
|
|
|
|
|
2023-12-13 11:23:54 +01:00
|
|
|
|
/**
 * Find a list of linkable items in the given string.
 * @param {string} str string to find links in
 * @param {string | Opts} [type] either formatting options or specific type of
 * links to find, e.g., 'url' or 'email'
 * @param {Opts} [opts] formatting options for final output. Cannot be specified
 * if opts already provided in `type` argument
 * @returns {Object[]} one formatted object (from `toFormattedObject`) per
 * link token that matches `type` and passes the options' `check`
 * @throws {Error} if options are passed in `type` and `opts` is given as well
 */
function find(str, type = null, opts = null) {
  let wantedType = type;
  let rawOptions = opts;

  // Two-argument form find(str, opts): the options arrive in the `type` slot
  if (wantedType && typeof wantedType === 'object') {
    if (rawOptions) {
      throw new Error(`linkifyjs: Invalid link type ${wantedType}; must be a string`);
    }
    rawOptions = wantedType;
    wantedType = null;
  }

  const options = new Options(rawOptions);
  const links = [];
  for (const token of tokenize(str)) {
    if (!token.isLink) {
      continue;
    }
    if (wantedType && token.t !== wantedType) {
      continue;
    }
    if (options.check(token)) {
      links.push(token.toFormattedObject(options));
    }
  }
  return links;
}
|
2023-12-13 11:23:54 +01:00
|
|
|
|
|
2021-05-28 16:46:29 +02:00
|
|
|
|
/**
 * Is the given string valid linkable text of some sort. Note that this does not
 * trim the text for you.
 *
 * The string qualifies only when it tokenizes to exactly one token and that
 * token is a link. Optionally pass in a second `type` param, which is the
 * type of link to test for.
 *
 * For example,
 *
 *     linkify.test(str, 'email');
 *
 * Returns `true` if str is a valid email.
 * @param {string} str string to test for links
 * @param {string} [type] optional specific link type to look for
 * @returns boolean true/false
 */
function test(str, type = null) {
  const tokens = tokenize(str);
  if (tokens.length !== 1) {
    return false;
  }

  const only = tokens[0];
  const linkFlag = only.isLink;
  if (!linkFlag) {
    // Hand back the token's own falsy flag unchanged
    return linkFlag;
  }
  return !type || only.t === type;
}
|
|
|
|
|
|
|
2025-05-07 10:40:03 +02:00
|
|
|
|
export { MultiToken, Options, State, createTokenClass, find, init, multi, options, regexp, registerCustomProtocol, registerPlugin, registerTokenPlugin, reset, stringToArray, test, multi as text, tokenize };
|