mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-08 06:09:48 +00:00
55 lines
116 KiB
JavaScript
55 lines
116 KiB
JavaScript
|
|
/*! behaviors.js is part of Webrecorder project. Copyright (C) 2021-2025, Webrecorder Software. Licensed under the Affero General Public License v3. */(()=>{var __webpack_modules__={"./node_modules/query-selector-shadow-dom/src/normalize.js":
|
||
|
|
/*!*****************************************************************!*\
|
||
|
|
!*** ./node_modules/query-selector-shadow-dom/src/normalize.js ***!
|
||
|
|
\*****************************************************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "normalizeSelector": () => (/* binding */ normalizeSelector)\n/* harmony export */ });\n/* istanbul ignore file */\n\n\n// normalize-selector-rev-02.js\n/*\n author: kyle simpson (@getify)\n original source: https://gist.github.com/getify/9679380\n\n modified for tests by david kaye (@dfkaye)\n 21 march 2014\n\n rev-02 incorporate kyle\'s changes 3/2/42014\n*/\n\nfunction normalizeSelector(sel) {\n // save unmatched text, if any\n function saveUnmatched() {\n if (unmatched) {\n // whitespace needed after combinator?\n if (tokens.length > 0 && /^[~+>]$/.test(tokens[tokens.length - 1])) {\n tokens.push(" ");\n }\n\n // save unmatched text\n tokens.push(unmatched);\n }\n }\n\n var tokens = [],\n match,\n unmatched,\n regex,\n state = [0],\n next_match_idx = 0,\n prev_match_idx,\n not_escaped_pattern = /(?:[^\\\\]|(?:^|[^\\\\])(?:\\\\\\\\)+)$/,\n whitespace_pattern = /^\\s+$/,\n state_patterns = [\n /\\s+|\\/\\*|["\'>~+[(]/g, // general\n /\\s+|\\/\\*|["\'[\\]()]/g, // [..] set\n /\\s+|\\/\\*|["\'[\\]()]/g, // (..) 
set\n null, // string literal (placeholder)\n /\\*\\//g, // comment\n ];\n sel = sel.trim();\n\n // eslint-disable-next-line no-constant-condition\n while (true) {\n unmatched = "";\n\n regex = state_patterns[state[state.length - 1]];\n\n regex.lastIndex = next_match_idx;\n match = regex.exec(sel);\n\n // matched text to process?\n if (match) {\n prev_match_idx = next_match_idx;\n next_match_idx = regex.lastIndex;\n\n // collect the previous string chunk not matched before this token\n if (prev_match_idx < next_match_idx - match[0].length) {\n unmatched = sel.substring(\n prev_match_idx,\n next_match_idx - match[0].length\n );\n }\n\n // general, [ ] pair, ( ) pair?\n if (state[state.length - 1] < 3) {\n saveUnmatched();\n\n // starting a [ ] pair?\n if (match[0] === "[") {\n state.push(1);\n }\n // starting a ( ) pair?\n else if (match[0] === "(") {\n state.push(2);\n }\n // starting a string literal?\n else if (/^["\']$/.test(match[0])) {\n state.push(3);\n state_patterns[3] = new RegExp(match[0], "g");\n }\n // starting a comment?\n else if (match[0] === "/*") {\n state.push(4);\n }\n // ending a [ ] or ( ) pair?\n else if (/^[\\])]$/.test(match[0]) && state.length > 0) {\n state.pop();\n }\n // handling whitespace or a combinator?\n else if (/^(?:\\s+|[~+>])$/.test(match[0])) {\n // need to insert whitespace before?\n if (\n tokens.length > 0 &&\n !whitespace_pattern.test(tokens[tokens.length - 1]) &&\n state[state.length - 1] === 0\n ) {\n // add normalized whitespace\n tokens.push(" ");\n }\n\n // case-insensitive attribute selector CSS L4\n if (\n state[state.length - 1] === 1 &&\n tokens.length === 5 &&\n tokens[2].charAt(tokens[2].length - 1) === "="\n ) {\n tokens[4] = " " + tokens[4];\n }\n\n // whitespace token we can skip?\n if (whitespace_pattern.test(match[0])) {\n continue;\n }\n }\n\n // save matched text\n tokens.push(match[0]);\n }\n // otherwise, string literal or comment\n else {\n // save unmatched text\n tokens[tokens.length - 1] += 
unmatched;\n\n // unescaped terminator to string literal or comment?\n if (not_escaped_pattern.test(tokens[tokens.length - 1])) {\n //
|
||
|
|
/*!*************************************************************************!*\
|
||
|
|
!*** ./node_modules/query-selector-shadow-dom/src/querySelectorDeep.js ***!
|
||
|
|
\*************************************************************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval("__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ \"collectAllElementsDeep\": () => (/* binding */ collectAllElementsDeep),\n/* harmony export */ \"querySelectorAllDeep\": () => (/* binding */ querySelectorAllDeep),\n/* harmony export */ \"querySelectorDeep\": () => (/* binding */ querySelectorDeep)\n/* harmony export */ });\n/* harmony import */ var _normalize__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./normalize */ \"./node_modules/query-selector-shadow-dom/src/normalize.js\");\n\n\n/**\n* Finds first matching elements on the page that may be in a shadow root using a complex selector of n-depth\n*\n* Don't have to specify all shadow roots to button, tree is travered to find the correct element\n*\n* Example querySelectorAllDeep('downloads-item:nth-child(4) #remove');\n*\n* Example should work on chrome://downloads outputting the remove button inside of a download card component\n*\n* Example find first active download link element querySelectorDeep('#downloads-list .is-active a[href^=\"https://\"]');\n*\n* Another example querySelectorAllDeep('#downloads-list div#title-area + a');\ne.g.\n*/\nfunction querySelectorAllDeep(selector, root = document, allElements = null) {\n return _querySelectorDeep(selector, true, root, allElements);\n}\n\nfunction querySelectorDeep(selector, root = document, allElements = null) {\n return _querySelectorDeep(selector, false, root, allElements);\n}\n\nfunction _querySelectorDeep(selector, findMany, root, allElements = null) {\n selector = (0,_normalize__WEBPACK_IMPORTED_MODULE_0__.normalizeSelector)(selector);\n let lightElement = root.querySelector(selector);\n\n if (document.head.createShadowRoot || document.head.attachShadow) {\n // no need to do any special if selector 
matches something specific in light-dom\n if (!findMany && lightElement) {\n return lightElement;\n }\n\n // split on commas because those are a logical divide in the operation\n const selectionsToMake = splitByCharacterUnlessQuoted(selector, ',');\n\n return selectionsToMake.reduce((acc, minimalSelector) => {\n // if not finding many just reduce the first match\n if (!findMany && acc) {\n return acc;\n }\n // do best to support complex selectors and split the query\n const splitSelector = splitByCharacterUnlessQuoted(minimalSelector\n //remove white space at start of selector\n .replace(/^\\s+/g, '')\n .replace(/\\s*([>+~]+)\\s*/g, '$1'), ' ')\n // filter out entry white selectors\n .filter((entry) => !!entry)\n // convert \"a > b\" to [\"a\", \"b\"]\n .map((entry) => splitByCharacterUnlessQuoted(entry, '>'));\n\n const possibleElementsIndex = splitSelector.length - 1;\n const lastSplitPart = splitSelector[possibleElementsIndex][splitSelector[possibleElementsIndex].length - 1];\n const possibleElements = collectAllElementsDeep(lastSplitPart, root, allElements);\n const findElements = findMatchingElement(splitSelector, possibleElementsIndex, root);\n if (findMany) {\n acc = acc.concat(possibleElements.filter(findElements));\n return acc;\n } else {\n acc = possibleElements.find(findElements);\n return acc || null;\n }\n }, findMany ? [] : null);\n\n\n } else {\n if (!findMany) {\n return lightElement;\n } else {\n return root.querySelectorAll(selector);\n }\n }\n\n}\n\nfunction findMatchingElement(splitSelector, possibleElementsIndex, root) {\n return (element) => {\n let position = possibleEl
|
||
|
|
/*!******************!*\
|
||
|
|
!*** ./index.ts ***!
|
||
|
|
\******************/
/* Module "./index.ts": requires "./src/index.ts" and re-exports its BehaviorManager binding through a webpack getter export ("reexport safe"). The module source is carried as an eval() string ending in a sourceURL comment, which looks like bundler dev output (assumption, confirm against the build config); prefer changing the original index.ts and rebuilding over hand-editing this string. */
(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BehaviorManager": () => (/* reexport safe */ _src__WEBPACK_IMPORTED_MODULE_0__.BehaviorManager)\n/* harmony export */ });\n/* harmony import */ var _src__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./src */ "./src/index.ts");\n\n\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./index.ts?')},"./src/autoclick.ts":
|
||
|
|
/*!**************************!*\
|
||
|
|
!*** ./src/autoclick.ts ***!
|
||
|
|
\**************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "AutoClick": () => (/* binding */ AutoClick)\n/* harmony export */ });\n/* harmony import */ var _lib_behavior__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./lib/behavior */ "./src/lib/behavior.ts");\n/* harmony import */ var _lib_utils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./lib/utils */ "./src/lib/utils.ts");\n\n\nclass AutoClick extends _lib_behavior__WEBPACK_IMPORTED_MODULE_0__.BackgroundBehavior {\n _donePromise;\n _markDone;\n selector;\n seenElem = new WeakSet();\n static id = "Autoclick";\n constructor(selector = "a") {\n super();\n this.selector = selector;\n this._donePromise = new Promise((resolve) => (this._markDone = resolve));\n }\n nextSameOriginLink() {\n try {\n const allLinks = document.querySelectorAll(this.selector);\n for (const el of allLinks) {\n const elem = el;\n if (elem.href && !elem.href.startsWith(self.location.origin)) {\n continue;\n }\n if (!elem.isConnected) {\n continue;\n }\n if (!elem.checkVisibility()) {\n continue;\n }\n if (this.seenElem.has(elem)) {\n continue;\n }\n this.seenElem.add(elem);\n return elem;\n }\n }\n catch (e) {\n this.debug(e.toString());\n }\n return null;\n }\n async start() {\n const beforeUnload = (event) => {\n event.preventDefault();\n return false;\n };\n window.addEventListener("beforeunload", beforeUnload);\n while (true) {\n const elem = this.nextSameOriginLink();\n if (!elem) {\n break;\n }\n await this.processElem(elem);\n }\n window.removeEventListener("beforeunload", beforeUnload);\n this._markDone();\n }\n async processElem(elem) {\n if (elem.target) {\n return;\n }\n if (elem.href) {\n if (!(await (0,_lib_utils__WEBPACK_IMPORTED_MODULE_1__.addToExternalSet)(elem.href))) {\n return;\n }\n this.debug("Clicking on link: " + 
elem.href);\n }\n else {\n this.debug("Click empty link");\n }\n const origHref = self.location.href;\n const origHistoryLen = self.history.length;\n if (elem.click) {\n elem.click();\n }\n else if (elem.dispatchEvent) {\n elem.dispatchEvent(new MouseEvent("click"));\n }\n await (0,_lib_utils__WEBPACK_IMPORTED_MODULE_1__.sleep)(250);\n if (self.history.length === origHistoryLen + 1 &&\n self.location.href != origHref) {\n await new Promise((resolve) => {\n window.addEventListener("popstate", () => {\n resolve(null);\n }, { once: true });\n window.history.back();\n });\n }\n }\n catch(e) {\n this.debug(e.toString());\n }\n done() {\n return this._donePromise;\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/autoclick.ts?')},"./src/autofetcher.ts":
|
||
|
|
/*!****************************!*\
|
||
|
|
!*** ./src/autofetcher.ts ***!
|
||
|
|
\****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "AutoFetcher": () => (/* binding */ AutoFetcher)\n/* harmony export */ });\n/* harmony import */ var query_selector_shadow_dom__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! query-selector-shadow-dom */ "./node_modules/query-selector-shadow-dom/src/querySelectorDeep.js");\n/* harmony import */ var _lib_behavior__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./lib/behavior */ "./src/lib/behavior.ts");\n/* harmony import */ var _lib_utils__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./lib/utils */ "./src/lib/utils.ts");\n\n\n\nconst SRC_SET_SELECTOR = "img[srcset], img[data-srcset], img[data-src], noscript > img[src], img[loading=\'lazy\'], " +\n "video[srcset], video[data-srcset], video[data-src], audio[srcset], audio[data-srcset], audio[data-src], " +\n "picture > source[srcset], picture > source[data-srcset], picture > source[data-src], " +\n "video > source[srcset], video > source[data-srcset], video > source[data-src], " +\n "audio > source[srcset], audio > source[data-srcset], audio > source[data-src]";\nconst SRCSET_REGEX = /\\s*(\\S*\\s+[\\d.]+[wx]),|(?:\\s*,(?:\\s+|(?=https?:)))/;\nconst STYLE_REGEX = /(url\\s*\\(\\s*[\\\\"\']*)([^)\'"]+)([\\\\"\']*\\s*\\))/gi;\nconst IMPORT_REGEX = /(@import\\s*[\\\\"\']*)([^)\'";]+)([\\\\"\']*\\s*;?)/gi;\nconst MAX_CONCURRENT = 6;\nclass AutoFetcher extends _lib_behavior__WEBPACK_IMPORTED_MODULE_1__.BackgroundBehavior {\n urlSet = new Set();\n pendingQueue = [];\n waitQueue = [];\n mutationObserver;\n numPending = 0;\n numDone = 0;\n headers;\n _donePromise;\n _markDone;\n active;\n running = false;\n static id = "Autofetcher";\n constructor(active = false, headers = null, startEarly = false) {\n super();\n this.headers = headers || {};\n 
this._donePromise = new Promise((resolve) => (this._markDone = resolve));\n this.active = active;\n if (this.active && startEarly) {\n document.addEventListener("DOMContentLoaded", () => this.initObserver());\n }\n }\n get numFetching() {\n return this.numDone + this.numPending + this.pendingQueue.length;\n }\n async start() {\n if (!this.active) {\n return;\n }\n this.initObserver();\n this.run();\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_2__.sleep)(500).then(() => {\n if (!this.pendingQueue.length && !this.numPending) {\n this._markDone(null);\n }\n });\n }\n done() {\n return this._donePromise;\n }\n async run() {\n this.running = true;\n for (const url of this.waitQueue) {\n this.doFetch(url);\n }\n this.waitQueue = [];\n this.extractSrcSrcSetAll(document);\n this.extractStyleSheets();\n this.extractDataAttributes(document);\n }\n isValidUrl(url) {\n return url && (url.startsWith("http:") || url.startsWith("https:"));\n }\n queueUrl(url, immediate = false) {\n try {\n url = new URL(url, document.baseURI).href;\n }\n catch (e) {\n return false;\n }\n if (!this.isValidUrl(url)) {\n return false;\n }\n if (this.urlSet.has(url)) {\n return false;\n }\n this.urlSet.add(url);\n if (this.running || immediate) {\n this.doFetch(url);\n }\n else {\n this.waitQueue.push(url);\n }\n return true;\n }\n async doFetchStream(url) {\n try {\n const resp = await fetch(url, {\n credentials: "include",\n referrerPolicy: "origin-when-cross-origin",\n });\n this.debug(`Autofetch: started ${url}`);\n const reader = resp.body.getReader();
|
||
|
|
/*!*************************!*\
|
||
|
|
!*** ./src/autoplay.ts ***!
|
||
|
|
\*************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "Autoplay": () => (/* binding */ Autoplay)\n/* harmony export */ });\n/* harmony import */ var query_selector_shadow_dom__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! query-selector-shadow-dom */ "./node_modules/query-selector-shadow-dom/src/querySelectorDeep.js");\n/* harmony import */ var _lib_behavior__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./lib/behavior */ "./src/lib/behavior.ts");\n/* harmony import */ var _lib_utils__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./lib/utils */ "./src/lib/utils.ts");\n\n\n\nclass Autoplay extends _lib_behavior__WEBPACK_IMPORTED_MODULE_1__.BackgroundBehavior {\n mediaSet;\n autofetcher;\n numPlaying;\n promises;\n _initDone;\n running = false;\n polling = false;\n static id = "Autoplay";\n constructor(autofetcher, startEarly = false) {\n super();\n this.mediaSet = new Set();\n this.autofetcher = autofetcher;\n this.numPlaying = 0;\n this.promises = [];\n this._initDone = () => null;\n this.promises.push(new Promise((resolve) => (this._initDone = resolve)));\n if (startEarly) {\n document.addEventListener("DOMContentLoaded", () => this.pollAudioVideo());\n }\n }\n async start() {\n this.running = true;\n this.pollAudioVideo();\n this._initDone();\n }\n async pollAudioVideo() {\n const run = true;\n if (this.polling) {\n return;\n }\n this.polling = true;\n while (run) {\n for (const [, elem] of (0,query_selector_shadow_dom__WEBPACK_IMPORTED_MODULE_0__.querySelectorAllDeep)("video, audio, picture").entries()) {\n if (!elem["__bx_autoplay_found"]) {\n if (!this.running) {\n if (this.processFetchableUrl(elem)) {\n elem["__bx_autoplay_found"] = true;\n }\n continue;\n }\n await this.loadMedia(elem);\n elem["__bx_autoplay_found"] = true;\n }\n }\n await 
(0,_lib_utils__WEBPACK_IMPORTED_MODULE_2__.sleep)(500);\n }\n this.polling = false;\n }\n fetchSrcUrl(source) {\n const url = source.src || source.currentSrc;\n if (!url) {\n return false;\n }\n if (!url.startsWith("http:") && !url.startsWith("https:")) {\n return false;\n }\n if (this.mediaSet.has(url)) {\n return true;\n }\n this.debug("fetch media source URL: " + url);\n this.mediaSet.add(url);\n this.autofetcher.queueUrl(url);\n return true;\n }\n processFetchableUrl(media) {\n let found = this.fetchSrcUrl(media);\n const sources = media.querySelectorAll("source");\n for (const source of sources) {\n const foundSource = this.fetchSrcUrl(source);\n found = found || foundSource;\n }\n return found;\n }\n async loadMedia(media) {\n this.debug("processing media element: " + media.outerHTML);\n const found = this.processFetchableUrl(media);\n if (!media.play) {\n this.debug("media not playable, skipping");\n return;\n }\n if (found) {\n if (!media.paused) {\n media.pause();\n this.debug("media URL found, pausing playback");\n }\n return;\n }\n if (media.paused || media.currentTime) {\n if (media.paused) {\n this.debug("no src url found, attempting to click or play: " + media.outerHTML);\n }\n else {\n this.debug("media already playing, waiting for full playback to fi
|
||
|
|
/*!***************************!*\
|
||
|
|
!*** ./src/autoscroll.ts ***!
|
||
|
|
\***************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "AutoScroll": () => (/* binding */ AutoScroll)\n/* harmony export */ });\n/* harmony import */ var _lib_behavior__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./lib/behavior */ "./src/lib/behavior.ts");\n/* harmony import */ var _lib_utils__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./lib/utils */ "./src/lib/utils.ts");\n\n\nclass AutoScroll extends _lib_behavior__WEBPACK_IMPORTED_MODULE_0__.Behavior {\n autoFetcher;\n showMoreQuery;\n state = { segments: 1 };\n lastScrollPos;\n samePosCount;\n origPath;\n lastMsg = "";\n constructor(autofetcher) {\n super();\n this.autoFetcher = autofetcher;\n this.showMoreQuery =\n "//*[contains(text(), \'show more\') or contains(text(), \'Show more\')]";\n this.lastScrollPos = -1;\n this.samePosCount = 0;\n this.origPath = document.location.pathname;\n }\n static id = "Autoscroll";\n currScrollPos() {\n return Math.round(self.scrollY + self.innerHeight);\n }\n canScrollMore() {\n const scrollElem = self.document.scrollingElement || self.document.body;\n return (this.currScrollPos() <\n Math.max(scrollElem.clientHeight, scrollElem.scrollHeight));\n }\n debug(msg) {\n if (this.lastMsg === msg) {\n return;\n }\n super.debug(msg);\n this.lastMsg = msg;\n }\n hasScrollEL(obj) {\n try {\n return !!self["getEventListeners"](obj).scroll;\n }\n catch (_) {\n this.debug("getEventListeners() not available");\n return true;\n }\n }\n async shouldScroll() {\n if (!this.hasScrollEL(self.window) &&\n !this.hasScrollEL(self.document) &&\n !this.hasScrollEL(self.document.body)) {\n return false;\n }\n if (window.frames.length >= 2) {\n return true;\n }\n const lastScrollHeight = self.document.scrollingElement.scrollHeight;\n const numFetching = this.autoFetcher.numFetching;\n const 
scrollEnd = document.scrollingElement.scrollHeight * 0.98 - self.innerHeight;\n window.scrollTo({ top: scrollEnd, left: 0, behavior: "smooth" });\n await (0,_lib_utils__WEBPACK_IMPORTED_MODULE_1__.sleep)(500);\n if (lastScrollHeight !== self.document.scrollingElement.scrollHeight ||\n numFetching < this.autoFetcher.numFetching) {\n window.scrollTo({ top: 0, left: 0, behavior: "auto" });\n return true;\n }\n return false;\n }\n shouldScrollUp() {\n if (self.window.scrollY === 0) {\n return false;\n }\n if ((self.window.scrollY + self["scrollHeight"]) /\n self.document.scrollingElement.scrollHeight <\n 0.9) {\n return false;\n }\n return true;\n }\n async *[Symbol.asyncIterator]() {\n if (this.shouldScrollUp()) {\n yield* this.scrollUp();\n return;\n }\n if (await this.shouldScroll()) {\n yield* this.scrollDown();\n return;\n }\n yield this.getState("Skipping autoscroll, page seems to not be responsive to scrolling events");\n }\n async *scrollDown() {\n const scrollInc = Math.min(self.document.scrollingElement.clientHeight * 0.1, 30);\n const interval = 75;\n let elapsedWait = 0;\n let showMoreElem = null;\n let ignoreShowMoreElem = false;\n const scrollOpts = { top: scrollInc, left: 0, behavior: "auto" };\n let lastScrollHeight = self.document.scrollingElement.scrollHeight;\n while (this.canScrollMore()) {\n if (document.location.pathname !== this.origPath) {\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_1__.beh
|
||
|
|
/*!**********************!*\
|
||
|
|
!*** ./src/index.ts ***!
|
||
|
|
\**********************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BehaviorManager": () => (/* binding */ BehaviorManager)\n/* harmony export */ });\n/* harmony import */ var _autofetcher__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./autofetcher */ "./src/autofetcher.ts");\n/* harmony import */ var _autoplay__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./autoplay */ "./src/autoplay.ts");\n/* harmony import */ var _autoscroll__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./autoscroll */ "./src/autoscroll.ts");\n/* harmony import */ var _autoclick__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./autoclick */ "./src/autoclick.ts");\n/* harmony import */ var _lib_utils__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./lib/utils */ "./src/lib/utils.ts");\n/* harmony import */ var _lib_behavior__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./lib/behavior */ "./src/lib/behavior.ts");\n/* harmony import */ var _site__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! 
./site */ "./src/site/index.ts");\n\n\n\n\n\n\n\n\nconst DEFAULT_OPTS = {\n autofetch: true,\n autoplay: true,\n autoscroll: true,\n autoclick: true,\n siteSpecific: true,\n};\nconst DEFAULT_CLICK_SELECTOR = "a";\nconst DEFAULT_LINK_SELECTOR = "a[href]";\nconst DEFAULT_LINK_EXTRACT = "href";\nclass BehaviorManager {\n autofetch;\n behaviors;\n loadedBehaviors;\n mainBehavior;\n mainBehaviorClass;\n inited;\n started;\n timeout;\n opts;\n linkOpts;\n constructor() {\n this.behaviors = [];\n this.loadedBehaviors = _site__WEBPACK_IMPORTED_MODULE_6__["default"].reduce((behaviors, next) => {\n behaviors[next.id] = next;\n return behaviors;\n }, {});\n this.mainBehavior = null;\n this.inited = false;\n this.started = false;\n this.linkOpts = {\n selector: DEFAULT_LINK_SELECTOR,\n extractName: DEFAULT_LINK_EXTRACT,\n };\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_4__.behaviorLog)("Loaded behaviors for: " + self.location.href);\n }\n init(opts = DEFAULT_OPTS, restart = false, customBehaviors = null) {\n if (this.inited && !restart) {\n return;\n }\n this.inited = true;\n this.opts = opts;\n if (!self.window) {\n return;\n }\n this.timeout = opts.timeout;\n if (opts.log !== undefined) {\n let logger = opts.log;\n if (typeof logger === "string") {\n logger = self[logger];\n }\n if (typeof logger === "function") {\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_4__._setLogFunc)(logger);\n }\n else if (logger === false) {\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_4__._setLogFunc)(null);\n }\n }\n this.autofetch = new _autofetcher__WEBPACK_IMPORTED_MODULE_0__.AutoFetcher(!!opts.autofetch, opts.fetchHeaders, opts.startEarly);\n if (opts.autofetch) {\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_4__.behaviorLog)("Using AutoFetcher");\n this.behaviors.push(this.autofetch);\n }\n if (opts.autoplay) {\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_4__.behaviorLog)("Using Autoplay");\n this.behaviors.push(new _autoplay__WEBPACK_IMPORTED_MODULE_1__.Autoplay(this.autofetch, opts.startEarly));\n }\n if 
(opts.autoclick) {\n (0,_lib_utils__WEBPACK_IMPORTED_MODULE_4__.behaviorLog)("Using AutoClick");\n this.behaviors.push(new _autoclick__WEBPACK_IMPORTED_MODULE_3__.AutoClick(opts.clickSelector || DEFAULT_CLICK_SELECTOR));\n }\n if (customBehaviors) {\n for (const behaviorClass of customBehaviors) {\n try {\n this.load(behaviorClass);\n }
|
||
|
|
/*!*****************************!*\
|
||
|
|
!*** ./src/lib/behavior.ts ***!
|
||
|
|
\*****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BackgroundBehavior": () => (/* binding */ BackgroundBehavior),\n/* harmony export */ "Behavior": () => (/* binding */ Behavior),\n/* harmony export */ "BehaviorRunner": () => (/* binding */ BehaviorRunner)\n/* harmony export */ });\n/* harmony import */ var _utils__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./utils */ "./src/lib/utils.ts");\n\n\nclass BackgroundBehavior {\n debug(msg) {\n (0,_utils__WEBPACK_IMPORTED_MODULE_0__.behaviorLog)(msg, "debug");\n }\n error(msg) {\n (0,_utils__WEBPACK_IMPORTED_MODULE_0__.behaviorLog)(msg, "error");\n }\n log(msg, type = "info") {\n (0,_utils__WEBPACK_IMPORTED_MODULE_0__.behaviorLog)(msg, type);\n }\n}\nclass Behavior extends BackgroundBehavior {\n _running;\n paused;\n _unpause;\n state;\n scrollOpts;\n constructor() {\n super();\n this._running = null;\n this.paused = null;\n this._unpause = null;\n this.state = {};\n this.scrollOpts = { behavior: "smooth", block: "center", inline: "center" };\n }\n start() {\n this._running = this.run();\n }\n done() {\n return this._running ? 
this._running : Promise.resolve();\n }\n async run() {\n try {\n for await (const step of this) {\n this.debug(step);\n if (this.paused) {\n await this.paused;\n }\n }\n this.debug(this.getState("done!"));\n }\n catch (e) {\n this.error(e.toString());\n }\n }\n pause() {\n if (this.paused) {\n return;\n }\n this.paused = new Promise((resolve) => {\n this._unpause = resolve;\n });\n }\n unpause() {\n if (this._unpause) {\n this._unpause();\n this.paused = null;\n this._unpause = null;\n }\n }\n getState(msg, incrValue) {\n if (incrValue) {\n if (this.state[incrValue] === undefined) {\n this.state[incrValue] = 1;\n }\n else {\n this.state[incrValue]++;\n }\n }\n return { state: this.state, msg };\n }\n cleanup() { }\n async awaitPageLoad(_) {\n }\n static load() {\n if (self["__bx_behaviors"]) {\n self["__bx_behaviors"].load(this);\n }\n else {\n console.warn(`Could not load ${this.name} behavior: window.__bx_behaviors is not initialized`);\n }\n }\n async *[Symbol.asyncIterator]() {\n yield;\n }\n}\nclass AbstractBehaviorInst {\n}\nclass BehaviorRunner extends BackgroundBehavior {\n inst;\n behaviorProps;\n ctx;\n _running;\n paused;\n _unpause;\n get id() {\n return (this.inst?.constructor).id;\n }\n constructor(behavior, mainOpts = {}) {\n super();\n this.behaviorProps = behavior;\n this.inst = new behavior();\n if (typeof this.inst.run !== "function" ||\n this.inst.run.constructor.name !== "AsyncGeneratorFunction") {\n throw Error("Invalid behavior: missing `async run*` instance method");\n }\n let { state, opts } = behavior.init();\n state = state || {};\n opts = opts ? 
{ ...opts, ...mainOpts } : mainOpts;\n const log = async (data, type) => this.wrappedLog(data, type);\n this.ctx = { Lib: _utils__WEBPACK_IMPORTED_MODULE_0__, state, opts, log };\n this._running = null;\n this.paused = null;\n this._unpause = null;\n }\n wrappedLog(data, type = "info") {\n let logData;\n if (typeof data === "string" || data instanceof String) {\n logData = { msg: data };\n }\n else {\n logData = data;\n
|
||
|
|
/*!**************************!*\
|
||
|
|
!*** ./src/lib/utils.ts ***!
|
||
|
|
\**************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "HistoryState": () => (/* binding */ HistoryState),\n/* harmony export */ "RestoreState": () => (/* binding */ RestoreState),\n/* harmony export */ "_setBehaviorManager": () => (/* binding */ _setBehaviorManager),\n/* harmony export */ "_setLogFunc": () => (/* binding */ _setLogFunc),\n/* harmony export */ "addLink": () => (/* binding */ addLink),\n/* harmony export */ "addToExternalSet": () => (/* binding */ addToExternalSet),\n/* harmony export */ "assertContentValid": () => (/* binding */ assertContentValid),\n/* harmony export */ "awaitLoad": () => (/* binding */ awaitLoad),\n/* harmony export */ "behaviorLog": () => (/* binding */ behaviorLog),\n/* harmony export */ "callBinding": () => (/* binding */ callBinding),\n/* harmony export */ "checkToJsonOverride": () => (/* binding */ checkToJsonOverride),\n/* harmony export */ "doExternalFetch": () => (/* binding */ doExternalFetch),\n/* harmony export */ "getState": () => (/* binding */ getState),\n/* harmony export */ "initFlow": () => (/* binding */ initFlow),\n/* harmony export */ "installBehaviors": () => (/* binding */ installBehaviors),\n/* harmony export */ "isInViewport": () => (/* binding */ isInViewport),\n/* harmony export */ "iterChildElem": () => (/* binding */ iterChildElem),\n/* harmony export */ "iterChildMatches": () => (/* binding */ iterChildMatches),\n/* harmony export */ "nextFlowStep": () => (/* binding */ nextFlowStep),\n/* harmony export */ "openWindow": () => (/* binding */ openWindow),\n/* harmony export */ "scrollAndClick": () => (/* binding */ scrollAndClick),\n/* harmony export */ "scrollIntoView": () => (/* binding */ scrollIntoView),\n/* harmony export */ "scrollToOffset": () => (/* binding */ scrollToOffset),\n/* harmony export */ 
"sleep": () => (/* binding */ sleep),\n/* harmony export */ "waitForNetworkIdle": () => (/* binding */ waitForNetworkIdle),\n/* harmony export */ "waitUnit": () => (/* binding */ waitUnit),\n/* harmony export */ "waitUntil": () => (/* binding */ waitUntil),\n/* harmony export */ "waitUntilNode": () => (/* binding */ waitUntilNode),\n/* harmony export */ "xpathNode": () => (/* binding */ xpathNode),\n/* harmony export */ "xpathNodes": () => (/* binding */ xpathNodes),\n/* harmony export */ "xpathString": () => (/* binding */ xpathString)\n/* harmony export */ });\nlet _logFunc = console.log;\nlet _behaviorMgrClass = null;\nconst scrollOpts = { behavior: "smooth", block: "center", inline: "center" };\nasync function scrollAndClick(node, interval = 500, opts = scrollOpts) {\n node.scrollIntoView(opts);\n await sleep(interval);\n node.click();\n}\nconst waitUnit = 200;\nasync function sleep(timeout) {\n return new Promise((resolve) => setTimeout(resolve, timeout));\n}\nasync function waitUntil(pred, interval = waitUnit) {\n while (!pred()) {\n await sleep(interval);\n }\n}\nasync function waitUntilNode(path, root = document, old = null, timeout = 1000, interval = waitUnit) {\n let node = null;\n let stop = false;\n const waitP = waitUntil(() => {\n node = xpathNode(path, root);\n return stop || (node !== old && node !== null);\n }, interval);\n const timeoutP = new Promise((r) => setTimeout(() => {\n stop = true;\n r("TIMEOUT");\n }, timeout));\n await Promise.race([waitP, timeoutP]);\n return node;\n}\nasync function awaitLoad(iframe) {\n const doc = iframe ? iframe.contentDocument : document;\n const win = iframe ? iframe.contentWindow : window;\n return new Promise((resolve) => {\n if (doc.readyState === "complete") {\n resolve(null);\n }\n else {\n win.addEventListener("load", resolve);\n }\n });\n}\nfunction unsetToJson(obj) {\n if (obj.toJSON) {\n try {\n
|
||
|
|
/*!******************************!*\
|
||
|
|
!*** ./src/site/facebook.ts ***!
|
||
|
|
\******************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "FacebookTimelineBehavior": () => (/* binding */ FacebookTimelineBehavior)\n/* harmony export */ });\nconst Q = {\n feed: "//div[@role=\'feed\']",\n article: ".//div[@role=\'article\']",\n pageletPostList: "//div[@data-pagelet=\'page\']/div[@role=\'main\']//div[@role=\'main\']/div",\n pageletProfilePostList: "//div[@data-pagelet=\'page\']//div[@data-pagelet=\'ProfileTimeline\']",\n articleToPostList: "//div[@role=\'article\']/../../../../div",\n photosOrVideos: `.//a[(contains(@href, \'/photos/\') or contains(@href, \'/photo/?\') or contains(@href, \'/videos/\')) and (starts-with(@href, \'${window.location.origin}/\') or starts-with(@href, \'/\'))]`,\n postQuery: ".//a[contains(@href, \'/posts/\')]",\n extraLabel: "//*[starts-with(text(), \'+\')]",\n nextSlideQuery: "//div[@data-name=\'media-viewer-nav-container\']/div[@data-visualcompletion][2]//div[@role=\'button\']",\n nextSlide: "//div[@aria-hidden=\'false\']//div[@role=\'button\' and not(@aria-hidden) and @aria-label]",\n commentList: ".//ul[(../h3) or (../h4)]",\n commentMoreReplies: "./div[2]/div[1]/div[2]/div[@role=\'button\']",\n commentMoreComments: "./following-sibling::div/div/div[2][@role=\'button\'][./span/span]",\n viewComments: ".//h4/..//div[@role=\'button\']",\n photoCommentList: "//ul[../h2]",\n firstPhotoThumbnail: "//div[@role=\'main\']//div[3]//div[contains(@style, \'border-radius\')]//div[contains(@style, \'max-width\') and contains(@style, \'min-width\')]//a[@role=\'link\']",\n firstVideoThumbnail: "//div[@role=\'main\']//div[contains(@style, \'z-index\')]/following-sibling::div/div/div/div[last()]//a[contains(@href, \'/videos/\') and @aria-hidden!=\'true\']",\n firstVideoSimple: "//div[@role=\'main\']//a[contains(@href, \'/videos/\') and 
@aria-hidden!=\'true\']",\n mainVideo: "//div[@data-pagelet=\'root\']//div[@role=\'dialog\']//div[@role=\'main\']//video",\n nextVideo: "following::a[contains(@href, \'/videos/\') and @aria-hidden!=\'true\']",\n isPhotoVideoPage: /^.*facebook\\.com\\/[^/]+\\/(photos|videos)\\/.+/,\n isPhotosPage: /^.*facebook\\.com\\/[^/]+\\/photos\\/?($|\\?)/,\n isVideosPage: /^.*facebook\\.com\\/[^/]+\\/videos\\/?($|\\?)/,\n pageLoadWaitUntil: "//div[@role=\'main\']",\n};\nclass FacebookTimelineBehavior {\n extraWindow;\n allowNewWindow;\n static id = "Facebook";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/(www\\.)?facebook\\.com\\/.*\\/posts\\//);\n }\n static init() {\n return {\n state: {},\n };\n }\n constructor() {\n this.extraWindow = null;\n this.allowNewWindow = false;\n }\n async *iterPostFeeds(ctx) {\n const { iterChildElem, waitUnit, waitUntil, xpathNode, xpathNodes } = ctx.Lib;\n const feeds = Array.from(xpathNodes(Q.feed));\n if (feeds && feeds.length) {\n for (const feed of feeds) {\n for await (const post of iterChildElem(feed, waitUnit, waitUntil * 10)) {\n yield* this.viewPost(ctx, xpathNode(Q.article, post));\n }\n }\n }\n else {\n const feed = xpathNode(Q.pageletPostList) ||\n xpathNode(Q.pageletProfilePostList) ||\n xpathNode(Q.articleToPostList);\n if (!feed) {\n return;\n }\n for await (const post of iterChildElem(feed, waitUnit, waitUntil * 10)) {\n yield* this.viewPost(ctx, xpathNode(Q.article, post));\n }\n }\n if (this.extraWindow) {\n this.extraWindow.close();\n }\n }\n async *viewPost(ctx, post, maxExpands = 2) {\n const { getState, scrollIntoView, sleep, waitUnit, xpathNode } = ctx.Lib;\n if (!post) {\n
|
||
|
|
/*!***************************!*\
|
||
|
|
!*** ./src/site/index.ts ***!
|
||
|
|
\***************************/
/*
 * Module wrapper for ./src/site/index.ts. The module source is carried as an
 * eval string that ends with a sourceURL trailer, so any edit to the logic has
 * to respect the string escaping.
 *
 * When evaluated, the source:
 *  - flags the exports object as an ES module via __webpack_require__.r;
 *  - registers a getter for the "default" export via __webpack_require__.d;
 *  - requires the facebook, instagram, telegram, twitter and tiktok site modules;
 *  - builds the siteBehaviors array and exposes it as the default export.
 *
 * Order of siteBehaviors: InstagramPostsBehavior, TwitterTimelineBehavior,
 * FacebookTimelineBehavior, TelegramBehavior, TikTokVideoBehavior,
 * TikTokProfileBehavior.
 *
 * The youtube site module is not imported by this module.
 * NOTE(review): whether the array order decides which behavior wins when more
 * than one isMatch() is true depends on the consumer of this list - confirm.
 */
(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "default": () => (__WEBPACK_DEFAULT_EXPORT__)\n/* harmony export */ });\n/* harmony import */ var _facebook__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./facebook */ "./src/site/facebook.ts");\n/* harmony import */ var _instagram__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./instagram */ "./src/site/instagram.ts");\n/* harmony import */ var _telegram__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./telegram */ "./src/site/telegram.ts");\n/* harmony import */ var _twitter__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./twitter */ "./src/site/twitter.ts");\n/* harmony import */ var _tiktok__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./tiktok */ "./src/site/tiktok.ts");\n\n\n\n\n\nconst siteBehaviors = [\n _instagram__WEBPACK_IMPORTED_MODULE_1__.InstagramPostsBehavior,\n _twitter__WEBPACK_IMPORTED_MODULE_3__.TwitterTimelineBehavior,\n _facebook__WEBPACK_IMPORTED_MODULE_0__.FacebookTimelineBehavior,\n _telegram__WEBPACK_IMPORTED_MODULE_2__.TelegramBehavior,\n _tiktok__WEBPACK_IMPORTED_MODULE_4__.TikTokVideoBehavior,\n _tiktok__WEBPACK_IMPORTED_MODULE_4__.TikTokProfileBehavior,\n];\n/* harmony default export */ const __WEBPACK_DEFAULT_EXPORT__ = (siteBehaviors);\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/index.ts?')},"./src/site/instagram.ts":
|
||
|
|
/*!*******************************!*\
|
||
|
|
!*** ./src/site/instagram.ts ***!
|
||
|
|
\*******************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "InstagramPostsBehavior": () => (/* binding */ InstagramPostsBehavior)\n/* harmony export */ });\nconst subpostNextOnlyChevron = "//article[@role=\'presentation\']//div[@role=\'presentation\']/following-sibling::button";\nconst Q = {\n rootPath: "//main//div/div[2]/div/div/div/div",\n childMatchSelect: "string(.//a[starts-with(@href, \'/\')]/@href)",\n childMatch: "child::div[.//a[@href=\'$1\']]",\n firstPostInRow: "div[1]//a",\n postCloseButton: "//div[last() - 2]//div[@role=\'button\']",\n nextPost: "//button[.//*[local-name() = \'svg\' and @aria-label=\'Next\']]",\n postLoading: "//*[@aria-label=\'Loading...\']",\n subpostNextOnlyChevron,\n subpostPrevNextChevron: subpostNextOnlyChevron + "[2]",\n commentRoot: "//article[@role=\'presentation\']/div[1]/div[2]//ul/div[last()]/div/div",\n viewReplies: "ul/li//button[span[not(count(*)) and contains(text(), \'(\')]]",\n loadMore: "//button[span[@aria-label]]",\n pageLoadWaitUntil: "//main",\n};\nclass InstagramPostsBehavior {\n maxCommentsTime;\n postOnlyWindow;\n static id = "Instagram";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/(www\\.)?instagram\\.com\\//);\n }\n static init() {\n return {\n state: {\n posts: 0,\n slides: 0,\n rows: 0,\n comments: 0,\n },\n };\n }\n constructor() {\n this.maxCommentsTime = 10000;\n this.postOnlyWindow = null;\n }\n cleanup() {\n if (this.postOnlyWindow) {\n this.postOnlyWindow.close();\n this.postOnlyWindow = null;\n }\n }\n async waitForNext(ctx, child) {\n if (!child) {\n return null;\n }\n await ctx.Lib.sleep(ctx.Lib.waitUnit);\n if (!child.nextElementSibling) {\n return null;\n }\n return child.nextElementSibling;\n }\n async *iterRow(ctx) {\n const { RestoreState, sleep, waitUnit, xpathNode } = 
ctx.Lib;\n const root = xpathNode(Q.rootPath);\n if (!root) {\n return;\n }\n let child = root.firstElementChild;\n if (!child) {\n return;\n }\n while (child) {\n await sleep(waitUnit);\n const restorer = new RestoreState(Q.childMatchSelect, child);\n if (restorer.matchValue) {\n yield child;\n child = await restorer.restore(Q.rootPath, Q.childMatch);\n }\n child = await this.waitForNext(ctx, child);\n }\n }\n async *viewStandalonePost(ctx, origLoc) {\n const { getState, sleep, waitUnit, waitUntil, xpathNode, xpathString } = ctx.Lib;\n const root = xpathNode(Q.rootPath);\n if (!root?.firstElementChild) {\n return;\n }\n const firstPostHref = xpathString(Q.childMatchSelect, root.firstElementChild);\n yield getState(ctx, "Loading single post view for first post: " + firstPostHref);\n window.history.replaceState({}, "", firstPostHref);\n window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));\n let root2 = null;\n let root3 = null;\n await sleep(waitUnit * 5);\n await waitUntil(() => (root2 = xpathNode(Q.rootPath)) !== root && root2, waitUnit * 5);\n await sleep(waitUnit * 5);\n window.history.replaceState({}, "", origLoc);\n window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));\n await waitUntil(() => (root3 = xpathNode(Q.rootPath)) !== root2 && root3, waitUnit * 5);\n }\n async *iterSubposts(ctx) {\n const { getState, sleep, waitUnit, xpathNode } = ctx.Lib;\n let next = xpathNode(Q.subpostNextOnlyChevron);\n let count = 1
|
||
|
|
/*!******************************!*\
|
||
|
|
!*** ./src/site/telegram.ts ***!
|
||
|
|
\******************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "TelegramBehavior": () => (/* binding */ TelegramBehavior)\n/* harmony export */ });\nconst Q = {\n telegramContainer: "//main//section[@class=\'tgme_channel_history js-message_history\']",\n postId: "string(./div[@data-post]/@data-post)",\n linkExternal: "string(.//a[@class=\'tgme_widget_message_link_preview\' and @href]/@href)",\n};\nclass TelegramBehavior {\n static id = "Telegram";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/t.me\\/s\\/\\w[\\w]+/);\n }\n static init() {\n return {\n state: { messages: 0 },\n };\n }\n async waitForPrev(ctx, child) {\n if (!child) {\n return null;\n }\n await ctx.Lib.sleep(ctx.Lib.waitUnit * 5);\n if (!child.previousElementSibling) {\n return null;\n }\n return child.previousElementSibling;\n }\n async *run(ctx) {\n const { getState, scrollIntoView, sleep, waitUnit, xpathNode, xpathString, } = ctx.Lib;\n const root = xpathNode(Q.telegramContainer);\n if (!root) {\n return;\n }\n let child = root.lastElementChild;\n while (child) {\n scrollIntoView(child);\n const postId = xpathString(Q.postId, child) || "unknown";\n const linkUrl = xpathString(Q.linkExternal, child);\n if (linkUrl?.endsWith(".jpg") || linkUrl.endsWith(".png")) {\n yield getState(ctx, "Loading External Image: " + linkUrl);\n const image = new Image();\n image.src = linkUrl;\n document.body.appendChild(image);\n await sleep(waitUnit * 2.5);\n document.body.removeChild(image);\n }\n yield getState(ctx, "Loading Message: " + postId, "messages");\n child = await this.waitForPrev(ctx, child);\n }\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/telegram.ts?')},"./src/site/tiktok.ts":
|
||
|
|
/*!****************************!*\
|
||
|
|
!*** ./src/site/tiktok.ts ***!
|
||
|
|
\****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BREADTH_ALL": () => (/* binding */ BREADTH_ALL),\n/* harmony export */ "TikTokProfileBehavior": () => (/* binding */ TikTokProfileBehavior),\n/* harmony export */ "TikTokSharedBehavior": () => (/* binding */ TikTokSharedBehavior),\n/* harmony export */ "TikTokVideoBehavior": () => (/* binding */ TikTokVideoBehavior)\n/* harmony export */ });\nconst Q = {\n commentButton: "button[aria-label^=\'Read or add comments\']",\n commentList: "//div[contains(@class, \'CommentListContainer\')]",\n commentItem: "div[contains(@class, \'CommentItemContainer\')]",\n viewMoreReplies: ".//p[contains(@class, \'ReplyActionText\')]",\n viewMoreThread: ".//p[starts-with(@data-e2e, \'view-more\') and string-length(text()) > 0]",\n profileVideoList: "//div[starts-with(@data-e2e, \'user-post-item-list\')]",\n profileVideoItem: "div[contains(@class, \'DivItemContainerV2\')]",\n backButton: "button[contains(@class, \'StyledCloseIconContainer\')]",\n pageLoadWaitUntil: "//*[@role=\'dialog\']",\n};\nconst BREADTH_ALL = Symbol("BREADTH_ALL");\nclass TikTokSharedBehavior {\n async awaitPageLoad(ctx) {\n const { assertContentValid, waitUntilNode } = ctx.Lib;\n await waitUntilNode(Q.pageLoadWaitUntil, document, null, 10000);\n assertContentValid(() => !!document.querySelector("*[aria-label=\'Messages\']"), "not_logged_in");\n }\n}\nclass TikTokVideoBehavior extends TikTokSharedBehavior {\n static id = "TikTokVideo";\n static init() {\n return {\n state: { comments: 0 },\n opts: { breadth: BREADTH_ALL },\n };\n }\n static isMatch() {\n const pathRegex = /https:\\/\\/(www\\.)?tiktok\\.com\\/@.+\\/video\\/\\d+\\/?.*/;\n return !!window.location.href.match(pathRegex);\n }\n breadthComplete({ opts: { breadth } }, iter) {\n return breadth !== BREADTH_ALL 
&& breadth <= iter;\n }\n async *crawlThread(ctx, parentNode, prev = null, iter = 0) {\n const { waitUntilNode, scrollAndClick, getState } = ctx.Lib;\n const next = await waitUntilNode(Q.viewMoreThread, parentNode, prev);\n if (!next || this.breadthComplete(ctx, iter))\n return;\n await scrollAndClick(next, 500);\n yield getState(ctx, "View more replies", "comments");\n yield* this.crawlThread(ctx, parentNode, next, iter + 1);\n }\n async *expandThread(ctx, item) {\n const { xpathNode, scrollAndClick, getState } = ctx.Lib;\n const viewMore = xpathNode(Q.viewMoreReplies, item);\n if (!viewMore)\n return;\n await scrollAndClick(viewMore, 500);\n yield getState(ctx, "View comment", "comments");\n yield* this.crawlThread(ctx, item, null, 1);\n }\n async *run(ctx) {\n const { xpathNode, iterChildMatches, scrollIntoView, getState, sleep } = ctx.Lib;\n const showComments = document.querySelector(Q.commentButton);\n if (showComments) {\n showComments.click();\n await sleep(500);\n }\n const commentList = xpathNode(Q.commentList);\n const commentItems = iterChildMatches(Q.commentItem, commentList);\n for await (const item of commentItems) {\n scrollIntoView(item);\n yield getState(ctx, "View comment", "comments");\n if (this.breadthComplete(ctx, 0))\n continue;\n yield* this.expandThread(ctx, item);\n }\n yield getState(ctx, "TikTok Video Behavior Complete");\n }\n}\nclass TikTokProfileBehavior extends TikTokSharedBehavior {\n static id = "TikTokProfile";\n static isMatch() {\n const pathRegex = /https:\\/\\/(www\\.)?tiktok\\.com\\/@[a-zA-Z0-9]+(\\/?$|\\/\\?.*)/;\n return !!window.location.href.match(pathRegex);\n }\n static init() {\n
|
||
|
|
/*!*****************************!*\
|
||
|
|
!*** ./src/site/twitter.ts ***!
|
||
|
|
\*****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "TwitterTimelineBehavior": () => (/* binding */ TwitterTimelineBehavior)\n/* harmony export */ });\nconst Q = {\n rootPath: "//h1[@role=\'heading\' and @aria-level=\'1\']/following-sibling::div[@aria-label]//div[@style]",\n anchor: ".//article",\n childMatchSelect: "string(.//article//a[starts-with(@href, \'/\') and @aria-label]/@href)",\n childMatch: "child::div[.//a[@href=\'$1\']]",\n expand: ".//div[@role=\'button\' and not(@aria-haspopup) and not(@data-testid)]",\n quote: ".//div[@role=\'blockquote\' and @aria-haspopup=\'false\']",\n image: ".//a[@role=\'link\' and starts-with(@href, \'/\') and contains(@href, \'/photo/\')]",\n imageFirstNext: "//div[@aria-roledescription=\'carousel\']/div[2]/div[1]//div[@role=\'button\']",\n imageNext: "//div[@aria-roledescription=\'carousel\']/div[2]/div[2]//div[@role=\'button\']",\n imageClose: "//div[@role=\'presentation\']/div[@role=\'button\' and @aria-label]",\n backButton: "//div[@data-testid=\'titleContainer\']//div[@role=\'button\']",\n viewSensitive: ".//a[@href=\'/settings/content_you_see\']/parent::div/parent::div/parent::div//div[@role=\'button\']",\n progress: ".//*[@role=\'progressbar\']",\n promoted: ".//div[data-testid=\'placementTracking\']",\n};\nclass TwitterTimelineBehavior {\n seenTweets;\n seenMediaTweets;\n static id = "Twitter";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/(www\\.)?(x|twitter)\\.com\\//);\n }\n static init() {\n return {\n state: {\n tweets: 0,\n images: 0,\n videos: 0,\n },\n opts: {\n maxDepth: 0,\n },\n };\n }\n constructor() {\n this.seenTweets = new Set();\n this.seenMediaTweets = new Set();\n }\n showingProgressBar(ctx, root) {\n const { xpathNode } = ctx.Lib;\n const node = xpathNode(Q.progress, root);\n 
if (!node) {\n return false;\n }\n return node.clientHeight > 10;\n }\n async waitForNext(ctx, child) {\n const { sleep, waitUnit } = ctx.Lib;\n if (!child) {\n return null;\n }\n await sleep(waitUnit * 2);\n if (!child.nextElementSibling) {\n return null;\n }\n while (this.showingProgressBar(ctx, child.nextElementSibling)) {\n await sleep(waitUnit);\n }\n return child.nextElementSibling;\n }\n async expandMore(ctx, child) {\n const { sleep, waitUnit, xpathNode } = ctx.Lib;\n const expandElem = xpathNode(Q.expand, child);\n if (!expandElem) {\n return child;\n }\n const prev = child.previousElementSibling;\n expandElem.click();\n await sleep(waitUnit);\n while (this.showingProgressBar(ctx, prev.nextElementSibling)) {\n await sleep(waitUnit);\n }\n child = prev.nextElementSibling;\n return child;\n }\n async *infScroll(ctx) {\n const { scrollIntoView, RestoreState, sleep, waitUnit, xpathNode } = ctx.Lib;\n const root = xpathNode(Q.rootPath);\n if (!root) {\n return;\n }\n let child = root.firstElementChild;\n if (!child) {\n return;\n }\n while (child) {\n let anchorElem = xpathNode(Q.anchor, child);\n if (!anchorElem && Q.expand) {\n child = await this.expandMore(ctx, child);\n anchorElem = xpathNode(Q.anchor, child);\n }\n if (child?.innerText) {\n scrollIntoView(child);\n }\n if (child && anchorElem) {\n await sleep(waitUnit);\n const restorer = new RestoreState(Q.childMatchSelec
|
||
|
|
/*!*****************************!*\
|
||
|
|
!*** ./src/site/youtube.ts ***!
|
||
|
|
\*****************************/
/*
 * Module wrapper for ./src/site/youtube.ts; exports the YoutubeBehavior class.
 * The module source is carried as an eval string with a sourceURL trailer.
 *
 * YoutubeBehavior extends the AutoScroll class required from
 * ./src/autoscroll.ts and defines one method:
 *
 *  - awaitPageLoad(ctx): awaits ctx.Lib.sleep(10), then calls
 *    ctx.Lib.assertContentValid with a predicate and the reason string
 *    "no_video_playing". The predicate returns false when a video element
 *    exists and is paused; otherwise it returns true only if the serialized
 *    document (documentElement.outerHTML) does not contain the text
 *    "not a bot".
 *
 * NOTE(review): the unit of the sleep(10) argument and what
 * assertContentValid does when the predicate is false are defined in ctx.Lib,
 * not here - confirm before relying on either.
 */
(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "YoutubeBehavior": () => (/* binding */ YoutubeBehavior)\n/* harmony export */ });\n/* harmony import */ var _autoscroll__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../autoscroll */ "./src/autoscroll.ts");\n\nclass YoutubeBehavior extends _autoscroll__WEBPACK_IMPORTED_MODULE_0__.AutoScroll {\n async awaitPageLoad(ctx) {\n const { sleep, assertContentValid } = ctx.Lib;\n await sleep(10);\n assertContentValid(() => {\n const video = document.querySelector("video");\n const paused = video && video.paused;\n if (paused) {\n return false;\n }\n return document.documentElement.outerHTML.indexOf("not a bot") === -1;\n }, "no_video_playing");\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/youtube.ts?')},"./src lazy recursive ^.*$":
|
||
|
|
/*!*****************************************!*\
|
||
|
|
!*** ./src/ lazy ^.*$ namespace object ***!
|
||
|
|
\*****************************************/(module,__unused_webpack_exports,__webpack_require__)=>{eval('var map = {\n\t".": [\n\t\t"./src/index.ts"\n\t],\n\t"./": [\n\t\t"./src/index.ts"\n\t],\n\t"./autoclick": [\n\t\t"./src/autoclick.ts"\n\t],\n\t"./autoclick.ts": [\n\t\t"./src/autoclick.ts"\n\t],\n\t"./autofetcher": [\n\t\t"./src/autofetcher.ts"\n\t],\n\t"./autofetcher.ts": [\n\t\t"./src/autofetcher.ts"\n\t],\n\t"./autoplay": [\n\t\t"./src/autoplay.ts"\n\t],\n\t"./autoplay.ts": [\n\t\t"./src/autoplay.ts"\n\t],\n\t"./autoscroll": [\n\t\t"./src/autoscroll.ts"\n\t],\n\t"./autoscroll.ts": [\n\t\t"./src/autoscroll.ts"\n\t],\n\t"./index": [\n\t\t"./src/index.ts"\n\t],\n\t"./index.ts": [\n\t\t"./src/index.ts"\n\t],\n\t"./lib/behavior": [\n\t\t"./src/lib/behavior.ts"\n\t],\n\t"./lib/behavior.ts": [\n\t\t"./src/lib/behavior.ts"\n\t],\n\t"./lib/utils": [\n\t\t"./src/lib/utils.ts"\n\t],\n\t"./lib/utils.ts": [\n\t\t"./src/lib/utils.ts"\n\t],\n\t"./site": [\n\t\t"./src/site/index.ts"\n\t],\n\t"./site/": [\n\t\t"./src/site/index.ts"\n\t],\n\t"./site/facebook": [\n\t\t"./src/site/facebook.ts"\n\t],\n\t"./site/facebook.ts": [\n\t\t"./src/site/facebook.ts"\n\t],\n\t"./site/index": [\n\t\t"./src/site/index.ts"\n\t],\n\t"./site/index.ts": [\n\t\t"./src/site/index.ts"\n\t],\n\t"./site/instagram": [\n\t\t"./src/site/instagram.ts"\n\t],\n\t"./site/instagram.ts": [\n\t\t"./src/site/instagram.ts"\n\t],\n\t"./site/telegram": [\n\t\t"./src/site/telegram.ts"\n\t],\n\t"./site/telegram.ts": [\n\t\t"./src/site/telegram.ts"\n\t],\n\t"./site/tiktok": [\n\t\t"./src/site/tiktok.ts"\n\t],\n\t"./site/tiktok.ts": [\n\t\t"./src/site/tiktok.ts"\n\t],\n\t"./site/twitter": [\n\t\t"./src/site/twitter.ts"\n\t],\n\t"./site/twitter.ts": [\n\t\t"./src/site/twitter.ts"\n\t],\n\t"./site/youtube": [\n\t\t"./src/site/youtube.ts",\n\t\t"main"\n\t],\n\t"./site/youtube.ts": [\n\t\t"./src/site/youtube.ts",\n\t\t"main"\n\t]\n};\nfunction webpackAsyncContext(req) {\n\tif(!__webpack_require__.o(map, req)) 
{\n\t\treturn Promise.resolve().then(() => {\n\t\t\tvar e = new Error("Cannot find module \'" + req + "\'");\n\t\t\te.code = \'MODULE_NOT_FOUND\';\n\t\t\tthrow e;\n\t\t});\n\t}\n\n\tvar ids = map[req], id = ids[0];\n\treturn Promise.all(ids.slice(1).map(__webpack_require__.e)).then(() => {\n\t\treturn __webpack_require__(id);\n\t});\n}\nwebpackAsyncContext.keys = () => (Object.keys(map));\nwebpackAsyncContext.id = "./src lazy recursive ^.*$";\nmodule.exports = webpackAsyncContext;\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/_lazy_^.*$_namespace_object?')}},__webpack_module_cache__={};function __webpack_require__(e){var n=__webpack_module_cache__[e];if(void 0!==n)return n.exports;var t=__webpack_module_cache__[e]={exports:{}};return __webpack_modules__[e](t,t.exports,__webpack_require__),t.exports}__webpack_require__.d=(e,n)=>{for(var t in n)__webpack_require__.o(n,t)&&!__webpack_require__.o(e,t)&&Object.defineProperty(e,t,{enumerable:!0,get:n[t]})},__webpack_require__.e=()=>Promise.resolve(),__webpack_require__.o=(e,n)=>Object.prototype.hasOwnProperty.call(e,n),__webpack_require__.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})};var __webpack_exports__=__webpack_require__("./index.ts")})();
|