diff --git a/behaviors.js b/behaviors.js index 0bb3c5b8..bad74dc2 100644 --- a/behaviors.js +++ b/behaviors.js @@ -28,7 +28,7 @@ \*****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BackgroundBehavior": () => (/* binding */ BackgroundBehavior),\n/* harmony export */ "Behavior": () => (/* binding */ Behavior),\n/* harmony export */ "BehaviorRunner": () => (/* binding */ BehaviorRunner)\n/* harmony export */ });\n/* harmony import */ var _utils__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./utils */ "./src/lib/utils.ts");\n\n\nclass BackgroundBehavior {\n debug(msg) {\n (0,_utils__WEBPACK_IMPORTED_MODULE_0__.behaviorLog)(msg, "debug");\n }\n error(msg) {\n (0,_utils__WEBPACK_IMPORTED_MODULE_0__.behaviorLog)(msg, "error");\n }\n log(msg, type = "info") {\n (0,_utils__WEBPACK_IMPORTED_MODULE_0__.behaviorLog)(msg, type);\n }\n}\nclass Behavior extends BackgroundBehavior {\n _running;\n paused;\n _unpause;\n state;\n scrollOpts;\n constructor() {\n super();\n this._running = null;\n this.paused = null;\n this._unpause = null;\n this.state = {};\n this.scrollOpts = { behavior: "smooth", block: "center", inline: "center" };\n }\n start() {\n this._running = this.run();\n }\n done() {\n return this._running ? this._running : Promise.resolve();\n }\n async run() {\n try {\n for await (const step of this) {\n this.debug(step);\n if (this.paused) {\n await this.paused;\n }\n }\n this.debug(this.getState("done!"));\n }\n catch (e) {\n this.error(e.toString());\n }\n }\n pause() {\n if (this.paused) {\n return;\n }\n this.paused = new Promise((resolve) => {\n this._unpause = resolve;\n });\n }\n unpause() {\n if (this._unpause) {\n this._unpause();\n this.paused = null;\n this._unpause = null;\n }\n }\n getState(msg, incrValue) {\n if (incrValue) {\n if (this.state[incrValue] === undefined) {\n this.state[incrValue] = 1;\n }\n else {\n this.state[incrValue]++;\n }\n }\n return { state: this.state, msg };\n }\n cleanup() { }\n async awaitPageLoad(_) {\n }\n static load() {\n if (self["__bx_behaviors"]) {\n self["__bx_behaviors"].load(this);\n }\n else {\n console.warn(`Could not load ${this.name} behavior: window.__bx_behaviors is not initialized`);\n }\n }\n async *[Symbol.asyncIterator]() {\n yield;\n }\n}\nclass AbstractBehaviorInst {\n}\nclass BehaviorRunner extends BackgroundBehavior {\n inst;\n behaviorProps;\n ctx;\n _running;\n paused;\n _unpause;\n get id() {\n return (this.inst?.constructor).id;\n }\n constructor(behavior, mainOpts = {}) {\n super();\n this.behaviorProps = behavior;\n this.inst = new behavior();\n if (typeof this.inst.run !== "function" ||\n this.inst.run.constructor.name !== "AsyncGeneratorFunction") {\n throw Error("Invalid behavior: missing `async run*` instance method");\n }\n let { state, opts } = behavior.init();\n state = state || {};\n opts = opts ? { ...opts, ...mainOpts } : mainOpts;\n const log = async (data, type) => this.wrappedLog(data, type);\n this.ctx = { Lib: _utils__WEBPACK_IMPORTED_MODULE_0__, state, opts, log };\n this._running = null;\n this.paused = null;\n this._unpause = null;\n }\n wrappedLog(data, type = "info") {\n let logData;\n if (typeof data === "string" || data instanceof String) {\n logData = { msg: data };\n }\n else {\n logData = data;\n }\n this.log({ ...logData, behavior: this.behaviorProps.id, siteSpecific: true }, type);\n }\n start() {\n this._running = this.run();\n }\n done() {\n return this._running ? this._running : Promise.resolve();\n }\n async run() {\n try {\n for await (const step of this.inst.run(this.ctx)) {\n if (step) {\n this.wrappedLog(step);\n }\n if (this.paused) {\n await this.paused;\n }\n }\n this.debug({ msg: "done!", behavior: this.behaviorProps.id });\n }\n catch (e) {\n this.error({ msg: e.toString(), behavior: this.behaviorProps.id });\n }\n }\n pause() {\n if (this.paused) {\n return;\n }\n this.paused = new Promise((resolve) => {\n this._unpause = resolve;\n });\n }\n unpause() {\n if (this._unpause) {\n this._unpause();\n this.paused = null;\n this._unpause = null;\n }\n }\n cleanup() { }\n async awaitPageLoad() {\n if (this.inst.awaitPageLoad) {\n await this.inst.awaitPageLoad(this.ctx);\n }\n }\n static load() {\n if (self["__bx_behaviors"]) {\n self["__bx_behaviors"].load(this);\n }\n else {\n console.warn(`Could not load ${this.name} behavior: window.__bx_behaviors is not initialized`);\n }\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/lib/behavior.ts?')},"./src/lib/utils.ts": /*!**************************!*\ !*** ./src/lib/utils.ts ***! - \**************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "HistoryState": () => (/* binding */ HistoryState),\n/* harmony export */ "RestoreState": () => (/* binding */ RestoreState),\n/* harmony export */ "_setBehaviorManager": () => (/* binding */ _setBehaviorManager),\n/* harmony export */ "_setLogFunc": () => (/* binding */ _setLogFunc),\n/* harmony export */ "addLink": () => (/* binding */ addLink),\n/* harmony export */ "addToExternalSet": () => (/* binding */ addToExternalSet),\n/* harmony export */ "assertContentValid": () => (/* binding */ assertContentValid),\n/* harmony export */ "awaitLoad": () => (/* binding */ awaitLoad),\n/* harmony export */ "behaviorLog": () => (/* binding */ behaviorLog),\n/* harmony export */ "callBinding": () => (/* binding */ callBinding),\n/* harmony export */ "checkToJsonOverride": () => (/* binding */ checkToJsonOverride),\n/* harmony export */ "doExternalFetch": () => (/* binding */ doExternalFetch),\n/* harmony export */ "getState": () => (/* binding */ getState),\n/* harmony export */ "initFlow": () => (/* binding */ initFlow),\n/* harmony export */ "installBehaviors": () => (/* binding */ installBehaviors),\n/* harmony export */ "isInViewport": () => (/* binding */ isInViewport),\n/* harmony export */ "iterChildElem": () => (/* binding */ iterChildElem),\n/* harmony export */ "iterChildMatches": () => (/* binding */ iterChildMatches),\n/* harmony export */ "nextFlowStep": () => (/* binding */ nextFlowStep),\n/* harmony export */ "openWindow": () => (/* binding */ openWindow),\n/* harmony export */ "scrollAndClick": () => (/* binding */ scrollAndClick),\n/* harmony export */ "scrollIntoView": () => (/* binding */ scrollIntoView),\n/* harmony export */ "scrollToOffset": () => (/* binding */ scrollToOffset),\n/* harmony export */ "sleep": () => (/* binding */ sleep),\n/* harmony export */ "waitForNetworkIdle": () => (/* binding */ waitForNetworkIdle),\n/* harmony export */ "waitUnit": () => (/* binding */ waitUnit),\n/* harmony export */ "waitUntil": () => (/* binding */ waitUntil),\n/* harmony export */ "waitUntilNode": () => (/* binding */ waitUntilNode),\n/* harmony export */ "xpathNode": () => (/* binding */ xpathNode),\n/* harmony export */ "xpathNodes": () => (/* binding */ xpathNodes),\n/* harmony export */ "xpathString": () => (/* binding */ xpathString)\n/* harmony export */ });\nlet _logFunc = console.log;\nlet _behaviorMgrClass = null;\nconst scrollOpts = { behavior: "smooth", block: "center", inline: "center" };\nasync function scrollAndClick(node, interval = 500, opts = scrollOpts) {\n node.scrollIntoView(opts);\n await sleep(interval);\n node.click();\n}\nconst waitUnit = 200;\nasync function sleep(timeout) {\n return new Promise((resolve) => setTimeout(resolve, timeout));\n}\nasync function waitUntil(pred, interval = waitUnit) {\n while (!pred()) {\n await sleep(interval);\n }\n}\nasync function waitUntilNode(path, root = document, old = null, timeout = 1000, interval = waitUnit) {\n let node = null;\n let stop = false;\n const waitP = waitUntil(() => {\n node = xpathNode(path, root);\n return stop || (node !== old && node !== null);\n }, interval);\n const timeoutP = new Promise((r) => setTimeout(() => {\n stop = true;\n r("TIMEOUT");\n }, timeout));\n await Promise.race([waitP, timeoutP]);\n return node;\n}\nasync function awaitLoad(iframe) {\n const doc = iframe ? iframe.contentDocument : document;\n const win = iframe ? iframe.contentWindow : window;\n return new Promise((resolve) => {\n if (doc.readyState === "complete") {\n resolve(null);\n }\n else {\n win.addEventListener("load", resolve);\n }\n });\n}\nfunction unsetToJson(obj) {\n if (obj.toJSON) {\n try {\n obj.__bx__toJSON = obj.toJSON;\n delete obj.toJSON;\n }\n catch (_) {\n }\n }\n}\nfunction restoreToJson(obj) {\n if (obj.__bx__toJSON) {\n try {\n obj.toJSON = obj.__bx__toJSON;\n delete obj.__bx__toJSON;\n }\n catch (_) {\n }\n }\n}\nfunction unsetAllJson() {\n unsetToJson(Object);\n unsetToJson(Object.prototype);\n unsetToJson(Array);\n unsetToJson(Array.prototype);\n}\nfunction restoreAllJson() {\n restoreToJson(Object);\n restoreToJson(Object.prototype);\n restoreToJson(Array);\n restoreToJson(Array.prototype);\n}\nlet needUnsetToJson = false;\nfunction checkToJsonOverride() {\n needUnsetToJson =\n !!Object.toJSON ||\n !!Object.prototype.toJSON ||\n !!Array.toJSON ||\n !!Array.prototype.toJSON;\n}\nasync function callBinding(binding, obj) {\n try {\n if (needUnsetToJson) {\n unsetAllJson();\n }\n return binding(obj);\n }\n catch (_) {\n return binding(JSON.stringify(obj));\n }\n finally {\n if (needUnsetToJson) {\n restoreAllJson();\n }\n }\n}\nasync function behaviorLog(data, type = "debug") {\n if (_logFunc) {\n await callBinding(_logFunc, { data, type });\n }\n}\nasync function addLink(url) {\n if (typeof self["__bx_addLink"] === "function") {\n return await callBinding(self["__bx_addLink"], url);\n }\n}\nasync function doExternalFetch(url) {\n if (typeof self["__bx_fetch"] === "function") {\n return await callBinding(self["__bx_fetch"], url);\n }\n return false;\n}\nasync function addToExternalSet(url) {\n if (typeof self["__bx_addSet"] === "function") {\n return await callBinding(self["__bx_addSet"], url);\n }\n return true;\n}\nasync function waitForNetworkIdle(idleTime = 500, concurrency = 0) {\n if (typeof self["__bx_netIdle"] === "function") {\n return await callBinding(self["__bx_netIdle"], { idleTime, concurrency });\n }\n}\nasync function initFlow(params) {\n if (typeof self["__bx_initFlow"] === "function") {\n return await callBinding(self["__bx_initFlow"], params);\n }\n return -1;\n}\nasync function nextFlowStep(id) {\n if (typeof self["__bx_nextFlowStep"] === "function") {\n return await callBinding(self["__bx_nextFlowStep"], id);\n }\n return { done: true, msg: "" };\n}\nfunction assertContentValid(assertFunc, reason = "invalid") {\n if (typeof self["__bx_contentCheckFailed"] === "function") {\n if (!assertFunc()) {\n behaviorLog("Behavior content check failed: " + reason, "error");\n callBinding(self["__bx_contentCheckFailed"], reason);\n }\n }\n}\nasync function openWindow(url) {\n if (self["__bx_open"]) {\n const p = new Promise((resolve) => (self["__bx_openResolve"] = resolve));\n await callBinding(self["__bx_open"], { url });\n let win = null;\n try {\n win = await p;\n if (win) {\n return win;\n }\n }\n catch (e) {\n console.warn(e);\n }\n finally {\n delete self["__bx_openResolve"];\n }\n }\n return window.open(url);\n}\nfunction _setLogFunc(func) {\n _logFunc = func;\n}\nfunction _setBehaviorManager(cls) {\n _behaviorMgrClass = cls;\n}\nfunction installBehaviors(obj) {\n obj.__bx_behaviors = new _behaviorMgrClass();\n}\nclass RestoreState {\n matchValue;\n constructor(childMatchSelect, child) {\n this.matchValue = xpathString(childMatchSelect, child);\n }\n async restore(rootPath, childMatch) {\n let root = null;\n while (((root = xpathNode(rootPath)), !root)) {\n await sleep(100);\n }\n return xpathNode(childMatch.replace("$1", this.matchValue), root);\n }\n}\nclass HistoryState {\n loc;\n constructor(op) {\n this.loc = window.location.href;\n op();\n }\n get changed() {\n return window.location.href !== this.loc;\n }\n async goBack(backButtonQuery) {\n if (!this.changed) {\n return Promise.resolve(true);\n }\n const backButton = xpathNode(backButtonQuery);\n return new Promise((resolve) => {\n window.addEventListener("popstate", () => {\n resolve(null);\n }, { once: true });\n if (backButton) {\n backButton["click"]();\n }\n else {\n window.history.back();\n }\n });\n }\n}\nfunction xpathNode(path, root) {\n root = root || document;\n return document.evaluate(path, root, null, XPathResult.FIRST_ORDERED_NODE_TYPE).singleNodeValue;\n}\nfunction* xpathNodes(path, root) {\n root = root || document;\n const iter = document.evaluate(path, root, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE);\n let result = null;\n while ((result = iter.iterateNext()) !== null) {\n yield result;\n }\n}\nfunction xpathString(path, root) {\n root = root || document;\n return document.evaluate(path, root, null, XPathResult.STRING_TYPE)\n .stringValue;\n}\nasync function* iterChildElem(root, timeout, totalTimeout) {\n let child = root.firstElementChild;\n while (child) {\n yield child;\n if (!child.nextElementSibling) {\n await Promise.race([\n waitUntil(() => !!child.nextElementSibling, timeout),\n sleep(totalTimeout),\n ]);\n }\n child = child.nextElementSibling;\n }\n}\nasync function* iterChildMatches(path, root, interval = waitUnit, timeout = 5000) {\n let node = xpathNode(`.//${path}`, root);\n const getMatch = (node) => xpathNode(`./following-sibling::${path}`, node);\n while (node) {\n yield node;\n let next = getMatch(node);\n if (next) {\n node = next;\n continue;\n }\n await Promise.race([\n waitUntil(() => {\n next = getMatch(node);\n return next;\n }, interval),\n sleep(timeout),\n ]);\n node = next;\n }\n}\nfunction isInViewport(elem) {\n const bounding = elem.getBoundingClientRect();\n return (bounding.top >= 0 &&\n bounding.left >= 0 &&\n bounding.bottom <=\n (window.innerHeight || document.documentElement.clientHeight) &&\n bounding.right <=\n (window.innerWidth || document.documentElement.clientWidth));\n}\nfunction scrollToOffset(element, offset = 0) {\n const elPosition = element.getBoundingClientRect().top;\n const topPosition = elPosition + window.pageYOffset - offset;\n window.scrollTo({ top: topPosition, behavior: "smooth" });\n}\nfunction scrollIntoView(element, opts = {\n behavior: "smooth",\n block: "center",\n inline: "center",\n}) {\n element.scrollIntoView(opts);\n}\nfunction getState(ctx, msg, incrValue) {\n if (typeof ctx.state === "undefined") {\n ctx.state = {};\n }\n if (incrValue) {\n if (ctx.state[incrValue] === undefined) {\n ctx.state[incrValue] = 1;\n }\n else {\n ctx.state[incrValue]++;\n }\n }\n return { state: ctx.state, msg };\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/lib/utils.ts?')},"./src/site/facebook.ts": + \**************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "HistoryState": () => (/* binding */ HistoryState),\n/* harmony export */ "RestoreState": () => (/* binding */ RestoreState),\n/* harmony export */ "_setBehaviorManager": () => (/* binding */ _setBehaviorManager),\n/* harmony export */ "_setLogFunc": () => (/* binding */ _setLogFunc),\n/* harmony export */ "addLink": () => (/* binding */ addLink),\n/* harmony export */ "addToExternalSet": () => (/* binding */ addToExternalSet),\n/* harmony export */ "assertContentValid": () => (/* binding */ assertContentValid),\n/* harmony export */ "awaitLoad": () => (/* binding */ awaitLoad),\n/* harmony export */ "behaviorLog": () => (/* binding */ behaviorLog),\n/* harmony export */ "callBinding": () => (/* binding */ callBinding),\n/* harmony export */ "checkToJsonOverride": () => (/* binding */ checkToJsonOverride),\n/* harmony export */ "click": () => (/* binding */ click),\n/* harmony export */ "doExternalFetch": () => (/* binding */ doExternalFetch),\n/* harmony export */ "getState": () => (/* binding */ getState),\n/* harmony export */ "initFlow": () => (/* binding */ initFlow),\n/* harmony export */ "installBehaviors": () => (/* binding */ installBehaviors),\n/* harmony export */ "isInViewport": () => (/* binding */ isInViewport),\n/* harmony export */ "iterChildElem": () => (/* binding */ iterChildElem),\n/* harmony export */ "iterChildMatches": () => (/* binding */ iterChildMatches),\n/* harmony export */ "nextFlowStep": () => (/* binding */ nextFlowStep),\n/* harmony export */ "openWindow": () => (/* binding */ openWindow),\n/* harmony export */ "scrollAndClick": () => (/* binding */ scrollAndClick),\n/* harmony export */ "scrollIntoView": () => (/* binding */ scrollIntoView),\n/* harmony export */ "scrollToOffset": () => (/* binding */ scrollToOffset),\n/* harmony export */ "sleep": () => (/* binding */ sleep),\n/* harmony export */ "waitForNetworkIdle": () => (/* binding */ waitForNetworkIdle),\n/* harmony export */ "waitUnit": () => (/* binding */ waitUnit),\n/* harmony export */ "waitUntil": () => (/* binding */ waitUntil),\n/* harmony export */ "waitUntilNode": () => (/* binding */ waitUntilNode),\n/* harmony export */ "xpathNode": () => (/* binding */ xpathNode),\n/* harmony export */ "xpathNodes": () => (/* binding */ xpathNodes),\n/* harmony export */ "xpathString": () => (/* binding */ xpathString)\n/* harmony export */ });\nlet _logFunc = console.log;\nlet _behaviorMgrClass = null;\nconst scrollOpts = { behavior: "smooth", block: "center", inline: "center" };\nasync function scrollAndClick(node, interval = 500, opts = scrollOpts) {\n node.scrollIntoView(opts);\n await sleep(interval);\n node.click();\n}\nconst waitUnit = 200;\nasync function sleep(timeout) {\n return new Promise((resolve) => setTimeout(resolve, timeout));\n}\nasync function waitUntil(pred, interval = waitUnit) {\n while (!pred()) {\n await sleep(interval);\n }\n}\nasync function waitUntilNode(path, root = document, old = null, timeout = 1000, interval = waitUnit) {\n let node = null;\n let stop = false;\n const waitP = waitUntil(() => {\n node = xpathNode(path, root);\n return stop || (node !== old && node !== null);\n }, interval);\n const timeoutP = new Promise((r) => setTimeout(() => {\n stop = true;\n r("TIMEOUT");\n }, timeout));\n await Promise.race([waitP, timeoutP]);\n return node;\n}\nasync function awaitLoad(iframe) {\n const doc = iframe ? iframe.contentDocument : document;\n const win = iframe ? iframe.contentWindow : window;\n return new Promise((resolve) => {\n if (doc.readyState === "complete") {\n resolve(null);\n }\n else {\n win.addEventListener("load", resolve);\n }\n });\n}\nfunction unsetToJson(obj) {\n if (obj.toJSON) {\n try {\n obj.__bx__toJSON = obj.toJSON;\n delete obj.toJSON;\n }\n catch (_) {\n }\n }\n}\nfunction restoreToJson(obj) {\n if (obj.__bx__toJSON) {\n try {\n obj.toJSON = obj.__bx__toJSON;\n delete obj.__bx__toJSON;\n }\n catch (_) {\n }\n }\n}\nfunction unsetAllJson() {\n unsetToJson(Object);\n unsetToJson(Object.prototype);\n unsetToJson(Array);\n unsetToJson(Array.prototype);\n}\nfunction restoreAllJson() {\n restoreToJson(Object);\n restoreToJson(Object.prototype);\n restoreToJson(Array);\n restoreToJson(Array.prototype);\n}\nlet needUnsetToJson = false;\nfunction checkToJsonOverride() {\n needUnsetToJson =\n !!Object.toJSON ||\n !!Object.prototype.toJSON ||\n !!Array.toJSON ||\n !!Array.prototype.toJSON;\n}\nasync function callBinding(binding, obj) {\n try {\n if (needUnsetToJson) {\n unsetAllJson();\n }\n return binding(obj);\n }\n catch (_) {\n return binding(JSON.stringify(obj));\n }\n finally {\n if (needUnsetToJson) {\n restoreAllJson();\n }\n }\n}\nasync function behaviorLog(data, type = "debug") {\n if (_logFunc) {\n await callBinding(_logFunc, { data, type });\n }\n}\nasync function addLink(url) {\n if (typeof self["__bx_addLink"] === "function") {\n return await callBinding(self["__bx_addLink"], url);\n }\n}\nasync function doExternalFetch(url) {\n if (typeof self["__bx_fetch"] === "function") {\n return await callBinding(self["__bx_fetch"], url);\n }\n return false;\n}\nasync function addToExternalSet(url) {\n if (typeof self["__bx_addSet"] === "function") {\n return await callBinding(self["__bx_addSet"], url);\n }\n return true;\n}\nasync function waitForNetworkIdle(idleTime = 500, concurrency = 0) {\n if (typeof self["__bx_netIdle"] === "function") {\n return await callBinding(self["__bx_netIdle"], { idleTime, concurrency });\n }\n}\nasync function initFlow(params) {\n if (typeof self["__bx_initFlow"] === "function") {\n return await callBinding(self["__bx_initFlow"], params);\n }\n return -1;\n}\nasync function nextFlowStep(id) {\n if (typeof self["__bx_nextFlowStep"] === "function") {\n return await callBinding(self["__bx_nextFlowStep"], id);\n }\n return { done: true, msg: "" };\n}\nasync function click(sel) {\n if (typeof self["__bx_click"] === "function") {\n return await callBinding(self["__bx_click"], sel);\n }\n const elem = document.querySelector(sel);\n if (elem) {\n elem.click();\n await sleep(500);\n }\n}\nfunction assertContentValid(assertFunc, reason = "invalid") {\n if (typeof self["__bx_contentCheckFailed"] === "function") {\n if (!assertFunc()) {\n behaviorLog("Behavior content check failed: " + reason, "error");\n callBinding(self["__bx_contentCheckFailed"], reason);\n }\n }\n}\nasync function openWindow(url) {\n if (self["__bx_open"]) {\n const p = new Promise((resolve) => (self["__bx_openResolve"] = resolve));\n await callBinding(self["__bx_open"], { url });\n let win = null;\n try {\n win = await p;\n if (win) {\n return win;\n }\n }\n catch (e) {\n console.warn(e);\n }\n finally {\n delete self["__bx_openResolve"];\n }\n }\n return window.open(url);\n}\nfunction _setLogFunc(func) {\n _logFunc = func;\n}\nfunction _setBehaviorManager(cls) {\n _behaviorMgrClass = cls;\n}\nfunction installBehaviors(obj) {\n obj.__bx_behaviors = new _behaviorMgrClass();\n}\nclass RestoreState {\n matchValue;\n constructor(childMatchSelect, child) {\n this.matchValue = xpathString(childMatchSelect, child);\n }\n async restore(rootPath, childMatch) {\n let root = null;\n while (((root = xpathNode(rootPath)), !root)) {\n await sleep(100);\n }\n return xpathNode(childMatch.replace("$1", this.matchValue), root);\n }\n}\nclass HistoryState {\n loc;\n constructor(op) {\n this.loc = window.location.href;\n op();\n }\n get changed() {\n return window.location.href !== this.loc;\n }\n async goBack(backButtonQuery) {\n if (!this.changed) {\n return Promise.resolve(true);\n }\n const backButton = xpathNode(backButtonQuery);\n return new Promise((resolve) => {\n window.addEventListener("popstate", () => {\n resolve(null);\n }, { once: true });\n if (backButton) {\n backButton["click"]();\n }\n else {\n window.history.back();\n }\n });\n }\n}\nfunction xpathNode(path, root) {\n root = root || document;\n return document.evaluate(path, root, null, XPathResult.FIRST_ORDERED_NODE_TYPE).singleNodeValue;\n}\nfunction* xpathNodes(path, root) {\n root = root || document;\n const iter = document.evaluate(path, root, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE);\n let result = null;\n while ((result = iter.iterateNext()) !== null) {\n yield result;\n }\n}\nfunction xpathString(path, root) {\n root = root || document;\n return document.evaluate(path, root, null, XPathResult.STRING_TYPE)\n .stringValue;\n}\nasync function* iterChildElem(root, timeout, totalTimeout) {\n let child = root.firstElementChild;\n while (child) {\n yield child;\n if (!child.nextElementSibling) {\n await Promise.race([\n waitUntil(() => !!child.nextElementSibling, timeout),\n sleep(totalTimeout),\n ]);\n }\n child = child.nextElementSibling;\n }\n}\nasync function* iterChildMatches(path, root, interval = waitUnit, timeout = 5000) {\n let node = xpathNode(`.//${path}`, root);\n const getMatch = (node) => xpathNode(`./following-sibling::${path}`, node);\n while (node) {\n yield node;\n let next = getMatch(node);\n if (next) {\n node = next;\n continue;\n }\n await Promise.race([\n waitUntil(() => {\n next = getMatch(node);\n return next;\n }, interval),\n sleep(timeout),\n ]);\n node = next;\n }\n}\nfunction isInViewport(elem) {\n const bounding = elem.getBoundingClientRect();\n return (bounding.top >= 0 &&\n bounding.left >= 0 &&\n bounding.bottom <=\n (window.innerHeight || document.documentElement.clientHeight) &&\n bounding.right <=\n (window.innerWidth || document.documentElement.clientWidth));\n}\nfunction scrollToOffset(element, offset = 0) {\n const elPosition = element.getBoundingClientRect().top;\n const topPosition = elPosition + window.pageYOffset - offset;\n window.scrollTo({ top: topPosition, behavior: "smooth" });\n}\nfunction scrollIntoView(element, opts = {\n behavior: "smooth",\n block: "center",\n inline: "center",\n}) {\n element.scrollIntoView(opts);\n}\nfunction getState(ctx, msg, incrValue) {\n if (typeof ctx.state === "undefined") {\n ctx.state = {};\n }\n if (incrValue) {\n if (ctx.state[incrValue] === undefined) {\n ctx.state[incrValue] = 1;\n }\n else {\n ctx.state[incrValue]++;\n }\n }\n return { state: ctx.state, msg };\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/lib/utils.ts?')},"./src/site/facebook.ts": /*!******************************!*\ !*** ./src/site/facebook.ts ***! \******************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "FacebookTimelineBehavior": () => (/* binding */ FacebookTimelineBehavior)\n/* harmony export */ });\nconst Q = {\n feed: "//div[@role=\'feed\']",\n article: ".//div[@role=\'article\']",\n pageletPostList: "//div[@data-pagelet=\'page\']/div[@role=\'main\']//div[@role=\'main\']/div",\n pageletProfilePostList: "//div[@data-pagelet=\'page\']//div[@data-pagelet=\'ProfileTimeline\']",\n articleToPostList: "//div[@role=\'article\']/../../../../div",\n photosOrVideos: `.//a[(contains(@href, \'/photos/\') or contains(@href, \'/photo/?\') or contains(@href, \'/videos/\')) and (starts-with(@href, \'${window.location.origin}/\') or starts-with(@href, \'/\'))]`,\n postQuery: ".//a[contains(@href, \'/posts/\')]",\n extraLabel: "//*[starts-with(text(), \'+\')]",\n nextSlideQuery: "//div[@data-name=\'media-viewer-nav-container\']/div[@data-visualcompletion][2]//div[@role=\'button\']",\n nextSlide: "//div[@aria-hidden=\'false\']//div[@role=\'button\' and not(@aria-hidden) and @aria-label]",\n commentList: ".//ul[(../h3) or (../h4)]",\n commentMoreReplies: "./div[2]/div[1]/div[2]/div[@role=\'button\']",\n commentMoreComments: "./following-sibling::div/div/div[2][@role=\'button\'][./span/span]",\n viewComments: ".//h4/..//div[@role=\'button\']",\n photoCommentList: "//ul[../h2]",\n firstPhotoThumbnail: "//div[@role=\'main\']//div[3]//div[contains(@style, \'border-radius\')]//div[contains(@style, \'max-width\') and contains(@style, \'min-width\')]//a[@role=\'link\']",\n firstVideoThumbnail: "//div[@role=\'main\']//div[contains(@style, \'z-index\')]/following-sibling::div/div/div/div[last()]//a[contains(@href, \'/videos/\') and @aria-hidden!=\'true\']",\n firstVideoSimple: "//div[@role=\'main\']//a[contains(@href, \'/videos/\') and @aria-hidden!=\'true\']",\n mainVideo: "//div[@data-pagelet=\'root\']//div[@role=\'dialog\']//div[@role=\'main\']//video",\n nextVideo: "following::a[contains(@href, \'/videos/\') and @aria-hidden!=\'true\']",\n isPhotoVideoPage: /^.*facebook\\.com\\/[^/]+\\/(photos|videos)\\/.+/,\n isPhotosPage: /^.*facebook\\.com\\/[^/]+\\/photos\\/?($|\\?)/,\n isVideosPage: /^.*facebook\\.com\\/[^/]+\\/videos\\/?($|\\?)/,\n pageLoadWaitUntil: "//div[@role=\'main\']",\n};\nclass FacebookTimelineBehavior {\n extraWindow;\n allowNewWindow;\n static id = "Facebook";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/(www\\.)?facebook\\.com\\/.*\\/posts\\//);\n }\n static init() {\n return {\n state: {},\n };\n }\n constructor() {\n this.extraWindow = null;\n this.allowNewWindow = false;\n }\n async *iterPostFeeds(ctx) {\n const { iterChildElem, waitUnit, waitUntil, xpathNode, xpathNodes } = ctx.Lib;\n const feeds = Array.from(xpathNodes(Q.feed));\n if (feeds && feeds.length) {\n for (const feed of feeds) {\n for await (const post of iterChildElem(feed, waitUnit, waitUntil * 10)) {\n yield* this.viewPost(ctx, xpathNode(Q.article, post));\n }\n }\n }\n else {\n const feed = xpathNode(Q.pageletPostList) ||\n xpathNode(Q.pageletProfilePostList) ||\n xpathNode(Q.articleToPostList);\n if (!feed) {\n return;\n }\n for await (const post of iterChildElem(feed, waitUnit, waitUntil * 10)) {\n yield* this.viewPost(ctx, xpathNode(Q.article, post));\n }\n }\n if (this.extraWindow) {\n this.extraWindow.close();\n }\n }\n async *viewPost(ctx, post, maxExpands = 2) {\n const { getState, scrollIntoView, sleep, waitUnit, xpathNode } = ctx.Lib;\n if (!post) {\n return;\n }\n const postLink = xpathNode(Q.postQuery, post);\n let url = null;\n if (postLink) {\n url = new URL(postLink.href, window.location.href);\n url.search = "";\n }\n yield getState(ctx, "Viewing post " + (url || ""), "posts");\n scrollIntoView(post);\n await sleep(waitUnit * 2);\n if (xpathNode(".//video", post)) {\n yield getState(ctx, "Playing inline video", "videos");\n await sleep(waitUnit * 2);\n }\n let commentRootUL = xpathNode(Q.commentList, post);\n if (!commentRootUL) {\n const viewCommentsButton = xpathNode(Q.viewComments, post);\n if (viewCommentsButton) {\n viewCommentsButton.click();\n await sleep(waitUnit * 2);\n }\n commentRootUL = xpathNode(Q.commentList, post);\n }\n yield* this.iterComments(ctx, commentRootUL, maxExpands);\n await sleep(waitUnit * 5);\n }\n async *viewPhotosOrVideos(ctx, post) {\n const { getState, sleep, waitUnit, xpathNode, xpathNodes } = ctx.Lib;\n const objects = Array.from(xpathNodes(Q.photosOrVideos, post));\n const objHrefs = new Set();\n let count = 0;\n for (const obj of objects) {\n const url = new URL(obj.href, window.location.href);\n if (obj.href.indexOf("?fbid") === -1) {\n url.search = "";\n }\n if (objHrefs.has(url.href)) {\n continue;\n }\n const type = obj.href.indexOf("/video") >= 0 ? "videos" : "photos";\n ++count;\n objHrefs.add(url.href);\n yield getState(ctx, `Viewing ${type} ${url.href}`, type);\n obj.scrollIntoView();\n await sleep(waitUnit * 5);\n obj.click();\n await sleep(waitUnit * 10);\n if (this.allowNewWindow) {\n await this.openNewWindow(ctx, url.href);\n }\n if (count === objects.length) {\n yield* this.viewExtraObjects(ctx, obj, type, this.allowNewWindow);\n }\n const close = xpathNode(Q.nextSlide);\n if (close) {\n close.click();\n await sleep(waitUnit * 2);\n }\n }\n }\n async *viewExtraObjects(ctx, obj, type, openNew) {\n const { getState, sleep, waitUnit, waitUntil, xpathNode } = ctx.Lib;\n const extraLabel = xpathNode(Q.extraLabel, obj);\n if (!extraLabel) {\n return;\n }\n const num = Number(extraLabel.innerText.slice(1));\n if (isNaN(num)) {\n return;\n }\n let lastHref;\n for (let i = 0; i < num; i++) {\n const nextSlideButton = xpathNode(Q.nextSlideQuery);\n if (!nextSlideButton) {\n continue;\n }\n lastHref = window.location.href;\n nextSlideButton.click();\n await sleep(waitUnit * 5);\n await waitUntil(() => window.location.href !== lastHref, waitUnit * 2);\n yield getState(ctx, `Viewing extra ${type} ${window.location.href}`);\n if (openNew) {\n await this.openNewWindow(ctx, window.location.href);\n }\n }\n }\n async openNewWindow(ctx, url) {\n if (!this.extraWindow) {\n this.extraWindow = await ctx.Lib.openWindow(url);\n }\n else {\n this.extraWindow.location.href = url;\n }\n }\n async *iterComments(ctx, commentRootUL, maxExpands = 2) {\n const { getState, scrollIntoView, sleep, waitUnit, xpathNode } = ctx.Lib;\n if (!commentRootUL) {\n await sleep(waitUnit * 5);\n return;\n }\n let commentBlock = commentRootUL.firstElementChild;\n let lastBlock = null;\n let count = 0;\n while (commentBlock && count < maxExpands) {\n while (commentBlock && count < maxExpands) {\n yield getState(ctx, "Loading comments", "comments");\n scrollIntoView(commentBlock);\n await sleep(waitUnit * 2);\n const moreReplies = xpathNode(Q.commentMoreReplies, commentBlock);\n if (moreReplies) {\n moreReplies.click();\n await sleep(waitUnit * 5);\n }\n lastBlock = commentBlock;\n commentBlock = lastBlock.nextElementSibling;\n count++;\n }\n if (count === maxExpands) {\n break;\n }\n const moreButton = xpathNode(Q.commentMoreComments, commentRootUL);\n if (moreButton) {\n scrollIntoView(moreButton);\n moreButton.click();\n await sleep(waitUnit * 5);\n if (lastBlock) {\n commentBlock = lastBlock.nextElementSibling;\n await sleep(waitUnit * 5);\n }\n }\n }\n await sleep(waitUnit * 2);\n }\n async *iterPhotoSlideShow(ctx) {\n const { getState, scrollIntoView, sleep, waitUnit, waitUntil, xpathNode } = ctx.Lib;\n const firstPhoto = xpathNode(Q.firstPhotoThumbnail);\n if (!firstPhoto) {\n return;\n }\n let lastHref = window.location.href;\n scrollIntoView(firstPhoto);\n firstPhoto.click();\n await sleep(waitUnit * 5);\n await waitUntil(() => window.location.href !== lastHref, waitUnit * 2);\n let nextSlideButton = null;\n while ((nextSlideButton = xpathNode(Q.nextSlideQuery))) {\n lastHref = window.location.href;\n await sleep(waitUnit);\n nextSlideButton.click();\n await sleep(waitUnit * 5);\n await Promise.race([\n waitUntil(() => window.location.href !== lastHref, waitUnit * 2),\n sleep(3000),\n ]);\n if (window.location.href === lastHref) {\n break;\n }\n yield getState(ctx, `Viewing photo ${window.location.href}`, "photos");\n const root = xpathNode(Q.photoCommentList);\n yield* this.iterComments(ctx, root, 2);\n await sleep(waitUnit * 5);\n }\n }\n async *iterAllVideos(ctx) {\n const { getState, scrollIntoView, sleep, waitUnit, waitUntil, xpathNode, xpathNodes, } = ctx.Lib;\n const firstInlineVideo = xpathNode("//video");\n if (firstInlineVideo) {\n scrollIntoView(firstInlineVideo);\n await sleep(waitUnit * 5);\n }\n let videoLink = xpathNode(Q.firstVideoThumbnail) || xpathNode(Q.firstVideoSimple);\n if (!videoLink) {\n return;\n }\n while (videoLink) {\n scrollIntoView(videoLink);\n let lastHref = window.location.href;\n videoLink.click();\n await waitUntil(() => window.location.href !== lastHref, waitUnit * 2);\n yield getState(ctx, "Viewing video: " + window.location.href, "videos");\n await sleep(waitUnit * 10);\n await Promise.race([\n waitUntil(() => {\n for (const video of xpathNodes("//video")) {\n if (video.readyState >= 3) {\n return true;\n }\n }\n return false;\n }, waitUnit * 2),\n sleep(20000),\n ]);\n await sleep(waitUnit * 10);\n const close = xpathNode(Q.nextSlide);\n if (!close) {\n break;\n }\n lastHref = window.location.href;\n close.click();\n await waitUntil(() => window.location.href !== lastHref, waitUnit * 2);\n videoLink = xpathNode(Q.nextVideo, videoLink);\n }\n }\n async *run(ctx) {\n const { getState, sleep, xpathNode } = ctx.Lib;\n yield getState(ctx, "Starting...");\n await sleep(2000);\n if (Q.isPhotosPage.exec(window.location.href)) {\n ctx.state = { photos: 0, comments: 0 };\n yield* this.iterPhotoSlideShow(ctx);\n return;\n }\n if (Q.isVideosPage.exec(window.location.href)) {\n ctx.state = { videos: 0, comments: 0 };\n yield* this.iterAllVideos(ctx);\n return;\n }\n if (Q.isPhotoVideoPage.exec(window.location.href)) {\n ctx.state = { comments: 0 };\n const root = xpathNode(Q.photoCommentList);\n yield* this.iterComments(ctx, root, 1000);\n return;\n }\n ctx.state = { posts: 0, comments: 0, videos: 0 };\n yield* this.iterPostFeeds(ctx);\n }\n async awaitPageLoad(ctx) {\n const { Lib, log } = ctx;\n const { assertContentValid, waitUntilNode } = Lib;\n log("Waiting for Facebook to fully load", "info");\n await waitUntilNode(Q.pageLoadWaitUntil, document, null, 10000);\n assertContentValid(() => !!document.querySelector("div[aria-label*=\'Account Controls\' i]"), "not_logged_in");\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/facebook.ts?')},"./src/site/index.ts": @@ -43,7 +43,7 @@ \******************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "TelegramBehavior": () => (/* binding */ TelegramBehavior)\n/* harmony export */ });\nconst Q = {\n telegramContainer: "//main//section[@class=\'tgme_channel_history js-message_history\']",\n postId: "string(./div[@data-post]/@data-post)",\n linkExternal: "string(.//a[@class=\'tgme_widget_message_link_preview\' and @href]/@href)",\n};\nclass TelegramBehavior {\n static id = "Telegram";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/t.me\\/s\\/\\w[\\w]+/);\n }\n static init() {\n return {\n state: { messages: 0 },\n };\n }\n async waitForPrev(ctx, child) {\n if (!child) {\n return null;\n }\n await ctx.Lib.sleep(ctx.Lib.waitUnit * 5);\n if (!child.previousElementSibling) {\n return null;\n }\n return child.previousElementSibling;\n }\n async *run(ctx) {\n const { getState, scrollIntoView, sleep, waitUnit, xpathNode, xpathString, } = ctx.Lib;\n const root = xpathNode(Q.telegramContainer);\n if (!root) {\n return;\n }\n let child = root.lastElementChild;\n while (child) {\n scrollIntoView(child);\n const postId = xpathString(Q.postId, child) || "unknown";\n const linkUrl = xpathString(Q.linkExternal, child);\n if (linkUrl?.endsWith(".jpg") || linkUrl.endsWith(".png")) {\n yield getState(ctx, "Loading External Image: " + linkUrl);\n const image = new Image();\n image.src = linkUrl;\n document.body.appendChild(image);\n await sleep(waitUnit * 2.5);\n document.body.removeChild(image);\n }\n yield getState(ctx, "Loading Message: " + postId, "messages");\n child = await this.waitForPrev(ctx, child);\n }\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/telegram.ts?')},"./src/site/tiktok.ts": /*!****************************!*\ !*** ./src/site/tiktok.ts ***! - \****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BREADTH_ALL": () => (/* binding */ BREADTH_ALL),\n/* harmony export */ "TikTokProfileBehavior": () => (/* binding */ TikTokProfileBehavior),\n/* harmony export */ "TikTokSharedBehavior": () => (/* binding */ TikTokSharedBehavior),\n/* harmony export */ "TikTokVideoBehavior": () => (/* binding */ TikTokVideoBehavior)\n/* harmony export */ });\nconst Q = {\n commentButton: "button[aria-label^=\'Read or add comments\']",\n commentList: "//div[contains(@class, \'CommentListContainer\')]",\n commentItem: "div[contains(@class, \'CommentItemContainer\')]",\n viewMoreReplies: ".//p[contains(@class, \'ReplyActionText\')]",\n viewMoreThread: ".//p[starts-with(@data-e2e, \'view-more\') and string-length(text()) > 0]",\n profileVideoList: "//div[starts-with(@data-e2e, \'user-post-item-list\')]",\n profileVideoItem: "div[contains(@class, \'DivItemContainerV2\')]",\n backButton: "button[contains(@class, \'StyledCloseIconContainer\')]",\n pageLoadWaitUntil: "//*[@role=\'dialog\']",\n};\nconst BREADTH_ALL = Symbol("BREADTH_ALL");\nclass TikTokSharedBehavior {\n async awaitPageLoad(ctx) {\n const { assertContentValid, waitUntilNode } = ctx.Lib;\n await waitUntilNode(Q.pageLoadWaitUntil, document, null, 10000);\n assertContentValid(() => !!document.querySelector("*[aria-label=\'Messages\']"), "not_logged_in");\n }\n}\nclass TikTokVideoBehavior extends TikTokSharedBehavior {\n static id = "TikTokVideo";\n static init() {\n return {\n state: { comments: 0 },\n opts: { breadth: BREADTH_ALL },\n };\n }\n static isMatch() {\n const pathRegex = /https:\\/\\/(www\\.)?tiktok\\.com\\/@.+\\/video\\/\\d+\\/?.*/;\n return !!window.location.href.match(pathRegex);\n }\n breadthComplete({ opts: { breadth } }, iter) {\n return breadth !== BREADTH_ALL && breadth <= iter;\n }\n async *crawlThread(ctx, parentNode, prev = null, iter = 0) {\n const { waitUntilNode, scrollAndClick, getState } = ctx.Lib;\n const next = await waitUntilNode(Q.viewMoreThread, parentNode, prev);\n if (!next || this.breadthComplete(ctx, iter))\n return;\n await scrollAndClick(next, 500);\n yield getState(ctx, "View more replies", "comments");\n yield* this.crawlThread(ctx, parentNode, next, iter + 1);\n }\n async *expandThread(ctx, item) {\n const { xpathNode, scrollAndClick, getState } = ctx.Lib;\n const viewMore = xpathNode(Q.viewMoreReplies, item);\n if (!viewMore)\n return;\n await scrollAndClick(viewMore, 500);\n yield getState(ctx, "View comment", "comments");\n yield* this.crawlThread(ctx, item, null, 1);\n }\n async *run(ctx) {\n const { xpathNode, iterChildMatches, scrollIntoView, getState, sleep } = ctx.Lib;\n const showComments = document.querySelector(Q.commentButton);\n if (showComments) {\n showComments.click();\n await sleep(500);\n }\n const commentList = xpathNode(Q.commentList);\n const commentItems = iterChildMatches(Q.commentItem, commentList);\n for await (const item of commentItems) {\n scrollIntoView(item);\n yield getState(ctx, "View comment", "comments");\n if (this.breadthComplete(ctx, 0))\n continue;\n yield* this.expandThread(ctx, item);\n }\n yield getState(ctx, "TikTok Video Behavior Complete");\n }\n}\nclass TikTokProfileBehavior extends TikTokSharedBehavior {\n static id = "TikTokProfile";\n static isMatch() {\n const pathRegex = /https:\\/\\/(www\\.)?tiktok\\.com\\/@[a-zA-Z0-9]+(\\/?$|\\/\\?.*)/;\n return !!window.location.href.match(pathRegex);\n }\n static init() {\n return {\n state: { videos: 0, comments: 0 },\n opts: { breadth: BREADTH_ALL },\n };\n }\n async *openVideo(ctx, item) {\n const { HistoryState, xpathNode, sleep } = ctx.Lib;\n const link = xpathNode(".//a", item);\n if (!link)\n return;\n const viewState = new HistoryState(() => link.click());\n await sleep(500);\n if (viewState.changed) {\n const videoBehavior = new TikTokVideoBehavior();\n yield* videoBehavior.run(ctx);\n await sleep(500);\n await viewState.goBack(Q.backButton);\n }\n }\n async *run(ctx) {\n const { xpathNode, iterChildMatches, scrollIntoView, getState, sleep } = ctx.Lib;\n const profileVideoList = xpathNode(Q.profileVideoList);\n const profileVideos = iterChildMatches(Q.profileVideoItem, profileVideoList);\n for await (const item of profileVideos) {\n scrollIntoView(item);\n yield getState(ctx, "View video", "videos");\n yield* this.openVideo(ctx, item);\n await sleep(500);\n }\n yield getState(ctx, "TikTok Profile Behavior Complete");\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/tiktok.ts?')},"./src/site/twitter.ts": + \****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "BREADTH_ALL": () => (/* binding */ BREADTH_ALL),\n/* harmony export */ "TikTokProfileBehavior": () => (/* binding */ TikTokProfileBehavior),\n/* harmony export */ "TikTokSharedBehavior": () => (/* binding */ TikTokSharedBehavior),\n/* harmony export */ "TikTokVideoBehavior": () => (/* binding */ TikTokVideoBehavior)\n/* harmony export */ });\nconst Q = {\n commentButton: "button[aria-label^=\'Read or add comments\']",\n commentList: "//div[contains(@class, \'CommentListContainer\')]",\n commentItem: "div[contains(@class, \'CommentItemContainer\')]",\n viewMoreReplies: ".//p[contains(@class, \'ReplyActionText\')]",\n viewMoreThread: ".//p[starts-with(@data-e2e, \'view-more\') and string-length(text()) > 0]",\n profileVideoList: "//div[starts-with(@data-e2e, \'user-post-item-list\')]",\n profileVideoItem: "div[contains(@class, \'DivItemContainerV2\')]",\n backButton: "button[contains(@class, \'StyledCloseIconContainer\')]",\n pageLoadWaitUntil: "//*[@role=\'dialog\']",\n};\nconst BREADTH_ALL = Symbol("BREADTH_ALL");\nclass TikTokSharedBehavior {\n async awaitPageLoad(ctx) {\n const { assertContentValid, waitUntilNode } = ctx.Lib;\n await waitUntilNode(Q.pageLoadWaitUntil, document, null, 10000);\n assertContentValid(() => !!document.querySelector("*[aria-label=\'Messages\']"), "not_logged_in");\n }\n}\nclass TikTokVideoBehavior extends TikTokSharedBehavior {\n static id = "TikTokVideo";\n static init() {\n return {\n state: { comments: 0 },\n opts: { breadth: BREADTH_ALL },\n };\n }\n static isMatch() {\n const pathRegex = /https:\\/\\/(www\\.)?tiktok\\.com\\/@.+\\/video\\/\\d+\\/?.*/;\n return !!window.location.href.match(pathRegex);\n }\n breadthComplete({ opts: { breadth } }, iter) {\n return breadth !== BREADTH_ALL && breadth <= iter;\n }\n async *crawlThread(ctx, parentNode, prev = null, iter = 0) {\n const { waitUntilNode, scrollAndClick, getState } = ctx.Lib;\n const next = await waitUntilNode(Q.viewMoreThread, parentNode, prev);\n if (!next || this.breadthComplete(ctx, iter))\n return;\n await scrollAndClick(next, 500);\n yield getState(ctx, "View more replies", "comments");\n yield* this.crawlThread(ctx, parentNode, next, iter + 1);\n }\n async *expandThread(ctx, item) {\n const { xpathNode, scrollAndClick, getState } = ctx.Lib;\n const viewMore = xpathNode(Q.viewMoreReplies, item);\n if (!viewMore)\n return;\n await scrollAndClick(viewMore, 500);\n yield getState(ctx, "View comment", "comments");\n yield* this.crawlThread(ctx, item, null, 1);\n }\n async *run(ctx) {\n const { xpathNode, iterChildMatches, scrollIntoView, getState, click, waitUntilNode } = ctx.Lib;\n await click(Q.commentButton);\n await waitUntilNode("//div[contains(@class, \'DivCommentMain\')]");\n const commentList = xpathNode(Q.commentList);\n const commentItems = iterChildMatches(Q.commentItem, commentList);\n for await (const item of commentItems) {\n scrollIntoView(item);\n yield getState(ctx, "View comment", "comments");\n if (this.breadthComplete(ctx, 0))\n continue;\n yield* this.expandThread(ctx, item);\n }\n yield getState(ctx, "TikTok Video Behavior Complete");\n }\n}\nclass TikTokProfileBehavior extends TikTokSharedBehavior {\n static id = "TikTokProfile";\n static isMatch() {\n const pathRegex = /https:\\/\\/(www\\.)?tiktok\\.com\\/@[a-zA-Z0-9]+(\\/?$|\\/\\?.*)/;\n return !!window.location.href.match(pathRegex);\n }\n static init() {\n return {\n state: { videos: 0, comments: 0 },\n opts: { breadth: BREADTH_ALL },\n };\n }\n async *openVideo(ctx, item) {\n const { HistoryState, xpathNode, sleep } = ctx.Lib;\n const link = xpathNode(".//a", item);\n if (!link)\n return;\n const viewState = new HistoryState(() => link.click());\n await sleep(500);\n if (viewState.changed) {\n const videoBehavior = new TikTokVideoBehavior();\n yield* videoBehavior.run(ctx);\n await sleep(500);\n await viewState.goBack(Q.backButton);\n }\n }\n async *run(ctx) {\n const { xpathNode, iterChildMatches, scrollIntoView, getState, sleep } = ctx.Lib;\n const profileVideoList = xpathNode(Q.profileVideoList);\n const profileVideos = iterChildMatches(Q.profileVideoItem, profileVideoList);\n for await (const item of profileVideos) {\n scrollIntoView(item);\n yield getState(ctx, "View video", "videos");\n yield* this.openVideo(ctx, item);\n await sleep(500);\n }\n yield getState(ctx, "TikTok Profile Behavior Complete");\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/tiktok.ts?')},"./src/site/twitter.ts": /*!*****************************!*\ !*** ./src/site/twitter.ts ***! \*****************************/(__unused_webpack_module,__webpack_exports__,__webpack_require__)=>{"use strict";eval('__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpack_require__.d(__webpack_exports__, {\n/* harmony export */ "TwitterTimelineBehavior": () => (/* binding */ TwitterTimelineBehavior)\n/* harmony export */ });\nconst Q = {\n rootPath: "//h1[@role=\'heading\' and @aria-level=\'1\']/following-sibling::div[@aria-label]//div[@style]",\n anchor: ".//article",\n childMatchSelect: "string(.//article//a[starts-with(@href, \'/\') and @aria-label]/@href)",\n childMatch: "child::div[.//a[@href=\'$1\']]",\n expand: ".//div[@role=\'button\' and not(@aria-haspopup) and not(@data-testid)]",\n quote: ".//div[@role=\'blockquote\' and @aria-haspopup=\'false\']",\n image: ".//a[@role=\'link\' and starts-with(@href, \'/\') and contains(@href, \'/photo/\')]",\n imageFirstNext: "//div[@aria-roledescription=\'carousel\']/div[2]/div[1]//div[@role=\'button\']",\n imageNext: "//div[@aria-roledescription=\'carousel\']/div[2]/div[2]//div[@role=\'button\']",\n imageClose: "//div[@role=\'presentation\']/div[@role=\'button\' and @aria-label]",\n backButton: "//div[@data-testid=\'titleContainer\']//div[@role=\'button\']",\n viewSensitive: ".//a[@href=\'/settings/content_you_see\']/parent::div/parent::div/parent::div//div[@role=\'button\']",\n progress: ".//*[@role=\'progressbar\']",\n promoted: ".//div[data-testid=\'placementTracking\']",\n};\nclass TwitterTimelineBehavior {\n seenTweets;\n seenMediaTweets;\n static id = "Twitter";\n static isMatch() {\n return !!window.location.href.match(/https:\\/\\/(www\\.)?(x|twitter)\\.com\\//);\n }\n static init() {\n return {\n state: {\n tweets: 0,\n images: 0,\n videos: 0,\n },\n opts: {\n maxDepth: 0,\n },\n };\n }\n constructor() {\n this.seenTweets = new Set();\n this.seenMediaTweets = new Set();\n }\n showingProgressBar(ctx, root) {\n const { xpathNode } = ctx.Lib;\n const node = xpathNode(Q.progress, root);\n if (!node) {\n return false;\n }\n return node.clientHeight > 10;\n }\n async waitForNext(ctx, child) {\n const { sleep, waitUnit } = ctx.Lib;\n if (!child) {\n return null;\n }\n await sleep(waitUnit * 2);\n if (!child.nextElementSibling) {\n return null;\n }\n while (this.showingProgressBar(ctx, child.nextElementSibling)) {\n await sleep(waitUnit);\n }\n return child.nextElementSibling;\n }\n async expandMore(ctx, child) {\n const { sleep, waitUnit, xpathNode } = ctx.Lib;\n const expandElem = xpathNode(Q.expand, child);\n if (!expandElem) {\n return child;\n }\n const prev = child.previousElementSibling;\n expandElem.click();\n await sleep(waitUnit);\n while (this.showingProgressBar(ctx, prev.nextElementSibling)) {\n await sleep(waitUnit);\n }\n child = prev.nextElementSibling;\n return child;\n }\n async *infScroll(ctx) {\n const { scrollIntoView, RestoreState, sleep, waitUnit, xpathNode } = ctx.Lib;\n const root = xpathNode(Q.rootPath);\n if (!root) {\n return;\n }\n let child = root.firstElementChild;\n if (!child) {\n return;\n }\n while (child) {\n let anchorElem = xpathNode(Q.anchor, child);\n if (!anchorElem && Q.expand) {\n child = await this.expandMore(ctx, child);\n anchorElem = xpathNode(Q.anchor, child);\n }\n if (child?.innerText) {\n scrollIntoView(child);\n }\n if (child && anchorElem) {\n await sleep(waitUnit);\n const restorer = new RestoreState(Q.childMatchSelect, child);\n yield anchorElem;\n if (restorer.matchValue) {\n child = await restorer.restore(Q.rootPath, Q.childMatch);\n }\n }\n child = await this.waitForNext(ctx, child);\n }\n }\n async *mediaPlaying(ctx, tweet) {\n const { getState, sleep, xpathNode, xpathString } = ctx.Lib;\n const media = xpathNode("(.//video | .//audio)", tweet);\n if (!media || media.paused) {\n return;\n }\n let mediaTweetUrl = null;\n try {\n mediaTweetUrl = new URL(xpathString(Q.childMatchSelect, tweet.parentElement), window.location.origin).href;\n }\n catch (e) {\n console.warn(e);\n }\n if (media.src.startsWith("https://") && media.src.indexOf(".mp4") > 0) {\n yield getState(ctx, `Loading video for ${mediaTweetUrl || "unknown"}`, "videos");\n return;\n }\n let msg;\n if (mediaTweetUrl) {\n if (this.seenMediaTweets.has(mediaTweetUrl)) {\n return;\n }\n msg = `Waiting for media playback for ${mediaTweetUrl} to finish`;\n this.seenMediaTweets.add(mediaTweetUrl);\n }\n else {\n msg = "Loading video";\n }\n yield getState(ctx, msg, "videos");\n const p = new Promise((resolve) => {\n media.addEventListener("ended", () => resolve(null));\n media.addEventListener("abort", () => resolve(null));\n media.addEventListener("error", () => resolve(null));\n media.addEventListener("pause", () => resolve(null));\n });\n await Promise.race([p, sleep(60000)]);\n }\n async *clickImages(ctx, tweet) {\n const { getState, HistoryState, sleep, waitUnit, xpathNode } = ctx.Lib;\n const imagePopup = xpathNode(Q.image, tweet);\n if (imagePopup) {\n const imageState = new HistoryState(() => imagePopup.click());\n yield getState(ctx, "Loading Image: " + window.location.href, "images");\n await sleep(waitUnit * 5);\n let nextImage = xpathNode(Q.imageFirstNext);\n let prevLocation = window.location.href;\n while (nextImage) {\n nextImage.click();\n await sleep(waitUnit * 2);\n if (window.location.href === prevLocation) {\n await sleep(waitUnit * 5);\n break;\n }\n prevLocation = window.location.href;\n yield getState(ctx, "Loading Image: " + window.location.href, "images");\n await sleep(waitUnit * 5);\n nextImage = xpathNode(Q.imageNext);\n }\n await imageState.goBack(Q.imageClose);\n }\n }\n async *clickTweet(ctx, tweet, depth) {\n const { getState, HistoryState, sleep, waitUnit } = ctx.Lib;\n const tweetState = new HistoryState(() => tweet.click());\n await sleep(waitUnit);\n if (tweetState.changed) {\n yield getState(ctx, "Capturing Tweet: " + window.location.href, "tweets");\n const maxDepth = ctx.opts.maxDepth;\n if (depth < maxDepth && !this.seenTweets.has(window.location.href)) {\n yield* this.iterTimeline(ctx, depth + 1);\n }\n this.seenTweets.add(window.location.href);\n await sleep(waitUnit * 2);\n await tweetState.goBack(Q.backButton);\n await sleep(waitUnit);\n }\n }\n async *iterTimeline(ctx, depth = 0) {\n const { getState, sleep, waitUnit, xpathNode } = ctx.Lib;\n if (this.seenTweets.has(window.location.href)) {\n return;\n }\n yield getState(ctx, "Capturing thread: " + window.location.href, "threads");\n for await (const tweet of this.infScroll(ctx)) {\n if (xpathNode(Q.promoted, tweet)) {\n continue;\n }\n await sleep(waitUnit * 2.5);\n const viewButton = xpathNode(Q.viewSensitive, tweet);\n if (viewButton) {\n viewButton.click();\n await sleep(waitUnit * 2.5);\n }\n yield* this.clickImages(ctx, tweet);\n const quoteTweet = xpathNode(Q.quote, tweet);\n if (quoteTweet) {\n yield* this.clickTweet(ctx, quoteTweet, 1000);\n }\n yield* this.mediaPlaying(ctx, tweet);\n yield* this.clickTweet(ctx, tweet, depth);\n await sleep(waitUnit * 5);\n }\n }\n async *run(ctx) {\n yield* this.iterTimeline(ctx, 0);\n }\n async awaitPageLoad(ctx) {\n const { sleep, assertContentValid } = ctx.Lib;\n await sleep(5);\n assertContentValid(() => !document.documentElement.outerHTML.match(/Log In/i), "not_logged_in");\n }\n}\n\n\n//# sourceURL=webpack://browsertrix-behaviors/./src/site/twitter.ts?')},"./src/site/youtube.ts": diff --git a/package.json b/package.json index 22f1574d..b9a8495e 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "css-selector-parser": "^3.0.5", "fetch-socks": "^1.3.0", "get-folder-size": "^4.0.0", + "ghost-cursor": "^1.4.1", "husky": "^8.0.3", "ioredis": "^5.3.2", "iso-639-1": "^3.1.5", diff --git a/src/crawler.ts b/src/crawler.ts index 79436e12..6aadc5eb 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -3,6 +3,7 @@ import path from "path"; import fs, { WriteStream } from "fs"; import os from "os"; import fsp from "fs/promises"; +import { createCursor } from "ghost-cursor"; import { RedisCrawlState, @@ -835,6 +836,13 @@ export class Crawler { (url: string) => callbacks.addLink && callbacks.addLink(url), ); + await page.exposeFunction(BxFunctionBindings.Click, async (sel: string) => { + //@ts-expect-error: ignore + const cursor = createCursor(page); + await cursor.click(sel); + logger.info("Custom Click Made", {}, "behaviorScript"); + }); + // used for both behaviors and link extraction now await this.browser.addInitScript(page, btrixBehaviors); diff --git a/src/util/constants.ts b/src/util/constants.ts index 15b00bd7..dbbdc163 100644 --- a/src/util/constants.ts +++ b/src/util/constants.ts @@ -32,6 +32,8 @@ export enum BxFunctionBindings { NextFlowStep = "__bx_nextFlowStep", ContentCheckFailed = "__bx_contentCheckFailed", + + Click = "__bx_click", } export const MAX_DEPTH = 1000000; diff --git a/yarn.lock b/yarn.lock index 3d1bec7d..a2119e16 100644 --- a/yarn.lock +++ b/yarn.lock @@ -914,6 +914,11 @@ dependencies: "@babel/types" "^7.20.7" +"@types/bezier-js@4": + version "4.1.3" + resolved "https://registry.yarnpkg.com/@types/bezier-js/-/bezier-js-4.1.3.tgz#237d4fe7e9aae7edd0c27a71f9f236f4ddc1c562" + integrity sha512-FNVVCu5mx/rJCWBxLTcL7oOajmGtWtBTDjq6DSUWUI12GeePivrZZXz+UgE0D6VYsLEjvExRO03z4hVtu3pTEQ== + "@types/graceful-fs@^4.1.3": version "4.1.9" resolved "https://registry.yarnpkg.com/@types/graceful-fs/-/graceful-fs-4.1.9.tgz#2a06bc0f68a20ab37b3e36aa238be6abdf49e8b4" @@ -1534,6 +1539,11 @@ basic-ftp@^5.0.2: resolved "https://registry.yarnpkg.com/basic-ftp/-/basic-ftp-5.0.5.tgz#14a474f5fffecca1f4f406f1c26b18f800225ac0" integrity sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg== +bezier-js@^6.1.3: + version "6.1.4" + resolved "https://registry.yarnpkg.com/bezier-js/-/bezier-js-6.1.4.tgz#c7828f6c8900562b69d5040afb881bcbdad82001" + integrity sha512-PA0FW9ZpcHbojUCMu28z9Vg/fNkwTj5YhusSAjHHDfHDGLxJ6YUKrAN2vk1fP2MMOxVw4Oko16FMlRGVBGqLKg== + bl@^4.0.3: version "4.1.0" resolved "https://registry.yarnpkg.com/bl/-/bl-4.1.0.tgz#451535264182bec2fbbc83a62ab98cf11d9f7b3a" @@ -2697,6 +2707,15 @@ get-uri@^6.0.1: debug "^4.3.4" fs-extra "^11.2.0" +ghost-cursor@^1.4.1: + version "1.4.1" + resolved "https://registry.yarnpkg.com/ghost-cursor/-/ghost-cursor-1.4.1.tgz#dbdf38132459bb385334d1093958134b57551a87" + integrity sha512-K8A8/Co/Jbdqee694qrNsGWBG51DVK5UF2gGKEoZBDx9F1WmoD2SzUoDHWoY7O+TY84s1VrWwwfkVKxI2FoV2Q== + dependencies: + "@types/bezier-js" "4" + bezier-js "^6.1.3" + debug "^4.3.4" + github-from-package@0.0.0: version "0.0.0" resolved "https://registry.yarnpkg.com/github-from-package/-/github-from-package-0.0.0.tgz#97fb5d96bfde8973313f20e8288ef9a167fa64ce"