Compare commits

...

2 commits

Author SHA1 Message Date
Ilya Kreymer
2270964996
logging: remove duplicate seeds found error (#893)
Per discussion, the message is unnecessary and confusing (it doesn't
provide enough info), and it can also be logged on crawler restart.
2025-10-07 08:18:22 -07:00
Ilya Kreymer
fd49041f63
flow behaviors: add scrolling into view (#892)
Some page elements don't respond correctly if the element is not
in view, so this adds setEnsureElementIsInTheViewport(true) to the click,
double-click, hover and change step locators.
2025-10-07 08:17:56 -07:00
3 changed files with 6 additions and 13 deletions

View file

@ -129,8 +129,6 @@ export class Crawler {
limitHit = false;
pageLimit: number;
dupeSeedsFound = false;
saveStateFiles: string[] = [];
lastSaveTime: number;
@ -2487,10 +2485,6 @@ self.__bx_behaviors.selectMainBehavior();
return false;
case QueueState.DUPE_URL:
if (!this.dupeSeedsFound && depth === 0) {
logger.error("Duplicate seed URLs found and skipped");
this.dupeSeedsFound = true;
}
logger.debug(
"Page URL not queued, already seen",
{ url, ...logDetails },

View file

@ -368,7 +368,7 @@ class Flow {
case StepType.DoubleClick:
await locator(step)
.setTimeout(timeout * 1000)
//.on('action', () => startWaitingForEvents())
.setEnsureElementIsInTheViewport(true)
.click({
count: 2,
button: step.button && mouseButtonMap.get(step.button),
@ -392,7 +392,7 @@ class Flow {
await locator(step)
.setTimeout(timeout * 1000)
//.on('action', () => startWaitingForEvents())
.setEnsureElementIsInTheViewport(true)
.click({
delay: step.duration,
button: step.button && mouseButtonMap.get(step.button),
@ -410,7 +410,7 @@ class Flow {
case StepType.Hover:
await locator(step)
.setTimeout(timeout * 1000)
//.on('action', () => startWaitingForEvents())
.setEnsureElementIsInTheViewport(true)
.hover();
break;
@ -426,15 +426,14 @@ class Flow {
case StepType.Change:
await locator(step)
//.on('action', () => startWaitingForEvents())
.setTimeout(timeout * 1000)
.setEnsureElementIsInTheViewport(true)
.fill(step.value);
break;
case StepType.Scroll: {
if ("selectors" in step) {
await locator(step)
//.on('action', () => startWaitingForEvents())
.setTimeout(timeout * 1000)
.scroll({
scrollLeft: step.x || 0,

View file

@ -43,8 +43,8 @@ test("test custom selector crawls JS files as pages", async () => {
]);
const expectedExtraPages = new Set([
"https://www.iana.org/_js/jquery.js",
"https://www.iana.org/_js/iana.js",
"https://www.iana.org/static/_js/jquery.js",
"https://www.iana.org/static/_js/iana.js",
]);
expect(pages).toEqual(expectedPages);