mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
various edge-case loading optimizations: (#709)
- rework 'should stream' logic: * ensure 206 responses (or any response) greater than 25M are streamed * response between 5M and 25M are read into memory if text/css/js as they may be rewritten * responses <5M are read into memory * responses with unknown size are streamed if a 2xx, otherwise read into memory, assuming error code responses may lack status codes but otherwise are small - likely fix for issues in #706 - if too many range requests for same URL are being made, try skipping/failing right away to reduce load - assume main browser context is used not just for service workers, always enable - check false positive 'net-aborted' error that may actually be ok for media, as well as documents - improve logging - interrupt any pending requests (that may be loading via browser context) after page timeout, log dropped requests --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
5c00bca2b4
commit
e5bab8e7c8
6 changed files with 149 additions and 80 deletions
18
yarn.lock
18
yarn.lock
|
@ -2075,10 +2075,10 @@ detect-newline@^3.0.0:
|
|||
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
|
||||
integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
|
||||
|
||||
devtools-protocol@0.0.1342118:
|
||||
version "0.0.1342118"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1342118.tgz#ea136fc1701572c0830233dcb414dc857e582e0a"
|
||||
integrity sha512-75fMas7PkYNDTmDyb6PRJCH7ILmHLp+BhrZGeMsa4bCh40DTxgCz2NRy5UDzII4C5KuD0oBMZ9vXKhEl6UD/3w==
|
||||
devtools-protocol@0.0.1354347:
|
||||
version "0.0.1354347"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1354347.tgz#5cb509610b8f61fc69a31e5c810d5bed002d85ea"
|
||||
integrity sha512-BlmkSqV0V84E2WnEnoPnwyix57rQxAM5SKJjf4TbYOCGLAWtz8CDH8RIaGOjPgPCXo2Mce3kxSY497OySidY3Q==
|
||||
|
||||
diff-sequences@^29.6.3:
|
||||
version "29.6.3"
|
||||
|
@ -4375,15 +4375,15 @@ punycode@^2.1.0:
|
|||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
|
||||
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
|
||||
|
||||
puppeteer-core@^23.5.1:
|
||||
version "23.5.1"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-23.5.1.tgz#fac4268820c35d3172e783a1f1a39773b2c0f7c6"
|
||||
integrity sha512-We6xKCSZaZ23+GAYckeNfeDeJIVuhxOBsh/gZkbULu/XLFJ3umSiiQ8Ey927h3g/XrCCr8CnSZ5fvP5v2vB5Yw==
|
||||
puppeteer-core@^23.6.0:
|
||||
version "23.6.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-23.6.0.tgz#a3e1e09c05f47fb8ca2bc9d4ca200d18e3704303"
|
||||
integrity sha512-se1bhgUpR9C529SgHGr/eyT92mYyQPAhA2S9pGtGrVG2xob9qE6Pbp7TlqiSPlnnY1lINqhn6/67EwkdzOmKqQ==
|
||||
dependencies:
|
||||
"@puppeteer/browsers" "2.4.0"
|
||||
chromium-bidi "0.8.0"
|
||||
debug "^4.3.7"
|
||||
devtools-protocol "0.0.1342118"
|
||||
devtools-protocol "0.0.1354347"
|
||||
typed-query-selector "^2.12.0"
|
||||
ws "^8.18.0"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue