mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-08 06:09:48 +00:00
1685 lines
No EOL
59 KiB
HTML
1685 lines
No EOL
59 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
|
|
<link rel="prev" href="../exit-codes/">
|
|
|
|
|
|
<link rel="next" href="../crawl-scope/">
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="icon" href="../../assets/brand/browsertrix-crawler-icon-color-dynamic.svg">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.0">
|
|
|
|
|
|
|
|
<title>Commonly-Used Options - Browsertrix Crawler Docs</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.618322db.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<style>:root{--md-admonition-icon--note:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-pencil-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M12.854.146a.5.5%200%200%200-.707%200L10.5%201.793%2014.207%205.5l1.647-1.646a.5.5%200%200%200%200-.708l-3-3zm.646%206.061L9.793%202.5%203.293%209H3.5a.5.5%200%200%201%20.5.5v.5h.5a.5.5%200%200%201%20.5.5v.5h.5a.5.5%200%200%201%20.5.5v.5h.5a.5.5%200%200%201%20.5.5v.207l6.5-6.5zm-7.468%207.468A.5.5%200%200%201%206%2013.5V13h-.5a.5.5%200%200%201-.5-.5V12h-.5a.5.5%200%200%201-.5-.5V11h-.5a.5.5%200%200%201-.5-.5V10h-.5a.499.499%200%200%201-.175-.032l-.179.178a.5.5%200%200%200-.11.168l-2%205a.5.5%200%200%200%20.65.65l5-2a.5.5%200%200%200%20.168-.11l.178-.178z%22/%3E%3C/svg%3E');--md-admonition-icon--abstract:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-file-earmark-text-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M9.293%200H4a2%202%200%200%200-2%202v12a2%202%200%200%200%202%202h8a2%202%200%200%200%202-2V4.707A1%201%200%200%200%2013.707%204L10%20.293A1%201%200%200%200%209.293%200zM9.5%203.5v-2l3%203h-2a1%201%200%200%201-1-1zM4.5%209a.5.5%200%200%201%200-1h7a.5.5%200%200%201%200%201h-7zM4%2010.5a.5.5%200%200%201%20.5-.5h7a.5.5%200%200%201%200%201h-7a.5.5%200%200%201-.5-.5zm.5%202.5a.5.5%200%200%201%200-1h4a.5.5%200%200%201%200%201h-4z%22/%3E%3C/svg%3E');--md-admonition-icon--info:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-info-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M8%2016A8%208%200%201%200%208%200a8%208%200%200%200%200%2016zm.93-9.412-1%204.705c-.07.34.029.533.304.5
33.194%200%20.487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703%200-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381%202.29-.287zM8%205.5a1%201%200%201%201%200-2%201%201%200%200%201%200%202z%22/%3E%3C/svg%3E');--md-admonition-icon--tip:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-exclamation-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M16%208A8%208%200%201%201%200%208a8%208%200%200%201%2016%200zM8%204a.905.905%200%200%200-.9.995l.35%203.507a.552.552%200%200%200%201.1%200l.35-3.507A.905.905%200%200%200%208%204zm.002%206a1%201%200%201%200%200%202%201%201%200%200%200%200-2z%22/%3E%3C/svg%3E');--md-admonition-icon--success:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-check-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M16%208A8%208%200%201%201%200%208a8%208%200%200%201%2016%200zm-3.97-3.03a.75.75%200%200%200-1.08.022L7.477%209.417%205.384%207.323a.75.75%200%200%200-1.06%201.06L6.97%2011.03a.75.75%200%200%200%201.079-.02l3.992-4.99a.75.75%200%200%200-.01-1.05z%22/%3E%3C/svg%3E');--md-admonition-icon--question:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-question-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M16%208A8%208%200%201%201%200%208a8%208%200%200%201%2016%200zM5.496%206.033h.825c.138%200%20.248-.113.266-.25.09-.656.54-1.134%201.342-1.134.686%200%201.314.343%201.314%201.168%200%20.635-.374.927-.965%201.371-.673.489-1.206%201.06-1.168%201.987l.003.217a.25.25%200%200%200%20.25.246h.811a.25.25%200%200%200%20.25-.25v-.105c0-.718.273-.927%201.01-1.486.
609-.463%201.244-.977%201.244-2.056%200-1.511-1.276-2.241-2.673-2.241-1.267%200-2.655.59-2.75%202.286a.237.237%200%200%200%20.241.247zm2.325%206.443c.61%200%201.029-.394%201.029-.927%200-.552-.42-.94-1.029-.94-.584%200-1.009.388-1.009.94%200%20.533.425.927%201.01.927z%22/%3E%3C/svg%3E');--md-admonition-icon--warning:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-exclamation-triangle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M8.982%201.566a1.13%201.13%200%200%200-1.96%200L.165%2013.233c-.457.778.091%201.767.98%201.767h13.713c.889%200%201.438-.99.98-1.767L8.982%201.566zM8%205c.535%200%20.954.462.9.995l-.35%203.507a.552.552%200%200%201-1.1%200L7.1%205.995A.905.905%200%200%201%208%205zm.002%206a1%201%200%201%201%200%202%201%201%200%200%201%200-2z%22/%3E%3C/svg%3E');--md-admonition-icon--failure:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-x-octagon-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M11.46.146A.5.5%200%200%200%2011.107%200H4.893a.5.5%200%200%200-.353.146L.146%204.54A.5.5%200%200%200%200%204.893v6.214a.5.5%200%200%200%20.146.353l4.394%204.394a.5.5%200%200%200%20.353.146h6.214a.5.5%200%200%200%20.353-.146l4.394-4.394a.5.5%200%200%200%20.146-.353V4.893a.5.5%200%200%200-.146-.353L11.46.146zm-6.106%204.5L8%207.293l2.646-2.647a.5.5%200%200%201%20.708.708L8.707%208l2.647%202.646a.5.5%200%200%201-.708.708L8%208.707l-2.646%202.647a.5.5%200%200%201-.708-.708L7.293%208%204.646%205.354a.5.5%200%201%201%20.708-.708z%22/%3E%3C/svg%3E');--md-admonition-icon--danger:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-exclamation-diamond-fill%22%20
viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M9.05.435c-.58-.58-1.52-.58-2.1%200L.436%206.95c-.58.58-.58%201.519%200%202.098l6.516%206.516c.58.58%201.519.58%202.098%200l6.516-6.516c.58-.58.58-1.519%200-2.098L9.05.435zM8%204c.535%200%20.954.462.9.995l-.35%203.507a.552.552%200%200%201-1.1%200L7.1%204.995A.905.905%200%200%201%208%204zm.002%206a1%201%200%201%201%200%202%201%201%200%200%201%200-2z%22/%3E%3C/svg%3E');--md-admonition-icon--bug:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-bug-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M4.978.855a.5.5%200%201%200-.956.29l.41%201.352A4.985%204.985%200%200%200%203%206h10a4.985%204.985%200%200%200-1.432-3.503l.41-1.352a.5.5%200%201%200-.956-.29l-.291.956A4.978%204.978%200%200%200%208%201a4.979%204.979%200%200%200-2.731.811l-.29-.956z%22/%3E%20%20%3Cpath%20d%3D%22M13%206v1H8.5v8.975A5%205%200%200%200%2013%2011h.5a.5.5%200%200%201%20.5.5v.5a.5.5%200%201%200%201%200v-.5a1.5%201.5%200%200%200-1.5-1.5H13V9h1.5a.5.5%200%200%200%200-1H13V7h.5A1.5%201.5%200%200%200%2015%205.5V5a.5.5%200%200%200-1%200v.5a.5.5%200%200%201-.5.5H13zm-5.5%209.975V7H3V6h-.5a.5.5%200%200%201-.5-.5V5a.5.5%200%200%200-1%200v.5A1.5%201.5%200%200%200%202.5%207H3v1H1.5a.5.5%200%200%200%200%201H3v1h-.5A1.5%201.5%200%200%200%201%2011.5v.5a.5.5%200%201%200%201%200v-.5a.5.5%200%200%201%20.5-.5H3a5%205%200%200%200%204.5%204.975z%22/%3E%3C/svg%3E');--md-admonition-icon--example:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-mortarboard-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M8.211%202.047a.5.5%200%200%200-.422%200l-7.5%203.5a.5.5%200%200%200%20.025.917l7.5%203a.5.5%200%200%200%20.372%200L14%207.14V13a1%201%200%200%200-1%201v2h3v-2a1%201%200%200%200
-1-1V6.739l.686-.275a.5.5%200%200%200%20.025-.917l-7.5-3.5Z%22/%3E%20%20%3Cpath%20d%3D%22M4.176%209.032a.5.5%200%200%200-.656.327l-.5%201.7a.5.5%200%200%200%20.294.605l4.5%201.8a.5.5%200%200%200%20.372%200l4.5-1.8a.5.5%200%200%200%20.294-.605l-.5-1.7a.5.5%200%200%200-.656-.327L8%2010.466%204.176%209.032Z%22/%3E%3C/svg%3E');--md-admonition-icon--quote:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-quote%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M12%2012a1%201%200%200%200%201-1V8.558a1%201%200%200%200-1-1h-1.388c0-.351.021-.703.062-1.054.062-.372.166-.703.31-.992.145-.29.331-.517.559-.683.227-.186.516-.279.868-.279V3c-.579%200-1.085.124-1.52.372a3.322%203.322%200%200%200-1.085.992%204.92%204.92%200%200%200-.62%201.458A7.712%207.712%200%200%200%209%207.558V11a1%201%200%200%200%201%201h2Zm-6%200a1%201%200%200%200%201-1V8.558a1%201%200%200%200-1-1H4.612c0-.351.021-.703.062-1.054.062-.372.166-.703.31-.992.145-.29.331-.517.559-.683.227-.186.516-.279.868-.279V3c-.579%200-1.085.124-1.52.372a3.322%203.322%200%200%200-1.085.992%204.92%204.92%200%200%200-.62%201.458A7.712%207.712%200%200%200%203%207.558V11a1%201%200%200%200%201%201h2Z%22/%3E%3C/svg%3E');}</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../stylesheets/extra.css">
|
|
|
|
<script>
  // Storage helpers exposed as implicit globals (assigned without a
  // declaration, exactly like the minified form they replace).

  // Base URL two levels above the current page.
  __md_scope = new URL("../..", location);

  // Fold a string into a number: for each character, (acc << 5) - acc + char code.
  __md_hash = text =>
    [...text].reduce((acc, ch) => (acc << 5) - acc + ch.charCodeAt(0), 0);

  // Read and JSON-parse the entry stored under "<scope pathname>.<key>".
  // Storage defaults to localStorage, scope to __md_scope.
  __md_get = (key, storage = localStorage, scope = __md_scope) =>
    JSON.parse(storage.getItem(scope.pathname + "." + key));

  // JSON-serialize a value and store it under "<scope pathname>.<key>".
  // Any error thrown while writing is deliberately ignored.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="webrecorder" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<!-- Skip link: jumps straight to the #commonly-used-options anchor. -->
<div data-md-component="skip">
  <a class="md-skip" href="#commonly-used-options">
    Skip to content
  </a>
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<!-- Home link in the header. Its only content is the logo image, so the
     alt text names the link destination instead of the generic "logo". -->
<a href="../.." title="Browsertrix Crawler Docs" class="md-header__button md-logo" aria-label="Browsertrix Crawler Docs" data-md-component="logo">
  <img src="../../assets/brand/browsertrix-crawler-white.svg" alt="Browsertrix Crawler Docs">
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
Browsertrix Crawler Docs
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Commonly-Used Options
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-header__button md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
</label>
|
|
<!-- Search dialog. role="dialog" needs an accessible name, supplied here
     with aria-label. The overlay and icon labels all point at the #__search
     toggle checkbox. -->
<div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
  <label class="md-search__overlay" for="__search"></label>
  <div class="md-search__inner" role="search">
    <form class="md-search__form" name="search">
      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
      <label class="md-search__icon md-icon" for="__search">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
      </label>
      <nav class="md-search__options" aria-label="Search">
        <!-- The button is already named by aria-label; its icon is decorative. -->
        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
        </button>
      </nav>
      <div class="md-search__suggest" data-md-component="search-suggest"></div>
    </form>
    <div class="md-search__output">
      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
        <div class="md-search-result" data-md-component="search-result">
          <div class="md-search-result__meta">
            Initializing search
          </div>
          <ol class="md-search-result__list" role="presentation"></ol>
        </div>
      </div>
    </div>
  </div>
</div>
|
|
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/webrecorder/browsertrix-crawler/" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">
|
|
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
|
|
</svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
Browsertrix Crawler
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
|
|
|
|
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
|
<div class="md-grid">
|
|
<ul class="md-tabs__list">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../.." class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
Home
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../develop/" class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
Develop
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item md-tabs__item--active">
|
|
<a href="../" class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
User Guide
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</div>
|
|
</nav>
|
|
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
|
<!-- Drawer title: a label for the #__drawer toggle containing the home
     link (logo) followed by the site name. The logo alt text names the link
     destination instead of the generic "logo". -->
<label class="md-nav__title" for="__drawer">
  <a href="../.." title="Browsertrix Crawler Docs" class="md-nav__button md-logo" aria-label="Browsertrix Crawler Docs" data-md-component="logo">
    <img src="../../assets/brand/browsertrix-crawler-white.svg" alt="Browsertrix Crawler Docs">
  </a>
  Browsertrix Crawler Docs
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/webrecorder/browsertrix-crawler/" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">
|
|
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
|
|
</svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
Browsertrix Crawler
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../.." class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Home
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|
|
|
|
|
<div class="md-nav__link md-nav__container">
|
|
<a href="../../develop/" class="md-nav__link ">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Develop
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
|
|
|
|
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="0">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
</div>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Develop
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../develop/docs/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Documentation
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
|
|
|
|
|
<div class="md-nav__link md-nav__container">
|
|
<a href="../" class="md-nav__link ">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
User Guide
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
|
|
|
|
<!-- Toggle label for the #__nav_3 section checkbox. The tabindex attribute
     is omitted: its previous empty value was not a valid integer. -->
<label class="md-nav__link" for="__nav_3" id="__nav_3_label">
  <span class="md-nav__icon md-icon"></span>
</label>
|
|
|
|
</div>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_3">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
User Guide
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../outputs/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Outputs
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../exit-codes/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Exit codes
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Commonly-Used Options
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<!-- Link to the page being viewed. aria-current exposes the "current page"
     state to assistive technology; the --active class alone is visual only. -->
<a href="./" class="md-nav__link md-nav__link--active" aria-current="page">
  <span class="md-ellipsis">
    Commonly-Used Options
  </span>
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#waiting-for-page-load" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Waiting for Page Load
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Waiting for Page Load">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#additional-wait" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Additional Wait
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#link-extraction" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Link Extraction
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#ad-blocking" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Ad Blocking
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#sitemap-parsing" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Sitemap Parsing
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#custom-warcinfo-fields" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Custom Warcinfo Fields
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#screenshots" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Screenshots
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#screencasting" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Screencasting
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#text-extraction" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Text Extraction
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#uploading-crawl-outputs-to-s3-compatible-storage" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Uploading Crawl Outputs to S3-Compatible Storage
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Uploading Crawl Outputs to S3-Compatible Storage">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#webhook-notification" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Webhook Notification
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#saving-crawl-state-interrupting-and-restarting-the-crawl" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Saving Crawl State: Interrupting and Restarting the Crawl
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Saving Crawl State: Interrupting and Restarting the Crawl">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#periodic-state-saving" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Periodic State Saving
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#crawl-interruption-options" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Crawl Interruption Options
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Crawl Interruption Options">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#1-graceful-shutdown" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
1. Graceful Shutdown
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#2-less-graceful-quick-shutdown" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
2. Less-Graceful, Quick Shutdown
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#3-violent-immediate-shutdown" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
3. Violent / Immediate Shutdown
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#recommendations" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Recommendations
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../crawl-scope/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Crawl Scope
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../yaml-config/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
YAML Crawl Config
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../browser-profiles/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Creating and Using Browser Profiles
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../proxies/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Crawling with Proxies
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../behaviors/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Browser Behaviors
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../qa/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Quality Assurance
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../cli-options/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
All Command-Line Options
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#waiting-for-page-load" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Waiting for Page Load
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Waiting for Page Load">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#additional-wait" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Additional Wait
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#link-extraction" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Link Extraction
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#ad-blocking" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Ad Blocking
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#sitemap-parsing" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Sitemap Parsing
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#custom-warcinfo-fields" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Custom Warcinfo Fields
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#screenshots" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Screenshots
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#screencasting" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Screencasting
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#text-extraction" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Text Extraction
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#uploading-crawl-outputs-to-s3-compatible-storage" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Uploading Crawl Outputs to S3-Compatible Storage
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Uploading Crawl Outputs to S3-Compatible Storage">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#webhook-notification" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Webhook Notification
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#saving-crawl-state-interrupting-and-restarting-the-crawl" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Saving Crawl State: Interrupting and Restarting the Crawl
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Saving Crawl State: Interrupting and Restarting the Crawl">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#periodic-state-saving" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Periodic State Saving
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#crawl-interruption-options" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Crawl Interruption Options
|
|
|
|
</span>
|
|
</a>
|
|
|
|
<nav class="md-nav" aria-label="Crawl Interruption Options">
|
|
<ul class="md-nav__list">
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#1-graceful-shutdown" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
1. Graceful Shutdown
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#2-less-graceful-quick-shutdown" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
2. Less-Graceful, Quick Shutdown
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#3-violent-immediate-shutdown" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
3. Violent / Immediate Shutdown
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#recommendations" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Recommendations
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://github.com/webrecorder/browsertrix-crawler/edit/main/docs/docs/user-guide/common-options.md" title="Edit this page" class="md-content__button md-icon" rel="edit">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-pencil" viewBox="0 0 16 16">
|
|
<path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
<h1 id="commonly-used-options">Commonly-Used Options<a class="headerlink" href="#commonly-used-options" title="Permanent link">¶</a></h1>
|
|
<h2 id="waiting-for-page-load">Waiting for Page Load<a class="headerlink" href="#waiting-for-page-load" title="Permanent link">¶</a></h2>
|
|
<p>One of the key nuances of browser-based crawling is determining when a page is finished loading. This can be configured with the <code>--waitUntil</code> flag.</p>
|
|
<p>The default is <code>load,networkidle2</code>, which waits until page load and ≤2 requests remain, but for static sites, <code>--waitUntil domcontentloaded</code> may be used to speed up the crawl (to avoid waiting for ads to load, for example). <code>--waitUntil networkidle0</code> may make sense for sites where absolutely all requests must finish before proceeding.</p>
|
|
<p>See <a href="https://pptr.dev/api/puppeteer.page.goto#remarks">page.goto waitUntil options</a> for more info on the options that can be used with this flag from the Puppeteer docs.</p>
|
|
<p>The <code>--pageLoadTimeout</code>/<code>--timeout</code> option sets the timeout in seconds for page load, defaulting to 90 seconds. Behaviors will run on the page once either the page load condition or the page load timeout is met, whichever happens first.</p>
|
|
<h3 id="additional-wait">Additional Wait<a class="headerlink" href="#additional-wait" title="Permanent link">¶</a></h3>
|
|
<p>Occasionally, a page may seem to have loaded, but performs dynamic initialization / additional loading. This can be hard to detect, and the <code>--postLoadDelay</code> flag
|
|
can be used to specify additional seconds to wait after the page appears to have loaded, before moving on to post-processing actions, such as link extraction, screenshotting and text extraction (see below).</p>
|
|
<p>(On the other hand, the <code>--pageExtraDelay</code>/<code>--delay</code> option adds an extra delay after all post-load actions have taken place, and can be useful for rate-limiting.)</p>
|
|
<h2 id="link-extraction">Link Extraction<a class="headerlink" href="#link-extraction" title="Permanent link">¶</a></h2>
|
|
<p>By default, the crawler will extract all <code>href</code> properties from all <code>&lt;a&gt;</code> tags that have an <code>href</code>.
|
|
This can be customized with the <code>--selectLinks</code> option, which can provide alternative selectors of the form:
|
|
<code>[css selector]->[property to use]</code> or <code>[css selector]->@[attribute to use]</code>. The default value is <code>a[href]->href</code>.</p>
|
|
<p>For example, to specify the default, but also include all <code>divs</code> that have class <code>mylink</code> and use <code>custom-href</code> attribute as the link, use <code>--selectLinks 'a[href]->href' --selectLinks 'div.mylink->@custom-href'</code>.</p>
|
|
<p>Any number of selectors can be specified in this way, and each will be applied in sequence on each page.</p>
|
|
<h2 id="ad-blocking">Ad Blocking<a class="headerlink" href="#ad-blocking" title="Permanent link">¶</a></h2>
|
|
<p>Brave Browser, the browser used by Browsertrix Crawler for crawling, has some ad and tracker blocking features enabled by default. These <a href="https://brave.com/shields/">Shields</a> can be disabled or customized using <a href="../browser-profiles/">Browser Profiles</a>.</p>
|
|
<p>Browsertrix Crawler also supports blocking ads from being loaded during capture based on <a href="https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts">Steven Black's list of known ad hosts</a>. To enable ad blocking based on this list, use the <code>--blockAds</code> option. If <code>--adBlockMessage</code> is set, a record with the specified error message will be added in the ad's place.</p>
|
|
<h2 id="sitemap-parsing">Sitemap Parsing<a class="headerlink" href="#sitemap-parsing" title="Permanent link">¶</a></h2>
|
|
<p>The <code>--sitemap</code> option can be used to have the crawler parse a sitemap and queue any found URLs while respecting the crawl's scoping rules and limits. Browsertrix Crawler is able to parse regular sitemaps as well as sitemap indices that point out to nested sitemaps.</p>
|
|
<p>By default, <code>--sitemap</code> will look for a sitemap at <code>&lt;your-seed&gt;/sitemap.xml</code>. If a website's sitemap is hosted at a different URL, pass the URL with the flag like <code>--sitemap &lt;sitemap url&gt;</code>.</p>
|
|
<p>The <code>--sitemapFrom</code>/<code>--sitemapFromDate</code> and <code>--sitemapTo</code>/<code>--sitemapToDate</code> options allow for only extracting pages within a specific date range. If set, these options will filter URLs from sitemaps to those greater than or equal to (&gt;=) or less than or equal to (&lt;=) a provided ISO Date string (<code>YYYY-MM-DD</code>, <code>YYYY-MM-DDTHH:MM:SS</code>, or partial date), respectively.</p>
|
|
<h2 id="custom-warcinfo-fields">Custom Warcinfo Fields<a class="headerlink" href="#custom-warcinfo-fields" title="Permanent link">¶</a></h2>
|
|
<p>Custom fields can be added to the <code>warcinfo</code> WARC record, generated for each combined WARC. The fields can be specified in the YAML config under the <code>warcinfo</code> section or specified individually via the command line.</p>
|
|
<p>For example, the following are equivalent ways to add additional warcinfo fields:</p>
|
|
<p>via yaml config:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nt">warcinfo</span><span class="p">:</span>
|
|
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="w"> </span><span class="nt">operator</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">my-org</span>
|
|
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="w"> </span><span class="nt">hostname</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">hostname.my-org</span>
|
|
</code></pre></div>
|
|
<p>via command-line:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>--warcinfo.operator<span class="w"> </span>my-org<span class="w"> </span>--warcinfo.hostname<span class="w"> </span>hostname.my-org
|
|
</code></pre></div>
|
|
<h2 id="screenshots">Screenshots<a class="headerlink" href="#screenshots" title="Permanent link">¶</a></h2>
|
|
<p>Browsertrix Crawler includes the ability to take screenshots of each page crawled via the <code>--screenshot</code> option.</p>
|
|
<p>Three screenshot options are available:</p>
|
|
<ul>
|
|
<li><code>--screenshot view</code>: Takes a png screenshot of the initially visible viewport (1920x1080)</li>
|
|
<li><code>--screenshot fullPage</code>: Takes a png screenshot of the full page</li>
|
|
<li><code>--screenshot thumbnail</code>: Takes a jpeg thumbnail of the initially visible viewport (1920x1080)</li>
|
|
</ul>
|
|
<p>These can be combined using a comma-separated list passed via the <code>--screenshot</code> option, e.g.: <code>--screenshot thumbnail,view,fullPage</code> or passed in separately <code>--screenshot thumbnail --screenshot view --screenshot fullPage</code>.</p>
|
|
<p>Screenshots are written into a <code>screenshots.warc.gz</code> WARC file in the <code>archives/</code> directory. If the <code>--generateWACZ</code> command line option is used, the screenshots WARC is written into the <code>archive</code> directory of the WACZ file and indexed alongside the other WARCs.</p>
|
|
<h2 id="screencasting">Screencasting<a class="headerlink" href="#screencasting" title="Permanent link">¶</a></h2>
|
|
<p>Browsertrix Crawler includes a screencasting option which allows watching the crawl in real-time via screencast (connected via a websocket).</p>
|
|
<p>To enable, add <code>--screencastPort</code> command-line option and also map the port on the docker container. An example command might be:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>docker<span class="w"> </span>run<span class="w"> </span>-p<span class="w"> </span><span class="m">9037</span>:9037<span class="w"> </span>-v<span class="w"> </span><span class="nv">$PWD</span>/crawls:/crawls/<span class="w"> </span>webrecorder/browsertrix-crawler<span class="w"> </span>crawl<span class="w"> </span>--url<span class="w"> </span>https://www.example.com<span class="w"> </span>--screencastPort<span class="w"> </span><span class="m">9037</span>
|
|
</code></pre></div>
|
|
<p>Then, open <code>http://localhost:9037/</code> and watch the crawl!</p>
|
|
<h2 id="text-extraction">Text Extraction<a class="headerlink" href="#text-extraction" title="Permanent link">¶</a></h2>
|
|
<p>Browsertrix Crawler supports text extraction via the <code>--text</code> flag, which accepts one or more of the following extraction options:</p>
|
|
<ul>
|
|
<li><code>--text to-pages</code> — Extract initial text and add it to the text field in pages.jsonl</li>
|
|
<li><code>--text to-warc</code> — Extract initial page text and add it to a <code>urn:text:&lt;url&gt;</code> WARC resource record</li>
|
|
<li><code>--text final-to-warc</code> — Extract the final page text after all behaviors have run and add it to a <code>urn:textFinal:&lt;url&gt;</code> WARC resource record</li>
|
|
</ul>
|
|
<p>The options can be passed separately or combined into a comma-separated list; e.g. <code>--text to-warc,final-to-warc</code> and <code>--text to-warc --text final-to-warc</code>
|
|
are equivalent. For backwards compatibility, <code>--text</code> alone is equivalent to <code>--text to-pages</code>.</p>
|
|
<h2 id="uploading-crawl-outputs-to-s3-compatible-storage">Uploading Crawl Outputs to S3-Compatible Storage<a class="headerlink" href="#uploading-crawl-outputs-to-s3-compatible-storage" title="Permanent link">¶</a></h2>
|
|
<p>Browsertrix Crawler includes support for uploading WACZ files to S3-compatible storage, and notifying a webhook when the upload succeeds.</p>
|
|
<p>S3 upload is only supported when WACZ output is enabled and will not work for WARC output.</p>
|
|
<p>This feature can currently be enabled by setting environment variables (for security reasons, these settings are not passed in as part of the command-line or YAML config at this time).</p>
|
|
<p>Environment variables for S3-uploads include:</p>
|
|
<ul>
|
|
<li><code>STORE_ACCESS_KEY</code> / <code>STORE_SECRET_KEY</code> — S3 credentials</li>
|
|
<li><code>STORE_ENDPOINT_URL</code> — S3 endpoint URL</li>
|
|
<li><code>STORE_PATH</code> — optional path appended to endpoint, if provided</li>
|
|
<li><code>STORE_FILENAME</code> — filename or template for filename to put on S3</li>
|
|
<li><code>STORE_USER</code> — optional username to pass back as part of the webhook callback</li>
|
|
<li><code>STORE_REGION</code> — optional region to pass to S3 endpoint. Defaults to <code>us-east-1</code> if unspecified.</li>
|
|
<li><code>CRAWL_ID</code> — unique crawl id (defaults to container hostname)</li>
|
|
<li><code>WEBHOOK_URL</code> — the URL of the webhook (can be http://, https://, or redis://)</li>
|
|
</ul>
|
|
<h3 id="webhook-notification">Webhook Notification<a class="headerlink" href="#webhook-notification" title="Permanent link">¶</a></h3>
|
|
<p>The webhook URL can be an HTTP URL which receives a JSON POST request OR a Redis URL, which specifies a redis list key to which the JSON data is pushed as a string.</p>
|
|
<p>Webhook notification JSON includes:</p>
|
|
<ul>
|
|
<li><code>id</code> — crawl id (value of <code>CRAWL_ID</code>)</li>
|
|
<li><code>userId</code> — user id (value of <code>STORE_USER</code>)</li>
|
|
<li><code>filename</code> — bucket path + filename of the file</li>
|
|
<li><code>size</code> — size of WACZ file</li>
|
|
<li><code>hash</code> — SHA-256 of WACZ file</li>
|
|
<li><code>completed</code> — boolean of whether crawl fully completed or partially (due to interrupt signal or other error).</li>
|
|
</ul>
|
|
<h2 id="saving-crawl-state-interrupting-and-restarting-the-crawl">Saving Crawl State: Interrupting and Restarting the Crawl<a class="headerlink" href="#saving-crawl-state-interrupting-and-restarting-the-crawl" title="Permanent link">¶</a></h2>
|
|
<p>A crawl can be gracefully interrupted with Ctrl-C (SIGINT) or a SIGTERM (see below for more details).</p>
|
|
<p>When a crawl is interrupted, the current crawl state is written to the <code>crawls</code> subdirectory inside the collection directory. The crawl state includes the current YAML config, if any, plus the current state of the crawl.</p>
|
|
<p>This crawl state YAML file can then be used as the <code>--config</code> option to restart the crawl from where it was left off previously. When restarting a crawl you will need to include any command line options you used to start the original crawl (e.g. <code>--url</code>), since these are not persisted to the crawl state.</p>
|
|
<p>By default, the crawl interruption waits for current pages to finish. A subsequent SIGINT will cause the crawl to stop immediately. Any unfinished pages are recorded in the <code>pending</code> section of the crawl state (if gracefully finished, the section will be empty).</p>
|
|
<p>By default, the crawl state is only written when a crawl is interrupted before completing. The <code>--saveState</code> cli option can be set to <code>always</code> or <code>never</code> respectively, to control when the crawl state file should be written.</p>
|
|
<h3 id="periodic-state-saving">Periodic State Saving<a class="headerlink" href="#periodic-state-saving" title="Permanent link">¶</a></h3>
|
|
<p>When <code>--saveState</code> is set to <code>always</code>, Browsertrix Crawler will also save the state automatically during the crawl, as set by the <code>--saveStateInterval</code> setting. The crawler will keep the last <code>--saveStateHistory</code> save states and delete older ones. This provides an extra backup: in the event that the crawl fails unexpectedly or is not terminated via Ctrl-C, several previous crawl states are still available.</p>
|
|
<h2 id="crawl-interruption-options">Crawl Interruption Options<a class="headerlink" href="#crawl-interruption-options" title="Permanent link">¶</a></h2>
|
|
<p>Browsertrix Crawler has different crawl interruption modes, and does everything it can to ensure the WARC data written is always valid when a crawl is interrupted. The following are three interruption scenarios:</p>
|
|
<h3 id="1-graceful-shutdown">1. Graceful Shutdown<a class="headerlink" href="#1-graceful-shutdown" title="Permanent link">¶</a></h3>
|
|
<p>Initiated when a single SIGINT (Ctrl+C) or SIGTERM (<code>docker kill -s SIGINT</code>, <code>docker kill -s SIGTERM</code>, <code>kill</code>) signal is received.</p>
|
|
<p>The crawler will attempt to finish current pages, finish any pending async requests, write all WARCs, generate WACZ files and finish other post-processing, save state from Redis, and then exit.</p>
|
|
<h3 id="2-less-graceful-quick-shutdown">2. Less-Graceful, Quick Shutdown<a class="headerlink" href="#2-less-graceful-quick-shutdown" title="Permanent link">¶</a></h3>
|
|
<p>If a second SIGINT / SIGTERM is received, the crawler will close the browser immediately, interrupting any on-going network requests. Any asynchronous fetching will not be finished. However, anything in the WARC queue will be written and WARC files will be flushed. WACZ files and other post-processing will not be generated, but the current state from Redis will still be saved if enabled (see above). WARC records should be fully finished and WARC files should be valid, though they may not contain all the data for the pages being processed during the interruption.</p>
|
|
<h3 id="3-violent-immediate-shutdown">3. Violent / Immediate Shutdown<a class="headerlink" href="#3-violent-immediate-shutdown" title="Permanent link">¶</a></h3>
|
|
<p>If a crawler is killed, e.g. with a SIGKILL signal (<code>docker kill</code>, <code>kill -9</code>), the crawler container / process will be immediately shut down. It will not have a chance to finish any WARC files, and there is no guarantee that WARC files will be valid, but the crawler will of course exit right away.</p>
|
|
<h3 id="recommendations">Recommendations<a class="headerlink" href="#recommendations" title="Permanent link">¶</a></h3>
|
|
<p>It is recommended to gracefully stop the crawler by sending a SIGINT or SIGTERM signal, which can be done via Ctrl+C or <code>docker kill -s SIGINT &lt;containerid&gt;</code>. Repeating the command will result in a faster, slightly less-graceful shutdown.
|
|
Using SIGKILL is not recommended except as a last resort, and only when data is to be discarded.</p>
|
|
<p>Note: When using the crawler in the Browsertrix app / in Kubernetes in general, stopping a crawl / stopping a pod always results in option #1 (sending a single SIGTERM signal) to the crawler pod(s).</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
|
|
|
|
<nav class="md-footer__inner md-grid" aria-label="Footer" >
|
|
|
|
|
|
<a href="../exit-codes/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Exit codes">
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</div>
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Previous
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Exit codes
|
|
</div>
|
|
</div>
|
|
</a>
|
|
|
|
|
|
|
|
<a href="../crawl-scope/" class="md-footer__link md-footer__link--next" aria-label="Next: Crawl Scope">
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Next
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Crawl Scope
|
|
</div>
|
|
</div>
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
|
|
</div>
|
|
</a>
|
|
|
|
</nav>
|
|
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
<div class="md-copyright__highlight">
|
|
Creative Commons Attribution 4.0 International (CC BY 4.0)
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="md-social">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://webrecorder.net" target="_blank" rel="noopener" title="webrecorder.net" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-globe" viewBox="0 0 16 16">
|
|
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5zM5.145 12c.138.386.295.744.468 1.068.552 1.035 1.218 1.65 1.887 1.855V12H5.145zm.182 2.472a6.696 6.696 0 0 1-.597-.933A9.268 9.268 0 0 1 4.09 12H2.255a7.024 7.024 0 0 0 3.072 2.472zM3.82 11a13.652 13.652 0 0 1-.312-2.5h-2.49c.062.89.291 1.733.656 2.5H3.82zm6.853 3.472A7.024 7.024 0 0 0 13.745 12H11.91a9.27 9.27 0 0 1-.64 1.539 6.688 6.688 0 0 1-.597.933zM8.5 12v2.923c.67-.204 1.335-.82 1.887-1.855.173-.324.33-.682.468-1.068H8.5zm3.68-1h2.146c.365-.767.594-1.61.656-2.5h-2.49a13.65 13.65 0 0 1-.312 2.5zm2.802-3.5a6.959 6.959 0 0 0-.656-2.5H12.18c.174.782.282 1.623.312 2.5h2.49zM11.27 2.461c.247.464.462.98.64 1.539h1.835a7.024 7.024 0 0 0-3.072-2.472c.218.284.418.598.597.933zM10.855 4a7.966 7.966 0 0 0-.468-1.068C9.835 1.897 9.17 1.282 8.5 1.077V4h2.355z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://forum.webrecorder.net/" target="_blank" rel="noopener" title="forum.webrecorder.net" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-chat-left-text-fill" viewBox="0 0 16 16">
|
|
<path d="M0 2a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H4.414a1 1 0 0 0-.707.293L.854 15.146A.5.5 0 0 1 0 14.793V2zm3.5 1a.5.5 0 0 0 0 1h9a.5.5 0 0 0 0-1h-9zm0 2.5a.5.5 0 0 0 0 1h9a.5.5 0 0 0 0-1h-9zm0 2.5a.5.5 0 0 0 0 1h5a.5.5 0 0 0 0-1h-5z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://digipres.club/@webrecorder" target="_blank" rel="noopener me" title="digipres.club" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-mastodon" viewBox="0 0 16 16">
|
|
<path d="M11.19 12.195c2.016-.24 3.77-1.475 3.99-2.603.348-1.778.32-4.339.32-4.339 0-3.47-2.286-4.488-2.286-4.488C12.062.238 10.083.017 8.027 0h-.05C5.92.017 3.942.238 2.79.765c0 0-2.285 1.017-2.285 4.488l-.002.662c-.004.64-.007 1.35.011 2.091.083 3.394.626 6.74 3.78 7.57 1.454.383 2.703.463 3.709.408 1.823-.1 2.847-.647 2.847-.647l-.06-1.317s-1.303.41-2.767.36c-1.45-.05-2.98-.156-3.215-1.928a3.614 3.614 0 0 1-.033-.496s1.424.346 3.228.428c1.103.05 2.137-.064 3.188-.189zm1.613-2.47H11.13v-4.08c0-.859-.364-1.295-1.091-1.295-.804 0-1.207.517-1.207 1.541v2.233H7.168V5.89c0-1.024-.403-1.541-1.207-1.541-.727 0-1.091.436-1.091 1.296v4.079H3.197V5.522c0-.859.22-1.541.66-2.046.456-.505 1.052-.764 1.793-.764.856 0 1.504.328 1.933.983L8 4.39l.417-.695c.429-.655 1.077-.983 1.934-.983.74 0 1.336.259 1.791.764.442.505.661 1.187.661 2.046v4.203z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://www.youtube.com/@webrecorder" target="_blank" rel="noopener" title="www.youtube.com" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-youtube" viewBox="0 0 16 16">
|
|
<path d="M8.051 1.999h.089c.822.003 4.987.033 6.11.335a2.01 2.01 0 0 1 1.415 1.42c.101.38.172.883.22 1.402l.01.104.022.26.008.104c.065.914.073 1.77.074 1.957v.075c-.001.194-.01 1.108-.082 2.06l-.008.105-.009.104c-.05.572-.124 1.14-.235 1.558a2.007 2.007 0 0 1-1.415 1.42c-1.16.312-5.569.334-6.18.335h-.142c-.309 0-1.587-.006-2.927-.052l-.17-.006-.087-.004-.171-.007-.171-.007c-1.11-.049-2.167-.128-2.654-.26a2.007 2.007 0 0 1-1.415-1.419c-.111-.417-.185-.986-.235-1.558L.09 9.82l-.008-.104A31.4 31.4 0 0 1 0 7.68v-.123c.002-.215.01-.958.064-1.778l.007-.103.003-.052.008-.104.022-.26.01-.104c.048-.519.119-1.023.22-1.402a2.007 2.007 0 0 1 1.415-1.42c.487-.13 1.544-.21 2.654-.26l.17-.007.172-.006.086-.003.171-.007A99.788 99.788 0 0 1 7.858 2h.193zM6.4 5.209v4.818l4.157-2.408L6.4 5.209z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["navigation.sections", "navigation.tabs", "navigation.tabs.sticky", "navigation.instant", "navigation.tracking", "navigation.indexes", "navigation.footer", "content.code.copy", "content.action.edit", "content.tooltips", "search.suggest"], "search": "../../assets/javascripts/workers/search.7a47a382.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|
|
|
|
|
<script src="../../assets/javascripts/bundle.e71a0d61.min.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |