mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-08 06:09:48 +00:00
1281 lines
No EOL
46 KiB
HTML
1281 lines
No EOL
46 KiB
HTML
|
|
<!doctype html>
|
|
<html lang="en" class="no-js">
|
|
<head>
|
|
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|
|
|
|
|
|
|
|
|
<link rel="prev" href="../yaml-config/">
|
|
|
|
|
|
<link rel="next" href="../proxies/">
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="icon" href="../../assets/brand/browsertrix-crawler-icon-color-dynamic.svg">
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.0">
|
|
|
|
|
|
|
|
<title>Creating and Using Browser Profiles - Browsertrix Crawler Docs</title>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.618322db.min.css">
|
|
|
|
|
|
<link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<style>:root{--md-admonition-icon--note:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-pencil-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M12.854.146a.5.5%200%200%200-.707%200L10.5%201.793%2014.207%205.5l1.647-1.646a.5.5%200%200%200%200-.708l-3-3zm.646%206.061L9.793%202.5%203.293%209H3.5a.5.5%200%200%201%20.5.5v.5h.5a.5.5%200%200%201%20.5.5v.5h.5a.5.5%200%200%201%20.5.5v.5h.5a.5.5%200%200%201%20.5.5v.207l6.5-6.5zm-7.468%207.468A.5.5%200%200%201%206%2013.5V13h-.5a.5.5%200%200%201-.5-.5V12h-.5a.5.5%200%200%201-.5-.5V11h-.5a.5.5%200%200%201-.5-.5V10h-.5a.499.499%200%200%201-.175-.032l-.179.178a.5.5%200%200%200-.11.168l-2%205a.5.5%200%200%200%20.65.65l5-2a.5.5%200%200%200%20.168-.11l.178-.178z%22/%3E%3C/svg%3E');--md-admonition-icon--abstract:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-file-earmark-text-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M9.293%200H4a2%202%200%200%200-2%202v12a2%202%200%200%200%202%202h8a2%202%200%200%200%202-2V4.707A1%201%200%200%200%2013.707%204L10%20.293A1%201%200%200%200%209.293%200zM9.5%203.5v-2l3%203h-2a1%201%200%200%201-1-1zM4.5%209a.5.5%200%200%201%200-1h7a.5.5%200%200%201%200%201h-7zM4%2010.5a.5.5%200%200%201%20.5-.5h7a.5.5%200%200%201%200%201h-7a.5.5%200%200%201-.5-.5zm.5%202.5a.5.5%200%200%201%200-1h4a.5.5%200%200%201%200%201h-4z%22/%3E%3C/svg%3E');--md-admonition-icon--info:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-info-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M8%2016A8%208%200%201%200%208%200a8%208%200%200%200%200%2016zm.93-9.412-1%204.705c-.07.34.029.533.304.533.194%200%20.487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703%200-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381%202.29-.287zM8%205.5a1%201%200%201%201%200-2%201%201%200%200%201%200%202z%22/%3E%3C/svg%3E');--md-admonition-icon--tip:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-exclamation-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M16%208A8%208%200%201%201%200%208a8%208%200%200%201%2016%200zM8%204a.905.905%200%200%200-.9.995l.35%203.507a.552.552%200%200%200%201.1%200l.35-3.507A.905.905%200%200%200%208%204zm.002%206a1%201%200%201%200%200%202%201%201%200%200%200%200-2z%22/%3E%3C/svg%3E');--md-admonition-icon--success:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-check-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M16%208A8%208%200%201%201%200%208a8%208%200%200%201%2016%200zm-3.97-3.03a.75.75%200%200%200-1.08.022L7.477%209.417%205.384%207.323a.75.75%200%200%200-1.06%201.06L6.97%2011.03a.75.75%200%200%200%201.079-.02l3.992-4.99a.75.75%200%200%200-.01-1.05z%22/%3E%3C/svg%3E');--md-admonition-icon--question:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-question-circle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M16%208A8%208%200%201%201%200%208a8%208%200%200%201%2016%200zM5.496%206.033h.825c.138%200%20.248-.113.266-.25.09-.656.54-1.134%201.342-1.134.686%200%201.314.343%201.314%201.168%200%20.635-.374.927-.965%201.371-.673.489-1.206%201.06-1.168%201.987l.003.217a.25.25%200%200%200%20.25.246h.811a.25.25%200%200%200%20.25-.25v-.105c0-.718.273-.927%201.01-1.486.609-.463%201.244-.977%201.244-2.056%200-1.511-1.276-2.241-2.673-2.241-1.267%200-2.655.59-2.75%202.286a.237.237%200%200%200%20.241.247zm2.325%206.443c.61%200%201.029-.394%201.029-.927%200-.552-.42-.94-1.029-.94-.584%200-1.009.388-1.009.94%200%20.533.425.927%201.01.927z%22/%3E%3C/svg%3E');--md-admonition-icon--warning:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-exclamation-triangle-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M8.982%201.566a1.13%201.13%200%200%200-1.96%200L.165%2013.233c-.457.778.091%201.767.98%201.767h13.713c.889%200%201.438-.99.98-1.767L8.982%201.566zM8%205c.535%200%20.954.462.9.995l-.35%203.507a.552.552%200%200%201-1.1%200L7.1%205.995A.905.905%200%200%201%208%205zm.002%206a1%201%200%201%201%200%202%201%201%200%200%201%200-2z%22/%3E%3C/svg%3E');--md-admonition-icon--failure:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-x-octagon-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M11.46.146A.5.5%200%200%200%2011.107%200H4.893a.5.5%200%200%200-.353.146L.146%204.54A.5.5%200%200%200%200%204.893v6.214a.5.5%200%200%200%20.146.353l4.394%204.394a.5.5%200%200%200%20.353.146h6.214a.5.5%200%200%200%20.353-.146l4.394-4.394a.5.5%200%200%200%20.146-.353V4.893a.5.5%200%200%200-.146-.353L11.46.146zm-6.106%204.5L8%207.293l2.646-2.647a.5.5%200%200%201%20.708.708L8.707%208l2.647%202.646a.5.5%200%200%201-.708.708L8%208.707l-2.646%202.647a.5.5%200%200%201-.708-.708L7.293%208%204.646%205.354a.5.5%200%201%201%20.708-.708z%22/%3E%3C/svg%3E');--md-admonition-icon--danger:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-exclamation-diamond-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M9.05.435c-.58-.58-1.52-.58-2.1%200L.436%206.95c-.58.58-.58%201.519%200%202.098l6.516%206.516c.58.58%201.519.58%202.098%200l6.516-6.516c.58-.58.58-1.519%200-2.098L9.05.435zM8%204c.535%200%20.954.462.9.995l-.35%203.507a.552.552%200%200%201-1.1%200L7.1%204.995A.905.905%200%200%201%208%204zm.002%206a1%201%200%201%201%200%202%201%201%200%200%201%200-2z%22/%3E%3C/svg%3E');--md-admonition-icon--bug:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-bug-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M4.978.855a.5.5%200%201%200-.956.29l.41%201.352A4.985%204.985%200%200%200%203%206h10a4.985%204.985%200%200%200-1.432-3.503l.41-1.352a.5.5%200%201%200-.956-.29l-.291.956A4.978%204.978%200%200%200%208%201a4.979%204.979%200%200%200-2.731.811l-.29-.956z%22/%3E%20%20%3Cpath%20d%3D%22M13%206v1H8.5v8.975A5%205%200%200%200%2013%2011h.5a.5.5%200%200%201%20.5.5v.5a.5.5%200%201%200%201%200v-.5a1.5%201.5%200%200%200-1.5-1.5H13V9h1.5a.5.5%200%200%200%200-1H13V7h.5A1.5%201.5%200%200%200%2015%205.5V5a.5.5%200%200%200-1%200v.5a.5.5%200%200%201-.5.5H13zm-5.5%209.975V7H3V6h-.5a.5.5%200%200%201-.5-.5V5a.5.5%200%200%200-1%200v.5A1.5%201.5%200%200%200%202.5%207H3v1H1.5a.5.5%200%200%200%200%201H3v1h-.5A1.5%201.5%200%200%200%201%2011.5v.5a.5.5%200%201%200%201%200v-.5a.5.5%200%200%201%20.5-.5H3a5%205%200%200%200%204.5%204.975z%22/%3E%3C/svg%3E');--md-admonition-icon--example:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-mortarboard-fill%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M8.211%202.047a.5.5%200%200%200-.422%200l-7.5%203.5a.5.5%200%200%200%20.025.917l7.5%203a.5.5%200%200%200%20.372%200L14%207.14V13a1%201%200%200%200-1%201v2h3v-2a1%201%200%200%200-1-1V6.739l.686-.275a.5.5%200%200%200%20.025-.917l-7.5-3.5Z%22/%3E%20%20%3Cpath%20d%3D%22M4.176%209.032a.5.5%200%200%200-.656.327l-.5%201.7a.5.5%200%200%200%20.294.605l4.5%201.8a.5.5%200%200%200%20.372%200l4.5-1.8a.5.5%200%200%200%20.294-.605l-.5-1.7a.5.5%200%200%200-.656-.327L8%2010.466%204.176%209.032Z%22/%3E%3C/svg%3E');--md-admonition-icon--quote:url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2216%22%20height%3D%2216%22%20fill%3D%22currentColor%22%20class%3D%22bi%20bi-quote%22%20viewBox%3D%220%200%2016%2016%22%3E%20%20%3Cpath%20d%3D%22M12%2012a1%201%200%200%200%201-1V8.558a1%201%200%200%200-1-1h-1.388c0-.351.021-.703.062-1.054.062-.372.166-.703.31-.992.145-.29.331-.517.559-.683.227-.186.516-.279.868-.279V3c-.579%200-1.085.124-1.52.372a3.322%203.322%200%200%200-1.085.992%204.92%204.92%200%200%200-.62%201.458A7.712%207.712%200%200%200%209%207.558V11a1%201%200%200%200%201%201h2Zm-6%200a1%201%200%200%200%201-1V8.558a1%201%200%200%200-1-1H4.612c0-.351.021-.703.062-1.054.062-.372.166-.703.31-.992.145-.29.331-.517.559-.683.227-.186.516-.279.868-.279V3c-.579%200-1.085.124-1.52.372a3.322%203.322%200%200%200-1.085.992%204.92%204.92%200%200%200-.62%201.458A7.712%207.712%200%200%200%203%207.558V11a1%201%200%200%200%201%201h2Z%22/%3E%3C/svg%3E');}</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="../../stylesheets/extra.css">
|
|
|
|
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|
|
|
|
|
|
|
|
|
|
|
</head>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<body dir="ltr" data-md-color-scheme="webrecorder" data-md-color-primary="indigo" data-md-color-accent="indigo">
|
|
|
|
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|
<label class="md-overlay" for="__drawer"></label>
|
|
<div data-md-component="skip">
|
|
|
|
|
|
<a href="#creating-and-using-browser-profiles" class="md-skip">
|
|
Skip to content
|
|
</a>
|
|
|
|
</div>
|
|
<div data-md-component="announce">
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<header class="md-header md-header--shadow md-header--lifted" data-md-component="header">
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|
<a href="../.." title="Browsertrix Crawler Docs" class="md-header__button md-logo" aria-label="Browsertrix Crawler Docs" data-md-component="logo">
|
|
|
|
<img src="../../assets/brand/browsertrix-crawler-white.svg" alt="logo">
|
|
|
|
</a>
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|
</label>
|
|
<div class="md-header__title" data-md-component="header-title">
|
|
<div class="md-header__ellipsis">
|
|
<div class="md-header__topic">
|
|
<span class="md-ellipsis">
|
|
Browsertrix Crawler Docs
|
|
</span>
|
|
</div>
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|
<span class="md-ellipsis">
|
|
|
|
Creating and Using Browser Profiles
|
|
|
|
</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-header__button md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
</label>
|
|
<div class="md-search" data-md-component="search" role="dialog">
|
|
<label class="md-search__overlay" for="__search"></label>
|
|
<div class="md-search__inner" role="search">
|
|
<form class="md-search__form" name="search">
|
|
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
|
|
<label class="md-search__icon md-icon" for="__search">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</label>
|
|
<nav class="md-search__options" aria-label="Search">
|
|
|
|
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
|
|
</button>
|
|
</nav>
|
|
|
|
<div class="md-search__suggest" data-md-component="search-suggest"></div>
|
|
|
|
</form>
|
|
<div class="md-search__output">
|
|
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
|
|
<div class="md-search-result" data-md-component="search-result">
|
|
<div class="md-search-result__meta">
|
|
Initializing search
|
|
</div>
|
|
<ol class="md-search-result__list" role="presentation"></ol>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-header__source">
|
|
<a href="https://github.com/webrecorder/browsertrix-crawler/" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">
|
|
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
|
|
</svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
Browsertrix Crawler
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
|
|
|
|
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
|
|
<div class="md-grid">
|
|
<ul class="md-tabs__list">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../.." class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
Home
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item">
|
|
<a href="../../develop/" class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
Develop
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-tabs__item md-tabs__item--active">
|
|
<a href="../" class="md-tabs__link">
|
|
|
|
|
|
|
|
|
|
|
|
User Guide
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</div>
|
|
</nav>
|
|
|
|
|
|
</header>
|
|
|
|
<div class="md-container" data-md-component="container">
|
|
|
|
|
|
|
|
|
|
<main class="md-main" data-md-component="main">
|
|
<div class="md-main__inner md-grid">
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
|
|
<label class="md-nav__title" for="__drawer">
|
|
<a href="../.." title="Browsertrix Crawler Docs" class="md-nav__button md-logo" aria-label="Browsertrix Crawler Docs" data-md-component="logo">
|
|
|
|
<img src="../../assets/brand/browsertrix-crawler-white.svg" alt="logo">
|
|
|
|
</a>
|
|
Browsertrix Crawler Docs
|
|
</label>
|
|
|
|
<div class="md-nav__source">
|
|
<a href="https://github.com/webrecorder/browsertrix-crawler/" title="Go to repository" class="md-source" data-md-component="source">
|
|
<div class="md-source__icon md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">
|
|
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
|
|
</svg>
|
|
</div>
|
|
<div class="md-source__repository">
|
|
Browsertrix Crawler
|
|
</div>
|
|
</a>
|
|
</div>
|
|
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../.." class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Home
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|
|
|
|
|
<div class="md-nav__link md-nav__container">
|
|
<a href="../../develop/" class="md-nav__link ">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Develop
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
|
|
|
|
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="0">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
</div>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|
<label class="md-nav__title" for="__nav_2">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
Develop
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../../develop/docs/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Documentation
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
|
|
|
|
|
|
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
|
|
|
|
|
|
<div class="md-nav__link md-nav__container">
|
|
<a href="../" class="md-nav__link ">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
User Guide
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
|
|
|
|
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
</div>
|
|
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
|
|
<label class="md-nav__title" for="__nav_3">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
|
|
|
|
User Guide
|
|
|
|
|
|
</label>
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../outputs/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Outputs
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../exit-codes/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Exit codes
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../common-options/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Commonly-Used Options
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../crawl-scope/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Crawl Scope
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../yaml-config/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
YAML Crawl Config
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item md-nav__item--active">
|
|
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Creating and Using Browser Profiles
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
<span class="md-nav__icon md-icon"></span>
|
|
</label>
|
|
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Creating and Using Browser Profiles
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#interactive-profile-creation" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Interactive Profile Creation
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#headless-vs-headful-profiles" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Headless vs Headful Profiles
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#automated-profile-creation-for-user-login" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Automated Profile Creation for User Login
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#using-browser-profile-with-a-crawl" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Using Browser Profile with a Crawl
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../proxies/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Crawling with Proxies
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../behaviors/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Browser Behaviors
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../qa/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
Quality Assurance
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<li class="md-nav__item">
|
|
<a href="../cli-options/" class="md-nav__link">
|
|
|
|
|
|
|
|
<span class="md-ellipsis">
|
|
|
|
|
|
All Command-Line Options
|
|
|
|
|
|
|
|
</span>
|
|
|
|
|
|
|
|
</a>
|
|
</li>
|
|
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
|
|
</li>
|
|
|
|
|
|
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|
<div class="md-sidebar__scrollwrap">
|
|
<div class="md-sidebar__inner">
|
|
|
|
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<label class="md-nav__title" for="__toc">
|
|
<span class="md-nav__icon md-icon"></span>
|
|
Table of contents
|
|
</label>
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#interactive-profile-creation" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Interactive Profile Creation
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#headless-vs-headful-profiles" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Headless vs Headful Profiles
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#automated-profile-creation-for-user-login" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Automated Profile Creation for User Login
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
<li class="md-nav__item">
|
|
<a href="#using-browser-profile-with-a-crawl" class="md-nav__link">
|
|
<span class="md-ellipsis">
|
|
|
|
Using Browser Profile with a Crawl
|
|
|
|
</span>
|
|
</a>
|
|
|
|
</li>
|
|
|
|
</ul>
|
|
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="md-content" data-md-component="content">
|
|
|
|
<article class="md-content__inner md-typeset">
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://github.com/webrecorder/browsertrix-crawler/edit/main/docs/docs/user-guide/browser-profiles.md" title="Edit this page" class="md-content__button md-icon" rel="edit">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-pencil" viewBox="0 0 16 16">
|
|
<path d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5 13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
<h1 id="creating-and-using-browser-profiles">Creating and Using Browser Profiles<a class="headerlink" href="#creating-and-using-browser-profiles" title="Permanent link">¶</a></h1>
|
|
<p>Browsertrix Crawler can use existing browser profiles when running a crawl. This allows the browser to be pre-configured by logging in to certain sites or changing other settings, before running a crawl. By creating a logged in profile, the actual login credentials are not included in the crawl, only (temporary) session cookies.</p>
|
|
<h2 id="interactive-profile-creation">Interactive Profile Creation<a class="headerlink" href="#interactive-profile-creation" title="Permanent link">¶</a></h2>
|
|
<p>Interactive profile creation is used for creating profiles of more complex sites, or logging in to multiple sites at once.</p>
|
|
<p>To use this mode, don't specify <code>--username</code> or <code>--password</code> flags and expose two ports on the Docker container to allow DevTools to connect to the browser and to serve a status page.</p>
|
|
<p>In profile creation mode, Browsertrix Crawler launches a browser which uses a VNC server (via <a href="https://novnc.com/">noVNC</a>) running on port 6080 to provide a 'remote desktop' for interacting with the browser.</p>
|
|
<p>After interactively logging into desired sites or configuring other settings, <em>Create Profile</em> should be clicked to initiate profile creation. Browsertrix Crawler will then stop the browser, and save the browser profile.</p>
|
|
<p>To start in interactive profile creation mode, run:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>docker<span class="w"> </span>run<span class="w"> </span>-p<span class="w"> </span><span class="m">6080</span>:6080<span class="w"> </span>-p<span class="w"> </span><span class="m">9223</span>:9223<span class="w"> </span>-v<span class="w"> </span><span class="nv">$PWD</span>/crawls/profiles:/crawls/profiles/<span class="w"> </span>-it<span class="w"> </span>webrecorder/browsertrix-crawler<span class="w"> </span>create-login-profile<span class="w"> </span>--url<span class="w"> </span><span class="s2">"https://example.com/"</span>
|
|
</code></pre></div>
|
|
<p>Then, open a browser pointing to <code>http://localhost:9223/</code> and use the embedded browser to log in to any sites or configure any settings as needed.</p>
|
|
<p>Click <em>Create Profile</em> at the top when done. The profile will then be created in <code>./crawls/profiles/profile.tar.gz</code> containing the settings of this browsing session.</p>
|
|
<p>It is also possible to use an existing profile via the <code>--profile</code> flag. This allows previous browsing sessions to be extended as needed.</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>docker<span class="w"> </span>run<span class="w"> </span>-p<span class="w"> </span><span class="m">6080</span>:6080<span class="w"> </span>-p<span class="w"> </span><span class="m">9223</span>:9223<span class="w"> </span>-v<span class="w"> </span><span class="nv">$PWD</span>/crawls/profiles:/crawls/profiles<span class="w"> </span>-it<span class="w"> </span>webrecorder/browsertrix-crawler<span class="w"> </span>create-login-profile<span class="w"> </span>--url<span class="w"> </span><span class="s2">"https://example.com/"</span><span class="w"> </span>--filename<span class="w"> </span><span class="s2">"/crawls/profiles/newProfile.tar.gz"</span><span class="w"> </span>--profile<span class="w"> </span><span class="s2">"/crawls/profiles/oldProfile.tar.gz"</span>
|
|
</code></pre></div>
|
|
<h2 id="headless-vs-headful-profiles">Headless vs Headful Profiles<a class="headerlink" href="#headless-vs-headful-profiles" title="Permanent link">¶</a></h2>
|
|
<p>Browsertrix Crawler supports both headful and headless crawling. We have historically recommended using headful crawling to be most accurate to user experience, however, headless crawling may be faster and in recent versions of Chromium-based browsers should be much closer in fidelity to headful crawling.</p>
|
|
<p>To use profiles in headless mode, profiles should also be created with <code>--headless</code> flag.</p>
|
|
<p>When creating browser profile in headless mode, Browsertrix will use the devtools protocol on port 9222 to stream the browser interface.</p>
|
|
<p>To create a profile in headless mode, run:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>docker<span class="w"> </span>run<span class="w"> </span>-p<span class="w"> </span><span class="m">9222</span>:9222<span class="w"> </span>-p<span class="w"> </span><span class="m">9223</span>:9223<span class="w"> </span>-v<span class="w"> </span><span class="nv">$PWD</span>/crawls/profiles:/crawls/profiles/<span class="w"> </span>-it<span class="w"> </span>webrecorder/browsertrix-crawler<span class="w"> </span>create-login-profile<span class="w"> </span>--headless<span class="w"> </span>--url<span class="w"> </span><span class="s2">"https://example.com/"</span>
|
|
</code></pre></div>
|
|
<h2 id="automated-profile-creation-for-user-login">Automated Profile Creation for User Login<a class="headerlink" href="#automated-profile-creation-for-user-login" title="Permanent link">¶</a></h2>
|
|
<p>If the <code>--automated</code> flag is provided, Browsertrix Crawler will attempt to create a profile automatically after logging in to sites with a username and password. The username and password can be provided via <code>--username</code> and <code>--password</code> flags or, if omitted, from a command-line prompt.</p>
|
|
<p>When using <code>--automated</code> or <code>--username</code> / <code>--password</code>, Browsertrix Crawler will not launch an interactive browser and instead will attempt to finish automatically.</p>
|
|
<p>The automated profile creation system will log in to a single website with supplied credentials and then save the profile.</p>
|
|
<p>The script profile creation system also take a screenshot so you can check if the login succeeded.</p>
|
|
<div class="admonition example">
|
|
<p class="admonition-title">Example: Launch a browser and login to the digipres.club Mastodon instance</p>
|
|
<p>To automatically created a logged-in browser profile, run:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>docker<span class="w"> </span>run<span class="w"> </span>-v<span class="w"> </span><span class="nv">$PWD</span>/crawls/profiles:/crawls/profiles<span class="w"> </span>-it<span class="w"> </span>webrecorder/browsertrix-crawler<span class="w"> </span>create-login-profile<span class="w"> </span>--url<span class="w"> </span><span class="s2">"https://digipres.club/"</span>
|
|
</code></pre></div>
|
|
<p>The script will then prompt you for login credentials, attempt to login, and create a tar.gz file in <code>./crawls/profiles/profile.tar.gz</code>.</p>
|
|
</div>
|
|
<ul>
|
|
<li>
|
|
<p>The <code>--url</code> parameter should specify the URL of a login page.</p>
|
|
</li>
|
|
<li>
|
|
<p>To specify a custom filename, pass along <code>--filename</code> parameter.</p>
|
|
</li>
|
|
<li>
|
|
<p>To specify the username and password on the command line (for automated profile creation), pass <code>--username</code> and <code>--password</code> flags.</p>
|
|
</li>
|
|
<li>
|
|
<p>To specify headless mode, add the <code>--headless</code> flag. Note that for crawls run with <code>--headless</code> flag, it is recommended to also create the profile with <code>--headless</code> to ensure the profile is compatible.</p>
|
|
</li>
|
|
<li>
|
|
<p>To specify the window size for the profile creation embedded browser, specify <code>--windowSize WIDTH,HEIGHT</code>. (The default is 1600x900)</p>
|
|
</li>
|
|
</ul>
|
|
<p>The profile creation script attempts to detect the username and password fields on a site as generically as possible, but may not work for all sites.</p>
|
|
<h2 id="using-browser-profile-with-a-crawl">Using Browser Profile with a Crawl<a class="headerlink" href="#using-browser-profile-with-a-crawl" title="Permanent link">¶</a></h2>
|
|
<p>To use a previously created profile with a crawl, use the <code>--profile</code> flag or <code>profile</code> option. The <code>--profile</code> flag can then be used to specify any Brave Browser profile stored as a tarball. Browser profile can be either stored locally and provided as a path, or available online at any HTTP(S) URL which will be downloaded before starting the crawl. Using profiles created with same or older version of Browsertrix Crawler is recommended to ensure compatibility. This option allows running a crawl with the browser already pre-configured, logged in to certain sites, language settings configured, etc.</p>
|
|
<p>After running the above command, you can now run a crawl with the profile, as follows:</p>
|
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a>docker<span class="w"> </span>run<span class="w"> </span>-v<span class="w"> </span><span class="nv">$PWD</span>/crawls:/crawls/<span class="w"> </span>-it<span class="w"> </span>webrecorder/browsertrix-crawler<span class="w"> </span>crawl<span class="w"> </span>--profile<span class="w"> </span>/crawls/profiles/profile.tar.gz<span class="w"> </span>--url<span class="w"> </span>https://digipres.club/<span class="w"> </span>--generateWACZ<span class="w"> </span>--collection<span class="w"> </span>test-with-profile
|
|
</code></pre></div>
|
|
<p>Profiles can also be loaded from an http/https URL, eg. <code>--profile https://example.com/path/to/profile.tar.gz</code>.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</article>
|
|
</div>
|
|
|
|
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|
</div>
|
|
|
|
</main>
|
|
|
|
<footer class="md-footer">
|
|
|
|
|
|
|
|
<nav class="md-footer__inner md-grid" aria-label="Footer" >
|
|
|
|
|
|
<a href="../yaml-config/" class="md-footer__link md-footer__link--prev" aria-label="Previous: YAML Crawl Config">
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
|
|
</div>
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Previous
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
YAML Crawl Config
|
|
</div>
|
|
</div>
|
|
</a>
|
|
|
|
|
|
|
|
<a href="../proxies/" class="md-footer__link md-footer__link--next" aria-label="Next: Crawling with Proxies">
|
|
<div class="md-footer__title">
|
|
<span class="md-footer__direction">
|
|
Next
|
|
</span>
|
|
<div class="md-ellipsis">
|
|
Crawling with Proxies
|
|
</div>
|
|
</div>
|
|
<div class="md-footer__button md-icon">
|
|
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
|
|
</div>
|
|
</a>
|
|
|
|
</nav>
|
|
|
|
|
|
<div class="md-footer-meta md-typeset">
|
|
<div class="md-footer-meta__inner md-grid">
|
|
<div class="md-copyright">
|
|
|
|
<div class="md-copyright__highlight">
|
|
Creative Commons Attribution 4.0 International (CC BY 4.0)
|
|
</div>
|
|
|
|
|
|
</div>
|
|
|
|
|
|
<div class="md-social">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://webrecorder.net" target="_blank" rel="noopener" title="webrecorder.net" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-globe" viewBox="0 0 16 16">
|
|
<path d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855A7.97 7.97 0 0 0 5.145 4H7.5V1.077zM4.09 4a9.267 9.267 0 0 1 .64-1.539 6.7 6.7 0 0 1 .597-.933A7.025 7.025 0 0 0 2.255 4H4.09zm-.582 3.5c.03-.877.138-1.718.312-2.5H1.674a6.958 6.958 0 0 0-.656 2.5h2.49zM4.847 5a12.5 12.5 0 0 0-.338 2.5H7.5V5H4.847zM8.5 5v2.5h2.99a12.495 12.495 0 0 0-.337-2.5H8.5zM4.51 8.5a12.5 12.5 0 0 0 .337 2.5H7.5V8.5H4.51zm3.99 0V11h2.653c.187-.765.306-1.608.338-2.5H8.5zM5.145 12c.138.386.295.744.468 1.068.552 1.035 1.218 1.65 1.887 1.855V12H5.145zm.182 2.472a6.696 6.696 0 0 1-.597-.933A9.268 9.268 0 0 1 4.09 12H2.255a7.024 7.024 0 0 0 3.072 2.472zM3.82 11a13.652 13.652 0 0 1-.312-2.5h-2.49c.062.89.291 1.733.656 2.5H3.82zm6.853 3.472A7.024 7.024 0 0 0 13.745 12H11.91a9.27 9.27 0 0 1-.64 1.539 6.688 6.688 0 0 1-.597.933zM8.5 12v2.923c.67-.204 1.335-.82 1.887-1.855.173-.324.33-.682.468-1.068H8.5zm3.68-1h2.146c.365-.767.594-1.61.656-2.5h-2.49a13.65 13.65 0 0 1-.312 2.5zm2.802-3.5a6.959 6.959 0 0 0-.656-2.5H12.18c.174.782.282 1.623.312 2.5h2.49zM11.27 2.461c.247.464.462.98.64 1.539h1.835a7.024 7.024 0 0 0-3.072-2.472c.218.284.418.598.597.933zM10.855 4a7.966 7.966 0 0 0-.468-1.068C9.835 1.897 9.17 1.282 8.5 1.077V4h2.355z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://forum.webrecorder.net/" target="_blank" rel="noopener" title="forum.webrecorder.net" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-chat-left-text-fill" viewBox="0 0 16 16">
|
|
<path d="M0 2a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H4.414a1 1 0 0 0-.707.293L.854 15.146A.5.5 0 0 1 0 14.793V2zm3.5 1a.5.5 0 0 0 0 1h9a.5.5 0 0 0 0-1h-9zm0 2.5a.5.5 0 0 0 0 1h9a.5.5 0 0 0 0-1h-9zm0 2.5a.5.5 0 0 0 0 1h5a.5.5 0 0 0 0-1h-5z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://digipres.club/@webrecorder" target="_blank" rel="noopener me" title="digipres.club" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-mastodon" viewBox="0 0 16 16">
|
|
<path d="M11.19 12.195c2.016-.24 3.77-1.475 3.99-2.603.348-1.778.32-4.339.32-4.339 0-3.47-2.286-4.488-2.286-4.488C12.062.238 10.083.017 8.027 0h-.05C5.92.017 3.942.238 2.79.765c0 0-2.285 1.017-2.285 4.488l-.002.662c-.004.64-.007 1.35.011 2.091.083 3.394.626 6.74 3.78 7.57 1.454.383 2.703.463 3.709.408 1.823-.1 2.847-.647 2.847-.647l-.06-1.317s-1.303.41-2.767.36c-1.45-.05-2.98-.156-3.215-1.928a3.614 3.614 0 0 1-.033-.496s1.424.346 3.228.428c1.103.05 2.137-.064 3.188-.189zm1.613-2.47H11.13v-4.08c0-.859-.364-1.295-1.091-1.295-.804 0-1.207.517-1.207 1.541v2.233H7.168V5.89c0-1.024-.403-1.541-1.207-1.541-.727 0-1.091.436-1.091 1.296v4.079H3.197V5.522c0-.859.22-1.541.66-2.046.456-.505 1.052-.764 1.793-.764.856 0 1.504.328 1.933.983L8 4.39l.417-.695c.429-.655 1.077-.983 1.934-.983.74 0 1.336.259 1.791.764.442.505.661 1.187.661 2.046v4.203z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://www.youtube.com/@webrecorder" target="_blank" rel="noopener" title="www.youtube.com" class="md-social__link">
|
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-youtube" viewBox="0 0 16 16">
|
|
<path d="M8.051 1.999h.089c.822.003 4.987.033 6.11.335a2.01 2.01 0 0 1 1.415 1.42c.101.38.172.883.22 1.402l.01.104.022.26.008.104c.065.914.073 1.77.074 1.957v.075c-.001.194-.01 1.108-.082 2.06l-.008.105-.009.104c-.05.572-.124 1.14-.235 1.558a2.007 2.007 0 0 1-1.415 1.42c-1.16.312-5.569.334-6.18.335h-.142c-.309 0-1.587-.006-2.927-.052l-.17-.006-.087-.004-.171-.007-.171-.007c-1.11-.049-2.167-.128-2.654-.26a2.007 2.007 0 0 1-1.415-1.419c-.111-.417-.185-.986-.235-1.558L.09 9.82l-.008-.104A31.4 31.4 0 0 1 0 7.68v-.123c.002-.215.01-.958.064-1.778l.007-.103.003-.052.008-.104.022-.26.01-.104c.048-.519.119-1.023.22-1.402a2.007 2.007 0 0 1 1.415-1.42c.487-.13 1.544-.21 2.654-.26l.17-.007.172-.006.086-.003.171-.007A99.788 99.788 0 0 1 7.858 2h.193zM6.4 5.209v4.818l4.157-2.408L6.4 5.209z"/>
|
|
</svg>
|
|
</a>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
<div class="md-dialog" data-md-component="dialog">
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["navigation.sections", "navigation.tabs", "navigation.tabs.sticky", "navigation.instant", "navigation.tracking", "navigation.indexes", "navigation.footer", "content.code.copy", "content.action.edit", "content.tooltips", "search.suggest"], "search": "../../assets/javascripts/workers/search.7a47a382.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|
|
|
|
|
<script src="../../assets/javascripts/bundle.e71a0d61.min.js"></script>
|
|
|
|
|
|
</body>
|
|
</html> |