Decode content bytes only with supplied charset or static list of charsets to try

This commit is contained in:
benoit74 2024-06-14 13:21:18 +00:00
parent 4c12681b1c
commit b1c8a35212
No known key found for this signature in database
GPG key ID: B89606434FC7B530
18 changed files with 1343 additions and 271 deletions

View file

@ -35,10 +35,6 @@ It provide two main features:
Except that, scraper directly uses WarcRecord (returned by cdxj_indexer, implemented in warcio) to access metadata and such.
## chardet
The [chardet Python library](https://pypi.org/project/chardet/) is used to detect the character encoding of files when it is absent (typically only HTML files specify their encoding) or incoherent.
## zimscraperlib
[zimscraperlib Python library](https://pypi.org/project/zimscraperlib) is used for ZIM operations.

View file

@ -13,7 +13,6 @@ dependencies = [
"requests==2.32.3",
"zimscraperlib==3.3.2",
"jinja2==3.1.4",
"chardet==5.2.0",
# to support possible brotli content in warcs, must be added separately
"brotlipy==0.7.0",
"cdxj_indexer==1.4.5",

View file

@ -63,6 +63,7 @@ class Rewriter:
existing_zim_paths: set[ZimPath],
missing_zim_paths: set[ZimPath] | None,
js_modules: set[ZimPath],
charsets_to_try: list[str],
):
self.content = get_record_content(record)
@ -78,24 +79,11 @@ class Rewriter:
self.rewrite_mode = self.get_rewrite_mode(record, mimetype)
self.js_modules = js_modules
self.charsets_to_try = charsets_to_try
@property
def content_str(self) -> str:
try:
result = to_string(self.content, self.encoding)
if self.encoding and result.encoding and result.encoding != self.encoding:
logger.warning(
f"Encoding issue, '{result.encoding}' has been used instead of "
f"'{self.encoding}' to decode content of '{self.orig_url_str}'"
)
if result.chars_ignored:
logger.warning(
"Encoding issue, some chars had to be ignored to properly decode "
f"content of '{self.orig_url_str}' with '{result.encoding}'"
)
return result.value
except ValueError as e:
raise RuntimeError(f"Impossible to decode item {self.path.value}") from e
return to_string(self.content, self.encoding, self.charsets_to_try)
def rewrite(
self, pre_head_template: Template, post_head_template: Template

View file

@ -192,6 +192,9 @@ class Converter:
self.redirections: dict[ZimPath, ZimPath] = {}
self.missing_zim_paths: set[ZimPath] | None = set() if args.verbose else None
self.js_modules: set[ZimPath] = set()
self.charsets_to_try: list[str] = [
charset_to_try.strip() for charset_to_try in args.charsets_to_try.split(",")
]
# progress file handling
self.stats_filename = (
@ -747,6 +750,7 @@ class Converter:
self.expected_zim_items,
self.missing_zim_paths,
self.js_modules,
self.charsets_to_try,
)
if len(payload_item.content) != 0:

View file

@ -33,13 +33,19 @@ class WARCPayloadItem(StaticItem):
existing_zim_paths: set[ZimPath],
missing_zim_paths: set[ZimPath] | None,
js_modules: set[ZimPath],
charsets_to_try: list[str],
):
super().__init__()
self.path = path.value
self.mimetype = get_record_mime_type(record)
(self.title, self.content) = Rewriter(
path, record, existing_zim_paths, missing_zim_paths, js_modules
path,
record,
existing_zim_paths,
missing_zim_paths,
js_modules,
charsets_to_try,
).rewrite(pre_head_template, post_head_template)
def get_hints(self):

View file

@ -110,6 +110,14 @@ def main(raw_args=None):
dest="disable_metadata_checks",
)
parser.add_argument(
"--charsets-to-try",
help="List of charsets to try decode content when charset is not defined at "
"document or HTTP level. Single string, values separated by a comma. Default: "
"UTF-8,ISO-8859-1",
default="UTF-8,ISO-8859-1",
)
args = parser.parse_args(args=raw_args)
converter = Converter(args)
return converter.run()

View file

@ -5,9 +5,7 @@ from __future__ import annotations
import re
from http import HTTPStatus
from typing import NamedTuple
import chardet
from bs4 import BeautifulSoup
from warcio.recordloader import ArcWarcRecord
@ -19,12 +17,6 @@ ENCODING_RE = re.compile(
)
class StringConversionResult(NamedTuple):
value: str
encoding: str | None
chars_ignored: bool
def get_version():
return __version__
@ -132,84 +124,58 @@ def get_record_encoding(record: ArcWarcRecord) -> str | None:
return m.group("encoding")
def to_string(input_: str | bytes, encoding: str | None) -> StringConversionResult:
def to_string(
input_: str | bytes, http_encoding: str | None, charsets_to_try: list[str]
) -> str:
"""
Decode content to string, trying to be as tolerant as possible of an invalid
declared encoding.
Decode content to string based on charset declared in content or fallback.
This tries to decode the content using 3 methods:
- From http headers in the warc record (given as `encoding` argument)
- From encoding declaration inside the content (hoping that content can be
loosely decoded using ascii to something usable)
- From statistical analysis of the content (made by chardet)
This method tries to not be smarter than necessary.
If all these methods fail, try again with the encoding passed via http headers but
ignore all unrecognized characters.
First, it tries to find a charset declaration inside the first bytes of the content
(hoping that the first bytes can be loosely decoded using a few known encodings into
something usable). If found, it is used to decode and any bad character is
automatically replaced, assuming the document editor is right.
Returns the decoded content, the encoding used (or None if the input was already
decoded) and a boolean indicating whether unrecognized characters had to be ignored
or not.
Second, if no charset declaration has been found in content, it uses the charset
declared in HTTP `Content-Type` header. This is passed to this method as
`http_encoding` argument. If present, it is used to decode and any bad character is
automatically replaced, assuming web server is right.
Finally, we fall back to the `charsets_to_try` argument, which is a list of charsets
to try. Each charset is tried in order, and any bad character found raises an
error. If none of these charsets manages to decode the content, an exception is
raised.
Returns the decoded content.
"""
http_encoding = encoding
tried_encodings: set[str] = set()
if isinstance(input_, str):
return StringConversionResult(input_, None, False)
return input_
if not input_:
# Empty bytes are easy to decode
return StringConversionResult("", None, False)
if encoding:
try:
return StringConversionResult(input_.decode(encoding), encoding, False)
except (ValueError, LookupError):
tried_encodings.add(encoding)
pass
return ""
# Search for encoding from content first bytes based on regexp
content_start = input_[:1024].decode("ascii", errors="replace")
if m := ENCODING_RE.search(content_start):
encoding = m.group("encoding")
if encoding and encoding not in tried_encodings:
try:
return StringConversionResult(input_.decode(encoding), encoding, False)
except (ValueError, LookupError):
tried_encodings.add(encoding)
pass
for encoding in ["ascii", "utf-16", "utf-32"]:
content_start = input_[:1024].decode(encoding, errors="replace")
if m := ENCODING_RE.search(content_start):
head_encoding = m.group("encoding")
return input_.decode(head_encoding, errors="replace")
# Try to detect the most probable encoding with chardet (and only most probable
# one, since otherwise we will likely find an encoding which pass but produces only
# garbage with most characters badly decoded just due to a wrongly encoded character
# see https://github.com/openzim/warc2zim/issues/221)
# Note: we use the detect_all method of chardet even if we are interested only in
# the most probable encoding, because (as of chardet 5.2.0 at least) the detect
# chardet method seems to be more naive, and detect_all gives better results in our
# tests
chardet_encodings = chardet.detect_all(input_)
if len(chardet_encodings):
chardet_encoding = chardet_encodings[0]["encoding"]
if chardet_encoding and chardet_encoding not in tried_encodings:
try:
return StringConversionResult(
input_.decode(chardet_encoding), chardet_encoding, False
)
except (ValueError, LookupError):
tried_encodings.add(chardet_encoding)
pass
# Try again with the encoding declared in HTTP headers, but this time ignore
# all bad chars
if http_encoding:
return input_.decode(http_encoding, errors="replace")
# Try all charsets_to_try passed
for charset_to_try in charsets_to_try:
try:
return StringConversionResult(
input_.decode(http_encoding, errors="ignore"), http_encoding, True
)
return input_.decode(charset_to_try)
except (ValueError, LookupError):
pass
raise ValueError(f"Impossible to decode content {input_[:200]}")
raise ValueError(f"No suitable charset found to decode content {input_[:200]}")
def get_record_content(record: ArcWarcRecord):

View file

@ -0,0 +1,87 @@
{
"files": [
{
"filename": "file01.js",
"source": "https://www.marxists.org/espanol/menu.js",
"date": "2024-06",
"probable_charset": "ISO-8859-1",
"expected_strings": [
"Afanásiev, Víktor",
"Andrópov, Yuri",
"Amaguaña, Tránsito",
"Cunhal, Álvaro",
"De la Cruz, Juana Inés",
"Faure, Sèbastien"
]
},
{
"filename": "file02.js",
"source": "https://www.cloudflare.com/vendor/onetrust/scripttemplates/202308.2.0/otBannerSdk.js",
"date": "2024-06",
"probable_charset": "UTF-8",
"expected_strings": [
"_Container:\"#ot-ven-lst\",P_Ven_Bx:\"ot-ven-box\",P_Ven_Name:\".ot-ven-name\"",
"ist,IabType:e.IabType,InactiveText:e.InactiveText,IsConsentLoggingEnabled:e.IsConsentLoggingEnabl",
"0;\\n transition: visibility 0s \"+e+\"ms, opacity \"+e+\"ms linear;\\n \",!0);var",
"r.prototype.escapeRegExp=function(e){return e.replace(/[-/\\\\^$*+?.()|[\\]{}]/g,\"\\\\$&\")}"
]
},
{
"filename": "file03.html",
"source": "https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/?thematique=internet",
"date": "2024-06",
"probable_charset": "UTF-8",
"contains_bad_chars": true,
"expected_strings": [
"Vous souhaitez changer de navigateur et utiliser Firefox ? Ce tutoriel vous détaille la procédure d'installation et la configuration pour une premi<6D>..."
]
},
{
"filename": "file04.js",
"source": "https://static.mailerlite.com/js/w/ml_jQuery.inputmask.bundle.min.js?v3.3.1",
"date": "2024-06",
"probable_charset": "ascii",
"expected_strings": [
"1,this.isOptional=b||!1,this.isQuantifier=c||!1,this.isAlterna",
"is;if(na=!1,g.clearMaskOnLostFocus&&document.activeElement!==b){var c=x().slice(),d=b.inputmask._v"
]
},
{
"filename": "file05.js",
"source": "https://static.sketchfab.com/static/builds/web/dist/ac0f732c4fc1a30c77920d75c1a9be83-v2.js",
"date": "2024-06",
"probable_charset": "ascii",
"expected_strings": [
"isTickUsed||(this._isTickUsed=!0,this._schedule(this.drainQueues))},s.prototype._reset=function(){this._is"
]
},
{
"filename": "file06.html",
"source": "https://website.test.openzim.org/chinese-encoding.html",
"date": "2024-06",
"known_charset": "gb2312",
"expected_strings": [
"simplified chinese characters: 汉字"
]
},
{
"filename": "file07.html",
"source": "https://website.test.openzim.org/chinese-encoding.html without <meta> header",
"date": "2024-06",
"known_charset": "gb2312",
"http_charset": "gb2312",
"expected_strings": [
"simplified chinese characters: 汉字"
]
},
{
"filename": "file08.js",
"source": "https://community.mozilla.org/wp-content/plugins/events-manager/includes/js/events-manager.min.js?ver=6.4.1",
"date": "2024-06",
"probable_charset": "UTF-8",
"expected_strings": [
"t Array]\"===Object.prototype.toString.call(e)},s={a:\"[aḀḁĂăÂâǍǎȺⱥȦȧẠạÄäÀàÁáĀāÃãÅåąĄÃąĄ]\",b:\"[b␢β"
]
}
]
}

438
tests/encodings/file01.js Normal file
View file

@ -0,0 +1,438 @@
// Navigate the browser to the URL held in the `value` of the option currently
// selected in `form.select1`.
//
// When the browser reports itself as "Netscape" with a major version >= 3, the
// selected option's label is briefly rewritten before navigating; any other
// browser navigates immediately.
function selectaplace(form) {
var appname= navigator.appName;
var appversion=parseInt(navigator.appVersion);
if (appname == "Netscape" && appversion >= 3) {
var formindex=form.select1.selectedIndex;
// Remember the original label so it can be restored afterwards.
var storage=form.select1.options[formindex].text;
if (form.select1.options[formindex].value != "none") {
var msg=storage+"You are now being transferred to the -> "+storage;
// Replace the label with successively shorter tails of msg until only the
// original label is left.
for (var spot=0;spot<msg.length-storage.length;spot++) {
var x=msg.substring(spot,msg.length);
form.select1.options[formindex].text=x;
// Empty counting loop — presumably a crude delay between steps.
for(var d=0;d<150;d++) { };
}
window.location=form.select1.options[formindex].value;
form.select1[formindex].text=storage;
} else {
// An option whose value is "none" is a placeholder: show a notice in its
// label, then restore the original label without navigating.
form.select1[formindex].text="Not a real option!";
for(var d=0;d<1250;d++) { };
form.select1[formindex].text=storage;
}
}
else {
// Other browsers: navigate straight to the selected option's value.
var formindex=form.select1.selectedIndex;
window.location=form.select1.options[formindex].value;
}
}
function makeMyMenu() {
document.write ('<table align=center border=0><tr>');
document.write ('<td>');
document.write ('<form><select name="select1" onChange="selectaplace(this.form)" size=1>');
document.write ('<option value="">Escoja el autor que desea leer:');
document.write ('<option value="tematica/palestina/index.htm">Abu Nimah, Hasan');
document.write ('<option value="tematica/palestina/index.htm">Abunimah, Ali');
document.write ('<option value="adler-max/index.htm">Adler, Max');
document.write ('<option value="adorno/index.htm">Adorno, Theodor');
document.write ('<option value="tematica/mujer/autores/arianzen/index.htm">Arianzen, Catalina');
document.write ('<option value="afanasiev/index.htm">Afanásiev, Víktor');
document.write ('<option value="aldred/index.htm">Aldred, Guy');
document.write ('<option value="alia/index.htm">Alia, Ramiz');
document.write ('<option value="althusser/index.htm">Althusser, Louis');
document.write ('<option value="alvarado/index.htm">Alvarado, Huberto');
document.write ('<option value="andropov/index.htm">Andrópov, Yuri');
document.write ('<option value="arismendi/index.htm">Arismendi, Rodney');
document.write ('<option value="armand/index.htm">Armand, Inessa');
document.write ('<option value="allende/index.htm">Allende, Salvador');
document.write ('<option value="amaguana/index.htm">Amaguaña, Tránsito');
document.write ('<option value="tematica/mujer/amar/index.htm">Amar y Borbón, Josepha');
document.write ('<option value="andrade/index.htm">Andrade, Juan');
document.write ('<option value="andropov/index.htm">Andrópov, Yuri');
document.write ('<option value="arafat/index.htm">Arafat, Yasser');
document.write ('<option value="tematica/elsalvador/tamba/index.htm">Aragón Cabrera, Carlos Francisco');
document.write ('<option value="tematica/mujer/autores/arenal/index.htm">Arenal, Concepción');
document.write ('<option value="tematica/cienpol/index.htm">Aristoteles');
document.write ('<option value="ave-lallemant/index.htm">Avé-Lallemant, Germán');
document.write ('<option value="tematica/mujer/autores/baader/index.htm">Baader, Ottilie');
document.write ('<option value="babeuf/index.htm">Babeuf, Gracchus');
document.write ('<option value="bajtin/index.htm">Bajtin, Mijail');
document.write ('<option value="bakunin/index.htm">Bakunin, Mikhail');
document.write ('<option value="barbusse/index.htm">Barbusse, Henri');
document.write ('<option value="batkis/index.htm">Batkis, Grigorii');
document.write ('<option value="begino/index.htm">Begino, Juana María');
document.write ('<option value="berdyaev/index.htm">Berdyaev, Nikolai');
document.write ('<option value="beslay/index.htm">Beslay, Charles');
document.write ('<option value="bebel/index.htm">Bebel, Auguste');
document.write ('<option value="argala/index.htm">Beñaran Ordeñana, José');
document.write ('<option value="tematica/mujer/autores/berkins/index.htm">Berkins, Lohana');
document.write ('<option value="berkman/index.htm">Berkman, Alexander');
document.write ('<option value="besteiro/index.htm">Besteiro, Julián');
document.write ('<option value="bilbao/index.htm">Bilbao, Esteban');
document.write ('<option value="bland/index.htm">Bland, Bill');
document.write ('<option value="blanqui/index.htm">Blanqui, Auguste');
document.write ('<option value="bloch/index.htm">Bloch, Ernst');
document.write ('<option value="bogdanov/index.htm">Bogdánov, Aleksandr');
document.write ('<option value="bordiga/index.htm">Bordiga, Amadeo');
document.write ('<option value="borojov/index.htm">Borojov, Dov Ber');
document.write ('<option value="bortenstein/index.htm">Bortenstein, Mieczyslaw');
document.write ('<option value="tematica/india/index.htm#bose">Bose, Subhas Chandra');
document.write ('<option value="brendel/index.htm">Brendel, Cajo');
document.write ('<option value="breton/index.htm">Breton, André');
document.write ('<option value="brezhnev/index.htm">Brezhnev, Leonid');
document.write ('<option value="broue/index.htm">Broué, Pierre');
document.write ('<option value="brust/index.htm">Brust, Joachim');
document.write ('<option value="bujarin/index.htm">Bujarin, Nicolas');
document.write ('<option value="burnham/index.htm">Burnham, James');
document.write ('<option value="cacuango/index.htm">Cacuango, Dolores');
document.write ('<option value="cannon/index.htm">Cannon, James P.');
document.write ('<option value="tematica/mujer/autores/campoamor/index.htm">Campoamor, Clara');
document.write ('<option value="tematica/elsalvador/cardenal/index.htm">Cardenal Caldera, Antonio');
document.write ('<option value="tematica/elsalvador/carpio/index.htm">Carpio, Salvador Cayetano');
document.write ('<option value="castoriadis/index.htm">Castoriadis, Cornelius');
document.write ('<option value="castro/index.htm">Castro, Fidel');
document.write ('<option value="caudwell/index.htm">Caudwell, Christopher');
document.write ('<option value="ceausescu/index.htm">Ceau&#351;escu, Nicolae');
document.write ('<option value="cerpa/index.htm">Cerpa Cartolini, Néstor');
document.write ('<option value="cliff/index.htm">Cliff, Tony');
document.write ('<option value="codovilla/index.htm">Codovilla, Victorio');
document.write ('<option value="cooke/index.htm">Cooke, John William');
document.write ('<option value="cornu/index.htm">Cornu, Auguste');
document.write ('<option value="corvalan/index.htm">Corvalán, Luis');
document.write ('<option value="tematica/mujer/autores/cristobal/opresion_y_lucha_de_la_mujer_trabajadora.pdf">Cristóbal, Olga');
document.write ('<option value="tematica/guerrilla/uruguay/mln/cultelli/index.htm">Cultelli, Andrés');
document.write ('<option value="cunhal/index.htm">Cunhal, Álvaro');
document.write ('<option value="cunow/index.htm">Cunow, Heinrich');
document.write ('<option value="chavez/index.htm">Chávez, Hugo');
document.write ('<option value="checa/index.htm">Checa, Pedro');
document.write ('<option value="chen/index.htm">Chen Boda');
document.write ('<option value="chen_duxiu/index.htm">Chen Duxiu');
document.write ('<option value="chen_yun/index.htm">Chen Yun');
document.write ('<option value="chernenko/index.htm">Chernenko, Konstantín');
document.write ('<option value="chernyshevski/index.htm">Chernyshevski, Nikolai');
document.write ('<option value="chernin/index.htm">Chernin, Rose');
document.write ('<option value="tematica/elsalvador/chicas/index.htm">Chicas, Eugenio');
document.write ('<option value="chicherin/index.htm">Chicherin, Georgi V.');
document.write ('<option value="tematica/elsalvador/dalton/index.htm">Dalton, Roque');
document.write ('<option value="damen/index.htm">Damen, Onorato');
document.write ('<option value="darwin/index.htm">Darwin, Charles');
document.write ('<option value="tematica/mujer/autores/datri/index.htm">DAtri, Andrea');
document.write ('<option value="tematica/mujer/autores/davis/index.htm">Davis, Angela Y.');
document.write ('<option value="debs/index.htm">Debs, Eugene');
document.write ('<option value="deng/index.htm">Deng Xiaoping');
document.write ('<option value="tematica/mujer/autores/index.htm">De la Cruz, Juana Inés');
document.write ('<option value="delapuente/index.htm">De la Puente Uceda, Luís');
document.write ('<option value="delprado/index.htm">del Prado, Jorge');
document.write ('<option value="deutsch/index.htm">Deutsch, Leo');
document.write ('<option value="deutscher/index.htm">Deutscher, Isaac');
document.write ('<option value="deville/index.htm">Deville, Gabriel');
document.write ('<option value="di_bartolomeo/index.htm">di Bartolomeo, Nicola');
document.write ('<option value="diaz_martinez/index.htm">Díaz Martínez, Antonio');
document.write ('<option value="diaz/index.htm">Díaz Ramos, José');
document.write ('<option value="dickmann/index.htm">Dickmann, Enrique');
document.write ('<option value="dimitrov/index.htm">Dimitrov, Jorge');
document.write ('<option value="tematica/mujer/autores/dixon/index.htm">Dixon, Marlene');
document.write ('<option value="dobbs/index.htm">Dobbs, Farrell');
document.write ('<option value="drabkina/index.htm">Drabkina, Elisaveta');
document.write ('<option value="draper/index.htm">Draper, Hal');
document.write ('<option value="dunayevskaya/index.htm">Dunayevskaya, Raya');
document.write ('<option value="edelman/index.htm">Edelman, Fanny');
document.write ('<option value="einstein/index.htm">Einstein, Albert');
document.write ('<option value="m-e/index.htm">Engels, Friedrich');
document.write ('<option value="escobar/index.htm">Escobar Zapata, Federico');
document.write ('<option value="fabbri/index.htm">Fabbri, Luigi');
document.write ('<option value="fadeyev/index.htm">Fadéyev, Aleksandr');
document.write ('<option value=""fanon/index.htm">Fanon, Frantz');
document.write ('<option value="farrell_james/index.htm">Farrell, James T.');
document.write ('<option value="faure/index.htm">Faure, Sèbastien');
document.write ('<option value="fava/index.htm">Fava, Athos');
document.write ('<option value="tematica/mujer/autores/federici/2017/0001.htm">Federici, Silvia');
document.write ('<option value="fernandez_oct/index.htm">Fernández Vilchis, Octavio');
document.write ('<option value="figueroa/index.htm">Figueroa, Humilde');
document.write ('<option value="tematica/histsov/fiodorov/index.htm">Fiodorov, Alexei');
document.write ('<option value="fischer/index.htm">Fischer, Ernst');
document.write ('<option value="floresgalindo/index.htm">Flores Galindo, Alberto');
document.write ('<option value="fonseca/index.htm">Fonseca Amador, Carlos');
document.write ('<option value="ford/index.htm">Ford, James W.');
document.write ('<option value="foster/index.htm">Foster, William Z.');
document.write ('<option value="france/index.htm">France, Anatole');
document.write ('<option value="tematica/autores/frencia/index.htm">Frencia, Cintia');
document.write ('<option value="frondizi/index.htm">Frondizi, Silvio');
document.write ('<option value="tematica/mujer/gago/index.htm">Gago, Angie');
document.write ('<option value="tematica/india/organismos/cpi-maoista/ganapathy/2010/enero/0001.htm">Ganapathy');
document.write ('<option value="tematica/india/index.htm#gandhi">Gandhi, Mohandas K.');
document.write ('<option value="tematica/agro/index.htm">García, José M.');
document.write ('<option value="tematica/nepal/index.htm">Gaurav');
document.write ('<option value="ghandy/index.htm">Ghandy, Anuradha');
document.write ('<option value="gherra/index.htm">Gherra, Pepita');
document.write ('<option value="m-e/biografia.htm">Gemkow, Henrich');
document.write ('<option value="ghandy/index.htm">Ghandy, Anuradha');
document.write ('<option valie="glotzer/index.htm">Glotzer, Albert');
document.write ('<option value="tematica/mujer/autores/gogol/index.htm">Gogol, Eugene');
document.write ('<option value="goldman/index.htm">Goldman, Emma');
document.write ('<option value="gonzalez_prada/index.htm">González Prada, Manuel');
document.write ('<option value="goonewardene-leslie/index.htm">Goonewardene, Leslie');
document.write ('<option value="gorbachov/index.htm">Gorbachov, Mikhail');
document.write ('<option value="gorki/index.htm">Gorki, Máxim');
document.write ('<option value="gorkin-a-f/index.htm">Gorkin, Aleksandr');
document.write ('<option value="gorkin/index.htm">Gorkin, Julián');
document.write ('<option value="gorter/index.htm">Gorter, Herman');
document.write ('<option value="tematica/mujer/gouges/index.htm">des Gouges, Olympe');
document.write ('<option value="gramsci/index.htm">Gramsci, Antonio');
document.write ('<option value="tematica/elsalvador/grande/index.htm">Grande, Rutilio');
document.write ('<option value="grant/index.htm">Grant, Ted');
document.write ('<option value="guerin/index.htm">Guérin, Daniel');
document.write ('<option value="guevara/index.htm">Guevara, Ernesto Che');
document.write ('<option value="guillen/index.htm">Guillén, Abraham');
document.write ('<option value="guizot/index.htm">Guizot, Francois');
document.write ('<option value="tematica/palestina/index.htm">Habash, George');
document.write ('<option value="hall/index.htm">Hall, Gus');
document.write ('<option value="hallas/index.htm">Hallas, Duncan');
document.write ('<option value="handal/index.htm">Handal, Schafik Jorge');
document.write ('<option value="harman/index.htm">Harman, Chris');
document.write ('<option value="tematica/mujer/autores/hart/index.htm">Hart, Dana');
document.write ('<option value="harnecker/index.htm">Harnecker, Marta');
document.write ('<option value="haya/index.htm">Haya de la Torre, Víctor Raúl');
document.write ('<option value="hegel/index.htm">Hegel, G. W. F.');
document.write ('<option value="heijenoort/index.htm">van Heijenoort, Jean');
document.write ('<option value="hekmat/index.htm">Hekmat, Mansoor');
document.write ('<option value="heraud/index.htm">Heraud, Javier');
document.write ('<option value="hernandez/index.htm">Hernández Tomás, Jesús');
document.write ('<option value="tematica/agro/index.htm">Heysen, Luis');
document.write ('<option value="hic/index.htm">Hic, Marcel');
document.write ('<option value="ho/index.htm">Ho Chi Minh');
document.write ('<option value="hobbes/index.htm">Hobbes, Thomas');
document.write ('<option value="honecker/index.htm">Honecker, Erich');
document.write ('<option value="feist-honecker/index.htm">Honecker, Margot');
document.write ('<option value="horkheimer/index.htm">Horkheimer, Max');
document.write ('<option value="tematica/econpol/inde.htm">Howell, Peter');
document.write ('<option value="enver/index.htm">Hoxha, Enver');
document.write ('<option value="hu/index.htm">Hu Yaobang');
document.write ('<option value="hua/index.htm">Hua Guofeng');
document.write ('<option value="huartado/index.htm">Hurtado, Ludovico');
document.write ('<option value="hyndman/index.htm">Hyndman, Henry');
document.write ('<option value="ibarruri/index.htm">Ibarruri, Dolores');
document.write ('<option value="ilienkov/index.htm">Ilienkov, Evald');
document.write ('<option value="ingenieros/index.htm">Ingenieros, José');
document.write ('<option value="jackson/index.htm">Jackson, George');
document.write ('<option value="james/index.htm">James, C. L. R.');
document.write ('<option value="jarrin/index.htm">Jarrin, Arturo');
document.write ('<option value="jaures/index.htm">Jaurès, Jean');
document.write ('<option value="tematica/mujer/autores/jeria/index.htm">Jeria, Carmela');
document.write ('<option value="jiang/index.htm">Jiang Qing');
document.write ('<option value="tematica/india/index.htm#jinnah">Jinnah, Muhammad Ali');
document.write ('<option value="just/index.htm">Just, Stéphane');
document.write ('<option value="justo-juanb/index.htm">Justo, Juan B.');
document.write ('<option value="justo/index.htm">Justo, Liborio');
document.write ('<option value="kalinin/index.htm">Kalinin, Mijail');
document.write ('<option value="kapo/index.htm">Kapo, Hysni');
document.write ('<option value="kardelj/index.htm">Kardelj, Edvard');
document.write ('<option value="katayama/index.htm">Katayama, Sen');
document.write ('<option value="kathrada/index.htm">Kathrada, Ahmed');
document.write ('<option value="kautsky/index.htm">Kautsky, Karl');
document.write ('<option value="kautsky-luise/index.htm">Kautsky, Luise');
document.write ('<option value="kaypakkaya/index.htm">Kaypakkaya, Ibrahim');
document.write ('<option value="khrushchev/index.htm">Khrushchev, Nikita');
document.write ('<option value="kim/index.htm">Kim Il Sung');
document.write ('<option value="kim-jong-il/index.htm">Kim Jong Il');
document.write ('<option value="klement/index.htm">Klement, Rudolf');
document.write ('<option value="klingender/index.htm">Klingender, Francis');
document.write ('<option value="kollontai/index.htm">Kollontai, Alejandra');
document.write ('<option value="korsch/index.htm">Korsch, Karl');
document.write ('<option value="kotane/index.htm">Kotane, Moses');
document.write ('<option value="krilenko/index.htm">Krilenko, Nikolai');
document.write ('<option value="kropotkin/index.htm">Kropotkin, Pedro');
document.write ('<option value="krupskaya/index.htm">Krupskaya, Nadezhda');
document.write ('<option value="laufenberg/index.htm">Laufenberg, Heinrich');
document.write ('<option value="labriola/index.htm">Labriola, Antonio');
document.write ('<option value="lacroix-henri/index.htm">Lacroix, Henri');
document.write ('<option value="lafargue/index.htm">Lafargue, Paul');
document.write ('<option value="le-duan/index.htm">Lê Du&#7849;n');
document.write ('<option value="lenin/index.htm">Lenin, V. I.');
document.write ('<option value="lifschitz/index.htm">Lifschitz, Mikhail');
document.write ('<option value="leon/index.htm">Leon, Abraham');
document.write ('<option value="lessner/index.htm">Lessner, Friedrich');
document.write ('<option value="levano/index.htm">Lévano, Delfín');
document.write ('<option value="levi/index.htm">Levi, Paul');
document.write ('<option value="lifujen/index.htm">Li Fu-jen');
document.write ('<option value="li/index.htm">Li Xiannian');
document.write ('<option value="liebknecht/index.htm">Liebknecht, Karl');
document.write ('<option value="liebknecht-w/index.htm">Liebknecht, Wilhelm');
document.write ('<option value="lin/index.htm">Lin Biao');
document.write ('<option value="lister/index.htm">Líster, Enrique');
document.write ('<option value="liu/index.htm">Liu Shaoqi');
document.write ('<option value="lora/index.htm">Lora, Guillermo');
document.write ('<option value="lora-cam/index.htm">Lora Cam, José F. W.');
document.write ('<option value="losovsky/index.htm">Losovsky, Alexandr');
document.write ('<option value="tematica/mujer/lugones/index.htm">Lugones, Leopoldo');
document.write ('<option value="lukacs/index.htm">Lucáks, Georg');
document.write ('<option value="lumumba/index.htm">Lumumba, Patrice');
document.write ('<option value="lunacha/index.htm">Lunacharsky, Anatoly');
document.write ('<option value="luxem/index.htm">Luxemburgo, Rosa');
document.write ('<option value="makarenko/index.htm">Makarenko, Anton');
document.write ('<option value="makhno/index.htm">Makhno, Nestor');
document.write ('<option value="malatesta/index.htm">Malatesta, Errico');
document.write ('<option value="malato/index.htm">Malato, Charles');
document.write ('<option value="malenkov/index.htm">Malenkov, Georgy');
document.write ('<option value="mandel/index.htm">Mandel, Ernest');
document.write ('<option value="mao/index.htm">Mao Zedong');
document.write ('<option value="tematica/mujer/autores/shila/index.htm">Marandi, Shila');
document.write ('<option value="marcuse/index.htm">Marcuse, Herbert');
document.write ('<option value="mares/index.htm">Mares, Pompeyo');
document.write ('<option value="mariateg/index.htm">Mariátegui, José Carlos');
document.write ('<option value="marigh/index.htm">Marighela, Carlos');
document.write ('<option value="tematica/mujer/autores/g_marin/index.htm">Marín, Gladys');
document.write ('<option value="tematica/mujer/autores/marin/index.htm">Marín, Olga Lucía');
document.write ('<option value="mariner-roberto/index.htm">Mariner, Roberto');
document.write ('<option value="markovic/index.htm">Markovic, Dragoslav');
document.write ('<option value="marks-j-b/index.htm">Marks, John Beaver');
document.write ('<option value="marof/index.htm">Marof, Tristán');
document.write ('<option value="martov/index.htm">Mártov, Julius');
document.write ('<option value="m-e/index.htm">Marx, Carlos');
document.write ('<option value="marx-eleanor/index.htm">Marx, Eleanor');
document.write ('<option value="marx-laura/index.htm"> Marx, Laura');
document.write ('<option value="mathiez/index.htm">Mathiez, Albert');
document.write ('<option value="mattick/index.htm">Mattick, Paul');
document.write ('<option value="maurin/index.htm">Maurin, Joaquín');
document.write ('<option value="mazumdar/index.htm">Mazumdar, Charu');
document.write ('<option value="tematica/mujer/autores/meena/index.htm">Meena');
document.write ('<option value="mehring/index.htm">Mehring, Franz');
document.write ('<option value="tematica/elsalvador/melendez/index.htm">Meléndez, Jorge');
document.write ('<option value="melis/index.htm">Mélis, Jean Baptiste');
document.write ('<option value="mella/index.htm">Mella, Julio Antonio');
document.write ('<option value="miasnikov/index.htm">Miasnikov, Gabriel');
document.write ('<option value="mishra/index.htm">Mishra, Vinod');
document.write ('<option value="molotov/index.htm">Molotov, Vyacheslav');
document.write ('<option value="tematica/guerrillas/index.htm">Morales Hernández, José de Jesús');
document.write ('<option value="moreno/index.htm">Moreno, Nahuel');
document.write ('<option value="morgan/index.htm">Morgan, Lewis Henry');
document.write ('<option value="morrow/index.htm">Morrow, Felix');
document.write ('<option value="mosquera/index.htm">Mosquera, Francisco');
document.write ('<option value="munis/index.htm">Munis, Grandizo');
document.write ('<option value="musin/index.htm">Musin, Rashid Musinovich');
document.write ('<option value="tematica/palestina/documentos/externos/pci-2009.htm">Nafah, Mujammad');
document.write ('<option value="naville-pierre/index.htm">Naville, Pierre');
document.write ('<option value="tematica/india/index.htm#nehru">Nehru, Jawaharlal');
document.write ('<option value="ngo/index.htm">Ngo Van Xuyet');
document.write ('<option value="nietzsche/index.htm">Nietzsche, Friedrich');
document.write ('<option value="nin/index.htm">Nin, Andreu');
document.write ('<option value="novack/index.htm">Novack, George');
document.write ('<option value="ossinski/index.htm">Ossinski, Nikolai');
document.write ('<option value="pablo/index.htm">Pablo, Michel');
document.write ('<option value="palacios/index.htm">Palacios, Alfredo');
document.write ('<option value="pannekoek/index.htm">Pannekoek, Anton');
document.write ('<option value="paredes/index.htm">Paredes Macedo, Saturnino');
document.write ('<option value="parvus/index.htm">Parvus, Alexander');
document.write ('<option value="pashukanis/index.htm">Pashukanis, Evgeny');
document.write ('<option value="pauker/index.htm">Pauker, Ana');
document.write ('<option value="tematica/mujer/autores/harnecker/1994/retos.htm">Peña, Lorena');
document.write ('<option value="peredo/index.htm">Peredo, Inti');
document.write ('<option value="peret/index.htm">Péret, Benjamin');
document.write ('<option value="pfempfert/index.htm">Pfempfert, Franz');
document.write ('<option value="piatnitsky/index.htm">Piatnitsky, Osip');
document.write ('<option value="plejanov/index.htm">Plejanov, Georgi');
document.write ('<option value="ponomariov/index.htm">Ponomariov, Boris');
document.write ('<option value="posadas/index.htm">Posadas, J.');
document.write ('<option value="pottier/index.htm">Pottier, Eugene');
document.write ('<option value="poulantzas/index.htm">Poulantzas, Nicos');
document.write ('<option value="tematica/nepal/index.htm">Prachanda');
document.write ('<option value="prager-rodolphe/index.htm">Prager, Rodolophe');
document.write ('<option value="preobrazhenski/index.htm">Preobrazhenski, Eugenio');
document.write ('<option value="pribicevic/index.htm">Pribicevic, Branko');
document.write ('<option value="radek/index.htm">Radek, Karl');
document.write ('<option value="rajkumar-azad/index.htm">Rajkumar, Cherukumi');
document.write ('<option value="rakovski/index.htm">Rakovski, Christian');
document.write ('<option value="ramos/index.htm">Ramos, Jorge Abelardo');
document.write ('<option value="kishenji-rao/index.htm">Rao, Koteshwar');
document.write ('<option value="rappoport/index.htmn">Rappoport, Charles');
document.write ('<option value="real/index.htm">Real, Juan José');
document.write ('<option value="recabarren/index.htm">Recabarren, Luis E.');
document.write ('<option value="reclus/index.htm">Reclus, Élisée');
document.write ('<option value="reed-evelyn/index.htm">Reed, Evelyn');
document.write ('<option value="reed/index.htm">Reed, John');
document.write ('<option value="riazanov/index.htm">Riazanov, David');
document.write ('<option value="rivera/index.htm">Rivera, Enrique');
document.write ('<option value="tematica/cienpol/index.htm">Robespierre, Maximillien');
document.write ('<option value="roces/index.htm">Roces, Wenceslao');
document.write ('<option value="tematica/guerrilla/index.htm">Rodríguez Jaramillo, Antonio');
document.write ('<option value="rosental/index.htm">Rosental, M. M.');
document.write ('<option value="rosmer/index.htm">Rosmer, Alfred');
document.write ('<option value="rousseau/index.htm">Rousseau, Jean-Jacques');
document.write ('<option value="tematica/women/autores/roy/index.htm">Roy, Arundhati');
document.write ('<option value="roy-m-n/index.htm">Roy, Manabendra Nath');
document.write ('<option value="rubin/index.htm">Rubin, Isaak');
document.write ('<option value="tematica/mujer/autores/ruether/index.htm">Ruether, Rosemary Radford')
document.write ('<option value="ruhle/index.htm">Rühle, Otto');
document.write ('<option value="saad/index.htm">Saad, Pedro');
document.write ('<option value="tematica/palestina/said/index.htm">Said, Edward');
document.write ('<option value="tematica/elsalvador/sanchez/index.htm">Sánchez Cerén, Salvador');
document.write ('<option value="tematica/elsalvador/sancho/index.htm">Sancho, Eduardo');
document.write ('<option value="sankara/index.htm">Sankara, Thomas');
document.write ('<option value="tematica/elsalvador/santa_cruz/index.htm">Santa Cruz, Domingo');
document.write ('<option value="santucho/index.htm">Santucho, Mario Roberto');
document.write ('<option value="scheiner/index.htm">Scheiner, Rosa');
document.write ('<option value="tematica/palestina/documentos/schoenman/historiaocultadelsionismo.pdf">Schoenman, Ralph');
document.write ('<option value="sedov/index.htm">Sedov, Leon');
document.write ('<option value="sedova/index.htm">Sedova Trotsky, Natalia');
document.write ('<option value="tematica/mujer/autores/serebrennikov/index.htm">Ser&eacute;brennikov, T.');
document.write ('<option value="serge/index.htm">Serge, Víctor');
document.write ('<option value="shapovalov/index.htm">Shapovalov, Aleksandr');
document.write ('<option value="shiskin/index.htm">Shishkin, A. F.');
document.write ('<option value="sikder/index.htm">Sikder, Siraj');
document.write ('<option value="sison/index.htm">Sison, José María');
document.write ('<option value="slovo/index.htm">Slovo, Joe');
document.write ('<option value="smith_adam/index.htm">Smith, Adam');
document.write ('<option value="sorel/index.htm">Sorel, Georges');
document.write ('<option value="sosnovsky/index.htm">Sosnovsky, Lev');
document.write ('<option value="spector/index.htm">Spector, Maurice');
document.write ('<option value="spilimbergo/index.htm">Spilimbergo, Jorge Enea');
document.write ('<option value="stalin/index.htm">Stalin, José');
document.write ('<option value="stucka/index.htm">Stucka, Peteris');
document.write ('<option value="tematica/palestina/index.htm">Suleiman, Fahd');
document.write ('<option value="ta/index.htm">Ta Thu Thau');
document.write ('<option value="teitelboim/index.htm">Teitelboim, Volodia');
document.write ('<option value="tematica/india/index.htm#tilak">Tilak, Balawant Gangadhar');
document.write ('<option value="tito/index.htm">Tito, Josip Broz');
document.write ('<option value="tokunaga/index.htm">Tokunaga, Sunao');
document.write ('<option value="tematica/mujer/autores/toledo/index.htm">Toledo, Cecilia');
document.write ('<option value="togliatti/index.htm">Togliatti, Palmiro');
document.write ('<option value="camilo/index.htm">Torres, Camilo');
document.write ('<option value="trent/index.htm">Trent, Evelyn');
document.write ('<option value="tretiakov/index.htm">Tretiakov, Sergei');
document.write ('<option value="tristan/index.htm">Tristán, Flora');
document.write ('<option value="troise/index.htm">Troise, Emilio');
document.write ('<option value="trotsky/index.htm">Trotsky, León');
document.write ('<option value="truong/index.htm">Truòng Chinh');
document.write ('<option value="tsintsandze/index.htm">Tsintsandze, Kote');
document.write ('<option value="tugan-baranovsky/index.htm">Tugan-Baranovsky, Mikhail');
document.write ('<option value="ugarte/index.htm">Ugarte, Manuel');
document.write ('<option value="tematica/elsalvador/valladares/index.htm">Valladares, María Marta');
document.write ('<option value="tematica/mujer/autores/vallejo_dowling/index.htm">Vallejo Dowling, Camila');
document.write ('<option value="vallejo/index.htm">Vallejo, César');
document.write ('<option value="van-den-eynde/index.htm">van den Eynde, Arturo');
document.write ('<option value="velasco/index.htm">Velasco Alvarado, Juan');
document.write ('<option value="tematica/elsalvador/villalobos/index.htm">Villalobos, Joaquín');
document.write ('<option value="vinogradskaya/index.htm">Vinogradskaya, Polina');
document.write ('<option value="volin/index.htn">Volin, Boris');
document.write ('<option value="voronsky/index.htm">Voronsky, Aleksandr');
document.write ('<option value="voroshilov/index.htm">Voroshílov, Kliment');
document.write ('<option value="wagner/index.htm">Wagner, Helmut');
document.write ('<option value="wang/index.htm">Wang Hongwen');
document.write ('<option value="westphalen/index.htm">von Westphalen Marx, Jenny');
document.write ('<option value="whyte/index.htm">Whyte, Harry Otter');
document.write ('<option value="widick-bj/index.htm">Widick, BJ');
document.write ('<option value="williams/index.htm">Williams, Albert Rhys');
document.write ('<option value="tematica/mujer/autores/wollstonecraft/index.htm">Wollstonecraft, Mary');
document.write ('<option value="malcolm_x/index.htm">X, Malcolm');
document.write ('<option value="xholi/index.htm">Xholi, Zija');
document.write ('<option value="tematica/econpol/inde.htm">Yaffe, David');
document.write ('<option value="yao/index.htm">Yao Wenyuan');
document.write ('<option value="zasulich/index.htm">Zasulich, Vera');
document.write ('<option value="zetkin/index.htm">Zetkin, Clara');
document.write ('<option value="zhang/index.htm">Zhang Chunqiao');
document.write ('<option value="zhu/index.htm">Zhu De');
document.write ('<option value="zinoviev/index.htm">Zinoviev, Gregory');
document.write ('<option value="tematica/autores/zouroff/index.htm">Zouroff, Vera');
document.write ('<option value="zuluaga/index.htm">Zuluaga Monedero, Gerardo');
document.write ('</select>');
document.write ('</form></td></tr></table>');
}
// Build the menu immediately: the visible tail of makeMyMenu() emits the
// <option> entries and the closing </select>, </form> and </table> markup
// through document.write, so the output lands where this script is included.
makeMyMenu();

File diff suppressed because one or more lines are too long

588
tests/encodings/file03.html Normal file
View file

@ -0,0 +1,588 @@
<!DOCTYPE html>
<html lang="fr-FR">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no"/>
<link rel="profile" href="https://gmpg.org/xfn/11" />
<title>Comprendre les cookies</title>
<link rel="shortcut icon" href="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images//ico/favicon.svg" >
<script>
// Matomo analytics bootstrap: queue the tracking commands, then load the
// tracker script asynchronously.
var _paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
    var trackerBase = "https://stats.data.gouv.fr/";
    _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
    _paq.push(['setSiteId', '121']);

    // Configure the loader element before attaching it to the page.
    var loader = document.createElement('script');
    loader.type = 'text/javascript';
    loader.async = true;
    loader.defer = true;
    loader.src = trackerBase + 'matomo.js';

    // Insert ahead of the first <script> already present in the document.
    var firstScript = document.getElementsByTagName('script')[0];
    firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<script>
var theme_url = "https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/js/";
var site_url = "https://www.solidarite-numerique.fr";
</script>
<script data-cfasync='false'>
// Crisp chat widget bootstrap.
// The three globals are set explicitly on window (presumably read by the
// l.js loader; confirm against the Crisp install snippet).
window.$crisp = [];
window.CRISP_RUNTIME_CONFIG = {
    locale: 'fr'
};
window.CRISP_WEBSITE_ID = '3688d516-e09f-4b8d-a4f6-dc163392dd11';
(function () {
    // Keep `d` and `s` local so they do not leak onto window as implicit
    // globals (undeclared assignment also throws in strict mode).
    var d = document;
    var s = d.createElement('script');
    s.src = 'https://client.crisp.chat/l.js';
    s.async = 1;
    d.getElementsByTagName('head')[0].appendChild(s);
})();
</script>
<!-- This site is optimized with the Yoast SEO plugin v16.1.1 - https://yoast.com/wordpress/plugins/seo/ -->
<title>Les cookies | A quoi servent-ils ? | Solidarité Numérique</title>
<meta name="description" content="Vous souhaitez en savoir plus sur les cookies ? Cette foire aux questions vous aidera à en apprendre davantage sur leurs spécificités." />
<meta name="robots" content="index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1" />
<link rel="canonical" href="https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/" />
<meta property="og:locale" content="fr_FR" />
<meta property="og:type" content="article" />
<meta property="og:title" content="Les cookies | A quoi servent-ils ? | Solidarité Numérique" />
<meta property="og:description" content="Vous souhaitez en savoir plus sur les cookies ? Cette foire aux questions vous aidera à en apprendre davantage sur leurs spécificités." />
<meta property="og:url" content="https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/" />
<meta property="og:site_name" content="Solidarité Numérique" />
<meta property="article:modified_time" content="2021-09-17T11:08:44+00:00" />
<meta property="og:image" content="https://www.solidarite-numerique.fr/wp-content/uploads/2021/09/cookies-logiciel.jpg" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:label1" content="Est. reading time">
<meta name="twitter:data1" content="3 minutes">
<script type="application/ld+json" class="yoast-schema-graph">{"@context":"https://schema.org","@graph":[{"@type":"Organization","@id":"https://www.solidarite-numerique.fr/#organization","name":"Solidarit\u00e9 Num\u00e9rique","url":"https://www.solidarite-numerique.fr/","sameAs":[],"logo":{"@type":"ImageObject","@id":"https://www.solidarite-numerique.fr/#logo","inLanguage":"fr-FR","url":"https://www.solidarite-numerique.fr/wp-content/uploads/2021/04/Asset-5@300x.png","contentUrl":"https://www.solidarite-numerique.fr/wp-content/uploads/2021/04/Asset-5@300x.png","width":1615,"height":665,"caption":"Solidarit\u00e9 Num\u00e9rique"},"image":{"@id":"https://www.solidarite-numerique.fr/#logo"}},{"@type":"WebSite","@id":"https://www.solidarite-numerique.fr/#website","url":"https://www.solidarite-numerique.fr/","name":"Solidarit\u00e9 Num\u00e9rique","description":"","publisher":{"@id":"https://www.solidarite-numerique.fr/#organization"},"potentialAction":[{"@type":"SearchAction","target":"https://www.solidarite-numerique.fr/?s={search_term_string}","query-input":"required name=search_term_string"}],"inLanguage":"fr-FR"},{"@type":"ImageObject","@id":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/#primaryimage","inLanguage":"fr-FR","url":"https://www.solidarite-numerique.fr/wp-content/uploads/2021/09/cookies-logiciel.jpg","contentUrl":"https://www.solidarite-numerique.fr/wp-content/uploads/2021/09/cookies-logiciel.jpg","width":640,"height":396,"caption":"Photo d'un ordinateur avec en fond d'\u00e9cran des g\u00e2teaux"},{"@type":"WebPage","@id":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/#webpage","url":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/","name":"Les cookies | A quoi servent-ils ? 
| Solidarit\u00e9 Num\u00e9rique","isPartOf":{"@id":"https://www.solidarite-numerique.fr/#website"},"primaryImageOfPage":{"@id":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/#primaryimage"},"datePublished":"2021-09-20T08:00:47+00:00","dateModified":"2021-09-17T11:08:44+00:00","description":"Vous souhaitez en savoir plus sur les cookies ? Cette foire aux questions vous aidera \u00e0 en apprendre davantage sur leurs sp\u00e9cificit\u00e9s.","breadcrumb":{"@id":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/#breadcrumb"},"inLanguage":"fr-FR","potentialAction":[{"@type":"ReadAction","target":["https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/"]}]},{"@type":"BreadcrumbList","@id":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"item":{"@type":"WebPage","@id":"https://www.solidarite-numerique.fr/","url":"https://www.solidarite-numerique.fr/","name":"Home"}},{"@type":"ListItem","position":2,"item":{"@type":"WebPage","@id":"https://www.solidarite-numerique.fr/tutoriels/","url":"https://www.solidarite-numerique.fr/tutoriels/","name":"Tutoriels"}},{"@type":"ListItem","position":3,"item":{"@type":"WebPage","@id":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/","url":"https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/","name":"Comprendre les cookies"}}]}]}</script>
<!-- / Yoast SEO plugin. -->
<link rel='dns-prefetch' href='//s.w.org' />
<script type="text/javascript">
// Settings consumed by the emoji detector below: image base URLs and file
// extensions, plus the URL of the concatenated emoji script that is loaded
// only when native support is found lacking.
window._wpemojiSettings = {"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/13.0.1\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/13.0.1\/svg\/","svgExt":".svg","source":{"concatemoji":"https:\/\/www.solidarite-numerique.fr\/wp-includes\/js\/wp-emoji-release.min.js?ver=5.6.2"}};
// Minified emoji feature test. For "flag" and "emoji" it draws a code-point
// sequence on a canvas, then a variant of it containing U+200B (8203), and
// compares the two toDataURL() results; identical output counts as "not
// supported". Results are stored in _wpemojiSettings.supports. When not
// everything is supported, it registers readyCallback on DOMContentLoaded/load
// (or the attachEvent equivalents) and appends the script(s) named in
// settings.source to <head>.
!function(e,a,t){var n,r,o,i=a.createElement("canvas"),p=i.getContext&&i.getContext("2d");function s(e,t){var a=String.fromCharCode;p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,e),0,0);e=i.toDataURL();return p.clearRect(0,0,i.width,i.height),p.fillText(a.apply(this,t),0,0),e===i.toDataURL()}function c(e){var t=a.createElement("script");t.src=e,t.defer=t.type="text/javascript",a.getElementsByTagName("head")[0].appendChild(t)}for(o=Array("flag","emoji"),t.supports={everything:!0,everythingExceptFlag:!0},r=0;r<o.length;r++)t.supports[o[r]]=function(e){if(!p||!p.fillText)return!1;switch(p.textBaseline="top",p.font="600 32px Arial",e){case"flag":return s([127987,65039,8205,9895,65039],[127987,65039,8203,9895,65039])?!1:!s([55356,56826,55356,56819],[55356,56826,8203,55356,56819])&&!s([55356,57332,56128,56423,56128,56418,56128,56421,56128,56430,56128,56423,56128,56447],[55356,57332,8203,56128,56423,8203,56128,56418,8203,56128,56421,8203,56128,56430,8203,56128,56423,8203,56128,56447]);case"emoji":return!s([55357,56424,8205,55356,57212],[55357,56424,8203,55356,57212])}return!1}(o[r]),t.supports.everything=t.supports.everything&&t.supports[o[r]],"flag"!==o[r]&&(t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&t.supports[o[r]]);t.supports.everythingExceptFlag=t.supports.everythingExceptFlag&&!t.supports.flag,t.DOMReady=!1,t.readyCallback=function(){t.DOMReady=!0},t.supports.everything||(n=function(){t.readyCallback()},a.addEventListener?(a.addEventListener("DOMContentLoaded",n,!1),e.addEventListener("load",n,!1)):(e.attachEvent("onload",n),a.attachEvent("onreadystatechange",function(){"complete"===a.readyState&&t.readyCallback()})),(n=t.source||{}).concatemoji?c(n.concatemoji):n.wpemoji&&n.twemoji&&(c(n.twemoji),c(n.wpemoji)))}(window,document,window._wpemojiSettings);
</script>
<style type="text/css">
/* Render emoji/smiley images inline at text size (1em square) with no border,
   shadow, background or padding. Every declaration is !important, presumably
   so that theme image rules cannot override it (verify against the theme). */
img.wp-smiley,
img.emoji {
display: inline !important;
border: none !important;
box-shadow: none !important;
height: 1em !important;
width: 1em !important;
margin: 0 .07em !important;
vertical-align: -0.1em !important;
background: none !important;
padding: 0 !important;
}
</style>
<link rel='stylesheet' id='wp-block-library-css' href='https://www.solidarite-numerique.fr/wp-includes/css/dist/block-library/style.min.css?ver=5.6.2' type='text/css' media='all' />
<link rel='stylesheet' id='contact-form-7-css' href='https://www.solidarite-numerique.fr/wp-content/plugins/contact-form-7/includes/css/styles.css?ver=5.2' type='text/css' media='all' />
<style id='contact-form-7-inline-css' type='text/css'>
.wpcf7 .wpcf7-recaptcha iframe {margin-bottom: 0;}.wpcf7 .wpcf7-recaptcha[data-align="center"] > div {margin: 0 auto;}.wpcf7 .wpcf7-recaptcha[data-align="right"] > div {margin: 0 0 0 auto;}
</style>
<link rel='stylesheet' id='animate-css' href='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/css/animate.css?ver=5.6.2' type='text/css' media='all' />
<link rel='stylesheet' id='main-css' href='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/css/main.css?ver=5.6.2' type='text/css' media='all' />
<link rel='stylesheet' id='responsive-css' href='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/css/responsive.css?ver=5.6.2' type='text/css' media='all' />
<link rel='stylesheet' id='slick-css' href='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/css/slick.css?ver=5.6.2' type='text/css' media='all' />
<link rel='stylesheet' id='addtoany-css' href='https://www.solidarite-numerique.fr/wp-content/plugins/add-to-any/addtoany.min.css?ver=1.15' type='text/css' media='all' />
<link rel='stylesheet' id='learn-press-bundle-css' href='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/css/bundle.min.css?ver=3.2.8.8' type='text/css' media='all' />
<link rel='stylesheet' id='learn-press-css' href='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/css/frontend/learnpress.min.css?ver=3.2.8.8' type='text/css' media='all' />
<link rel='stylesheet' id='lp-overlay-css' href='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/css/frontend/lp-overlay.min.css?ver=3.2.8.8' type='text/css' media='all' />
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-includes/js/jquery/jquery.min.js?ver=3.5.1' id='jquery-core-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-includes/js/jquery/jquery-migrate.min.js?ver=3.3.2' id='jquery-migrate-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/add-to-any/addtoany.min.js?ver=1.1' id='addtoany-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/js/sn_mato.js?ver=5.6.2' id='sn_mato-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-includes/js/underscore.min.js?ver=1.8.3' id='underscore-js'></script>
<script type='text/javascript' id='utils-js-extra'>
/* <![CDATA[ */
var userSettings = {"url":"\/","uid":"0","time":"1718351134","secure":"1"};
/* ]]> */
</script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-includes/js/utils.min.js?ver=5.6.2' id='utils-js'></script>
<script type='text/javascript' id='lp-global-js-extra'>
/* <![CDATA[ */
var lpGlobalSettings = {"url":"https:\/\/www.solidarite-numerique.fr\/tutoriels\/comprendre-les-cookies\/?thematique=internet","siteurl":"https:\/\/www.solidarite-numerique.fr","ajax":"https:\/\/www.solidarite-numerique.fr\/wp-admin\/admin-ajax.php","theme":"snum-v2","localize":{"button_ok":"OK","button_cancel":"Annuler","button_yes":"Oui","button_no":"Non"},"show_popup_confirm_finish":"yes"};
/* ]]> */
</script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/js/global.min.js?ver=3.2.8.8' id='lp-global-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/js/dist/utils.min.js?ver=3.2.8.8' id='lp-utils-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/src/js/vendor/watch.min.js?ver=3.2.8.8' id='watch-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/js/vendor/plugins.all.min.js?ver=3.2.8.8' id='lp-plugins-all-js'></script>
<link rel="https://api.w.org/" href="https://www.solidarite-numerique.fr/wp-json/" /><link rel="alternate" type="application/json" href="https://www.solidarite-numerique.fr/wp-json/wp/v2/tutoriels/5338" /><link rel="EditURI" type="application/rsd+xml" title="RSD" href="https://www.solidarite-numerique.fr/xmlrpc.php?rsd" />
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="https://www.solidarite-numerique.fr/wp-includes/wlwmanifest.xml" />
<meta name="generator" content="WordPress 5.6.2" />
<link rel='shortlink' href='https://www.solidarite-numerique.fr/?p=5338' />
<link rel="alternate" type="application/json+oembed" href="https://www.solidarite-numerique.fr/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fwww.solidarite-numerique.fr%2Ftutoriels%2Fcomprendre-les-cookies%2F" />
<link rel="alternate" type="text/xml+oembed" href="https://www.solidarite-numerique.fr/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fwww.solidarite-numerique.fr%2Ftutoriels%2Fcomprendre-les-cookies%2F&#038;format=xml" />
<link rel="alternate" href="https://www.solidarite-numerique.fr/tutoriels/comprendre-les-cookies/" hreflang="x-default" />
<script data-cfasync="false">
// AddToAny share-button setup: reuse an existing a2a_config object if one is
// already defined, then reset its callbacks, overlays and templates.
window.a2a_config = window.a2a_config || {};
a2a_config.callbacks = [];
a2a_config.overlays = [];
a2a_config.templates = {};

// UI labels for the share menu; HTML entities are kept verbatim.
a2a_localize = {
    Share: "Partager",
    Save: "Enregistrer",
    Subscribe: "S'abonner",
    Email: "Email",
    Bookmark: "Signet",
    ShowAll: "Montrer tout",
    ShowLess: "Montrer moins",
    FindServices: "Trouver des service(s)",
    FindAnyServiceToAddTo: "Trouver instantan&eacute;ment des services &agrave; ajouter &agrave;",
    PoweredBy: "Propuls&eacute; par",
    ShareViaEmail: "Share via email",
    SubscribeViaEmail: "Subscribe via email",
    BookmarkInYourBrowser: "Ajouter un signet dans votre fureteur",
    BookmarkInstructions: "Press Ctrl+D or \u2318+D to bookmark this page",
    AddToYourFavorites: "Ajouter &agrave; vos favoris",
    SendFromWebOrProgram: "Send from any email address or email program",
    EmailProgram: "Email program",
    More: "More&#8230;",
    ThanksForSharing: "Thanks for sharing!",
    ThanksForFollowing: "Thanks for following!"
};

a2a_config.icon_color = "#2a2a2a,#ffffff";

// Load page.js asynchronously, inserted before the first <script> element.
(function (doc, tagName) {
    var loader = doc.createElement(tagName);
    var firstScript = doc.getElementsByTagName(tagName)[0];
    loader.async = 1;
    loader.src = "https://static.addtoany.com/menu/page.js";
    firstScript.parentNode.insertBefore(loader, firstScript);
})(document, "script");
</script>
<style type="text/css">.recentcomments a{display:inline !important;padding:0 !important;margin:0 !important;}</style>
<!-- <style>
@import url("https://use.typekit.net/bye3yzz.css");
</style> -->
<script>
jQuery(document).ready(function($){
$(document).on('click', '.burger, .burger2', function(e){
e.preventDefault();
if($('.menu-mobile').hasClass('showMenu'))
{
$('.menu-mobile').removeClass('showMenu');
}
else
{
$('.menu-mobile').addClass('showMenu');
}
});
});
</script>
</head>
<!-- class="tutoriels-template-default single single-tutoriels postid-5338 wp-embed-responsive multiple-domain-www-solidarite-numerique-fr" -->
<body >
<div class="inner_content page-demarche">
<div class="cookies">
<div class="container flex politique" id="cookie_politique" >
<p>Nous utilisons des cookies anonymisés afin de vous offrir la meilleure expérience sur notre site.<br><br>Pour en savoir plus sur l’utilisation de nos cookies, vous pouvez consulter notre Politique de confidentialité.</p>
<aside class="flex">
<a href="#" id="agree_cookie">Fermer</a>
<a href="https://www.solidarite-numerique.fr/politique-de-confidentialite" target="_blank">Politique de confidentialité</a>
</aside>
</div>
<div class="container flex">
<!--<p class="for-desk">Je bénéficie d'une aide par téléphone <strong>du lundi au vendredi de 9h00 à 18h00</strong> au <strong><a href="tel:+33170772372">01 70 772 372</a></strong> (au prix d’un appel local)</p>-->
<p class="for-desk"><strong>Le numéro d'appel n'est pas accessible aujourd'hui. Veuillez nous excuser pour la gêne occasionnée.</strong></p>
<!--<p class="for-desk hollyday">
<strong>La plateforme Solidarité Numérique est fermée pendant l'été du 02 au 15 août.</strong> En attendant, vous pouvez toujours trouver de l'aide pour vos démarches en ligne sur ce site. Si vous souhaitez nous contacter, vous pouvez envoyer un mail à l'adresse contact@solidarite-numerique.fr. Nous vous répondrons à notre retour.
</p>-->
<!--<p class="for-mobile">
<strong>Aide par téléphone</strong><br>
<a href="tel: +33170772372 " class="logo">
<strong>01 70 772 372</strong><br><small>(appel non surtaxé)</small>
</a>
</p>-->
<p class="for-mobile">
<strong>Le numéro d'appel n'est pas accessible aujourd'hui. Veuillez nous excuser pour la gêne occasionnée.</strong><br>
</p>
<!--<p class="for-mobile hollyday">
<strong>La plateforme Solidarité Numérique est fermée pendant l'été du 02 au 15 août.</strong>
</p>-->
</div>
<a href="" class="delete"></a>
</div>
<section class="inner_content_container">
<!-- menu -->
<div class="main_menu new_main_menu">
<div class="container flex">
<ul class="flex for-mobile">
<li>
<a href="https://www.solidarite-numerique.fr/recherche" ><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/lens-blue.svg" alt="Rechercher"/></a>
</li>
<li>
<a href="https://www.solidarite-numerique.fr" class="logo"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/logoSN.svg" alt="Solidarité Numérique" class="lg-mob"/></a>
</li>
<li><a href="#" class="burger"></a></li>
</ul>
<div class="menu-mobile">
<a href="" class="closing"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/close.svg" alt="" class="burger2"/></a>
<div class="head-menu-mobile">
<a href="index.html"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/logoSN.svg" alt="Solidarité Numérique" class="lg-mob"/></a>
</div>
<ul>
<li><a href="https://www.solidarite-numerique.fr" class="">Accueil</a></li>
<li><a href="https://www.solidarite-numerique.fr/a-propos" class="" >A propos</a></li>
<li><a href="https://www.solidarite-numerique.fr/thematiques" class="">Thématiques</a></li>
<li><a href="https://www.solidarite-numerique.fr/cartographie" class="">Cartographie</a></li>
<li><a href="https://www.solidarite-numerique.fr/recherche" class="">Recherche</a></li>
</ul>
</div>
<ul class="flex for-desk">
<li>
<a href="https://www.solidarite-numerique.fr" class="logo"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/logoSN.svg" alt="Solidarité Numérique" class="lg-mob"/></a>
</li>
<!-- <li class="vocal"><a href="vocal.html">&nbsp;</a></li> -->
<li class="home "><a href="https://www.solidarite-numerique.fr">Accueil</a></li>
<li class="about "><a href="https://www.solidarite-numerique.fr/a-propos" >A propos</a></li>
<li class="them "><a href="https://www.solidarite-numerique.fr/thematiques">Thématiques</a></li>
<li class="cart "><a href="https://www.solidarite-numerique.fr/cartographie">Cartographie</a></li>
<li class="search "><a href="https://www.solidarite-numerique.fr/recherche">Recherche</a></li>
</ul>
</div>
<div id="box_load_pdf" style="text-align: center; display: none; background: #f4e891;">
<form id="form_gen_pdf" style="display: none;">
<input type="hidden" name="id_post" value="5338">
</form>
<a id="loading_pdf" target="_blank"></a>
</div>
</div>
<div class="main_menu new_main_menu blue-menu">
<div class="container flex for-desk long-menu">
<ul id="menu-menu-principal" class="flex"><li id="menu-item-2414" class="menu-item menu-item-type-taxonomy menu-item-object-thematiques current-tutoriels-ancestor menu-item-2414"><a href="https://www.solidarite-numerique.fr/thematiques/premiers-pas/">Premiers pas</a></li>
<li id="menu-item-2791" class="menu-item menu-item-type-taxonomy menu-item-object-thematiques menu-item-2791"><a href="https://www.solidarite-numerique.fr/thematiques/sante/">Santé</a></li>
<li id="menu-item-2330" class="menu-item menu-item-type-taxonomy menu-item-object-thematiques menu-item-2330"><a href="https://www.solidarite-numerique.fr/thematiques/communiquer/">Communiquer</a></li>
<li id="menu-item-2332" class="menu-item menu-item-type-taxonomy menu-item-object-thematiques menu-item-2332"><a href="https://www.solidarite-numerique.fr/thematiques/demarches-administratives/">Démarches administratives</a></li>
<li id="menu-item-3665" class="menu-item menu-item-type-taxonomy menu-item-object-thematiques menu-item-3665"><a href="https://www.solidarite-numerique.fr/thematiques/education-et-formation/">Education et formation</a></li>
<li id="menu-item-3666" class="menu-item menu-item-type-taxonomy menu-item-object-thematiques menu-item-3666"><a href="https://www.solidarite-numerique.fr/thematiques/au-quotidien/">Au quotidien</a></li>
</ul> </div>
</div>
<!--action-->
<!-- <section class="action ressources clr wow fadeIn" data-wow-delay="0s" data-wow-duration="1s" data-wow-offset="1"> -->
<section id="cnt_master" class="action ressources clr wow fadeIn" data-wow-delay="0s" data-wow-duration="1s" data-wow-offset="1">
<div class="container">
<div class="sommaire">
<div class="som-control">
<h4>Sommaire</h4>
<a href="#" class="som-link desable"><span>Réduire</span><span>Agrandir</span></a>
</div>
<ul class="share-btn a2a_kit addtoany_list">
<li><a class="a2a_button_facebook" href="/#facebook"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/facebooksharemain.svg" alt=""></a></li>
<li><a class="a2a_button_linkedin" href="/#linkedin"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/linkedinsharemain.svg" alt=""></a></li>
<li><a class="a2a_button_twitter" href="/#twitter"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/twittersharemain.svg" alt=""></a></li>
<li><a class="a2a_button_copy_link" href="/#copy_link"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/bloc sommaire/copylinksmall.svg" alt=""></a></li>
<li><a class="a2a_button_email" href="/#email"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/bloc sommaire/emailshare.svg" alt=""></a></li>
<li><a class="save_print" href="#"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/Iconprintmain.svg" alt=""></a>
</li>
</ul>
<form id="form_gen_pdf" style="display: none;">
<input type="hidden" name="id_post" value="5338">
</form>
<ul class="sommaire-lnk"><li><a class="scroll" href="#a-quoi-sert-un-cookie">À quoi sert un cookie ?</a></li><li><a class="scroll" href="#que-dit-la-loi-sur-lutilisation-des-cookies">Que dit la loi sur l'utilisation des cookies ?</a></li><li><a class="scroll" href="#puis-je-parametrer-ou-bloquer-lutilisation-des-cookies">Puis-je paramétrer ou bloquer l'utilisation des cookies ?</a></li></ul>
</div>
<div class="text-content">
<article class="flex">
<aside class="highlight demarche">
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/user-jaune.svg" alt="">
</figure>
<div class="content-highlight">
<ul class="breadcrumb">
<li><a href="https://www.solidarite-numerique.fr"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/home.svg" alt=""></a></li>
<li><a href="https://www.solidarite-numerique.fr/thematiques">Thématiques</a></li>
<li><a href="https://www.solidarite-numerique.fr/thematiques/premiers-pas/">Premiers pas</a></li><li><a href="https://www.solidarite-numerique.fr/thematiques/internet/">Internet</a></li> </ul>
<h5>Comprendre les cookies</h5>
</div>
</aside>
</article>
<p class="date-pub"><em>Dernière modification : 17 septembre 2021</em></p>
<div class="sharing">
<article class="a2a_kit addtoany_list">
<span>Partager</span>
<a class="a2a_button_facebook" href="/#facebook"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/facebooksharemain.svg" alt=""></a>
<a class="a2a_button_linkedin" href="/#linkedin"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/linkedinsharemain.svg" alt=""></a>
<a class="a2a_button_twitter" href="/#twitter"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/twittersharemain.svg" alt=""></a>
<a class="a2a_button_copy_link" href="/#copy_link"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/copylinkmain.svg" alt=""></a>
<a class="a2a_button_email" href="/#email"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/bloc sommaire/emailshare.svg" alt=""></a>
</article>
<article>
<span><a class="save_print" href="#"><span class="for-desk">Enregistrer / Imprimer</span><span class="for-mobile">Enregistrer</span></a></span>
<a class="save_print" href="#"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/Iconprintmain.svg" alt=""></a>
</article>
</div>
<div class="important">
<p>Vous souhaitez en savoir plus sur les cookies ? Cette foire aux questions vous aidera à comprendre et à en apprendre davantage sur leurs spécificités.</p>
</div>
<p class="tags">Tags : <a href="https://www.solidarite-numerique.fr/tag/confidentialite/">#Confidentialité</a> <a href="https://www.solidarite-numerique.fr/tag/cookies/">#Cookies</a> <a href="https://www.solidarite-numerique.fr/tag/donneesdenavigation/">#DonnéesDeNavigation</a> <a href="https://www.solidarite-numerique.fr/tag/donneespersonnelles/">#DonnéesPersonnelles</a> <a href="https://www.solidarite-numerique.fr/tag/rgpd/">#RGPD</a></p>
<h3>Prérequis</h3>
<div class="prerequis">
<ul>
<li>Maîtriser la navigation sur Internet</li>
</ul>
</div>
<!-- Contenu -->
<h5><img class="alignnone wp-image-5544 " src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/09/cookies-logiciel.jpg" alt="Photo d'un ordinateur sur une table avec en fond d'écran des gâteaux" width="600" height="371" /></h5>
<p>Les cookies sont des petits fichiers informatiques déposés par le site Internet que vous visitez. Il s'installe sur votre ordinateur, tablette ou smartphone par le <a href="https://www.solidarite-numerique.fr/tutoriels/decouvrir-les-principaux-navigateurs-web/" target="_blank" rel="noopener">navigateur web</a>.</p>
<h5>Questions - réponses</h5>
<h2 id="a-quoi-sert-un-cookie">À quoi sert un cookie ?</h2>
<p><strong>4 types de cookies les plus courants</strong> :</p>
<ul>
<li><strong>Nécessaires </strong>: pour le bon fonctionnement du site Internet. Ils <strong>conservent des données pour simplifier votre navigation</strong>. Par exemple, les sites les utilisent pour mémoriser le panier sur un site de commerce en ligne ou pour authentifier directement l'internaute. <strong>Ils sont nécessaires au bon fonctionnement d'un site Internet. </strong>Ils personnalisent également l'expérience de l'utilisateur sur un site Internet, en mémorisant ses préférences (sans consentement).</li>
<li><strong>Statistiques</strong> : pistent un utilisateur pour mesurer l'audience afin de réaliser des statistiques de fréquentation d'un site Internet.</li>
<li><strong>Internes : </strong>pour connaître votre comportement et proposer de la publicité ciblée</li>
<li><strong>Tierces parties</strong> : déposés par un site différent du site que vous visitez. Généralement utilisés par des régies publicitaires pour adapter l'affichage de leurs publicités, ou bien par les réseaux sociaux (Facebook par exemple)</li>
</ul>
<h2 id="que-dit-la-loi-sur-lutilisation-des-cookies">Que dit la loi sur l'utilisation des cookies ?</h2>
<p>Ceux nécessaires au bon fonctionnement d'un site Internet ne nécessitent pas le consentement de l'utilisateur.<br />
<strong>Ceux utilisés par les réseaux sociaux, ou à des fins publicitaires, doivent faire l'objet d'un consentement préalable de l'utilisateur. </strong>C'est le <strong>´le des bandeaux affichés lors de l'ouverture d'un site Internet.<br />
Il doit respecter les conditions du Règlement Général sur la Protection des Données (RGPD)</strong> : libre, spécifique, éclairé, univoque, et doit pouvoir être retiré à tout moment. </p>
<p>Certains sites proposent de payer un abonnement mensuel contre l'absence d'utilisation de cookies. Ce type de dispositif profite d'un flou juridique. La Commission Nationale Informatique et Liberté (CNIL) évalue au cas par cas la légalité de ces dispositifs.</p>
<p><iframe src="//www.youtube.com/embed/oYGEXD8oTUE" width="600" height="336" allowfullscreen="allowfullscreen"></iframe></p>
<h2 id="puis-je-parametrer-ou-bloquer-lutilisation-des-cookies">Puis-je paramétrer ou bloquer l'utilisation des cookies ?</h2>
<p>Vous pouvez les paramétrer, voire les bloquer directement sur le navigateur Internet, ou encore par l'intermédiaire du bandeau proposé par les sites Internet.</p>
<h3>Depuis le navigateur Internet</h3>
<p>Les navigateurs Internet permettent de paramétrer le pistage des sites Internet :</p>
<ul>
<li><strong>Microsoft Edge :</strong> cliquez sur <strong>Paramètres</strong> &gt;<strong> Cookies et autorisations de site </strong>&gt;<strong> Gérer et supprimer les cookies et les données du site</strong></li>
<li><strong>Google Chrome : </strong>cliquez sur <strong>Paramètres </strong>&gt;<strong> Confidentialité et sécurité </strong>&gt;<strong> Cookies et autres données des sites</strong></li>
<li><strong>Mozilla Firefox : </strong>dans les <strong>Paramètres </strong>&gt;<strong> Vie privée et sécurité </strong>&gt;<strong> Protection renforcée contre le pistage</strong>, ainsi que <strong>Cookies et données de site</strong></li>
</ul>
<p><div class="prerequis savoir"><p><strong>À SAVOIR</strong></p>
<ul>
<li><em>Le paramétrage au niveau du navigateur bloque les cookies techniques nécessaires au bon fonctionnement des sites Internet : cette configuration doit être effectué avec prudence.<br />
</em></li>
<li><em><strong>La navigation en mode privée empêche le dépôt de cookies par le navigateur Internet</strong>, mais ne bloque pas forcément les autres moyens de pistage d&#8217;un site (pixel espion, collecte d&#8217;informations du navigateur&#8230;).</em></li>
<li><em>Pour supprimer des cookies déjà déposés sur votre appareil, consultez le tutoriel à ce sujet : </em><a href="https://www.solidarite-numerique.fr/tutoriels/supprimer-les-cookies-sur-les-navigateurs-firefox-chrome-edge/" target="_blank" rel="noopener"><em>Supprimer les cookies sur les navigateurs Firefox, Chrome et Edge</em>.</a></li>
</ul>
</div></p>
<h3>Depuis le bandeau de connexion sur le site Internet</h3>
<p>Les sites Internet qui les emploient à des fins de pistage sont dans l'obligation de proposer aux utilisateurs d'accepter, refuser ou paramétrer l'utilisation de ces cookies de manière consentie, libre, et éclairée. <br />
Quand vous accédez à un site, n'hésitez pas à cliquer sur : <strong>Continuer sans accepter </strong>ou<strong> Refuser</strong>, placé en haut ou en bas du bandeau.<br />
Sur notre exemple, l'indication se trouve en plus petit, en bas à gauche.</p>
<p><img class="alignnone wp-image-5304 " src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/08/bandeau-cookie-leboncoin.png" alt="Bandeau de paramétrage des cookies du site Le Bon Coin" width="600" height="541" /></p>
<p>Si cette mention n'apparait pas, sélectionnez ceux que vous souhaitez autoriser ou bloquer en cliquant sur <strong>Personnaliser</strong>. En fonction des sites Internet, l'affichage des options de personnalisation peuvent varier.</p>
<p><img class="alignnone wp-image-5306 " src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/08/bandeau-cookie-leboncoin-parametre.png" alt="Bandeau de paramétrage détaillé des cookies du site Le Bon Coin" width="600" height="819" /></p>
<p>&nbsp;</p>
<h5>Liens Utiles</h5>
<div class="tags"><p><a href="https://www.cnil.fr/fr/definition/cookie" target="_blank" rel="noopener">Définition des cookies par la CNIL</a></p>
<p><a href="https://www.cnil.fr/fr/cookies-et-traceurs-que-dit-la-loi" target="_blank" rel="noopener">Cookies et traceurs : que dit la loi ?</a></p>
<p><a href="https://www.numerama.com/tech/701279-payer-pour-eviter-les-cookies-publicitaires-est-ce-legal.html" target="_blank" rel="noopener">Payer pour éviter les cookies publicitaires, est-ce légal ?</a></p>
</div>
<h5>Licence</h5>
<p class="licence"> <em>Ce tutoriel est mis à disposition sous les termes de la Licence Ouverte 2.0 (ou cc by SA) Ce tutoriel a été produit dans le cadre du projet Solidarité Numérique. L’objectif est d’accompagner les citoyens dans leurs besoins numériques. Tous les éléments reproduits dans les captures d’écran sont la propriété des sites desquels ils sont tirés.</em></p>
<p class="date-pub"><em>Dernière modification : 17 septembre 2021</em></p>
<div class="sharing">
<article class="a2a_kit addtoany_list">
<span>Partager</span>
<a class="a2a_button_facebook" href="/#facebook"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/facebooksharemain.svg" alt=""></a>
<a class="a2a_button_linkedin" href="/#linkedin"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/linkedinsharemain.svg" alt=""></a>
<a class="a2a_button_twitter" href="/#twitter"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/twittersharemain.svg" alt=""></a>
<a class="a2a_button_copy_link" href="/#copy_link"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/copylinkmain.svg" alt=""></a>
<a class="a2a_button_email" href="/#email"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/bloc sommaire/emailshare.svg" alt=""></a>
</article>
<article>
<span><a class="save_print" href="#"><span class="for-desk">Enregistrer / Imprimer</span><span class="for-mobile">Enregistrer</span></a></span>
<a class="save_print" href="#"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/page tutoriel/corps d'article/Iconprintmain.svg" alt=""></a>
</article>
</div>
</div>
</div>
</section>
<section class="action ressources clr wow fadeIn tutos_thematique" data-wow-delay="0s" data-wow-duration="1s" data-wow-offset="1">
<div class="container">
<h4>Les tutoriels sur la<span> même thématique</span></h4>
<article class="flex">
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/tutoriels/supprimer-les-cookies-sur-les-navigateurs-firefox-chrome-edge/?thematique=internet';" class="internet">
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/icoÌne-deÌ<65>buter-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
<h5>Supprimer les cookies sur les navigateurs Firefox, Chrome, Edge</h5>
</figure>
<span>Internet</span>
<p>Dans certains cas, effacer les cookies de son navigateur est nécessaire. Ce tutoriel vous explique comment procéder.... <a href="https://www.solidarite-numerique.fr/tutoriels/supprimer-les-cookies-sur-les-navigateurs-firefox-chrome-edge/?thematique=internet">lire la suite</a></p>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/tutoriels/decouvrir-les-principaux-moteurs-de-recherche/?thematique=internet';" class="internet">
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/icoÌne-deÌ<65>buter-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
<h5>Découvrir les principaux moteurs de recherche</h5>
</figure>
<span>Internet</span>
<p>Vous souhaitez en savoir plus sur les moteurs de recherche ? Ce panorama vous présente les principaux avec leurs spécificités.... <a href="https://www.solidarite-numerique.fr/tutoriels/decouvrir-les-principaux-moteurs-de-recherche/?thematique=internet">lire la suite</a></p>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/tutoriels/installer-et-utiliser-firefox-sous-windows/?thematique=internet';" class="internet">
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/icoÌne-deÌ<65>buter-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
<h5>Installer et utiliser Firefox sous Windows</h5>
</figure>
<span>Internet</span>
<p>Vous souhaitez changer de navigateur et utiliser Firefox ? Ce tutoriel vous détaille la procédure d'installation et la configuration pour une premiÃ... <a href="https://www.solidarite-numerique.fr/tutoriels/installer-et-utiliser-firefox-sous-windows/?thematique=internet">lire la suite</a></p>
</aside>
</article>
<p><a href="https://www.solidarite-numerique.fr/tutoriels" class="see-more">Voir plus</a></p>
</div>
</section>
<section class="action ressources clr wow fadeIn autres_thematique" data-wow-delay="0s" data-wow-duration="1s" data-wow-offset="1">
<div class="container">
<h4>Les autres thématiques</h4>
<div class="" id="slider-them">
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/thematiques/au-quotidien/';" class="debuter item">
<div>
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/04/IcoÌne-Sinformer-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
</figure>
<span>Au quotidien<br><small>20 tutoriels</small></span>
</div>
<div>
<p><p>Vous recherchez de l'aide pour d'autres actions et démarches numériques du quotidien ? Ce dossier est fait pour vous ! Vous y trouverez des resso...... <a href="https://www.solidarite-numerique.fr/thematiques/au-quotidien/">lire la suite</a></p>
</div>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/thematiques/communiquer/';" class="debuter item">
<div>
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/icoÌne-Communiquer-SolidariteÌ<65>-NumeÌ<65>rique_2@300x.png" alt="image">
</figure>
<span>Communiquer<br><small>20 tutoriels</small></span>
</div>
<div>
<p>Vous souhaitez envoyer un email ou même réaliser un appel visio, mais vous ne savez pas comment vous y prendre ?
Ce dossier vous donne les bases p...... <a href="https://www.solidarite-numerique.fr/thematiques/communiquer/">lire la suite</a></p>
</div>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/thematiques/demarches-administratives/';" class="debuter item">
<div>
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/IcoÌne-DeÌ<65>marches-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
</figure>
<span>Démarches administratives<br><small>22 tutoriels</small></span>
</div>
<div>
<p><p>Vous avez besoin d'aide pour vos démarches administratives ?<br />
Vous trouverez dans ce dossier toutes les réponses pour réaliser vos opérat...... <a href="https://www.solidarite-numerique.fr/thematiques/demarches-administratives/">lire la suite</a></p>
</div>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/thematiques/education-et-formation/';" class="debuter item">
<div>
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/04/IcoÌne-Apprendre-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
</figure>
<span>Education et formation<br><small>6 tutoriels</small></span>
</div>
<div>
<p><p>Vous rencontrez des difficultés à faire l'école à la maison pour vos enfants ? Ce dossier vous présente des ressources éducatives ainsi que d...... <a href="https://www.solidarite-numerique.fr/thematiques/education-et-formation/">lire la suite</a></p>
</div>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/thematiques/premiers-pas/';" class="debuter item">
<div>
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/icoÌne-deÌ<65>buter-SolidariteÌ<65>-NumeÌ<65>rique_1@300x.png" alt="image">
</figure>
<span>Premiers pas<br><small>33 tutoriels</small></span>
</div>
<div>
<p><p>Vous débutez dans votre apprentissage de l'informatique ?<br />
Ce dossier est fait pour vous ! Vous y trouverez des ressources qui vous guideron...... <a href="https://www.solidarite-numerique.fr/thematiques/premiers-pas/">lire la suite</a></p>
</div>
</aside>
<aside onclick="document.location.href='https://www.solidarite-numerique.fr/thematiques/sante/';" class="debuter item">
<div>
<figure>
<img src="https://www.solidarite-numerique.fr/wp-content/uploads/2021/02/icoÌne-SanteÌ<65>-SolidariteÌ<65>-NumeÌ<65>rique_2@300x.png" alt="image">
</figure>
<span>Santé<br><small>15 tutoriels</small></span>
</div>
<div>
<p>Vous souhaitez prendre rendez-vous avec un professionnel de santé, mais les prises de rendez-vous en ligne ou les consultations à distance vous semb...... <a href="https://www.solidarite-numerique.fr/thematiques/sante/">lire la suite</a></p>
</div>
</aside>
</div>
</div>
</section>
<!--/action ressources clr-->
</section>
<!--Footer-->
<footer class="wow fadeInUp" data-wow-delay=".5s" data-wow-duration="1s" data-wow-offset="1">
<div class="container flex">
<a href="https://www.solidarite-numerique.fr" class="logo"><img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/logoSN-footer.svg" alt="Solidarité Numérique"/></a>
<ul><li id="menu-item-2417" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-home menu-item-2417"><a href="https://www.solidarite-numerique.fr/">Accueil</a></li>
<li id="menu-item-4629" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-4629"><a href="https://www.solidarite-numerique.fr/a-propos/">A propos</a></li>
<li id="menu-item-4630" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-4630"><a href="https://www.solidarite-numerique.fr/contact/">Contact</a></li>
<li id="menu-item-2418" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-2418"><a href="https://www.solidarite-numerique.fr/cartographie/">Cartographie</a></li>
<li id="menu-item-2421" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-2421"><a href="https://www.solidarite-numerique.fr/thematiques/">Thématiques</a></li>
<li id="menu-item-2419" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-2419"><a href="https://www.solidarite-numerique.fr/mentions-legales/">Mentions légales</a></li>
<li id="menu-item-2420" class="menu-item menu-item-type-post_type menu-item-object-page menu-item-2420"><a href="https://www.solidarite-numerique.fr/politique-de-confidentialite/">Politique de <br> confidentialité</a></li>
</ul>
</div>
</footer>
<div class="last-footer">
<div class="container flex">
<img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/MN.svg" alt=""/>
<img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/RF.svg" alt=""/>
<img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/FR.svg" alt=""/>
<img src="https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/images/SE.svg" alt=""/>
</div>
</div>
<!--/Footer-->
<script type='text/javascript' id='contact-form-7-js-extra'>
/* <![CDATA[ */
var wpcf7 = {"apiSettings":{"root":"https:\/\/www.solidarite-numerique.fr\/wp-json\/contact-form-7\/v1","namespace":"contact-form-7\/v1"}};
/* ]]> */
</script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/contact-form-7/includes/js/scripts.js?ver=5.2' id='contact-form-7-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/js/slick.js?ver=1' id='slick-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/js/custom.js?ver=1' id='custom-js'></script>
<script type='text/javascript' id='pdf_print-js-extra'>
/* <![CDATA[ */
var ajaxurl = "https:\/\/www.solidarite-numerique.fr\/wp-admin\/admin-ajax.php";
/* ]]> */
</script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/themes/snum-v2/js/pdf_print.js?ver=1' id='pdf_print-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-content/plugins/learnpress/assets/js/frontend/course.min.js?ver=3.2.8.8' id='course-js'></script>
<script type='text/javascript' src='https://www.solidarite-numerique.fr/wp-includes/js/wp-embed.min.js?ver=5.6.2' id='wp-embed-js'></script>
<!-- autocomplete -->
</section><!--/inner_content_container -->
</div>
</body>
</html>

10
tests/encodings/file04.js Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="gb2312">
<title>Test website</title>
<link rel="apple-touch-icon" sizes="180x180" href="./icons/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="./icons/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="./icons/favicon-16x16.png">
<link rel="manifest" href="./icons/site.webmanifest">
<link rel="shortcut icon" href="./icons/favicon.ico">
<link rel="stylesheet" href="./css-broken-file.css">
</head>
<body>
<h2>Chinese encoding</h2>
<p>
<img src="http://www.mediaelementjs.com/images/big_buck_bunny.jpg">
</p>
<p>This file has been encoded with Simplified Chinese gb2312 encoding and simplified chinese characters: ºº×Ö.</p>
</body>
</html>

View file

@ -0,0 +1,26 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Test website</title>
<link rel="apple-touch-icon" sizes="180x180" href="./icons/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="./icons/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="./icons/favicon-16x16.png">
<link rel="manifest" href="./icons/site.webmanifest">
<link rel="shortcut icon" href="./icons/favicon.ico">
<link rel="stylesheet" href="./css-broken-file.css">
</head>
<body>
<h2>Chinese encoding</h2>
<p>
<img src="http://www.mediaelementjs.com/images/big_buck_bunny.jpg">
</p>
<p>This file has been encoded with Simplified Chinese gb2312 encoding and simplified chinese characters: ºº×Ö.</p>
</body>
</html>

11
tests/encodings/file08.js Normal file

File diff suppressed because one or more lines are too long

View file

@ -38,6 +38,7 @@ def rewrite_generator():
set(),
set(),
set(),
["UTF-8", "ISO-8859-1"],
).rewrite(Template(""), Template(""))
yield generate_and_call

View file

@ -1,4 +1,7 @@
import json
from collections.abc import Generator
from dataclasses import dataclass
from pathlib import Path
import pytest
@ -25,7 +28,9 @@ class EncodedForTest:
@pytest.fixture(
params=[
"Simple ascii content",
"A content with non ascii char éœo€ð",
"A content with non ascii chars éœo€ð",
"Latin1 contént",
"Latin2 conteňt",
"这是中文文本", # "This is a chinese text" (in chinese)
]
)
@ -40,6 +45,7 @@ def content(request):
"utf-16",
"utf-32",
"latin1",
"latin2",
"gb2312",
"gbk",
]
@ -53,192 +59,38 @@ def simple_encoded_content(content, encoding):
return EncodedForTest(content, encoding)
def test_decode(simple_encoded_content):
def test_decode_http_header(simple_encoded_content):
if not simple_encoded_content.valid:
# Nothing to test
return
result = to_string(simple_encoded_content.encoded, None)
assert result.value == simple_encoded_content.content
assert result.encoding
assert not result.chars_ignored
@pytest.fixture(
params=[
"ascii",
"utf-8",
"utf-16",
"utf-32",
"latin1",
"gb2312",
"gbk",
"wrong-encoding",
]
)
def declared_encoding(request):
return request.param
# This is a set of content/encoding/decoding combinations that we know to fail.
# For example, the content "Simple ascii content" encoded using ascii can be decoded
# by utf-16. However, it doesn't mean that it was decoded correctly.
# In this case, "utf-16" decoded it as "...."
# And this combination is not only based on the tuple encoding/decoding.
# The content itself may impact whether we can decode the bytes, and so whether we try
# heuristics or not. No real choice but to maintain a dict of untestable configurations.
FAILING_DECODE_COMBINATION = {
# Encodings/decodings failing for simple ascii content
"Simple ascii content": {
# Decoding failing for simple ascii content encoded in ascii
"ascii": ["utf-16"],
# Decoding failing for simple ascii content encoded in utf-8
"utf-8": [
"utf-16",
"utf-32",
"gb2312",
"gbk",
],
"utf-16": ["latin1"],
"utf-32": ["utf-16", "latin1"],
"latin1": ["utf-16"],
"gb2312": ["utf-16"],
"gbk": ["utf-16"],
},
"A content with non ascii char éœo€ð": {
"ascii": [],
"utf-8": ["utf-16", "latin1"],
"utf-16": ["latin1"],
"utf-32": ["utf-16", "latin1"],
"latin1": [],
"gb2312": [],
"gbk": [],
},
"这是中文文本": {
"ascii": [],
"utf-8": ["utf-16", "latin1"],
"utf-16": ["latin1"],
"utf-32": ["utf-16", "latin1"],
"latin1": [],
"gb2312": ["utf-16", "latin1"],
"gbk": ["utf-16", "latin1"],
},
}
assert (
to_string(simple_encoded_content.encoded, simple_encoded_content.encoding, [])
== simple_encoded_content.content
)
@dataclass
class DeclaredEncodedForTest(EncodedForTest):
declared_encoding: str
correct: bool
def __init__(self, content: str, encoding: str, declared_encoding: str):
super().__init__(content, encoding)
self.declared_encoding = declared_encoding
self.correct = self.valid
if (
self.valid
and content in FAILING_DECODE_COMBINATION
and declared_encoding in FAILING_DECODE_COMBINATION[content][encoding]
):
self.correct = False
class DeclaredHtmlEncodedForTest(EncodedForTest):
def __init__(self, content: str, encoding: str):
html_content = f'<html><meta charset="{encoding}"><body>{content}</body></html>'
super().__init__(html_content, encoding)
@pytest.fixture
def declared_encoded_content(content, encoding, declared_encoding):
return DeclaredEncodedForTest(content, encoding, declared_encoding)
def declared_html_encoded_content(content, encoding):
return DeclaredHtmlEncodedForTest(content, encoding)
def test_declared_decode(declared_encoded_content):
test_case = declared_encoded_content
if not test_case.valid:
return
result = to_string(test_case.encoded, test_case.declared_encoding)
if test_case.correct:
assert result.value == test_case.content
assert result.encoding
assert not result.chars_ignored
# This is a set of content/encoding/decoding combinations that we know to fail.
# For example, the content "Simple ascii content" encoded using ascii can be decoded
# by utf-16. However, it doesn't mean that it was decoded correctly.
# In this case, "utf-16" decoded it as "...."
# And this combination is not only based on the tuple encoding/decoding.
# The content itself may impact whether we can decode the bytes, and so whether we try
# heuristics or not. No real choice but to maintain a dict of untestable configurations.
FAILING_DECODE_HTML_COMBINATION = {
# All encoding/declared_encoding combinations failing for simple ascii content
"Simple ascii content": {
"ascii": [],
"utf-8": [],
"utf-16": [],
"utf-32": [],
"latin1": [],
"gb2312": [],
"gbk": [],
},
"A content with non ascii char éœo€ð": {
"ascii": [],
"utf-8": ["latin1"],
"utf-16": [],
"utf-32": [],
"latin1": [],
"gb2312": [],
"gbk": [],
},
"这是中文文本": {
"ascii": [],
"utf-8": ["latin1"],
"utf-16": [],
"utf-32": [],
"latin1": [],
"gb2312": ["latin1"],
"gbk": ["latin1"],
},
}
@dataclass
class DeclaredHtmlEncodedForTest(DeclaredEncodedForTest):
declared_encoding: str
correct: bool
def __init__(self, content: str, encoding: str, declared_encoding: str):
html_content = (
f'<html><meta charset="{declared_encoding}"><body>{content}</body></html>'
)
super().__init__(html_content, encoding, declared_encoding)
self.correct = self.valid
if (
self.valid
and declared_encoding in FAILING_DECODE_HTML_COMBINATION[content][encoding]
):
self.correct = False
@pytest.fixture
def declared_html_encoded_content(content, encoding, declared_encoding):
return DeclaredHtmlEncodedForTest(content, encoding, declared_encoding)
def test_declared_decode_html(declared_html_encoded_content):
def test_decode_html_header(declared_html_encoded_content):
test_case = declared_html_encoded_content
if not test_case.valid:
return
result = to_string(test_case.encoded, None)
if test_case.correct:
assert result.value == test_case.content
assert result.encoding
assert not result.chars_ignored
assert to_string(test_case.encoded, None, []) == test_case.content
def test_decode_str(content, declared_encoding):
result = to_string(content, declared_encoding)
assert result.value == content
assert result.encoding is None
assert not result.chars_ignored
def test_decode_str(content, encoding):
result = to_string(content, encoding, [])
assert result == content
def test_binary_content():
@ -246,21 +98,77 @@ def test_binary_content():
content = bytes([0xEF, 0xBB, 0xBF]) + content
# [0xEF, 0xBB, 0xBF] is a BOM marker for utf-8
# It will trick chardet to be really confident it is utf-8.
# However, this cannot be decoded using utf-8
with pytest.raises(ValueError):
assert to_string(content, None)
# Make coverage pass on code avoiding us to try the same encoding twice
result = to_string(content, "UTF-8-SIG")
assert result.encoding == "UTF-8-SIG"
assert result.chars_ignored
# However, this cannot be properly decoded using utf-8 ; but a value is still
# returned, since upstream server promised this is utf-8
assert to_string(content, "UTF-8", [])
def test_single_bad_character():
content = bytes([0xEF, 0xBB, 0xBF]) + b"prem" + bytes([0xC3]) + "ière".encode()
# [0xEF, 0xBB, 0xBF] is a BOM marker for utf-8-sig
# 0xC3 is a bad character (nothing in utf-8-sig at this position)
result = to_string(content, "utf-8-sig")
assert result.value == "première"
assert result.encoding == "utf-8-sig"
assert result.chars_ignored
result = to_string(content, "utf-8-sig", [])
assert result == "prem<EFBFBD>ière"
def test_decode_charset_to_try(simple_encoded_content):
    """Decode the sample with its own encoding as the only list entry.

    ``None`` is passed as the encoding argument, and the sample's encoding
    is the single element of the list given as third argument (presumably
    the charsets to try -- confirm against ``to_string``). Samples the
    fixture flags as not valid are not checked.
    """
    sample = simple_encoded_content
    if sample.valid:
        decoded = to_string(sample.encoded, None, [sample.encoding])
        assert decoded == sample.content
def test_decode_weird_encoding_not_declared_not_in_try_list():
    """Expect ``ValueError`` for latin1 bytes when only UTF-8 is listed.

    ``None`` is passed as the encoding argument and the list contains
    UTF-8 only.
    """
    latin1_bytes = "Latin1 contént".encode("latin1")
    candidates = ["UTF-8"]
    with pytest.raises(ValueError):
        to_string(latin1_bytes, None, candidates)
def test_decode_weird_encoding_not_declared_in_try_list():
    """Latin1 bytes decode back to the text when latin1 is listed.

    ``None`` is passed as the encoding argument; the list holds UTF-8
    followed by latin1.
    """
    expected = "Latin1 contént"
    decoded = to_string(expected.encode("latin1"), None, ["UTF-8", "latin1"])
    assert decoded == expected
@dataclass
class CharsetsTestData:
filename: str
probable_charset: str | None # probable charset to use
known_charset: str | None # charset we know is being used (fake file typically)
http_charset: (
str | None
) # encoding to pass as http header because file is missing details and encoding is
# not standard
expected_strings: list[str]
def get_testdata() -> Generator[CharsetsTestData, None, None]:
    """Yield one ``CharsetsTestData`` per entry of ``encodings/definition.json``.

    The JSON file is read from the ``encodings`` directory next to this
    module. Each item of its ``files`` list must have a ``filename``; the
    charset keys default to ``None`` and ``expected_strings`` to an empty
    list when absent.
    """
    definition_path = Path(__file__).parent / "encodings" / "definition.json"
    definition = json.loads(definition_path.read_bytes())
    optional_charsets = ("probable_charset", "known_charset", "http_charset")
    for entry in definition["files"]:
        yield CharsetsTestData(
            filename=entry["filename"],
            expected_strings=entry.get("expected_strings", []),
            **{key: entry.get(key) for key in optional_charsets},
        )
def get_testdata_id(test_data: CharsetsTestData) -> str:
    """Return the entry's filename, used as the parametrized test id."""
    test_id = test_data.filename
    return test_id
@pytest.mark.parametrize("testdata", get_testdata(), ids=get_testdata_id)
def test_decode_files(testdata: CharsetsTestData):
    """Decode a sample file and check every expected string is present.

    The file is read from the ``encodings`` directory next to this module
    and passed to ``to_string`` with the entry's ``http_charset`` and the
    list ``["UTF-8", "latin1"]``.
    """
    sample_path = Path(__file__).parent / "encodings" / testdata.filename
    decoded = to_string(
        sample_path.read_bytes(), testdata.http_charset, ["UTF-8", "latin1"]
    )
    for needle in testdata.expected_strings:
        assert needle in decoded