mirror of
https://github.com/python/cpython.git
synced 2025-11-01 06:01:29 +00:00
This patch changes the default behaviour of the builtin charmap
codec to not apply Latin-1 mappings for keys which are not found in the mapping dictionaries, but instead treat them as undefined mappings. The patch was originally written by Martin v. Loewis with some additional (cosmetic) changes and an updated test script by Marc-Andre Lemburg. The standard codecs were recreated from the most current files available at the Unicode.org site using the Tools/scripts/gencodec.py tool. This patch closes the bugs #116285 and #119960.
This commit is contained in:
parent
b55b7bb3ab
commit
a866df806d
56 changed files with 424 additions and 293 deletions
|
|
@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
|
||||||
sr.file_encoding = file_encoding
|
sr.file_encoding = file_encoding
|
||||||
return sr
|
return sr
|
||||||
|
|
||||||
|
### Helpers for charmap-based codecs
|
||||||
|
|
||||||
|
def make_identity_dict(rng):
|
||||||
|
|
||||||
|
""" make_identity_dict(rng) -> dict
|
||||||
|
|
||||||
|
Return a dictionary where elements of the rng sequence are
|
||||||
|
mapped to themselves.
|
||||||
|
|
||||||
|
"""
|
||||||
|
res = {}
|
||||||
|
for i in rng:
|
||||||
|
res[i]=i
|
||||||
|
return res
|
||||||
|
|
||||||
### Tests
|
### Tests
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP037.TXT'.
|
""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
|
@ -273,7 +273,7 @@ def getregentry():
|
||||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1006.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
|
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||||
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
|
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||||
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
|
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||||
|
|
@ -131,7 +131,7 @@ def getregentry():
|
||||||
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
|
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
|
||||||
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
|
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
|
||||||
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
|
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1026.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
|
@ -273,7 +273,7 @@ def getregentry():
|
||||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1250.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -116,7 +116,7 @@ def getregentry():
|
||||||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1251.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||||
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -150,7 +150,7 @@ def getregentry():
|
||||||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||||
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
|
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1252.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -69,7 +69,7 @@ def getregentry():
|
||||||
0x009d: None, # UNDEFINED
|
0x009d: None, # UNDEFINED
|
||||||
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||||
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1253.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -144,7 +144,7 @@ def getregentry():
|
||||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1254.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -75,7 +75,7 @@ def getregentry():
|
||||||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1255.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -136,7 +136,7 @@ def getregentry():
|
||||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1256.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: 0x067e, # ARABIC LETTER PEH
|
0x0081: 0x067e, # ARABIC LETTER PEH
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -122,7 +122,7 @@ def getregentry():
|
||||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||||
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
|
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1257.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -124,7 +124,7 @@ def getregentry():
|
||||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1258.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
|
@ -83,7 +83,7 @@ def getregentry():
|
||||||
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
|
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
|
||||||
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
|
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
|
||||||
0x00fe: 0x20ab, # DONG SIGN
|
0x00fe: 0x20ab, # DONG SIGN
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP424.TXT'.
|
""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # SELECT
|
0x0004: 0x009c, # SELECT
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # REQUIRED NEW LINE
|
0x0006: 0x0086, # REQUIRED NEW LINE
|
||||||
|
|
@ -273,7 +273,7 @@ def getregentry():
|
||||||
0x00fd: None, # UNDEFINED
|
0x00fd: None, # UNDEFINED
|
||||||
0x00fe: None, # UNDEFINED
|
0x00fe: None, # UNDEFINED
|
||||||
0x00ff: 0x009f, # EIGHT ONES
|
0x00ff: 0x009f, # EIGHT ONES
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP437.TXT'.
|
""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP500.TXT'.
|
""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
|
@ -273,7 +273,7 @@ def getregentry():
|
||||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP737.TXT'.
|
""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||||
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
|
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||||
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP775.TXT'.
|
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP850.TXT'.
|
""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP852.TXT'.
|
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP855.TXT'.
|
""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
|
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
|
||||||
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||||
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
|
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00a7, # SECTION SIGN
|
0x00fd: 0x00a7, # SECTION SIGN
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP856.TXT'.
|
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
||||||
0x0081: 0x05d1, # HEBREW LETTER BET
|
0x0081: 0x05d1, # HEBREW LETTER BET
|
||||||
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
||||||
|
|
@ -120,10 +120,10 @@ def getregentry():
|
||||||
0x00d0: None, # UNDEFINED
|
0x00d0: None, # UNDEFINED
|
||||||
0x00d1: None, # UNDEFINED
|
0x00d1: None, # UNDEFINED
|
||||||
0x00d2: None, # UNDEFINED
|
0x00d2: None, # UNDEFINED
|
||||||
0x00d3: None, # UNDEFINED
|
0x00d3: None, # UNDEFINED
|
||||||
0x00d4: None, # UNDEFINED
|
0x00d4: None, # UNDEFINED
|
||||||
0x00d5: None, # UNDEFINED
|
0x00d5: None, # UNDEFINED
|
||||||
0x00d6: None, # UNDEFINED
|
0x00d6: None, # UNDEFINED
|
||||||
0x00d7: None, # UNDEFINED
|
0x00d7: None, # UNDEFINED
|
||||||
0x00d8: None, # UNDEFINED
|
0x00d8: None, # UNDEFINED
|
||||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP857.TXT'.
|
""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -164,7 +164,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP860.TXT'.
|
""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP861.TXT'.
|
""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP862.TXT'.
|
""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
||||||
0x0081: 0x05d1, # HEBREW LETTER BET
|
0x0081: 0x05d1, # HEBREW LETTER BET
|
||||||
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP863.TXT'.
|
""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP864.TXT'.
|
""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0025: 0x066a, # ARABIC PERCENT SIGN
|
0x0025: 0x066a, # ARABIC PERCENT SIGN
|
||||||
0x0080: 0x00b0, # DEGREE SIGN
|
0x0080: 0x00b0, # DEGREE SIGN
|
||||||
0x0081: 0x00b7, # MIDDLE DOT
|
0x0081: 0x00b7, # MIDDLE DOT
|
||||||
|
|
@ -163,7 +163,7 @@ def getregentry():
|
||||||
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
|
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP865.TXT'.
|
""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP866.TXT'.
|
""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x00a4, # CURRENCY SIGN
|
0x00fd: 0x00a4, # CURRENCY SIGN
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP869.TXT'.
|
""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: None, # UNDEFINED
|
0x0080: None, # UNDEFINED
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: None, # UNDEFINED
|
0x0082: None, # UNDEFINED
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP874.TXT'.
|
""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: None, # UNDEFINED
|
0x0082: None, # UNDEFINED
|
||||||
|
|
@ -164,7 +164,7 @@ def getregentry():
|
||||||
0x00fd: None, # UNDEFINED
|
0x00fd: None, # UNDEFINED
|
||||||
0x00fe: None, # UNDEFINED
|
0x00fe: None, # UNDEFINED
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP875.TXT'.
|
""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
|
@ -274,7 +274,7 @@ def getregentry():
|
||||||
0x00fd: 0x001a, # SUBSTITUTE
|
0x00fd: 0x001a, # SUBSTITUTE
|
||||||
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-1.TXT'.
|
""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -22,10 +22,7 @@ def decode(self,input,errors='strict'):
|
||||||
return codecs.charmap_decode(input,errors,decoding_map)
|
return codecs.charmap_decode(input,errors,decoding_map)
|
||||||
|
|
||||||
class StreamWriter(Codec,codecs.StreamWriter):
|
class StreamWriter(Codec,codecs.StreamWriter):
|
||||||
|
pass
|
||||||
def __init__(self,stream,errors='strict'):
|
|
||||||
|
|
||||||
codecs.StreamWriter.__init__(self,strict,errors)
|
|
||||||
|
|
||||||
class StreamReader(Codec,codecs.StreamReader):
|
class StreamReader(Codec,codecs.StreamReader):
|
||||||
pass
|
pass
|
||||||
|
|
@ -38,9 +35,9 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-10.TXT'.
|
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||||
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
|
|
@ -83,7 +83,7 @@ def getregentry():
|
||||||
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||||
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||||
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
|
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-13.TXT'.
|
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||||
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||||
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||||
|
|
@ -93,7 +93,7 @@ def getregentry():
|
||||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||||
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-14.TXT'.
|
""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||||
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
|
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
|
||||||
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
|
|
@ -68,7 +68,7 @@ def getregentry():
|
||||||
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||||
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
|
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
|
||||||
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-15.TXT'.
|
""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a4: 0x20ac, # EURO SIGN
|
0x00a4: 0x20ac, # EURO SIGN
|
||||||
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||||
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||||
|
|
@ -45,7 +45,7 @@ def getregentry():
|
||||||
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
|
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||||
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
|
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
|
||||||
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-2.TXT'.
|
""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
0x00a2: 0x02d8, # BREVE
|
0x00a2: 0x02d8, # BREVE
|
||||||
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||||
|
|
@ -94,7 +94,7 @@ def getregentry():
|
||||||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-3.TXT'.
|
""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,15 +35,17 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
|
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
|
||||||
0x00a2: 0x02d8, # BREVE
|
0x00a2: 0x02d8, # BREVE
|
||||||
|
0x00a5: None,
|
||||||
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||||
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||||
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x00ae: None,
|
||||||
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||||
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
|
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
|
||||||
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||||
|
|
@ -51,21 +53,26 @@ def getregentry():
|
||||||
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||||
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x00be: None,
|
||||||
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
|
0x00c3: None,
|
||||||
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x00d0: None,
|
||||||
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||||
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||||
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
|
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
|
||||||
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||||
|
0x00e3: None,
|
||||||
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||||
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x00f0: None,
|
||||||
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||||
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||||
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
|
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
|
||||||
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-4.TXT'.
|
""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
|
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
|
||||||
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||||
|
|
@ -87,7 +87,7 @@ def getregentry():
|
||||||
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||||
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-5.TXT'.
|
""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||||
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||||
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||||
|
|
@ -131,7 +131,7 @@ def getregentry():
|
||||||
0x00fd: 0x00a7, # SECTION SIGN
|
0x00fd: 0x00a7, # SECTION SIGN
|
||||||
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||||
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-6.TXT'.
|
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,11 +35,38 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
|
0x00a1: None,
|
||||||
|
0x00a2: None,
|
||||||
|
0x00a3: None,
|
||||||
|
0x00a5: None,
|
||||||
|
0x00a6: None,
|
||||||
|
0x00a7: None,
|
||||||
|
0x00a8: None,
|
||||||
|
0x00a9: None,
|
||||||
|
0x00aa: None,
|
||||||
|
0x00ab: None,
|
||||||
0x00ac: 0x060c, # ARABIC COMMA
|
0x00ac: 0x060c, # ARABIC COMMA
|
||||||
|
0x00ae: None,
|
||||||
|
0x00af: None,
|
||||||
|
0x00b0: None,
|
||||||
|
0x00b1: None,
|
||||||
|
0x00b2: None,
|
||||||
|
0x00b3: None,
|
||||||
|
0x00b4: None,
|
||||||
|
0x00b5: None,
|
||||||
|
0x00b6: None,
|
||||||
|
0x00b7: None,
|
||||||
|
0x00b8: None,
|
||||||
|
0x00b9: None,
|
||||||
|
0x00ba: None,
|
||||||
0x00bb: 0x061b, # ARABIC SEMICOLON
|
0x00bb: 0x061b, # ARABIC SEMICOLON
|
||||||
|
0x00bc: None,
|
||||||
|
0x00bd: None,
|
||||||
|
0x00be: None,
|
||||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||||
|
0x00c0: None,
|
||||||
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
||||||
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||||
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||||
|
|
@ -66,6 +93,11 @@ def getregentry():
|
||||||
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
||||||
0x00d9: 0x0639, # ARABIC LETTER AIN
|
0x00d9: 0x0639, # ARABIC LETTER AIN
|
||||||
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
||||||
|
0x00db: None,
|
||||||
|
0x00dc: None,
|
||||||
|
0x00dd: None,
|
||||||
|
0x00de: None,
|
||||||
|
0x00df: None,
|
||||||
0x00e0: 0x0640, # ARABIC TATWEEL
|
0x00e0: 0x0640, # ARABIC TATWEEL
|
||||||
0x00e1: 0x0641, # ARABIC LETTER FEH
|
0x00e1: 0x0641, # ARABIC LETTER FEH
|
||||||
0x00e2: 0x0642, # ARABIC LETTER QAF
|
0x00e2: 0x0642, # ARABIC LETTER QAF
|
||||||
|
|
@ -85,7 +117,20 @@ def getregentry():
|
||||||
0x00f0: 0x0650, # ARABIC KASRA
|
0x00f0: 0x0650, # ARABIC KASRA
|
||||||
0x00f1: 0x0651, # ARABIC SHADDA
|
0x00f1: 0x0651, # ARABIC SHADDA
|
||||||
0x00f2: 0x0652, # ARABIC SUKUN
|
0x00f2: 0x0652, # ARABIC SUKUN
|
||||||
}
|
0x00f3: None,
|
||||||
|
0x00f4: None,
|
||||||
|
0x00f5: None,
|
||||||
|
0x00f6: None,
|
||||||
|
0x00f7: None,
|
||||||
|
0x00f8: None,
|
||||||
|
0x00f9: None,
|
||||||
|
0x00fa: None,
|
||||||
|
0x00fb: None,
|
||||||
|
0x00fc: None,
|
||||||
|
0x00fd: None,
|
||||||
|
0x00fe: None,
|
||||||
|
0x00ff: None,
|
||||||
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-7.TXT'.
|
""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,10 +35,14 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
|
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||||
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||||
|
0x00a4: None,
|
||||||
|
0x00a5: None,
|
||||||
|
0x00aa: None,
|
||||||
|
0x00ae: None,
|
||||||
0x00af: 0x2015, # HORIZONTAL BAR
|
0x00af: 0x2015, # HORIZONTAL BAR
|
||||||
0x00b4: 0x0384, # GREEK TONOS
|
0x00b4: 0x0384, # GREEK TONOS
|
||||||
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
|
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
|
||||||
|
|
@ -67,6 +71,7 @@ def getregentry():
|
||||||
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||||
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||||
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||||
|
0x00d2: None,
|
||||||
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||||
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||||
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||||
|
|
@ -111,7 +116,8 @@ def getregentry():
|
||||||
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
}
|
0x00ff: None,
|
||||||
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-8.TXT'.
|
""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,11 +35,43 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
|
0x00a1: None,
|
||||||
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
||||||
0x00af: 0x203e, # OVERLINE
|
|
||||||
0x00ba: 0x00f7, # DIVISION SIGN
|
0x00ba: 0x00f7, # DIVISION SIGN
|
||||||
|
0x00bf: None,
|
||||||
|
0x00c0: None,
|
||||||
|
0x00c1: None,
|
||||||
|
0x00c2: None,
|
||||||
|
0x00c3: None,
|
||||||
|
0x00c4: None,
|
||||||
|
0x00c5: None,
|
||||||
|
0x00c6: None,
|
||||||
|
0x00c7: None,
|
||||||
|
0x00c8: None,
|
||||||
|
0x00c9: None,
|
||||||
|
0x00ca: None,
|
||||||
|
0x00cb: None,
|
||||||
|
0x00cc: None,
|
||||||
|
0x00cd: None,
|
||||||
|
0x00ce: None,
|
||||||
|
0x00cf: None,
|
||||||
|
0x00d0: None,
|
||||||
|
0x00d1: None,
|
||||||
|
0x00d2: None,
|
||||||
|
0x00d3: None,
|
||||||
|
0x00d4: None,
|
||||||
|
0x00d5: None,
|
||||||
|
0x00d6: None,
|
||||||
|
0x00d7: None,
|
||||||
|
0x00d8: None,
|
||||||
|
0x00d9: None,
|
||||||
|
0x00da: None,
|
||||||
|
0x00db: None,
|
||||||
|
0x00dc: None,
|
||||||
|
0x00dd: None,
|
||||||
|
0x00de: None,
|
||||||
0x00df: 0x2017, # DOUBLE LOW LINE
|
0x00df: 0x2017, # DOUBLE LOW LINE
|
||||||
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
||||||
0x00e1: 0x05d1, # HEBREW LETTER BET
|
0x00e1: 0x05d1, # HEBREW LETTER BET
|
||||||
|
|
@ -68,7 +100,12 @@ def getregentry():
|
||||||
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
||||||
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
||||||
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
||||||
}
|
0x00fb: None,
|
||||||
|
0x00fc: None,
|
||||||
|
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||||
|
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||||
|
0x00ff: None,
|
||||||
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-9.TXT'.
|
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,15 +35,15 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||||
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
|
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||||
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||||
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||||
|
|
@ -165,7 +165,7 @@ def getregentry():
|
||||||
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
||||||
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
||||||
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
|
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||||
|
|
@ -160,7 +160,7 @@ def getregentry():
|
||||||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||||
0x00ff: 0x00a4, # CURRENCY SIGN
|
0x00ff: 0x00a4, # CURRENCY SIGN
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'GREEK.TXT'.
|
""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00b9, # SUPERSCRIPT ONE
|
0x0081: 0x00b9, # SUPERSCRIPT ONE
|
||||||
0x0082: 0x00b2, # SUPERSCRIPT TWO
|
0x0082: 0x00b2, # SUPERSCRIPT TWO
|
||||||
|
|
@ -163,7 +163,7 @@ def getregentry():
|
||||||
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||||
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
|
""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
|
@ -159,7 +159,7 @@ def getregentry():
|
||||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||||
0x00fe: 0x02db, # OGONEK
|
0x00fe: 0x02db, # OGONEK
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
|
""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||||
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||||
|
|
@ -163,7 +163,7 @@ def getregentry():
|
||||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
|
""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
|
@ -160,7 +160,7 @@ def getregentry():
|
||||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||||
0x00fe: 0x02db, # OGONEK
|
0x00fe: 0x02db, # OGONEK
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
|
""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
|
@ -160,7 +160,7 @@ def getregentry():
|
||||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||||
0x00fe: 0x02db, # OGONEK
|
0x00fe: 0x02db, # OGONEK
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -494,14 +494,15 @@ def __str__(self):
|
||||||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||||||
'cp863', 'cp865', 'cp866',
|
'cp863', 'cp865', 'cp866',
|
||||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
'iso8859_2', 'iso8859_4', 'iso8859_5',
|
||||||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
'iso8859_9', 'koi8_r', 'latin_1',
|
||||||
'mac_cyrillic', 'mac_latin2',
|
'mac_cyrillic', 'mac_latin2',
|
||||||
|
|
||||||
### These have undefined mappings:
|
### These have undefined mappings:
|
||||||
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||||||
#'cp1256', 'cp1257', 'cp1258',
|
#'cp1256', 'cp1257', 'cp1258',
|
||||||
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||||||
|
#'iso8859_3', 'iso8859_6', 'iso8859_7',
|
||||||
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||||||
|
|
||||||
### These fail the round-trip:
|
### These fail the round-trip:
|
||||||
|
|
|
||||||
|
|
@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
Py_DECREF(w);
|
Py_DECREF(w);
|
||||||
if (x == NULL) {
|
if (x == NULL) {
|
||||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||||
/* No mapping found: default to Latin-1 mapping */
|
/* No mapping found means: mapping is undefined. */
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
*p++ = (Py_UNICODE)ch;
|
x = Py_None;
|
||||||
continue;
|
Py_INCREF(x);
|
||||||
}
|
} else
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
Py_DECREF(w);
|
Py_DECREF(w);
|
||||||
if (x == NULL) {
|
if (x == NULL) {
|
||||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||||
/* No mapping found: default to Latin-1 mapping if possible */
|
/* No mapping found means: mapping is undefined. */
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
if (ch < 256) {
|
x = Py_None;
|
||||||
*s++ = (char)ch;
|
Py_INCREF(x);
|
||||||
continue;
|
} else
|
||||||
}
|
|
||||||
else if (!charmap_encoding_error(&p, &s, errors,
|
|
||||||
"missing character mapping"))
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
""" Unicode Mapping Parser and Codec Generator.
|
""" Unicode Mapping Parser and Codec Generator.
|
||||||
|
|
||||||
This script parses Unicode mapping files as available from the Unicode
|
This script parses Unicode mapping files as available from the Unicode
|
||||||
site (ftp.unicode.org) and creates Python codec modules from them. The
|
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
|
||||||
codecs use the standard character mapping codec to actually apply the
|
modules from them. The codecs use the standard character mapping codec
|
||||||
mapping.
|
to actually apply the mapping.
|
||||||
|
|
||||||
Synopsis: gencodec.py dir codec_prefix
|
Synopsis: gencodec.py dir codec_prefix
|
||||||
|
|
||||||
|
|
@ -18,6 +18,7 @@
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright Guido van Rossum, 2000.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -70,6 +71,10 @@ def readmap(filename,
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
f.close()
|
f.close()
|
||||||
enc2uni = {}
|
enc2uni = {}
|
||||||
|
identity = []
|
||||||
|
unmapped = range(256)
|
||||||
|
for i in range(256):
|
||||||
|
unmapped[i] = i
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = strip(line)
|
line = strip(line)
|
||||||
if not line or line[0] == '#':
|
if not line or line[0] == '#':
|
||||||
|
|
@ -85,8 +90,22 @@ def readmap(filename,
|
||||||
comment = ''
|
comment = ''
|
||||||
else:
|
else:
|
||||||
comment = comment[1:]
|
comment = comment[1:]
|
||||||
if enc != uni:
|
if enc < 256:
|
||||||
|
unmapped.remove(enc)
|
||||||
|
if enc == uni:
|
||||||
|
identity.append(enc)
|
||||||
|
else:
|
||||||
|
enc2uni[enc] = (uni,comment)
|
||||||
|
else:
|
||||||
enc2uni[enc] = (uni,comment)
|
enc2uni[enc] = (uni,comment)
|
||||||
|
# If there are more identity-mapped entries than unmapped entries,
|
||||||
|
# it pays to generate an identity dictionary first, and add explicit
|
||||||
|
# mappings to None for the rest
|
||||||
|
if len(identity)>=len(unmapped):
|
||||||
|
for enc in unmapped:
|
||||||
|
enc2uni[enc] = (None, "")
|
||||||
|
enc2uni['IDENTITY'] = 256
|
||||||
|
|
||||||
return enc2uni
|
return enc2uni
|
||||||
|
|
||||||
def hexrepr(t,
|
def hexrepr(t,
|
||||||
|
|
@ -143,11 +162,12 @@ def codegen(name,map,comments=1):
|
||||||
"""
|
"""
|
||||||
l = [
|
l = [
|
||||||
'''\
|
'''\
|
||||||
""" Python Character Mapping Codec generated from '%s'.
|
""" Python Character Mapping Codec generated from '%s' with gencodec.py.
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
|
@ -178,15 +198,23 @@ def getregentry():
|
||||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
|
||||||
''' % name,
|
''' % name,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if map.has_key("IDENTITY"):
|
||||||
|
l.append("decoding_map = codecs.make_identity_dict(range(%d))"
|
||||||
|
% map["IDENTITY"])
|
||||||
|
l.append("decoding_map.update({")
|
||||||
|
splits = 1
|
||||||
|
del map["IDENTITY"]
|
||||||
|
else:
|
||||||
|
l.append("decoding_map = {")
|
||||||
|
splits = 0
|
||||||
|
|
||||||
mappings = map.items()
|
mappings = map.items()
|
||||||
mappings.sort()
|
mappings.sort()
|
||||||
append = l.append
|
append = l.append
|
||||||
i = 0
|
i = 0
|
||||||
splits = 0
|
|
||||||
for e,value in mappings:
|
for e,value in mappings:
|
||||||
try:
|
try:
|
||||||
(u,c) = value
|
(u,c) = value
|
||||||
|
|
@ -198,7 +226,7 @@ def getregentry():
|
||||||
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
|
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
|
||||||
else:
|
else:
|
||||||
append('\t%s: %s,' % (key,unicoderepr(u)))
|
append('\t%s: %s,' % (key,unicoderepr(u)))
|
||||||
i = i + 1
|
i += 1
|
||||||
if i == 4096:
|
if i == 4096:
|
||||||
# Split the definition into parts so that the Python
|
# Split the definition into parts so that the Python
|
||||||
# parser doesn't dump core
|
# parser doesn't dump core
|
||||||
|
|
@ -206,7 +234,7 @@ def getregentry():
|
||||||
append('}')
|
append('}')
|
||||||
else:
|
else:
|
||||||
append('})')
|
append('})')
|
||||||
append('map.update({')
|
append('decoding_map.update({')
|
||||||
i = 0
|
i = 0
|
||||||
splits = splits + 1
|
splits = splits + 1
|
||||||
if splits == 0:
|
if splits == 0:
|
||||||
|
|
@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1):
|
||||||
|
|
||||||
mapnames = os.listdir(dir)
|
mapnames = os.listdir(dir)
|
||||||
for mapname in mapnames:
|
for mapname in mapnames:
|
||||||
if mapname[-len('.mapping'):] != '.mapping':
|
if not mapname.endswith('.mapping'):
|
||||||
continue
|
continue
|
||||||
codefile = mapname[:-len('.mapping')] + '.py'
|
codefile = mapname[:-len('.mapping')] + '.py'
|
||||||
print 'converting %s to %s' % (mapname,
|
print 'converting %s to %s' % (mapname,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue