This patch changes the default behaviour of the builtin charmap codec to not apply Latin-1 mappings for keys which are not found in the mapping dictionaries, but instead treat them as undefined mappings. The patch was originally written by Martin v. Loewis, with some additional (cosmetic) changes and an updated test script by Marc-Andre Lemburg. The standard codecs were recreated from the most current files available at the Unicode.org site using the Tools/scripts/gencodec.py tool. This patch closes bugs #116285 and #119960.
2025-11-01 06:01:29 +00:00 · 2001-01-03 21:29:14 +00:00 · 2001-01-03 21:29:14 +00:00 · a866df806d
commit a866df806d
parent b55b7bb3ab
56 changed files with 424 additions and 293 deletions
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
    sr.file_encoding = file_encoding
    return sr
 ### Helpers for charmap-based codecs
 def make_identity_dict(rng):
    """ make_identity_dict(rng) -> dict
        Build a dictionary from the rng sequence in which every
        element appears as a key whose value is that same element.
    """
    identity = {}
    for code in rng:
        # Each element is its own image.
        identity[code] = code
    return identity
 ### Tests
 if __name__ == '__main__':
--- a/Lib/encodings/cp037.py
+++ b/Lib/encodings/cp037.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP037.TXT'.
+""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0004: 0x009c,	# CONTROL
 	0x0005: 0x0009,	# HORIZONTAL TABULATION
 	0x0006: 0x0086,	# CONTROL
@ -273,7 +273,7 @@ def getregentry():
 	0x00fd: 0x00d9,	# LATIN CAPITAL LETTER U WITH GRAVE
 	0x00fe: 0x00da,	# LATIN CAPITAL LETTER U WITH ACUTE
 	0x00ff: 0x009f,	# CONTROL
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1006.py
+++ b/Lib/encodings/cp1006.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1006.TXT'.
+""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x06f0,	# 	EXTENDED ARABIC-INDIC DIGIT ZERO
 	0x00a2: 0x06f1,	# 	EXTENDED ARABIC-INDIC DIGIT ONE
 	0x00a3: 0x06f2,	# 	EXTENDED ARABIC-INDIC DIGIT TWO
@ -131,7 +131,7 @@ def getregentry():
 	0x00fd: 0xfbae,	# 	ARABIC LETTER YEH BARREE ISOLATED FORM
 	0x00fe: 0xfe7c,	# 	ARABIC SHADDA ISOLATED FORM
 	0x00ff: 0xfe7d,	# 	ARABIC SHADDA MEDIAL FORM
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1026.py
+++ b/Lib/encodings/cp1026.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1026.TXT'.
+""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0004: 0x009c,	# CONTROL
 	0x0005: 0x0009,	# HORIZONTAL TABULATION
 	0x0006: 0x0086,	# CONTROL
@ -273,7 +273,7 @@ def getregentry():
 	0x00fd: 0x00d9,	# LATIN CAPITAL LETTER U WITH GRAVE
 	0x00fe: 0x00da,	# LATIN CAPITAL LETTER U WITH ACUTE
 	0x00ff: 0x009f,	# CONTROL
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1250.py
+++ b/Lib/encodings/cp1250.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1250.TXT'.
+""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -116,7 +116,7 @@ def getregentry():
 	0x00fb: 0x0171,	# LATIN SMALL LETTER U WITH DOUBLE ACUTE
 	0x00fe: 0x0163,	# LATIN SMALL LETTER T WITH CEDILLA
 	0x00ff: 0x02d9,	# DOT ABOVE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1251.py
+++ b/Lib/encodings/cp1251.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1251.TXT'.
+""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x0402,	# CYRILLIC CAPITAL LETTER DJE
 	0x0081: 0x0403,	# CYRILLIC CAPITAL LETTER GJE
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -150,7 +150,7 @@ def getregentry():
 	0x00fd: 0x044d,	# CYRILLIC SMALL LETTER E
 	0x00fe: 0x044e,	# CYRILLIC SMALL LETTER YU
 	0x00ff: 0x044f,	# CYRILLIC SMALL LETTER YA
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1252.py
+++ b/Lib/encodings/cp1252.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1252.TXT'.
+""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -69,7 +69,7 @@ def getregentry():
 	0x009d: None,	# UNDEFINED
 	0x009e: 0x017e,	# LATIN SMALL LETTER Z WITH CARON
 	0x009f: 0x0178,	# LATIN CAPITAL LETTER Y WITH DIAERESIS
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1253.py
+++ b/Lib/encodings/cp1253.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1253.TXT'.
+""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -144,7 +144,7 @@ def getregentry():
 	0x00fd: 0x03cd,	# GREEK SMALL LETTER UPSILON WITH TONOS
 	0x00fe: 0x03ce,	# GREEK SMALL LETTER OMEGA WITH TONOS
 	0x00ff: None,	# UNDEFINED
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1254.py
+++ b/Lib/encodings/cp1254.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1254.TXT'.
+""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -75,7 +75,7 @@ def getregentry():
 	0x00f0: 0x011f,	# LATIN SMALL LETTER G WITH BREVE
 	0x00fd: 0x0131,	# LATIN SMALL LETTER DOTLESS I
 	0x00fe: 0x015f,	# LATIN SMALL LETTER S WITH CEDILLA
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1255.py
+++ b/Lib/encodings/cp1255.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1255.TXT'.
+""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -136,7 +136,7 @@ def getregentry():
 	0x00fd: 0x200e,	# LEFT-TO-RIGHT MARK
 	0x00fe: 0x200f,	# RIGHT-TO-LEFT MARK
 	0x00ff: None,	# UNDEFINED
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1256.py
+++ b/Lib/encodings/cp1256.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1256.TXT'.
+""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: 0x067e,	# ARABIC LETTER PEH
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -122,7 +122,7 @@ def getregentry():
 	0x00fd: 0x200e,	# LEFT-TO-RIGHT MARK
 	0x00fe: 0x200f,	# RIGHT-TO-LEFT MARK
 	0x00ff: 0x06d2,	# ARABIC LETTER YEH BARREE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1257.py
+++ b/Lib/encodings/cp1257.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1257.TXT'.
+""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -124,7 +124,7 @@ def getregentry():
 	0x00fd: 0x017c,	# LATIN SMALL LETTER Z WITH DOT ABOVE
 	0x00fe: 0x017e,	# LATIN SMALL LETTER Z WITH CARON
 	0x00ff: 0x02d9,	# DOT ABOVE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp1258.py
+++ b/Lib/encodings/cp1258.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1258.TXT'.
+""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: 0x201a,	# SINGLE LOW-9 QUOTATION MARK
@ -83,7 +83,7 @@ def getregentry():
 	0x00f5: 0x01a1,	# LATIN SMALL LETTER O WITH HORN
 	0x00fd: 0x01b0,	# LATIN SMALL LETTER U WITH HORN
 	0x00fe: 0x20ab,	# DONG SIGN
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp424.py
+++ b/Lib/encodings/cp424.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP424.TXT'.
+""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0004: 0x009c,	# SELECT
 	0x0005: 0x0009,	# HORIZONTAL TABULATION
 	0x0006: 0x0086,	# REQUIRED NEW LINE
@ -273,7 +273,7 @@ def getregentry():
 	0x00fd: None,	# UNDEFINED
 	0x00fe: None,	# UNDEFINED
 	0x00ff: 0x009f,	# EIGHT ONES
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp437.py
+++ b/Lib/encodings/cp437.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP437.TXT'.
+""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp500.py
+++ b/Lib/encodings/cp500.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP500.TXT'.
+""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0004: 0x009c,	# CONTROL
 	0x0005: 0x0009,	# HORIZONTAL TABULATION
 	0x0006: 0x0086,	# CONTROL
@ -273,7 +273,7 @@ def getregentry():
 	0x00fd: 0x00d9,	# LATIN CAPITAL LETTER U WITH GRAVE
 	0x00fe: 0x00da,	# LATIN CAPITAL LETTER U WITH ACUTE
 	0x00ff: 0x009f,	# CONTROL
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp737.py
+++ b/Lib/encodings/cp737.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP737.TXT'.
+""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x0391,	# GREEK CAPITAL LETTER ALPHA
 	0x0081: 0x0392,	# GREEK CAPITAL LETTER BETA
 	0x0082: 0x0393,	# GREEK CAPITAL LETTER GAMMA
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp775.py
+++ b/Lib/encodings/cp775.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP775.TXT'.
+""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x0106,	# LATIN CAPITAL LETTER C WITH ACUTE
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp850.py
+++ b/Lib/encodings/cp850.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP850.TXT'.
+""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp852.py
+++ b/Lib/encodings/cp852.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP852.TXT'.
+""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x0159,	# LATIN SMALL LETTER R WITH CARON
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp855.py
+++ b/Lib/encodings/cp855.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP855.TXT'.
+""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x0452,	# CYRILLIC SMALL LETTER DJE
 	0x0081: 0x0402,	# CYRILLIC CAPITAL LETTER DJE
 	0x0082: 0x0453,	# CYRILLIC SMALL LETTER GJE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00a7,	# SECTION SIGN
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp856.py
+++ b/Lib/encodings/cp856.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP856.TXT'.
+""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x05d0,	# HEBREW LETTER ALEF
 	0x0081: 0x05d1,	# HEBREW LETTER BET
 	0x0082: 0x05d2,	# HEBREW LETTER GIMEL
@ -120,10 +120,10 @@ def getregentry():
 	0x00d0: None,	# UNDEFINED
 	0x00d1: None,	# UNDEFINED
 	0x00d2: None,	# UNDEFINED
-	0x00d3: None,	# UNDEFINED
+	0x00d3: None,	# UNDEFINEDS
 	0x00d4: None,	# UNDEFINED
 	0x00d5: None,	# UNDEFINED
-	0x00d6: None,	# UNDEFINED
+	0x00d6: None,	# UNDEFINEDE
 	0x00d7: None,	# UNDEFINED
 	0x00d8: None,	# UNDEFINED
 	0x00d9: 0x2518,	# BOX DRAWINGS LIGHT UP AND LEFT
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp857.py
+++ b/Lib/encodings/cp857.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP857.TXT'.
+""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -164,7 +164,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp860.py
+++ b/Lib/encodings/cp860.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP860.TXT'.
+""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp861.py
+++ b/Lib/encodings/cp861.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP861.TXT'.
+""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp862.py
+++ b/Lib/encodings/cp862.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP862.TXT'.
+""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x05d0,	# HEBREW LETTER ALEF
 	0x0081: 0x05d1,	# HEBREW LETTER BET
 	0x0082: 0x05d2,	# HEBREW LETTER GIMEL
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp863.py
+++ b/Lib/encodings/cp863.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP863.TXT'.
+""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp864.py
+++ b/Lib/encodings/cp864.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP864.TXT'.
+""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0025: 0x066a,	# ARABIC PERCENT SIGN
 	0x0080: 0x00b0,	# DEGREE SIGN
 	0x0081: 0x00b7,	# MIDDLE DOT
@ -163,7 +163,7 @@ def getregentry():
 	0x00fd: 0xfef1,	# ARABIC LETTER YEH ISOLATED FORM
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: None,	# UNDEFINED
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp865.py
+++ b/Lib/encodings/cp865.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP865.TXT'.
+""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
 	0x0081: 0x00fc,	# LATIN SMALL LETTER U WITH DIAERESIS
 	0x0082: 0x00e9,	# LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00b2,	# SUPERSCRIPT TWO
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp866.py
+++ b/Lib/encodings/cp866.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP866.TXT'.
+""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x0410,	# CYRILLIC CAPITAL LETTER A
 	0x0081: 0x0411,	# CYRILLIC CAPITAL LETTER BE
 	0x0082: 0x0412,	# CYRILLIC CAPITAL LETTER VE
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x00a4,	# CURRENCY SIGN
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp869.py
+++ b/Lib/encodings/cp869.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP869.TXT'.
+""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: None,	# UNDEFINED
 	0x0081: None,	# UNDEFINED
 	0x0082: None,	# UNDEFINED
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x03ce,	# GREEK SMALL LETTER OMEGA WITH TONOS
 	0x00fe: 0x25a0,	# BLACK SQUARE
 	0x00ff: 0x00a0,	# NO-BREAK SPACE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp874.py
+++ b/Lib/encodings/cp874.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP874.TXT'.
+""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x20ac,	# EURO SIGN
 	0x0081: None,	# UNDEFINED
 	0x0082: None,	# UNDEFINED
@ -164,7 +164,7 @@ def getregentry():
 	0x00fd: None,	# UNDEFINED
 	0x00fe: None,	# UNDEFINED
 	0x00ff: None,	# UNDEFINED
-}
+})
 ### Encoding Map
--- a/Lib/encodings/cp875.py
+++ b/Lib/encodings/cp875.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP875.TXT'.
+""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0004: 0x009c,	# CONTROL
 	0x0005: 0x0009,	# HORIZONTAL TABULATION
 	0x0006: 0x0086,	# CONTROL
@ -274,7 +274,7 @@ def getregentry():
 	0x00fd: 0x001a,	# SUBSTITUTE
 	0x00fe: 0x00bb,	# RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
 	0x00ff: 0x009f,	# CONTROL
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_1.py
+++ b/Lib/encodings/iso8859_1.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-1.TXT'.
+""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -22,10 +22,7 @@ def decode(self,input,errors='strict'):
        return codecs.charmap_decode(input,errors,decoding_map)
 class StreamWriter(Codec,codecs.StreamWriter):
-
+    pass
-    def __init__(self,stream,errors='strict'):
-        codecs.StreamWriter.__init__(self,strict,errors)
 class StreamReader(Codec,codecs.StreamReader):
    pass
@ -38,9 +35,9 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_10.py
+++ b/Lib/encodings/iso8859_10.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-10.TXT'.
+""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x0104,	# 	LATIN CAPITAL LETTER A WITH OGONEK
 	0x00a2: 0x0112,	# 	LATIN CAPITAL LETTER E WITH MACRON
 	0x00a3: 0x0122,	# 	LATIN CAPITAL LETTER G WITH CEDILLA
@ -83,7 +83,7 @@ def getregentry():
 	0x00f7: 0x0169,	# 	LATIN SMALL LETTER U WITH TILDE
 	0x00f9: 0x0173,	# 	LATIN SMALL LETTER U WITH OGONEK
 	0x00ff: 0x0138,	# 	LATIN SMALL LETTER KRA
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_13.py
+++ b/Lib/encodings/iso8859_13.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-13.TXT'.
+""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x201d,	# 	RIGHT DOUBLE QUOTATION MARK
 	0x00a5: 0x201e,	# 	DOUBLE LOW-9 QUOTATION MARK
 	0x00a8: 0x00d8,	# 	LATIN CAPITAL LETTER O WITH STROKE
@ -93,7 +93,7 @@ def getregentry():
 	0x00fd: 0x017c,	# 	LATIN SMALL LETTER Z WITH DOT ABOVE
 	0x00fe: 0x017e,	# 	LATIN SMALL LETTER Z WITH CARON
 	0x00ff: 0x2019,	# 	RIGHT SINGLE QUOTATION MARK
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_14.py
+++ b/Lib/encodings/iso8859_14.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-14.TXT'.
+""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x1e02,	# 	LATIN CAPITAL LETTER B WITH DOT ABOVE
 	0x00a2: 0x1e03,	# 	LATIN SMALL LETTER B WITH DOT ABOVE
 	0x00a4: 0x010a,	# 	LATIN CAPITAL LETTER C WITH DOT ABOVE
@ -68,7 +68,7 @@ def getregentry():
 	0x00f0: 0x0175,	# 	LATIN SMALL LETTER W WITH CIRCUMFLEX
 	0x00f7: 0x1e6b,	# 	LATIN SMALL LETTER T WITH DOT ABOVE
 	0x00fe: 0x0177,	# 	LATIN SMALL LETTER Y WITH CIRCUMFLEX
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_15.py
+++ b/Lib/encodings/iso8859_15.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-15.TXT'.
+""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a4: 0x20ac,	# 	EURO SIGN
 	0x00a6: 0x0160,	# 	LATIN CAPITAL LETTER S WITH CARON
 	0x00a8: 0x0161,	# 	LATIN SMALL LETTER S WITH CARON
@ -45,7 +45,7 @@ def getregentry():
 	0x00bc: 0x0152,	# 	LATIN CAPITAL LIGATURE OE
 	0x00bd: 0x0153,	# 	LATIN SMALL LIGATURE OE
 	0x00be: 0x0178,	# 	LATIN CAPITAL LETTER Y WITH DIAERESIS
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_2.py
+++ b/Lib/encodings/iso8859_2.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-2.TXT'.
+""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x0104,	# 	LATIN CAPITAL LETTER A WITH OGONEK
 	0x00a2: 0x02d8,	# 	BREVE
 	0x00a3: 0x0141,	# 	LATIN CAPITAL LETTER L WITH STROKE
@ -94,7 +94,7 @@ def getregentry():
 	0x00fb: 0x0171,	# 	LATIN SMALL LETTER U WITH DOUBLE ACUTE
 	0x00fe: 0x0163,	# 	LATIN SMALL LETTER T WITH CEDILLA
 	0x00ff: 0x02d9,	# 	DOT ABOVE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_3.py
+++ b/Lib/encodings/iso8859_3.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-3.TXT'.
+""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,15 +35,17 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x0126,	# 	LATIN CAPITAL LETTER H WITH STROKE
 	0x00a2: 0x02d8,	# 	BREVE
 	0x00a5: None,
 	0x00a6: 0x0124,	# 	LATIN CAPITAL LETTER H WITH CIRCUMFLEX
 	0x00a9: 0x0130,	# 	LATIN CAPITAL LETTER I WITH DOT ABOVE
 	0x00aa: 0x015e,	# 	LATIN CAPITAL LETTER S WITH CEDILLA
 	0x00ab: 0x011e,	# 	LATIN CAPITAL LETTER G WITH BREVE
 	0x00ac: 0x0134,	# 	LATIN CAPITAL LETTER J WITH CIRCUMFLEX
 	0x00ae: None,
 	0x00af: 0x017b,	# 	LATIN CAPITAL LETTER Z WITH DOT ABOVE
 	0x00b1: 0x0127,	# 	LATIN SMALL LETTER H WITH STROKE
 	0x00b6: 0x0125,	# 	LATIN SMALL LETTER H WITH CIRCUMFLEX
@ -51,21 +53,26 @@ def getregentry():
 	0x00ba: 0x015f,	# 	LATIN SMALL LETTER S WITH CEDILLA
 	0x00bb: 0x011f,	# 	LATIN SMALL LETTER G WITH BREVE
 	0x00bc: 0x0135,	# 	LATIN SMALL LETTER J WITH CIRCUMFLEX
 	0x00be: None,
 	0x00bf: 0x017c,	# 	LATIN SMALL LETTER Z WITH DOT ABOVE
 	0x00c3: None,
 	0x00c5: 0x010a,	# 	LATIN CAPITAL LETTER C WITH DOT ABOVE
 	0x00c6: 0x0108,	# 	LATIN CAPITAL LETTER C WITH CIRCUMFLEX
 	0x00d0: None,
 	0x00d5: 0x0120,	# 	LATIN CAPITAL LETTER G WITH DOT ABOVE
 	0x00d8: 0x011c,	# 	LATIN CAPITAL LETTER G WITH CIRCUMFLEX
 	0x00dd: 0x016c,	# 	LATIN CAPITAL LETTER U WITH BREVE
 	0x00de: 0x015c,	# 	LATIN CAPITAL LETTER S WITH CIRCUMFLEX
 	0x00e3: None,
 	0x00e5: 0x010b,	# 	LATIN SMALL LETTER C WITH DOT ABOVE
 	0x00e6: 0x0109,	# 	LATIN SMALL LETTER C WITH CIRCUMFLEX
 	0x00f0: None,
 	0x00f5: 0x0121,	# 	LATIN SMALL LETTER G WITH DOT ABOVE
 	0x00f8: 0x011d,	# 	LATIN SMALL LETTER G WITH CIRCUMFLEX
 	0x00fd: 0x016d,	# 	LATIN SMALL LETTER U WITH BREVE
 	0x00fe: 0x015d,	# 	LATIN SMALL LETTER S WITH CIRCUMFLEX
 	0x00ff: 0x02d9,	# 	DOT ABOVE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_4.py
+++ b/Lib/encodings/iso8859_4.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-4.TXT'.
+""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x0104,	# 	LATIN CAPITAL LETTER A WITH OGONEK
 	0x00a2: 0x0138,	# 	LATIN SMALL LETTER KRA
 	0x00a3: 0x0156,	# 	LATIN CAPITAL LETTER R WITH CEDILLA
@ -87,7 +87,7 @@ def getregentry():
 	0x00fd: 0x0169,	# 	LATIN SMALL LETTER U WITH TILDE
 	0x00fe: 0x016b,	# 	LATIN SMALL LETTER U WITH MACRON
 	0x00ff: 0x02d9,	# 	DOT ABOVE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_5.py
+++ b/Lib/encodings/iso8859_5.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-5.TXT'.
+""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x0401,	# 	CYRILLIC CAPITAL LETTER IO
 	0x00a2: 0x0402,	# 	CYRILLIC CAPITAL LETTER DJE
 	0x00a3: 0x0403,	# 	CYRILLIC CAPITAL LETTER GJE
@ -131,7 +131,7 @@ def getregentry():
 	0x00fd: 0x00a7,	# 	SECTION SIGN
 	0x00fe: 0x045e,	# 	CYRILLIC SMALL LETTER SHORT U
 	0x00ff: 0x045f,	# 	CYRILLIC SMALL LETTER DZHE
-}
+})
 ### Encoding Map
--- a/Lib/encodings/iso8859_6.py
+++ b/Lib/encodings/iso8859_6.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-6.TXT'.
+""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,11 +35,38 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: None,
 	0x00a2: None,
 	0x00a3: None,
 	0x00a5: None,
 	0x00a6: None,
 	0x00a7: None,
 	0x00a8: None,
 	0x00a9: None,
 	0x00aa: None,
 	0x00ab: None,
 	0x00ac: 0x060c,	# 	ARABIC COMMA
 	0x00ae: None,
 	0x00af: None,
 	0x00b0: None,
 	0x00b1: None,
 	0x00b2: None,
 	0x00b3: None,
 	0x00b4: None,
 	0x00b5: None,
 	0x00b6: None,
 	0x00b7: None,
 	0x00b8: None,
 	0x00b9: None,
 	0x00ba: None,
 	0x00bb: 0x061b,	# 	ARABIC SEMICOLON
 	0x00bc: None,
 	0x00bd: None,
 	0x00be: None,
 	0x00bf: 0x061f,	# 	ARABIC QUESTION MARK
 	0x00c0: None,
 	0x00c1: 0x0621,	# 	ARABIC LETTER HAMZA
 	0x00c2: 0x0622,	# 	ARABIC LETTER ALEF WITH MADDA ABOVE
 	0x00c3: 0x0623,	# 	ARABIC LETTER ALEF WITH HAMZA ABOVE
@ -66,6 +93,11 @@ def getregentry():
 	0x00d8: 0x0638,	# 	ARABIC LETTER ZAH
 	0x00d9: 0x0639,	# 	ARABIC LETTER AIN
 	0x00da: 0x063a,	# 	ARABIC LETTER GHAIN
 	0x00db: None,
 	0x00dc: None,
 	0x00dd: None,
 	0x00de: None,
 	0x00df: None,
 	0x00e0: 0x0640,	# 	ARABIC TATWEEL
 	0x00e1: 0x0641,	# 	ARABIC LETTER FEH
 	0x00e2: 0x0642,	# 	ARABIC LETTER QAF
@ -85,7 +117,20 @@ def getregentry():
 	0x00f0: 0x0650,	# 	ARABIC KASRA
 	0x00f1: 0x0651,	# 	ARABIC SHADDA
 	0x00f2: 0x0652,	# 	ARABIC SUKUN
-}
+	0x00f3: None,
 	0x00f4: None,
 	0x00f5: None,
 	0x00f6: None,
 	0x00f7: None,
 	0x00f8: None,
 	0x00f9: None,
 	0x00fa: None,
 	0x00fb: None,
 	0x00fc: None,
 	0x00fd: None,
 	0x00fe: None,
 	0x00ff: None,
 })
 ### Encoding Map
--- a/Lib/encodings/iso8859_7.py
+++ b/Lib/encodings/iso8859_7.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-7.TXT'.
+""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,10 +35,14 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: 0x2018,	# 	LEFT SINGLE QUOTATION MARK
 	0x00a2: 0x2019,	# 	RIGHT SINGLE QUOTATION MARK
 	0x00a4: None,
 	0x00a5: None,
 	0x00aa: None,
 	0x00ae: None,
 	0x00af: 0x2015,	# 	HORIZONTAL BAR
 	0x00b4: 0x0384,	# 	GREEK TONOS
 	0x00b5: 0x0385,	# 	GREEK DIALYTIKA TONOS
@ -67,6 +71,7 @@ def getregentry():
 	0x00cf: 0x039f,	# 	GREEK CAPITAL LETTER OMICRON
 	0x00d0: 0x03a0,	# 	GREEK CAPITAL LETTER PI
 	0x00d1: 0x03a1,	# 	GREEK CAPITAL LETTER RHO
 	0x00d2: None,
 	0x00d3: 0x03a3,	# 	GREEK CAPITAL LETTER SIGMA
 	0x00d4: 0x03a4,	# 	GREEK CAPITAL LETTER TAU
 	0x00d5: 0x03a5,	# 	GREEK CAPITAL LETTER UPSILON
@ -111,7 +116,8 @@ def getregentry():
 	0x00fc: 0x03cc,	# 	GREEK SMALL LETTER OMICRON WITH TONOS
 	0x00fd: 0x03cd,	# 	GREEK SMALL LETTER UPSILON WITH TONOS
 	0x00fe: 0x03ce,	# 	GREEK SMALL LETTER OMEGA WITH TONOS
-}
+	0x00ff: None,
 })
 ### Encoding Map
--- a/Lib/encodings/iso8859_8.py
+++ b/Lib/encodings/iso8859_8.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-8.TXT'.
+""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,11 +35,43 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00a1: None,
 	0x00aa: 0x00d7,	# 	MULTIPLICATION SIGN
 	0x00af: 0x203e,	# 	OVERLINE
 	0x00ba: 0x00f7,	# 	DIVISION SIGN
 	0x00bf: None,
 	0x00c0: None,
 	0x00c1: None,
 	0x00c2: None,
 	0x00c3: None,
 	0x00c4: None,
 	0x00c5: None,
 	0x00c6: None,
 	0x00c7: None,
 	0x00c8: None,
 	0x00c9: None,
 	0x00ca: None,
 	0x00cb: None,
 	0x00cc: None,
 	0x00cd: None,
 	0x00ce: None,
 	0x00cf: None,
 	0x00d0: None,
 	0x00d1: None,
 	0x00d2: None,
 	0x00d3: None,
 	0x00d4: None,
 	0x00d5: None,
 	0x00d6: None,
 	0x00d7: None,
 	0x00d8: None,
 	0x00d9: None,
 	0x00da: None,
 	0x00db: None,
 	0x00dc: None,
 	0x00dd: None,
 	0x00de: None,
 	0x00df: 0x2017,	# 	DOUBLE LOW LINE
 	0x00e0: 0x05d0,	# 	HEBREW LETTER ALEF
 	0x00e1: 0x05d1,	# 	HEBREW LETTER BET
@ -68,7 +100,12 @@ def getregentry():
 	0x00f8: 0x05e8,	# 	HEBREW LETTER RESH
 	0x00f9: 0x05e9,	# 	HEBREW LETTER SHIN
 	0x00fa: 0x05ea,	# 	HEBREW LETTER TAV
-}
+	0x00fb: None,
 	0x00fc: None,
 	0x00fd: 0x200e,	# 	LEFT-TO-RIGHT MARK
 	0x00fe: 0x200f,	# 	RIGHT-TO-LEFT MARK
 	0x00ff: None,
 })
 ### Encoding Map
--- a/Lib/encodings/iso8859_9.py
+++ b/Lib/encodings/iso8859_9.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-9.TXT'.
+""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,15 +35,15 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x00d0: 0x011e,	# 	LATIN CAPITAL LETTER G WITH BREVE
 	0x00dd: 0x0130,	# 	LATIN CAPITAL LETTER I WITH DOT ABOVE
 	0x00de: 0x015e,	# 	LATIN CAPITAL LETTER S WITH CEDILLA
 	0x00f0: 0x011f,	# 	LATIN SMALL LETTER G WITH BREVE
 	0x00fd: 0x0131,	# 	LATIN SMALL LETTER DOTLESS I
 	0x00fe: 0x015f,	# 	LATIN SMALL LETTER S WITH CEDILLA
-}
+})
 ### Encoding Map
--- a/Lib/encodings/koi8_r.py
+++ b/Lib/encodings/koi8_r.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
+""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x2500,	# 	BOX DRAWINGS LIGHT HORIZONTAL
 	0x0081: 0x2502,	# 	BOX DRAWINGS LIGHT VERTICAL
 	0x0082: 0x250c,	# 	BOX DRAWINGS LIGHT DOWN AND RIGHT
@ -165,7 +165,7 @@ def getregentry():
 	0x00fd: 0x0429,	# 	CYRILLIC CAPITAL LETTER SHCHA
 	0x00fe: 0x0427,	# 	CYRILLIC CAPITAL LETTER CHE
 	0x00ff: 0x042a,	# 	CYRILLIC CAPITAL LETTER HARD SIGN
-}
+})
 ### Encoding Map
--- a/Lib/encodings/mac_cyrillic.py
+++ b/Lib/encodings/mac_cyrillic.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
+""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x0410,	# CYRILLIC CAPITAL LETTER A
 	0x0081: 0x0411,	# CYRILLIC CAPITAL LETTER BE
 	0x0082: 0x0412,	# CYRILLIC CAPITAL LETTER VE
@ -160,7 +160,7 @@ def getregentry():
 	0x00fd: 0x044d,	# CYRILLIC SMALL LETTER E
 	0x00fe: 0x044e,	# CYRILLIC SMALL LETTER YU
 	0x00ff: 0x00a4,	# CURRENCY SIGN
-}
+})
 ### Encoding Map
--- a/Lib/encodings/mac_greek.py
+++ b/Lib/encodings/mac_greek.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'GREEK.TXT'.
+""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
 	0x0081: 0x00b9,	# SUPERSCRIPT ONE
 	0x0082: 0x00b2,	# SUPERSCRIPT TWO
@ -163,7 +163,7 @@ def getregentry():
 	0x00fd: 0x0390,	# GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
 	0x00fe: 0x03b0,	# GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
 	0x00ff: None,	# UNDEFINED
-}
+})
 ### Encoding Map
--- a/Lib/encodings/mac_iceland.py
+++ b/Lib/encodings/mac_iceland.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
+""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
 	0x0081: 0x00c5,	# LATIN CAPITAL LETTER A WITH RING ABOVE
 	0x0082: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
@ -159,7 +159,7 @@ def getregentry():
 	0x00fd: 0x02dd,	# DOUBLE ACUTE ACCENT
 	0x00fe: 0x02db,	# OGONEK
 	0x00ff: 0x02c7,	# CARON
-}
+})
 ### Encoding Map
--- a/Lib/encodings/mac_latin2.py
+++ b/Lib/encodings/mac_latin2.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
+""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
 	0x0081: 0x0100,	# LATIN CAPITAL LETTER A WITH MACRON
 	0x0082: 0x0101,	# LATIN SMALL LETTER A WITH MACRON
@ -163,7 +163,7 @@ def getregentry():
 	0x00fd: 0x017c,	# LATIN SMALL LETTER Z WITH DOT ABOVE
 	0x00fe: 0x0122,	# LATIN CAPITAL LETTER G WITH CEDILLA
 	0x00ff: 0x02c7,	# CARON
-}
+})
 ### Encoding Map
--- a/Lib/encodings/mac_roman.py
+++ b/Lib/encodings/mac_roman.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
+""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
 	0x0081: 0x00c5,	# LATIN CAPITAL LETTER A WITH RING ABOVE
 	0x0082: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
@ -160,7 +160,7 @@ def getregentry():
 	0x00fd: 0x02dd,	# DOUBLE ACUTE ACCENT
 	0x00fe: 0x02db,	# OGONEK
 	0x00ff: 0x02c7,	# CARON
-}
+})
 ### Encoding Map
--- a/Lib/encodings/mac_turkish.py
+++ b/Lib/encodings/mac_turkish.py
@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
+""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -35,8 +35,8 @@ def getregentry():
 ### Decoding Map
-decoding_map = {
+decoding_map = codecs.make_identity_dict(range(256))
-
+decoding_map.update({
 	0x0080: 0x00c4,	# LATIN CAPITAL LETTER A WITH DIAERESIS
 	0x0081: 0x00c5,	# LATIN CAPITAL LETTER A WITH RING ABOVE
 	0x0082: 0x00c7,	# LATIN CAPITAL LETTER C WITH CEDILLA
@ -160,7 +160,7 @@ def getregentry():
 	0x00fd: 0x02dd,	# DOUBLE ACUTE ACCENT
 	0x00fe: 0x02db,	# OGONEK
 	0x00ff: 0x02c7,	# CARON
-}
+})
 ### Encoding Map
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -494,14 +494,15 @@ def __str__(self):
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
-    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
+    'iso8859_2', 'iso8859_4', 'iso8859_5', 
-    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
+    'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',
    ### These have undefined mappings:
    #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #'cp1256', 'cp1257', 'cp1258',
    #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #'iso8859_3', 'iso8859_6', 'iso8859_7', 
    #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
    ### These fail the round-trip:
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 	Py_DECREF(w);
 	if (x == NULL) {
 	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-		/* No mapping found: default to Latin-1 mapping */
+		/* No mapping found means: mapping is undefined. */
 		PyErr_Clear();
-		*p++ = (Py_UNICODE)ch;
+		x = Py_None;
-		continue;
+		Py_INCREF(x);
-	    }
+	    } else
 	    goto onError;
 	}
@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
 	Py_DECREF(w);
 	if (x == NULL) {
 	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-		/* No mapping found: default to Latin-1 mapping if possible */
+		/* No mapping found means: mapping is undefined. */
 		PyErr_Clear();
-		if (ch < 256) {
+		x = Py_None;
-		    *s++ = (char)ch;
+		Py_INCREF(x);
-		    continue;
+	    } else
 		}
 		else if (!charmap_encoding_error(&p, &s, errors,
 				     "missing character mapping"))
 		    continue;
 	    }
 	    goto onError;
 	}
--- a/Tools/scripts/gencodec.py
+++ b/Tools/scripts/gencodec.py
@ -1,9 +1,9 @@
 """ Unicode Mapping Parser and Codec Generator.
 This script parses Unicode mapping files as available from the Unicode
-site (ftp.unicode.org) and creates Python codec modules from them. The
+site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
-codecs use the standard character mapping codec to actually apply the
+modules from them. The codecs use the standard character mapping codec
-mapping.
+to actually apply the mapping.
 Synopsis: gencodec.py dir codec_prefix
@ -18,6 +18,7 @@
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright Guido van Rossum, 2000.
 """#"
@ -70,6 +71,10 @@ def readmap(filename,
    lines = f.readlines()
    f.close()
    enc2uni = {}
    identity = []
    unmapped = range(256)
    for i in range(256):
        unmapped[i] = i
    for line in lines:
        line = strip(line)
        if not line or line[0] == '#':
@ -85,8 +90,22 @@ def readmap(filename,
            comment = ''
        else:
            comment = comment[1:]
-        if enc != uni:
+        if enc < 256:
            unmapped.remove(enc)
            if enc == uni:
                identity.append(enc)
            else:
                enc2uni[enc] = (uni,comment)
        else:
            enc2uni[enc] = (uni,comment)
    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if len(identity)>=len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256
    return enc2uni
 def hexrepr(t,
@ -143,11 +162,12 @@ def codegen(name,map,comments=1):
    """
    l = [
        '''\
-""" Python Character Mapping Codec generated from '%s'.
+""" Python Character Mapping Codec generated from '%s' with gencodec.py.
 Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 (c) Copyright 2000 Guido van Rossum.
 """#"
@ -178,15 +198,23 @@ def getregentry():
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
 ### Decoding Map
 decoding_map = {
 ''' % name,
        ]
    if map.has_key("IDENTITY"):
        l.append("decoding_map = codecs.make_identity_dict(range(%d))"
                 % map["IDENTITY"])
        l.append("decoding_map.update({")
        splits = 1
        del map["IDENTITY"]
    else:
        l.append("decoding_map = {")
        splits = 0
    mappings = map.items()
    mappings.sort()
    append = l.append
    i = 0
    splits = 0
    for e,value in mappings:
        try:
            (u,c) = value
@ -198,7 +226,7 @@ def getregentry():
            append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
        else:
            append('\t%s: %s,' % (key,unicoderepr(u)))
-        i = i + 1
+        i += 1
        if i == 4096:
            # Split the definition into parts so that the Python
            # parser doesn't dump core
@ -206,7 +234,7 @@ def getregentry():
                append('}')
            else:
                append('})')
-            append('map.update({')
+            append('decoding_map.update({')
            i = 0
            splits = splits + 1
    if splits == 0:
@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1):
    mapnames = os.listdir(dir)
    for mapname in mapnames:
-        if mapname[-len('.mapping'):] != '.mapping':
+        if not mapname.endswith('.mapping'):
            continue
        codefile = mapname[:-len('.mapping')] + '.py'
        print 'converting %s to %s' % (mapname,