mirror of
https://github.com/python/cpython.git
synced 2025-11-01 06:01:29 +00:00
Issue #4008: Fix problems with non-ASCII source files.
This commit is contained in:
parent
1cbb17a818
commit
975a079794
3 changed files with 21 additions and 12 deletions
|
|
@ -74,10 +74,11 @@ def coding_spec(data):
|
||||||
Raise a LookupError if the encoding is declared but unknown.
|
Raise a LookupError if the encoding is declared but unknown.
|
||||||
"""
|
"""
|
||||||
if isinstance(data, bytes):
|
if isinstance(data, bytes):
|
||||||
try:
|
# This encoding might be wrong. However, the coding
|
||||||
lines = data.decode('utf-8')
|
# spec must be ASCII-only, so any non-ASCII characters
|
||||||
except UnicodeDecodeError:
|
# around here will be ignored. Decoding to Latin-1 should
|
||||||
return None
|
# never fail (except when memory is exhausted)
|
||||||
|
lines = data.decode('iso-8859-1')
|
||||||
else:
|
else:
|
||||||
lines = data
|
lines = data
|
||||||
# consider only the first two lines
|
# consider only the first two lines
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ What's New in IDLE 3.1a1?
|
||||||
|
|
||||||
*Release date: XX-XXX-XXXX*
|
*Release date: XX-XXX-XXXX*
|
||||||
|
|
||||||
|
- Issue #4008: Fix problems with non-ASCII source files.
|
||||||
|
|
||||||
- Issue #4323: Always encode source as UTF-8 without asking
|
- Issue #4323: Always encode source as UTF-8 without asking
|
||||||
the user (unless a different encoding is declared); remove
|
the user (unless a different encoding is declared); remove
|
||||||
user configuration of source encoding; all according to
|
user configuration of source encoding; all according to
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@
|
||||||
import tokenize
|
import tokenize
|
||||||
import tkinter.messagebox as tkMessageBox
|
import tkinter.messagebox as tkMessageBox
|
||||||
from idlelib.EditorWindow import EditorWindow
|
from idlelib.EditorWindow import EditorWindow
|
||||||
from idlelib import PyShell
|
from idlelib import PyShell, IOBinding
|
||||||
|
|
||||||
from idlelib.configHandler import idleConf
|
from idlelib.configHandler import idleConf
|
||||||
|
|
||||||
|
|
@ -62,7 +62,13 @@ def check_module_event(self, event):
|
||||||
return 'break'
|
return 'break'
|
||||||
|
|
||||||
def tabnanny(self, filename):
|
def tabnanny(self, filename):
|
||||||
f = open(filename, 'r')
|
# XXX: tabnanny should work on binary files as well
|
||||||
|
with open(filename, 'r', encoding='iso-8859-1') as f:
|
||||||
|
two_lines = f.readline() + f.readline()
|
||||||
|
encoding = IOBinding.coding_spec(two_lines)
|
||||||
|
if not encoding:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
f = open(filename, 'r', encoding=encoding)
|
||||||
try:
|
try:
|
||||||
tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
|
tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
|
||||||
except tokenize.TokenError as msg:
|
except tokenize.TokenError as msg:
|
||||||
|
|
@ -82,14 +88,14 @@ def checksyntax(self, filename):
|
||||||
self.shell = shell = self.flist.open_shell()
|
self.shell = shell = self.flist.open_shell()
|
||||||
saved_stream = shell.get_warning_stream()
|
saved_stream = shell.get_warning_stream()
|
||||||
shell.set_warning_stream(shell.stderr)
|
shell.set_warning_stream(shell.stderr)
|
||||||
f = open(filename, 'r')
|
f = open(filename, 'rb')
|
||||||
source = f.read()
|
source = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
if '\r' in source:
|
if b'\r' in source:
|
||||||
source = re.sub(r"\r\n", "\n", source)
|
source = source.replace(b'\r\n', b'\n')
|
||||||
source = re.sub(r"\r", "\n", source)
|
source = source.replace(b'\r', b'\n')
|
||||||
if source and source[-1] != '\n':
|
if source and source[-1] != ord(b'\n'):
|
||||||
source = source + '\n'
|
source = source + b'\n'
|
||||||
editwin = self.editwin
|
editwin = self.editwin
|
||||||
text = editwin.text
|
text = editwin.text
|
||||||
text.tag_remove("ERROR", "1.0", "end")
|
text.tag_remove("ERROR", "1.0", "end")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue