Issue #4008: Fix problems with non-ASCII source files.

This commit is contained in:
Martin v. Löwis 2009-01-18 20:15:42 +00:00
parent 1cbb17a818
commit 975a079794
3 changed files with 21 additions and 12 deletions

View file

@ -74,10 +74,11 @@ def coding_spec(data):
Raise a LookupError if the encoding is declared but unknown. Raise a LookupError if the encoding is declared but unknown.
""" """
if isinstance(data, bytes): if isinstance(data, bytes):
try: # This encoding might be wrong. However, the coding
lines = data.decode('utf-8') # spec must be ASCII-only, so any non-ASCII characters
except UnicodeDecodeError: # around here will be ignored. Decoding to Latin-1 should
return None # never fail (except for memory outage)
lines = data.decode('iso-8859-1')
else: else:
lines = data lines = data
# consider only the first two lines # consider only the first two lines

View file

@ -3,6 +3,8 @@ What's New in IDLE 3.1a1?
*Release date: XX-XXX-XXXX* *Release date: XX-XXX-XXXX*
- Issue #4008: Fix problems with non-ASCII source files.
- Issue #4323: Always encode source as UTF-8 without asking - Issue #4323: Always encode source as UTF-8 without asking
the user (unless a different encoding is declared); remove the user (unless a different encoding is declared); remove
user configuration of source encoding; all according to user configuration of source encoding; all according to

View file

@ -24,7 +24,7 @@
import tokenize import tokenize
import tkinter.messagebox as tkMessageBox import tkinter.messagebox as tkMessageBox
from idlelib.EditorWindow import EditorWindow from idlelib.EditorWindow import EditorWindow
from idlelib import PyShell from idlelib import PyShell, IOBinding
from idlelib.configHandler import idleConf from idlelib.configHandler import idleConf
@ -62,7 +62,13 @@ def check_module_event(self, event):
return 'break' return 'break'
def tabnanny(self, filename): def tabnanny(self, filename):
f = open(filename, 'r') # XXX: tabnanny should work on binary files as well
with open(filename, 'r', encoding='iso-8859-1') as f:
two_lines = f.readline() + f.readline()
encoding = IOBinding.coding_spec(two_lines)
if not encoding:
encoding = 'utf-8'
f = open(filename, 'r', encoding=encoding)
try: try:
tabnanny.process_tokens(tokenize.generate_tokens(f.readline)) tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
except tokenize.TokenError as msg: except tokenize.TokenError as msg:
@ -82,14 +88,14 @@ def checksyntax(self, filename):
self.shell = shell = self.flist.open_shell() self.shell = shell = self.flist.open_shell()
saved_stream = shell.get_warning_stream() saved_stream = shell.get_warning_stream()
shell.set_warning_stream(shell.stderr) shell.set_warning_stream(shell.stderr)
f = open(filename, 'r') f = open(filename, 'rb')
source = f.read() source = f.read()
f.close() f.close()
if '\r' in source: if b'\r' in source:
source = re.sub(r"\r\n", "\n", source) source = source.replace(b'\r\n', b'\n')
source = re.sub(r"\r", "\n", source) source = source.replace(b'\r', b'\n')
if source and source[-1] != '\n': if source and source[-1] != ord(b'\n'):
source = source + '\n' source = source + b'\n'
editwin = self.editwin editwin = self.editwin
text = editwin.text text = editwin.text
text.tag_remove("ERROR", "1.0", "end") text.tag_remove("ERROR", "1.0", "end")