diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py index b46f82985a8..304bbdbff0e 100644 --- a/Lib/sgmllib.py +++ b/Lib/sgmllib.py @@ -14,16 +14,28 @@ # Regular expressions used for parsing -incomplete = regex.compile( - '') +interesting = regex.compile('[&<]') +incomplete = regex.compile('&\([a-zA-Z][a-zA-Z0-9]*\|#[0-9]*\)?\|' + '<\([a-zA-Z][^<>]*\|' + '/\([a-zA-Z][^<>]*\)?\|' + '![^<>]*\)?') + +entityref = regex.compile('&\([a-zA-Z][a-zA-Z0-9]*\)[^a-zA-Z0-9]') +charref = regex.compile('&#\([0-9]+\)[^0-9]') + +starttagopen = regex.compile('<[>a-zA-Z]') +shorttagopen = regex.compile('<[a-zA-Z][a-zA-Z0-9]*/') +shorttag = regex.compile('<\([a-zA-Z][a-zA-Z0-9]*\)/\([^/]*\)/') +endtagopen = regex.compile('a-zA-Z]') +endbracket = regex.compile('[<>]') special = regex.compile(']*>') commentopen = regex.compile('