gh-139489: Add xml.is_valid_name() (GH-139768)

It allows checking whether a string can be used as an element or attribute
name in XML.
This commit is contained in:
Serhiy Storchaka 2026-05-05 11:25:22 +03:00 committed by GitHub
parent 02d02f44fe
commit a76d9573e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 79 additions and 1 deletions

View file

@ -41,6 +41,21 @@ The XML handling submodules are:
* :mod:`xml.sax`: SAX2 base classes and convenience functions
* :mod:`xml.parsers.expat`: the Expat parser binding
This module also defines utility functions.
.. function:: is_valid_name(name)
Return ``True`` if the string is a valid element or attribute name,
``False`` otherwise.
Almost all characters are permitted in names, except control characters and
those which either are or reasonably could be used as delimiters.
Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/",
"!", "?", and "=" are forbidden.
The name cannot start with a digit or with a character like "-", ".", or "·".
.. versionadded:: next
.. _xml-security:
.. _xml-vulnerabilities:

View file

@ -1653,6 +1653,14 @@ wave
(Contributed by Lionel Koenig and Michiel W. Beijen in :gh:`60729`.)
xml
---
* Add the :func:`xml.is_valid_name` function, which checks
whether a string can be used as an element or attribute name in XML.
(Contributed by Serhiy Storchaka in :gh:`139489`.)
xml.parsers.expat
-----------------

27
Lib/test/test_xml.py Normal file
View file

@ -0,0 +1,27 @@
import xml
import unittest
class TestUtils(unittest.TestCase):
    """Tests for the utility functions exposed by the ``xml`` package."""

    def test_is_valid_name(self):
        """Check xml.is_valid_name() on names it must accept and reject."""
        check = xml.is_valid_name

        # The empty string is rejected.
        self.assertFalse(check(''))

        # Names that must be accepted.
        accepted = (
            'name',
            'NAME',
            'name0:-._·',
            '_',
            ':',
            'Ñàḿĕ',
            '\U000EFFFF',
        )
        for candidate in accepted:
            self.assertTrue(check(candidate))

        # Names that must be rejected: a leading digit, "-", "." or "·",
        # and a name containing a space.
        rejected = ('0', '-', '.', '·', 'na me')
        for candidate in rejected:
            self.assertFalse(check(candidate))

        # Appending any one of these characters to an otherwise valid name
        # must make it invalid.
        forbidden = '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000'
        for ch in forbidden:
            self.assertFalse(check('name' + ch))
# Run the tests when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()

View file

@ -16,5 +16,6 @@
"""
from .utils import *
__all__ = ["dom", "parsers", "sax", "etree"]
__all__ = ["dom", "parsers", "sax", "etree", "is_valid_name"]

25
Lib/xml/utils.py Normal file
View file

@ -0,0 +1,25 @@
lazy import re as _re
def is_valid_name(name):
"""Test whether a string is a valid element or attribute name."""
# https://www.w3.org/TR/xml/#NT-Name
return _re.fullmatch(
# NameStartChar
'['
':A-Z_a-z'
'\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF'
'\u200C\u200D'
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
']'
# NameChar
'['
r'\-.0-9:A-Z_a-z'
'\xB7'
'\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF'
'\u200C\u200D\u203F\u2040'
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
']*+',
name) is not None

View file

@ -0,0 +1,2 @@
Add the :func:`xml.is_valid_name` function, which checks
whether a string can be used as an element or attribute name in XML.