Deprecate/warn usage of yaml.load(input)

The `load` and `load_all` methods will issue a warning when they are
called without the 'Loader=' parameter. The warning will point to a URL
that is always up to date with the latest information on the usage of
`load`.

There are several ways to stop the warning:

* Use `full_load(input)` - sugar for `yaml.load(input, FullLoader)`
  * FullLoader is the new safe but complete loader class
* Use `safe_load(input)` - sugar for `yaml.load(input, SafeLoader)`
  * Make sure your input YAML consists of the 'safe' subset
* Use `unsafe_load(input)` - sugar for `yaml.load(input, UnsafeLoader)`
  * Only use this if your input YAML is trusted
* Use `yaml.load(input, Loader=yaml.<loader>)`
  * Or shorter `yaml.load(input, yaml.<loader>)`
  * Where '<loader>' can be:
    * FullLoader - safe, complete Python YAML loading
    * SafeLoader - safe, partial Python YAML loading
    * UnsafeLoader - more explicit name for the old, unsafe 'Loader' class
* Use `yaml.warnings({'YAMLLoadWarning': False})` to turn the warning off globally
  * Use this when you use third party modules that use `yaml.load(input)`
  * Only do this if input is trusted

The above `load()` expressions all have `load_all()` counterparts.

You can get the original unsafe behavior with:
* `yaml.unsafe_load(input)`
* `yaml.load(input, Loader=yaml.UnsafeLoader)`

In a future release, `yaml.load(input)` will raise an exception.

The new loader called FullLoader is almost as complete as
Loader/UnsafeLoader, but it avoids all known code execution paths. It is
the preferred YAML loader, and the current default for
`yaml.load(input)` when you get the warning.

Here are some of the exploits that can be triggered with UnsafeLoader
but not with FullLoader:
```
python -c 'import os, yaml; yaml.full_load("!!python/object/new:os.system [echo EXPLOIT!]")'
python -c 'import yaml; print yaml.full_load("!!python/object/new:abs [-5]")'
python -c 'import yaml; yaml.full_load("!!python/object/new:eval [exit(5)]")' ; echo $?
python -c 'import yaml; yaml.full_load("!!python/object/new:exit [5]")' ; echo $?
```
This commit is contained in:
Ingy döt Net 2019-02-17 19:22:58 -08:00
parent d13a3d0f96
commit 0cedb2a069
8 changed files with 228 additions and 64 deletions

View file

@ -8,7 +8,7 @@ from nodes import *
from loader import * from loader import *
from dumper import * from dumper import *
__version__ = '4.1' __version__ = '3.13'
try: try:
from cyaml import * from cyaml import *
@ -16,6 +16,45 @@ try:
except ImportError: except ImportError:
__with_libyaml__ = False __with_libyaml__ = False
#------------------------------------------------------------------------------
# Warnings control
#------------------------------------------------------------------------------
# 'Global' warnings state:
_warnings_enabled = {
'YAMLLoadWarning': True,
}
# Get or set global warnings' state
def warnings(settings=None):
if settings is None:
return _warnings_enabled
if type(settings) is dict:
for key in settings:
if key in _warnings_enabled:
_warnings_enabled[key] = settings[key]
# Warn when load() is called without Loader=...
class YAMLLoadWarning(RuntimeWarning):
pass
def load_warning(method):
if _warnings_enabled['YAMLLoadWarning'] is False:
return
import warnings
message = (
"calling yaml.%s() without Loader=... is deprecated, as the "
"default Loader is unsafe. Please read "
"https://msg.pyyaml.org/load for full details."
) % method
warnings.warn(message, YAMLLoadWarning, stacklevel=3)
#------------------------------------------------------------------------------
def scan(stream, Loader=Loader): def scan(stream, Loader=Loader):
""" """
Scan a YAML stream and produce scanning tokens. Scan a YAML stream and produce scanning tokens.
@ -61,22 +100,30 @@ def compose_all(stream, Loader=Loader):
finally: finally:
loader.dispose() loader.dispose()
def load(stream, Loader=Loader): def load(stream, Loader=None):
""" """
Parse the first YAML document in a stream Parse the first YAML document in a stream
and produce the corresponding Python object. and produce the corresponding Python object.
""" """
if Loader is None:
load_warning('load')
Loader = FullLoader
loader = Loader(stream) loader = Loader(stream)
try: try:
return loader.get_single_data() return loader.get_single_data()
finally: finally:
loader.dispose() loader.dispose()
def load_all(stream, Loader=Loader): def load_all(stream, Loader=None):
""" """
Parse all YAML documents in a stream Parse all YAML documents in a stream
and produce corresponding Python objects. and produce corresponding Python objects.
""" """
if Loader is None:
load_warning('load_all')
Loader = FullLoader
loader = Loader(stream) loader = Loader(stream)
try: try:
while loader.check_data(): while loader.check_data():
@ -84,11 +131,33 @@ def load_all(stream, Loader=Loader):
finally: finally:
loader.dispose() loader.dispose()
def full_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load(stream, FullLoader)
def full_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load_all(stream, FullLoader)
def safe_load(stream): def safe_load(stream):
""" """
Parse the first YAML document in a stream Parse the first YAML document in a stream
and produce the corresponding Python object. and produce the corresponding Python object.
Resolve only basic YAML tags.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
""" """
return load(stream, SafeLoader) return load(stream, SafeLoader)
@ -96,10 +165,32 @@ def safe_load_all(stream):
""" """
Parse all YAML documents in a stream Parse all YAML documents in a stream
and produce corresponding Python objects. and produce corresponding Python objects.
Resolve only basic YAML tags.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
""" """
return load_all(stream, SafeLoader) return load_all(stream, SafeLoader)
def unsafe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load(stream, UnsafeLoader)
def unsafe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load_all(stream, UnsafeLoader)
def emit(events, stream=None, Dumper=Dumper, def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None, canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None): allow_unicode=None, line_break=None):

View file

@ -1,6 +1,12 @@
__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', __all__ = [
'ConstructorError'] 'BaseConstructor',
'SafeConstructor',
'FullConstructor',
'UnsafeConstructor',
'Constructor',
'ConstructorError'
]
from error import * from error import *
from nodes import * from nodes import *
@ -464,7 +470,7 @@ SafeConstructor.add_constructor(
SafeConstructor.add_constructor(None, SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined) SafeConstructor.construct_undefined)
class Constructor(SafeConstructor): class FullConstructor(SafeConstructor):
def construct_python_str(self, node): def construct_python_str(self, node):
return self.construct_scalar(node).encode('utf-8') return self.construct_scalar(node).encode('utf-8')
@ -481,18 +487,22 @@ class Constructor(SafeConstructor):
def construct_python_tuple(self, node): def construct_python_tuple(self, node):
return tuple(self.construct_sequence(node)) return tuple(self.construct_sequence(node))
def find_python_module(self, name, mark): def find_python_module(self, name, mark, unsafe=False):
if not name: if not name:
raise ConstructorError("while constructing a Python module", mark, raise ConstructorError("while constructing a Python module", mark,
"expected non-empty name appended to the tag", mark) "expected non-empty name appended to the tag", mark)
if unsafe:
try: try:
__import__(name) __import__(name)
except ImportError, exc: except ImportError, exc:
raise ConstructorError("while constructing a Python module", mark, raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark)
if not name in sys.modules:
raise ConstructorError("while constructing a Python module", mark,
"module %r is not imported" % name.encode('utf-8'), mark)
return sys.modules[name] return sys.modules[name]
def find_python_name(self, name, mark): def find_python_name(self, name, mark, unsafe=False):
if not name: if not name:
raise ConstructorError("while constructing a Python object", mark, raise ConstructorError("while constructing a Python object", mark,
"expected non-empty name appended to the tag", mark) "expected non-empty name appended to the tag", mark)
@ -501,11 +511,15 @@ class Constructor(SafeConstructor):
else: else:
module_name = '__builtin__' module_name = '__builtin__'
object_name = name object_name = name
if unsafe:
try: try:
__import__(module_name) __import__(module_name)
except ImportError, exc: except ImportError, exc:
raise ConstructorError("while constructing a Python object", mark, raise ConstructorError("while constructing a Python object", mark,
"cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark)
if not module_name in sys.modules:
raise ConstructorError("while constructing a Python object", mark,
"module %r is not imported" % module_name.encode('utf-8'), mark)
module = sys.modules[module_name] module = sys.modules[module_name]
if not hasattr(module, object_name): if not hasattr(module, object_name):
raise ConstructorError("while constructing a Python object", mark, raise ConstructorError("while constructing a Python object", mark,
@ -532,12 +546,16 @@ class Constructor(SafeConstructor):
class classobj: pass class classobj: pass
def make_python_instance(self, suffix, node, def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False): args=None, kwds=None, newobj=False, unsafe=False):
if not args: if not args:
args = [] args = []
if not kwds: if not kwds:
kwds = {} kwds = {}
cls = self.find_python_name(suffix, node.start_mark) cls = self.find_python_name(suffix, node.start_mark)
if not (unsafe or isinstance(cls, type) or isinstance(cls, type(self.classobj))):
raise ConstructorError("while constructing a Python instance", node.start_mark,
"expected a class, but found %r" % type(cls),
node.start_mark)
if newobj and isinstance(cls, type(self.classobj)) \ if newobj and isinstance(cls, type(self.classobj)) \
and not args and not kwds: and not args and not kwds:
instance = self.classobj() instance = self.classobj()
@ -609,67 +627,83 @@ class Constructor(SafeConstructor):
def construct_python_object_new(self, suffix, node): def construct_python_object_new(self, suffix, node):
return self.construct_python_object_apply(suffix, node, newobj=True) return self.construct_python_object_apply(suffix, node, newobj=True)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/none', u'tag:yaml.org,2002:python/none',
Constructor.construct_yaml_null) FullConstructor.construct_yaml_null)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/bool', u'tag:yaml.org,2002:python/bool',
Constructor.construct_yaml_bool) FullConstructor.construct_yaml_bool)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/str', u'tag:yaml.org,2002:python/str',
Constructor.construct_python_str) FullConstructor.construct_python_str)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/unicode', u'tag:yaml.org,2002:python/unicode',
Constructor.construct_python_unicode) FullConstructor.construct_python_unicode)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/int', u'tag:yaml.org,2002:python/int',
Constructor.construct_yaml_int) FullConstructor.construct_yaml_int)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/long', u'tag:yaml.org,2002:python/long',
Constructor.construct_python_long) FullConstructor.construct_python_long)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/float', u'tag:yaml.org,2002:python/float',
Constructor.construct_yaml_float) FullConstructor.construct_yaml_float)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/complex', u'tag:yaml.org,2002:python/complex',
Constructor.construct_python_complex) FullConstructor.construct_python_complex)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/list', u'tag:yaml.org,2002:python/list',
Constructor.construct_yaml_seq) FullConstructor.construct_yaml_seq)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/tuple', u'tag:yaml.org,2002:python/tuple',
Constructor.construct_python_tuple) FullConstructor.construct_python_tuple)
Constructor.add_constructor( FullConstructor.add_constructor(
u'tag:yaml.org,2002:python/dict', u'tag:yaml.org,2002:python/dict',
Constructor.construct_yaml_map) FullConstructor.construct_yaml_map)
Constructor.add_multi_constructor( FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/name:', u'tag:yaml.org,2002:python/name:',
Constructor.construct_python_name) FullConstructor.construct_python_name)
Constructor.add_multi_constructor( FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/module:', u'tag:yaml.org,2002:python/module:',
Constructor.construct_python_module) FullConstructor.construct_python_module)
Constructor.add_multi_constructor( FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object:', u'tag:yaml.org,2002:python/object:',
Constructor.construct_python_object) FullConstructor.construct_python_object)
Constructor.add_multi_constructor( FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object/apply:', u'tag:yaml.org,2002:python/object/apply:',
Constructor.construct_python_object_apply) FullConstructor.construct_python_object_apply)
Constructor.add_multi_constructor( FullConstructor.add_multi_constructor(
u'tag:yaml.org,2002:python/object/new:', u'tag:yaml.org,2002:python/object/new:',
Constructor.construct_python_object_new) FullConstructor.construct_python_object_new)
class UnsafeConstructor(FullConstructor):
def find_python_module(self, name, mark):
return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True)
def find_python_name(self, name, mark):
return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True)
def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False):
return super(UnsafeConstructor, self).make_python_instance(
suffix, node, args, kwds, newobj, unsafe=True)
# Constructor is same as UnsafeConstructor. Need to leave this in place in case
# people have extended it directly.
class Constructor(UnsafeConstructor):
pass

View file

@ -1,6 +1,8 @@
__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', __all__ = [
'CBaseDumper', 'CSafeDumper', 'CDumper'] 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper'
]
from _yaml import CParser, CEmitter from _yaml import CParser, CEmitter
@ -25,6 +27,20 @@ class CSafeLoader(CParser, SafeConstructor, Resolver):
SafeConstructor.__init__(self) SafeConstructor.__init__(self)
Resolver.__init__(self) Resolver.__init__(self)
class CFullLoader(CParser, FullConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
FullConstructor.__init__(self)
Resolver.__init__(self)
class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
UnsafeConstructor.__init__(self)
Resolver.__init__(self)
class CLoader(CParser, Constructor, Resolver): class CLoader(CParser, Constructor, Resolver):
def __init__(self, stream): def __init__(self, stream):

View file

@ -1,5 +1,5 @@
__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] __all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader']
from reader import * from reader import *
from scanner import * from scanner import *
@ -18,6 +18,16 @@ class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolve
BaseConstructor.__init__(self) BaseConstructor.__init__(self)
BaseResolver.__init__(self) BaseResolver.__init__(self)
class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
FullConstructor.__init__(self)
Resolver.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
def __init__(self, stream): def __init__(self, stream):
@ -38,3 +48,16 @@ class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
Constructor.__init__(self) Constructor.__init__(self)
Resolver.__init__(self) Resolver.__init__(self)
# UnsafeLoader is the same as Loader (which is and was always unsafe on
# untrusted input). Use of either Loader or UnsafeLoader should be rare, since
# FullLoader should be able to load almost all YAML safely. Loader is left intact
# to ensure backwards compatibility.
class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)

View file

@ -1,6 +1,6 @@
NAME = 'PyYAML' NAME = 'PyYAML'
VERSION = '4.1' VERSION = '3.13'
DESCRIPTION = "YAML parser and emitter for Python" DESCRIPTION = "YAML parser and emitter for Python"
LONG_DESCRIPTION = """\ LONG_DESCRIPTION = """\
YAML is a data serialization format designed for human readability YAML is a data serialization format designed for human readability

View file

@ -3,7 +3,7 @@ import yaml, test_emitter
def test_loader_error(error_filename, verbose=False): def test_loader_error(error_filename, verbose=False):
try: try:
list(yaml.load_all(open(error_filename, 'rb'))) list(yaml.load_all(open(error_filename, 'rb'), yaml.FullLoader))
except yaml.YAMLError, exc: except yaml.YAMLError, exc:
if verbose: if verbose:
print "%s:" % exc.__class__.__name__, exc print "%s:" % exc.__class__.__name__, exc
@ -14,7 +14,7 @@ test_loader_error.unittest = ['.loader-error']
def test_loader_error_string(error_filename, verbose=False): def test_loader_error_string(error_filename, verbose=False):
try: try:
list(yaml.load_all(open(error_filename, 'rb').read())) list(yaml.load_all(open(error_filename, 'rb').read(), yaml.FullLoader))
except yaml.YAMLError, exc: except yaml.YAMLError, exc:
if verbose: if verbose:
print "%s:" % exc.__class__.__name__, exc print "%s:" % exc.__class__.__name__, exc
@ -25,7 +25,7 @@ test_loader_error_string.unittest = ['.loader-error']
def test_loader_error_single(error_filename, verbose=False): def test_loader_error_single(error_filename, verbose=False):
try: try:
yaml.load(open(error_filename, 'rb').read()) yaml.load(open(error_filename, 'rb').read(), yaml.FullLoader)
except yaml.YAMLError, exc: except yaml.YAMLError, exc:
if verbose: if verbose:
print "%s:" % exc.__class__.__name__, exc print "%s:" % exc.__class__.__name__, exc

View file

@ -17,7 +17,7 @@ def _unicode_open(file, encoding, errors='strict'):
def test_unicode_input(unicode_filename, verbose=False): def test_unicode_input(unicode_filename, verbose=False):
data = open(unicode_filename, 'rb').read().decode('utf-8') data = open(unicode_filename, 'rb').read().decode('utf-8')
value = ' '.join(data.split()) value = ' '.join(data.split())
output = yaml.load(_unicode_open(StringIO.StringIO(data.encode('utf-8')), 'utf-8')) output = yaml.full_load(_unicode_open(StringIO.StringIO(data.encode('utf-8')), 'utf-8'))
assert output == value, (output, value) assert output == value, (output, value)
for input in [data, data.encode('utf-8'), for input in [data, data.encode('utf-8'),
codecs.BOM_UTF8+data.encode('utf-8'), codecs.BOM_UTF8+data.encode('utf-8'),
@ -25,9 +25,9 @@ def test_unicode_input(unicode_filename, verbose=False):
codecs.BOM_UTF16_LE+data.encode('utf-16-le')]: codecs.BOM_UTF16_LE+data.encode('utf-16-le')]:
if verbose: if verbose:
print "INPUT:", repr(input[:10]), "..." print "INPUT:", repr(input[:10]), "..."
output = yaml.load(input) output = yaml.full_load(input)
assert output == value, (output, value) assert output == value, (output, value)
output = yaml.load(StringIO.StringIO(input)) output = yaml.full_load(StringIO.StringIO(input))
assert output == value, (output, value) assert output == value, (output, value)
test_unicode_input.unittest = ['.unicode'] test_unicode_input.unittest = ['.unicode']
@ -40,14 +40,14 @@ def test_unicode_input_errors(unicode_filename, verbose=False):
codecs.BOM_UTF8+data.encode('utf-16-le')]: codecs.BOM_UTF8+data.encode('utf-16-le')]:
try: try:
yaml.load(input) yaml.full_load(input)
except yaml.YAMLError, exc: except yaml.YAMLError, exc:
if verbose: if verbose:
print exc print exc
else: else:
raise AssertionError("expected an exception") raise AssertionError("expected an exception")
try: try:
yaml.load(StringIO.StringIO(input)) yaml.full_load(StringIO.StringIO(input))
except yaml.YAMLError, exc: except yaml.YAMLError, exc:
if verbose: if verbose:
print exc print exc

View file

@ -30,7 +30,7 @@ def test_recursive(recursive_filename, verbose=False):
output2 = None output2 = None
try: try:
output1 = yaml.dump(value1) output1 = yaml.dump(value1)
value2 = yaml.load(output1) value2 = yaml.load(output1, yaml.FullLoader)
output2 = yaml.dump(value2) output2 = yaml.dump(value2)
assert output1 == output2, (output1, output2) assert output1 == output2, (output1, output2)
finally: finally: