Strict YAML deserialization in Python with the marshmallow library

Initial task



  • It is necessary to read a non-trivial config from the .yaml file.
  • The config structure is described using data classes.
  • It is required that type checks are performed during deserialization and an exception is thrown if the data is not valid.


That is, to put it simply, you need a function of the form:







def strict_load_yaml(yaml: str, loaded_type: Type[Any]):
    """Deserialize the *yaml* text into an instance of *loaded_type*.

    Placeholder only: the goal is a loader that type-checks the data and
    raises an exception when it is invalid. The real body comes later.
    """
    ...
      
      





And this function will be used like this:







@dataclass
class MyConfig:
    """Placeholder for the tree of configuration objects."""

try:
    # Open read-only: mode "w" would truncate the file and make read() fail.
    # The context manager closes the file even if loading raises.
    with open("config.yaml", encoding="utf-8") as config_file:
        config = strict_load_yaml(config_file.read(), MyConfig)
except Exception:
    # Top-level boundary: any failure (I/O, YAML syntax, validation) is
    # logged together with its traceback.
    logging.exception("Config is invalid")
      
      





Configuration classes



The file config.py



looks like this:







from dataclasses import dataclass
from enum import Enum
from typing import Optional

class Color(Enum):
    """Colors accepted by the ``led_color`` setting."""
    # Each member's value is the lowercase form of its name.
    RED = "red"
    GREEN = "green"
    BLUE = "blue"

@dataclass
class BattleStationConfig:
    """Top-level configuration object with a nested processor section."""
    @dataclass
    class Processor:
        """Nested section describing the processor."""
        core_count: int
        manufacturer: str

    processor: Processor
    memory_gb: int
    # Optional setting; defaults to None.
    led_color: Optional[Color] = None

      
      





Option that doesn't work



The original problem is common, isn't it? So the solution should be trivial. Just import the standard yaml library and you're done?







Let's try PyYaml's load:







from pprint import pprint

from yaml import load, SafeLoader

# Sample document whose keys mirror the fields of BattleStationConfig.
yaml = """
processor:
  core_count: 8
  manufacturer: Intel
memory_gb: 8
led_color: red
"""

# Parse the document; the printed result is a plain nested dict.
loaded = load(yaml, Loader=SafeLoader)
pprint(loaded)

      
      





The output:







{'led_color': 'red',
 'memory_gb': 8,
 'processor': {'core_count': 8, 'manufacturer': 'Intel'}}
      
      





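The Yaml was parsed without problems and turned into a dictionary. All that remains is to pass it to the constructor as **args: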







# The dataclass constructor stores the values exactly as given: the nested
# "processor" dict and the "red" string are neither converted nor type-checked.
parsed_config = BattleStationConfig(**loaded)
pprint(parsed_config)
      
      





The result:







BattleStationConfig(processor={'core_count': 8, 'manufacturer': 'Intel'}, memory_gb=8, led_color='red')
      
      





Wow! Easy! But… wait a second. Why is processor a dictionary? Damn.







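Python does not check the types of the constructor arguments and does not convert the dictionary into the Processor class. So it is time to go to Stack Overflow.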







Option that requires tags in the yaml document



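After reading the answers on Stack Overflow and the PyYaml documentation, I found that a yaml document can be annotated with tags that name the types. The classes then have to inherit from YAMLObject, so the file config_with_tag.py looks like this: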







from dataclasses import dataclass
from enum import Enum
from typing import Optional

from yaml import YAMLObject, SafeLoader

class Color(Enum):
    """Colors accepted by the ``led_color`` setting."""
    # Each member's value is the lowercase form of its name.
    RED = "red"
    GREEN = "green"
    BLUE = "blue"

@dataclass
class BattleStationConfig(YAMLObject):
    """Configuration object that is built from a tagged yaml node."""
    # Tag that marks a yaml node as an instance of this class.
    yaml_tag = "!BattleStationConfig"
    # Loader the tag is associated with; the loading script uses the same one.
    yaml_loader = SafeLoader

    @dataclass
    class Processor(YAMLObject):
        """Nested processor section, selected by its own tag."""
        yaml_tag = "!Processor"
        yaml_loader = SafeLoader

        core_count: int
        manufacturer: str

    processor: Processor
    memory_gb: int
    # Optional setting; defaults to None.
    led_color: Optional[Color] = None
      
      





Now let's load a tagged document:







from pprint import pprint

from yaml import load, SafeLoader

from config_with_tag import BattleStationConfig

# The same document, now annotated with the tags declared by the classes.
yaml = """
--- !BattleStationConfig
processor: !Processor
  core_count: 8
  manufacturer: Intel
memory_gb: 8
led_color: red
"""

# NOTE(review): this name is never read afterwards; presumably it only keeps
# the BattleStationConfig import from looking unused - confirm.
a = BattleStationConfig

# The tags make the loader build the dataclass instances directly.
loaded = load(yaml, Loader=SafeLoader)
pprint(loaded)
      
      





And what do we get?







BattleStationConfig(processor=BattleStationConfig.Processor(core_count=8, manufacturer='Intel'), memory_gb=8, led_color='red')
      
      





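Much better. But the yaml document is now cluttered with tags, and Color is still loaded as a plain string. Maybe it can inherit from YAMLObject too? Why not? Let's try.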







# Does not work: combining Enum and YAMLObject as bases raises
# "TypeError: metaclass conflict" when the class statement is executed.
class Color(Enum, YAMLObject):
    RED = "red"
    GREEN = "green"
    BLUE = "blue"
      
      





The result:







TypeError: metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases
      
      





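That does not work. And since I did not want tags in the yaml document anyway, I went looking for another solution.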







marshmallow



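On Stack Overflow I found a recommendation to use the marshmallow library for parsing JSON documents. I decided that this fits my case too since, in essence, yaml is a superset of JSON. I tried class_schema from marshmallow_dataclass, which generates a schema for a data class: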







from pprint import pprint

from yaml import load, SafeLoader
from marshmallow_dataclass import class_schema

from config import BattleStationConfig

# Untagged document; led_color uses the enum value ("red").
yaml = """
processor:
  core_count: 8
  manufacturer: Intel
memory_gb: 8
led_color: red
"""

loaded = load(yaml, Loader=SafeLoader)
pprint(loaded)

# Generate a marshmallow schema class from the dataclass definition.
BattleStationConfigSchema = class_schema(BattleStationConfig)

# Deserialize the dict through the schema; with this document the call
# raises marshmallow.exceptions.ValidationError for led_color.
result = BattleStationConfigSchema().load(loaded)
pprint(result)

      
      





However, loading fails with an error:







marshmallow.exceptions.ValidationError: {'led_color': ['Invalid enum member red']}
      
      





So marshmallow can parse the enum, but it matches members by name rather than by value. If the yaml document is changed to:







processor:
  core_count: 8
  manufacturer: Intel
memory_gb: 8
led_color: RED
      
      





then the object is deserialized as expected:







BattleStationConfig(processor=BattleStationConfig.Processor(core_count=8, manufacturer='Intel'), memory_gb=8, led_color=<Color.RED: 'red'>)
      
      





But I want to keep the enum values in the yaml document. The marshmallow documentation says:







Setting by_value=True. This will cause both dumping and loading to use the value of the enum.

This flag can be passed to the schema through the metadata of the dataclass field:







@dataclass
class BattleStationConfig:
    # Only the changed field is shown here; presumably the other fields are
    # omitted for brevity. The by_value flag in the metadata asks marshmallow
    # to match the enum by its value ("red") rather than by its name ("RED").
    led_color: Optional[Color] = field(default=None, metadata={"by_value": True})
      
      





After that, the config is parsed from the original, "natural" yaml document, without any tags.









As a result, the required function looks like this:







def strict_load_yaml(yaml: str, loaded_type: Type[Any]):
    """Parse the *yaml* text and deserialize it into *loaded_type*.

    A marshmallow schema is generated from *loaded_type*, and the parsed
    document is loaded through it. Errors raised by the yaml parser or by
    the schema propagate to the caller.
    """
    schema_cls = class_schema(loaded_type)
    deserializer = schema_cls()
    # Parse only after the schema instance exists, so failures surface in
    # the same order as in a single chained expression.
    raw_data = load(yaml, Loader=SafeLoader)
    return deserializer.load(raw_data)
      
      





This function may require additional configuration for data classes, but it solves the original problem and does not require tags in yaml.







A quick note on ForwardRef



If you define data classes with ForwardRef (a string with the class name), marshmallow will be confused and will not be able to parse this class.







For example, such a configuration







from dataclasses import dataclass, field
from enum import Enum
from typing import Optional, ForwardRef

@dataclass
class BattleStationConfig:
    # Forward reference by name: Processor is declared further down in this class.
    processor: ForwardRef("Processor")
    memory_gb: int
    # String annotation: Color is declared after this class.
    led_color: Optional["Color"] = field(default=None, metadata={"by_value": True})

    @dataclass
    class Processor:
        """Nested section describing the processor."""
        core_count: int
        manufacturer: str

class Color(Enum):
    """Colors accepted by the ``led_color`` setting."""
    # Each member's value is the lowercase form of its name.
    RED = "red"
    GREEN = "green"
    BLUE = "blue"

      
      





will result in an error







marshmallow.exceptions.RegistryError: Class with name 'Processor' was not found. You may need to import the class.
      
      





And if you move the class Processor



higher, marshmallow will lose the class Color



with a similar error. So, if possible, don't use ForwardRef on your classes if you want to parse them with marshmallow.







The code



All code is available in the GitHub repository.








All Articles