"""
Module to read CFDIs.
"""
from __future__ import annotations
import re
from decimal import Decimal
from typing import Callable, Type, Union
import pydantic
import xmltodict
from cfdibills.errors import InvalidCFDIError, UnsupportedCFDIError
from cfdibills.schemas.cfdi33 import CFDI33
from cfdibills.schemas.cfdi40 import CFDI40
# Matches any character followed by a capitalized word (an uppercase letter and
# one or more lowercase letters); used by _camel_to_snake to insert "_" between them.
_name_pattern = re.compile(r"(.)([A-Z][a-z]+)")
# Matches a lowercase letter or digit immediately followed by an uppercase letter;
# used by _camel_to_snake as a second pass to insert "_" at the remaining boundaries.
_snake_pattern = re.compile(r"([a-z0-9])([A-Z])")
def _get_cfdi_with_version(candidate: dict) -> tuple[dict, str]:
try:
cfdi = candidate["comprobante"]
version = cfdi["version"]
except KeyError as e:
raise InvalidCFDIError(f"The XML given does not contain a '{e.args[0]}'.")
return cfdi, version
def _xml_to_json(path: str, normalize: bool = True) -> dict:
    """
    Loads an XML file from disk and returns it as a dictionary.

    Parameters
    ----------
    path: str
        Location of the XML file; it is opened in binary mode
    normalize: bool
        When True (the default) the keys are passed through
        normalize_dict_keys, otherwise the xmltodict output is returned as is

    Returns
    -------
    dict
        The parsed XML document
    """
    with open(path, "rb") as xml_file:
        # dict_constructor=dict asks xmltodict to build plain dicts
        parsed = xmltodict.parse(xml_file, dict_constructor=dict)
    if not normalize:
        return parsed
    return normalize_dict_keys(parsed)
def _parse_cfdi(cfdi: dict, version: str) -> Union[CFDI33, CFDI40]:
    """
    Builds the pydantic model that corresponds to the given CFDI version.

    Parameters
    ----------
    cfdi: dict
        Normalized content of the CFDI
    version: str
        CFDI version used to select the schema; "3.3" and "4.0" are known

    Returns
    -------
    Union[CFDI33, CFDI40]
        The model produced by the schema selected for the version

    Raises
    ------
    UnsupportedCFDIError
        If there is no schema registered for the version
    InvalidCFDIError
        If pydantic rejects the content while validating it
    """
    mapper = {"3.3": CFDI33, "4.0": CFDI40}
    parser = mapper.get(version)
    if parser is None:
        raise UnsupportedCFDIError(f"Version '{version}' is not supported. It must be one of {mapper.keys()}.")
    try:
        # Mypy cannot tell that every schema in the mapper is a BaseModel, hence the ignore
        return parser.parse_obj(cfdi)  # type: ignore
    except pydantic.ValidationError as validation_error:
        # "from None" drops the pydantic traceback; its text is kept in the message
        raise InvalidCFDIError(str(validation_error)) from None
def _camel_to_snake(camelcase: str) -> str:
    """
    Turns a camelCase (or PascalCase) string into its snake_case form.

    For example, "TipoDeComprobante" becomes "tipo_de_comprobante".

    Source: https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

    Parameters
    ----------
    camelcase: str
        Text to convert

    Returns
    -------
    str
        The same text in lowercase, with "_" at the word boundaries
    """
    # first pass: put "_" in front of every capitalized word
    words_split = _name_pattern.sub(r"\1_\2", camelcase)
    # second pass: put "_" between a lowercase letter or digit and an uppercase letter
    fully_split = _snake_pattern.sub(r"\1_\2", words_split)
    return fully_split.lower()
def _normalize_value(value):
    """Normalizes a single value found while walking the dictionary."""
    if isinstance(value, dict):
        return normalize_dict_keys(value)
    if isinstance(value, list):
        # items are normalized one by one because a list may hold plain values
        # (strings, None) and not only dictionaries
        return [_normalize_value(item) for item in value]
    if isinstance(value, Decimal):
        return float(value)
    return value


def normalize_dict_keys(ugly_dict: dict) -> dict:
    """
    Maps the raw keys of a parsed XML dictionary to human-readable keys.

    The dictionary received has keys that:
        * begin with "@" when they are attributes
        * begin with "cfdi:", "tfd:" or similar when they are nodes
        * contain "xmlns" or "xsi" when they are namespace definitions
        * are written in camelCase as defined by SAT's xsd

    So all of these are normalized to plain snake_case strings, and the keys
    containing "xmlns" or "xsi" are dropped.

    Some special cases for the values:
        * if the item is a Decimal, map it to a python float
        * if the item is a dictionary, normalize its children
        * if it is a list, normalize every item in it
        * anything else is kept untouched

    Parameters
    ----------
    ugly_dict: dict
        Dictionary as output from xmltodict

    Returns
    -------
    dict
        New dictionary with keys in snake_case format
    """
    result = {}
    # normalize key by key in a DFS way
    for key, value in ugly_dict.items():
        # namespaces are not part of cfdi's, so they are omitted
        if "xmlns" in key or "xsi" in key:
            continue
        # drop the leading "@" of attributes or the "prefix:" of nodes
        new_key = _camel_to_snake(key[1:] if "@" in key else key.split(":")[-1])
        result[new_key] = _normalize_value(value)
    return result
def read_xml(path: str) -> Union[CFDI33, CFDI40]:
    """
    Loads the CFDI stored in a .xml file as a pydantic object.

    Parameters
    ----------
    path: str
        Location of the xml file to read

    Returns
    -------
    Union[CFDI33, CFDI40]
        Pydantic object of the CFDI

    Raises
    ------
    InvalidCFDIError
        If the xml is not in a valid format
    UnsupportedCFDIError
        If the CFDI version of the XML is not supported
    """
    # file -> normalized dict -> (cfdi node, version) -> pydantic model
    cfdi, version = _get_cfdi_with_version(_xml_to_json(path))
    return _parse_cfdi(cfdi=cfdi, version=version)