Source code for zsl.utils.xml_to_json

"""
:mod:`zsl.utils.xml_to_json` -- xml helpers
-------------------------------------------

Helper functions that simplify parsing XML into Python objects using definition schemas.
"""


class XmlToJsonException(Exception):
    """Exception raised during converting xml to json.

    Raised when a definition schema itself is malformed, e.g. an empty list
    definition or a tuple definition that starts with an attribute.
    """

    # TODO make use of a built-in exception, see bug #13299
class NotCompleteXmlException(Exception):
    """Exception raised during parsing an invalid XML.

    Raised when a part of the XML marked as required by the definition
    schema is missing from the parsed element.
    """

    # TODO make use of a built-in exception, see bug #13299
def xml_to_json(element, definition, required=False):
    """Convert XML (ElementTree) to dictionary from a definition schema.

    Definition schema can be a simple string - an XPath for the text of the
    first matching sub-element, or ``@attribute`` for an attribute of
    ``element`` - or a complex one described by

    * dictionary ``{key: 'xpath or @attribute', second: 'complex definition'}``
      - every value is parsed as a definition against the same element;
      required parameters can be marked with ``*`` at the end of the key
    * list ``[xpath, definition]`` - create a list of all elements found by
      xpath, parse the parts with the given definition if provided as the
      second item, otherwise use their stripped text
    * tuple - ``()`` gives ``None``; ``(definition,)`` is the same as the bare
      definition; ``(callable, def1, def2, ...)`` calls the callable with the
      parsed results of the remaining definitions; ``(xpath, definition)``
      parses the first sub-element matched by xpath with the definition
    * Callable - parse the element by the given function, can be handy as a
      part of a complex definition

    Any other definition (e.g. ``None`` or an empty string) yields the
    stripped text of ``element`` itself, or ``None`` when it has no text.

    :param element: ElementTree element
    :type element: ElementTree.Element
    :param definition: schema for the json
    :type definition: Union[str, tuple, dict, list, Callable]
    :param required: the sub-element or attribute addressed by a string (or
        string-led tuple) definition must be present
    :type required: bool
    :return: parsed xml
    :rtype: Union[dict, str, list]
    :raises NotCompleteXmlException: when ``required`` is set and the
        addressed sub-element or attribute is missing
    """
    # handle simple definition
    if isinstance(definition, str) and definition:
        if definition.startswith('@'):
            # attribute of the current element
            value = element.get(definition[1:])
            # Honour ``required`` for attributes the same way as for
            # sub-elements instead of silently returning None.
            if value is None and required:
                raise NotCompleteXmlException(
                    'Expecting {0} in element {1}'.format(definition, element.tag))
            return value

        # text of the first matching sub-element
        sub_element = element.find(definition)
        if sub_element is None:
            if required:
                raise NotCompleteXmlException(
                    'Expecting {0} in element {1}'.format(definition, element.tag))
            return None
        return sub_element.text.strip() if sub_element.text else None

    # complex definitions are delegated to the dedicated helpers
    if isinstance(definition, tuple):
        return _parse_tuple(element, definition, required)
    if isinstance(definition, dict):
        return _parse_dict(element, definition)
    if isinstance(definition, list):
        return _parse_list(element, definition)
    if callable(definition):
        return definition(element)

    # default: text of the element itself
    return element.text.strip() if element.text else None
def _parse_dict(element, definition):
    """Parse xml element by a definition given in dict format.

    Every value of the dict is parsed as a definition against ``element``.
    A key ending with ``*`` marks its value as required; the marker is
    stripped from the key used in the result.

    :param element: ElementTree element
    :param definition: definition schema
    :type definition: dict
    :return: parsed xml
    :rtype: dict
    """
    sub_dict = {}

    for key, subdef in definition.items():
        name, required = _parse_name(key)
        sub_dict[name] = xml_to_json(element, subdef, required)

    return sub_dict


def _parse_tuple(element, definition, required):
    """Parse xml element by a definition given in tuple format.

    Supported shapes:

    * ``()`` - returns ``None``
    * ``(definition,)`` - parsed exactly like the bare definition
    * ``(callable, def1, def2, ...)`` - the remaining definitions are parsed
      against ``element`` and passed to the callable as positional arguments
    * ``(xpath, definition)`` - the first sub-element matched by ``xpath`` is
      parsed with ``definition``; any further items are ignored

    :param element: ElementTree element
    :param definition: definition schema
    :type definition: tuple
    :param required: parsed value should be not None
    :type required: bool
    :return: parsed xml
    :raises XmlToJsonException: when the tuple starts with neither a callable
        nor a string, or starts with an ``@attribute``
    :raises NotCompleteXmlException: when ``required`` is set and the
        sub-element addressed by the first item is missing
    """
    if not definition:
        return None

    if len(definition) == 1:
        return xml_to_json(element, definition[0], required)

    first = definition[0]

    if callable(first):
        parsed_args = [xml_to_json(element, part) for part in definition[1:]]
        return first(*parsed_args)

    if not isinstance(first, str):
        raise XmlToJsonException('Tuple definition must start with function or string')

    # ``startswith`` keeps an empty path from crashing with IndexError; an
    # empty path simply matches no sub-element below.
    if first.startswith('@'):
        raise XmlToJsonException('Tuple definition must not start with attribute')

    sub_elem = element.find(first)

    if sub_elem is None:
        if required:
            raise NotCompleteXmlException(
                'Expecting {0} in element {1}'.format(first, element.tag))
        return None

    return xml_to_json(sub_elem, definition[1], required)


def _parse_list(element, definition):
    """Parse xml element by definition given by list.

    Find all sub-elements matched by the first value of the list (a path
    passed to ``element.findall``). If there is a second value it is used as
    the definition for every matched element; otherwise the stripped text of
    each matched element is used.

    :param element: ElementTree element
    :param definition: definition schema
    :type definition: list
    :return: parsed xml
    :rtype: list
    :raises XmlToJsonException: when the list definition is empty
    """
    if not definition:
        raise XmlToJsonException('List definition needs some definition')

    tag = definition[0]
    tag_def = definition[1] if len(definition) > 1 else None

    return [xml_to_json(el, tag_def) for el in element.findall(tag)]


def _parse_name(name):
    """Parse name in complex dict definition.

    In complex definition required params can be marked with `*`.

    :param name: key of the dict definition, optionally ending with ``*``
    :return: name without the marker and the required flag
    :rtype: tuple
    """
    # ``endswith`` also copes with an empty key, where ``name[-1]`` would
    # raise IndexError.
    if name.endswith('*'):
        return name[:-1], True
    return name, False