From 6e545aa2512485eccbd629156489cebc58e43b49 Mon Sep 17 00:00:00 2001 From: Alejandro Villar Date: Fri, 15 Mar 2024 14:24:57 +0100 Subject: [PATCH] Add more config properties to xml input filter --- ogc/na/input_filters/xml.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ogc/na/input_filters/xml.py b/ogc/na/input_filters/xml.py index 981a265..cfecf81 100644 --- a/ogc/na/input_filters/xml.py +++ b/ogc/na/input_filters/xml.py @@ -1,12 +1,18 @@ """ XML Input filter for ingest_json. -Processes XML files with [xmltodict](https://pypi.org/project/xmltodict/). +Processes XML files with [xmltodict](https://pypi.org/project/xmltodict/). Attributes are prefixed with `_` instead +of `@` by default. Configuration values: * `process-namespaces` (default: `False`): Whether to process and expand namespaces (see xmltodict documentation) * `namespaces` (default: `None`): Namespace to prefix mappings dict in `url: prefix` format. +* `attr-prefix` (default: `_`): Prefix that will be used for attributes (in order to avoid potential clashes with + element names). +* `namespace-separator` (default: `:`): String that will be used to separate the namespace prefix and the local name + when processing namespaces. +* `text-property` (default: `_`): Property name under which the element's text content will be stored. """ from __future__ import annotations @@ -19,6 +25,9 @@ DEFAULT_CONF = { 'process-namespaces': False, 'namespaces': None, + 'attr-prefix': '_', + 'namespace-separator': ':', + 'text-property': '_', } @@ -34,6 +43,10 @@ def apply_filter(content: bytes, conf: dict[str, Any] | None) -> tuple[dict[str, textio = StringIO(content.decode('utf-8')) result = xmltodict.parse(textio.read(), process_namespaces=conf['process-namespaces'], - namespaces=conf['namespaces']) + namespaces=conf['namespaces'], + attr_prefix=conf['attr-prefix'], + namespace_separator=conf['namespace-separator'], + cdata_key=conf['text-property'], + ) return result, metadata