From 28ad1cfd9e0f980d06fafdfe4b124156a978bdc1 Mon Sep 17 00:00:00 2001 From: Evie Lau <689163+evie-lau@users.noreply.github.com> Date: Wed, 3 Jul 2024 18:18:46 -0500 Subject: [PATCH] Add XPath support for namespace-uri() condition and attribute elements (#4287) * Add support for namespace-uri() condition * Update copyrights, add @Override * add missing overrides * Formatting, remove test that was accidentally added * Improve attribute element matching. Add more XPath test coverage and TODO test cases. * Formatting * Handle namespaced attribute element with conditions * Remove latest updated year in copyright * Move splitter pattern to class field --------- Co-authored-by: Tim te Beek --- .../org/openrewrite/xml/XPathMatcher.java | 111 ++++++++++++------ .../org/openrewrite/xml/tree/Namespaced.java | 33 ++++++ .../java/org/openrewrite/xml/tree/Xml.java | 64 +++++++++- .../org/openrewrite/xml/XPathMatcherTest.java | 65 +++++++++- 4 files changed, 234 insertions(+), 39 deletions(-) create mode 100644 rewrite-xml/src/main/java/org/openrewrite/xml/tree/Namespaced.java diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java b/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java index 7c6066b72f9..bae47c1b672 100644 --- a/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java +++ b/rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java @@ -19,6 +19,7 @@ import org.openrewrite.internal.StringUtils; import org.openrewrite.internal.lang.Nullable; import org.openrewrite.xml.search.FindTags; +import org.openrewrite.xml.tree.Namespaced; import org.openrewrite.xml.tree.Xml; import java.util.*; @@ -36,8 +37,9 @@ */ public class XPathMatcher { + private static final Pattern XPATH_ELEMENT_SPLITTER = Pattern.compile("((?<=/)(?=/)|[^/\\[]|\\[[^]]*\\])+"); // Regular expression to support conditional tags like `plugin[artifactId='maven-compiler-plugin']` or foo[@bar='baz'] - private static final Pattern PATTERN = 
Pattern.compile("([-\\w]+|\\*)\\[((local-name|namespace-uri)\\(\\)|(@)?([-\\w]+|\\*))='([-\\w.]+)']"); + private static final Pattern PATTERN = Pattern.compile("(@)?([-:\\w]+|\\*)\\[((local-name|namespace-uri)\\(\\)|(@)?([-\\w]+|\\*))='(.*)']"); private final String expression; private final boolean startsWithSlash; @@ -48,7 +50,16 @@ public XPathMatcher(String expression) { this.expression = expression; startsWithSlash = expression.startsWith("/"); startsWithDoubleSlash = expression.startsWith("//"); - parts = expression.substring(startsWithDoubleSlash ? 2 : startsWithSlash ? 1 : 0).split("/"); + parts = splitOnXPathSeparator(expression.substring(startsWithDoubleSlash ? 2 : startsWithSlash ? 1 : 0)); + } + + private String[] splitOnXPathSeparator(String input) { + List matches = new ArrayList<>(); + Matcher m = XPATH_ELEMENT_SPLITTER.matcher(input); + while (m.find()) { + matches.add(m.group()); + } + return matches.toArray(new String[0]); } /** @@ -78,13 +89,17 @@ public boolean matches(Cursor cursor) { if (index < 0) { return false; } - //if is Attribute - if (part.charAt(index + 1) == '@') { - partWithCondition = part; - tagForCondition = path.get(i); - } else if (part.contains("(") && part.contains(")")) { //if is function + if (part.startsWith("@")) { // is attribute selector partWithCondition = part; - tagForCondition = path.get(i); + tagForCondition = i > 0 ? 
path.get(i - 1) : path.get(i); + } else { // is element selector + if (part.charAt(index + 1) == '@') { // is Attribute condition + partWithCondition = part; + tagForCondition = path.get(i); + } else if (part.contains("(") && part.contains(")")) { // is function condition + partWithCondition = part; + tagForCondition = path.get(i); + } } } else if (i < path.size() && i > 0 && parts[i - 1].endsWith("]")) { String partBefore = parts[i - 1]; @@ -102,24 +117,30 @@ public boolean matches(Cursor cursor) { } String partName; + boolean matchedCondition = false; Matcher matcher; if (tagForCondition != null && partWithCondition.endsWith("]") && (matcher = PATTERN.matcher( partWithCondition)).matches()) { - String optionalPartName = matchesCondition(matcher, tagForCondition, cursor); + String optionalPartName = matchesElementWithConditionFunction(matcher, tagForCondition, cursor); if (optionalPartName == null) { return false; } partName = optionalPartName; + matchedCondition = true; } else { partName = null; } if (part.startsWith("@")) { - if (!(cursor.getValue() instanceof Xml.Attribute && - (((Xml.Attribute) cursor.getValue()).getKeyAsString().equals(part.substring(1))) || - "*".equals(part.substring(1)))) { - return false; + if (!matchedCondition) { + if (!(cursor.getValue() instanceof Xml.Attribute)) { + return false; + } + Xml.Attribute attribute = cursor.getValue(); + if (!attribute.getKeyAsString().equals(part.substring(1)) && !"*".equals(part.substring(1))) { + return false; + } } pathIndex--; @@ -145,7 +166,7 @@ public boolean matches(Cursor cursor) { Collections.reverse(path); // Deal with the two forward slashes in the expression; works, but I'm not proud of it. 
- if (expression.contains("//") && Arrays.stream(parts).anyMatch(StringUtils::isBlank)) { + if (expression.contains("//") && !expression.contains("://") && Arrays.stream(parts).anyMatch(StringUtils::isBlank)) { int blankPartIndex = Arrays.asList(parts).indexOf(""); int doubleSlashIndex = expression.indexOf("//"); @@ -176,24 +197,30 @@ public boolean matches(Cursor cursor) { for (int i = 0; i < parts.length; i++) { String part = parts[i]; - Xml.Tag tag = i < path.size() ? path.get(i) : null; + int isAttr = part.startsWith("@") ? 1 : 0; + Xml.Tag tag = i - isAttr < path.size() ? path.get(i - isAttr) : null; String partName; + boolean matchedCondition = false; Matcher matcher; if (tag != null && part.endsWith("]") && (matcher = PATTERN.matcher(part)).matches()) { - String optionalPartName = matchesCondition(matcher, tag, cursor); + String optionalPartName = matchesElementWithConditionFunction(matcher, tag, cursor); if (optionalPartName == null) { return false; } partName = optionalPartName; + matchedCondition = true; } else { partName = part; } if (part.startsWith("@")) { + if (matchedCondition) { + return true; + } return cursor.getValue() instanceof Xml.Attribute && - (((Xml.Attribute) cursor.getValue()).getKeyAsString().equals(part.substring(1)) || - "*".equals(part.substring(1))); + (((Xml.Attribute) cursor.getValue()).getKeyAsString().equals(part.substring(1)) || + "*".equals(part.substring(1))); } if (path.size() < i + 1 || (tag != null && !tag.getName().equals(partName) && !partName.equals("*") && !"*".equals(part))) { @@ -206,32 +233,32 @@ public boolean matches(Cursor cursor) { } @Nullable - private String matchesCondition(Matcher matcher, Xml.Tag tag, Cursor cursor) { - String name = matcher.group(1); - boolean isAttribute = matcher.group(4) != null; // either group4 != null, or group 2 startsWith @ - String selector = isAttribute ? 
matcher.group(5) : matcher.group(2); - boolean isFunction = selector.endsWith("()"); - String value = matcher.group(6); + private String matchesElementWithConditionFunction(Matcher matcher, Xml.Tag tag, Cursor cursor) { + boolean isAttributeElement = matcher.group(1) != null; + String element = matcher.group(2); + boolean isAttributeCondition = matcher.group(5) != null; // either group4 != null, or group 2 startsWith @ + String selector = isAttributeCondition ? matcher.group(6) : matcher.group(3); + boolean isFunctionCondition = selector.endsWith("()"); + String value = matcher.group(7); boolean matchCondition = false; - if (isAttribute) { + if (isAttributeCondition) { for (Xml.Attribute a : tag.getAttributes()) { if ((a.getKeyAsString().equals(selector) || "*".equals(selector)) && a.getValueAsString().equals(value)) { matchCondition = true; break; } } - } else if (isFunction) { - if (!name.equals("*") && !tag.getLocalName().equals(name)) { - matchCondition = false; - } else if (selector.equals("local-name()")) { - if (tag.getLocalName().equals(value)) { - matchCondition = true; - } - } else if (selector.equals("namespace-uri()")) { - if (tag.getNamespaceUri(cursor).get().equals(value)) { - matchCondition = true; + } else if (isFunctionCondition) { + if (isAttributeElement) { + for (Xml.Attribute a : tag.getAttributes()) { + if (matchesElementAndFunction(a, cursor, element, selector, value)) { + matchCondition = true; + break; + } } + } else { + matchCondition = matchesElementAndFunction(tag, cursor, element, selector, value); } } else { // other [] conditions for (Xml.Tag t : FindTags.find(tag, selector)) { @@ -242,6 +269,18 @@ private String matchesCondition(Matcher matcher, Xml.Tag tag, Cursor cursor) { } } - return matchCondition ? name : null; + return matchCondition ? 
element : null; + } + + private static boolean matchesElementAndFunction(Namespaced tagOrAttribute, Cursor cursor, String element, String selector, String value) { + if (!element.equals("*") && !tagOrAttribute.getName().equals(element)) { + return false; + } else if (selector.equals("local-name()")) { + return tagOrAttribute.getLocalName().equals(value); + } else if (selector.equals("namespace-uri()")) { + Optional nsUri = tagOrAttribute.getNamespaceUri(cursor); + return nsUri.isPresent() && nsUri.get().equals(value); + } + return false; } } diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Namespaced.java b/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Namespaced.java new file mode 100644 index 00000000000..cfa0d366a0c --- /dev/null +++ b/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Namespaced.java @@ -0,0 +1,33 @@ +/* + * Copyright 2024 the original author or authors. + *

<p> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * <p>

+ * https://www.apache.org/licenses/LICENSE-2.0 + * <p>

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openrewrite.xml.tree; + +import org.openrewrite.Cursor; + +import java.util.Map; +import java.util.Optional; + +public interface Namespaced extends Xml { + String getName(); + + String getLocalName(); + + Optional getNamespacePrefix(); + + Optional getNamespaceUri(Cursor cursor); + + Map getAllNamespaces(Cursor cursor); +} diff --git a/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Xml.java b/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Xml.java index 251cbefc309..e8485e70825 100755 --- a/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Xml.java +++ b/rewrite-xml/src/main/java/org/openrewrite/xml/tree/Xml.java @@ -311,7 +311,7 @@ public

<P> Xml acceptXml(XmlVisitor<P>

v, P p) { @SuppressWarnings("unused") @Value @EqualsAndHashCode(callSuper = false, onlyExplicitlyIncluded = true) - class Tag implements Xml, Content { + class Tag implements Xml, Content, Namespaced { @EqualsAndHashCode.Include @With UUID id; @@ -345,6 +345,7 @@ public Map getNamespaces() { * @param cursor the cursor to search from * @return a map containing all namespaces defined in the current scope, including all parent scopes. */ + @Override public Map getAllNamespaces(Cursor cursor) { Map namespaces = getNamespaces(); while (cursor != null) { @@ -615,6 +616,7 @@ public Tag withContent(@Nullable List content) { /** * @return The local name for this tag, without any namespace prefix. */ + @Override public String getLocalName() { return extractLocalName(name); } @@ -622,6 +624,7 @@ public String getLocalName() { /** * @return The namespace prefix for this tag, if any. */ + @Override public Optional getNamespacePrefix() { String extractedNamespacePrefix = extractNamespacePrefix(name); return Optional.ofNullable(StringUtils.isNotEmpty(extractedNamespacePrefix) ? extractedNamespacePrefix : null); @@ -630,6 +633,7 @@ public Optional getNamespacePrefix() { /** * @return The namespace URI for this tag, if any. */ + @Override public Optional getNamespaceUri(Cursor cursor) { Optional maybeNamespacePrefix = getNamespacePrefix(); return maybeNamespacePrefix.flatMap(s -> Optional.ofNullable(getAllNamespaces(cursor).get(s))); @@ -688,7 +692,7 @@ public String toString() { @lombok.Value @EqualsAndHashCode(callSuper = false, onlyExplicitlyIncluded = true) @With - class Attribute implements Xml { + class Attribute implements Xml, Namespaced { @EqualsAndHashCode.Include UUID id; @@ -755,6 +759,62 @@ public String getValueAsString() { return value.getValue(); } + @Override + public String getName() { + return key.getName(); + } + + /** + * @return The local name for this attribute, without any namespace prefix. 
+ */ + @Override + public String getLocalName() { + return extractLocalName(getKeyAsString()); + } + + /** + * @return The namespace prefix for this attribute, if any. + */ + @Override + public Optional getNamespacePrefix() { + String extractedNamespacePrefix = extractNamespacePrefix(getKeyAsString()); + return Optional.ofNullable(StringUtils.isNotEmpty(extractedNamespacePrefix) ? extractedNamespacePrefix : null); + } + + /** + * @return The namespace URI for this attribute, if any. + */ + @Override + public Optional getNamespaceUri(Cursor cursor) { + Optional maybeNamespacePrefix = getNamespacePrefix(); + return maybeNamespacePrefix.flatMap(s -> Optional.ofNullable(getAllNamespaces(cursor).get(s))); + } + + /** + * Gets a map containing all namespaces defined in the current scope, including all parent scopes. + * + * @param cursor the cursor to search from + * @return a map containing all namespaces defined in the current scope, including all parent scopes. + */ + @Override + public Map getAllNamespaces(Cursor cursor) { + Map namespaces = new HashMap<>(); + while (cursor != null) { + Xml.Tag enclosing = cursor.firstEnclosing(Xml.Tag.class); + if (enclosing != null) { + for (Map.Entry ns : enclosing.getNamespaces().entrySet()) { + if (namespaces.containsValue(ns.getKey())) { + throw new IllegalStateException(java.lang.String.format("Cannot have two namespaces with the same prefix (%s): '%s' and '%s'", ns.getKey(), namespaces.get(ns.getKey()), ns.getValue())); + } + namespaces.put(ns.getKey(), ns.getValue()); + } + } + cursor = cursor.getParent(); + } + + return namespaces; + } + @Override public String toString() { return getKeyAsString() + "=" + getValueAsString(); diff --git a/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java b/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java index 1fefbf336ae..04bee0758b2 100755 --- a/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java +++ 
b/rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java @@ -105,6 +105,9 @@ class XPathMatcherTest { http://www.example.com/namespace3 http://www.example.com/namespace3.xsd"> content1 content2 + + content3 + """ ).toList().get(0); @@ -132,6 +135,8 @@ void matchRelative() { assertThat(match("//dependency", xmlDoc)).isTrue(); assertThat(match("dependency/*", xmlDoc)).isTrue(); assertThat(match("dne", xmlDoc)).isFalse(); + assertThat(match("/dependencies//dependency", xmlDoc)).isTrue(); + assertThat(match("/dependencies//dependency/groupId", xmlDoc)).isTrue(); } @Test @@ -157,6 +162,8 @@ void matchPom() { pomXml1)).isTrue(); assertThat(match("/project/build//plugins/plugin/configuration/source", pomXml2)).isTrue(); +// assertThat(match("/project/build//plugin/configuration/source", pomXml2)).isTrue(); // TODO: seems parser only handles // up to 1 level +// assertThat(match("/project//configuration/source", pomXml2)).isTrue(); // TODO: was already failing previously } private final SourceFile attributeXml = new XmlParser().parse( @@ -250,7 +257,7 @@ void matchFunctions() { } @Test - void testMatchLocalName() { + void matchLocalNameFunctionCondition() { assertThat(match("/*[local-name()='root']", namespacedXml)).isTrue(); assertThat(match("/*[local-name()='element1']", namespacedXml)).isFalse(); assertThat(match("/*[local-name()='element2']", namespacedXml)).isFalse(); @@ -263,6 +270,62 @@ void testMatchLocalName() { assertThat(match("//element2[local-name()='element2']", namespacedXml)).isFalse(); assertThat(match("//ns2:element2[local-name()='element2']", namespacedXml)).isTrue(); assertThat(match("//dne[local-name()='dne']", namespacedXml)).isFalse(); + + // TODO: fix mid-path // with condition +// assertThat(match("/root//element1[local-name()='element1']", namespacedXml)).isTrue(); + } + + @Test + void matchNamespaceUriFunctionCondition() { + assertThat(match("/root/*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + 
assertThat(match("/*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + assertThat(match("//ns2:element2[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue(); + assertThat(match("//element2[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + } + + @Test + void matchAttributeCondition() { + assertThat(match("//*[@*='content3']", namespacedXml)).isTrue(); + assertThat(match("//*[@*='content2']", namespacedXml)).isFalse(); + assertThat(match("//*[@ns3:attribute1='content3']", namespacedXml)).isTrue(); + assertThat(match("//*[@attribute1='content3']", namespacedXml)).isFalse(); + assertThat(match("//element1[@ns3:attribute1='content3']", namespacedXml)).isTrue(); + assertThat(match("//element1[@attribute1='content3']", namespacedXml)).isFalse(); + assertThat(match("//element1[@*='content3']", namespacedXml)).isTrue(); + assertThat(match("//element1[@*='dne']", namespacedXml)).isFalse(); + assertThat(match("/root/element1[@*='content3']", namespacedXml)).isTrue(); + assertThat(match("/root/element1[@*='dne']", namespacedXml)).isFalse(); + // TODO: fix mid-path // match with condition +// assertThat(match("/root//element1[@*='content3']", namespacedXml)).isTrue(); +// assertThat(match("/root//element1[@*='dne']", namespacedXml)).isFalse(); + } + + @Test + void matchAttributeElement() { + assertThat(match("//@ns3:attribute1", namespacedXml)).isTrue(); + assertThat(match("//@attribute1", namespacedXml)).isFalse(); + assertThat(match("//@*", namespacedXml)).isTrue(); + assertThat(match("//@*[local-name()='attribute1']", namespacedXml)).isTrue(); + assertThat(match("//@*[local-name()='attribute2']", namespacedXml)).isFalse(); + assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); + 
assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + + assertThat(match("//element1/@*", namespacedXml)).isTrue(); + assertThat(match("/root/element1/@*", namespacedXml)).isTrue(); + assertThat(match("//element1/@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); + assertThat(match("//element1/@*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse(); + assertThat(match("//ns2:element2/@*", namespacedXml)).isFalse(); + assertThat(match("/root/ns2:element2/@*", namespacedXml)).isFalse(); + + assertThat(match("/root/parent/element3/@attr", namespacedXml)).isFalse(); + assertThat(match("/root/parent/element3/@ns3:attr", namespacedXml)).isTrue(); + assertThat(match("/root/parent/element3/@ns3:attr[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); + assertThat(match("//element3/@ns3:attr[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); + + // TODO: fix mid-path // match with attribute element +// assertThat(match("/root//element1/@*", namespacedXml)).isTrue(); +// assertThat(match("/root//element1/@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue(); } private boolean match(String xpath, SourceFile x) {