Skip to content

Commit

Permalink
XPath condition enhancements - multiple conditions and and/or operato…
Browse files Browse the repository at this point in the history
…rs (#4305)

* Support chained conditions

* Add support for and/or XPath operators

* Autoformat

* Simplify pattern for matching XPath conditions

* Update javadoc, add more complex condition tests

* Enable the namespace match functions test separately

---------

Co-authored-by: Tim te Beek <tim@moderne.io>
  • Loading branch information
evie-lau and timtebeek authored Jul 8, 2024
1 parent 1125021 commit b4d3f93
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 46 deletions.
107 changes: 72 additions & 35 deletions rewrite-xml/src/main/java/org/openrewrite/xml/XPathMatcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
/**
* Supports a limited set of XPath expressions, specifically those documented on <a
* href="https://www.w3schools.com/xml/xpath_syntax.asp">this page</a>.
* Additionally, supports {@code local-name()} and {@code namespace-uri()} conditions, {@code and}/{@code or} operators, and chained conditions.
* <p>
* Used for checking whether a visitor's cursor meets a certain XPath expression.
* <p>
Expand All @@ -37,9 +38,11 @@
*/
public class XPathMatcher {

// Matches one step of an XPath expression: a run of characters that are neither '/' nor '[', together with
// any complete [...] condition (whose content may itself contain '/'). The zero-width alternative
// (?<=/)(?=/) matches the empty position between the two slashes of "//".
private static final Pattern XPATH_ELEMENT_SPLITTER = Pattern.compile("((?<=/)(?=/)|[^/\\[]|\\[[^]]*\\])+");
private static final Pattern XPATH_ELEMENT_SPLITTER = Pattern.compile("((?<=/)(?=/)|[^/\\[]|\\[[^]]*])+");
// Regular expression to support conditional tags like `plugin[artifactId='maven-compiler-plugin']` or `foo[@bar='baz']`
private static final Pattern PATTERN = Pattern.compile("(@)?([-:\\w]+|\\*)\\[((local-name|namespace-uri)\\(\\)|(@)?([-\\w]+|\\*))='(.*)']");
// A step name followed by one or more bracketed conditions:
// group 1 = optional '@' (attribute step), group 2 = name or '*',
// group 3 = everything from the first '[' to the last ']' (all chained conditions together).
private static final Pattern ELEMENT_WITH_CONDITION_PATTERN = Pattern.compile("(@)?([-:\\w]+|\\*)(\\[.+])");
// A single bracketed condition. The reluctant quantifiers make each find() stop at the first ']', so chained
// conditions such as [a='1'][b='2'] are returned one at a time; group 1 includes the surrounding brackets.
private static final Pattern CONDITION_PATTERN = Pattern.compile("(\\[.*?])+?");
// One comparison inside a bracketed condition, optionally followed by a conjunction:
// group 2 = full selector text, group 3 = function name (local-name / namespace-uri),
// group 4 = '@' when the selector is an attribute, group 5 = attribute or child element name (or '*'),
// group 6 = the quoted value, group 8 = the conjunction ('or' / 'and') that follows, if any.
private static final Pattern CONDITION_CONJUNCTION_PATTERN = Pattern.compile("(((local-name|namespace-uri)\\(\\)|(@)?([-\\w:]+|\\*))='(.*?)'(\\h?(or|and)\\h?)?)+?");

private final String expression;
private final boolean startsWithSlash;
Expand Down Expand Up @@ -120,8 +123,8 @@ public boolean matches(Cursor cursor) {
boolean matchedCondition = false;

Matcher matcher;
if (tagForCondition != null && partWithCondition.endsWith("]") && (matcher = PATTERN.matcher(
partWithCondition)).matches()) {
if (tagForCondition != null && partWithCondition.endsWith("]")
&& (matcher = ELEMENT_WITH_CONDITION_PATTERN.matcher(partWithCondition)).matches()) {
String optionalPartName = matchesElementWithConditionFunction(matcher, tagForCondition, cursor);
if (optionalPartName == null) {
return false;
Expand All @@ -147,16 +150,16 @@ public boolean matches(Cursor cursor) {
continue;
}

boolean conditionNotFulfilled =
tagForCondition == null || (!part.equals(partName) && !tagForCondition.getName()
.equals(partName));
boolean conditionNotFulfilled = tagForCondition == null
|| (!part.equals(partName) && !tagForCondition.getName().equals(partName));

int idx = part.indexOf("[");
if (idx > 0) {
part = part.substring(0, idx);
}
if (path.size() < i + 1 || (
!(path.get(pathIndex).getName().equals(part)) && !"*".equals(part)) || conditionIsBefore && conditionNotFulfilled) {
if (path.size() < i + 1
|| (!(path.get(pathIndex).getName().equals(part)) && !"*".equals(part))
|| conditionIsBefore && conditionNotFulfilled) {
return false;
}
}
Expand Down Expand Up @@ -203,7 +206,7 @@ public boolean matches(Cursor cursor) {
boolean matchedCondition = false;

Matcher matcher;
if (tag != null && part.endsWith("]") && (matcher = PATTERN.matcher(part)).matches()) {
if (tag != null && part.endsWith("]") && (matcher = ELEMENT_WITH_CONDITION_PATTERN.matcher(part)).matches()) {
String optionalPartName = matchesElementWithConditionFunction(matcher, tag, cursor);
if (optionalPartName == null) {
return false;
Expand Down Expand Up @@ -236,40 +239,74 @@ public boolean matches(Cursor cursor) {
private String matchesElementWithConditionFunction(Matcher matcher, Xml.Tag tag, Cursor cursor) {
boolean isAttributeElement = matcher.group(1) != null;
String element = matcher.group(2);
boolean isAttributeCondition = matcher.group(5) != null; // either group4 != null, or group 2 startsWith @
String selector = isAttributeCondition ? matcher.group(6) : matcher.group(3);
boolean isFunctionCondition = selector.endsWith("()");
String value = matcher.group(7);

boolean matchCondition = false;
if (isAttributeCondition) {
for (Xml.Attribute a : tag.getAttributes()) {
if ((a.getKeyAsString().equals(selector) || "*".equals(selector)) && a.getValueAsString().equals(value)) {
matchCondition = true;
String allConditions = matcher.group(3);

// Fail quickly if element name doesn't match
if (!isAttributeElement && !tag.getName().equals(element) && !"*".equals(element)) {
return null;
}

// check that all conditions match on current element
Matcher conditions = CONDITION_PATTERN.matcher(allConditions);
boolean stillMatchesConditions = true;
while (conditions.find() && stillMatchesConditions) {
String conditionGroup = conditions.group(1);
Matcher condition = CONDITION_CONJUNCTION_PATTERN.matcher(conditionGroup);
boolean orCondition = false;

while (condition.find() && (stillMatchesConditions || orCondition)) {
boolean matchCurrentCondition = false;

boolean isAttributeCondition = condition.group(4) != null;
String selector = isAttributeCondition ? condition.group(5) : condition.group(2);
boolean isFunctionCondition = selector.endsWith("()");
String value = condition.group(6);
String conjunction = condition.group(8);
orCondition = conjunction != null && conjunction.equals("or");

// invalid conjunction if not 'or' or 'and'
if (!orCondition && conjunction != null && !conjunction.equals("and")) {
// TODO: throw exception for invalid or unsupported XPath conjunction?
stillMatchesConditions = false;
break;
}
}
} else if (isFunctionCondition) {
if (isAttributeElement) {
for (Xml.Attribute a : tag.getAttributes()) {
if (matchesElementAndFunction(a, cursor, element, selector, value)) {
matchCondition = true;
break;

if (isAttributeCondition) { // [@attr='value'] pattern
for (Xml.Attribute a : tag.getAttributes()) {
if ((a.getKeyAsString().equals(selector) || "*".equals(selector)) && a.getValueAsString().equals(value)) {
matchCurrentCondition = true;
break;
}
}
} else if (isFunctionCondition) { // [local-name()='name'] pattern
if (isAttributeElement) {
for (Xml.Attribute a : tag.getAttributes()) {
if (matchesElementAndFunction(a, cursor, element, selector, value)) {
matchCurrentCondition = true;
break;
}
}
} else {
matchCurrentCondition = matchesElementAndFunction(tag, cursor, element, selector, value);
}
} else { // other [] conditions
for (Xml.Tag t : FindTags.find(tag, selector)) {
if (t.getValue().map(v -> v.equals(value)).orElse(false)) {
matchCurrentCondition = true;
break;
}
}
}
} else {
matchCondition = matchesElementAndFunction(tag, cursor, element, selector, value);
}
} else { // other [] conditions
for (Xml.Tag t : FindTags.find(tag, selector)) {
if (t.getValue().map(v -> v.equals(value)).orElse(false)) {
matchCondition = true;
// break condition early if first OR condition is fulfilled
if (matchCurrentCondition && orCondition) {
break;
}

stillMatchesConditions = matchCurrentCondition;
}
}

return matchCondition ? element : null;
return stillMatchesConditions ? element : null;
}

private static boolean matchesElementAndFunction(Namespaced tagOrAttribute, Cursor cursor, String element, String selector, String value) {
Expand Down
101 changes: 90 additions & 11 deletions rewrite-xml/src/test/java/org/openrewrite/xml/XPathMatcherTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class XPathMatcherTest {
<ns2:element2>content2</ns2:element2>
<parent>
<element3 ns3:attr='test'>content3</element3>
<ns2:element4 ns3:attr='test2'>content4</ns2:element4>
</parent>
</root>
"""
Expand Down Expand Up @@ -228,9 +229,8 @@ void relativePathsWithConditions() {
}

@Test
@Disabled
@Issue("https://github.com/openrewrite/rewrite/issues/3919")
void matchFunctions() {
void namespaceMatchFunctions() {
assertThat(match("/root/element1", namespacedXml)).isTrue();
assertThat(match("/root/ns2:element2", namespacedXml)).isTrue();
assertThat(match("/root/dne", namespacedXml)).isFalse();
Expand All @@ -243,17 +243,21 @@ void matchFunctions() {
assertThat(match("/*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse();
assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();
}

// Disabled on purpose: collects expressions built on further XPath functions (contains, not, string-length,
// starts-with, ends-with, substring-before/after, text(), count).
// NOTE(review): presumably these are not handled by XPathMatcher yet, hence @Disabled - confirm before enabling.
@Test
@Disabled
void otherUncoveredXpathFunctions() {
// Other common XPath functions
assertThat(match("contains(/root/element1, 'content1')", namespacedXml)).isTrue();
assertThat(match("not(contains(/root/element1, 'content1'))", namespacedXml)).isFalse();
assertThat(match("string-length(/root/element1) > 2", namespacedXml)).isTrue();
assertThat(match("starts-with(/root/element1, 'content1')", namespacedXml)).isTrue();
assertThat(match("ends-with(/root/element1, 'content1')", namespacedXml)).isTrue();
assertThat(match("substring-before(/root/element1, '1') = 'content'", namespacedXml)).isTrue();
assertThat(match("substring-after(/root/element1, 'content') = '1'", namespacedXml)).isTrue();
assertThat(match("/root/element1/text()", namespacedXml)).isTrue();
assertThat(match("count(/root/*)", namespacedXml)).isTrue();
assertThat(match("contains(/root/element1, 'content1')", namespacedXml)).isTrue();
assertThat(match("not(contains(/root/element1, 'content1'))", namespacedXml)).isFalse();
assertThat(match("string-length(/root/element1) > 2", namespacedXml)).isTrue();
assertThat(match("starts-with(/root/element1, 'content1')", namespacedXml)).isTrue();
assertThat(match("ends-with(/root/element1, 'content1')", namespacedXml)).isTrue();
assertThat(match("substring-before(/root/element1, '1') = 'content'", namespacedXml)).isTrue();
assertThat(match("substring-after(/root/element1, 'content') = '1'", namespacedXml)).isTrue();
assertThat(match("/root/element1/text()", namespacedXml)).isTrue();
assertThat(match("count(/root/*)", namespacedXml)).isTrue();
}

@Test
Expand Down Expand Up @@ -328,6 +332,81 @@ void matchAttributeElement() {
// assertThat(match("/root//element1/@*[namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();
}

// Chained predicates: a step followed by several [..] conditions is expected to match only when every
// one of them holds, independent of the order in which the conditions are written.
@Test
void matchMultipleConditions() {
// the same two function conditions, in both orders
assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2'][local-name()='element2']", namespacedXml)).isTrue();
assertThat(match("//*[local-name()='element2'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();

// one of the two conditions names something that does not exist ('dne')
assertThat(match("//*[namespace-uri()='http://www.example.com/namespace2'][local-name()='dne']", namespacedXml)).isFalse();
assertThat(match("//*[local-name()='dne'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse();

// function condition chained with an attribute condition, and chained conditions on an attribute step (@*)
assertThat(match("//*[local-name()='element1'][@ns3:attribute1='content3']", namespacedXml)).isTrue();
assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3'][local-name()='attribute1']", namespacedXml)).isTrue();
assertThat(match("//@*[namespace-uri()='http://www.example.com/namespace3'][local-name()='dne']", namespacedXml)).isFalse();

// attribute condition first, function condition second
assertThat(match("//*[@ns3:attr='test'][local-name()='element3']", namespacedXml)).isTrue();
assertThat(match("//*[@ns3:attr='test'][local-name()='elementX']", namespacedXml)).isFalse();

// three chained conditions; each negative case alters exactly one of the three
assertThat(match("//*[@ns3:attr='test2'][local-name()='element4'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
assertThat(match("//*[@ns3:attr='testX'][local-name()='element4'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse();
assertThat(match("//*[@ns3:attr='test2'][local-name()='elementX'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isFalse();
assertThat(match("//*[@ns3:attr='test2'][local-name()='element4'][namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse();
}

// Truth-table style checks for 'and' / 'or' inside a single predicate, and for predicates that combine
// conjunctions with chaining.
// Notation used in the comments below: T / F = the operand holds / does not hold for the element the
// expression is aimed at, & = 'and', | = 'or', [..][..] = chained predicates.
// The expected values treat 'and' as binding tighter than 'or' (see the final case: T|F & F = T).
// NOTE(review): no case covers a failing 'and' operand followed by a succeeding 'or' alternative
// (F&T|T, expected T under the same precedence) - consider adding one.
@Test
void matchConditionsWithConjunctions() {
// T&T, T&F, F&T, F&F
assertThat(match("//*[local-name()='element3' and @ns3:attr='test']", namespacedXml)).isTrue();
assertThat(match("//*[local-name()='element3' and @ns3:attr='dne']", namespacedXml)).isFalse();
assertThat(match("//*[local-name()='dne' and @ns3:attr='test']", namespacedXml)).isFalse();
assertThat(match("//*[local-name()='dne' and @ns3:attr='dne']", namespacedXml)).isFalse();

// T|T, T|F, F|T, F|F
assertThat(match("//*[local-name()='element2' or namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
assertThat(match("//*[local-name()='element2' or namespace-uri()='dne']", namespacedXml)).isTrue();
assertThat(match("//*[local-name()='dne' or local-name()='element2']", namespacedXml)).isTrue();
assertThat(match("//*[local-name()='dne' or local-name()='dne2']", namespacedXml)).isFalse();

// F|T on an attribute step (@*)
assertThat(match("//@*[namespace-uri()='dne' or namespace-uri()='http://www.example.com/namespace3']", namespacedXml)).isTrue();

// T&T&T = T
assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2' and @ns3:attr='test2']", namespacedXml)).isTrue();
// T&T&F = F
assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2' and @ns3:attr='dne']", namespacedXml)).isFalse();
// T&T|F = T
assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2' or @ns3:attr='dne']", namespacedXml)).isTrue();
// T&F|T = T
assertThat(match("//*[local-name()='element4' and @ns3:attr='dne' or namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
// T&F|F = F
assertThat(match("//*[local-name()='element4' and @ns3:attr='dne' or namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse();

// F|F|T = T
assertThat(match("//*[local-name()='dne' or local-name()='dne2' or local-name()='element2']", namespacedXml)).isTrue();

// [T&T][T] = T
assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2'][@ns3:attr='test2']", namespacedXml)).isTrue();
// [T&T][F] = F
assertThat(match("//*[local-name()='element4' and namespace-uri()='http://www.example.com/namespace2'][@ns3:attr='dne']", namespacedXml)).isFalse();
// [F&T][T] = F
assertThat(match("//*[local-name()='dne' and namespace-uri()='http://www.example.com/namespace2'][@ns3:attr='test2']", namespacedXml)).isFalse();
// [F|T][T] = T
assertThat(match("//*[local-name()='dne' or local-name()='element4'][namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
// [F|T][F] = F
assertThat(match("//*[local-name()='dne' or local-name()='element4'][namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse();

// F|T&T = T
assertThat(match("//*[local-name()='dne' or local-name()='element4' and namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
// F|T&F = F
assertThat(match("//*[local-name()='dne' or local-name()='element4' and namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isFalse();
// F|F&T = F
assertThat(match("//*[local-name()='dne' or namespace-uri()='http://www.example.com/namespaceX' and local-name()='element4']", namespacedXml)).isFalse();

// T|F & T = T
assertThat(match("//*[local-name()='element4' or local-name()='dne' and namespace-uri()='http://www.example.com/namespace2']", namespacedXml)).isTrue();
// T|F & F = T
assertThat(match("//*[local-name()='element4' or local-name()='dne' and namespace-uri()='http://www.example.com/namespaceX']", namespacedXml)).isTrue();
}

private boolean match(String xpath, SourceFile x) {
XPathMatcher matcher = new XPathMatcher(xpath);
return !TreeVisitor.collect(new XmlVisitor<>() {
Expand Down

0 comments on commit b4d3f93

Please sign in to comment.