diff --git a/src/main/scala/com/amazon/deequ/analyzers/MaxLength.scala b/src/main/scala/com/amazon/deequ/analyzers/MaxLength.scala index 69a39eb1..141d92fb 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/MaxLength.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/MaxLength.scala @@ -36,6 +36,9 @@ case class MaxLength(column: String, where: Option[String] = None, analyzerOptio with FilterableAnalyzer { override def aggregationFunctions(): Seq[Column] = { + // The criterion returns a column where each row contains an array of 2 elements. + // The first element of the array is a string that indicates if the row is "in scope" or "filtered" out. + // The second element is the value used for calculating the metric. We use "element_at" to extract it. max(element_at(criterion, 2).cast(DoubleType)) :: Nil } @@ -58,7 +61,7 @@ case class MaxLength(column: String, where: Option[String] = None, analyzerOptio case NullBehavior.Fail => when(isNullCheck, Double.MaxValue).otherwise(colLength) // Empty String is 0 length string case NullBehavior.EmptyString => when(isNullCheck, lit(0.0)).otherwise(colLength) - case NullBehavior.Ignore => length(col(column)) + case NullBehavior.Ignore => colLength } conditionalSelectionWithAugmentedOutcome(updatedColumn, where) diff --git a/src/main/scala/com/amazon/deequ/analyzers/Maximum.scala b/src/main/scala/com/amazon/deequ/analyzers/Maximum.scala index 90143f5e..1e52a7ae 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/Maximum.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/Maximum.scala @@ -46,6 +46,9 @@ case class Maximum(column: String, where: Option[String] = None, analyzerOptions with FilterableAnalyzer { override def aggregationFunctions(): Seq[Column] = { + // The criterion returns a column where each row contains an array of 2 elements. + // The first element of the array is a string that indicates if the row is "in scope" or "filtered" out. + // The second element is the value used for calculating the metric. We use "element_at" to extract it. max(element_at(criterion, 2).cast(DoubleType)) :: Nil } diff --git a/src/main/scala/com/amazon/deequ/analyzers/MinLength.scala b/src/main/scala/com/amazon/deequ/analyzers/MinLength.scala index dd982b4e..ddc4497b 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/MinLength.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/MinLength.scala @@ -36,6 +36,9 @@ case class MinLength(column: String, where: Option[String] = None, analyzerOptio with FilterableAnalyzer { override def aggregationFunctions(): Seq[Column] = { + // The criterion returns a column where each row contains an array of 2 elements. + // The first element of the array is a string that indicates if the row is "in scope" or "filtered" out. + // The second element is the value used for calculating the metric. We use "element_at" to extract it. min(element_at(criterion, 2).cast(DoubleType)) :: Nil } @@ -58,7 +61,7 @@ case class MinLength(column: String, where: Option[String] = None, analyzerOptio case NullBehavior.Fail => when(isNullCheck, Double.MinValue).otherwise(colLength) // Empty String is 0 length string case NullBehavior.EmptyString => when(isNullCheck, lit(0.0)).otherwise(colLength) - case NullBehavior.Ignore => length(col(column)) + case NullBehavior.Ignore => colLength } conditionalSelectionWithAugmentedOutcome(updatedColumn, where) diff --git a/src/main/scala/com/amazon/deequ/analyzers/Minimum.scala b/src/main/scala/com/amazon/deequ/analyzers/Minimum.scala index ffc78d27..701ae0f0 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/Minimum.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/Minimum.scala @@ -46,6 +46,9 @@ case class Minimum(column: String, where: Option[String] = None, analyzerOptions with FilterableAnalyzer { override def aggregationFunctions(): Seq[Column] = { + // The criterion returns a column where each row contains an array of 2 elements. + // The first element of the array is a string that indicates if the row is "in scope" or "filtered" out. + // The second element is the value used for calculating the metric. We use "element_at" to extract it. min(element_at(criterion, 2).cast(DoubleType)) :: Nil }