Skip to content

Commit

Permalink
Addressed feedback from PR review
Browse files Browse the repository at this point in the history
- Added comments to the "aggregationFunctions" method in Min, Max, MinLength and MaxLength analyzers.
- Refactored the criterion method to reuse an existing variable.
  • Loading branch information
rdsharma26 committed Mar 10, 2024
1 parent 783bb0b commit 7bb7086
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/main/scala/com/amazon/deequ/analyzers/MaxLength.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ case class MaxLength(column: String, where: Option[String] = None, analyzerOptio
with FilterableAnalyzer {

override def aggregationFunctions(): Seq[Column] = {
  // `criterion` produces, for every row, an array holding exactly 2 elements:
  //   - element 1: a string flagging the row as "in scope" or "filtered" out
  //   - element 2: the value that feeds the metric computation
  // "element_at" picks out element 2, which is then cast to double before taking the max.
  val metricValue = element_at(criterion, 2).cast(DoubleType)
  List(max(metricValue))
}

Expand All @@ -58,7 +61,7 @@ case class MaxLength(column: String, where: Option[String] = None, analyzerOptio
case NullBehavior.Fail => when(isNullCheck, Double.MaxValue).otherwise(colLength)
// Empty String is 0 length string
case NullBehavior.EmptyString => when(isNullCheck, lit(0.0)).otherwise(colLength)
case NullBehavior.Ignore => length(col(column))
case NullBehavior.Ignore => colLength
}

conditionalSelectionWithAugmentedOutcome(updatedColumn, where)
Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/com/amazon/deequ/analyzers/Maximum.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ case class Maximum(column: String, where: Option[String] = None, analyzerOptions
with FilterableAnalyzer {

override def aggregationFunctions(): Seq[Column] = {
  // Each row of the `criterion` column is a 2-element array.
  // Its first entry is a string telling whether the row is "in scope" or "filtered" out;
  // its second entry carries the value the metric is computed from.
  // Only the second entry matters here, so it is pulled out with "element_at".
  val metricValue = element_at(criterion, 2).cast(DoubleType)
  List(max(metricValue))
}

Expand Down
5 changes: 4 additions & 1 deletion src/main/scala/com/amazon/deequ/analyzers/MinLength.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ case class MinLength(column: String, where: Option[String] = None, analyzerOptio
with FilterableAnalyzer {

override def aggregationFunctions(): Seq[Column] = {
  // `criterion` produces, for every row, an array holding exactly 2 elements:
  //   - element 1: a string flagging the row as "in scope" or "filtered" out
  //   - element 2: the value that feeds the metric computation
  // "element_at" picks out element 2, which is then cast to double before taking the min.
  val metricValue = element_at(criterion, 2).cast(DoubleType)
  List(min(metricValue))
}

Expand All @@ -58,7 +61,7 @@ case class MinLength(column: String, where: Option[String] = None, analyzerOptio
case NullBehavior.Fail => when(isNullCheck, Double.MinValue).otherwise(colLength)
// Empty String is 0 length string
case NullBehavior.EmptyString => when(isNullCheck, lit(0.0)).otherwise(colLength)
case NullBehavior.Ignore => length(col(column))
case NullBehavior.Ignore => colLength
}

conditionalSelectionWithAugmentedOutcome(updatedColumn, where)
Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/com/amazon/deequ/analyzers/Minimum.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ case class Minimum(column: String, where: Option[String] = None, analyzerOptions
with FilterableAnalyzer {

override def aggregationFunctions(): Seq[Column] = {
  // Each row of the `criterion` column is a 2-element array.
  // Its first entry is a string telling whether the row is "in scope" or "filtered" out;
  // its second entry carries the value the metric is computed from.
  // Only the second entry matters here, so it is pulled out with "element_at".
  val metricValue = element_at(criterion, 2).cast(DoubleType)
  List(min(metricValue))
}

Expand Down

0 comments on commit 7bb7086

Please sign in to comment.