From 40a4999722c26cc99b9a674f08b5bd2bdd2b1d7e Mon Sep 17 00:00:00 2001 From: arianna Date: Fri, 1 May 2020 11:14:43 -0600 Subject: [PATCH 01/29] adding stagedDate to both A&E indices --- .../resources/mappings/analysis_error_collectionIndex.json | 4 ++++ .../main/resources/mappings/analysis_error_granuleIndex.json | 4 ++++ .../java/org/cedar/onestop/indexer/util/IndexingUtils.java | 1 + 3 files changed, 9 insertions(+) diff --git a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json index 8d080c175..3080898f1 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json @@ -2,6 +2,10 @@ "mappings": { "dynamic": "strict", "properties": { + "stagedDate": { + "type": "date", + "format": "epoch_millis" + }, "dataAccess": { "properties": { "dataAccessExists": { diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 8d080c175..3080898f1 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -2,6 +2,10 @@ "mappings": { "dynamic": "strict", "properties": { + "stagedDate": { + "type": "date", + "format": "epoch_millis" + }, "dataAccess": { "properties": { "dataAccessExists": { diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java index 67ff8b2b5..01a9a14e9 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java @@ -77,6 +77,7 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN else { var formattedRecord = new HashMap(); formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); + formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); } } From 86363f709d68dc379f039c3f01a53854d686f97b Mon Sep 17 00:00:00 2001 From: arianna Date: Fri, 1 May 2020 11:19:29 -0600 Subject: [PATCH 02/29] adding internalParentIdentifier to granule A&E index --- .../main/resources/mappings/analysis_error_granuleIndex.json | 3 +++ .../org/cedar/onestop/indexer/util/TransformationUtils.java | 1 + 2 files changed, 4 insertions(+) diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 3080898f1..6f9ca09ce 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -6,6 +6,9 @@ "type": "date", "format": "epoch_millis" }, + "internalParentIdentifier": { + "type": "keyword" + }, "dataAccess": { "properties": { "dataAccessExists": { diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index a38b130ae..eb279acc8 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ 
b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -32,6 +32,7 @@ public static Map reformatMessageForAnalysisAndErrors(ParsedReco var errors = record.getErrors(); var analysisMap = AvroUtils.avroToMap(analysis, true); + analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); var errorsList = errors.stream() .map(e -> AvroUtils.avroToMap(e)) .collect(Collectors.toList()); From 589b77e0cd16dd2601682af3534817a8db8b4323 Mon Sep 17 00:00:00 2001 From: arianna Date: Mon, 11 May 2020 16:57:21 -0600 Subject: [PATCH 03/29] now isGranule instead of matchesIdentifiers, which is more clear --- buildSrc/src/main/kotlin/utils.kt | 2 +- .../analysis_error_collectionIndex.json | 2 +- .../mappings/analysis_error_granuleIndex.json | 2 +- .../onestop/indexer/util/ValidationUtils.java | 34 +++-- .../indexer/util/ValidationUtilsSpec.groovy | 133 +++++++++--------- 5 files changed, 87 insertions(+), 86 deletions(-) diff --git a/buildSrc/src/main/kotlin/utils.kt b/buildSrc/src/main/kotlin/utils.kt index abcfdf687..1a919da23 100644 --- a/buildSrc/src/main/kotlin/utils.kt +++ b/buildSrc/src/main/kotlin/utils.kt @@ -40,7 +40,7 @@ object Versions { const val PAC4J = "3.8.3" const val SNAKE_YAML = "1.24" - const val ONESTOP_SCHEMAS: String = "0.5.5" + const val ONESTOP_SCHEMAS: String = "analysis-updates-SNAPSHOT" } // data classes diff --git a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json index 3080898f1..6b290290d 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json @@ -58,7 +58,7 @@ "hierarchyLevelNameExists": { "type": "boolean" }, - "matchesIdentifiers": { + "isGranule": { "type": "boolean" }, "parentIdentifierExists": { diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 6f9ca09ce..487ab98dd 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -61,7 +61,7 @@ "hierarchyLevelNameExists": { "type": "boolean" }, - "matchesIdentifiers": { + "isGranule": { "type": "boolean" }, "parentIdentifierExists": { diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java index 356635fc7..e42609778 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java @@ -39,7 +39,7 @@ public static ParsedRecord addValidationErrors(ValueWithTopic valu return ParsedRecord.newBuilder(record).setErrors(errors).build(); } - private static List validateRootRecord(ParsedRecord record) { + public static List validateRootRecord(ParsedRecord record) { var result = new ArrayList(); if (record.getDiscovery() == null || record.getDiscovery() == Discovery.newBuilder().build()) { result.add(buildValidationError("Discovery metadata missing. 
No metadata to load into OneStop.")); @@ -50,7 +50,7 @@ private static List validateRootRecord(ParsedRecord record) { return result; } - private static List validateIdentification(ParsedRecord record) { + public static List validateIdentification(ParsedRecord record) { var result = new ArrayList(); var identification = record.getAnalysis().getIdentification(); if (identification != null && !identification.getFileIdentifierExists() && !identification.getDoiExists()) { @@ -59,13 +59,10 @@ private static List validateIdentification(ParsedRecord record) { if (record.getType() == null ) { result.add(buildValidationError("Metadata type error -- type unknown.")); } - if (identification != null && !identification.getMatchesIdentifiers()) { - result.add(buildValidationError("Metadata type error -- hierarchyLevelName is 'granule' but no parentIdentifier provided.")); - } return result; } - private static List validateTopicPlacement(ParsedRecord record, String topic) { + public static List validateTopicPlacement(ParsedRecord record, String topic) { var result = new ArrayList(); var declaredRecordType = record.getType(); var recordTypeForTopic = IndexingUtils.determineTypeFromTopic(topic); @@ -77,19 +74,28 @@ private static List validateTopicPlacement(ParsedRecord record, Stri } var identification = record.getAnalysis().getIdentification(); - var isGranule = identification.getParentIdentifierExists() && identification.getHierarchyLevelNameExists() - && record.getDiscovery().getHierarchyLevelName().toLowerCase().equals("granule"); - if(isGranule && recordTypeForTopic != RecordType.granule) { + var hlm = record.getDiscovery().getHierarchyLevelName(); + // Granule on collection topic + if(identification != null && identification.getIsGranule() && recordTypeForTopic != RecordType.granule) { result.add(buildValidationError("Metadata indicates granule type but record is not on granule topic.")); } - if(!isGranule && recordTypeForTopic == RecordType.granule) { + // Non-granule on granule topic + if(identification != null && !identification.getIsGranule() && recordTypeForTopic == RecordType.granule) { result.add(buildValidationError("Metadata indicates non-granule type but record is on granule topic.")); + if(!identification.getParentIdentifierExists()) { + result.add(buildValidationError("Expected granule record but missing parentIdentifier.")); + } + if(!identification.getHierarchyLevelNameExists()) { + result.add(buildValidationError("Expected granule record but missing hierarchyLevelName. 
This must be present and equal to case-insensitive 'granule'.")); + } + if(identification.getHierarchyLevelNameExists() && !hlm.toLowerCase().equals("granule")) { + result.add(buildValidationError("Expected granule record but hierarchyLevelName is [ " + hlm + " ] and should be case-insensitive 'granule'.")); + } } - return result; } - private static List validateTitles(ParsedRecord record) { + public static List validateTitles(ParsedRecord record) { var result = new ArrayList(); var titles = record.getAnalysis().getTitles(); if (!titles.getTitleExists()) { @@ -98,7 +104,7 @@ private static List validateTitles(ParsedRecord record) { return result; } - private static List validateTemporalBounds(ParsedRecord record) { + public static List validateTemporalBounds(ParsedRecord record) { var result = new ArrayList(); var temporal = record.getAnalysis().getTemporalBounding(); if (temporal.getBeginDescriptor() == INVALID) { @@ -113,7 +119,7 @@ private static List validateTemporalBounds(ParsedRecord record) { return result; } - private static List validateSpatialBounds(ParsedRecord record) { + public static List validateSpatialBounds(ParsedRecord record) { var result = new ArrayList(); var spatial = record.getAnalysis().getSpatialBounding(); if (spatial.getSpatialBoundingExists() && !spatial.getIsValid()) { diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy index d782f095c..c1894c1c0 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy @@ -20,17 +20,13 @@ import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID @Unroll class ValidationUtilsSpec extends Specification { - MockProcessorContext mockProcessorContext - TopicIdentifier ti - - def setup() { - mockProcessorContext = new MockProcessorContext() + def "valid message passes validation check"() { + given: + MockProcessorContext mockProcessorContext = new MockProcessorContext() mockProcessorContext.setTopic(TestUtils.collectionTopic) - ti = new TopicIdentifier<>() + TopicIdentifier ti = new TopicIdentifier<>() ti.init(mockProcessorContext) - } - def "valid message passes validation check"() { when: ValueWithTopic testInput = ti.transform(TestUtils.inputAvroRecord) @@ -39,6 +35,12 @@ class ValidationUtilsSpec extends Specification { } def "validation passes tombstones through"() { + given: + MockProcessorContext mockProcessorContext = new MockProcessorContext() + mockProcessorContext.setTopic(TestUtils.collectionTopic) + TopicIdentifier ti = new TopicIdentifier<>() + ti.init(mockProcessorContext) + when: ValueWithTopic testInput = ti.transform(null) @@ -47,16 +49,15 @@ class ValidationUtilsSpec extends Specification { } def "validates titles when #testCase"() { - def titleAnalysis = TitleAnalysis.newBuilder(TestUtils.inputAvroRecord.analysis.titles).setTitleExists(titleExists).build() - def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).setTitles(titleAnalysis).build() - def record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord).setAnalysis(analysis).build() + def titleAnalysis = TitleAnalysis.newBuilder().setTitleExists(titleExists).build() + def analysis = Analysis.newBuilder().setTitles(titleAnalysis).build() + def record = ParsedRecord.newBuilder().setAnalysis(analysis).build() when: - ValueWithTopic testInput = ti.transform(record) - def validated = 
ValidationUtils.addValidationErrors(testInput)

     then:
-    validated.errors.isEmpty() == isValid
+    errors.isEmpty() == isValid

     where:
     testCase           | isValid | titleExists
@@ -65,106 +66,100 @@ class ValidationUtilsSpec extends Specification {
   }

   def "validates identification when #testCase"() {
-    def identificationAnalysis = IdentificationAnalysis.newBuilder(TestUtils.inputAvroRecord.analysis.identification)
+    def identificationAnalysis = IdentificationAnalysis.newBuilder()
         .setFileIdentifierExists(hasFileId)
         .setDoiExists(hasDoi)
-        .setMatchesIdentifiers(matches)
         .build()
-    def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).setIdentification(identificationAnalysis).build()
-    def record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord).setAnalysis(analysis).build()
+    def analysis = Analysis.newBuilder().setIdentification(identificationAnalysis).build()
+    def record = ParsedRecord.newBuilder().setType(type).setAnalysis(analysis).build()

     when:
-    ValueWithTopic testInput = ti.transform(record)
-    def validated = ValidationUtils.addValidationErrors(testInput)
+    def errors = ValidationUtils.validateIdentification(record)

     then:
-    validated.errors.size() == errors
+    errors.size() == errorCount

     where:
-    testCase                | errors | hasFileId | hasDoi | matches
-    "has only fileId"       | 0      | true      | false  | true
-    "has only doi"          | 0      | false     | true   | true
-    "has no fileId nor doi" | 1      | false     | false  | true
-    "has mismatched type"   | 1      | true      | true   | false
-    "no id and mismatched"  | 2      | false     | false  | false
+    testCase                      | errorCount | hasFileId | hasDoi | type
+    "has only fileId"             | 0          | true      | false  | RecordType.collection
+    "has only doi"                | 0          | false     | true   | RecordType.granule
+    "has fileId and doi"          | 0          | true      | true   | RecordType.collection
+    "has no ids"                  | 1          | false     | false  | RecordType.granule
+    "has unknown type"            | 1          | true      | true   | null
+    "has no ids and unknown type" | 2          | false     | false  | null
   }

   def "validates temporal bounds when #testCase"() {
-    def temporalAnalysis = TemporalBoundingAnalysis.newBuilder(TestUtils.inputAvroRecord.analysis.temporalBounding)
+    def temporalAnalysis = TemporalBoundingAnalysis.newBuilder()
         .setBeginDescriptor(beginValid ? VALID : INVALID)
         .setEndDescriptor(endValid ? VALID : INVALID)
         .setInstantDescriptor(instantValid ?
VALID : INVALID) .build() - def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).setTemporalBounding(temporalAnalysis).build() - def record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord).setAnalysis(analysis).build() + def analysis = Analysis.newBuilder().setTemporalBounding(temporalAnalysis).build() + def record = ParsedRecord.newBuilder().setAnalysis(analysis).build() when: - ValueWithTopic testInput = ti.transform(record) - def validated = ValidationUtils.addValidationErrors(testInput) + def errors = ValidationUtils.validateTemporalBounds(record) then: - validated.errors.size() == errors + errors.size() == errorCount where: - testCase | errors | beginValid| endValid| instantValid - "has valid bounds" | 0 | true | true | true - "has invalid start" | 1 | false | true | true - "has invalid end" | 1 | true | false | true - "has invalid start and end" | 2 | false | false | true - "is invalid instant" | 1 | true | true | false - "is completely invalid" | 3 | false | false | false + testCase | errorCount | beginValid| endValid| instantValid + "has valid bounds" | 0 | true | true | true + "has invalid start" | 1 | false | true | true + "has invalid end" | 1 | true | false | true + "has invalid start and end" | 2 | false | false | true + "is invalid instant" | 1 | true | true | false + "is completely invalid" | 3 | false | false | false } def "validates spatial bounds when #testCase"() { - def spatialAnalysis = SpatialBoundingAnalysis.newBuilder(TestUtils.inputAvroRecord.analysis.spatialBounding) + def spatialAnalysis = SpatialBoundingAnalysis.newBuilder() .setSpatialBoundingExists(exists) .setIsValid(valid) .build() - def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).setSpatialBounding(spatialAnalysis).build() - def record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord).setAnalysis(analysis).build() + def analysis = Analysis.newBuilder().setSpatialBounding(spatialAnalysis).build() + def record = ParsedRecord.newBuilder().setAnalysis(analysis).build() when: - ValueWithTopic testInput = ti.transform(record) - def validated = ValidationUtils.addValidationErrors(testInput) + def errors = ValidationUtils.validateSpatialBounds(record) then: - validated.errors.size() == errors + errors.size() == errorCount where: - testCase | errors | exists | valid - "bounds are valid" | 0 | true | true - "bounds are invalid" | 1 | true | false - "bounds not not exist" | 0 | false | false + testCase | errorCount | exists | valid + "bounds exist and are valid" | 0 | true | true + "bounds exist and are invalid" | 1 | true | false + "bounds do not exist" | 0 | false | true } def "validates topic placement when #testCase"() { - def identification = IdentificationAnalysis.newBuilder(TestUtils.inputAvroRecord.analysis.identification) + given: + def identification = IdentificationAnalysis.newBuilder() .setParentIdentifierExists(hasParentId) .setHierarchyLevelNameExists(hlm != null) + .setIsGranule(hasParentId && hlm != null && hlm.equals("granule")) .build() - def discovery = Discovery.newBuilder(TestUtils.inputAvroRecord.getDiscovery()).setHierarchyLevelName(hlm).build() - def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).setIdentification(identification).build() - def record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord).setType(type).setAnalysis(analysis).setDiscovery(discovery).build() - - // Setup places record on the collection topic, so we overwrite setup here - mockProcessorContext.setTopic(topic) - ti = new TopicIdentifier<>() - 
ti.init(mockProcessorContext) + def analysis = Analysis.newBuilder().setIdentification(identification).build() + def discovery = Discovery.newBuilder().setHierarchyLevelName(hlm).build() + def record = ParsedRecord.newBuilder().setType(type).setAnalysis(analysis).setDiscovery(discovery).build() when: - ValueWithTopic testInput = ti.transform(record) - def validated = ValidationUtils.addValidationErrors(testInput) + def errors = ValidationUtils.validateTopicPlacement(record, topic) then: - validated.errors.size() == errors + errors.size() == errorCount where: - testCase | errors | hasParentId | hlm | type | topic - "it's valid" | 0 | false | null | RecordType.collection | TestUtils.collectionTopic - "RecordType only doesn't match" | 1 | false | "collection" | RecordType.granule | TestUtils.collectionTopic - "granule on collection topic (metadata check)" | 1 | true | "granule" | RecordType.collection | TestUtils.collectionTopic - "non-granule on granule topic (metadata check)" | 1 | false | null | RecordType.granule | TestUtils.granuleTopic - "metadata check and RecordType check fail" | 2 | false | "collection" | RecordType.collection | TestUtils.granuleTopic - + testCase | errorCount | hasParentId | hlm | type | topic + "it's valid" | 0 | false | null | RecordType.collection | TestUtils.collectionTopic + "RecordType only doesn't match" | 1 | false | "collection" | RecordType.granule | TestUtils.collectionTopic + "granule on collection topic (metadata check)" | 1 | true | "granule" | RecordType.collection | TestUtils.collectionTopic + "non-granule on granule topic (no pid)" | 2 | false | "granule" | RecordType.granule | TestUtils.granuleTopic + "non-granule on granule topic (no hlm)" | 2 | true | null | RecordType.granule | TestUtils.granuleTopic + "non-granule on granule topic (no pid or hlm)" | 3 | false | null | RecordType.granule | TestUtils.granuleTopic + "metadata check and RecordType check fail" | 4 | false | "collection" | RecordType.collection | TestUtils.granuleTopic } } From 9c9cd28b67bc0b937e575a2aaca181b5dab9892f Mon Sep 17 00:00:00 2001 From: arianna Date: Fri, 15 May 2020 14:41:53 -0600 Subject: [PATCH 04/29] updating definition of INSTANT and comments --- .../cedar/onestop/indexer/util/TransformationUtils.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index eb279acc8..b13dc7199 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -15,6 +15,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID; /** @@ -268,7 +269,7 @@ private static Map prepareDates(TemporalBounding bounding, Tempo var result = new HashMap(); // If bounding is actually an instant, set search fields accordingly - if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT) { + if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { beginDate = analysis.getInstantUtcDateTimeString(); year = parseYear(beginDate); @@ -293,8 +294,8 @@ private static Map prepareDates(TemporalBounding bounding, Tempo beginYear = year; endYear = year; } else { - // If dates exist and are validSearchFormat (only false 
here if paleo, since we filtered out bad data earlier), - // use value from analysis block where dates are UTC datetime normalized + // If dates exist (thus VALID) and are indexable use value from analysis block where dates are UTC datetime normalized, + // else only set the year values as this is indicative of a paleo date beginDate = analysis.getBeginDescriptor() == VALID && analysis.getBeginIndexable() ? analysis.getBeginUtcDateTimeString() : null; beginYear = parseYear(analysis.getBeginUtcDateTimeString()); endDate = analysis.getEndDescriptor() == VALID && analysis.getEndIndexable() ? analysis.getEndUtcDateTimeString() : null; From 9c57833314e9623c4051644d06dc2497abaef249 Mon Sep 17 00:00:00 2001 From: arianna Date: Fri, 15 May 2020 17:21:04 -0600 Subject: [PATCH 05/29] suddenly salad! --- .../onestop/indexer/util/ValidationUtils.java | 112 ++++++++++---- .../indexer/util/ValidationUtilsSpec.groovy | 141 ++++++++++++++++-- 2 files changed, 212 insertions(+), 41 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java index e42609778..ba8fb99e9 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/ValidationUtils.java @@ -9,8 +9,8 @@ import java.util.ArrayList; import java.util.List; -import static org.cedar.schemas.avro.psi.ValidDescriptor.INVALID; -import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; +import static org.cedar.schemas.avro.psi.ValidDescriptor.*; +import static org.cedar.schemas.avro.psi.TimeRangeDescriptor.*; /** * This class contains utilities for validating the contents of the Avro (schemas) records prior to indexing @@ -19,8 +19,6 @@ public class ValidationUtils { static final private Logger log = LoggerFactory.getLogger(ValidationUtils.class); - static final private String VALIDATION_ERROR_TITLE = "Invalid for search indexing"; - public static ParsedRecord addValidationErrors(ValueWithTopic value) { ParsedRecord record = value == null ? null : value.getValue(); if (record == null) { @@ -41,11 +39,13 @@ public static ParsedRecord addValidationErrors(ValueWithTopic valu public static List validateRootRecord(ParsedRecord record) { var result = new ArrayList(); - if (record.getDiscovery() == null || record.getDiscovery() == Discovery.newBuilder().build()) { - result.add(buildValidationError("Discovery metadata missing. No metadata to load into OneStop.")); + if (record.getDiscovery() == null || record.getDiscovery().equals(Discovery.newBuilder().build())) { + result.add(buildValidationError(ValidationError.ROOT, + "Discovery metadata missing -- no metadata to load into OneStop.")); } - if (record.getAnalysis() == null || record.getAnalysis() == Analysis.newBuilder().build()) { - result.add(buildValidationError("Analysis metadata missing. 
Cannot verify metadata quality for OneStop.")); + if (record.getAnalysis() == null || record.getAnalysis().equals(Analysis.newBuilder().build())) { + result.add(buildValidationError(ValidationError.ROOT, + "Analysis metadata missing -- cannot verify metadata quality for OneStop.")); } return result; } @@ -54,10 +54,12 @@ public static List validateIdentification(ParsedRecord record) { var result = new ArrayList(); var identification = record.getAnalysis().getIdentification(); if (identification != null && !identification.getFileIdentifierExists() && !identification.getDoiExists()) { - result.add(buildValidationError("Missing identifier - record contains neither a fileIdentifier nor a DOI")); + result.add(buildValidationError(ValidationError.IDENTIFICATION, + "Missing identifier -- record contains neither a fileIdentifier nor a DOI")); } if (record.getType() == null ) { - result.add(buildValidationError("Metadata type error -- type unknown.")); + result.add(buildValidationError(ValidationError.IDENTIFICATION, + "Metadata type error -- type unknown.")); } return result; } @@ -68,28 +70,34 @@ public static List validateTopicPlacement(ParsedRecord record, Strin var recordTypeForTopic = IndexingUtils.determineTypeFromTopic(topic); if(declaredRecordType != recordTypeForTopic) { - result.add(buildValidationError("Declared record type [ " + declaredRecordType.toString() + + result.add(buildValidationError(ValidationError.TYPE, + "Declared record type [ " + declaredRecordType.toString() + " ] does not match expected type [ " + recordTypeForTopic.toString() + - " ]. Metadata was ingested downstream into wrong topic.")); + " ]. Metadata was ingested upstream into wrong topic.")); } var identification = record.getAnalysis().getIdentification(); var hlm = record.getDiscovery().getHierarchyLevelName(); // Granule on collection topic if(identification != null && identification.getIsGranule() && recordTypeForTopic != RecordType.granule) { - result.add(buildValidationError("Metadata indicates granule type but record is not on granule topic.")); + result.add(buildValidationError(ValidationError.TYPE, + "Metadata indicates granule type but record is not on granule topic.")); } // Non-granule on granule topic if(identification != null && !identification.getIsGranule() && recordTypeForTopic == RecordType.granule) { - result.add(buildValidationError("Metadata indicates non-granule type but record is on granule topic.")); + result.add(buildValidationError(ValidationError.TYPE, + "Metadata indicates non-granule type but record is on granule topic.")); if(!identification.getParentIdentifierExists()) { - result.add(buildValidationError("Expected granule record but missing parentIdentifier.")); + result.add(buildValidationError(ValidationError.TYPE, + "Expected granule record but missing parentIdentifier.")); } if(!identification.getHierarchyLevelNameExists()) { - result.add(buildValidationError("Expected granule record but missing hierarchyLevelName. This must be present and equal to case-insensitive 'granule'.")); + result.add(buildValidationError(ValidationError.TYPE, + "Expected granule record but missing hierarchyLevelName. 
This must be present and equal to case-insensitive 'granule'.")); } if(identification.getHierarchyLevelNameExists() && !hlm.toLowerCase().equals("granule")) { - result.add(buildValidationError("Expected granule record but hierarchyLevelName is [ " + hlm + " ] and should be case-insensitive 'granule'.")); + result.add(buildValidationError(ValidationError.TYPE, + "Expected granule record but hierarchyLevelName is [ " + hlm + " ] and should be case-insensitive 'granule'.")); } } return result; @@ -99,23 +107,55 @@ public static List validateTitles(ParsedRecord record) { var result = new ArrayList(); var titles = record.getAnalysis().getTitles(); if (!titles.getTitleExists()) { - result.add(buildValidationError("Missing title")); + result.add(buildValidationError(ValidationError.TITLE, + "Missing title")); } return result; } public static List validateTemporalBounds(ParsedRecord record) { var result = new ArrayList(); - var temporal = record.getAnalysis().getTemporalBounding(); - if (temporal.getBeginDescriptor() == INVALID) { - result.add(buildValidationError("Invalid beginDate")); + var temporalAnalysis = record.getAnalysis().getTemporalBounding(); + + // No temporal information is okay + if (temporalAnalysis == null) { + return result; + } + + var range = temporalAnalysis.getRangeDescriptor(); + if (range == NOT_APPLICABLE) { + // Range is always NOT_APPLICABLE when there is an error in one or more individual date fields; temporalBounding + // access is null-safe here since an INVALID date only occurs with parsing errors + var temporalDiscovery = record.getDiscovery().getTemporalBounding(); + var begin = temporalDiscovery.getBeginDate(); + var end = temporalDiscovery.getEndDate(); + var instant = temporalDiscovery.getInstant(); + if (temporalAnalysis.getBeginDescriptor() == ValidDescriptor.INVALID) { + result.add(buildValidationError(ValidationError.TEMPORAL_FIELD, + "The beginDate [ " + begin + " ] could not be parsed.")); + } + if (temporalAnalysis.getEndDescriptor() == ValidDescriptor.INVALID) { + result.add(buildValidationError(ValidationError.TEMPORAL_FIELD, + "The endDate [ " + end + " ] could not be parsed.")); + } + if (temporalAnalysis.getInstantDescriptor() == ValidDescriptor.INVALID) { + result.add(buildValidationError(ValidationError.TEMPORAL_FIELD, + "The instant [ " + instant + " ] could not be parsed.")); + } } - if (temporal.getEndDescriptor() == INVALID) { - result.add(buildValidationError("Invalid endDate")); + else if (range == AMBIGUOUS) { + result.add(buildValidationError(ValidationError.TEMPORAL_RANGE, + "Ambiguous temporal bounding -- both an instant and a beginDate present, defining two valid ranges.")); } - if (temporal.getBeginDescriptor() != UNDEFINED && temporal.getEndDescriptor() != UNDEFINED && temporal.getInstantDescriptor() == INVALID) { - result.add(buildValidationError("Invalid instant-only date")); + else if (range == BACKWARDS) { + result.add(buildValidationError(ValidationError.TEMPORAL_RANGE, + "Backwards temporal bounding -- beginDate after endDate.")); } + else if (range == TimeRangeDescriptor.INVALID) { + result.add(buildValidationError(ValidationError.TEMPORAL_RANGE, + "Invalid temporal bounding -- endDate present without beginDate.")); + } + return result; } @@ -123,14 +163,30 @@ public static List validateSpatialBounds(ParsedRecord record) { var result = new ArrayList(); var spatial = record.getAnalysis().getSpatialBounding(); if (spatial.getSpatialBoundingExists() && !spatial.getIsValid()) { - result.add(buildValidationError("Invalid GeoJSON for 
spatial bounding")); + result.add(buildValidationError(ValidationError.SPATIAL, + "Invalid GeoJSON for spatial bounding")); } return result; } - private static ErrorEvent buildValidationError(String details) { + public enum ValidationError { + ROOT("Record Missing Major Component"), + IDENTIFICATION("Identification Error"), + TYPE("Type Error"), + TITLE("Title Error"), + TEMPORAL_FIELD("Temporal Bounding Field Error"), + TEMPORAL_RANGE("Temporal Bounding Range Error"), + SPATIAL("Spatial Bounding Error"); + + private final String title; + ValidationError(String title) { this.title = title; } + + String getTitle() { return title; } + } + + private static ErrorEvent buildValidationError(ValidationError errorCategory, String details) { return ErrorEvent.newBuilder() - .setTitle(VALIDATION_ERROR_TITLE) + .setTitle(errorCategory.getTitle()) .setDetail(details) .setSource(StreamsApps.INDEXER_ID) .build(); diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy index c1894c1c0..32aba54df 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/ValidationUtilsSpec.groovy @@ -9,13 +9,18 @@ import org.cedar.schemas.avro.psi.IdentificationAnalysis import org.cedar.schemas.avro.psi.ParsedRecord import org.cedar.schemas.avro.psi.RecordType import org.cedar.schemas.avro.psi.SpatialBoundingAnalysis +import org.cedar.schemas.avro.psi.TemporalBounding import org.cedar.schemas.avro.psi.TemporalBoundingAnalysis +import org.cedar.schemas.avro.psi.TimeRangeDescriptor import org.cedar.schemas.avro.psi.TitleAnalysis +import org.cedar.schemas.avro.psi.ValidDescriptor import spock.lang.Specification import spock.lang.Unroll -import static org.cedar.schemas.avro.psi.ValidDescriptor.INVALID -import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID +import static org.cedar.schemas.avro.psi.ValidDescriptor.*; +import static org.cedar.schemas.avro.psi.TimeRangeDescriptor.*; + +import static org.cedar.onestop.indexer.util.ValidationUtils.ValidationError.*; @Unroll class ValidationUtilsSpec extends Specification { @@ -48,6 +53,58 @@ class ValidationUtilsSpec extends Specification { ValidationUtils.addValidationErrors(testInput) == null } + def "null Discovery fails root validation"() { + def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).build() + def record = ParsedRecord.newBuilder().setAnalysis(analysis).build() + + when: + def errors = ValidationUtils.validateRootRecord(record) + + then: + record.discovery == null + errors.size() == 1 + errors[0].title.equals(ROOT.title) + } + + def "empty Discovery fails root validation"() { + def analysis = Analysis.newBuilder(TestUtils.inputAvroRecord.analysis).build() + def record = ParsedRecord.newBuilder().setDiscovery(Discovery.newBuilder().build()).setAnalysis(analysis).build() + + when: + def errors = ValidationUtils.validateRootRecord(record) + + then: + record.discovery == Discovery.newBuilder().build() + errors.size() == 1 + errors[0].title.equals(ROOT.title) + } + + def "null Analysis fails root validation"() { + def discovery = Discovery.newBuilder(TestUtils.inputAvroRecord.discovery).build() + def record = ParsedRecord.newBuilder().setDiscovery(discovery).build() + + when: + def errors = ValidationUtils.validateRootRecord(record) + + then: + record.analysis == null + errors.size() == 1 + errors[0].title.equals(ROOT.title) + } 
+
+  def "empty Analysis fails root validation"() {
+    def discovery = Discovery.newBuilder(TestUtils.inputAvroRecord.discovery).build()
+    def record = ParsedRecord.newBuilder().setDiscovery(discovery).setAnalysis(Analysis.newBuilder().build()).build()
+
+    when:
+    def errors = ValidationUtils.validateRootRecord(record)
+
+    then:
+    record.analysis == Analysis.newBuilder().build()
+    errors.size() == 1
+    errors[0].title.equals(ROOT.title)
+  }
+
   def "validates titles when #testCase"() {
     def titleAnalysis = TitleAnalysis.newBuilder().setTitleExists(titleExists).build()
     def analysis = Analysis.newBuilder().setTitles(titleAnalysis).build()
     def record = ParsedRecord.newBuilder().setAnalysis(analysis).build()
@@ -59,6 +116,11 @@ class ValidationUtilsSpec extends Specification {
     then:
     errors.isEmpty() == isValid

+    and:
+    errors.every({ e -> e.title == TITLE.title })
+
     where:
     testCase           | isValid | titleExists
     "title is missing" | false   | false
@@ -79,6 +141,11 @@ class ValidationUtilsSpec extends Specification {
     then:
     errors.size() == errorCount

+    and:
+    errors.every({ e -> e.title == IDENTIFICATION.title })
+
     where:
     testCase          | errorCount | hasFileId | hasDoi | type
     "has only fileId" | 0          | true      | false  | RecordType.collection
@@ -89,11 +156,42 @@ class ValidationUtilsSpec extends Specification {
     "has no ids and unknown type" | 2          | false     | false  | null
   }

-  def "validates temporal bounds when #testCase"() {
+  def "validates temporal bounds by field when #testCase"() {
+    def temporalAnalysis = TemporalBoundingAnalysis.newBuilder()
+        .setBeginDescriptor(begin)
+        .setEndDescriptor(end)
+        .setInstantDescriptor(instant)
+        .setRangeDescriptor(NOT_APPLICABLE) // Forces traversal through field checks for all test cases
+        .build()
+    def analysis = Analysis.newBuilder().setTemporalBounding(temporalAnalysis).build()
+    // Need to supply content for Discovery here to avoid NPEs
+    def temporalBounding = TemporalBounding.newBuilder().setBeginDate("begin").setEndDate("end").setInstant("instant").build()
+    def discovery = Discovery.newBuilder().setTemporalBounding(temporalBounding).build()
+    def record = ParsedRecord.newBuilder().setAnalysis(analysis).setDiscovery(discovery).build()
+
+    when:
+    def errors = ValidationUtils.validateTemporalBounds(record)
+
+    then:
+    errors.size() == errorCount
+
+    and:
+    errors.every({ e -> e.title == TEMPORAL_FIELD.title })
+
+    where:
+    testCase              | errorCount | begin                     | end                       | instant
+    "all dates undefined" | 0          | ValidDescriptor.UNDEFINED | ValidDescriptor.UNDEFINED | ValidDescriptor.UNDEFINED
+    "all dates valid"     | 0          | VALID                     | VALID                     | VALID
+    "has invalid begin"   | 1          | INVALID                   | VALID                     | VALID
+    "has invalid end"     | 1          | VALID                     | INVALID                   | VALID
+    "has invalid instant" | 1          | VALID                     | VALID                     | INVALID
+  }
+
+  def "validates temporal bounds by range when #testCase"() {
     def temporalAnalysis = TemporalBoundingAnalysis.newBuilder()
-        .setBeginDescriptor(beginValid ? VALID : INVALID)
-        .setEndDescriptor(endValid ? VALID : INVALID)
-        .setInstantDescriptor(instantValid ?
VALID : INVALID)
+        .setRangeDescriptor(range)
         .build()
     def analysis = Analysis.newBuilder().setTemporalBounding(temporalAnalysis).build()
     def record = ParsedRecord.newBuilder().setAnalysis(analysis).build()

     when:
     def errors = ValidationUtils.validateTemporalBounds(record)
@@ -104,14 +202,21 @@ class ValidationUtilsSpec extends Specification {
     then:
     errors.size() == errorCount

+    and:
+    errors.every({ e -> e.title == TEMPORAL_RANGE.title })
+
     where:
-    testCase                    | errorCount | beginValid| endValid| instantValid
-    "has valid bounds"          | 0          | true      | true    | true
-    "has invalid start"         | 1          | false     | true    | true
-    "has invalid end"           | 1          | true      | false   | true
-    "has invalid start and end" | 2          | false     | false   | true
-    "is invalid instant"        | 1          | true      | true    | false
-    "is completely invalid"     | 3          | false     | false   | false
+    testCase              | errorCount | range
+    "has BOUNDED range"   | 0          | BOUNDED
+    "has INSTANT range"   | 0          | INSTANT
+    "has ONGOING range"   | 0          | ONGOING
+    "has UNDEFINED range" | 0          | TimeRangeDescriptor.UNDEFINED
+    "has AMBIGUOUS range" | 1          | AMBIGUOUS
+    "has BACKWARDS range" | 1          | BACKWARDS
+    "has INVALID range"   | 1          | TimeRangeDescriptor.INVALID
+    // NOT_APPLICABLE range does not generate a TEMPORAL_RANGE error; it is covered by the validate-by-field test
   }

   def "validates spatial bounds when #testCase"() {
@@ -128,6 +233,11 @@ class ValidationUtilsSpec extends Specification {
     then:
     errors.size() == errorCount

+    and:
+    errors.every({ e -> e.title == SPATIAL.title })
+
     where:
     testCase                     | errorCount | exists | valid
     "bounds exist and are valid" | 0          | true   | true
@@ -152,6 +262,11 @@ class ValidationUtilsSpec extends Specification {
     then:
     errors.size() == errorCount

+    and:
+    errors.every({ e -> e.title == TYPE.title })
+
     where:
     testCase     | errorCount | hasParentId | hlm  | type                  | topic
     "it's valid" | 0          | false       | null | RecordType.collection | TestUtils.collectionTopic

From 9321f1ed9166dc9a4a5af2418f009f0c5a446ff4 Mon Sep 17 00:00:00 2001
From: Zeb
Date: Wed, 20 May 2020 17:04:21 -0600
Subject: [PATCH 06/29] Update analysis and error mappings to prevent indexing failures due to strictness.
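
Both A&E mappings declare "dynamic": "strict", so Elasticsearch rejects any
document that carries a field absent from the mapping (the write fails with a
strict_dynamic_mapping_exception rather than silently dropping the field). The
ErrorEvent objects we index populate a "source" field (see buildValidationError
in ValidationUtils), so the previously mapped "applicationSource" never matched
and error-bearing documents could not be indexed; the granule identification
block likewise needed "parentIdentifierString". A rough sketch of the corrected
nested errors mapping, with unrelated fields elided:

    "errors": {
      "type": "nested",
      "properties": {
        "source": { "type": "keyword" },
        ...
      }
    }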
--- .../resources/mappings/analysis_error_collectionIndex.json | 4 ++-- .../resources/mappings/analysis_error_granuleIndex.json | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json index 6b290290d..ba5e2d670 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json @@ -163,7 +163,7 @@ "errors": { "type": "nested", "properties": { - "applicationSource": { + "source": { "type": "keyword" }, "title": { @@ -176,4 +176,4 @@ } } } -} \ No newline at end of file +} diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 487ab98dd..591132b3a 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -66,6 +66,9 @@ }, "parentIdentifierExists": { "type": "boolean" + }, + "parentIdentifierString": { + "type": "text" } } }, @@ -166,7 +169,7 @@ "errors": { "type": "nested", "properties": { - "applicationSource": { + "source": { "type": "keyword" }, "title": { @@ -179,4 +182,4 @@ } } } -} \ No newline at end of file +} From 9c3f3356a27f4f79bfaf61d1c6127d803f350d8b Mon Sep 17 00:00:00 2001 From: Zeb Date: Tue, 26 May 2020 13:43:07 -0600 Subject: [PATCH 07/29] mess of half completed nested mapping comparison --- buildSrc/src/main/kotlin/utils.kt | 4 +- .../mappings/analysis_error_granuleIndex.json | 3 - .../onestop/indexer/util/IndexingInput.java | 18 +++++ .../onestop/indexer/util/IndexingUtils.java | 4 + .../indexer/util/TransformationUtils.java | 80 +++++++++++++++++++ .../util/TransformationUtilsSpec.groovy | 51 +++++++++++- 6 files changed, 154 insertions(+), 6 deletions(-) diff --git a/buildSrc/src/main/kotlin/utils.kt b/buildSrc/src/main/kotlin/utils.kt index 1a919da23..6e925dd10 100644 --- a/buildSrc/src/main/kotlin/utils.kt +++ b/buildSrc/src/main/kotlin/utils.kt @@ -40,7 +40,7 @@ object Versions { const val PAC4J = "3.8.3" const val SNAKE_YAML = "1.24" - const val ONESTOP_SCHEMAS: String = "analysis-updates-SNAPSHOT" + const val ONESTOP_SCHEMAS: String = "1250-date-parsing-exception-SNAPSHOT" } // data classes @@ -86,4 +86,4 @@ fun parseDateISO(date: String): Date { val timeFormatter: DateTimeFormatter = DateTimeFormatter.ISO_DATE_TIME val accessor: TemporalAccessor = timeFormatter.parse(date) return Date.from(Instant.from(accessor)) -} \ No newline at end of file +} diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 591132b3a..9a86b537c 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -46,9 +46,6 @@ } } }, - "fileIdentifierExists": { - "type": "boolean" - }, "fileIdentifierString": { "type": "text", "fields": { diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java index fcfe59659..f5ba13220 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java +++ 
b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java @@ -72,6 +72,24 @@ public Set getTargetAnalysisAndErrorsIndexFields() { } } + // public Map getTargetAnalysisAndErrorsIndexMapping() { + // var aeAlias = esConfig.analysisAndErrorsAliasFromType(recordType.toString()); + // if(aeAlias != null) { + // return esConfig.indexedProperties(aeAlias); + // } + // else { + // return new HashMap<>(); + // } + // } + + // public static Map getNestedKeys(Map originalMap) { + // if (keysToKeep == null || keysToKeep.size() == 0) { + // return new HashMap<>(); + // } + // return originalMap.entrySet().stream() + // .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + // } + @Override public String toString() { return "IndexingInput {" + diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java index 01a9a14e9..3be1903ed 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java @@ -64,6 +64,8 @@ public static DocWriteRequest buildSearchWriteRequest(String indexName, DocWr } else { var formattedRecord = new HashMap(); + // log.info("build search write request "+input.getValue().value()+ " and "+input.getTargetSearchIndexFields()); + // log.info("transforms to "+TransformationUtils.reformatMessageForSearch(input.getValue().value(), input.getTargetSearchIndexFields())); formattedRecord.putAll(TransformationUtils.reformatMessageForSearch(input.getValue().value(), input.getTargetSearchIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); @@ -76,6 +78,8 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN } else { var formattedRecord = new HashMap(); + log.info("build A&E write request "+input.getValue().value() +" and "+ input.getTargetAnalysisAndErrorsIndexFields()); + log.info("transforms to "+TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index b13dc7199..dfcb0b03d 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -18,6 +18,9 @@ import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID; + +// TODO import org.apache.kafka.streams.StreamsBuilder; + /** * This class contains utilities for transforming the contents of the Avro (schemas) records into the appropriate * corresponding Elasticsearch mapping format. @@ -38,14 +41,91 @@ public static Map reformatMessageForAnalysisAndErrors(ParsedReco .map(e -> AvroUtils.avroToMap(e)) .collect(Collectors.toList()); + analysisMap.put("errors", errorsList); // drop fields not present in target index + // TODO make recursive! 
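+      // NOTE: the filter below matches top-level keys only; a retained key such as
+      // "identification" is copied with its entire nested value, so unmapped nested
+      // fields can still reach the strict-mapped index (hence the TODO above).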
var result = new LinkedHashMap(targetFields.size()); targetFields.forEach(f -> result.put(f, analysisMap.get(f))); return result; } + public static Map unfilteredAEMessage(ParsedRecord record) { + var analysis = record.getAnalysis(); + var errors = record.getErrors(); + + var analysisMap = AvroUtils.avroToMap(analysis, true); + analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); + var errorsList = errors.stream() + .map(e -> AvroUtils.avroToMap(e)) + .collect(Collectors.toList()); + + + var garbageError = new LinkedHashMap(); + garbageError.put("nonsense", "horrible"); + garbageError.put("source", "valid field" ); + errorsList.add(garbageError); + + + analysisMap.put("errors", errorsList); + analysisMap.put("garbage", "nuke meeee"); // FIXME + return analysisMap; + } + + public static Map stuffToRemove(Map analysisMap, Map mapping) { + var result = new LinkedHashMap(); + // analysisMap.entrySet().stream().forEach(e -> { + // if( !mapping.containsKey(e.getKey())) { + // result.put(e.getKey(), e.getValue()); + // } else { + // if (e.getValue() instanceof Map){ + // System.out.println("ZEB: the value is a map!"); + // // System.out.println("mapping: "+mapping.get(e.getKey()).get("properties")); + // // System.out.println("--> "+stuffToRemove((Map)e.getValue(), (Map)mapping.get(e.getKey()).get("properties"))); + // result.put(e.getKey(), stuffToRemove((Map)e.getValue(), (Map)((Map)mapping.get(e.getKey())).get("properties"))); // TODO brute force assumes mapping is an object map string:object here too + // } else if(e.getValue() instanceof Collection){ + // // TODO!!!! + // // result.put(e.getKey(), ((Collection)e.getValue()).filter(item -> !stuffToRemove((Map)item, (Map)((Map)mapping.get(e.getKey())).get("properties"))).isEmpty()); + // } + // } + // }); + // return result; + + analysisMap.forEach((k, v) -> { + if (!mapping.containsKey(k)) { + result.put(k, v); + } else { + Map nestedProperties = (Map)((Map)mapping.get(k)).get("properties"); // TODO assumes mapping is also a Map! 
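+        // Recurse into nested objects via the sub-mapping's "properties"; for lists,
+        // diff each element against the same sub-mapping and keep only elements that
+        // still contain something unmapped. Scalars under a mapped key are treated
+        // as valid and excluded from the removal set.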
+ + if (v instanceof Map) { + result.put(k, stuffToRemove((Map) v, nestedProperties)); + } else if (v instanceof List) { + var list = ((List) v).stream().map(item -> stuffToRemove((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) + .collect(Collectors.toList()); + System.out.println("ZEB - list: "+list); + result.put(k, list); + } + } + }); + return result; + } +/* + toRemove.forEach((k, v) -> { + var originalValue = mergedMap.get(k); + if (v instanceof Map && originalValue instanceof Map) { + mergedMap.put(k, removeFromMap((Map) originalValue, (Map) v)); + } + else if (v instanceof List && originalValue instanceof List) { + var mergedList = new HashSet<>((List) originalValue); + mergedList.removeAll((List) v); + mergedMap.put(k, mergedList); + } + else if ((v == null && originalValue == null) || v.equals(originalValue)) { + mergedMap.remove(k); + } + }); +*/ /////////////////////////////////////////////////////////////////////////////// // Indexing For Search // diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 1859851ae..f9e6e5c6c 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -1,6 +1,10 @@ package org.cedar.onestop.indexer.util import org.cedar.schemas.analyze.Analyzers +import org.cedar.schemas.analyze.Temporal +import org.cedar.schemas.avro.psi.Analysis +import org.cedar.schemas.avro.psi.TemporalBoundingAnalysis +import org.cedar.schemas.avro.psi.ValidDescriptor import org.cedar.schemas.avro.psi.Discovery import org.cedar.schemas.avro.psi.FileInformation import org.cedar.schemas.avro.psi.ParsedRecord @@ -8,9 +12,12 @@ import org.cedar.schemas.avro.psi.RecordType import org.cedar.schemas.avro.psi.Relationship import org.cedar.schemas.avro.psi.RelationshipType import org.cedar.schemas.avro.psi.TemporalBounding +import java.time.temporal.ChronoUnit import spock.lang.Specification import spock.lang.Unroll +import groovy.json.JsonOutput + import static org.cedar.schemas.avro.util.TemporalTestData.getSituations @Unroll @@ -18,6 +25,7 @@ class TransformationUtilsSpec extends Specification { static collectionFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet() static granuleFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet() + static granuleAnalysisErrorFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS).keySet() static expectedKeywords = [ "SIO > Super Important Organization", @@ -83,6 +91,47 @@ class TransformationUtilsSpec extends Specification { 'granule' | granuleFields | TestUtils.inputGranuleRecord } + def "only mapped nested fields are indexed"() { + when: + def result = TransformationUtils.reformatMessageForAnalysisAndErrors(TestUtils.inputGranuleRecord, granuleAnalysisErrorFields) + + + def asdf = TransformationUtils.stuffToRemove(TransformationUtils.unfilteredAEMessage(TestUtils.inputGranuleRecord), TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + + println("ZEB") + println(result) + println(JsonOutput.toJson(asdf)) + + then: + result.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) + } + + def "can i construct a record"() { + when: + ParsedRecord record = 
ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + .setAnalysis( + Analysis.newBuilder().setTemporalBounding( + TemporalBoundingAnalysis.newBuilder() + .setBeginDescriptor(ValidDescriptor.VALID) + .setBeginIndexable(true) + .setBeginPrecision(ChronoUnit.DAYS.toString()) + .setBeginZoneSpecified(null) + .setBeginUtcDateTimeString("2000-02-01") + .setBeginYear(2000) + .setBeginMonth(2) + .setBeginDayOfYear(32) + .setBeginDayOfMonth(1) + .build() + ).build()).build() + def asdf = TransformationUtils.stuffToRemove(TransformationUtils.unfilteredAEMessage(record), TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + + println("ZEB") + println(JsonOutput.toJson(asdf)) + + then: + asdf.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) + } + //////////////////////////////// // Identifiers, "Names" // //////////////////////////////// @@ -266,7 +315,7 @@ class TransformationUtilsSpec extends Specification { def "temporal bounding with #testCase dates is prepared correctly"() { given: def bounding = TemporalBounding.newBuilder().setBeginDate(begin).setEndDate(end).build() - def analysis = Analyzers.analyzeTemporalBounding(Discovery.newBuilder().setTemporalBounding(bounding).build()) + def analysis = Temporal.analyzeBounding(Discovery.newBuilder().setTemporalBounding(bounding).build()) when: def result = TransformationUtils.prepareDates(bounding, analysis) From 21d3448a40ea3aef54a3f049faf49f8c350f3ce0 Mon Sep 17 00:00:00 2001 From: arianna Date: Wed, 27 May 2020 09:38:00 -0600 Subject: [PATCH 08/29] wip making new data-utils module --- data-utils/build.gradle | 14 ++ .../org/cedar/onestop/utils/ListUtils.java | 48 ++++++ .../org/cedar/onestop/utils/MapUtils.java | 155 ++++++++++++++++++ elastic-common/build.gradle | 2 + .../elastic/common/ElasticsearchConfig.java | 1 + .../onestop/kafka/common/util/DataUtils.java | 2 + settings.gradle.kts | 1 + 7 files changed, 223 insertions(+) create mode 100644 data-utils/build.gradle create mode 100644 data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java create mode 100644 data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java diff --git a/data-utils/build.gradle b/data-utils/build.gradle new file mode 100644 index 000000000..96c856cb7 --- /dev/null +++ b/data-utils/build.gradle @@ -0,0 +1,14 @@ +sourceCompatibility = 11 +targetCompatibility = 11 + +dependencies { + def Versions = project.Versions + + compileOnly("org.slf4j:slf4j-api:1.7.25") + + implementation("org.yaml:snakeyaml:${Versions.SNAKE_YAML}") +} + +jar { + archiveBaseName.set("${rootProject.name}-${project.name}") +} \ No newline at end of file diff --git a/data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java b/data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java new file mode 100644 index 000000000..6b26ad1d1 --- /dev/null +++ b/data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java @@ -0,0 +1,48 @@ +package org.cedar.onestop.utils; + +import java.util.ArrayList; +import java.util.List; + +public class ListUtils { + + public static List addOrInit(List list, T item) { + var result = new ArrayList(); + if (list != null && !list.isEmpty()) { + result.addAll(list); + } + if (item != null) { + result.add(item); + } + return result; + } + + /** + * + * @param list list to truncate + * @param maxListSize list size limit + * @param mostRecentAdditions if true, returned list reflects end of original list as opposed to start + * @param list object type + * @return truncated list of T objects + * 
@throws IllegalArgumentException if maxListSize is less than or equal to 0 + */ + public static List truncateList(List list, int maxListSize, boolean mostRecentAdditions) { + if (maxListSize <= 0) { + throw new IllegalArgumentException("Attempted to make a list of size [ " + maxListSize + " ]. " + + "Expected a size limit greater than 0."); + } + + var result = new ArrayList(); + if (list != null && !list.isEmpty()) { + var size = list.size(); + if(size <= maxListSize) { + result.addAll(list); + } + else { + var fromIndex = mostRecentAdditions ? size - maxListSize : 0; + var toIndex = mostRecentAdditions ? size : maxListSize; + result.addAll(list.subList(fromIndex, toIndex)); + } + } + return result; + } +} diff --git a/data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java b/data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java new file mode 100644 index 000000000..fe4525182 --- /dev/null +++ b/data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java @@ -0,0 +1,155 @@ +package org.cedar.onestop.utils; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; + +public class MapUtils { + +// public static Map parseJsonMap(String json) throws IOException { +// if (json == null || json == "") { +// return new LinkedHashMap(); +// } +// else { +// return new ObjectMapper().readValue(json, Map.class); +// } +// } + + /** + * Returns a merged Map of the original and toAdd Maps. Deep merges of nested Maps and Lists are performed and + * explicit duplicates (exact matches for all fields) are avoided. + * @param original Base Map to which elements will be merged from toAdd + * @param toAdd Map of elements to add to the original Map + * @return An updated original Map where new elements from toAdd have been merged. Returns empty Map if + * original and toAdd are empty or null. + */ + public static Map mergeMaps(Map original, Map toAdd) { + Map mergedMap = original == null ? new LinkedHashMap<>() : new LinkedHashMap<>(original); + if (original == null && toAdd == null) { + return Collections.emptyMap(); + } + if (original == null || original.size() == 0) { + return toAdd; + } + if (toAdd == null || toAdd.size() == 0) { + return original; + } + + toAdd.forEach((k, v) -> { + var originalValue = mergedMap.get(k); + if (v instanceof Map && originalValue instanceof Map) { + mergedMap.put(k, mergeMaps((Map) originalValue, (Map) v)); + } + else if (v instanceof List && originalValue instanceof List) { + var mergedList = new HashSet((List) originalValue); + mergedList.addAll((List) v); + mergedMap.put(k, new ArrayList(mergedList)); + } + else { + /* This overwrites simple values but also mismatched object types. Accepting that "risk" here since + useful errors are generated downstream for objects being cast to avro pojos but also because unknown JSON is + allowed to pass through later parsing/analysis steps untouched (either type change could be erroneous but + there's no way to know which) */ + mergedMap.put(k, v); + } + }); + + return mergedMap; + } + + /** + * Returns a new Map of the original with elements in toRemove discarded. Elements in toRemove must match those in + * original exactly, or they will not be removed. Handles nested Maps and Lists. + * @param original Base Map from which elements in toRemove will be removed + * @param toRemove Map of elements to remove from the original Map + * @return An updated original Map where matching elements from toRemove have been discarded. 
Returns empty Map if + * original is empty or null. + */ + public static Map removeFromMap(Map original, Map toRemove) { + Map mergedMap = original == null ? new LinkedHashMap<>() : new LinkedHashMap<>(original); + if (original == null && toRemove == null) { + return Collections.emptyMap(); + } + if (original == null || original.size() == 0) { + return Collections.emptyMap(); + } + if (toRemove == null || toRemove.size() == 0) { + return original; + } + + toRemove.forEach((k, v) -> { + var originalValue = mergedMap.get(k); + if (v instanceof Map && originalValue instanceof Map) { + mergedMap.put(k, removeFromMap((Map) originalValue, (Map) v)); + } + else if (v instanceof List && originalValue instanceof List) { + var mergedList = new HashSet<>((List) originalValue); + mergedList.removeAll((List) v); + mergedMap.put(k, mergedList); + } + else if ((v == null && originalValue == null) || v.equals(originalValue)) { + mergedMap.remove(k); + } + }); + + return mergedMap; + } + + /** + * Turns a nested map into a flat map with nested keys appended together with the delimiter + * @param parentKey Prefix that all flattened keys start with. Null, empty, or whitespace-only value results in no prefix + * @param delimiter String to delimit between each nested key. Defaults to "." if null or empty + * @param originalMap Nested-key map to be flattened + * @return Single-level map with flattened keys + */ + public static Map consolidateNestedKeysInMap(String parentKey, String delimiter, Map originalMap) { + var parent = (parentKey == null || parentKey.isBlank()) ? new String() : parentKey; + var delimiterString = (delimiter == null || delimiter.isEmpty()) ? "." : delimiter; + var newMap = new HashMap(); + + if(originalMap != null && !originalMap.isEmpty()) { + originalMap.forEach((k, v) -> { + String newKey = parent.isEmpty() ? k : parent + delimiterString + k; + if(v instanceof Map) { + newMap.putAll(consolidateNestedKeysInMap(newKey, delimiterString, (Map) v)); + } + else { + newMap.put(newKey, v); + } + }); + } + return newMap; + } + + /** + * Removes the given trimString from any keys in originalMap that match. For example a trim string 'abc.' would turn + * key 'abc.123' into key '123'. + * @param trimString Case insensitive prefix to remove from keys in originalMap + * @param originalMap + * @return New map with modified keys + */ + public static Map trimMapKeys(String trimString, Map originalMap) { + Map trimmedKeysMap = new LinkedHashMap<>(); + originalMap.forEach((k, v) -> { + String trimmedKey = k.toLowerCase().startsWith(trimString.toLowerCase()) ? 
k.substring(trimString.length()) : k;
+      trimmedKeysMap.put(trimmedKey, v);
+    });
+    return trimmedKeysMap;
+  }
+
+  /**
+   * Returns a map with all keys not contained in the given collection removed
+   * @param keysToKeep A collection of the keys to preserve in the filtered output; all others will be removed
+   * @param originalMap Map to filter
+   * @return The filtered map
+   */
+  public static Map filterMapKeys(Collection keysToKeep, Map originalMap) {
+    if (keysToKeep == null || keysToKeep.size() == 0) {
+      return new HashMap<>();
+    }
+    return originalMap.entrySet().stream()
+        .filter(e -> keysToKeep.contains(e.getKey()))
+        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+  }
+}
diff --git a/elastic-common/build.gradle b/elastic-common/build.gradle
index 80c8ca9ee..ef207391c 100644
--- a/elastic-common/build.gradle
+++ b/elastic-common/build.gradle
@@ -6,6 +6,8 @@ dependencies {
   compileOnly("org.slf4j:slf4j-api:1.7.25")
 
+  implementation(project(':data-utils'))
+
   implementation("org.elasticsearch.client:elasticsearch-rest-client:${Versions.ELASTIC}")
   implementation("org.elasticsearch.client:elasticsearch-rest-high-level-client:${Versions.ELASTIC}")
   implementation("com.fasterxml.jackson.core:jackson-databind:2.10.0")
diff --git a/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java b/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java
index 2910d2502..ac6951cb0 100644
--- a/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java
+++ b/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java
@@ -127,6 +127,7 @@ public String jsonMapping(String alias) {
   public Map indexedProperties(String alias) {
     var parsed = (Map) parsedMapping(alias);
     var mappings = (Map) parsed.getOrDefault("mappings", Collections.emptyMap());
+
     return (Map) mappings.getOrDefault("properties", Collections.emptyMap());
   }
 
diff --git a/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java b/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java
index c77813cdc..8150b5739 100644
--- a/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java
+++ b/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java
@@ -200,6 +200,8 @@ public static Map filterMapKeys(Collection keysToKeep, M
         .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
   }
 
+  ////////// DELETE ^^^^^^ //////////////////
+
   /**
    * @param builderType type of schema builder either ParsedRecord or AggregatedInput, otherwise error out
    * @param fieldData parsed or input metadata values
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 02047c692..1e852b37f 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -2,6 +2,7 @@ rootProject.name = "onestop"
 
 include(
   "client",
+  "data-utils",
   "e2e-tests",
   "elastic-common",
   "geoportal-search",

From cb4eb38754d350a28e35f7e757ec30c8267fb93b Mon Sep 17 00:00:00 2001
From: Zeb
Date: Wed, 27 May 2020 11:39:54 -0600
Subject: [PATCH 09/29] Filter A&E output against the nested ES mapping,
 skipping fields we deliberately don't index

The functions that clean up the object based on the nested ES mapping, and
on the fields we deliberately aren't indexing, are mostly sorted out. Lots
of code cleanup and tests still left.
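In outline, the recursive diff against the mapping works like the sketch
below. This is a self-contained illustration, not the final code: the real
logic lives in TransformationUtils.identifyUnmappedFields, and the generics,
null guard, and empty-submap handling here are simplifications of it.

    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.stream.Collectors;

    class MappingDiffSketch {
      // Collects the entries of `doc` that have no counterpart in the ES mapping.
      // Nested maps recurse through the mapping's "properties" layer; lists are
      // diffed element by element, dropping elements that come back empty.
      @SuppressWarnings("unchecked")
      static Map<String, Object> unmappedFields(Map<String, Object> doc, Map<String, Object> mapping) {
        var result = new LinkedHashMap<String, Object>();
        if (mapping == null) {          // nothing is mapped at this level, so everything is unmapped
          result.putAll(doc);
          return result;
        }
        doc.forEach((k, v) -> {
          if (!mapping.containsKey(k)) {
            result.put(k, v);           // field absent from the mapping entirely
          } else {
            var nested = (Map<String, Object>) ((Map<String, Object>) mapping.get(k)).get("properties");
            if (v instanceof Map) {
              var sub = unmappedFields((Map<String, Object>) v, nested);
              if (!sub.isEmpty()) result.put(k, sub);
            } else if (v instanceof List) {
              var subList = ((List<?>) v).stream()
                  .map(item -> unmappedFields((Map<String, Object>) item, nested))
                  .filter(item -> !item.isEmpty())
                  .collect(Collectors.toList());
              if (!subList.isEmpty()) result.put(k, subList);
            }
          }
        });
        return result;
      }
    }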
--- .../mappings/analysis_error_granuleIndex.json | 3 + .../indexer/util/TransformationUtils.java | 53 ++++++++++++-- .../util/TransformationUtilsSpec.groovy | 72 +++++++++++++++++-- 3 files changed, 117 insertions(+), 11 deletions(-) diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 9a86b537c..591132b3a 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -46,6 +46,9 @@ } } }, + "fileIdentifierExists": { + "type": "boolean" + }, "fileIdentifierString": { "type": "text", "fields": { diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index dfcb0b03d..3262a8d26 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -18,6 +18,7 @@ import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID; +import org.cedar.onestop.kafka.common.util.DataUtils; // TODO import org.apache.kafka.streams.StreamsBuilder; @@ -73,7 +74,29 @@ public static Map unfilteredAEMessage(ParsedRecord record) { return analysisMap; } - public static Map stuffToRemove(Map analysisMap, Map mapping) { + public static Map pruneKnownUnmappedFields(Map analysisMap, Map unmappedFields) { + + var result = new LinkedHashMap(); + analysisMap.forEach((k, v) -> { + if (!unmappedFields.containsKey(k)) { + result.put(k, v); + } else { + Map nestedProperties = (Map)((Map)unmappedFields.get(k)); // TODO almost identical to stuff to remove... but reversed... and no ".properties" layer... + + if (v instanceof Map) { + result.put(k, pruneKnownUnmappedFields((Map) v, nestedProperties)); + } else if (v instanceof List) { + var list = ((List) v).stream().map(item -> pruneKnownUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) + .collect(Collectors.toList()); + System.out.println("ZEB - list: "+list); + result.put(k, list); + } + } + }); + return result; + } + + public static Map identifyUnmappedFields(Map analysisMap, Map mapping) { var result = new LinkedHashMap(); // analysisMap.entrySet().stream().forEach(e -> { // if( !mapping.containsKey(e.getKey())) { @@ -82,15 +105,31 @@ public static Map stuffToRemove(Map analysisMap, // if (e.getValue() instanceof Map){ // System.out.println("ZEB: the value is a map!"); // // System.out.println("mapping: "+mapping.get(e.getKey()).get("properties")); - // // System.out.println("--> "+stuffToRemove((Map)e.getValue(), (Map)mapping.get(e.getKey()).get("properties"))); - // result.put(e.getKey(), stuffToRemove((Map)e.getValue(), (Map)((Map)mapping.get(e.getKey())).get("properties"))); // TODO brute force assumes mapping is an object map string:object here too + // // System.out.println("--> "+identifyUnmappedFields((Map)e.getValue(), (Map)mapping.get(e.getKey()).get("properties"))); + // result.put(e.getKey(), identifyUnmappedFields((Map)e.getValue(), (Map)((Map)mapping.get(e.getKey())).get("properties"))); // TODO brute force assumes mapping is an object map string:object here too // } else if(e.getValue() instanceof Collection){ // // TODO!!!! 
- // // result.put(e.getKey(), ((Collection)e.getValue()).filter(item -> !stuffToRemove((Map)item, (Map)((Map)mapping.get(e.getKey())).get("properties"))).isEmpty()); + // // result.put(e.getKey(), ((Collection)e.getValue()).filter(item -> !identifyUnmappedFields((Map)item, (Map)((Map)mapping.get(e.getKey())).get("properties"))).isEmpty()); // } // } // }); // return result; + // + // const knownUnmappedTemporalFields = new HashMap(); + // knownUnmappedTemporalFields.put("beginYear", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("beginDayOfYear", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("beginDayOfMonth", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("beginMonth", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("endYear", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("endDayOfYear", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("endDayOfMonth", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("endMonth", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("instantYear", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("instantDayOfYear", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("instantDayOfMonth", "mapped to search index instead"); + // knownUnmappedTemporalFields.put("instantMonth", "mapped to search index instead"); + // const knownUnmappedFields = new HashMap(); + // knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); analysisMap.forEach((k, v) -> { if (!mapping.containsKey(k)) { @@ -98,10 +137,12 @@ public static Map stuffToRemove(Map analysisMap, } else { Map nestedProperties = (Map)((Map)mapping.get(k)).get("properties"); // TODO assumes mapping is also a Map! 
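+        // An ES mapping entry is either a leaf like {"type": "keyword"} or a
+        // nested object whose children live under a "properties" key, hence the
+        // extra .get("properties") hop before recursing.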
+ // Map knownUnmapped = (Map)knownUnmappedFields.get(k); + if (v instanceof Map) { - result.put(k, stuffToRemove((Map) v, nestedProperties)); + result.put(k, identifyUnmappedFields((Map) v, nestedProperties)); } else if (v instanceof List) { - var list = ((List) v).stream().map(item -> stuffToRemove((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) + var list = ((List) v).stream().map(item -> identifyUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) .collect(Collectors.toList()); System.out.println("ZEB - list: "+list); result.put(k, list); diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index f9e6e5c6c..59a3f0608 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -20,6 +20,8 @@ import groovy.json.JsonOutput import static org.cedar.schemas.avro.util.TemporalTestData.getSituations +import org.cedar.onestop.kafka.common.util.DataUtils; + @Unroll class TransformationUtilsSpec extends Specification { @@ -96,7 +98,7 @@ class TransformationUtilsSpec extends Specification { def result = TransformationUtils.reformatMessageForAnalysisAndErrors(TestUtils.inputGranuleRecord, granuleAnalysisErrorFields) - def asdf = TransformationUtils.stuffToRemove(TransformationUtils.unfilteredAEMessage(TestUtils.inputGranuleRecord), TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + def asdf = TransformationUtils.identifyUnmappedFields(TransformationUtils.unfilteredAEMessage(TestUtils.inputGranuleRecord), TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) println("ZEB") println(result) @@ -108,6 +110,33 @@ class TransformationUtilsSpec extends Specification { def "can i construct a record"() { when: + println("YO ZEB") + // println( + // DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) + // ) + + + def esmapping = DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) + println("esmapping: "+JsonOutput.toJson(esmapping)) + + + + def knownUnmappedTemporalFields = new HashMap(); + knownUnmappedTemporalFields.put("beginYear", new HashMap()); + knownUnmappedTemporalFields.put("beginDayOfYear", new HashMap()); + knownUnmappedTemporalFields.put("beginDayOfMonth", new HashMap()); + knownUnmappedTemporalFields.put("beginMonth", new HashMap()); + knownUnmappedTemporalFields.put("endYear", new HashMap()); + knownUnmappedTemporalFields.put("endDayOfYear", new HashMap()); + knownUnmappedTemporalFields.put("endDayOfMonth", new HashMap()); + knownUnmappedTemporalFields.put("endMonth", new HashMap()); + knownUnmappedTemporalFields.put("instantYear", new HashMap()); + knownUnmappedTemporalFields.put("instantDayOfYear", new HashMap()); + knownUnmappedTemporalFields.put("instantDayOfMonth", new HashMap()); + knownUnmappedTemporalFields.put("instantMonth", new HashMap()); + def knownUnmappedFields = new HashMap(); + knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); + 
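+      // Rationale: these temporal sub-fields are deliberately absent from the
+      // A&E mapping (they are mapped to the search index instead), so the prune
+      // step should drop them quietly rather than warn about them.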
ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) .setAnalysis( Analysis.newBuilder().setTemporalBounding( @@ -123,13 +152,46 @@ class TransformationUtilsSpec extends Specification { .setBeginDayOfMonth(1) .build() ).build()).build() - def asdf = TransformationUtils.stuffToRemove(TransformationUtils.unfilteredAEMessage(record), TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - println("ZEB") - println(JsonOutput.toJson(asdf)) + def parsed = TransformationUtils.unfilteredAEMessage(record) + def asdf = TransformationUtils.identifyUnmappedFields(parsed, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + + println("ZEB from "+JsonOutput.toJson(parsed)) + println("ZEB minus "+JsonOutput.toJson(asdf)) + println("AND CLEANED? "+JsonOutput.toJson(DataUtils.removeFromMap((parsed), asdf))) + def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, knownUnmappedFields) + println("pruned unampped? "+JsonOutput.toJson(pruned)) + def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + println("creates minus: "+JsonOutput.toJson(minus)) + println("which results in indexing: "+ JsonOutput.toJson(DataUtils.removeFromMap(pruned, minus))) + asdf.get("temporalBounding").remove("instantYear") + + asdf.get("temporalBounding").remove("beginYear") + asdf.get("temporalBounding").remove("beginDayOfYear") + asdf.get("temporalBounding").remove("beginDayOfMonth") + asdf.get("temporalBounding").remove("beginMonth") + asdf.get("temporalBounding").remove("endYear") + asdf.get("temporalBounding").remove("endDayOfYear") + asdf.get("temporalBounding").remove("endDayOfMonth") + asdf.get("temporalBounding").remove("endMonth") + asdf.get("temporalBounding").remove("instantYear") + asdf.get("temporalBounding").remove("instantDayOfYear") + asdf.get("temporalBounding").remove("instantDayOfMonth") + asdf.get("temporalBounding").remove("instantMonth") + println("cleaned up for dumb logging: "+JsonOutput.toJson(asdf)) + + def objkeys = DataUtils.consolidateNestedKeysInMap(null, ".", parsed) + println("objkeys: "+JsonOutput.toJson(objkeys)) + def junkToremove = DataUtils.filterMapKeys(esmapping.keySet(), objkeys) + println("remove me:" + JsonOutput.toJson(junkToremove)) + // def trimmed = DataUtils.filterMapKeys(junkToremove.keySet(), objkeys) + def trimmed = DataUtils.removeFromMap(objkeys, junkToremove) + println("fixed: "+JsonOutput.toJson(trimmed)) + then: - asdf.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) + // asdf.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) + trimmed == [ "foo" : "bar"] } //////////////////////////////// From be1dfe8965821902dea487ca3280a07c6c543804 Mon Sep 17 00:00:00 2001 From: Zeb Date: Wed, 27 May 2020 12:00:34 -0600 Subject: [PATCH 10/29] Begin cleaning up test... 
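The spec should end up pinning down the whole pipeline rather than printing
intermediate state. Roughly, in outline (method names as on this branch;
`parsed` and `knownUnmappedFields` stand in for the real fixtures):

    // 1. prune the fields we knowingly leave out of the A&E mapping,
    // 2. diff what remains against the mapping (that diff is worth a warning),
    // 3. strip the diff so the strict mapping doesn't reject the request.
    var aeMapping = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS);
    var pruned    = TransformationUtils.pruneKnownUnmappedFields(parsed, knownUnmappedFields);
    var unmapped  = TransformationUtils.identifyUnmappedFields(pruned, aeMapping);
    var indexed   = DataUtils.removeFromMap(pruned, unmapped);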
--- .../indexer/util/TransformationUtils.java | 15 ---- .../util/TransformationUtilsSpec.groovy | 90 +++++++++---------- 2 files changed, 40 insertions(+), 65 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index 3262a8d26..9cfb43edc 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -115,21 +115,6 @@ public static Map identifyUnmappedFields(Map ana // }); // return result; // - // const knownUnmappedTemporalFields = new HashMap(); - // knownUnmappedTemporalFields.put("beginYear", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("beginDayOfYear", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("beginDayOfMonth", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("beginMonth", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("endYear", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("endDayOfYear", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("endDayOfMonth", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("endMonth", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("instantYear", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("instantDayOfYear", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("instantDayOfMonth", "mapped to search index instead"); - // knownUnmappedTemporalFields.put("instantMonth", "mapped to search index instead"); - // const knownUnmappedFields = new HashMap(); - // knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); analysisMap.forEach((k, v) -> { if (!mapping.containsKey(k)) { diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 59a3f0608..9b07c0d36 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -110,14 +110,19 @@ class TransformationUtilsSpec extends Specification { def "can i construct a record"() { when: - println("YO ZEB") + def parsed = [identification:null, titles:null, description:null, dataAccess:null, thumbnail:null, temporalBounding:[ + beginDescriptor:ValidDescriptor.VALID, beginPrecision:ChronoUnit.DAYS.toString(), beginIndexable:true, beginZoneSpecified:null, beginUtcDateTimeString:2000-02-01, beginYear:2000, beginDayOfYear:32, beginDayOfMonth:1, beginMonth:2, + endDescriptor:null, endPrecision:null, endIndexable:null, endZoneSpecified:null, endUtcDateTimeString:null, endYear:null, endDayOfYear:null, endDayOfMonth:null, endMonth:null, + instantDescriptor:null, instantPrecision:null, instantIndexable:null, instantZoneSpecified:null, instantUtcDateTimeString:null, instantYear:null, instantDayOfYear:null, instantDayOfMonth:null, instantMonth:null, + rangeDescriptor:null], + spatialBounding:null, internalParentIdentifier:null, errors:[[nonsense:"horrible", source:"valid field"]], garbage:"nuke meeee"] // println( // DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", 
TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) // ) - def esmapping = DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) - println("esmapping: "+JsonOutput.toJson(esmapping)) + // def esmapping = DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) + // println("esmapping: "+JsonOutput.toJson(esmapping)) @@ -137,61 +142,46 @@ class TransformationUtilsSpec extends Specification { def knownUnmappedFields = new HashMap(); knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); - ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - .setAnalysis( - Analysis.newBuilder().setTemporalBounding( - TemporalBoundingAnalysis.newBuilder() - .setBeginDescriptor(ValidDescriptor.VALID) - .setBeginIndexable(true) - .setBeginPrecision(ChronoUnit.DAYS.toString()) - .setBeginZoneSpecified(null) - .setBeginUtcDateTimeString("2000-02-01") - .setBeginYear(2000) - .setBeginMonth(2) - .setBeginDayOfYear(32) - .setBeginDayOfMonth(1) - .build() - ).build()).build() - - def parsed = TransformationUtils.unfilteredAEMessage(record) - def asdf = TransformationUtils.identifyUnmappedFields(parsed, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - - println("ZEB from "+JsonOutput.toJson(parsed)) - println("ZEB minus "+JsonOutput.toJson(asdf)) - println("AND CLEANED? "+JsonOutput.toJson(DataUtils.removeFromMap((parsed), asdf))) + // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + // .setAnalysis( + // Analysis.newBuilder().setTemporalBounding( + // TemporalBoundingAnalysis.newBuilder() + // .setBeginDescriptor(ValidDescriptor.VALID) + // .setBeginIndexable(true) + // .setBeginPrecision(ChronoUnit.DAYS.toString()) + // .setBeginZoneSpecified(null) + // .setBeginUtcDateTimeString("2000-02-01") + // .setBeginYear(2000) + // .setBeginMonth(2) + // .setBeginDayOfYear(32) + // .setBeginDayOfMonth(1) + // .build() + // ).build()).build() + + // def parsed = TransformationUtils.unfilteredAEMessage(record) + // def asdf = TransformationUtils.identifyUnmappedFields(parsed, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + + println("parsed "+JsonOutput.toJson(parsed)) + // println("???"+parsed) + // println("in groov"+ [identification:null, titles:null, description:null, dataAccess:null, thumbnail:null, temporalBounding:[ + // beginDescriptor:ValidDescriptor.VALID, beginPrecision:ChronoUnit.DAYS.toString(), beginIndexable:true, beginZoneSpecified:null, beginUtcDateTimeString:2000-02-01, beginYear:2000, beginDayOfYear:32, beginDayOfMonth:1, beginMonth:2, + // endDescriptor:null, endPrecision:null, endIndexable:null, endZoneSpecified:null, endUtcDateTimeString:null, endYear:null, endDayOfYear:null, endDayOfMonth:null, endMonth:null, + // instantDescriptor:null, instantPrecision:null, instantIndexable:null, instantZoneSpecified:null, instantUtcDateTimeString:null, instantYear:null, instantDayOfYear:null, instantDayOfMonth:null, instantMonth:null, + // rangeDescriptor:null], + // spatialBounding:null, internalParentIdentifier:null, errors:[[nonsense:"horrible", source:"valid field"]], garbage:"nuke meeee"] +// ) 
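+      // Pipeline under test: prune the fields we knowingly leave unmapped, diff
+      // the remainder against the ES mapping, then strip that diff before indexing.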
def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, knownUnmappedFields) println("pruned unampped? "+JsonOutput.toJson(pruned)) def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) println("creates minus: "+JsonOutput.toJson(minus)) println("which results in indexing: "+ JsonOutput.toJson(DataUtils.removeFromMap(pruned, minus))) - asdf.get("temporalBounding").remove("instantYear") - - asdf.get("temporalBounding").remove("beginYear") - asdf.get("temporalBounding").remove("beginDayOfYear") - asdf.get("temporalBounding").remove("beginDayOfMonth") - asdf.get("temporalBounding").remove("beginMonth") - asdf.get("temporalBounding").remove("endYear") - asdf.get("temporalBounding").remove("endDayOfYear") - asdf.get("temporalBounding").remove("endDayOfMonth") - asdf.get("temporalBounding").remove("endMonth") - asdf.get("temporalBounding").remove("instantYear") - asdf.get("temporalBounding").remove("instantDayOfYear") - asdf.get("temporalBounding").remove("instantDayOfMonth") - asdf.get("temporalBounding").remove("instantMonth") - println("cleaned up for dumb logging: "+JsonOutput.toJson(asdf)) - - def objkeys = DataUtils.consolidateNestedKeysInMap(null, ".", parsed) - println("objkeys: "+JsonOutput.toJson(objkeys)) - def junkToremove = DataUtils.filterMapKeys(esmapping.keySet(), objkeys) - println("remove me:" + JsonOutput.toJson(junkToremove)) - // def trimmed = DataUtils.filterMapKeys(junkToremove.keySet(), objkeys) - def trimmed = DataUtils.removeFromMap(objkeys, junkToremove) - println("fixed: "+JsonOutput.toJson(trimmed)) - then: // asdf.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) - trimmed == [ "foo" : "bar"] + // trimmed == [ "foo" : "bar"] + minus == [temporalBounding:[ + instantIndexable:null], + errors:[[nonsense:"horrible"]], garbage:"nuke meeee"] } //////////////////////////////// From acdb286ff244468a0d021bb23fa1ce86d55b0cda Mon Sep 17 00:00:00 2001 From: arianna Date: Wed, 27 May 2020 15:33:22 -0600 Subject: [PATCH 11/29] Revert "wip making new data-utils module" This reverts commit 21d3448a40ea3aef54a3f049faf49f8c350f3ce0. 
--- data-utils/build.gradle | 14 -- .../org/cedar/onestop/utils/ListUtils.java | 48 ------ .../org/cedar/onestop/utils/MapUtils.java | 155 ------------------ elastic-common/build.gradle | 2 - .../elastic/common/ElasticsearchConfig.java | 1 - .../onestop/kafka/common/util/DataUtils.java | 2 - settings.gradle.kts | 1 - 7 files changed, 223 deletions(-) delete mode 100644 data-utils/build.gradle delete mode 100644 data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java delete mode 100644 data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java diff --git a/data-utils/build.gradle b/data-utils/build.gradle deleted file mode 100644 index 96c856cb7..000000000 --- a/data-utils/build.gradle +++ /dev/null @@ -1,14 +0,0 @@ -sourceCompatibility = 11 -targetCompatibility = 11 - -dependencies { - def Versions = project.Versions - - compileOnly("org.slf4j:slf4j-api:1.7.25") - - implementation("org.yaml:snakeyaml:${Versions.SNAKE_YAML}") -} - -jar { - archiveBaseName.set("${rootProject.name}-${project.name}") -} \ No newline at end of file diff --git a/data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java b/data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java deleted file mode 100644 index 6b26ad1d1..000000000 --- a/data-utils/src/main/java/org/cedar/onestop/utils/ListUtils.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.cedar.onestop.utils; - -import java.util.ArrayList; -import java.util.List; - -public class ListUtils { - - public static List addOrInit(List list, T item) { - var result = new ArrayList(); - if (list != null && !list.isEmpty()) { - result.addAll(list); - } - if (item != null) { - result.add(item); - } - return result; - } - - /** - * - * @param list list to truncate - * @param maxListSize list size limit - * @param mostRecentAdditions if true, returned list reflects end of original list as opposed to start - * @param list object type - * @return truncated list of T objects - * @throws IllegalArgumentException if maxListSize is less than or equal to 0 - */ - public static List truncateList(List list, int maxListSize, boolean mostRecentAdditions) { - if (maxListSize <= 0) { - throw new IllegalArgumentException("Attempted to make a list of size [ " + maxListSize + " ]. " + - "Expected a size limit greater than 0."); - } - - var result = new ArrayList(); - if (list != null && !list.isEmpty()) { - var size = list.size(); - if(size <= maxListSize) { - result.addAll(list); - } - else { - var fromIndex = mostRecentAdditions ? size - maxListSize : 0; - var toIndex = mostRecentAdditions ? size : maxListSize; - result.addAll(list.subList(fromIndex, toIndex)); - } - } - return result; - } -} diff --git a/data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java b/data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java deleted file mode 100644 index fe4525182..000000000 --- a/data-utils/src/main/java/org/cedar/onestop/utils/MapUtils.java +++ /dev/null @@ -1,155 +0,0 @@ -package org.cedar.onestop.utils; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import java.io.IOException; -import java.util.*; -import java.util.stream.Collectors; - -public class MapUtils { - -// public static Map parseJsonMap(String json) throws IOException { -// if (json == null || json == "") { -// return new LinkedHashMap(); -// } -// else { -// return new ObjectMapper().readValue(json, Map.class); -// } -// } - - /** - * Returns a merged Map of the original and toAdd Maps. 
Deep merges of nested Maps and Lists are performed and - * explicit duplicates (exact matches for all fields) are avoided. - * @param original Base Map to which elements will be merged from toAdd - * @param toAdd Map of elements to add to the original Map - * @return An updated original Map where new elements from toAdd have been merged. Returns empty Map if - * original and toAdd are empty or null. - */ - public static Map mergeMaps(Map original, Map toAdd) { - Map mergedMap = original == null ? new LinkedHashMap<>() : new LinkedHashMap<>(original); - if (original == null && toAdd == null) { - return Collections.emptyMap(); - } - if (original == null || original.size() == 0) { - return toAdd; - } - if (toAdd == null || toAdd.size() == 0) { - return original; - } - - toAdd.forEach((k, v) -> { - var originalValue = mergedMap.get(k); - if (v instanceof Map && originalValue instanceof Map) { - mergedMap.put(k, mergeMaps((Map) originalValue, (Map) v)); - } - else if (v instanceof List && originalValue instanceof List) { - var mergedList = new HashSet((List) originalValue); - mergedList.addAll((List) v); - mergedMap.put(k, new ArrayList(mergedList)); - } - else { - /* This overwrites simple values but also mismatched object types. Accepting that "risk" here since - useful errors are generated downstream for objects being cast to avro pojos but also because unknown JSON is - allowed to pass through later parsing/analysis steps untouched (either type change could be erroneous but - there's no way to know which) */ - mergedMap.put(k, v); - } - }); - - return mergedMap; - } - - /** - * Returns a new Map of the original with elements in toRemove discarded. Elements in toRemove must match those in - * original exactly, or they will not be removed. Handles nested Maps and Lists. - * @param original Base Map from which elements in toRemove will be removed - * @param toRemove Map of elements to remove from the original Map - * @return An updated original Map where matching elements from toRemove have been discarded. Returns empty Map if - * original is empty or null. - */ - public static Map removeFromMap(Map original, Map toRemove) { - Map mergedMap = original == null ? new LinkedHashMap<>() : new LinkedHashMap<>(original); - if (original == null && toRemove == null) { - return Collections.emptyMap(); - } - if (original == null || original.size() == 0) { - return Collections.emptyMap(); - } - if (toRemove == null || toRemove.size() == 0) { - return original; - } - - toRemove.forEach((k, v) -> { - var originalValue = mergedMap.get(k); - if (v instanceof Map && originalValue instanceof Map) { - mergedMap.put(k, removeFromMap((Map) originalValue, (Map) v)); - } - else if (v instanceof List && originalValue instanceof List) { - var mergedList = new HashSet<>((List) originalValue); - mergedList.removeAll((List) v); - mergedMap.put(k, mergedList); - } - else if ((v == null && originalValue == null) || v.equals(originalValue)) { - mergedMap.remove(k); - } - }); - - return mergedMap; - } - - /** - * Turns a nested map into a flat map with nested keys appended together with the delimiter - * @param parentKey Prefix that all flattened keys start with. Null, empty, or whitespace-only value results in no prefix - * @param delimiter String to delimit between each nested key. Defaults to "." 
if null or empty - * @param originalMap Nested-key map to be flattened - * @return Single-level map with flattened keys - */ - public static Map consolidateNestedKeysInMap(String parentKey, String delimiter, Map originalMap) { - var parent = (parentKey == null || parentKey.isBlank()) ? new String() : parentKey; - var delimiterString = (delimiter == null || delimiter.isEmpty()) ? "." : delimiter; - var newMap = new HashMap(); - - if(originalMap != null && !originalMap.isEmpty()) { - originalMap.forEach((k, v) -> { - String newKey = parent.isEmpty() ? k : parent + delimiterString + k; - if(v instanceof Map) { - newMap.putAll(consolidateNestedKeysInMap(newKey, delimiterString, (Map) v)); - } - else { - newMap.put(newKey, v); - } - }); - } - return newMap; - } - - /** - * Removes the given trimString from any keys in originalMap that match. For example a trim string 'abc.' would turn - * key 'abc.123' into key '123'. - * @param trimString Case insensitive prefix to remove from keys in originalMap - * @param originalMap - * @return New map with modified keys - */ - public static Map trimMapKeys(String trimString, Map originalMap) { - Map trimmedKeysMap = new LinkedHashMap<>(); - originalMap.forEach((k, v) -> { - String trimmedKey = k.toLowerCase().startsWith(trimString.toLowerCase()) ? k.substring(trimString.length()) : k; - trimmedKeysMap.put(trimmedKey, v); - }); - return trimmedKeysMap; - } - - /** - * Returns an map with all keys not contained in the given collection removed - * @param keysToKeep A collection of the keys to preserve in the filtered output; all others will be removed - * @return The filtered map - */ - public static Map filterMapKeys(Collection keysToKeep, Map originalMap) { - if (keysToKeep == null || keysToKeep.size() == 0) { - return new HashMap<>(); - } - return originalMap.entrySet().stream() - .filter(e -> keysToKeep.contains(e.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } -} diff --git a/elastic-common/build.gradle b/elastic-common/build.gradle index ef207391c..80c8ca9ee 100644 --- a/elastic-common/build.gradle +++ b/elastic-common/build.gradle @@ -6,8 +6,6 @@ dependencies { compileOnly("org.slf4j:slf4j-api:1.7.25") - implementation(project(':data-utils')) - implementation("org.elasticsearch.client:elasticsearch-rest-client:${Versions.ELASTIC}") implementation("org.elasticsearch.client:elasticsearch-rest-high-level-client:${Versions.ELASTIC}") implementation("com.fasterxml.jackson.core:jackson-databind:2.10.0") diff --git a/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java b/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java index ac6951cb0..2910d2502 100644 --- a/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java +++ b/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java @@ -127,7 +127,6 @@ public String jsonMapping(String alias) { public Map indexedProperties(String alias) { var parsed = (Map) parsedMapping(alias); var mappings = (Map) parsed.getOrDefault("mappings", Collections.emptyMap()); - return (Map) mappings.getOrDefault("properties", Collections.emptyMap()); } diff --git a/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java b/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java index 8150b5739..c77813cdc 100644 --- a/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java +++ 
b/kafka-common/src/main/java/org/cedar/onestop/kafka/common/util/DataUtils.java @@ -200,8 +200,6 @@ public static Map filterMapKeys(Collection keysToKeep, M .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } - ////////// DELETE ^^^^^^ ////////////////// - /** * @param builderType type of schema builder either ParsedRecord or AggregatedInput, otherwise error out * @param fieldData parsed or input metadata values diff --git a/settings.gradle.kts b/settings.gradle.kts index 1e852b37f..02047c692 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -2,7 +2,6 @@ rootProject.name = "onestop" include( "client", - "data-utils", "e2e-tests", "elastic-common", "geoportal-search", From d525128a9b7d22e18eb04b719e327a9ea04ab0b6 Mon Sep 17 00:00:00 2001 From: Zeb Date: Wed, 27 May 2020 15:58:58 -0600 Subject: [PATCH 12/29] Fixing up tests. --- .../analysis_error_collectionIndex.json | 6 + .../mappings/analysis_error_granuleIndex.json | 6 + .../util/TransformationUtilsSpec.groovy | 201 ++++++++++++++---- 3 files changed, 175 insertions(+), 38 deletions(-) diff --git a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json index ba5e2d670..3bbb551f4 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_collectionIndex.json @@ -108,6 +108,9 @@ "endUtcDateTimeString": { "type": "keyword" }, + "endZoneSpecified": { + "type": "keyword" + }, "instantDescriptor": { "type": "keyword" }, @@ -120,6 +123,9 @@ "instantUtcDateTimeString": { "type": "keyword" }, + "instantZoneSpecified": { + "type": "keyword" + }, "rangeDescriptor": { "type": "keyword" } diff --git a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json index 591132b3a..5cd6136c2 100644 --- a/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json +++ b/elastic-common/src/main/resources/mappings/analysis_error_granuleIndex.json @@ -114,6 +114,9 @@ "endUtcDateTimeString": { "type": "keyword" }, + "endZoneSpecified": { + "type": "keyword" + }, "instantDescriptor": { "type": "keyword" }, @@ -126,6 +129,9 @@ "instantUtcDateTimeString": { "type": "keyword" }, + "instantZoneSpecified": { + "type": "keyword" + }, "rangeDescriptor": { "type": "keyword" } diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 9b07c0d36..e7f29ddf7 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -108,21 +108,55 @@ class TransformationUtilsSpec extends Specification { result.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) } - def "can i construct a record"() { + def "clean up nested map before indexing strictly mapped fields"() { when: - def parsed = [identification:null, titles:null, description:null, dataAccess:null, thumbnail:null, temporalBounding:[ - beginDescriptor:ValidDescriptor.VALID, beginPrecision:ChronoUnit.DAYS.toString(), beginIndexable:true, beginZoneSpecified:null, beginUtcDateTimeString:2000-02-01, beginYear:2000, beginDayOfYear:32, beginDayOfMonth:1, beginMonth:2, - endDescriptor:null, endPrecision:null, 
endIndexable:null, endZoneSpecified:null, endUtcDateTimeString:null, endYear:null, endDayOfYear:null, endDayOfMonth:null, endMonth:null, - instantDescriptor:null, instantPrecision:null, instantIndexable:null, instantZoneSpecified:null, instantUtcDateTimeString:null, instantYear:null, instantDayOfYear:null, instantDayOfMonth:null, instantMonth:null, - rangeDescriptor:null], - spatialBounding:null, internalParentIdentifier:null, errors:[[nonsense:"horrible", source:"valid field"]], garbage:"nuke meeee"] - // println( - // DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) - // ) - - - // def esmapping = DataUtils.wipMapKeys('type', DataUtils.wipMapKeys('properties', DataUtils.consolidateNestedKeysInMap(null, ".", TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)))) - // println("esmapping: "+JsonOutput.toJson(esmapping)) + def parsed = [ + identification: null, + titles: null, + description: null, + dataAccess: null, + thumbnail: null, + temporalBounding: [ + beginDescriptor: ValidDescriptor.VALID, + beginPrecision: ChronoUnit.DAYS.toString(), + beginIndexable: true, + beginZoneSpecified: null, + beginUtcDateTimeString: "2000-02-01", + beginYear: 2000, + beginDayOfYear: 32, + beginDayOfMonth: 1, + beginMonth: 2, + endDescriptor: null, + endPrecision: null, + endIndexable: null, + endZoneSpecified: null, + endUtcDateTimeString: null, + endYear: null, + endDayOfYear: null, + endDayOfMonth: null, + endMonth: null, + instantDescriptor: null, + instantPrecision: null, + instantIndexable: null, + instantZoneSpecified: null, + instantUtcDateTimeString: null, + instantYear: null, + instantDayOfYear: null, + instantDayOfMonth: null, + instantMonth: null, + rangeDescriptor: null, + fakeField: 123 + ], + spatialBounding: null, + internalParentIdentifier: null, + errors: [ + [ + nonsense: "horrible", + source: "valid field" + ] + ], + garbage:"nuke meeee" + ] @@ -158,30 +192,121 @@ class TransformationUtilsSpec extends Specification { // .build() // ).build()).build() - // def parsed = TransformationUtils.unfilteredAEMessage(record) - // def asdf = TransformationUtils.identifyUnmappedFields(parsed, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - - println("parsed "+JsonOutput.toJson(parsed)) - // println("???"+parsed) - // println("in groov"+ [identification:null, titles:null, description:null, dataAccess:null, thumbnail:null, temporalBounding:[ - // beginDescriptor:ValidDescriptor.VALID, beginPrecision:ChronoUnit.DAYS.toString(), beginIndexable:true, beginZoneSpecified:null, beginUtcDateTimeString:2000-02-01, beginYear:2000, beginDayOfYear:32, beginDayOfMonth:1, beginMonth:2, - // endDescriptor:null, endPrecision:null, endIndexable:null, endZoneSpecified:null, endUtcDateTimeString:null, endYear:null, endDayOfYear:null, endDayOfMonth:null, endMonth:null, - // instantDescriptor:null, instantPrecision:null, instantIndexable:null, instantZoneSpecified:null, instantUtcDateTimeString:null, instantYear:null, instantDayOfYear:null, instantDayOfMonth:null, instantMonth:null, - // rangeDescriptor:null], - // spatialBounding:null, internalParentIdentifier:null, errors:[[nonsense:"horrible", source:"valid field"]], garbage:"nuke meeee"] -// ) - def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, knownUnmappedFields) - println("pruned unampped? 
"+JsonOutput.toJson(pruned)) - def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - println("creates minus: "+JsonOutput.toJson(minus)) - println("which results in indexing: "+ JsonOutput.toJson(DataUtils.removeFromMap(pruned, minus))) - - then: - // asdf.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) - // trimmed == [ "foo" : "bar"] - minus == [temporalBounding:[ - instantIndexable:null], - errors:[[nonsense:"horrible"]], garbage:"nuke meeee"] + // def parsed = TransformationUtils.unfilteredAEMessage(record) + + println("parsed "+JsonOutput.toJson(parsed)) + def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, knownUnmappedFields) + println("pruned unampped? "+JsonOutput.toJson(pruned)) + def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + println("creates minus: "+JsonOutput.toJson(minus)) + def indexedRecord = DataUtils.removeFromMap(pruned, minus) + println("which results in indexing: "+ JsonOutput.toJson(indexedRecord)) + + then: + minus == [ + temporalBounding: [ + fakeField: 123 + ], + errors: [ + [ + nonsense: "horrible", + ] + ], + garbage:"nuke meeee" + ] + + // println("wtf"+JsonOutput.toJson(indexedRecord)) + // println("wtf"+JsonOutput.toJson([ + // identification: null, + // titles: null, + // description: null, + // dataAccess: null, + // thumbnail: null, + // temporalBounding: [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // rangeDescriptor: null + // ], + // spatialBounding: null, + // internalParentIdentifier: null, + // errors: [ + // [nonsense:"horrible", + // source: "valid field" + // ] + // ] + // ])) + // assert indexedRecord == [ + // identification: null, + // titles: null, + // description: null, + // dataAccess: null, + // thumbnail: null, + // temporalBounding: [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // rangeDescriptor: null + // ], + // spatialBounding: null, + // internalParentIdentifier: null, + // errors: [ + // [nonsense:"horrible", // FIXME this is not actually desired + // source: "valid field" + // ] + // ] + // ] + def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "internalParentIdentifier", "errors" ] + indexedRecord.keySet().size() == expectedKeyset.size() + indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + + indexedRecord.temporalBounding == [ + beginDescriptor: ValidDescriptor.VALID, + beginPrecision: 
ChronoUnit.DAYS.toString(), + beginIndexable: true, + beginZoneSpecified: null, + beginUtcDateTimeString: "2000-02-01", + endDescriptor: null, + endPrecision: null, + endIndexable: null, + endZoneSpecified: null, + endUtcDateTimeString: null, + instantDescriptor: null, + instantPrecision: null, + instantIndexable: null, + instantZoneSpecified: null, + instantUtcDateTimeString: null, + rangeDescriptor: null + ] + + indexedRecord.errors.size() == 1 + indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired + source: "valid field" + ] } //////////////////////////////// From 0e61e83dd84eedd7506bdff00573809385680878 Mon Sep 17 00:00:00 2001 From: Zeb Date: Mon, 1 Jun 2020 10:03:03 -0600 Subject: [PATCH 13/29] Changed method signatures to index while reporting un-indexable fields and removing them. --- .../elastic/common/ElasticsearchConfig.java | 6 +- .../onestop/indexer/util/IndexingInput.java | 32 +++- .../onestop/indexer/util/IndexingUtils.java | 6 +- .../indexer/util/TransformationUtils.java | 117 ++++++-------- .../util/TransformationUtilsSpec.groovy | 148 ++++-------------- 5 files changed, 112 insertions(+), 197 deletions(-) diff --git a/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java b/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java index 2910d2502..85d69488a 100644 --- a/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java +++ b/elastic-common/src/main/java/org/cedar/onestop/elastic/common/ElasticsearchConfig.java @@ -119,12 +119,12 @@ public String jsonMapping(String alias) { return this.jsonMappings.getOrDefault(alias, null); } - public Map parsedMapping(String alias) { + public Map parsedMapping(String alias) { // retrieve JSON mapping for index alias return this.parsedMappings.getOrDefault(alias, Collections.emptyMap()); } - public Map indexedProperties(String alias) { + public Map indexedProperties(String alias) { var parsed = (Map) parsedMapping(alias); var mappings = (Map) parsed.getOrDefault("mappings", Collections.emptyMap()); return (Map) mappings.getOrDefault("properties", Collections.emptyMap()); @@ -168,4 +168,4 @@ public Boolean sitemapEnabled() { return SITEMAP_ENABLED; } -} \ No newline at end of file +} diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java index f5ba13220..a1f46e046 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java @@ -52,23 +52,43 @@ public String getTargetAnalysisAndErrorsIndex() { return esConfig.analysisAndErrorsAliasFromType(recordType.toString()); } - public Set getTargetSearchIndexFields() { + public static Map getUnmappedAnalysisAndErrorsIndexFields() { + // this method is just to prevent us from logging warnings about fields in the analysis schema that we know and choose not to map + Map knownUnmappedTemporalFields = new HashMap(); + knownUnmappedTemporalFields.put("beginYear", new HashMap()); + knownUnmappedTemporalFields.put("beginDayOfYear", new HashMap()); + knownUnmappedTemporalFields.put("beginDayOfMonth", new HashMap()); + knownUnmappedTemporalFields.put("beginMonth", new HashMap()); + knownUnmappedTemporalFields.put("endYear", new HashMap()); + knownUnmappedTemporalFields.put("endDayOfYear", new HashMap()); + knownUnmappedTemporalFields.put("endDayOfMonth", new HashMap()); + 
knownUnmappedTemporalFields.put("endMonth", new HashMap()); + knownUnmappedTemporalFields.put("instantYear", new HashMap()); + knownUnmappedTemporalFields.put("instantDayOfYear", new HashMap()); + knownUnmappedTemporalFields.put("instantDayOfMonth", new HashMap()); + knownUnmappedTemporalFields.put("instantMonth", new HashMap()); + Map knownUnmappedFields = new HashMap(); + knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); + return knownUnmappedFields; + } + + public Map getTargetSearchIndexFields() { var searchAlias = esConfig.searchAliasFromType(recordType.toString()); if(searchAlias != null) { - return esConfig.indexedProperties(searchAlias).keySet(); + return esConfig.indexedProperties(searchAlias); } else { - return new HashSet<>(); + return new HashMap<>(); } } - public Set getTargetAnalysisAndErrorsIndexFields() { + public Map getTargetAnalysisAndErrorsIndexFields() { var aeAlias = esConfig.analysisAndErrorsAliasFromType(recordType.toString()); if(aeAlias != null) { - return esConfig.indexedProperties(aeAlias).keySet(); + return esConfig.indexedProperties(aeAlias); } else { - return new HashSet<>(); + return new HashMap<>(); } } diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java index 3be1903ed..e528071b8 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java @@ -78,9 +78,9 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN } else { var formattedRecord = new HashMap(); - log.info("build A&E write request "+input.getValue().value() +" and "+ input.getTargetAnalysisAndErrorsIndexFields()); - log.info("transforms to "+TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); - formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); + // log.info("build A&E write request "+input.getValue().value() +" and "+ input.getTargetAnalysisAndErrorsIndexFields()); + // log.info("transforms to "+TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields(), input.getUnmappedAnalysisAndErrorsIndexFields())); // TODO change this to pass the ES mapping in instead + formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields(), input.getUnmappedAnalysisAndErrorsIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); } diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index 9cfb43edc..af7da4757 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -32,7 +32,7 @@ public class TransformationUtils { /////////////////////////////////////////////////////////////////////////////// // Indexing For Analysis & Errors // /////////////////////////////////////////////////////////////////////////////// - public static Map reformatMessageForAnalysisAndErrors(ParsedRecord 
record, Set targetFields) { + public static Map reformatMessageForAnalysisAndErrors(ParsedRecord record, Map targetFieldsMapping, Map knownUnmappedFields) { var analysis = record.getAnalysis(); var errors = record.getErrors(); @@ -46,34 +46,35 @@ public static Map reformatMessageForAnalysisAndErrors(ParsedReco analysisMap.put("errors", errorsList); // drop fields not present in target index - // TODO make recursive! - var result = new LinkedHashMap(targetFields.size()); - targetFields.forEach(f -> result.put(f, analysisMap.get(f))); - return result; - } - - public static Map unfilteredAEMessage(ParsedRecord record) { - var analysis = record.getAnalysis(); - var errors = record.getErrors(); - - var analysisMap = AvroUtils.avroToMap(analysis, true); - analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); - var errorsList = errors.stream() - .map(e -> AvroUtils.avroToMap(e)) - .collect(Collectors.toList()); - - - var garbageError = new LinkedHashMap(); - garbageError.put("nonsense", "horrible"); - garbageError.put("source", "valid field" ); - errorsList.add(garbageError); - - analysisMap.put("errors", errorsList); - analysisMap.put("garbage", "nuke meeee"); // FIXME - return analysisMap; + var pruned = TransformationUtils.pruneKnownUnmappedFields(analysisMap, knownUnmappedFields); + var minus = TransformationUtils.identifyUnmappedFields(pruned, targetFieldsMapping); // TODO identify which it's going to + log.warn("The following fields were dropped when indexing to analysis and errors: " + minus); // TODO "add for record `id`" + return DataUtils.removeFromMap(pruned, minus); } + // public static Map unfilteredAEMessage(ParsedRecord record) { + // var analysis = record.getAnalysis(); + // var errors = record.getErrors(); + // + // var analysisMap = AvroUtils.avroToMap(analysis, true); + // analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); + // var errorsList = errors.stream() + // .map(e -> AvroUtils.avroToMap(e)) + // .collect(Collectors.toList()); + // + // + // var garbageError = new LinkedHashMap(); + // garbageError.put("nonsense", "horrible"); + // garbageError.put("source", "valid field" ); + // errorsList.add(garbageError); + // + // + // analysisMap.put("errors", errorsList); + // analysisMap.put("garbage", "nuke meeee"); // FIXME + // return analysisMap; + // } + public static Map pruneKnownUnmappedFields(Map analysisMap, Map unmappedFields) { var result = new LinkedHashMap(); @@ -81,14 +82,13 @@ public static Map pruneKnownUnmappedFields(Map a if (!unmappedFields.containsKey(k)) { result.put(k, v); } else { - Map nestedProperties = (Map)((Map)unmappedFields.get(k)); // TODO almost identical to stuff to remove... but reversed... and no ".properties" layer... 
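
For orientation, pruneKnownUnmappedFields walks the analysis map against a skeleton of known-unmapped field names: a key that appears in the skeleton as a leaf is dropped, and nested maps recurse. A minimal, runnable sketch of that recursion, with toy field names and the list branch omitted (a sketch under those assumptions, not the project's code):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class PruneSketch {
      // Drop any leaf listed in the skeleton; recurse where both data and skeleton nest.
      static Map<String, Object> prune(Map<String, Object> data, Map<String, Object> skeleton) {
        var result = new LinkedHashMap<String, Object>();
        data.forEach((k, v) -> {
          if (!skeleton.containsKey(k)) {
            result.put(k, v); // not a known-unmapped field: keep it
          } else if (v instanceof Map) {
            @SuppressWarnings("unchecked")
            var nested = prune((Map<String, Object>) v, (Map<String, Object>) skeleton.get(k));
            result.put(k, nested);
          } // a listed leaf (e.g. beginYear) is silently dropped
        });
        return result;
      }

      public static void main(String[] args) {
        var skeleton = Map.<String, Object>of("temporalBounding", Map.of("beginYear", Map.of()));
        var data = new LinkedHashMap<String, Object>();
        data.put("temporalBounding", Map.of("beginDate", "2000-02-01", "beginYear", 2000));
        data.put("spatialBounding", null);
        System.out.println(prune(data, skeleton));
        // -> {temporalBounding={beginDate=2000-02-01}, spatialBounding=null}
      }
    }

The skeleton reuses plain maps as a poor-man's set of paths, which is why every leaf built by getUnmappedAnalysisAndErrorsIndexFields above is an empty HashMap.
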
+ Map nestedProperties = (Map)((Map)unmappedFields.get(k)); if (v instanceof Map) { result.put(k, pruneKnownUnmappedFields((Map) v, nestedProperties)); } else if (v instanceof List) { var list = ((List) v).stream().map(item -> pruneKnownUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) .collect(Collectors.toList()); - System.out.println("ZEB - list: "+list); result.put(k, list); } } @@ -96,67 +96,35 @@ public static Map pruneKnownUnmappedFields(Map a return result; } - public static Map identifyUnmappedFields(Map analysisMap, Map mapping) { + public static Map identifyUnmappedFields(Map analysisMap, Map mapping) { var result = new LinkedHashMap(); - // analysisMap.entrySet().stream().forEach(e -> { - // if( !mapping.containsKey(e.getKey())) { - // result.put(e.getKey(), e.getValue()); - // } else { - // if (e.getValue() instanceof Map){ - // System.out.println("ZEB: the value is a map!"); - // // System.out.println("mapping: "+mapping.get(e.getKey()).get("properties")); - // // System.out.println("--> "+identifyUnmappedFields((Map)e.getValue(), (Map)mapping.get(e.getKey()).get("properties"))); - // result.put(e.getKey(), identifyUnmappedFields((Map)e.getValue(), (Map)((Map)mapping.get(e.getKey())).get("properties"))); // TODO brute force assumes mapping is an object map string:object here too - // } else if(e.getValue() instanceof Collection){ - // // TODO!!!! - // // result.put(e.getKey(), ((Collection)e.getValue()).filter(item -> !identifyUnmappedFields((Map)item, (Map)((Map)mapping.get(e.getKey())).get("properties"))).isEmpty()); - // } - // } - // }); - // return result; - // + + if (mapping == null) { + return analysisMap; + } analysisMap.forEach((k, v) -> { if (!mapping.containsKey(k)) { result.put(k, v); } else { - Map nestedProperties = (Map)((Map)mapping.get(k)).get("properties"); // TODO assumes mapping is also a Map! - - // Map knownUnmapped = (Map)knownUnmappedFields.get(k); + Map nestedProperties = (Map)((Map)mapping.get(k)).get("properties"); // TODO assumes mapping is also a Map! 
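
The one structural difference from the prune step is the ".properties" unwrap the TODO above mentions: an Elasticsearch object mapping nests its children under a properties key at every level, while the documents themselves do not. A runnable sketch of that mapping-driven walk (toy mapping, list branch omitted; hypothetical names, not the project's code):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class UnmappedFieldsSketch {
      // Collect document entries that have no counterpart in an ES-style mapping.
      @SuppressWarnings("unchecked")
      static Map<String, Object> unmapped(Map<String, Object> doc, Map<String, Object> mapping) {
        var extra = new LinkedHashMap<String, Object>();
        if (mapping == null) { return doc; } // nothing is mapped at this level: all of it is extra
        doc.forEach((k, v) -> {
          if (!mapping.containsKey(k)) {
            extra.put(k, v);
          } else if (v instanceof Map) {
            // object mappings keep their children one level down, under "properties"
            var childProps = (Map<String, Object>) ((Map<String, Object>) mapping.get(k)).get("properties");
            var nested = unmapped((Map<String, Object>) v, childProps);
            if (!nested.isEmpty()) { extra.put(k, nested); }
          }
        });
        return extra;
      }

      public static void main(String[] args) {
        var mapping = Map.<String, Object>of("temporalBounding",
            Map.of("properties", Map.of("beginDescriptor", Map.of("type", "keyword"))));
        var doc = new LinkedHashMap<String, Object>();
        doc.put("temporalBounding", Map.of("beginDescriptor", "VALID", "fakeField", 123));
        doc.put("garbage", "nuke meeee");
        System.out.println(unmapped(doc, mapping));
        // -> {temporalBounding={fakeField=123}, garbage=nuke meeee}
      }
    }
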
if (v instanceof Map) { result.put(k, identifyUnmappedFields((Map) v, nestedProperties)); } else if (v instanceof List) { - var list = ((List) v).stream().map(item -> identifyUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) + var list = ((List) v).stream().filter(item -> item instanceof Map).map(item -> identifyUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) .collect(Collectors.toList()); - System.out.println("ZEB - list: "+list); result.put(k, list); } } }); return result; } -/* - toRemove.forEach((k, v) -> { - var originalValue = mergedMap.get(k); - if (v instanceof Map && originalValue instanceof Map) { - mergedMap.put(k, removeFromMap((Map) originalValue, (Map) v)); - } - else if (v instanceof List && originalValue instanceof List) { - var mergedList = new HashSet<>((List) originalValue); - mergedList.removeAll((List) v); - mergedMap.put(k, mergedList); - } - else if ((v == null && originalValue == null) || v.equals(originalValue)) { - mergedMap.remove(k); - } - }); -*/ /////////////////////////////////////////////////////////////////////////////// // Indexing For Search // /////////////////////////////////////////////////////////////////////////////// - public static Map reformatMessageForSearch(ParsedRecord record, Set targetFields) { + public static Map reformatMessageForSearch(ParsedRecord record, Map targetFieldsMapping) { var discovery = record.getDiscovery(); var analysis = record.getAnalysis(); var discoveryMap = AvroUtils.avroToMap(discovery, true); @@ -174,9 +142,16 @@ public static Map reformatMessageForSearch(ParsedRecord record, discoveryMap.put("checksums", prepareChecksums(record)); // drop fields not present in target index - var result = new LinkedHashMap(targetFields.size()); - targetFields.forEach(f -> result.put(f, discoveryMap.get(f))); - return result; + // // FIXME + // var result = new LinkedHashMap(targetFieldsMapping.size()); + // // targetFields.forEach(f -> result.put(f, discoveryMap.get(f))); + // return result; + + // var pruned = TransformationUtils.pruneKnownUnmappedFields(discoveryMap, knownUnmappedFields); + var pruned = discoveryMap; + var minus = TransformationUtils.identifyUnmappedFields(pruned, targetFieldsMapping); + log.warn("The following fields were dropped when indexing to search: " + minus); // TODO "add for record `id`" + return DataUtils.removeFromMap(pruned, minus); } //////////////////////////////// diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index e7f29ddf7..984af8e2c 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -25,9 +25,9 @@ import org.cedar.onestop.kafka.common.util.DataUtils; @Unroll class TransformationUtilsSpec extends Specification { - static collectionFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet() - static granuleFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet() - static granuleAnalysisErrorFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS).keySet() + static Map collectionFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS) + static Map granuleFields = 
TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS) + static Map granuleAnalysisErrorFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS) static expectedKeywords = [ "SIO > Super Important Organization", @@ -80,35 +80,20 @@ class TransformationUtilsSpec extends Specification { /////////////////////////////// // Generic Indexed Fields // /////////////////////////////// - def "only mapped #type fields are indexed"() { - when: - def result = TransformationUtils.reformatMessageForSearch(record, fields) - - then: - result.keySet().each({ assert fields.contains(it) }) - - where: - type | fields | record - 'collection' | collectionFields | TestUtils.inputCollectionRecord - 'granule' | granuleFields | TestUtils.inputGranuleRecord - } - - def "only mapped nested fields are indexed"() { - when: - def result = TransformationUtils.reformatMessageForAnalysisAndErrors(TestUtils.inputGranuleRecord, granuleAnalysisErrorFields) - - - def asdf = TransformationUtils.identifyUnmappedFields(TransformationUtils.unfilteredAEMessage(TestUtils.inputGranuleRecord), TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - - println("ZEB") - println(result) - println(JsonOutput.toJson(asdf)) - - then: - result.keySet().each({ assert granuleAnalysisErrorFields.contains(it) }) - } - - def "clean up nested map before indexing strictly mapped fields"() { + // def "only mapped #type fields are indexed"() { + // when: + // def result = TransformationUtils.reformatMessageForSearch(record, fields) + // + // then: + // result.keySet().each({ assert fields.keySet().contains(it) }) // TODO this is a shallow only check! + // + // where: + // type | fields | record + // 'collection' | collectionFields | TestUtils.inputCollectionRecord + // 'granule' | granuleFields | TestUtils.inputGranuleRecord + // } + + def "clean up nested map before indexing strictly mapped fields"() { // TODO change to use reformatMessageFor method when: def parsed = [ identification: null, @@ -160,21 +145,21 @@ class TransformationUtilsSpec extends Specification { - def knownUnmappedTemporalFields = new HashMap(); - knownUnmappedTemporalFields.put("beginYear", new HashMap()); - knownUnmappedTemporalFields.put("beginDayOfYear", new HashMap()); - knownUnmappedTemporalFields.put("beginDayOfMonth", new HashMap()); - knownUnmappedTemporalFields.put("beginMonth", new HashMap()); - knownUnmappedTemporalFields.put("endYear", new HashMap()); - knownUnmappedTemporalFields.put("endDayOfYear", new HashMap()); - knownUnmappedTemporalFields.put("endDayOfMonth", new HashMap()); - knownUnmappedTemporalFields.put("endMonth", new HashMap()); - knownUnmappedTemporalFields.put("instantYear", new HashMap()); - knownUnmappedTemporalFields.put("instantDayOfYear", new HashMap()); - knownUnmappedTemporalFields.put("instantDayOfMonth", new HashMap()); - knownUnmappedTemporalFields.put("instantMonth", new HashMap()); - def knownUnmappedFields = new HashMap(); - knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); + // def knownUnmappedTemporalFields = new HashMap(); + // knownUnmappedTemporalFields.put("beginYear", new HashMap()); + // knownUnmappedTemporalFields.put("beginDayOfYear", new HashMap()); + // knownUnmappedTemporalFields.put("beginDayOfMonth", new HashMap()); + // knownUnmappedTemporalFields.put("beginMonth", new HashMap()); + // knownUnmappedTemporalFields.put("endYear", new HashMap()); + // 
knownUnmappedTemporalFields.put("endDayOfYear", new HashMap()); + // knownUnmappedTemporalFields.put("endDayOfMonth", new HashMap()); + // knownUnmappedTemporalFields.put("endMonth", new HashMap()); + // knownUnmappedTemporalFields.put("instantYear", new HashMap()); + // knownUnmappedTemporalFields.put("instantDayOfYear", new HashMap()); + // knownUnmappedTemporalFields.put("instantDayOfMonth", new HashMap()); + // knownUnmappedTemporalFields.put("instantMonth", new HashMap()); + // def knownUnmappedFields = new HashMap(); + // knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) // .setAnalysis( @@ -195,7 +180,7 @@ class TransformationUtilsSpec extends Specification { // def parsed = TransformationUtils.unfilteredAEMessage(record) println("parsed "+JsonOutput.toJson(parsed)) - def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, knownUnmappedFields) + def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) println("pruned unampped? "+JsonOutput.toJson(pruned)) def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) println("creates minus: "+JsonOutput.toJson(minus)) @@ -215,71 +200,6 @@ class TransformationUtilsSpec extends Specification { garbage:"nuke meeee" ] - // println("wtf"+JsonOutput.toJson(indexedRecord)) - // println("wtf"+JsonOutput.toJson([ - // identification: null, - // titles: null, - // description: null, - // dataAccess: null, - // thumbnail: null, - // temporalBounding: [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // instantDescriptor: null, - // instantPrecision: null, - // instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // rangeDescriptor: null - // ], - // spatialBounding: null, - // internalParentIdentifier: null, - // errors: [ - // [nonsense:"horrible", - // source: "valid field" - // ] - // ] - // ])) - // assert indexedRecord == [ - // identification: null, - // titles: null, - // description: null, - // dataAccess: null, - // thumbnail: null, - // temporalBounding: [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // instantDescriptor: null, - // instantPrecision: null, - // instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // rangeDescriptor: null - // ], - // spatialBounding: null, - // internalParentIdentifier: null, - // errors: [ - // [nonsense:"horrible", // FIXME this is not actually desired - // source: "valid field" - // ] - // ] - // ] def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "internalParentIdentifier", "errors" ] indexedRecord.keySet().size() == expectedKeyset.size() indexedRecord.keySet().each({ assert 
expectedKeyset.contains(it) }) @@ -577,7 +497,7 @@ class TransformationUtilsSpec extends Specification { def "accession values are not included"() { when: - def result = TransformationUtils.reformatMessageForSearch(TestUtils.inputAvroRecord, TestUtils.esConfig.parsedMapping(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet()) + def result = TransformationUtils.reformatMessageForSearch(TestUtils.inputAvroRecord, collectionFields) then: result.accessionValues == null From a82f22459d335182f84486a309d7d5556f8ad637 Mon Sep 17 00:00:00 2001 From: Zeb Date: Mon, 1 Jun 2020 11:38:40 -0600 Subject: [PATCH 14/29] more unit tests --- .../indexer/util/TransformationUtils.java | 33 ++- .../util/TransformationUtilsSpec.groovy | 272 ++++++++++++++++-- 2 files changed, 266 insertions(+), 39 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index af7da4757..f4d586b11 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -396,23 +396,26 @@ private static Map prepareDates(TemporalBounding bounding, Tempo private static HashMap parseAdditionalTimeFields(String prefix, String time){ var result = new HashMap(); - Integer dayOfYear, dayOfMonth, month; - if (time != null) { - ZonedDateTime dateTime = ZonedDateTime.parse(time); + try { - dayOfYear = dateTime.getDayOfYear(); - dayOfMonth = dateTime.getDayOfMonth(); - month = dateTime.getMonthValue(); - } - else { - dayOfYear = null; - dayOfMonth = null; - month = null; - } + Integer dayOfYear, dayOfMonth, month; + if (time != null) { + ZonedDateTime dateTime = ZonedDateTime.parse(time); + + dayOfYear = dateTime.getDayOfYear(); + dayOfMonth = dateTime.getDayOfMonth(); + month = dateTime.getMonthValue(); + } + else { + dayOfYear = null; + dayOfMonth = null; + month = null; + } - result.put(prefix + "DayOfYear", dayOfYear); - result.put(prefix + "DayOfMonth", dayOfMonth); - result.put(prefix + "Month", month); + result.put(prefix + "DayOfYear", dayOfYear); + result.put(prefix + "DayOfMonth", dayOfMonth); + result.put(prefix + "Month", month); + } catch (Exception e) {} // TODO temporary return result; } diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 984af8e2c..a1b95f592 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -93,10 +93,140 @@ class TransformationUtilsSpec extends Specification { // 'granule' | granuleFields | TestUtils.inputGranuleRecord // } - def "clean up nested map before indexing strictly mapped fields"() { // TODO change to use reformatMessageFor method + + def "clean up nested map before indexing strictly mapped fields for search (granule)"() { + when: + // def parsed = [ + // identification: null, + // titles: null, + // description: null, + // dataAccess: null, + // thumbnail: null, + // temporalBounding: [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // beginYear: 2000, + // beginDayOfYear: 32, + // beginDayOfMonth: 1, + // beginMonth: 2, + // 
endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // endYear: null, + // endDayOfYear: null, + // endDayOfMonth: null, + // endMonth: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // instantYear: null, + // instantDayOfYear: null, + // instantDayOfMonth: null, + // instantMonth: null, + // rangeDescriptor: null, + // fakeField: 123 + // ], + // spatialBounding: null, + // internalParentIdentifier: null, + // errors: [ + // [ + // nonsense: "horrible", + // source: "valid field" + // ] + // ], + // garbage:"nuke meeee" + // ] + ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + .setAnalysis( + Analysis.newBuilder().setTemporalBounding( + TemporalBoundingAnalysis.newBuilder() + .setBeginDescriptor(ValidDescriptor.VALID) + .setBeginIndexable(true) + .setBeginPrecision(ChronoUnit.DAYS.toString()) + .setBeginZoneSpecified(null) + .setBeginUtcDateTimeString("2000-02-01") + .setBeginYear(2000) + .setBeginMonth(2) + .setBeginDayOfYear(32) + .setBeginDayOfMonth(1) + .build() + ).build()).build() + + + // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) + def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS)) + // def indexedRecord = DataUtils.removeFromMap(pruned, minus) + + then: + // minus == [ + // temporalBounding: [ + // fakeField: 123 + // ], + // errors: [ + // [ + // nonsense: "horrible", + // ] + // ], + // garbage:"nuke meeee" + // ] + + def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "legalConstraints", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "internalParentIdentifier", "filename", "checksums"] + + + indexedRecord.keySet().size() == expectedKeyset.size() + indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + + } + + + def "clean up nested map before indexing strictly mapped fields for search (collection)"() { + when: + + ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + .setAnalysis( + Analysis.newBuilder().setTemporalBounding( + TemporalBoundingAnalysis.newBuilder() + .setBeginDescriptor(ValidDescriptor.VALID) + .setBeginIndexable(true) + .setBeginPrecision(ChronoUnit.DAYS.toString()) + .setBeginZoneSpecified(null) + .setBeginUtcDateTimeString("2000-02-01") + .setBeginYear(2000) + .setBeginMonth(2) + .setBeginDayOfYear(32) + .setBeginDayOfMonth(1) + .build() + ).build()).build() + + + // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) + def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS)) + // def indexedRecord = 
DataUtils.removeFromMap(pruned, minus) + + then: + + def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "useLimitation", "legalConstraints", "accessFeeStatement", "orderingInstructions", "edition", "dsmmAverage", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "organizationNames", + "individualNames", "checksums"] + + + indexedRecord.keySet().size() == expectedKeyset.size() + expectedKeyset.each({ assert indexedRecord.keySet().contains(it) }) + indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + + } + + def "clean up nested map before indexing strictly mapped fields for analysis and errors (granule)"() { // TODO change to use reformatMessageFor method when: def parsed = [ identification: null, + internalParentIdentifier: null, titles: null, description: null, dataAccess: null, @@ -133,7 +263,6 @@ class TransformationUtilsSpec extends Specification { fakeField: 123 ], spatialBounding: null, - internalParentIdentifier: null, errors: [ [ nonsense: "horrible", @@ -143,24 +272,6 @@ class TransformationUtilsSpec extends Specification { garbage:"nuke meeee" ] - - - // def knownUnmappedTemporalFields = new HashMap(); - // knownUnmappedTemporalFields.put("beginYear", new HashMap()); - // knownUnmappedTemporalFields.put("beginDayOfYear", new HashMap()); - // knownUnmappedTemporalFields.put("beginDayOfMonth", new HashMap()); - // knownUnmappedTemporalFields.put("beginMonth", new HashMap()); - // knownUnmappedTemporalFields.put("endYear", new HashMap()); - // knownUnmappedTemporalFields.put("endDayOfYear", new HashMap()); - // knownUnmappedTemporalFields.put("endDayOfMonth", new HashMap()); - // knownUnmappedTemporalFields.put("endMonth", new HashMap()); - // knownUnmappedTemporalFields.put("instantYear", new HashMap()); - // knownUnmappedTemporalFields.put("instantDayOfYear", new HashMap()); - // knownUnmappedTemporalFields.put("instantDayOfMonth", new HashMap()); - // knownUnmappedTemporalFields.put("instantMonth", new HashMap()); - // def knownUnmappedFields = new HashMap(); - // knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); - // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) // .setAnalysis( // Analysis.newBuilder().setTemporalBounding( @@ -179,13 +290,9 @@ class TransformationUtilsSpec extends Specification { // def parsed = TransformationUtils.unfilteredAEMessage(record) - println("parsed "+JsonOutput.toJson(parsed)) def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - println("pruned unampped? 
"+JsonOutput.toJson(pruned)) def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - println("creates minus: "+JsonOutput.toJson(minus)) def indexedRecord = DataUtils.removeFromMap(pruned, minus) - println("which results in indexing: "+ JsonOutput.toJson(indexedRecord)) then: minus == [ @@ -227,8 +334,125 @@ class TransformationUtilsSpec extends Specification { indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired source: "valid field" ] + } + def "clean up nested map before indexing strictly mapped fields for analysis and errors (collection)"() { // TODO change to use reformatMessageFor method + when: + def parsed = [ + identification: null, + internalParentIdentifier: null, + titles: null, + description: null, + dataAccess: null, + thumbnail: null, + temporalBounding: [ + beginDescriptor: ValidDescriptor.VALID, + beginPrecision: ChronoUnit.DAYS.toString(), + beginIndexable: true, + beginZoneSpecified: null, + beginUtcDateTimeString: "2000-02-01", + beginYear: 2000, + beginDayOfYear: 32, + beginDayOfMonth: 1, + beginMonth: 2, + endDescriptor: null, + endPrecision: null, + endIndexable: null, + endZoneSpecified: null, + endUtcDateTimeString: null, + endYear: null, + endDayOfYear: null, + endDayOfMonth: null, + endMonth: null, + instantDescriptor: null, + instantPrecision: null, + instantIndexable: null, + instantZoneSpecified: null, + instantUtcDateTimeString: null, + instantYear: null, + instantDayOfYear: null, + instantDayOfMonth: null, + instantMonth: null, + rangeDescriptor: null, + fakeField: 123 + ], + spatialBounding: null, + errors: [ + [ + nonsense: "horrible", + source: "valid field" + ] + ], + garbage:"nuke meeee" + ] + + // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + // .setAnalysis( + // Analysis.newBuilder().setTemporalBounding( + // TemporalBoundingAnalysis.newBuilder() + // .setBeginDescriptor(ValidDescriptor.VALID) + // .setBeginIndexable(true) + // .setBeginPrecision(ChronoUnit.DAYS.toString()) + // .setBeginZoneSpecified(null) + // .setBeginUtcDateTimeString("2000-02-01") + // .setBeginYear(2000) + // .setBeginMonth(2) + // .setBeginDayOfYear(32) + // .setBeginDayOfMonth(1) + // .build() + // ).build()).build() + + // def parsed = TransformationUtils.unfilteredAEMessage(record) + + def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) + def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + def indexedRecord = DataUtils.removeFromMap(pruned, minus) + + then: + minus == [ + internalParentIdentifier: null, // ok for granule, not collection + temporalBounding: [ + fakeField: 123 + ], + errors: [ + [ + nonsense: "horrible", + ] + ], + garbage:"nuke meeee" + ] + + def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "errors" ] + indexedRecord.keySet().size() == expectedKeyset.size() + indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + + indexedRecord.temporalBounding == [ + beginDescriptor: ValidDescriptor.VALID, + beginPrecision: ChronoUnit.DAYS.toString(), + beginIndexable: true, + beginZoneSpecified: null, + beginUtcDateTimeString: "2000-02-01", + endDescriptor: null, + endPrecision: null, + endIndexable: null, + endZoneSpecified: 
null, + endUtcDateTimeString: null, + instantDescriptor: null, + instantPrecision: null, + instantIndexable: null, + instantZoneSpecified: null, + instantUtcDateTimeString: null, + rangeDescriptor: null + ] + + indexedRecord.errors.size() == 1 + indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired + source: "valid field" + ] + + } + //////////////////////////////// // Identifiers, "Names" // //////////////////////////////// From 3a8fcf0be01cf75e1dc11151928759e577ebe721 Mon Sep 17 00:00:00 2001 From: Zeb Date: Fri, 5 Jun 2020 10:17:43 -0600 Subject: [PATCH 15/29] only add fields that should be there, transformed if needed --- .../onestop/indexer/util/IndexingInput.java | 50 +- .../onestop/indexer/util/IndexingUtils.java | 6 +- .../indexer/util/TransformationUtils.java | 152 +-- .../util/TransformationUtilsSpec.groovy | 925 +++++++++++++----- 4 files changed, 740 insertions(+), 393 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java index a1f46e046..fcfe59659 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java @@ -52,64 +52,26 @@ public String getTargetAnalysisAndErrorsIndex() { return esConfig.analysisAndErrorsAliasFromType(recordType.toString()); } - public static Map getUnmappedAnalysisAndErrorsIndexFields() { - // this method is just to prevent us from logging warnings about fields in the analysis schema that we know and choose not to map - Map knownUnmappedTemporalFields = new HashMap(); - knownUnmappedTemporalFields.put("beginYear", new HashMap()); - knownUnmappedTemporalFields.put("beginDayOfYear", new HashMap()); - knownUnmappedTemporalFields.put("beginDayOfMonth", new HashMap()); - knownUnmappedTemporalFields.put("beginMonth", new HashMap()); - knownUnmappedTemporalFields.put("endYear", new HashMap()); - knownUnmappedTemporalFields.put("endDayOfYear", new HashMap()); - knownUnmappedTemporalFields.put("endDayOfMonth", new HashMap()); - knownUnmappedTemporalFields.put("endMonth", new HashMap()); - knownUnmappedTemporalFields.put("instantYear", new HashMap()); - knownUnmappedTemporalFields.put("instantDayOfYear", new HashMap()); - knownUnmappedTemporalFields.put("instantDayOfMonth", new HashMap()); - knownUnmappedTemporalFields.put("instantMonth", new HashMap()); - Map knownUnmappedFields = new HashMap(); - knownUnmappedFields.put("temporalBounding", knownUnmappedTemporalFields); - return knownUnmappedFields; - } - - public Map getTargetSearchIndexFields() { + public Set getTargetSearchIndexFields() { var searchAlias = esConfig.searchAliasFromType(recordType.toString()); if(searchAlias != null) { - return esConfig.indexedProperties(searchAlias); + return esConfig.indexedProperties(searchAlias).keySet(); } else { - return new HashMap<>(); + return new HashSet<>(); } } - public Map getTargetAnalysisAndErrorsIndexFields() { + public Set getTargetAnalysisAndErrorsIndexFields() { var aeAlias = esConfig.analysisAndErrorsAliasFromType(recordType.toString()); if(aeAlias != null) { - return esConfig.indexedProperties(aeAlias); + return esConfig.indexedProperties(aeAlias).keySet(); } else { - return new HashMap<>(); + return new HashSet<>(); } } - // public Map getTargetAnalysisAndErrorsIndexMapping() { - // var aeAlias = esConfig.analysisAndErrorsAliasFromType(recordType.toString()); - // if(aeAlias != null) { - // return 
esConfig.indexedProperties(aeAlias); - // } - // else { - // return new HashMap<>(); - // } - // } - - // public static Map getNestedKeys(Map originalMap) { - // if (keysToKeep == null || keysToKeep.size() == 0) { - // return new HashMap<>(); - // } - // return originalMap.entrySet().stream() - // .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - // } - @Override public String toString() { return "IndexingInput {" + diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java index e528071b8..01a9a14e9 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java @@ -64,8 +64,6 @@ public static DocWriteRequest buildSearchWriteRequest(String indexName, DocWr } else { var formattedRecord = new HashMap(); - // log.info("build search write request "+input.getValue().value()+ " and "+input.getTargetSearchIndexFields()); - // log.info("transforms to "+TransformationUtils.reformatMessageForSearch(input.getValue().value(), input.getTargetSearchIndexFields())); formattedRecord.putAll(TransformationUtils.reformatMessageForSearch(input.getValue().value(), input.getTargetSearchIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); @@ -78,9 +76,7 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN } else { var formattedRecord = new HashMap(); - // log.info("build A&E write request "+input.getValue().value() +" and "+ input.getTargetAnalysisAndErrorsIndexFields()); - // log.info("transforms to "+TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields(), input.getUnmappedAnalysisAndErrorsIndexFields())); // TODO change this to pass the ES mapping in instead - formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields(), input.getUnmappedAnalysisAndErrorsIndexFields())); + formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); } diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index f4d586b11..bf0f17d47 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -32,126 +32,76 @@ public class TransformationUtils { /////////////////////////////////////////////////////////////////////////////// // Indexing For Analysis & Errors // /////////////////////////////////////////////////////////////////////////////// - public static Map reformatMessageForAnalysisAndErrors(ParsedRecord record, Map targetFieldsMapping, Map knownUnmappedFields) { + public static Map reformatMessageForAnalysisAndErrors(ParsedRecord record, Set fields) { var analysis = record.getAnalysis(); var errors = record.getErrors(); var analysisMap = AvroUtils.avroToMap(analysis, true); - analysisMap.put("internalParentIdentifier", 
prepareInternalParentIdentifier(record)); + var message = new HashMap(); + + fields.forEach(field -> { + message.put(field, analysisMap.get(field)); + }); + if (fields.contains("internalParentIdentifier")) { + analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); + } var errorsList = errors.stream() .map(e -> AvroUtils.avroToMap(e)) .collect(Collectors.toList()); + message.put("errors", errorsList); - analysisMap.put("errors", errorsList); - - // drop fields not present in target index - - var pruned = TransformationUtils.pruneKnownUnmappedFields(analysisMap, knownUnmappedFields); - var minus = TransformationUtils.identifyUnmappedFields(pruned, targetFieldsMapping); // TODO identify which it's going to - log.warn("The following fields were dropped when indexing to analysis and errors: " + minus); // TODO "add for record `id`" - return DataUtils.removeFromMap(pruned, minus); - } - - // public static Map unfilteredAEMessage(ParsedRecord record) { - // var analysis = record.getAnalysis(); - // var errors = record.getErrors(); - // - // var analysisMap = AvroUtils.avroToMap(analysis, true); - // analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); - // var errorsList = errors.stream() - // .map(e -> AvroUtils.avroToMap(e)) - // .collect(Collectors.toList()); - // - // - // var garbageError = new LinkedHashMap(); - // garbageError.put("nonsense", "horrible"); - // garbageError.put("source", "valid field" ); - // errorsList.add(garbageError); - // - // - // analysisMap.put("errors", errorsList); - // analysisMap.put("garbage", "nuke meeee"); // FIXME - // return analysisMap; - // } - - public static Map pruneKnownUnmappedFields(Map analysisMap, Map unmappedFields) { - - var result = new LinkedHashMap(); - analysisMap.forEach((k, v) -> { - if (!unmappedFields.containsKey(k)) { - result.put(k, v); - } else { - Map nestedProperties = (Map)((Map)unmappedFields.get(k)); - - if (v instanceof Map) { - result.put(k, pruneKnownUnmappedFields((Map) v, nestedProperties)); - } else if (v instanceof List) { - var list = ((List) v).stream().map(item -> pruneKnownUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) - .collect(Collectors.toList()); - result.put(k, list); - } - } - }); - return result; - } - - public static Map identifyUnmappedFields(Map analysisMap, Map mapping) { - var result = new LinkedHashMap(); - - if (mapping == null) { - return analysisMap; - } - - analysisMap.forEach((k, v) -> { - if (!mapping.containsKey(k)) { - result.put(k, v); - } else { - Map nestedProperties = (Map)((Map)mapping.get(k)).get("properties"); // TODO assumes mapping is also a Map! 
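
The rewrite above inverts the old prune-and-diff approach: rather than computing which fields to remove, it copies only the fields the target mapping names. A minimal sketch of that allow-list copy (hypothetical names), with the caveat the test TODOs also note, that the check is shallow, so unmapped nested fields ride along:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;

    public class AllowListSketch {
      // Copy only the top-level fields present in the target index mapping.
      static Map<String, Object> restrictToFields(Map<String, Object> source, Set<String> fields) {
        var message = new HashMap<String, Object>();
        fields.forEach(f -> message.put(f, source.get(f))); // absent fields land as null entries
        return message;
      }

      public static void main(String[] args) {
        var source = Map.<String, Object>of(
            "temporalBounding", Map.of("fakeField", 123),
            "garbage", "nuke meeee");
        System.out.println(restrictToFields(source, Set.of("temporalBounding", "spatialBounding")));
        // "garbage" is dropped, "spatialBounding" shows up as null, and
        // temporalBounding.fakeField survives because only top-level keys are checked
      }
    }
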
- - if (v instanceof Map) { - result.put(k, identifyUnmappedFields((Map) v, nestedProperties)); - } else if (v instanceof List) { - var list = ((List) v).stream().filter(item -> item instanceof Map).map(item -> identifyUnmappedFields((Map) item, nestedProperties)).filter(item -> !((Map)item).isEmpty()) - .collect(Collectors.toList()); - result.put(k, list); - } - } - }); - return result; - } /////////////////////////////////////////////////////////////////////////////// // Indexing For Search // /////////////////////////////////////////////////////////////////////////////// - public static Map reformatMessageForSearch(ParsedRecord record, Map targetFieldsMapping) { + public static Map reformatMessageForSearch(ParsedRecord record, Set fields) { var discovery = record.getDiscovery(); var analysis = record.getAnalysis(); var discoveryMap = AvroUtils.avroToMap(discovery, true); + var message = new HashMap(); + fields.forEach(field -> { + message.put(field, discoveryMap.get(field)); + }); // prepare and apply fields that need to be reformatted for search - discoveryMap.putAll(prepareGcmdKeyword(discovery)); - discoveryMap.putAll(prepareDates(discovery.getTemporalBounding(), analysis.getTemporalBounding())); - discoveryMap.put("dataFormat", prepareDataFormats(discovery)); - discoveryMap.put("linkProtocol", prepareLinkProtocols(discovery)); - discoveryMap.put("serviceLinks", prepareServiceLinks(discovery)); - discoveryMap.put("serviceLinkProtocol", prepareServiceLinkProtocols(discovery)); - discoveryMap.putAll(prepareResponsibleParties(record)); - discoveryMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); - discoveryMap.put("filename", prepareFilename(record)); - discoveryMap.put("checksums", prepareChecksums(record)); - - // drop fields not present in target index - // // FIXME - // var result = new LinkedHashMap(targetFieldsMapping.size()); - // // targetFields.forEach(f -> result.put(f, discoveryMap.get(f))); - // return result; - - // var pruned = TransformationUtils.pruneKnownUnmappedFields(discoveryMap, knownUnmappedFields); - var pruned = discoveryMap; - var minus = TransformationUtils.identifyUnmappedFields(pruned, targetFieldsMapping); - log.warn("The following fields were dropped when indexing to search: " + minus); // TODO "add for record `id`" - return DataUtils.removeFromMap(pruned, minus); + message.putAll(prepareGcmdKeyword(discovery));// TODO does this need an if? + message.putAll(prepareDates(discovery.getTemporalBounding(), analysis.getTemporalBounding())); // TODO does this need an if?
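
On the two TODOs just above: prepareGcmdKeyword and prepareDates each return a map with several keys, so an unconditional putAll can reintroduce fields the target index does not map. The per-key guard already used for responsibleParties a few lines below is the pattern such a check would follow; as a helper it might look like this (hypothetical name, a sketch only):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;

    public class GuardedPutAllSketch {
      // Merge only the prepared entries whose keys the target index actually maps.
      static void putAllIfMapped(Map<String, Object> message, Map<String, ?> prepared, Set<String> fields) {
        prepared.forEach((key, value) -> {
          if (fields.contains(key)) {
            message.put(key, value);
          }
        });
      }

      public static void main(String[] args) {
        var message = new HashMap<String, Object>();
        putAllIfMapped(message,
            Map.of("gcmdScience", Set.of("EARTH SCIENCE"), "gcmdFake", Set.of("x")),
            Set.of("gcmdScience"));
        System.out.println(message); // -> {gcmdScience=[EARTH SCIENCE]}
      }
    }
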
+ if (fields.contains("dataFormat")) { + message.put("dataFormat", prepareDataFormats(discovery)); + } + if (fields.contains("linkProtocol")) { + message.put("linkProtocol", prepareLinkProtocols(discovery)); + } + if (fields.contains("serviceLinks")) { + message.put("serviceLinks", prepareServiceLinks(discovery)); + } + if (fields.contains("serviceLinkProtocol")) { + message.put("serviceLinkProtocol", prepareServiceLinkProtocols(discovery)); + } + Map> responsibleParties = prepareResponsibleParties(record); + responsibleParties.forEach((key, value) -> { + if (fields.contains(key)) { + message.put(key, value); + } + }); + + if (fields.contains("internalParentIdentifier")) { + message.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); + } + if (fields.contains("filename")) { + message.put("filename", prepareFilename(record)); + } + if (fields.contains("checksums")) { + message.put("checksums", prepareChecksums(record)); + log.debug("including checksums (granule target)"); + } else { + log.debug("excluding checksums (collection target)"); + } + + return message; } //////////////////////////////// diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index a1b95f592..83ad4575b 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -5,6 +5,8 @@ import org.cedar.schemas.analyze.Temporal import org.cedar.schemas.avro.psi.Analysis import org.cedar.schemas.avro.psi.TemporalBoundingAnalysis import org.cedar.schemas.avro.psi.ValidDescriptor +import org.cedar.schemas.avro.psi.Checksum +import org.cedar.schemas.avro.psi.ChecksumAlgorithm import org.cedar.schemas.avro.psi.Discovery import org.cedar.schemas.avro.psi.FileInformation import org.cedar.schemas.avro.psi.ParsedRecord @@ -17,6 +19,8 @@ import spock.lang.Specification import spock.lang.Unroll import groovy.json.JsonOutput +import groovy.json.JsonSlurper +import org.cedar.schemas.avro.util.AvroUtils import static org.cedar.schemas.avro.util.TemporalTestData.getSituations @@ -25,9 +29,9 @@ import org.cedar.onestop.kafka.common.util.DataUtils; @Unroll class TransformationUtilsSpec extends Specification { - static Map collectionFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS) - static Map granuleFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS) - static Map granuleAnalysisErrorFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS) + static Set collectionFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet() + static Set granuleFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet() + static Set granuleAnalysisErrorFields = TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS).keySet() @@ -93,10 +97,24 @@ class TransformationUtilsSpec extends Specification { // 'granule' | granuleFields | TestUtils.inputGranuleRecord // } + def "checksums are only indexed for #label when the mapping includes them"() { + when: + + ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) +
.setFileInformation(FileInformation.newBuilder().setChecksums([Checksum.newBuilder().setAlgorithm(ChecksumAlgorithm.MD5).setValue('abc').build()]).build()).build() + + def indexedRecord = TransformationUtils.reformatMessageForSearch(record, fields) + + then: + + indexedRecord.keySet().contains("checksums") == shouldIncludeChecksums + + where: + label | shouldIncludeChecksums | fields + 'collections' | false | TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet() + 'granules' | true | TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet() + + } def "clean up nested map before indexing strictly mapped fields for search (granule)"() { when: @@ -161,7 +183,7 @@ class TransformationUtilsSpec extends Specification { // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS)) + def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet()) // def indexedRecord = DataUtils.removeFromMap(pruned, minus) then: @@ -177,14 +199,431 @@ class TransformationUtilsSpec extends Specification { // garbage:"nuke meeee" // ] - def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "legalConstraints", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "internalParentIdentifier", "filename", "checksums"] + // def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "legalConstraints", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "internalParentIdentifier", "filename", "checksums"] - indexedRecord.keySet().size() == expectedKeyset.size() - indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + indexedRecord.keySet().size() == granuleFields.size() + indexedRecord.keySet().each({ assert granuleFields.contains(it) }) } + // def "prune fields - spatial"() { + // when: + // def mapWithSpatial = [ + // spatialBounding: [ + // type: "MultiPolygon", + // coordinates: [ + // [ + // [ + // [-180.0, -14.28], + // [-61.821, -14.28], + // [-61.821, 70.4], + // [-180.0, 70.4], + // [-180.0, -14.28] + // ] + // ], + // [ + // [ + // [144.657, -14.28], + // [180.0, -14.28], + // [180.0, 70.4], + // [144.657, 70.4], + // [144.657, -14.28] + // ] + // ] + // ] + // ] 
+ // ] + // def minus = TransformationUtils.identifyUnmappedFields(mapWithSpatial, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS)) + // + // then: + // minus == [] + // } + +// def "debug integration" () { +// when: +// def jsonrecord = (new JsonSlurper()).parseText("""{ +// "type": "collection", +// "discovery": { +// "fileIdentifier": "gov.noaa.nodc:NDBC-COOPS", +// "parentIdentifier": null, +// "hierarchyLevelName": null, +// "doi": "doi:10.5072/FK2TEST", +// "purpose": "Basic research", +// "status": "completed", +// "credit": null, +// "title": "Coastal meteorological and water temperature data from National Water Level Observation Network (NWLON) and Physical Oceanographic Real-Time System (PORTS) stations of the NOAA Center for Operational Oceanographic Products and Services (CO-OPS)", +// "alternateTitle": null, +// "description": "The National Water Level Observation Network (NWLON) is a network of long-term water level stations operated and maintained by CO-OPS. NWLON stations are located on shore-based platforms, and primarily collect real-time water level measurements. As of January 2013, approximately 180 of 210 NWLON stations also collect real-time meteorological data. About 20 CO-OPS Physical Oceanographic Real-Time Systems (PORTS) comprise a group of water level stations, and 65 of these stations also collect real-time meteorological data. Data parameters include barometric pressure, wind direction, speed and gust, air temperature, and water temperature.", +// "keywords": [{ +// "values": ["DOC/NOAA/NESDIS/NODC > National Oceanographic Data Center, NESDIS, NOAA, U.S. Department of Commerce", "DOC/NOAA/NESDIS/NCEI > National Centers for Environmental Information, NESDIS, NOAA, U.S. Department of Commerce"], +// "type": "dataCenter", +// "namespace": "GCMD Keywords - Data Centers" +// }, { +// "values": ["0107939", "0108059", "0109292", "0111163", "0112393", "0113250", "0113898", "0114473", "0115274", "0115910", "0116703", "0117348", "0117811", "0118682", "0120725", "0120726", "0122183", "0122220", "0123085", "0123363", "0124305", "0125493", "0126410", "0126781", "0127407", "0128443", "0129526", "0130004", "0131097", "0131931", "0137308", "0138303", "0139574", "0141136", "0144301", "0145770", "0148198", "0151779", "0154391", "0155989"], +// "type": null, +// "namespace": "NCEI ACCESSION NUMBER" +// }, { +// "values": ["AIR TEMPERATURE", "BAROMETRIC PRESSURE", "DEWPOINT", "RELATIVE HUMIDITY", "SEA SURFACE TEMPERATURE", "VISIBILITY", "WIND DIRECTION", "WIND GUST", "WIND SPEED"], +// "type": "theme", +// "namespace": "NODC DATA TYPES THESAURUS" +// }, { +// "values": ["anemometer", "barometers", "meteorological sensors", "thermistor"], +// "type": "instrument", +// "namespace": "NODC INSTRUMENT TYPES THESAURUS" +// }, { +// "values": ["meteorological", "physical"], +// "type": "theme", +// "namespace": "NODC OBSERVATION TYPES THESAURUS" +// }, { +// "values": ["FIXED PLATFORM"], +// "type": "platform", +// "namespace": "NODC PLATFORM NAMES THESAURUS" +// }, { +// "values": ["US DOC; NOAA; NOS; Center for Operational Oceanographic Products and Services"], +// "type": "dataCenter", +// "namespace": "NODC COLLECTING INSTITUTION NAMES THESAURUS" +// }, { +// "values": ["US DOC; NOAA; NWS; National Data Buoy Center"], +// "type": "dataCenter", +// "namespace": "NODC SUBMITTING INSTITUTION NAMES THESAURUS" +// }, { +// "values": ["National Water Level Observation Network (NWLON)", "Physical Oceanographic Real-Time System (PORTS)"], +// "type": 
"project", +// "namespace": "NODC PROJECT NAMES THESAURUS" +// }, { +// "values": ["Bay of Fundy", "Beaufort Sea", "Bering Sea", "Caribbean Sea", "Coastal waters of Alabama", "Coastal Waters of Florida", "Coastal Waters of Louisiana", "Coastal Waters of Mississippi", "Coastal Waters of Southeast Alaska and British Columbia", "Coastal Waters of Texas", "Florida Keys National Marine Sanctuary", "Great Lakes", "Gulf of Alaska", "Gulf of Mexico", "Kaneohe Bay", "Monterey Bay National Marine Sanctuary", "North Atlantic Ocean", "North Pacific Ocean", "Papahanaumokuakea Marine National Monument", "Philippine Sea", "San Diego Bay", "South Pacific Ocean", "Yaquina Bay"], +// "type": "place", +// "namespace": "NODC SEA AREA NAMES THESAURUS" +// }, { +// "values": ["oceanography"], +// "type": "theme", +// "namespace": "WMO_CategoryCode" +// }, { +// "values": ["GOVERNMENT AGENCIES-U.S. FEDERAL AGENCIES > DOC > NOAA > DOC/NOAA/NOS/CO-OPS > Center for Operational Oceanographic Products and Services, National Ocean Service, NOAA, U.S. Department of Commerce > http://tidesandcurrents.noaa.gov/", "GOVERNMENT AGENCIES-U.S. FEDERAL AGENCIES > DOC > NOAA > DOC/NOAA/NWS/NDBC > National Data Buoy Center, National Weather Service, NOAA, U.S. Department of Commerce > http://www.ndbc.noaa.gov/"], +// "type": "dataCenter", +// "namespace": "GCMD Keywords - Data Centers" +// }, { +// "values": ["EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC PRESSURE", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC TEMPERATURE", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC TEMPERATURE > SURFACE TEMPERATURE > DEW POINT TEMPERATURE", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WATER VAPOR > HUMIDITY", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WINDS > SURFACE WINDS > WIND SPEED/WIND DIRECTION", "EARTH SCIENCE > OCEANS > OCEAN OPTICS", "EARTH SCIENCE > OCEANS > OCEAN TEMPERATURE > SEA SURFACE TEMPERATURE"], +// "type": "theme", +// "namespace": "GCMD Keywords - Science Keywords" +// }, { +// "values": ["In Situ/Laboratory Instruments > Current/Wind Meters > ANEMOMETERS", "In Situ/Laboratory Instruments > Pressure/Height Meters > BAROMETERS", "In Situ/Laboratory Instruments > Temperature/Humidity Sensors > Thermistors > THERMISTORS"], +// "type": "instrument", +// "namespace": "GCMD Keywords - Instruments" +// }, { +// "values": ["air_pressure_at_sea_level", "air_temperature", "dew_point_temperature", "relative_humidity", "sea_surface_temperature", "time", "visibility_in_air", "wind_from_direction", "wind_speed", "wind_speed_of_gust"], +// "type": "theme", +// "namespace": "NetCDF Climate and Forecast (CF) Metadata Convention Standard Name Table" +// }, { +// "values": ["air_temperature_sensor", "anemometer", "barometer", "ct_sensor", "humidity_sensor", "ocean_temperature_sensor", "visibility_sensor"], +// "type": "instrument", +// "namespace": "NOS SENSOR THESAURUS" +// }, { +// "values": ["1611400 - NWWH1", "1612340 - OOUH1", "1612480 - MOKH1", "1615680 - KLIH1", "1617433 - KWHH1", "1617760 - ILOH1", "1619910 - SNDP5", "1630000 - APRP7", "1631428 - PGBP7", "1770000 - NSTP6", "1820000 - KWJP8", "1890000 - WAKP8", "2695540 - BEPB6", "8311030 - OBGN6", "8311062 - ALXN6", "8410140 - PSBM1", "8411060 - CFWM1", "8413320 - ATGM1", "8418150 - CASM1", "8419317 - WELM1", "8443970 - BHBM3", "8447386 - FRVM3", "8447387 - BLTM3", "8447412 - FRXM3", "8447930 - BZBM3", "8449130 - NTKM3", "8452660 - NWPR1", "8452944 - CPTR1", "8452951 - PTCR1", "8454000 - FOXR1", "8454049 - QPTR1", "8461490 - NLNC3", "8465705 - NWHC3", "8467150 - BRHC3", "8510560 - MTKN6", "8516945 
- KPTN6", "8518750 - BATN6", "8519483 - BGNN4", "8519532 - MHRN6", "8530973 - ROBN4", "8531680 - SDHN4", "8534720 - ACYN4", "8536110 - CMAN4", "8537121 - SJSN4", "8538886 - TPBN4", "8539094 - BDRN4", "8540433 - MRCP1", "8545240 - PHBP1", "8548989 - NBLP1", "8551762 - DELD1", "8551910 - RDYD1", "8557380 - LWSD1", "8570283 - OCIM2", "8571421 - BISM2", "8571892 - CAMM2", "8573364 - TCBM2", "8573927 - CHCM2", "8574680 - BLTM2", "8574728 - FSKM2", "8575512 - APAM2", "8577018 - COVM2", "8577330 - SLIM2", "8578240 - PPTM2", "8594900 - WASD2", "8631044 - WAHV2", "8632200 - KPTV2", "8632837 - RPLV2", "8635027 - NCDV2", "8635750 - LWTV2", "8637611 - YKRV2", "8637689 - YKTV2", "8638511 - DOMV2", "8638595 - CRYV2", "8638610 - SWPV2", "8638614 - WDSV2", "8638863 - CBBV2", "8638999 - CHYV2", "8639348 - MNPV2", "8651370 - DUKN7", "8652587 - ORIN7", "8654467 - HCGN7", "8656483 - BFTN7", "8658120 - WLON7", "8658163 - JMPN7", "8661070 - MROS1", "8665530 - CHTS1", "8670870 - FPKG1", "8720030 - FRDF1", "8720215 - NFDF1", "8720218 - MYPF1", "8720219 - DMSF1", "8720228 - LTJF1", "8720233 - BLIF1", "8720245 - JXUF1", "8720357 - BKBF1", "8720503 - GCVF1", "8721604 - TRDF1", "8722670 - LKWF1", "8723214 - VAKF1", "8723970 - VCAF1", "8724580 - KYWF1", "8725110 - NPSF1", "8725520 - FMRF1", "8726384 - PMAF1", "8726412 - MTBF1", "8726520 - SAPF1", "8726607 - OPTF1", "8726667 - MCYF1", "8726669 - ERTF1", "8726673 - SBLF1", "8726679 - TSHF1", "8726694 - TPAF1", "8726724 - CWBF1", "8727520 - CKYF1", "8728690 - APCF1", "8729108 - PACF1", "8729210 - PCBF1", "8729840 - PCLF1", "8732828 - WBYA1", "8734673 - FMOA1", "8735180 - DILA1", "8736163 - MBPA1", "8736897 - MCGA1", "8737005 - PTOA1", "8737048 - OBLA1", "8741003 - PTBM6", "8741041 - ULAM6", "8741094 - RARM6", "8741501 - DKCM6", "8741533 - PNLM6", "8747437 - WYCM6", "8760721 - PILL1", "8760922 - PSTL1", "8761305 - SHBL1", "8761724 - GISL1", "8761927 - NWCL1", "8761955 - CARL1", "8762482 - BYGL1", "8762484 - FREL1", "8764044 - TESL1", "8764227 - AMRL1", "8764314 - EINL1", "8766072 - FRWL1", "8767816 - LCLL1", "8767961 - BKTL1", "8768094 - CAPL1", "8770570 - SBPT2", "8770613 - MGPT2", "8770822 - TXPT2", "8771013 - EPTT2", "8771341 - GNJT2", "8771450 - GTOT2", "8772447 - FCGT2", "8774770 - RCPT2", "8775870 - MQTT2", "8779770 - PTIT2", "9014070 - AGCM4", "9014090 - MBRM4", "9014098 - FTGM4", "9052030 - OSGN6", "9052058 - RCRN6", "9063012 - NIAN6", "9063020 - BUFN6", "9063028 - PSTN6", "9063038 - EREP1", "9063053 - FAIO1", "9063063 - CNDO1", "9063079 - MRHO1", "9063085 - THRO1", "9075014 - HRBM4", "9075065 - LPNM4", "9075080 - MACM4", "9075099 - DTLM4", "9076024 - RCKM4", "9076027 - WNEM4", "9076033 - LTRM4", "9076070 - SWPM4", "9087023 - LDTM4", "9087031 - HLNM4", "9087044 - CMTI2", "9087069 - KWNW3", "9087088 - MNMM4", "9087096 - PNLM4", "9099004 - PTIM4", "9099018 - MCGM4", "9099064 - DULM5", "9099090 - GDMM5", "9410170 - SDBC1", "9410172 - IIWC1", "9410230 - LJAC1", "9410660 - OHBC1", "9410665 - PRJC1", "9410670 - PFXC1", "9410840 - ICAC1", "9411340 - NTBC1", "9411406 - HRVC1", "9412110 - PSLC1", "9413450 - MTYC1", "9414290 - FTPC1", "9414296 - PXSC1", "9414311 - PXOC1", "9414523 - RTYC1", "9414750 - AAMC1", "9414763 - LNDC1", "9414769 - OMHC1", "9414776 - OKXC1", "9414797 - OBXC1", "9414847 - PPXC1", "9414863 - RCMC1", "9415020 - PRYC1", "9415102 - MZXC1", "9415115 - PSBC1", "9415118 - UPBC1", "9415141 - DPXC1", "9415144 - PCOC1", "9416841 - ANVC1", "9418767 - HBYC1", "9419750 - CECC1", "9431647 - PORO3", "9432780 - CHAO3", "9435380 - SBEO3", "9437540 - TLBO3", "9439011 - 
HMDO3", "9439040 - ASTO3", "9440422 - LOPW1", "9440910 - TOKW1", "9441102 - WPTW1", "9442396 - LAPW1", "9443090 - NEAW1", "9444090 - PTAW1", "9444900 - PTWW1", "9446482 - TCMW1", "9446484 - TCNW1", "9447130 - EBSW1", "9449424 - CHYW1", "9449880 - FRDW1", "9450460 - KECA2", "9451054 - PLXA2", "9451600 - ITKA2", "9452210 - JNEA2", "9452400 - SKTA2", "9452634 - ELFA2", "9453220 - YATA2", "9454050 - CRVA2", "9454240 - VDZA2", "9455090 - SWLA2", "9455500 - OVIA2", "9455760 - NKTA2", "9455920 - ANTA2", "9457292 - KDAA2", "9457804 - ALIA2", "9459450 - SNDA2", "9459881 - KGCA2", "9461380 - ADKA2", "9461710 - ATKA2", "9462450 - OLSA2", "9462620 - UNLA2", "9463502 - PMOA2", "9464212 - VCVA2", "9468756 - NMTA2", "9491094 - RDDA2", "9497645 - PRDA2", "9751364 - CHSV3", "9751381 - LAMV3", "9751401 - LTBV3", "9751639 - CHAV3", "9752695 - ESPP4", "9755371 - SJNP4", "9759110 - MGIP4", "9759394 - MGZP4", "9759412 - AUDP4", "9759938 - MISP4", "9761115 - BARA9"], +// "type": "platform", +// "namespace": "NOS - NWSLI PLATFORM THESAURUS" +// }, { +// "values": ["CONTINENT > NORTH AMERICA > CANADA > GREAT LAKES, CANADA", "CONTINENT > NORTH AMERICA > UNITED STATES OF AMERICA > GREAT LAKES", "OCEAN > ARCTIC OCEAN > BEAUFORT SEA", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN > BAY OF FUNDY", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN > CARIBBEAN SEA", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN > GULF OF MEXICO", "OCEAN > PACIFIC OCEAN > CENTRAL PACIFIC OCEAN > HAWAIIAN ISLANDS", "OCEAN > PACIFIC OCEAN > NORTH PACIFIC OCEAN", "OCEAN > PACIFIC OCEAN > NORTH PACIFIC OCEAN > BERING SEA", "OCEAN > PACIFIC OCEAN > NORTH PACIFIC OCEAN > GULF OF ALASKA", "OCEAN > PACIFIC OCEAN > SOUTH PACIFIC OCEAN"], +// "type": "place", +// "namespace": "GCMD Keywords - Locations" +// }], +// "topicCategories": ["environment", "oceans", "climatologyMeteorologyAtmosphere"], +// "temporalBounding": { +// "beginDate": "2013-03-01", +// "beginIndeterminate": null, +// "endDate": null, +// "endIndeterminate": "now", +// "instant": null, +// "instantIndeterminate": null, +// "description": null +// }, +// "spatialBounding": { +// "type": "MultiPolygon", +// "coordinates": [ +// [ +// [ +// [-180.0, -14.28], +// [-61.821, -14.28], +// [-61.821, 70.4], +// [-180.0, 70.4], +// [-180.0, -14.28] +// ] +// ], +// [ +// [ +// [144.657, -14.28], +// [180.0, -14.28], +// [180.0, 70.4], +// [144.657, 70.4], +// [144.657, -14.28] +// ] +// ] +// ] +// }, +// "isGlobal": false, +// "acquisitionInstruments": [], +// "acquisitionOperations": [], +// "acquisitionPlatforms": [], +// "dataFormats": [{ +// "name": "ORIGINATOR DATA FORMAT", +// "version": null +// }], +// "links": [{ +// "linkName": "Descriptive Information", +// "linkProtocol": "HTTP", +// "linkUrl": "http://data.nodc.noaa.gov/cgi-bin/iso?id=gov.noaa.nodc:NDBC-COOPS", +// "linkDescription": "Navigate directly to the URL for a descriptive web page with download links.", +// "linkFunction": "information" +// }, { +// "linkName": "Granule Search", +// "linkProtocol": "HTTP", +// "linkUrl": "http://www.nodc.noaa.gov/search/granule/rest/find/document?searchText=fileIdentifier%3ACO-OPS*&start=1&max=100&expandResults=true&f=searchPage", +// "linkDescription": "Granule Search", +// "linkFunction": "search" +// }, { +// "linkName": "THREDDS", +// "linkProtocol": "THREDDS", +// "linkUrl": "http://data.nodc.noaa.gov/thredds/catalog/ndbc/co-ops/", +// "linkDescription": "These data are available through a variety of services via a THREDDS 
(Thematic Real-time Environmental Distributed Data Services) Data Server (TDS). Depending on the dataset, the TDS can provide WMS, WCS, DAP, HTTP, and other data access and metadata services as well. For more information on the TDS, see http://www.unidata.ucar.edu/software/thredds/current/tds/.", +// "linkFunction": "download" +// }, { +// "linkName": "OPeNDAP", +// "linkProtocol": "DAP", +// "linkUrl": "http://data.nodc.noaa.gov/opendap/ndbc/co-ops/", +// "linkDescription": "These data are available through the Data Access Protocol (DAP) via an OPeNDAP Hyrax server. For a listing of OPeNDAP clients which may be used to access OPeNDAP-enabled data sets, please see the OPeNDAP website at http://opendap.org/.", +// "linkFunction": "download" +// }, { +// "linkName": "HTTP", +// "linkProtocol": "HTTP", +// "linkUrl": "http://data.nodc.noaa.gov/ndbc/co-ops/", +// "linkDescription": "Navigate directly to the URL for data access and direct download.", +// "linkFunction": "download" +// }, { +// "linkName": "FTP", +// "linkProtocol": "FTP", +// "linkUrl": "ftp://ftp.nodc.noaa.gov/pub/data.nodc/ndbc/co-ops/", +// "linkDescription": "These data are available through the File Transfer Protocol (FTP). You may use any FTP client to download these data.", +// "linkFunction": "download" +// }], +// "responsibleParties": [{ +// "individualName": null, +// "organizationName": "DOC/NOAA/NESDIS/NCEI > National Centers for Environmental Information, NESDIS, NOAA, U.S. Department of Commerce", +// "positionName": null, +// "role": "publisher", +// "email": "NODC.DataOfficer@noaa.gov", +// "phone": "301-713-3277" +// }, { +// "individualName": null, +// "organizationName": "DOC/NOAA/NESDIS/NODC > National Oceanographic Data Center, NESDIS, NOAA, U.S. Department of Commerce", +// "positionName": null, +// "role": "publisher", +// "email": "NODC.DataOfficer@noaa.gov", +// "phone": "301-713-3277" +// }, { +// "individualName": "Rex V Hervey", +// "organizationName": "US DOC; NOAA; NWS; National Data Buoy Center (NDBC)", +// "positionName": null, +// "role": "resourceProvider", +// "email": "rex.hervey@noaa.gov", +// "phone": "228-688-3007" +// }, { +// "individualName": null, +// "organizationName": "US DOC; NOAA; NWS; National Data Buoy Center (NDBC)", +// "positionName": null, +// "role": "resourceProvider", +// "email": null, +// "phone": null +// }, { +// "individualName": null, +// "organizationName": "DOC/NOAA/NESDIS/NCEI > National Centers for Environmental Information, NESDIS, NOAA, U.S. Department of Commerce", +// "positionName": null, +// "role": "pointOfContact", +// "email": "NCEI.Info@noaa.gov", +// "phone": "301-713-3277" +// }, { +// "individualName": null, +// "organizationName": "Global Change Data Center, Science and Exploration Directorate, Goddard Space Flight Center (GSFC) National Aeronautics and Space Administration (NASA)", +// "positionName": null, +// "role": "custodian", +// "email": null, +// "phone": null +// }], +// "thumbnail": "http://data.nodc.noaa.gov/cgi-bin/gfx?id=gov.noaa.nodc:NDBC-COOPS", +// "thumbnailDescription": "Preview graphic", +// "creationDate": null, +// "revisionDate": null, +// "publicationDate": "2013-06-05", +// "citeAsStatements": ["Cite as: Hervey, R. V. and US DOC; NOAA; NWS; National Data Buoy Center (2013). Coastal meteorological and water temperature data from National Water Level Observation Network (NWLON) and Physical Oceanographic Real-Time System (PORTS) stations of the NOAA Center for Operational Oceanographic Products and Services (CO-OPS). 
National Oceanographic Data Center, NOAA. Dataset. [access date]"], +// "crossReferences": [], +// "largerWorks": [], +// "useLimitation": "accessLevel: Public", +// "legalConstraints": ["Cite as: Hervey, R. V. and US DOC; NOAA; NWS; National Data Buoy Center (2013). Coastal meteorological and water temperature data from National Water Level Observation Network (NWLON) and Physical Oceanographic Real-Time System (PORTS) stations of the NOAA Center for Operational Oceanographic Products and Services (CO-OPS). National Oceanographic Data Center, NOAA. Dataset. [access date]", "NOAA and NCEI cannot provide any warranty as to the accuracy, reliability, or completeness of furnished data. Users assume responsibility to determine the usability of these data. The user is responsible for the results of any application of this data for other than its intended purpose."], +// "accessFeeStatement": null, +// "orderingInstructions": null, +// "edition": null, +// "dsmmAccessibility": 0, +// "dsmmDataIntegrity": 0, +// "dsmmDataQualityAssessment": 0, +// "dsmmDataQualityAssurance": 0, +// "dsmmDataQualityControlMonitoring": 0, +// "dsmmPreservability": 0, +// "dsmmProductionSustainability": 0, +// "dsmmTransparencyTraceability": 0, +// "dsmmUsability": 0, +// "dsmmAverage": 0.0, +// "updateFrequency": "asNeeded", +// "presentationForm": "tableDigital", +// "services": [] +// }, +// "analysis": { +// "identification": { +// "fileIdentifierExists": true, +// "fileIdentifierString": "gov.noaa.nodc:NDBC-COOPS", +// "doiExists": true, +// "doiString": "doi:10.5072/FK2TEST", +// "parentIdentifierExists": false, +// "parentIdentifierString": null, +// "hierarchyLevelNameExists": false, +// "isGranule": false +// }, +// "titles": { +// "titleExists": true, +// "titleCharacters": 244, +// "alternateTitleExists": false, +// "alternateTitleCharacters": 0, +// "titleFleschReadingEaseScore": -15.662258064516124, +// "alternateTitleFleschReadingEaseScore": null, +// "titleFleschKincaidReadingGradeLevel": 23.14516129032258, +// "alternateTitleFleschKincaidReadingGradeLevel": null +// }, +// "description": { +// "descriptionExists": true, +// "descriptionCharacters": 642, +// "descriptionFleschReadingEaseScore": 24.320808988764043, +// "descriptionFleschKincaidReadingGradeLevel": 14.289078651685397 +// }, +// "dataAccess": { +// "dataAccessExists": true +// }, +// "thumbnail": { +// "thumbnailExists": true +// }, +// "temporalBounding": { +// "beginDescriptor": "VALID", +// "beginPrecision": "Days", +// "beginIndexable": true, +// "beginZoneSpecified": null, +// "beginUtcDateTimeString": "2013-03-01T00:00:00Z", +// "beginYear": 2013, +// "beginDayOfYear": 60, +// "beginDayOfMonth": 1, +// "beginMonth": 3, +// "endDescriptor": "UNDEFINED", +// "endPrecision": null, +// "endIndexable": true, +// "endZoneSpecified": null, +// "endUtcDateTimeString": null, +// "endYear": null, +// "endDayOfYear": null, +// "endDayOfMonth": null, +// "endMonth": null, +// "instantDescriptor": "UNDEFINED", +// "instantPrecision": null, +// "instantIndexable": true, +// "instantZoneSpecified": null, +// "instantUtcDateTimeString": null, +// "instantYear": null, +// "instantDayOfYear": null, +// "instantDayOfMonth": null, +// "instantMonth": null, +// "rangeDescriptor": "ONGOING" +// }, +// "spatialBounding": { +// "spatialBoundingExists": true, +// "isValid": true, +// "validationError": null +// } +// }, +// "fileInformation": null, +// "fileLocations": {}, +// "publishing": { +// "isPrivate": false, +// "until": null +// }, +// 
"relationships": [], +// "errors": [] +// }""") +// def record = AvroUtils.mapToAvro((Map)jsonrecord, ParsedRecord) +// +// // println("zeb "+JsonOutput.toJson(parsed)) +// println("ZEB") +// println(record) +// def discovery = record.getDiscovery(); +// def analysis = record.getAnalysis(); +// def discoveryMap = AvroUtils.avroToMap(discovery, true); +// +// // prepare and apply fields that need to be reformatted for search +// discoveryMap.putAll(TransformationUtils.prepareGcmdKeyword(discovery)); +// discoveryMap.putAll(TransformationUtils.prepareDates(discovery.getTemporalBounding(), analysis.getTemporalBounding())); +// discoveryMap.put("dataFormat", TransformationUtils.prepareDataFormats(discovery)); +// discoveryMap.put("linkProtocol", TransformationUtils.prepareLinkProtocols(discovery)); +// discoveryMap.put("serviceLinks", TransformationUtils.prepareServiceLinks(discovery)); +// discoveryMap.put("serviceLinkProtocol", TransformationUtils.prepareServiceLinkProtocols(discovery)); +// discoveryMap.putAll(TransformationUtils.prepareResponsibleParties(record)); +// discoveryMap.put("internalParentIdentifier", TransformationUtils.prepareInternalParentIdentifier(record)); +// discoveryMap.put("filename", TransformationUtils.prepareFilename(record)); +// discoveryMap.put("checksums", TransformationUtils.prepareChecksums(record)); +// +// def pruned = TransformationUtils.pruneKnownUnmappedFields(discoveryMap, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) +// def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS)) +// // def indexedRecord = DataUtils.removeFromMap(pruned, minus) +// +// println(JsonOutput.toJson(pruned)) +// println(JsonOutput.toJson(minus)) +// then: +// pruned == [] +// minus == [ +// internalParentIdentifier: null, // ok for granule, not collection +// temporalBounding: [ +// fakeField: 123 +// ], +// errors: [ +// [ +// nonsense: "horrible", +// ] +// ], +// garbage:"nuke meeee" +// ] +// // +// // def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "errors" ] +// // indexedRecord.keySet().size() == expectedKeyset.size() +// // indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) +// // +// // indexedRecord.temporalBounding == [ +// // beginDescriptor: ValidDescriptor.VALID, +// // beginPrecision: ChronoUnit.DAYS.toString(), +// // beginIndexable: true, +// // beginZoneSpecified: null, +// // beginUtcDateTimeString: "2000-02-01", +// // endDescriptor: null, +// // endPrecision: null, +// // endIndexable: null, +// // endZoneSpecified: null, +// // endUtcDateTimeString: null, +// // instantDescriptor: null, +// // instantPrecision: null, +// // instantIndexable: null, +// // instantZoneSpecified: null, +// // instantUtcDateTimeString: null, +// // rangeDescriptor: null +// // ] +// // +// // indexedRecord.errors.size() == 1 +// // indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired +// // source: "valid field" +// // ] +// } def "clean up nested map before indexing strictly mapped fields for search (collection)"() { when: @@ -207,251 +646,251 @@ class TransformationUtilsSpec extends Specification { // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - def indexedRecord = TransformationUtils.reformatMessageForSearch(record, 
TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS)) + def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet()) // def indexedRecord = DataUtils.removeFromMap(pruned, minus) then: + // + // def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "useLimitation", "legalConstraints", "accessFeeStatement", "orderingInstructions", "edition", "dsmmAverage", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "organizationNames", + // "individualNames", "checksums"] - def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "useLimitation", "legalConstraints", "accessFeeStatement", "orderingInstructions", "edition", "dsmmAverage", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "organizationNames", - "individualNames", "checksums"] - - - indexedRecord.keySet().size() == expectedKeyset.size() - expectedKeyset.each({ assert indexedRecord.keySet().contains(it) }) - indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) - - } - - def "clean up nested map before indexing strictly mapped fields for analysis and errors (granule)"() { // TODO change to use reformatMessageFor method - when: - def parsed = [ - identification: null, - internalParentIdentifier: null, - titles: null, - description: null, - dataAccess: null, - thumbnail: null, - temporalBounding: [ - beginDescriptor: ValidDescriptor.VALID, - beginPrecision: ChronoUnit.DAYS.toString(), - beginIndexable: true, - beginZoneSpecified: null, - beginUtcDateTimeString: "2000-02-01", - beginYear: 2000, - beginDayOfYear: 32, - beginDayOfMonth: 1, - beginMonth: 2, - endDescriptor: null, - endPrecision: null, - endIndexable: null, - endZoneSpecified: null, - endUtcDateTimeString: null, - endYear: null, - endDayOfYear: null, - endDayOfMonth: null, - endMonth: null, - instantDescriptor: null, - instantPrecision: null, - instantIndexable: null, - instantZoneSpecified: null, - instantUtcDateTimeString: null, - instantYear: null, - instantDayOfYear: null, - instantDayOfMonth: null, - instantMonth: null, - rangeDescriptor: null, - fakeField: 123 - ], - spatialBounding: null, - errors: [ - [ - nonsense: "horrible", - source: "valid field" - ] - ], - garbage:"nuke meeee" - ] - - // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - // .setAnalysis( - // Analysis.newBuilder().setTemporalBounding( - // TemporalBoundingAnalysis.newBuilder() 
- // .setBeginDescriptor(ValidDescriptor.VALID) - // .setBeginIndexable(true) - // .setBeginPrecision(ChronoUnit.DAYS.toString()) - // .setBeginZoneSpecified(null) - // .setBeginUtcDateTimeString("2000-02-01") - // .setBeginYear(2000) - // .setBeginMonth(2) - // .setBeginDayOfYear(32) - // .setBeginDayOfMonth(1) - // .build() - // ).build()).build() - - // def parsed = TransformationUtils.unfilteredAEMessage(record) - - def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - def indexedRecord = DataUtils.removeFromMap(pruned, minus) - then: - minus == [ - temporalBounding: [ - fakeField: 123 - ], - errors: [ - [ - nonsense: "horrible", - ] - ], - garbage:"nuke meeee" - ] - - def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "internalParentIdentifier", "errors" ] - indexedRecord.keySet().size() == expectedKeyset.size() - indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) - - indexedRecord.temporalBounding == [ - beginDescriptor: ValidDescriptor.VALID, - beginPrecision: ChronoUnit.DAYS.toString(), - beginIndexable: true, - beginZoneSpecified: null, - beginUtcDateTimeString: "2000-02-01", - endDescriptor: null, - endPrecision: null, - endIndexable: null, - endZoneSpecified: null, - endUtcDateTimeString: null, - instantDescriptor: null, - instantPrecision: null, - instantIndexable: null, - instantZoneSpecified: null, - instantUtcDateTimeString: null, - rangeDescriptor: null - ] - - indexedRecord.errors.size() == 1 - indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired - source: "valid field" - ] + indexedRecord.keySet().size() == collectionFields.size() + collectionFields.each({ assert indexedRecord.keySet().contains(it) }) + indexedRecord.keySet().each({ assert collectionFields.contains(it) }) } - def "clean up nested map before indexing strictly mapped fields for analysis and errors (collection)"() { // TODO change to use reformatMessageFor method - when: - def parsed = [ - identification: null, - internalParentIdentifier: null, - titles: null, - description: null, - dataAccess: null, - thumbnail: null, - temporalBounding: [ - beginDescriptor: ValidDescriptor.VALID, - beginPrecision: ChronoUnit.DAYS.toString(), - beginIndexable: true, - beginZoneSpecified: null, - beginUtcDateTimeString: "2000-02-01", - beginYear: 2000, - beginDayOfYear: 32, - beginDayOfMonth: 1, - beginMonth: 2, - endDescriptor: null, - endPrecision: null, - endIndexable: null, - endZoneSpecified: null, - endUtcDateTimeString: null, - endYear: null, - endDayOfYear: null, - endDayOfMonth: null, - endMonth: null, - instantDescriptor: null, - instantPrecision: null, - instantIndexable: null, - instantZoneSpecified: null, - instantUtcDateTimeString: null, - instantYear: null, - instantDayOfYear: null, - instantDayOfMonth: null, - instantMonth: null, - rangeDescriptor: null, - fakeField: 123 - ], - spatialBounding: null, - errors: [ - [ - nonsense: "horrible", - source: "valid field" - ] - ], - garbage:"nuke meeee" - ] - - // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - // .setAnalysis( - // Analysis.newBuilder().setTemporalBounding( - // TemporalBoundingAnalysis.newBuilder() - // .setBeginDescriptor(ValidDescriptor.VALID) - // 
.setBeginIndexable(true) - // .setBeginPrecision(ChronoUnit.DAYS.toString()) - // .setBeginZoneSpecified(null) - // .setBeginUtcDateTimeString("2000-02-01") - // .setBeginYear(2000) - // .setBeginMonth(2) - // .setBeginDayOfYear(32) - // .setBeginDayOfMonth(1) - // .build() - // ).build()).build() - - // def parsed = TransformationUtils.unfilteredAEMessage(record) - - def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - def indexedRecord = DataUtils.removeFromMap(pruned, minus) - - then: - minus == [ - internalParentIdentifier: null, // ok for granule, not collection - temporalBounding: [ - fakeField: 123 - ], - errors: [ - [ - nonsense: "horrible", - ] - ], - garbage:"nuke meeee" - ] - - def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "errors" ] - indexedRecord.keySet().size() == expectedKeyset.size() - indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) - - indexedRecord.temporalBounding == [ - beginDescriptor: ValidDescriptor.VALID, - beginPrecision: ChronoUnit.DAYS.toString(), - beginIndexable: true, - beginZoneSpecified: null, - beginUtcDateTimeString: "2000-02-01", - endDescriptor: null, - endPrecision: null, - endIndexable: null, - endZoneSpecified: null, - endUtcDateTimeString: null, - instantDescriptor: null, - instantPrecision: null, - instantIndexable: null, - instantZoneSpecified: null, - instantUtcDateTimeString: null, - rangeDescriptor: null - ] - - indexedRecord.errors.size() == 1 - indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired - source: "valid field" - ] + // def "clean up nested map before indexing strictly mapped fields for analysis and errors (granule)"() { // TODO change to use reformatMessageFor method + // when: + // def parsed = [ + // identification: null, + // internalParentIdentifier: null, + // titles: null, + // description: null, + // dataAccess: null, + // thumbnail: null, + // temporalBounding: [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // beginYear: 2000, + // beginDayOfYear: 32, + // beginDayOfMonth: 1, + // beginMonth: 2, + // endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // endYear: null, + // endDayOfYear: null, + // endDayOfMonth: null, + // endMonth: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // instantYear: null, + // instantDayOfYear: null, + // instantDayOfMonth: null, + // instantMonth: null, + // rangeDescriptor: null, + // fakeField: 123 + // ], + // spatialBounding: null, + // errors: [ + // [ + // nonsense: "horrible", + // source: "valid field" + // ] + // ], + // garbage:"nuke meeee" + // ] + // + // // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + // // .setAnalysis( + // // Analysis.newBuilder().setTemporalBounding( + // // TemporalBoundingAnalysis.newBuilder() + // // .setBeginDescriptor(ValidDescriptor.VALID) + // // .setBeginIndexable(true) + // // 
.setBeginPrecision(ChronoUnit.DAYS.toString()) + // // .setBeginZoneSpecified(null) + // // .setBeginUtcDateTimeString("2000-02-01") + // // .setBeginYear(2000) + // // .setBeginMonth(2) + // // .setBeginDayOfYear(32) + // // .setBeginDayOfMonth(1) + // // .build() + // // ).build()).build() + // + // // def parsed = TransformationUtils.unfilteredAEMessage(record) + // + // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) + // def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + // def indexedRecord = DataUtils.removeFromMap(pruned, minus) + // + // then: + // minus == [ + // temporalBounding: [ + // fakeField: 123 + // ], + // errors: [ + // [ + // nonsense: "horrible", + // ] + // ], + // garbage:"nuke meeee" + // ] + // + // def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "internalParentIdentifier", "errors" ] + // indexedRecord.keySet().size() == expectedKeyset.size() + // indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + // + // indexedRecord.temporalBounding == [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // rangeDescriptor: null + // ] + // + // indexedRecord.errors.size() == 1 + // indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired + // source: "valid field" + // ] + // + // } - } + // def "clean up nested map before indexing strictly mapped fields for analysis and errors (collection)"() { // TODO change to use reformatMessageFor method + // when: + // def parsed = [ + // identification: null, + // internalParentIdentifier: null, + // titles: null, + // description: null, + // dataAccess: null, + // thumbnail: null, + // temporalBounding: [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // beginYear: 2000, + // beginDayOfYear: 32, + // beginDayOfMonth: 1, + // beginMonth: 2, + // endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // endYear: null, + // endDayOfYear: null, + // endDayOfMonth: null, + // endMonth: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // instantYear: null, + // instantDayOfYear: null, + // instantDayOfMonth: null, + // instantMonth: null, + // rangeDescriptor: null, + // fakeField: 123 + // ], + // spatialBounding: null, + // errors: [ + // [ + // nonsense: "horrible", + // source: "valid field" + // ] + // ], + // garbage:"nuke meeee" + // ] + // + // // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + // // .setAnalysis( + // // Analysis.newBuilder().setTemporalBounding( + // // TemporalBoundingAnalysis.newBuilder() + // // 
.setBeginDescriptor(ValidDescriptor.VALID) + // // .setBeginIndexable(true) + // // .setBeginPrecision(ChronoUnit.DAYS.toString()) + // // .setBeginZoneSpecified(null) + // // .setBeginUtcDateTimeString("2000-02-01") + // // .setBeginYear(2000) + // // .setBeginMonth(2) + // // .setBeginDayOfYear(32) + // // .setBeginDayOfMonth(1) + // // .build() + // // ).build()).build() + // + // // def parsed = TransformationUtils.unfilteredAEMessage(record) + // + // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) + // def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_ERROR_AND_ANALYSIS_INDEX_ALIAS)) + // def indexedRecord = DataUtils.removeFromMap(pruned, minus) + // + // then: + // minus == [ + // internalParentIdentifier: null, // ok for granule, not collection + // temporalBounding: [ + // fakeField: 123 + // ], + // errors: [ + // [ + // nonsense: "horrible", + // ] + // ], + // garbage:"nuke meeee" + // ] + // + // def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "errors" ] + // indexedRecord.keySet().size() == expectedKeyset.size() + // indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) + // + // indexedRecord.temporalBounding == [ + // beginDescriptor: ValidDescriptor.VALID, + // beginPrecision: ChronoUnit.DAYS.toString(), + // beginIndexable: true, + // beginZoneSpecified: null, + // beginUtcDateTimeString: "2000-02-01", + // endDescriptor: null, + // endPrecision: null, + // endIndexable: null, + // endZoneSpecified: null, + // endUtcDateTimeString: null, + // instantDescriptor: null, + // instantPrecision: null, + // instantIndexable: null, + // instantZoneSpecified: null, + // instantUtcDateTimeString: null, + // rangeDescriptor: null + // ] + // + // indexedRecord.errors.size() == 1 + // indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired + // source: "valid field" + // ] + // + // } //////////////////////////////// // Identifiers, "Names" // From e74fa5adc18e8c6f616e4cd84f7aec9a97dec9b8 Mon Sep 17 00:00:00 2001 From: Zeb Date: Fri, 5 Jun 2020 10:38:41 -0600 Subject: [PATCH 16/29] Collapse into single method. 
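
The search and analysis-and-errors write paths previously went through two
near-duplicate transforms (reformatMessageForSearch and
reformatMessageForAnalysisAndErrors); both now funnel through a single
reformatMessage driven by the field set of the target index, so a field is
emitted only if the destination mapping actually knows it. Note the put order
inside the forEach: the discovery value is written last, so it wins whenever
both the analysis and discovery maps carry the same field name.

A minimal sketch of the resulting call pattern (illustration only --
esConfig, indexAlias, record, and timestamp are stand-in names, not part of
this patch; the field lookup mirrors what TransformationUtilsSpec does):

    // the per-index mapping decides which fields survive the reformat
    Set<String> targetFields = esConfig.indexedProperties(indexAlias).keySet();
    Map<String, Object> source = new HashMap<>(TransformationUtils.reformatMessage(record, targetFields));
    source.put("stagedDate", timestamp); // stamped by IndexingUtils after the reformat
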
---
 .../onestop/indexer/util/IndexingUtils.java  |  4 +-
 .../indexer/util/TransformationUtils.java    | 46 +++++++++----------
 .../util/TransformationUtilsSpec.groovy      | 12 ++---
 3 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java
index 01a9a14e9..aab650564 100644
--- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java
+++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java
@@ -64,7 +64,7 @@ public static DocWriteRequest buildSearchWriteRequest(String indexName, DocWr
     }
     else {
       var formattedRecord = new HashMap<String, Object>();
-      formattedRecord.putAll(TransformationUtils.reformatMessageForSearch(input.getValue().value(), input.getTargetSearchIndexFields()));
+      formattedRecord.putAll(TransformationUtils.reformatMessage(input.getValue().value(), input.getTargetSearchIndexFields()));
       formattedRecord.put("stagedDate", input.getValue().timestamp());
       return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord);
     }
@@ -76,7 +76,7 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN
     }
     else {
       var formattedRecord = new HashMap<String, Object>();
-      formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysisAndErrors(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields()));
+      formattedRecord.putAll(TransformationUtils.reformatMessage(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields()));
       formattedRecord.put("stagedDate", input.getValue().timestamp());
       return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord);
     }
diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
index bf0f17d47..e04e6f0d4 100644
--- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
+++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
@@ -30,17 +30,20 @@ public class TransformationUtils {
   static final private Logger log = LoggerFactory.getLogger(TransformationUtils.class);

   ///////////////////////////////////////////////////////////////////////////////
-  // Indexing For Analysis & Errors //
+  // Convert to Indexing Message //
   ///////////////////////////////////////////////////////////////////////////////
-  public static Map<String, Object> reformatMessageForAnalysisAndErrors(ParsedRecord record, Set<String> fields) {
+  public static Map<String, Object> reformatMessage(ParsedRecord record, Set<String> fields) {
+
+    var discovery = record.getDiscovery();
     var analysis = record.getAnalysis();
     var errors = record.getErrors();
-
+    var discoveryMap = AvroUtils.avroToMap(discovery, true);
     var analysisMap = AvroUtils.avroToMap(analysis, true);
     var message = new HashMap<String, Object>();
     fields.forEach(field -> {
       message.put(field, analysisMap.get(field));
+      message.put(field, discoveryMap.get(field));
     });
     if (fields.contains("internalParentIdentifier")) {
       analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record));
     }
     var errorsList = errors.stream()
         .map(e -> AvroUtils.avroToMap(e))
         .collect(Collectors.toList());
-    message.put("errors", errorsList);
-
-    return message;
-  }
-
-  ///////////////////////////////////////////////////////////////////////////////
-  // Indexing For Search //
-  ///////////////////////////////////////////////////////////////////////////////
-  public static Map<String, Object> reformatMessageForSearch(ParsedRecord record, Set<String> fields) {
-    var discovery = record.getDiscovery();
-    var analysis = record.getAnalysis();
-    var discoveryMap = AvroUtils.avroToMap(discovery, true);
+    if (fields.contains("errors")) {
+      message.put("errors", errorsList);
+    }

-    var message = new HashMap<String, Object>();
-    fields.forEach(field -> {
-      message.put(field, discoveryMap.get(field));
-    });
     // prepare and apply fields that need to be reformatted for search
-    message.putAll(prepareGcmdKeyword(discovery));// TODO does this need and iff?
-    message.putAll(prepareDates(discovery.getTemporalBounding(), analysis.getTemporalBounding())); // TODO does this need and iff?
+    Map<String, Set<String>> gcmdKeywords = prepareGcmdKeyword(discovery);
+    gcmdKeywords.forEach((key, value) -> {
+      if (fields.contains(key)) {
+        message.put(key, value);
+      }
+    });
+    Map<String, Object> dates = prepareDates(discovery.getTemporalBounding(), analysis.getTemporalBounding());
+    dates.forEach((key, value) -> {
+      if (fields.contains(key)) {
+        message.put(key, value);
+      }
+    });
     if (fields.contains("dataFormat")) {
       message.put("dataFormat", prepareDataFormats(discovery));
     }
@@ -96,11 +96,7 @@ public static Map<String, Object> reformatMessageForSearch(ParsedRecord record,
     }
     if (fields.contains("checksums")) {
       message.put("checksums", prepareChecksums(record));
-      log.info("ZEB - including checksums (presumed granule)");
-    } else {
-      log.info("ZEB - excluding checksums (presumed collection)");
     }
-
     return message;
   }
diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy
index 83ad4575b..f456be6e5 100644
--- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy
+++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy
@@ -86,7 +86,7 @@ class TransformationUtilsSpec extends Specification {
   ///////////////////////////////
   // def "only mapped #type fields are indexed"() {
   //   when:
-  //   def result = TransformationUtils.reformatMessageForSearch(record, fields)
+  //   def result = TransformationUtils.reformatMessage(record, fields)
   //
   //   then:
   //   result.keySet().each({ assert fields.keySet().contains(it) }) // TODO this is a shallow only check!
@@ -103,7 +103,7 @@ class TransformationUtilsSpec extends Specification {
     ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord)
     .setFileInformation(FileInformation.newBuilder().setChecksums([Checksum.newBuilder().setAlgorithm(ChecksumAlgorithm.MD5).setValue('abc').build()]).build()).build()

-    def indexedRecord = TransformationUtils.reformatMessageForSearch(record, fields)
+    def indexedRecord = TransformationUtils.reformatMessage(record, fields)

     then:
@@ -183,7 +183,7 @@ class TransformationUtilsSpec extends Specification {

     // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields())
-    def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet())
+    def indexedRecord = TransformationUtils.reformatMessage(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet())
     // def indexedRecord = DataUtils.removeFromMap(pruned, minus)

     then:
@@ -646,7 +646,7 @@ class TransformationUtilsSpec extends Specification {

     // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields())
-    def indexedRecord = TransformationUtils.reformatMessageForSearch(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet())
+    def indexedRecord = TransformationUtils.reformatMessage(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet())
     // def indexedRecord = DataUtils.removeFromMap(pruned, minus)

     then:
@@ -1043,7 +1043,7 @@ class TransformationUtilsSpec extends Specification {
   def "party names are not included in granule search info"() {
     when:
     def record = TestUtils.inputGranuleRecord // <-- granule!
-    def result = TransformationUtils.reformatMessageForSearch(record, collectionFields) // <-- top level reformat method!
+    def result = TransformationUtils.reformatMessage(record, collectionFields) // <-- top level reformat method!

     then:
     result.individualNames == [] as Set
@@ -1160,7 +1160,7 @@ class TransformationUtilsSpec extends Specification {

   def "accession values are not included"() {
     when:
-      def result = TransformationUtils.reformatMessageForSearch(TestUtils.inputAvroRecord, collectionFields)
+      def result = TransformationUtils.reformatMessage(TestUtils.inputAvroRecord, collectionFields)

     then:
     result.accessionValues == null

From 05658860b31c4624394010744a4143c6b34f4f14 Mon Sep 17 00:00:00 2001
From: Zeb
Date: Fri, 5 Jun 2020 12:09:08 -0600
Subject: [PATCH 17/29] Clean up tests.
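
The four copy-pasted "clean up nested map" specs collapse into one
parameterized "reformatMessage populates with correct fields" test that runs
the same record against all four index aliases, and the temporalBounding
analysis is now rebuilt member-by-member via prepareTemporalBounding instead
of being copied wholesale, so unmapped members such as beginMonth never reach
the strictly mapped indices.

A hedged sketch of the helper's wiring (an assumed call site, not the literal
hunk below; gated like the other prepared fields in reformatMessage):

    if (fields.contains("temporalBounding")) {
      // whitelist only the mapped temporal analysis members
      message.put("temporalBounding", prepareTemporalBounding(record.getAnalysis().getTemporalBounding()));
    }
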
---
 .../indexer/util/TransformationUtils.java |  25 +
 .../util/TransformationUtilsSpec.groovy   | 833 +-----------------
 2 files changed, 71 insertions(+), 787 deletions(-)

diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
index e04e6f0d4..dce5ac3b2 100644
--- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
+++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
@@ -69,6 +69,9 @@ public static Map reformatMessage(ParsedRecord record, Set> prepareResponsibleParties(ParsedRecord r
 ////////////////////////////
 // Dates //
 ////////////////////////////
+
+  private static Map<String, Object> prepareTemporalBounding(TemporalBoundingAnalysis analysis) {
+    var result = new HashMap<String, Object>();
+    result.put("beginDescriptor", analysis.getBeginDescriptor());
+    result.put("beginIndexable", analysis.getBeginIndexable());
+    result.put("beginPrecision", analysis.getBeginPrecision());
+    result.put("beginUtcDateTimeString", analysis.getBeginUtcDateTimeString());
+    result.put("beginZoneSpecified", analysis.getBeginZoneSpecified());
+    result.put("endDescriptor", analysis.getEndDescriptor());
+    result.put("endIndexable", analysis.getEndIndexable());
+    result.put("endPrecision", analysis.getEndPrecision());
+    result.put("endUtcDateTimeString", analysis.getEndUtcDateTimeString());
+    result.put("endZoneSpecified", analysis.getEndZoneSpecified());
+    result.put("instantDescriptor", analysis.getInstantDescriptor());
+    result.put("instantIndexable", analysis.getInstantIndexable());
+    result.put("instantPrecision", analysis.getInstantPrecision());
+    result.put("instantUtcDateTimeString", analysis.getInstantUtcDateTimeString());
+    result.put("instantZoneSpecified", analysis.getInstantZoneSpecified());
+    result.put("rangeDescriptor", analysis.getRangeDescriptor());
+    return result;
+  }
+
   private static Map<String, Object> prepareDates(TemporalBounding bounding, TemporalBoundingAnalysis analysis) {
     String beginDate, endDate;
     Long year;
diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy
index f456be6e5..d3da590de 100644
--- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy
+++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy
@@ -29,9 +29,10 @@ import org.cedar.onestop.kafka.common.util.DataUtils;
 @Unroll
 class TransformationUtilsSpec extends
Specification { // // where: // type | fields | record - // 'collection' | collectionFields | TestUtils.inputCollectionRecord - // 'granule' | granuleFields | TestUtils.inputGranuleRecord + // 'collection' | collectionSearchFields | TestUtils.inputCollectionRecord + // 'granule' | granuleSearchFields | TestUtils.inputGranuleRecord // } - def "why is it complaining about checksums #label"() { + def "reformatMessage populates with correct fields for #label"() { when: ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - .setFileInformation(FileInformation.newBuilder().setChecksums([Checksum.newBuilder().setAlgorithm(ChecksumAlgorithm.MD5).setValue('abc').build()]).build()).build() + .setFileInformation( + FileInformation.newBuilder() + .setChecksums( + [ + Checksum.newBuilder() + .setAlgorithm(ChecksumAlgorithm.MD5) + .setValue('abc') + .build() + ] + ).build() + ) + .setAnalysis( + Analysis.newBuilder().setTemporalBounding( + TemporalBoundingAnalysis.newBuilder() + .setBeginDescriptor(ValidDescriptor.VALID) + .setBeginIndexable(true) + .setBeginPrecision(ChronoUnit.DAYS.toString()) + .setBeginZoneSpecified(null) + .setBeginUtcDateTimeString("2000-02-01") + .setBeginYear(2000) + .setBeginMonth(2) + .setBeginDayOfYear(32) + .setBeginDayOfMonth(1) + .build() + ).build() + ) + .build() def indexedRecord = TransformationUtils.reformatMessage(record, fields) then: + println(label) + println(JsonOutput.toJson(AvroUtils.avroToMap(record.getAnalysis(), true))) + println(JsonOutput.toJson(indexedRecord)) indexedRecord.keySet().contains("checksums") == shouldIncludeChecksums + indexedRecord.keySet().contains("internalParentIdentifier") == shouldIncludeParentIdentifier + (indexedRecord.keySet().contains("temporalBounding") && indexedRecord.get("temporalBounding").keySet().contains("beginMonth")) == false + (indexedRecord.keySet().contains("temporalBounding") && indexedRecord.get("temporalBounding").keySet().contains("beginIndexable")) == shouldIncludeTemporalAnalysis where: - label | shouldIncludeChecksums | fields - 'collections' | false | TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet() - 'granules' | true | TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet() + label | fields | shouldIncludeChecksums | shouldIncludeTemporalAnalysis | shouldIncludeParentIdentifier + 'search collections' | collectionSearchFields | false | false | false + 'search granules' | granuleSearchFields | true | false | true + 'analysis and errors collections' | collectionAnalysisErrorFields | false | true | false + 'analysis and errors granules' | granuleAnalysisErrorFields | false | true | true } - def "clean up nested map before indexing strictly mapped fields for search (granule)"() { - when: - // def parsed = [ - // identification: null, - // titles: null, - // description: null, - // dataAccess: null, - // thumbnail: null, - // temporalBounding: [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // beginYear: 2000, - // beginDayOfYear: 32, - // beginDayOfMonth: 1, - // beginMonth: 2, - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // endYear: null, - // endDayOfYear: null, - // endDayOfMonth: null, - // endMonth: null, - // instantDescriptor: null, - // instantPrecision: null, - 
// instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // instantYear: null, - // instantDayOfYear: null, - // instantDayOfMonth: null, - // instantMonth: null, - // rangeDescriptor: null, - // fakeField: 123 - // ], - // spatialBounding: null, - // internalParentIdentifier: null, - // errors: [ - // [ - // nonsense: "horrible", - // source: "valid field" - // ] - // ], - // garbage:"nuke meeee" - // ] - ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - .setAnalysis( - Analysis.newBuilder().setTemporalBounding( - TemporalBoundingAnalysis.newBuilder() - .setBeginDescriptor(ValidDescriptor.VALID) - .setBeginIndexable(true) - .setBeginPrecision(ChronoUnit.DAYS.toString()) - .setBeginZoneSpecified(null) - .setBeginUtcDateTimeString("2000-02-01") - .setBeginYear(2000) - .setBeginMonth(2) - .setBeginDayOfYear(32) - .setBeginDayOfMonth(1) - .build() - ).build()).build() - - - // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - def indexedRecord = TransformationUtils.reformatMessage(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_SEARCH_INDEX_ALIAS).keySet()) - // def indexedRecord = DataUtils.removeFromMap(pruned, minus) - - then: - // minus == [ - // temporalBounding: [ - // fakeField: 123 - // ], - // errors: [ - // [ - // nonsense: "horrible", - // ] - // ], - // garbage:"nuke meeee" - // ] - - // def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "legalConstraints", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "internalParentIdentifier", "filename", "checksums"] - - - indexedRecord.keySet().size() == granuleFields.size() - indexedRecord.keySet().each({ assert granuleFields.contains(it) }) - - } - - // def "prune fields - spatial"() { - // when: - // def mapWithSpatial = [ - // spatialBounding: [ - // type: "MultiPolygon", - // coordinates: [ - // [ - // [ - // [-180.0, -14.28], - // [-61.821, -14.28], - // [-61.821, 70.4], - // [-180.0, 70.4], - // [-180.0, -14.28] - // ] - // ], - // [ - // [ - // [144.657, -14.28], - // [180.0, -14.28], - // [180.0, 70.4], - // [144.657, 70.4], - // [144.657, -14.28] - // ] - // ] - // ] - // ] - // ] - // def minus = TransformationUtils.identifyUnmappedFields(mapWithSpatial, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS)) - // - // then: - // minus == [] - // } - -// def "debug integration" () { -// when: -// def jsonrecord = (new JsonSlurper()).parseText("""{ -// "type": "collection", -// "discovery": { -// "fileIdentifier": "gov.noaa.nodc:NDBC-COOPS", -// "parentIdentifier": null, -// "hierarchyLevelName": null, -// "doi": "doi:10.5072/FK2TEST", -// "purpose": "Basic research", -// "status": "completed", -// "credit": null, -// "title": "Coastal meteorological and water temperature data from National Water Level Observation Network (NWLON) and Physical Oceanographic Real-Time System (PORTS) stations of the NOAA Center 
for Operational Oceanographic Products and Services (CO-OPS)", -// "alternateTitle": null, -// "description": "The National Water Level Observation Network (NWLON) is a network of long-term water level stations operated and maintained by CO-OPS. NWLON stations are located on shore-based platforms, and primarily collect real-time water level measurements. As of January 2013, approximately 180 of 210 NWLON stations also collect real-time meteorological data. About 20 CO-OPS Physical Oceanographic Real-Time Systems (PORTS) comprise a group of water level stations, and 65 of these stations also collect real-time meteorological data. Data parameters include barometric pressure, wind direction, speed and gust, air temperature, and water temperature.", -// "keywords": [{ -// "values": ["DOC/NOAA/NESDIS/NODC > National Oceanographic Data Center, NESDIS, NOAA, U.S. Department of Commerce", "DOC/NOAA/NESDIS/NCEI > National Centers for Environmental Information, NESDIS, NOAA, U.S. Department of Commerce"], -// "type": "dataCenter", -// "namespace": "GCMD Keywords - Data Centers" -// }, { -// "values": ["0107939", "0108059", "0109292", "0111163", "0112393", "0113250", "0113898", "0114473", "0115274", "0115910", "0116703", "0117348", "0117811", "0118682", "0120725", "0120726", "0122183", "0122220", "0123085", "0123363", "0124305", "0125493", "0126410", "0126781", "0127407", "0128443", "0129526", "0130004", "0131097", "0131931", "0137308", "0138303", "0139574", "0141136", "0144301", "0145770", "0148198", "0151779", "0154391", "0155989"], -// "type": null, -// "namespace": "NCEI ACCESSION NUMBER" -// }, { -// "values": ["AIR TEMPERATURE", "BAROMETRIC PRESSURE", "DEWPOINT", "RELATIVE HUMIDITY", "SEA SURFACE TEMPERATURE", "VISIBILITY", "WIND DIRECTION", "WIND GUST", "WIND SPEED"], -// "type": "theme", -// "namespace": "NODC DATA TYPES THESAURUS" -// }, { -// "values": ["anemometer", "barometers", "meteorological sensors", "thermistor"], -// "type": "instrument", -// "namespace": "NODC INSTRUMENT TYPES THESAURUS" -// }, { -// "values": ["meteorological", "physical"], -// "type": "theme", -// "namespace": "NODC OBSERVATION TYPES THESAURUS" -// }, { -// "values": ["FIXED PLATFORM"], -// "type": "platform", -// "namespace": "NODC PLATFORM NAMES THESAURUS" -// }, { -// "values": ["US DOC; NOAA; NOS; Center for Operational Oceanographic Products and Services"], -// "type": "dataCenter", -// "namespace": "NODC COLLECTING INSTITUTION NAMES THESAURUS" -// }, { -// "values": ["US DOC; NOAA; NWS; National Data Buoy Center"], -// "type": "dataCenter", -// "namespace": "NODC SUBMITTING INSTITUTION NAMES THESAURUS" -// }, { -// "values": ["National Water Level Observation Network (NWLON)", "Physical Oceanographic Real-Time System (PORTS)"], -// "type": "project", -// "namespace": "NODC PROJECT NAMES THESAURUS" -// }, { -// "values": ["Bay of Fundy", "Beaufort Sea", "Bering Sea", "Caribbean Sea", "Coastal waters of Alabama", "Coastal Waters of Florida", "Coastal Waters of Louisiana", "Coastal Waters of Mississippi", "Coastal Waters of Southeast Alaska and British Columbia", "Coastal Waters of Texas", "Florida Keys National Marine Sanctuary", "Great Lakes", "Gulf of Alaska", "Gulf of Mexico", "Kaneohe Bay", "Monterey Bay National Marine Sanctuary", "North Atlantic Ocean", "North Pacific Ocean", "Papahanaumokuakea Marine National Monument", "Philippine Sea", "San Diego Bay", "South Pacific Ocean", "Yaquina Bay"], -// "type": "place", -// "namespace": "NODC SEA AREA NAMES THESAURUS" -// }, { -// "values": ["oceanography"], 
-// "type": "theme", -// "namespace": "WMO_CategoryCode" -// }, { -// "values": ["GOVERNMENT AGENCIES-U.S. FEDERAL AGENCIES > DOC > NOAA > DOC/NOAA/NOS/CO-OPS > Center for Operational Oceanographic Products and Services, National Ocean Service, NOAA, U.S. Department of Commerce > http://tidesandcurrents.noaa.gov/", "GOVERNMENT AGENCIES-U.S. FEDERAL AGENCIES > DOC > NOAA > DOC/NOAA/NWS/NDBC > National Data Buoy Center, National Weather Service, NOAA, U.S. Department of Commerce > http://www.ndbc.noaa.gov/"], -// "type": "dataCenter", -// "namespace": "GCMD Keywords - Data Centers" -// }, { -// "values": ["EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC PRESSURE", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC TEMPERATURE", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC TEMPERATURE > SURFACE TEMPERATURE > DEW POINT TEMPERATURE", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WATER VAPOR > HUMIDITY", "EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC WINDS > SURFACE WINDS > WIND SPEED/WIND DIRECTION", "EARTH SCIENCE > OCEANS > OCEAN OPTICS", "EARTH SCIENCE > OCEANS > OCEAN TEMPERATURE > SEA SURFACE TEMPERATURE"], -// "type": "theme", -// "namespace": "GCMD Keywords - Science Keywords" -// }, { -// "values": ["In Situ/Laboratory Instruments > Current/Wind Meters > ANEMOMETERS", "In Situ/Laboratory Instruments > Pressure/Height Meters > BAROMETERS", "In Situ/Laboratory Instruments > Temperature/Humidity Sensors > Thermistors > THERMISTORS"], -// "type": "instrument", -// "namespace": "GCMD Keywords - Instruments" -// }, { -// "values": ["air_pressure_at_sea_level", "air_temperature", "dew_point_temperature", "relative_humidity", "sea_surface_temperature", "time", "visibility_in_air", "wind_from_direction", "wind_speed", "wind_speed_of_gust"], -// "type": "theme", -// "namespace": "NetCDF Climate and Forecast (CF) Metadata Convention Standard Name Table" -// }, { -// "values": ["air_temperature_sensor", "anemometer", "barometer", "ct_sensor", "humidity_sensor", "ocean_temperature_sensor", "visibility_sensor"], -// "type": "instrument", -// "namespace": "NOS SENSOR THESAURUS" -// }, { -// "values": ["1611400 - NWWH1", "1612340 - OOUH1", "1612480 - MOKH1", "1615680 - KLIH1", "1617433 - KWHH1", "1617760 - ILOH1", "1619910 - SNDP5", "1630000 - APRP7", "1631428 - PGBP7", "1770000 - NSTP6", "1820000 - KWJP8", "1890000 - WAKP8", "2695540 - BEPB6", "8311030 - OBGN6", "8311062 - ALXN6", "8410140 - PSBM1", "8411060 - CFWM1", "8413320 - ATGM1", "8418150 - CASM1", "8419317 - WELM1", "8443970 - BHBM3", "8447386 - FRVM3", "8447387 - BLTM3", "8447412 - FRXM3", "8447930 - BZBM3", "8449130 - NTKM3", "8452660 - NWPR1", "8452944 - CPTR1", "8452951 - PTCR1", "8454000 - FOXR1", "8454049 - QPTR1", "8461490 - NLNC3", "8465705 - NWHC3", "8467150 - BRHC3", "8510560 - MTKN6", "8516945 - KPTN6", "8518750 - BATN6", "8519483 - BGNN4", "8519532 - MHRN6", "8530973 - ROBN4", "8531680 - SDHN4", "8534720 - ACYN4", "8536110 - CMAN4", "8537121 - SJSN4", "8538886 - TPBN4", "8539094 - BDRN4", "8540433 - MRCP1", "8545240 - PHBP1", "8548989 - NBLP1", "8551762 - DELD1", "8551910 - RDYD1", "8557380 - LWSD1", "8570283 - OCIM2", "8571421 - BISM2", "8571892 - CAMM2", "8573364 - TCBM2", "8573927 - CHCM2", "8574680 - BLTM2", "8574728 - FSKM2", "8575512 - APAM2", "8577018 - COVM2", "8577330 - SLIM2", "8578240 - PPTM2", "8594900 - WASD2", "8631044 - WAHV2", "8632200 - KPTV2", "8632837 - RPLV2", "8635027 - NCDV2", "8635750 - LWTV2", "8637611 - YKRV2", "8637689 - YKTV2", "8638511 - DOMV2", "8638595 - CRYV2", "8638610 - SWPV2", "8638614 - WDSV2", "8638863 - CBBV2", "8638999 - 
CHYV2", "8639348 - MNPV2", "8651370 - DUKN7", "8652587 - ORIN7", "8654467 - HCGN7", "8656483 - BFTN7", "8658120 - WLON7", "8658163 - JMPN7", "8661070 - MROS1", "8665530 - CHTS1", "8670870 - FPKG1", "8720030 - FRDF1", "8720215 - NFDF1", "8720218 - MYPF1", "8720219 - DMSF1", "8720228 - LTJF1", "8720233 - BLIF1", "8720245 - JXUF1", "8720357 - BKBF1", "8720503 - GCVF1", "8721604 - TRDF1", "8722670 - LKWF1", "8723214 - VAKF1", "8723970 - VCAF1", "8724580 - KYWF1", "8725110 - NPSF1", "8725520 - FMRF1", "8726384 - PMAF1", "8726412 - MTBF1", "8726520 - SAPF1", "8726607 - OPTF1", "8726667 - MCYF1", "8726669 - ERTF1", "8726673 - SBLF1", "8726679 - TSHF1", "8726694 - TPAF1", "8726724 - CWBF1", "8727520 - CKYF1", "8728690 - APCF1", "8729108 - PACF1", "8729210 - PCBF1", "8729840 - PCLF1", "8732828 - WBYA1", "8734673 - FMOA1", "8735180 - DILA1", "8736163 - MBPA1", "8736897 - MCGA1", "8737005 - PTOA1", "8737048 - OBLA1", "8741003 - PTBM6", "8741041 - ULAM6", "8741094 - RARM6", "8741501 - DKCM6", "8741533 - PNLM6", "8747437 - WYCM6", "8760721 - PILL1", "8760922 - PSTL1", "8761305 - SHBL1", "8761724 - GISL1", "8761927 - NWCL1", "8761955 - CARL1", "8762482 - BYGL1", "8762484 - FREL1", "8764044 - TESL1", "8764227 - AMRL1", "8764314 - EINL1", "8766072 - FRWL1", "8767816 - LCLL1", "8767961 - BKTL1", "8768094 - CAPL1", "8770570 - SBPT2", "8770613 - MGPT2", "8770822 - TXPT2", "8771013 - EPTT2", "8771341 - GNJT2", "8771450 - GTOT2", "8772447 - FCGT2", "8774770 - RCPT2", "8775870 - MQTT2", "8779770 - PTIT2", "9014070 - AGCM4", "9014090 - MBRM4", "9014098 - FTGM4", "9052030 - OSGN6", "9052058 - RCRN6", "9063012 - NIAN6", "9063020 - BUFN6", "9063028 - PSTN6", "9063038 - EREP1", "9063053 - FAIO1", "9063063 - CNDO1", "9063079 - MRHO1", "9063085 - THRO1", "9075014 - HRBM4", "9075065 - LPNM4", "9075080 - MACM4", "9075099 - DTLM4", "9076024 - RCKM4", "9076027 - WNEM4", "9076033 - LTRM4", "9076070 - SWPM4", "9087023 - LDTM4", "9087031 - HLNM4", "9087044 - CMTI2", "9087069 - KWNW3", "9087088 - MNMM4", "9087096 - PNLM4", "9099004 - PTIM4", "9099018 - MCGM4", "9099064 - DULM5", "9099090 - GDMM5", "9410170 - SDBC1", "9410172 - IIWC1", "9410230 - LJAC1", "9410660 - OHBC1", "9410665 - PRJC1", "9410670 - PFXC1", "9410840 - ICAC1", "9411340 - NTBC1", "9411406 - HRVC1", "9412110 - PSLC1", "9413450 - MTYC1", "9414290 - FTPC1", "9414296 - PXSC1", "9414311 - PXOC1", "9414523 - RTYC1", "9414750 - AAMC1", "9414763 - LNDC1", "9414769 - OMHC1", "9414776 - OKXC1", "9414797 - OBXC1", "9414847 - PPXC1", "9414863 - RCMC1", "9415020 - PRYC1", "9415102 - MZXC1", "9415115 - PSBC1", "9415118 - UPBC1", "9415141 - DPXC1", "9415144 - PCOC1", "9416841 - ANVC1", "9418767 - HBYC1", "9419750 - CECC1", "9431647 - PORO3", "9432780 - CHAO3", "9435380 - SBEO3", "9437540 - TLBO3", "9439011 - HMDO3", "9439040 - ASTO3", "9440422 - LOPW1", "9440910 - TOKW1", "9441102 - WPTW1", "9442396 - LAPW1", "9443090 - NEAW1", "9444090 - PTAW1", "9444900 - PTWW1", "9446482 - TCMW1", "9446484 - TCNW1", "9447130 - EBSW1", "9449424 - CHYW1", "9449880 - FRDW1", "9450460 - KECA2", "9451054 - PLXA2", "9451600 - ITKA2", "9452210 - JNEA2", "9452400 - SKTA2", "9452634 - ELFA2", "9453220 - YATA2", "9454050 - CRVA2", "9454240 - VDZA2", "9455090 - SWLA2", "9455500 - OVIA2", "9455760 - NKTA2", "9455920 - ANTA2", "9457292 - KDAA2", "9457804 - ALIA2", "9459450 - SNDA2", "9459881 - KGCA2", "9461380 - ADKA2", "9461710 - ATKA2", "9462450 - OLSA2", "9462620 - UNLA2", "9463502 - PMOA2", "9464212 - VCVA2", "9468756 - NMTA2", "9491094 - RDDA2", "9497645 - PRDA2", "9751364 - CHSV3", "9751381 - 
LAMV3", "9751401 - LTBV3", "9751639 - CHAV3", "9752695 - ESPP4", "9755371 - SJNP4", "9759110 - MGIP4", "9759394 - MGZP4", "9759412 - AUDP4", "9759938 - MISP4", "9761115 - BARA9"], -// "type": "platform", -// "namespace": "NOS - NWSLI PLATFORM THESAURUS" -// }, { -// "values": ["CONTINENT > NORTH AMERICA > CANADA > GREAT LAKES, CANADA", "CONTINENT > NORTH AMERICA > UNITED STATES OF AMERICA > GREAT LAKES", "OCEAN > ARCTIC OCEAN > BEAUFORT SEA", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN > BAY OF FUNDY", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN > CARIBBEAN SEA", "OCEAN > ATLANTIC OCEAN > NORTH ATLANTIC OCEAN > GULF OF MEXICO", "OCEAN > PACIFIC OCEAN > CENTRAL PACIFIC OCEAN > HAWAIIAN ISLANDS", "OCEAN > PACIFIC OCEAN > NORTH PACIFIC OCEAN", "OCEAN > PACIFIC OCEAN > NORTH PACIFIC OCEAN > BERING SEA", "OCEAN > PACIFIC OCEAN > NORTH PACIFIC OCEAN > GULF OF ALASKA", "OCEAN > PACIFIC OCEAN > SOUTH PACIFIC OCEAN"], -// "type": "place", -// "namespace": "GCMD Keywords - Locations" -// }], -// "topicCategories": ["environment", "oceans", "climatologyMeteorologyAtmosphere"], -// "temporalBounding": { -// "beginDate": "2013-03-01", -// "beginIndeterminate": null, -// "endDate": null, -// "endIndeterminate": "now", -// "instant": null, -// "instantIndeterminate": null, -// "description": null -// }, -// "spatialBounding": { -// "type": "MultiPolygon", -// "coordinates": [ -// [ -// [ -// [-180.0, -14.28], -// [-61.821, -14.28], -// [-61.821, 70.4], -// [-180.0, 70.4], -// [-180.0, -14.28] -// ] -// ], -// [ -// [ -// [144.657, -14.28], -// [180.0, -14.28], -// [180.0, 70.4], -// [144.657, 70.4], -// [144.657, -14.28] -// ] -// ] -// ] -// }, -// "isGlobal": false, -// "acquisitionInstruments": [], -// "acquisitionOperations": [], -// "acquisitionPlatforms": [], -// "dataFormats": [{ -// "name": "ORIGINATOR DATA FORMAT", -// "version": null -// }], -// "links": [{ -// "linkName": "Descriptive Information", -// "linkProtocol": "HTTP", -// "linkUrl": "http://data.nodc.noaa.gov/cgi-bin/iso?id=gov.noaa.nodc:NDBC-COOPS", -// "linkDescription": "Navigate directly to the URL for a descriptive web page with download links.", -// "linkFunction": "information" -// }, { -// "linkName": "Granule Search", -// "linkProtocol": "HTTP", -// "linkUrl": "http://www.nodc.noaa.gov/search/granule/rest/find/document?searchText=fileIdentifier%3ACO-OPS*&start=1&max=100&expandResults=true&f=searchPage", -// "linkDescription": "Granule Search", -// "linkFunction": "search" -// }, { -// "linkName": "THREDDS", -// "linkProtocol": "THREDDS", -// "linkUrl": "http://data.nodc.noaa.gov/thredds/catalog/ndbc/co-ops/", -// "linkDescription": "These data are available through a variety of services via a THREDDS (Thematic Real-time Environmental Distributed Data Services) Data Server (TDS). Depending on the dataset, the TDS can provide WMS, WCS, DAP, HTTP, and other data access and metadata services as well. For more information on the TDS, see http://www.unidata.ucar.edu/software/thredds/current/tds/.", -// "linkFunction": "download" -// }, { -// "linkName": "OPeNDAP", -// "linkProtocol": "DAP", -// "linkUrl": "http://data.nodc.noaa.gov/opendap/ndbc/co-ops/", -// "linkDescription": "These data are available through the Data Access Protocol (DAP) via an OPeNDAP Hyrax server. 
For a listing of OPeNDAP clients which may be used to access OPeNDAP-enabled data sets, please see the OPeNDAP website at http://opendap.org/.", -// "linkFunction": "download" -// }, { -// "linkName": "HTTP", -// "linkProtocol": "HTTP", -// "linkUrl": "http://data.nodc.noaa.gov/ndbc/co-ops/", -// "linkDescription": "Navigate directly to the URL for data access and direct download.", -// "linkFunction": "download" -// }, { -// "linkName": "FTP", -// "linkProtocol": "FTP", -// "linkUrl": "ftp://ftp.nodc.noaa.gov/pub/data.nodc/ndbc/co-ops/", -// "linkDescription": "These data are available through the File Transfer Protocol (FTP). You may use any FTP client to download these data.", -// "linkFunction": "download" -// }], -// "responsibleParties": [{ -// "individualName": null, -// "organizationName": "DOC/NOAA/NESDIS/NCEI > National Centers for Environmental Information, NESDIS, NOAA, U.S. Department of Commerce", -// "positionName": null, -// "role": "publisher", -// "email": "NODC.DataOfficer@noaa.gov", -// "phone": "301-713-3277" -// }, { -// "individualName": null, -// "organizationName": "DOC/NOAA/NESDIS/NODC > National Oceanographic Data Center, NESDIS, NOAA, U.S. Department of Commerce", -// "positionName": null, -// "role": "publisher", -// "email": "NODC.DataOfficer@noaa.gov", -// "phone": "301-713-3277" -// }, { -// "individualName": "Rex V Hervey", -// "organizationName": "US DOC; NOAA; NWS; National Data Buoy Center (NDBC)", -// "positionName": null, -// "role": "resourceProvider", -// "email": "rex.hervey@noaa.gov", -// "phone": "228-688-3007" -// }, { -// "individualName": null, -// "organizationName": "US DOC; NOAA; NWS; National Data Buoy Center (NDBC)", -// "positionName": null, -// "role": "resourceProvider", -// "email": null, -// "phone": null -// }, { -// "individualName": null, -// "organizationName": "DOC/NOAA/NESDIS/NCEI > National Centers for Environmental Information, NESDIS, NOAA, U.S. Department of Commerce", -// "positionName": null, -// "role": "pointOfContact", -// "email": "NCEI.Info@noaa.gov", -// "phone": "301-713-3277" -// }, { -// "individualName": null, -// "organizationName": "Global Change Data Center, Science and Exploration Directorate, Goddard Space Flight Center (GSFC) National Aeronautics and Space Administration (NASA)", -// "positionName": null, -// "role": "custodian", -// "email": null, -// "phone": null -// }], -// "thumbnail": "http://data.nodc.noaa.gov/cgi-bin/gfx?id=gov.noaa.nodc:NDBC-COOPS", -// "thumbnailDescription": "Preview graphic", -// "creationDate": null, -// "revisionDate": null, -// "publicationDate": "2013-06-05", -// "citeAsStatements": ["Cite as: Hervey, R. V. and US DOC; NOAA; NWS; National Data Buoy Center (2013). Coastal meteorological and water temperature data from National Water Level Observation Network (NWLON) and Physical Oceanographic Real-Time System (PORTS) stations of the NOAA Center for Operational Oceanographic Products and Services (CO-OPS). National Oceanographic Data Center, NOAA. Dataset. [access date]"], -// "crossReferences": [], -// "largerWorks": [], -// "useLimitation": "accessLevel: Public", -// "legalConstraints": ["Cite as: Hervey, R. V. and US DOC; NOAA; NWS; National Data Buoy Center (2013). Coastal meteorological and water temperature data from National Water Level Observation Network (NWLON) and Physical Oceanographic Real-Time System (PORTS) stations of the NOAA Center for Operational Oceanographic Products and Services (CO-OPS). National Oceanographic Data Center, NOAA. Dataset. 
[access date]", "NOAA and NCEI cannot provide any warranty as to the accuracy, reliability, or completeness of furnished data. Users assume responsibility to determine the usability of these data. The user is responsible for the results of any application of this data for other than its intended purpose."], -// "accessFeeStatement": null, -// "orderingInstructions": null, -// "edition": null, -// "dsmmAccessibility": 0, -// "dsmmDataIntegrity": 0, -// "dsmmDataQualityAssessment": 0, -// "dsmmDataQualityAssurance": 0, -// "dsmmDataQualityControlMonitoring": 0, -// "dsmmPreservability": 0, -// "dsmmProductionSustainability": 0, -// "dsmmTransparencyTraceability": 0, -// "dsmmUsability": 0, -// "dsmmAverage": 0.0, -// "updateFrequency": "asNeeded", -// "presentationForm": "tableDigital", -// "services": [] -// }, -// "analysis": { -// "identification": { -// "fileIdentifierExists": true, -// "fileIdentifierString": "gov.noaa.nodc:NDBC-COOPS", -// "doiExists": true, -// "doiString": "doi:10.5072/FK2TEST", -// "parentIdentifierExists": false, -// "parentIdentifierString": null, -// "hierarchyLevelNameExists": false, -// "isGranule": false -// }, -// "titles": { -// "titleExists": true, -// "titleCharacters": 244, -// "alternateTitleExists": false, -// "alternateTitleCharacters": 0, -// "titleFleschReadingEaseScore": -15.662258064516124, -// "alternateTitleFleschReadingEaseScore": null, -// "titleFleschKincaidReadingGradeLevel": 23.14516129032258, -// "alternateTitleFleschKincaidReadingGradeLevel": null -// }, -// "description": { -// "descriptionExists": true, -// "descriptionCharacters": 642, -// "descriptionFleschReadingEaseScore": 24.320808988764043, -// "descriptionFleschKincaidReadingGradeLevel": 14.289078651685397 -// }, -// "dataAccess": { -// "dataAccessExists": true -// }, -// "thumbnail": { -// "thumbnailExists": true -// }, -// "temporalBounding": { -// "beginDescriptor": "VALID", -// "beginPrecision": "Days", -// "beginIndexable": true, -// "beginZoneSpecified": null, -// "beginUtcDateTimeString": "2013-03-01T00:00:00Z", -// "beginYear": 2013, -// "beginDayOfYear": 60, -// "beginDayOfMonth": 1, -// "beginMonth": 3, -// "endDescriptor": "UNDEFINED", -// "endPrecision": null, -// "endIndexable": true, -// "endZoneSpecified": null, -// "endUtcDateTimeString": null, -// "endYear": null, -// "endDayOfYear": null, -// "endDayOfMonth": null, -// "endMonth": null, -// "instantDescriptor": "UNDEFINED", -// "instantPrecision": null, -// "instantIndexable": true, -// "instantZoneSpecified": null, -// "instantUtcDateTimeString": null, -// "instantYear": null, -// "instantDayOfYear": null, -// "instantDayOfMonth": null, -// "instantMonth": null, -// "rangeDescriptor": "ONGOING" -// }, -// "spatialBounding": { -// "spatialBoundingExists": true, -// "isValid": true, -// "validationError": null -// } -// }, -// "fileInformation": null, -// "fileLocations": {}, -// "publishing": { -// "isPrivate": false, -// "until": null -// }, -// "relationships": [], -// "errors": [] -// }""") -// def record = AvroUtils.mapToAvro((Map)jsonrecord, ParsedRecord) -// -// // println("zeb "+JsonOutput.toJson(parsed)) -// println("ZEB") -// println(record) -// def discovery = record.getDiscovery(); -// def analysis = record.getAnalysis(); -// def discoveryMap = AvroUtils.avroToMap(discovery, true); -// -// // prepare and apply fields that need to be reformatted for search -// discoveryMap.putAll(TransformationUtils.prepareGcmdKeyword(discovery)); -// 
discoveryMap.putAll(TransformationUtils.prepareDates(discovery.getTemporalBounding(), analysis.getTemporalBounding())); -// discoveryMap.put("dataFormat", TransformationUtils.prepareDataFormats(discovery)); -// discoveryMap.put("linkProtocol", TransformationUtils.prepareLinkProtocols(discovery)); -// discoveryMap.put("serviceLinks", TransformationUtils.prepareServiceLinks(discovery)); -// discoveryMap.put("serviceLinkProtocol", TransformationUtils.prepareServiceLinkProtocols(discovery)); -// discoveryMap.putAll(TransformationUtils.prepareResponsibleParties(record)); -// discoveryMap.put("internalParentIdentifier", TransformationUtils.prepareInternalParentIdentifier(record)); -// discoveryMap.put("filename", TransformationUtils.prepareFilename(record)); -// discoveryMap.put("checksums", TransformationUtils.prepareChecksums(record)); -// -// def pruned = TransformationUtils.pruneKnownUnmappedFields(discoveryMap, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) -// def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS)) -// // def indexedRecord = DataUtils.removeFromMap(pruned, minus) -// -// println(JsonOutput.toJson(pruned)) -// println(JsonOutput.toJson(minus)) -// then: -// pruned == [] -// minus == [ -// internalParentIdentifier: null, // ok for granule, not collection -// temporalBounding: [ -// fakeField: 123 -// ], -// errors: [ -// [ -// nonsense: "horrible", -// ] -// ], -// garbage:"nuke meeee" -// ] -// // -// // def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "errors" ] -// // indexedRecord.keySet().size() == expectedKeyset.size() -// // indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) -// // -// // indexedRecord.temporalBounding == [ -// // beginDescriptor: ValidDescriptor.VALID, -// // beginPrecision: ChronoUnit.DAYS.toString(), -// // beginIndexable: true, -// // beginZoneSpecified: null, -// // beginUtcDateTimeString: "2000-02-01", -// // endDescriptor: null, -// // endPrecision: null, -// // endIndexable: null, -// // endZoneSpecified: null, -// // endUtcDateTimeString: null, -// // instantDescriptor: null, -// // instantPrecision: null, -// // instantIndexable: null, -// // instantZoneSpecified: null, -// // instantUtcDateTimeString: null, -// // rangeDescriptor: null -// // ] -// // -// // indexedRecord.errors.size() == 1 -// // indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired -// // source: "valid field" -// // ] -// } - - def "clean up nested map before indexing strictly mapped fields for search (collection)"() { - when: - - ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - .setAnalysis( - Analysis.newBuilder().setTemporalBounding( - TemporalBoundingAnalysis.newBuilder() - .setBeginDescriptor(ValidDescriptor.VALID) - .setBeginIndexable(true) - .setBeginPrecision(ChronoUnit.DAYS.toString()) - .setBeginZoneSpecified(null) - .setBeginUtcDateTimeString("2000-02-01") - .setBeginYear(2000) - .setBeginMonth(2) - .setBeginDayOfYear(32) - .setBeginDayOfMonth(1) - .build() - ).build()).build() - - - // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - def indexedRecord = TransformationUtils.reformatMessage(record, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_SEARCH_INDEX_ALIAS).keySet()) - // def indexedRecord = 
DataUtils.removeFromMap(pruned, minus) - - then: - // - // def expectedKeyset = ["fileIdentifier", "parentIdentifier", "doi", "title", "description", "keywords", "topicCategories", "temporalBounding", "spatialBounding", "isGlobal", "acquisitionInstruments", "acquisitionOperations", "acquisitionPlatforms", "dataFormats", "links", "responsibleParties", "thumbnail", "citeAsStatements", "crossReferences", "largerWorks", "useLimitation", "legalConstraints", "accessFeeStatement", "orderingInstructions", "edition", "dsmmAverage", "services", "gcmdVerticalResolution", "gcmdDataCenters", "gcmdTemporalResolution", "gcmdLocations", "gcmdScience", "beginDate", "endDate", "endDayOfYear", "beginYear", "endMonth", "endYear", "endDayOfMonth", "dataFormat", "linkProtocol", "serviceLinks", "serviceLinkProtocol", "organizationNames", - // "individualNames", "checksums"] - - - indexedRecord.keySet().size() == collectionFields.size() - collectionFields.each({ assert indexedRecord.keySet().contains(it) }) - indexedRecord.keySet().each({ assert collectionFields.contains(it) }) - - } - - // def "clean up nested map before indexing strictly mapped fields for analysis and errors (granule)"() { // TODO change to use reformatMessageFor method - // when: - // def parsed = [ - // identification: null, - // internalParentIdentifier: null, - // titles: null, - // description: null, - // dataAccess: null, - // thumbnail: null, - // temporalBounding: [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // beginYear: 2000, - // beginDayOfYear: 32, - // beginDayOfMonth: 1, - // beginMonth: 2, - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // endYear: null, - // endDayOfYear: null, - // endDayOfMonth: null, - // endMonth: null, - // instantDescriptor: null, - // instantPrecision: null, - // instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // instantYear: null, - // instantDayOfYear: null, - // instantDayOfMonth: null, - // instantMonth: null, - // rangeDescriptor: null, - // fakeField: 123 - // ], - // spatialBounding: null, - // errors: [ - // [ - // nonsense: "horrible", - // source: "valid field" - // ] - // ], - // garbage:"nuke meeee" - // ] - // - // // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - // // .setAnalysis( - // // Analysis.newBuilder().setTemporalBounding( - // // TemporalBoundingAnalysis.newBuilder() - // // .setBeginDescriptor(ValidDescriptor.VALID) - // // .setBeginIndexable(true) - // // .setBeginPrecision(ChronoUnit.DAYS.toString()) - // // .setBeginZoneSpecified(null) - // // .setBeginUtcDateTimeString("2000-02-01") - // // .setBeginYear(2000) - // // .setBeginMonth(2) - // // .setBeginDayOfYear(32) - // // .setBeginDayOfMonth(1) - // // .build() - // // ).build()).build() - // - // // def parsed = TransformationUtils.unfilteredAEMessage(record) - // - // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - // def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.GRANULE_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - // def indexedRecord = DataUtils.removeFromMap(pruned, minus) - // - // then: - // minus == [ - // temporalBounding: [ - // fakeField: 123 - // ], - // 
errors: [ - // [ - // nonsense: "horrible", - // ] - // ], - // garbage:"nuke meeee" - // ] - // - // def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "internalParentIdentifier", "errors" ] - // indexedRecord.keySet().size() == expectedKeyset.size() - // indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) - // - // indexedRecord.temporalBounding == [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // instantDescriptor: null, - // instantPrecision: null, - // instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // rangeDescriptor: null - // ] - // - // indexedRecord.errors.size() == 1 - // indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired - // source: "valid field" - // ] - // - // } - - // def "clean up nested map before indexing strictly mapped fields for analysis and errors (collection)"() { // TODO change to use reformatMessageFor method - // when: - // def parsed = [ - // identification: null, - // internalParentIdentifier: null, - // titles: null, - // description: null, - // dataAccess: null, - // thumbnail: null, - // temporalBounding: [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // beginYear: 2000, - // beginDayOfYear: 32, - // beginDayOfMonth: 1, - // beginMonth: 2, - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // endYear: null, - // endDayOfYear: null, - // endDayOfMonth: null, - // endMonth: null, - // instantDescriptor: null, - // instantPrecision: null, - // instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // instantYear: null, - // instantDayOfYear: null, - // instantDayOfMonth: null, - // instantMonth: null, - // rangeDescriptor: null, - // fakeField: 123 - // ], - // spatialBounding: null, - // errors: [ - // [ - // nonsense: "horrible", - // source: "valid field" - // ] - // ], - // garbage:"nuke meeee" - // ] - // - // // ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) - // // .setAnalysis( - // // Analysis.newBuilder().setTemporalBounding( - // // TemporalBoundingAnalysis.newBuilder() - // // .setBeginDescriptor(ValidDescriptor.VALID) - // // .setBeginIndexable(true) - // // .setBeginPrecision(ChronoUnit.DAYS.toString()) - // // .setBeginZoneSpecified(null) - // // .setBeginUtcDateTimeString("2000-02-01") - // // .setBeginYear(2000) - // // .setBeginMonth(2) - // // .setBeginDayOfYear(32) - // // .setBeginDayOfMonth(1) - // // .build() - // // ).build()).build() - // - // // def parsed = TransformationUtils.unfilteredAEMessage(record) - // - // def pruned = TransformationUtils.pruneKnownUnmappedFields(parsed, IndexingInput.getUnmappedAnalysisAndErrorsIndexFields()) - // def minus = TransformationUtils.identifyUnmappedFields(pruned, TestUtils.esConfig.indexedProperties(TestUtils.esConfig.COLLECTION_ERROR_AND_ANALYSIS_INDEX_ALIAS)) - // def indexedRecord = DataUtils.removeFromMap(pruned, minus) - // - 
// then: - // minus == [ - // internalParentIdentifier: null, // ok for granule, not collection - // temporalBounding: [ - // fakeField: 123 - // ], - // errors: [ - // [ - // nonsense: "horrible", - // ] - // ], - // garbage:"nuke meeee" - // ] - // - // def expectedKeyset = ["identification", "titles", "description", "dataAccess", "thumbnail", "temporalBounding", "spatialBounding", "errors" ] - // indexedRecord.keySet().size() == expectedKeyset.size() - // indexedRecord.keySet().each({ assert expectedKeyset.contains(it) }) - // - // indexedRecord.temporalBounding == [ - // beginDescriptor: ValidDescriptor.VALID, - // beginPrecision: ChronoUnit.DAYS.toString(), - // beginIndexable: true, - // beginZoneSpecified: null, - // beginUtcDateTimeString: "2000-02-01", - // endDescriptor: null, - // endPrecision: null, - // endIndexable: null, - // endZoneSpecified: null, - // endUtcDateTimeString: null, - // instantDescriptor: null, - // instantPrecision: null, - // instantIndexable: null, - // instantZoneSpecified: null, - // instantUtcDateTimeString: null, - // rangeDescriptor: null - // ] - // - // indexedRecord.errors.size() == 1 - // indexedRecord.errors[0] == [nonsense:"horrible", // FIXME this is not actually desired - // source: "valid field" - // ] - // - // } - //////////////////////////////// // Identifiers, "Names" // //////////////////////////////// @@ -1043,7 +302,7 @@ class TransformationUtilsSpec extends Specification { def "party names are not included in granule search info"() { when: def record = TestUtils.inputGranuleRecord // <-- granule! - def result = TransformationUtils.reformatMessage(record, collectionFields) // <-- top level reformat method! + def result = TransformationUtils.reformatMessage(record, collectionSearchFields) // <-- top level reformat method! then: result.individualNames == [] as Set @@ -1160,7 +419,7 @@ class TransformationUtilsSpec extends Specification { def "accession values are not included"() { when: - def result = TransformationUtils.reformatMessage(TestUtils.inputAvroRecord, collectionFields) + def result = TransformationUtils.reformatMessage(TestUtils.inputAvroRecord, collectionSearchFields) then: result.accessionValues == null From 6025f02fbf502def12700abf23a4f852cc2d231e Mon Sep 17 00:00:00 2001 From: Zeb Date: Tue, 9 Jun 2020 14:28:45 -0600 Subject: [PATCH 18/29] Minor cleanup --- .../indexer/util/TransformationUtilsSpec.groovy | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index d3da590de..971741271 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -85,18 +85,6 @@ class TransformationUtilsSpec extends Specification { /////////////////////////////// // Generic Indexed Fields // /////////////////////////////// - // def "only mapped #type fields are indexed"() { - // when: - // def result = TransformationUtils.reformatMessage(record, fields) - // - // then: - // result.keySet().each({ assert fields.keySet().contains(it) }) // TODO this is a shallow only check! 
- // - // where: - // type | fields | record - // 'collection' | collectionSearchFields | TestUtils.inputCollectionRecord - // 'granule' | granuleSearchFields | TestUtils.inputGranuleRecord - // } def "reformatMessage populates with correct fields for #label"() { when: From f49e8a0815b3cf76d45bb6690326434d991d61bc Mon Sep 17 00:00:00 2001 From: Zeb Date: Mon, 15 Jun 2020 11:43:57 -0600 Subject: [PATCH 19/29] Undo combining reformatMessage functions. --- .../onestop/indexer/util/IndexingUtils.java | 4 +- .../indexer/util/TransformationUtils.java | 55 +++++++++++++++-- .../util/TransformationUtilsSpec.groovy | 61 +++++++++++++++++-- 3 files changed, 108 insertions(+), 12 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java index aab650564..fbb1a5594 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java @@ -64,7 +64,7 @@ public static DocWriteRequest buildSearchWriteRequest(String indexName, DocWr } else { var formattedRecord = new HashMap(); - formattedRecord.putAll(TransformationUtils.reformatMessage(input.getValue().value(), input.getTargetSearchIndexFields())); + formattedRecord.putAll(TransformationUtils.reformatMessageForSearch(input.getValue().value(), input.getTargetSearchIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); } @@ -76,7 +76,7 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN } else { var formattedRecord = new HashMap(); - formattedRecord.putAll(TransformationUtils.reformatMessage(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); + formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysis(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields())); formattedRecord.put("stagedDate", input.getValue().timestamp()); return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord); } diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index dce5ac3b2..9f395fa2e 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -32,18 +32,15 @@ public class TransformationUtils { /////////////////////////////////////////////////////////////////////////////// // Convert to Indexing Message // /////////////////////////////////////////////////////////////////////////////// - public static Map reformatMessage(ParsedRecord record, Set fields) { + public static Map reformatMessageForAnalysis(ParsedRecord record, Set fields) { - var discovery = record.getDiscovery(); var analysis = record.getAnalysis(); var errors = record.getErrors(); - var discoveryMap = AvroUtils.avroToMap(discovery, true); var analysisMap = AvroUtils.avroToMap(analysis, true); var message = new HashMap(); fields.forEach(field -> { message.put(field, analysisMap.get(field)); - message.put(field, discoveryMap.get(field)); }); if (fields.contains("internalParentIdentifier")) { analysisMap.put("internalParentIdentifier", prepareInternalParentIdentifier(record)); @@ -56,6 +53,56 @@ public static Map reformatMessage(ParsedRecord record, Set 
prepareIdentification(IdentificationAnalysis identification) { + var result = new HashMap(); + var analysis = AvroUtils.avroToMap(identification); // TODO using map because I need javadocs on the IdentificationAnalysis object... + + if (analysis == null) { + return result; + } + result.put("doiExists", analysis.get("doiExists")); + result.put("doiString", analysis.get("doiString")); + result.put("fileIdentifierExists", analysis.get("fileIdentifierExists")); + result.put("fileIdentifierString", analysis.get("fileIdentifierString")); + result.put("hierarchyLevelNameExists", analysis.get("hierarchyLevelNameExists")); + result.put("isGranule", analysis.get("isGranule")); + result.put("parentIdentifierExists", analysis.get("parentIdentifierExists")); + // if ((Boolean)analysis.get("isGranule")) { FIXME + // result.put("parentIdentifierString", analysis.get("parentIdentifierString")); + // } + return result; + } + + public static Map reformatMessageForSearch(ParsedRecord record, Set fields) { + + var discovery = record.getDiscovery(); + var analysis = record.getAnalysis(); + var errors = record.getErrors(); + var discoveryMap = AvroUtils.avroToMap(discovery, true); + var analysisMap = AvroUtils.avroToMap(analysis, true); + var message = new HashMap(); + + fields.forEach(field -> { + message.put(field, discoveryMap.get(field)); + }); + var errorsList = errors.stream() + .map(e -> AvroUtils.avroToMap(e)) + .collect(Collectors.toList()); + + if (fields.contains("errors")) { + message.put("errors", errorsList); + } + // prepare and apply fields that need to be reformatted for search Map> gcmdKeywords = prepareGcmdKeyword(discovery); gcmdKeywords.forEach((key, value) -> { diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 971741271..74eec7759 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -86,7 +86,7 @@ class TransformationUtilsSpec extends Specification { // Generic Indexed Fields // /////////////////////////////// - def "reformatMessage populates with correct fields for #label"() { + def "reformatMessageForAnalysis populates with correct fields for #label"() { when: ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) @@ -118,7 +118,7 @@ class TransformationUtilsSpec extends Specification { ) .build() - def indexedRecord = TransformationUtils.reformatMessage(record, fields) + def indexedRecord = TransformationUtils.reformatMessageForAnalysis(record, fields) then: @@ -132,13 +132,62 @@ class TransformationUtilsSpec extends Specification { where: label | fields | shouldIncludeChecksums | shouldIncludeTemporalAnalysis | shouldIncludeParentIdentifier - 'search collections' | collectionSearchFields | false | false | false - 'search granules' | granuleSearchFields | true | false | true 'analysis and errors collections' | collectionAnalysisErrorFields | false | true | false 'analysis and errors granules' | granuleAnalysisErrorFields | false | true | true } + + def "reformatMessageForSearch populates with correct fields for #label"() { + when: + + ParsedRecord record = ParsedRecord.newBuilder(TestUtils.inputAvroRecord) + .setFileInformation( + FileInformation.newBuilder() + .setChecksums( + [ + Checksum.newBuilder() + .setAlgorithm(ChecksumAlgorithm.MD5) + .setValue('abc') + .build() + ] + ).build() + ) 
+ .setAnalysis( + Analysis.newBuilder().setTemporalBounding( + TemporalBoundingAnalysis.newBuilder() + .setBeginDescriptor(ValidDescriptor.VALID) + .setBeginIndexable(true) + .setBeginPrecision(ChronoUnit.DAYS.toString()) + .setBeginZoneSpecified(null) + .setBeginUtcDateTimeString("2000-02-01") + .setBeginYear(2000) + .setBeginMonth(2) + .setBeginDayOfYear(32) + .setBeginDayOfMonth(1) + .build() + ).build() + ) + .build() + + def indexedRecord = TransformationUtils.reformatMessageForSearch(record, fields) + + then: + + println(label) + println(JsonOutput.toJson(AvroUtils.avroToMap(record.getAnalysis(), true))) + println(JsonOutput.toJson(indexedRecord)) + indexedRecord.keySet().contains("checksums") == shouldIncludeChecksums + indexedRecord.keySet().contains("internalParentIdentifier") == shouldIncludeParentIdentifier + (indexedRecord.keySet().contains("temporalBounding") && indexedRecord.get("temporalBounding").keySet().contains("beginMonth")) == false + (indexedRecord.keySet().contains("temporalBounding") && indexedRecord.get("temporalBounding").keySet().contains("beginIndexable")) == shouldIncludeTemporalAnalysis + + where: + label | fields | shouldIncludeChecksums | shouldIncludeTemporalAnalysis | shouldIncludeParentIdentifier + 'search collections' | collectionSearchFields | false | false | false + 'search granules' | granuleSearchFields | true | false | true + } + //////////////////////////////// // Identifiers, "Names" // //////////////////////////////// @@ -290,7 +339,7 @@ class TransformationUtilsSpec extends Specification { def "party names are not included in granule search info"() { when: def record = TestUtils.inputGranuleRecord // <-- granule! - def result = TransformationUtils.reformatMessage(record, collectionSearchFields) // <-- top level reformat method! + def result = TransformationUtils.reformatMessageForSearch(record, collectionSearchFields) // <-- top level reformat method! then: result.individualNames == [] as Set @@ -407,7 +456,7 @@ class TransformationUtilsSpec extends Specification { def "accession values are not included"() { when: - def result = TransformationUtils.reformatMessage(TestUtils.inputAvroRecord, collectionSearchFields) + def result = TransformationUtils.reformatMessageForSearch(TestUtils.inputAvroRecord, collectionSearchFields) then: result.accessionValues == null From 9a136f904330b9527eb5ee0ab344ae97ba24c91b Mon Sep 17 00:00:00 2001 From: Zeb Date: Wed, 17 Jun 2020 13:27:06 -0600 Subject: [PATCH 20/29] Handle differences between granule and collection index. 
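
A note on the change below: the collection analysis-and-errors mapping is declared
"dynamic": "strict" and, unlike the granule mapping, does not appear to define
parentIdentifierString, so prepareIdentification now emits that field only for
granules. A minimal standalone sketch of that gating follows -- the class name,
method name, and the plain Map standing in for the Avro-backed
IdentificationAnalysis are illustrative only, not the project's actual types:

    import java.util.HashMap;
    import java.util.Map;

    class IdentificationGatingSketch {
      enum RecordType { collection, granule }

      // Copy the identification fields shared by both indices, then add
      // parentIdentifierString only for granules, so a document never carries
      // a field the strict collection mapping does not define.
      static Map<String, Object> prepare(Map<String, Object> analysis, RecordType recordType) {
        var result = new HashMap<String, Object>();
        result.put("isGranule", analysis.get("isGranule"));
        result.put("parentIdentifierExists", analysis.get("parentIdentifierExists"));
        if (recordType == RecordType.granule) {
          result.put("parentIdentifierString", analysis.get("parentIdentifierString"));
        }
        return result;
      }
    }

Under this scheme the same transform serves both indices; only the RecordType
passed in by the caller differs.
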
---
 .../cedar/onestop/indexer/util/IndexingInput.java  |  4 ++++
 .../cedar/onestop/indexer/util/IndexingUtils.java  |  2 +-
 .../onestop/indexer/util/TransformationUtils.java  | 12 ++++++------
 .../indexer/util/TransformationUtilsSpec.groovy    |  2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java
index fcfe59659..ceb79da2b 100644
--- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java
+++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingInput.java
@@ -72,6 +72,10 @@ public Set getTargetAnalysisAndErrorsIndexFields() {
    }
  }

+  public RecordType getRecordType() {
+    return recordType;
+  }
+
  @Override
  public String toString() {
    return "IndexingInput {" +
diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java
index fbb1a5594..764b39526 100644
--- a/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java
+++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/IndexingUtils.java
@@ -76,7 +76,7 @@ public static DocWriteRequest buildAnalysisAndErrorWriteRequest(String indexN
    }
    else {
      var formattedRecord = new HashMap();
-      formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysis(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields()));
+      formattedRecord.putAll(TransformationUtils.reformatMessageForAnalysis(input.getValue().value(), input.getTargetAnalysisAndErrorsIndexFields(), input.getRecordType()));
      formattedRecord.put("stagedDate", input.getValue().timestamp());
      return new IndexRequest(indexName).opType(opType).id(input.getKey()).source(formattedRecord);
    }
diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
index 9f395fa2e..27434082e 100644
--- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
+++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java
@@ -32,7 +32,7 @@ public class TransformationUtils {
  ///////////////////////////////////////////////////////////////////////////////
  // Convert to Indexing Message //
  ///////////////////////////////////////////////////////////////////////////////
-  public static Map reformatMessageForAnalysis(ParsedRecord record, Set fields) {
+  public static Map reformatMessageForAnalysis(ParsedRecord record, Set fields, RecordType recordType) {

    var analysis = record.getAnalysis();
    var errors = record.getErrors();
@@ -57,13 +57,13 @@ public static Map reformatMessageForAnalysis(ParsedRecord record
      message.put("temporalBounding", prepareTemporalBounding(analysis.getTemporalBounding()));
    }
    if (fields.contains("identification")) {
-      message.put("identification", prepareIdentification(analysis.getIdentification()));
+      message.put("identification", prepareIdentification(analysis.getIdentification(), recordType));
    }

    return message;
  }

-  public static Map prepareIdentification(IdentificationAnalysis identification) {
+  public static Map prepareIdentification(IdentificationAnalysis identification, RecordType recordType) {
    var result = new HashMap();
    var analysis = AvroUtils.avroToMap(identification); // TODO using map because I need javadocs on the IdentificationAnalysis object...
@@ -77,9 +77,9 @@ public static Map prepareIdentification(IdentificationAnalysis i result.put("hierarchyLevelNameExists", analysis.get("hierarchyLevelNameExists")); result.put("isGranule", analysis.get("isGranule")); result.put("parentIdentifierExists", analysis.get("parentIdentifierExists")); - // if ((Boolean)analysis.get("isGranule")) { FIXME - // result.put("parentIdentifierString", analysis.get("parentIdentifierString")); - // } + if (recordType == RecordType.granule) { + result.put("parentIdentifierString", analysis.get("parentIdentifierString")); + } return result; } diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 74eec7759..00db07fa7 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -118,7 +118,7 @@ class TransformationUtilsSpec extends Specification { ) .build() - def indexedRecord = TransformationUtils.reformatMessageForAnalysis(record, fields) + def indexedRecord = TransformationUtils.reformatMessageForAnalysis(record, fields, RecordType.granule) then: From a9dab1c96887722ca1cbcbd377d347e681c30b56 Mon Sep 17 00:00:00 2001 From: Zeb Date: Mon, 22 Jun 2020 14:59:53 -0600 Subject: [PATCH 21/29] Refactor date parsing based on moving (some) logic to analysis. TODO: more of the logic really belongs in analysis. --- .../indexer/util/TransformationUtils.java | 303 +++++++++++++++--- .../util/TransformationUtilsSpec.groovy | 59 +++- 2 files changed, 299 insertions(+), 63 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index 27434082e..de501f5db 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -17,6 +17,9 @@ import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID; +// import org.cedar.schemas.analyze.Temporal; +// +// import java.time.temporal.ChronoField; import org.cedar.onestop.kafka.common.util.DataUtils; @@ -359,93 +362,293 @@ private static Map prepareTemporalBounding(TemporalBoundingAnaly return result; } - private static Map prepareDates(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { + private static Map prepareDatesForInstant(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { String beginDate, endDate; - Long year; Long beginYear, endYear; - int beginDayOfYear, beginDayOfMonth, beginMonth; - int endDayOfYear, endDayOfMonth, endMonth; + Integer beginDayOfYear, beginDayOfMonth, beginMonth; + Integer endDayOfYear, endDayOfMonth, endMonth; var result = new HashMap(); - // If bounding is actually an instant, set search fields accordingly - if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { + if (!analysis.getInstantIndexable()) { + // paleo dates are not indexable, so don't add beginDate or endDate to the index + beginDate = null; + endDate = null; + beginDayOfYear = null; + beginDayOfMonth = null; + beginMonth = null; + endDayOfYear = null; + endDayOfMonth = null; + endMonth = null; + } else { beginDate = analysis.getInstantUtcDateTimeString(); - year = parseYear(beginDate); 
- - // Add time and/or date to endDate based on precision var precision = analysis.getInstantPrecision(); if (precision.equals(ChronoUnit.DAYS.toString())) { // End of day endDate = bounding.getInstant() + "T23:59:59Z"; } else if (precision.equals(ChronoUnit.YEARS.toString())) { - if (!analysis.getInstantIndexable()) { - // Paleo date, so only return year value (null out dates) - beginDate = null; - endDate = null; - } else { - // Last day of year + end of day - endDate = bounding.getInstant() + "-12-31T23:59:59Z"; - } + // Last day of year + end of day + endDate = bounding.getInstant() + "-12-31T23:59:59Z"; } else { // Precision is NANOS so use instant value as-is + // TODO hopefully it's not a weird rare precision like "month" endDate = beginDate; } - beginYear = year; - endYear = year; - } else { - // If dates exist (thus VALID) and are indexable use value from analysis block where dates are UTC datetime normalized, - // else only set the year values as this is indicative of a paleo date - beginDate = analysis.getBeginDescriptor() == VALID && analysis.getBeginIndexable() ? analysis.getBeginUtcDateTimeString() : null; - beginYear = parseYear(analysis.getBeginUtcDateTimeString()); - endDate = analysis.getEndDescriptor() == VALID && analysis.getEndIndexable() ? analysis.getEndUtcDateTimeString() : null; - endYear = parseYear(analysis.getEndUtcDateTimeString()); + + if (analysis.getInstantDayOfYear() != null) { + beginDayOfYear = analysis.getInstantDayOfYear(); + endDayOfYear = analysis.getInstantDayOfYear(); + } else { + beginDayOfYear = 1; + endDayOfYear = 365; // TODO leap year + } + + if (analysis.getInstantDayOfMonth() != null) { + beginDayOfMonth = analysis.getInstantDayOfMonth(); + endDayOfMonth = analysis.getInstantDayOfMonth(); + } else { + beginDayOfMonth = 1; + endDayOfMonth = 31; // TODO depends on if there is a month but no day, but for the moment I'm assuming it's Year or Day precision, but not month... 
+ } + + if (analysis.getInstantMonth() != null) { + beginMonth = analysis.getInstantMonth(); + endMonth = analysis.getInstantMonth(); + } else { + beginMonth = 1; + endMonth = 12; + } } + beginYear = analysis.getInstantYear(); + endYear = analysis.getInstantYear(); + result.put("beginDate", beginDate); result.put("beginYear", beginYear); - result.putAll(parseAdditionalTimeFields("begin", beginDate)); + result.put("beginDayOfYear", beginDayOfYear); + result.put("beginDayOfMonth", beginDayOfMonth); + result.put("beginMonth", beginMonth); result.put("endDate", endDate); result.put("endYear", endYear); - result.putAll(parseAdditionalTimeFields("end", endDate)); + result.put("endDayOfYear", endDayOfYear); + result.put("endDayOfMonth", endDayOfMonth); + result.put("endMonth", endMonth); return result; } - private static HashMap parseAdditionalTimeFields(String prefix, String time){ + private static Map prepareBeginDate(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { var result = new HashMap(); - try { + Integer beginDayOfYear, beginDayOfMonth, beginMonth; - Integer dayOfYear, dayOfMonth, month; - if (time != null) { - ZonedDateTime dateTime = ZonedDateTime.parse(time); + if (analysis.getBeginDescriptor() == VALID) { + if (analysis.getBeginIndexable()) { + result.put("beginDate", analysis.getBeginUtcDateTimeString()); - dayOfYear = dateTime.getDayOfYear(); - dayOfMonth = dateTime.getDayOfMonth(); - month = dateTime.getMonthValue(); - } - else { - dayOfYear = null; - dayOfMonth = null; - month = null; + var precision = analysis.getBeginPrecision(); + + // if (Temporal.extractField(parsedDate, ChronoField.DAY_OF_YEAR) != null) { + if (precision.equals(ChronoUnit.DAYS.toString()) || precision.equals(ChronoUnit.NANOS.toString())) { + beginDayOfYear = analysis.getBeginDayOfYear(); + beginDayOfMonth = analysis.getBeginDayOfMonth(); + beginMonth = analysis.getBeginMonth(); + } + else { + beginDayOfYear = 1; + beginDayOfMonth = 1; + beginMonth = 1; // TODO base off month precision, if applicable + } + + } else { + beginDayOfYear = null; + beginDayOfMonth = null; + beginMonth = null; } + result.put("beginYear", analysis.getBeginYear()); + + // if (precision.equals(ChronoUnit.DAYS.toString())) { + // beginDayOfYear = analysis.getBeginDayOfYear(); + // beginDayOfMonth = analysis.getBeginDayOfMonth(); + // beginMonth = analysis.getBeginMonth(); + // } + // // else { + // // beginDayOfYear = 1; + // // beginDayOfMonth = 1; + // // beginMonth = 1; // TODO base off month precision, if applicable + // // } + // else { + // beginDayOfYear = null; + // beginDayOfMonth = null; + // beginMonth = null; + // } + + result.put("beginDayOfYear", beginDayOfYear); + result.put("beginDayOfMonth", beginDayOfMonth); + result.put("beginMonth", beginMonth); + } + return result; + } + + private static Map prepareEndDate(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { + var result = new HashMap(); + Integer endDayOfYear, endDayOfMonth, endMonth; + + if (analysis.getEndDescriptor() == VALID) { + if (analysis.getEndIndexable()) { + result.put("endDate", analysis.getEndUtcDateTimeString()); - result.put(prefix + "DayOfYear", dayOfYear); - result.put(prefix + "DayOfMonth", dayOfMonth); - result.put(prefix + "Month", month); - } catch (Exception e) {} // TODO temporary + var precision = analysis.getEndPrecision(); + if (precision.equals(ChronoUnit.DAYS.toString())) { + endDayOfYear = analysis.getEndDayOfYear(); + endDayOfMonth = analysis.getEndDayOfMonth(); + endMonth = analysis.getEndMonth(); + } + 
else { // TODO this implies other precision checks (begin date) are also needed + endDayOfYear = 365; // TODO leap years + endDayOfMonth = 31; // TODO base off month precision, if applicable + endMonth = 12; // TODO base off month precision, if applicable + } + } else { + endDayOfYear = null; + endDayOfMonth = null; + endMonth = null; + } + result.put("endYear", analysis.getEndYear()); + + + // if (precision.equals(ChronoUnit.DAYS.toString())) { + // endDayOfYear = analysis.getEndDayOfYear(); + // endDayOfMonth = analysis.getEndDayOfMonth(); + // endMonth = analysis.getEndMonth(); + // } + // // else { // TODO this implies other precision checks (begin date) are also needed + // // endDayOfYear = 365; // TODO leap years + // // endDayOfMonth = 31; // TODO base off month precision, if applicable + // // endMonth = 12; // TODO base off month precision, if applicable + // // } + // else { + // endDayOfYear = null; + // endDayOfMonth = null; + // endMonth = null; + // } + + result.put("endDayOfYear", endDayOfYear); + result.put("endDayOfMonth", endDayOfMonth); + result.put("endMonth", endMonth); + } return result; } - private static Long parseYear(String utcDateTime) { - if (StringUtils.isBlank(utcDateTime)) { - return null; + private static Map prepareDates(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { + String beginDate, endDate; + // Long year; + Long beginYear, endYear; + Integer beginDayOfYear, beginDayOfMonth, beginMonth; + Integer endDayOfYear, endDayOfMonth, endMonth; + var result = new HashMap(); + + // If bounding is actually an instant, set search fields accordingly + if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { + return prepareDatesForInstant(bounding, analysis); } else { - // Watch out for BCE years - return Long.parseLong(utcDateTime.substring(0, utcDateTime.indexOf('-', 1))); + // If dates exist (thus VALID) and are indexable use value from analysis block where dates are UTC datetime normalized, + // else only set the year values as this is indicative of a paleo date TODO does this all behave the same now? 
+ + result.putAll(prepareBeginDate(bounding, analysis)); + result.putAll(prepareEndDate(bounding, analysis)); + // if (analysis.getBeginDescriptor() == VALID && analysis.getBeginIndexable()) { + // beginDate = analysis.getBeginUtcDateTimeString(); + // beginYear = analysis.getBeginYear(); + // var precision = analysis.getBeginPrecision(); + // if (precision.equals(ChronoUnit.DAYS.toString())) { + // beginDayOfYear = analysis.getBeginDayOfYear(); + // beginDayOfMonth = analysis.getBeginDayOfMonth(); + // beginMonth = analysis.getBeginMonth(); + // } else { + // beginDayOfYear = 1; + // beginDayOfMonth = 1; + // beginMonth = 1; // TODO base off month precision, if applicable + // } + // } else { + // beginDate = null; + // beginYear = null; + // beginDayOfYear = null; + // beginDayOfMonth = null; + // beginMonth = null; + // } + // if (analysis.getEndDescriptor() == VALID && analysis.getEndIndexable()) { + // endDate = analysis.getEndUtcDateTimeString(); + // endYear = analysis.getEndYear(); + // var precision = analysis.getEndPrecision(); + // if (precision.equals(ChronoUnit.DAYS.toString())) { + // endDayOfYear = analysis.getEndDayOfYear(); + // endDayOfMonth = analysis.getEndDayOfMonth(); + // endMonth = analysis.getEndMonth(); + // } else { // TODO this implies other precision checks (begin date) are also needed + // endDayOfYear = 365; // TODO leap years + // endDayOfMonth = 31; // TODO base off month precision, if applicable + // endMonth = 12; // TODO base off month precision, if applicable + // } + // } else { + // endDate = null; + // endYear = null; + // endDayOfYear = null; + // endDayOfMonth = null; + // endMonth = null; + // } } + + // result.put("beginDate", beginDate); + // result.put("beginYear", beginYear); + // result.put("beginDayOfYear", beginDayOfYear); + // result.put("beginDayOfMonth", beginDayOfMonth); + // result.put("beginMonth", beginMonth); + // result.putAll(parseAdditionalTimeFields("begin", beginDate)); + + // result.put("endDate", endDate); + // result.put("endYear", endYear); + // result.put("endDayOfYear", endDayOfYear); + // result.put("endDayOfMonth", endDayOfMonth); + // result.put("endMonth", endMonth); + // result.putAll(parseAdditionalTimeFields("end", endDate)); + + return result; } + // private static HashMap parseAdditionalTimeFields(String prefix, String time){ + // var result = new HashMap(); + // try { + // + // Integer dayOfYear, dayOfMonth, month; + // if (time != null) { + // ZonedDateTime dateTime = ZonedDateTime.parse(time); + // + // dayOfYear = dateTime.getDayOfYear(); + // dayOfMonth = dateTime.getDayOfMonth(); + // month = dateTime.getMonthValue(); + // } + // else { + // dayOfYear = null; + // dayOfMonth = null; + // month = null; + // } + // + // result.put(prefix + "DayOfYear", dayOfYear); + // result.put(prefix + "DayOfMonth", dayOfMonth); + // result.put(prefix + "Month", month); + // } catch (Exception e) {} // TODO temporary + // return result; + // } + + // private static Long parseYear(String utcDateTime) { + // if (StringUtils.isBlank(utcDateTime)) { + // return null; + // } else { + // // Watch out for BCE years + // return Long.parseLong(utcDateTime.substring(0, utcDateTime.indexOf('-', 1))); + // } + // } + //////////////////////////// // Keywords // //////////////////////////// diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 00db07fa7..b0792a5e2 100644 --- 
a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -349,25 +349,58 @@ class TransformationUtilsSpec extends Specification { //////////////////////////// // Dates // //////////////////////////// - def "When #situation.description, expected temporal bounding generated"() { + def "when #label, expected temporal bounding generated"() { when: - def newTimeMetadata = TransformationUtils.prepareDates(situation.bounding, situation.analysis) - + def discovery = Discovery.newBuilder().setTemporalBounding(input).build() + def newTimeMetadata = TransformationUtils.prepareDates(input, Temporal.analyzeBounding(discovery)) + println("debug"+label) + println(Temporal.analyzeBounding(discovery)) then: - newTimeMetadata.sort() == expectedResult + newTimeMetadata.beginDate == beginDate + newTimeMetadata.beginYear == beginYear + newTimeMetadata.beginDayOfYear == beginDayOfYear + newTimeMetadata.beginDayOfMonth == beginDayOfMonth + newTimeMetadata.beginMonth == beginMonth + newTimeMetadata.endDate == endDate + newTimeMetadata.endYear == endYear + newTimeMetadata.endDayOfYear == endDayOfYear + newTimeMetadata.endDayOfMonth == endDayOfMonth + newTimeMetadata.endMonth == endMonth where: - situation | expectedResult - situations.instantDay | [beginDate: '1999-12-31T00:00:00Z', beginYear: 1999, beginDayOfYear: 365, beginDayOfMonth: 31, beginMonth: 12, endDate: '1999-12-31T23:59:59Z', endYear: 1999, endDayOfYear:365, endDayOfMonth:31, endMonth:12].sort() - situations.instantYear | [beginDate: '1999-01-01T00:00:00Z', beginYear: 1999, beginDayOfYear: 1, beginDayOfMonth:1, beginMonth: 1, endDate: '1999-12-31T23:59:59Z', endYear: 1999, endDayOfMonth:31, endDayOfYear:365, endMonth:12].sort() - situations.instantPaleo | [beginDate: null, endDate: null, beginYear: -1000000000, endYear: -1000000000, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() - situations.instantNano | [beginDate: '2008-04-01T00:00:00Z', beginYear: 2008, beginDayOfYear: 92, beginDayOfMonth:1, beginMonth: 4, endDate: '2008-04-01T00:00:00Z', endYear: 2008, endDayOfYear: 92, endDayOfMonth:1, endMonth:4].sort() - situations.bounded | [beginDate: '1900-01-01T00:00:00Z', beginYear: 1900, beginDayOfYear: 1, beginDayOfMonth:1, beginMonth: 1, endDate: '2009-12-31T23:59:59Z', endYear: 2009, endDayOfYear:365, endDayOfMonth:31, endMonth:12].sort() - situations.paleoBounded | [beginDate: null, endDate: null, beginYear: -2000000000, endYear: -1000000000, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() - situations.ongoing | [beginDate: "1975-06-15T12:30:00Z", beginDayOfMonth:15, beginDayOfYear:166, beginMonth:6, beginYear:1975, endDate:null, endYear:null, endDayOfYear: null, endDayOfMonth: null, endMonth: null].sort() - situations.empty | [beginDate: null, endDate: null, beginYear: null, endYear: null, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() + label | input | beginDate | beginYear | beginDayOfYear | beginDayOfMonth | beginMonth | endDate | endYear | endDayOfYear | endDayOfMonth | endMonth + + "undefined range" | TemporalBounding.newBuilder().build() | null | null | null | null | null | null | null | null | null | null + "non-paleo bounded range with day and year precision" | 
TemporalBounding.newBuilder().setBeginDate('1900-01-01').setEndDate('2009').build() | '1900-01-01T00:00:00Z' | 1900 | 1 | 1 | 1 | '2009-12-31T23:59:59.999Z' | 2009 | 365 | 31 | 12 // TODO does this assumption re end date make sense, really? TODO had to add the .999 to endDate - why and is that good/bad/other? + "paleo bounded range" | TemporalBounding.newBuilder().setBeginDate('-2000000000').setEndDate('-1000000000').build() | null | -2000000000 | null | null | null | null | -1000000000 | null | null | null + "ongoing range with second precision for begin" | TemporalBounding.newBuilder().setBeginDate('1975-06-15T12:30:00Z').build() | "1975-06-15T12:30:00Z" | 1975 | 166 | 15 | 6 | null | null | null | null | null + // INSTANTS: + "non-paleo instant with years precision" | TemporalBounding.newBuilder().setInstant('1999').build() | '1999-01-01T00:00:00Z' | 1999 | 1 | 1 | 1 | '1999-12-31T23:59:59Z' | 1999 | 365 | 31 | 12 + "non-paleo instant with days precision" | TemporalBounding.newBuilder().setInstant('1999-12-31').build() | '1999-12-31T00:00:00Z' | 1999 | 365 | 31 | 12 | '1999-12-31T23:59:59Z' | 1999 | 365 | 31 | 12 + "paleo instant with years precision" | TemporalBounding.newBuilder().setInstant('-1000000000').build() | null | -1000000000 | null | null | null | null | -1000000000 | null | null | null // TODO I think this is a bug in analysis, that it doesn't populate instantYears + "non-paleo instant with nanos precision" | TemporalBounding.newBuilder().setInstant('2008-04-01T00:00:00Z').build() | '2008-04-01T00:00:00Z' | 2008 | 92 | 1 | 4 | '2008-04-01T00:00:00Z' | 2008 | 92 | 1 | 4 } + // def "When #situation.description, expected temporal bounding generated"() { + // when: + // def discovery = Discovery.newBuilder().setTemporalBounding(situation.bounding).build() + // def newTimeMetadata = TransformationUtils.prepareDates(situation.bounding, Temporal.analyzeBounding(discovery)) + // + // then: + // newTimeMetadata.sort() == expectedResult + // + // where: + // situation | expectedResult + // // situations.instantDay | [beginDate: '1999-12-31T00:00:00Z', beginYear: 1999, beginDayOfYear: 365, beginDayOfMonth: 31, beginMonth: 12, endDate: '1999-12-31T23:59:59Z', endYear: 1999, endDayOfYear:365, endDayOfMonth:31, endMonth:12].sort() + // // situations.instantYear | [beginDate: '1999-01-01T00:00:00Z', beginYear: 1999, beginDayOfYear: 1, beginDayOfMonth:1, beginMonth: 1, endDate: '1999-12-31T23:59:59Z', endYear: 1999, endDayOfMonth:31, endDayOfYear:365, endMonth:12].sort() + // // situations.instantPaleo | [beginDate: null, endDate: null, beginYear: -1000000000, endYear: -1000000000, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() + // // situations.instantNano | [beginDate: '2008-04-01T00:00:00Z', beginYear: 2008, beginDayOfYear: 92, beginDayOfMonth:1, beginMonth: 4, endDate: '2008-04-01T00:00:00Z', endYear: 2008, endDayOfYear: 92, endDayOfMonth:1, endMonth:4].sort() + // // situations.bounded | [beginDate: '1900-01-01T00:00:00Z', beginYear: 1900, beginDayOfYear: 1, beginDayOfMonth:1, beginMonth: 1, endDate: '2009-12-31T23:59:59Z', endYear: 2009, endDayOfYear:365, endDayOfMonth:31, endMonth:12].sort() // TODO does this assumption re end date make sense, really? 
+ // // situations.paleoBounded | [beginDate: null, endDate: null, beginYear: -2000000000, endYear: -1000000000, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() + // // situations.ongoing | [beginDate: "1975-06-15T12:30:00Z", beginDayOfMonth:15, beginDayOfYear:166, beginMonth:6, beginYear:1975, endDate:null, endYear:null, endDayOfYear: null, endDayOfMonth: null, endMonth: null].sort() + // situations.empty | [beginDate: null, endDate: null, beginYear: null, endYear: null, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() + // } + def "temporal bounding with #testCase dates is prepared correctly"() { given: def bounding = TemporalBounding.newBuilder().setBeginDate(begin).setEndDate(end).build() From ca62c8159c5caf4ab1340e24b952f023c4fb59ef Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 23 Jun 2020 12:09:21 -0600 Subject: [PATCH 22/29] Added null check for passed in analysis in prepareTemporalBounding --- .../org/cedar/onestop/indexer/util/TransformationUtils.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index 27434082e..46105be49 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -340,6 +340,11 @@ private static Map> prepareResponsibleParties(ParsedRecord r private static Map prepareTemporalBounding(TemporalBoundingAnalysis analysis) { var result = new HashMap(); + + if (analysis == null) { + return result; + } + result.put("beginDescriptor", analysis.getBeginDescriptor()); result.put("beginIndexable", analysis.getBeginIndexable()); result.put("beginPrecision", analysis.getBeginPrecision()); From bd73edb3d1e6b29a37f7c40a049a4c80818f9a5d Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 23 Jun 2020 12:12:10 -0600 Subject: [PATCH 23/29] npm run format --- client/src/components/collections/detail/GranulesSummary.jsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/src/components/collections/detail/GranulesSummary.jsx b/client/src/components/collections/detail/GranulesSummary.jsx index 5ba814d26..2e8b3f589 100644 --- a/client/src/components/collections/detail/GranulesSummary.jsx +++ b/client/src/components/collections/detail/GranulesSummary.jsx @@ -48,7 +48,9 @@ export default class GranulesSummary extends React.Component { const linkText = loading || totalGranuleFilteredCount == totalGranuleCount - ? `Show all ${totalGranuleCount? totalGranuleCount.toLocaleString(): '0'} files in collection` + ? `Show all ${totalGranuleCount + ? 
totalGranuleCount.toLocaleString() + : '0'} files in collection` : `Show ${totalGranuleFilteredCount} matching files of ${totalGranuleCount} in collection` // TODO 508 this should probably be a link, not a button From 65b0a3a17b4b0dbc3925bf5a924d051f81c111fd Mon Sep 17 00:00:00 2001 From: Erin Date: Tue, 23 Jun 2020 12:21:16 -0600 Subject: [PATCH 24/29] Added test: reformatMessageForAnalysis output was missing parentIdentifierString --- .../util/TransformationUtilsSpec.groovy | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index 00db07fa7..f0544a2bd 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -3,6 +3,7 @@ package org.cedar.onestop.indexer.util import org.cedar.schemas.analyze.Analyzers import org.cedar.schemas.analyze.Temporal import org.cedar.schemas.avro.psi.Analysis +import org.cedar.schemas.avro.psi.IdentificationAnalysis import org.cedar.schemas.avro.psi.TemporalBoundingAnalysis import org.cedar.schemas.avro.psi.ValidDescriptor import org.cedar.schemas.avro.psi.Checksum @@ -137,6 +138,34 @@ class TransformationUtilsSpec extends Specification { } + def "reformatMessageForAnalysis populates #label"() { + String identifier = 'gov.noaa.nodc:0173643' + when: + def identificationAnalysis = IdentificationAnalysis.newBuilder() + .setFileIdentifierExists(true) + .setDoiExists(false) + .setParentIdentifierString(identifier) + .build() + def analysis = Analysis.newBuilder().setIdentification(identificationAnalysis).build() + ParsedRecord record = ParsedRecord.newBuilder().setType(type).setAnalysis(analysis).build() + + def indexedRecord = TransformationUtils.reformatMessageForAnalysis(record, fields, type) // use the type from the where: table rather than hardcoding granule + + then: + println(label) + println(JsonOutput.toJson(AvroUtils.avroToMap(record.getAnalysis(), true))) + println(JsonOutput.toJson(indexedRecord)) + indexedRecord.each { + key, value -> println("key=$key value=$value") + } + + indexedRecord?.identification?.parentIdentifierString == identifier + + where: + label | fields | type + 'collections with parentIdentifierString' | collectionAnalysisErrorFields | RecordType.collection + 'granules with parentIdentifierString' | granuleAnalysisErrorFields | RecordType.granule + } def "reformatMessageForSearch populates with correct fields for #label"() { when: From 95a943acb11510c6af14bb80b6c9397cb34aa430 Mon Sep 17 00:00:00 2001 From: Zeb Date: Fri, 26 Jun 2020 15:31:45 -0600 Subject: [PATCH 25/29] Move more logic into analysis - TODO some things may still be renamed.
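With this change the indexer stops deriving day-of-year/day-of-month/month fallbacks from precision strings itself and instead reads precomputed instant-end fields (e.g. instantEndUtcDateTimeString, instantEndDayOfYear) off the TemporalBoundingAnalysis produced by the schemas library. A minimal Groovy sketch of the intended behavior, mirroring the "instant on leapyear with month precision" row in the updated spec table below (hypothetical usage; assumes the analysis-updates schemas build):

  // analyze a month-precision instant and read the precomputed end-of-instant fields
  def bounding = TemporalBounding.newBuilder().setInstant('2004-02').build()
  def discovery = Discovery.newBuilder().setTemporalBounding(bounding).build()
  def analysis = Temporal.analyzeBounding(discovery)
  assert analysis.instantEndUtcDateTimeString == '2004-02-29T23:59:59.999Z' // leap-year February end
  assert analysis.instantEndDayOfYear == 60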
--- .../indexer/util/TransformationUtils.java | 224 ++---------------- .../util/TransformationUtilsSpec.groovy | 40 +--- 2 files changed, 28 insertions(+), 236 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index de501f5db..b67818e03 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -17,9 +17,6 @@ import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID; -// import org.cedar.schemas.analyze.Temporal; -// -// import java.time.temporal.ChronoField; import org.cedar.onestop.kafka.common.util.DataUtils; @@ -373,52 +370,17 @@ private static Map prepareDatesForInstant(TemporalBounding bound // paleo dates are not indexable, so don't add beginDate or endDate to the index beginDate = null; endDate = null; - beginDayOfYear = null; - beginDayOfMonth = null; - beginMonth = null; - endDayOfYear = null; - endDayOfMonth = null; - endMonth = null; } else { beginDate = analysis.getInstantUtcDateTimeString(); - var precision = analysis.getInstantPrecision(); - if (precision.equals(ChronoUnit.DAYS.toString())) { - // End of day - endDate = bounding.getInstant() + "T23:59:59Z"; - } else if (precision.equals(ChronoUnit.YEARS.toString())) { - // Last day of year + end of day - endDate = bounding.getInstant() + "-12-31T23:59:59Z"; - } else { - // Precision is NANOS so use instant value as-is - // TODO hopefully it's not a weird rare precision like "month" - endDate = beginDate; - } - - if (analysis.getInstantDayOfYear() != null) { - beginDayOfYear = analysis.getInstantDayOfYear(); - endDayOfYear = analysis.getInstantDayOfYear(); - } else { - beginDayOfYear = 1; - endDayOfYear = 365; // TODO leap year - } - - if (analysis.getInstantDayOfMonth() != null) { - beginDayOfMonth = analysis.getInstantDayOfMonth(); - endDayOfMonth = analysis.getInstantDayOfMonth(); - } else { - beginDayOfMonth = 1; - endDayOfMonth = 31; // TODO depends on if there is a month but no day, but for the moment I'm assuming it's Year or Day precision, but not month... 
- } - - if (analysis.getInstantMonth() != null) { - beginMonth = analysis.getInstantMonth(); - endMonth = analysis.getInstantMonth(); - } else { - beginMonth = 1; - endMonth = 12; - } + endDate = analysis.getInstantEndUtcDateTimeString(); } + beginDayOfYear = analysis.getInstantDayOfYear(); + endDayOfYear = analysis.getInstantEndDayOfYear(); + beginMonth = analysis.getInstantMonth(); + endMonth = analysis.getInstantEndMonth(); + beginDayOfMonth = analysis.getInstantDayOfMonth(); + endDayOfMonth = analysis.getInstantEndDayOfMonth(); beginYear = analysis.getInstantYear(); endYear = analysis.getInstantYear(); @@ -444,44 +406,12 @@ private static Map prepareBeginDate(TemporalBounding bounding, T if (analysis.getBeginDescriptor() == VALID) { if (analysis.getBeginIndexable()) { result.put("beginDate", analysis.getBeginUtcDateTimeString()); - - var precision = analysis.getBeginPrecision(); - - // if (Temporal.extractField(parsedDate, ChronoField.DAY_OF_YEAR) != null) { - if (precision.equals(ChronoUnit.DAYS.toString()) || precision.equals(ChronoUnit.NANOS.toString())) { - beginDayOfYear = analysis.getBeginDayOfYear(); - beginDayOfMonth = analysis.getBeginDayOfMonth(); - beginMonth = analysis.getBeginMonth(); - } - else { - beginDayOfYear = 1; - beginDayOfMonth = 1; - beginMonth = 1; // TODO base off month precision, if applicable - } - - } else { - beginDayOfYear = null; - beginDayOfMonth = null; - beginMonth = null; } - result.put("beginYear", analysis.getBeginYear()); - - // if (precision.equals(ChronoUnit.DAYS.toString())) { - // beginDayOfYear = analysis.getBeginDayOfYear(); - // beginDayOfMonth = analysis.getBeginDayOfMonth(); - // beginMonth = analysis.getBeginMonth(); - // } - // // else { - // // beginDayOfYear = 1; - // // beginDayOfMonth = 1; - // // beginMonth = 1; // TODO base off month precision, if applicable - // // } - // else { - // beginDayOfYear = null; - // beginDayOfMonth = null; - // beginMonth = null; - // } + beginDayOfYear = analysis.getBeginDayOfYear(); + beginDayOfMonth = analysis.getBeginDayOfMonth(); + beginMonth = analysis.getBeginMonth(); + result.put("beginYear", analysis.getBeginYear()); result.put("beginDayOfYear", beginDayOfYear); result.put("beginDayOfMonth", beginDayOfMonth); result.put("beginMonth", beginMonth); @@ -496,42 +426,12 @@ private static Map prepareEndDate(TemporalBounding bounding, Tem if (analysis.getEndDescriptor() == VALID) { if (analysis.getEndIndexable()) { result.put("endDate", analysis.getEndUtcDateTimeString()); - - var precision = analysis.getEndPrecision(); - if (precision.equals(ChronoUnit.DAYS.toString())) { - endDayOfYear = analysis.getEndDayOfYear(); - endDayOfMonth = analysis.getEndDayOfMonth(); - endMonth = analysis.getEndMonth(); - } - else { // TODO this implies other precision checks (begin date) are also needed - endDayOfYear = 365; // TODO leap years - endDayOfMonth = 31; // TODO base off month precision, if applicable - endMonth = 12; // TODO base off month precision, if applicable - } - } else { - endDayOfYear = null; - endDayOfMonth = null; - endMonth = null; } - result.put("endYear", analysis.getEndYear()); - - - // if (precision.equals(ChronoUnit.DAYS.toString())) { - // endDayOfYear = analysis.getEndDayOfYear(); - // endDayOfMonth = analysis.getEndDayOfMonth(); - // endMonth = analysis.getEndMonth(); - // } - // // else { // TODO this implies other precision checks (begin date) are also needed - // // endDayOfYear = 365; // TODO leap years - // // endDayOfMonth = 31; // TODO base off month precision, if applicable - // 
// endMonth = 12; // TODO base off month precision, if applicable - // // } - // else { - // endDayOfYear = null; - // endDayOfMonth = null; - // endMonth = null; - // } + endDayOfYear = analysis.getEndDayOfYear(); + endDayOfMonth = analysis.getEndDayOfMonth(); + endMonth = analysis.getEndMonth(); + result.put("endYear", analysis.getEndYear()); result.put("endDayOfYear", endDayOfYear); result.put("endDayOfMonth", endDayOfMonth); result.put("endMonth", endMonth); @@ -541,114 +441,22 @@ private static Map prepareEndDate(TemporalBounding bounding, Tem private static Map prepareDates(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { String beginDate, endDate; - // Long year; Long beginYear, endYear; Integer beginDayOfYear, beginDayOfMonth, beginMonth; Integer endDayOfYear, endDayOfMonth, endMonth; var result = new HashMap(); // If bounding is actually an instant, set search fields accordingly - if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { + if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { // distinguished getting begin and end date that were exactly the same (also described as instant) return prepareDatesForInstant(bounding, analysis); } else { - // If dates exist (thus VALID) and are indexable use value from analysis block where dates are UTC datetime normalized, - // else only set the year values as this is indicative of a paleo date TODO does this all behave the same now? - result.putAll(prepareBeginDate(bounding, analysis)); result.putAll(prepareEndDate(bounding, analysis)); - // if (analysis.getBeginDescriptor() == VALID && analysis.getBeginIndexable()) { - // beginDate = analysis.getBeginUtcDateTimeString(); - // beginYear = analysis.getBeginYear(); - // var precision = analysis.getBeginPrecision(); - // if (precision.equals(ChronoUnit.DAYS.toString())) { - // beginDayOfYear = analysis.getBeginDayOfYear(); - // beginDayOfMonth = analysis.getBeginDayOfMonth(); - // beginMonth = analysis.getBeginMonth(); - // } else { - // beginDayOfYear = 1; - // beginDayOfMonth = 1; - // beginMonth = 1; // TODO base off month precision, if applicable - // } - // } else { - // beginDate = null; - // beginYear = null; - // beginDayOfYear = null; - // beginDayOfMonth = null; - // beginMonth = null; - // } - // if (analysis.getEndDescriptor() == VALID && analysis.getEndIndexable()) { - // endDate = analysis.getEndUtcDateTimeString(); - // endYear = analysis.getEndYear(); - // var precision = analysis.getEndPrecision(); - // if (precision.equals(ChronoUnit.DAYS.toString())) { - // endDayOfYear = analysis.getEndDayOfYear(); - // endDayOfMonth = analysis.getEndDayOfMonth(); - // endMonth = analysis.getEndMonth(); - // } else { // TODO this implies other precision checks (begin date) are also needed - // endDayOfYear = 365; // TODO leap years - // endDayOfMonth = 31; // TODO base off month precision, if applicable - // endMonth = 12; // TODO base off month precision, if applicable - // } - // } else { - // endDate = null; - // endYear = null; - // endDayOfYear = null; - // endDayOfMonth = null; - // endMonth = null; - // } } - // result.put("beginDate", beginDate); - // result.put("beginYear", beginYear); - // result.put("beginDayOfYear", beginDayOfYear); - // result.put("beginDayOfMonth", beginDayOfMonth); - // result.put("beginMonth", beginMonth); - // result.putAll(parseAdditionalTimeFields("begin", beginDate)); - - // result.put("endDate", endDate); - // 
result.put("endYear", endYear); - // result.put("endDayOfYear", endDayOfYear); - // result.put("endDayOfMonth", endDayOfMonth); - // result.put("endMonth", endMonth); - // result.putAll(parseAdditionalTimeFields("end", endDate)); - return result; } - // private static HashMap parseAdditionalTimeFields(String prefix, String time){ - // var result = new HashMap(); - // try { - // - // Integer dayOfYear, dayOfMonth, month; - // if (time != null) { - // ZonedDateTime dateTime = ZonedDateTime.parse(time); - // - // dayOfYear = dateTime.getDayOfYear(); - // dayOfMonth = dateTime.getDayOfMonth(); - // month = dateTime.getMonthValue(); - // } - // else { - // dayOfYear = null; - // dayOfMonth = null; - // month = null; - // } - // - // result.put(prefix + "DayOfYear", dayOfYear); - // result.put(prefix + "DayOfMonth", dayOfMonth); - // result.put(prefix + "Month", month); - // } catch (Exception e) {} // TODO temporary - // return result; - // } - - // private static Long parseYear(String utcDateTime) { - // if (StringUtils.isBlank(utcDateTime)) { - // return null; - // } else { - // // Watch out for BCE years - // return Long.parseLong(utcDateTime.substring(0, utcDateTime.indexOf('-', 1))); - // } - // } - //////////////////////////// // Keywords // //////////////////////////// diff --git a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy index b0792a5e2..885b49441 100644 --- a/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy +++ b/indexer/src/test/groovy/org/cedar/onestop/indexer/util/TransformationUtilsSpec.groovy @@ -22,8 +22,6 @@ import groovy.json.JsonOutput import groovy.json.JsonSlurper import org.cedar.schemas.avro.util.AvroUtils -import static org.cedar.schemas.avro.util.TemporalTestData.getSituations - import org.cedar.onestop.kafka.common.util.DataUtils; @Unroll @@ -349,12 +347,14 @@ class TransformationUtilsSpec extends Specification { //////////////////////////// // Dates // //////////////////////////// + def "when #label, expected temporal bounding generated"() { when: def discovery = Discovery.newBuilder().setTemporalBounding(input).build() def newTimeMetadata = TransformationUtils.prepareDates(input, Temporal.analyzeBounding(discovery)) - println("debug"+label) - println(Temporal.analyzeBounding(discovery)) + + println("debug " + label + ": " + Temporal.analyzeBounding(discovery)) + then: newTimeMetadata.beginDate == beginDate newTimeMetadata.beginYear == beginYear @@ -371,36 +371,20 @@ class TransformationUtilsSpec extends Specification { label | input | beginDate | beginYear | beginDayOfYear | beginDayOfMonth | beginMonth | endDate | endYear | endDayOfYear | endDayOfMonth | endMonth "undefined range" | TemporalBounding.newBuilder().build() | null | null | null | null | null | null | null | null | null | null - "non-paleo bounded range with day and year precision" | TemporalBounding.newBuilder().setBeginDate('1900-01-01').setEndDate('2009').build() | '1900-01-01T00:00:00Z' | 1900 | 1 | 1 | 1 | '2009-12-31T23:59:59.999Z' | 2009 | 365 | 31 | 12 // TODO does this assumption re end date make sense, really? TODO had to add the .999 to endDate - why and is that good/bad/other? 
+ "non-paleo bounded range with day and year precision" | TemporalBounding.newBuilder().setBeginDate('1900-01-01').setEndDate('2009').build() | '1900-01-01T00:00:00Z' | 1900 | 1 | 1 | 1 | '2009-12-31T23:59:59.999Z' | 2009 | 365 | 31 | 12 "paleo bounded range" | TemporalBounding.newBuilder().setBeginDate('-2000000000').setEndDate('-1000000000').build() | null | -2000000000 | null | null | null | null | -1000000000 | null | null | null "ongoing range with second precision for begin" | TemporalBounding.newBuilder().setBeginDate('1975-06-15T12:30:00Z').build() | "1975-06-15T12:30:00Z" | 1975 | 166 | 15 | 6 | null | null | null | null | null // INSTANTS: - "non-paleo instant with years precision" | TemporalBounding.newBuilder().setInstant('1999').build() | '1999-01-01T00:00:00Z' | 1999 | 1 | 1 | 1 | '1999-12-31T23:59:59Z' | 1999 | 365 | 31 | 12 - "non-paleo instant with days precision" | TemporalBounding.newBuilder().setInstant('1999-12-31').build() | '1999-12-31T00:00:00Z' | 1999 | 365 | 31 | 12 | '1999-12-31T23:59:59Z' | 1999 | 365 | 31 | 12 - "paleo instant with years precision" | TemporalBounding.newBuilder().setInstant('-1000000000').build() | null | -1000000000 | null | null | null | null | -1000000000 | null | null | null // TODO I think this is a bug in analysis, that it doesn't populate instantYears + "instant leapyear" | TemporalBounding.newBuilder().setInstant('2004').build() | '2004-01-01T00:00:00Z' | 2004 | 1 | 1 | 1 | '2004-12-31T23:59:59.999Z' | 2004 | 366 | 31 | 12 + "instant with month precision" | TemporalBounding.newBuilder().setInstant('1999-02').build() | '1999-02-01T00:00:00Z' | 1999 | 32 | 1 | 2 | '1999-02-28T23:59:59.999Z' | 1999 | 59 | 28 | 2 + "instant on leapyear with month precision" | TemporalBounding.newBuilder().setInstant('2004-02').build() | '2004-02-01T00:00:00Z' | 2004 | 32 | 1 | 2 | '2004-02-29T23:59:59.999Z' | 2004 | 60 | 29 | 2 + "instant set with begin and end date matching" | TemporalBounding.newBuilder().setBeginDate('1994-07-20T13:22:00Z').setEndDate('1994-07-20T13:22:00Z').build() | '1994-07-20T13:22:00Z' | 1994 | 201 | 20 | 7 | '1994-07-20T13:22:00Z' | 1994 | 201 | 20 | 7 + "non-paleo instant with years precision" | TemporalBounding.newBuilder().setInstant('1999').build() | '1999-01-01T00:00:00Z' | 1999 | 1 | 1 | 1 | '1999-12-31T23:59:59.999Z' | 1999 | 365 | 31 | 12 + "non-paleo instant with days precision" | TemporalBounding.newBuilder().setInstant('1999-12-31').build() | '1999-12-31T00:00:00Z' | 1999 | 365 | 31 | 12 | '1999-12-31T23:59:59.999Z' | 1999 | 365 | 31 | 12 + "paleo instant with years precision" | TemporalBounding.newBuilder().setInstant('-1000000000').build() | null | -1000000000 | null | null | null | null | -1000000000 | null | null | null "non-paleo instant with nanos precision" | TemporalBounding.newBuilder().setInstant('2008-04-01T00:00:00Z').build() | '2008-04-01T00:00:00Z' | 2008 | 92 | 1 | 4 | '2008-04-01T00:00:00Z' | 2008 | 92 | 1 | 4 } - // def "When #situation.description, expected temporal bounding generated"() { - // when: - // def discovery = Discovery.newBuilder().setTemporalBounding(situation.bounding).build() - // def newTimeMetadata = TransformationUtils.prepareDates(situation.bounding, Temporal.analyzeBounding(discovery)) - // - // then: - // newTimeMetadata.sort() == expectedResult - // - // where: - // situation | expectedResult - // // situations.instantDay | [beginDate: '1999-12-31T00:00:00Z', beginYear: 1999, beginDayOfYear: 365, beginDayOfMonth: 31, beginMonth: 12, endDate: '1999-12-31T23:59:59Z', endYear: 1999, 
endDayOfYear:365, endDayOfMonth:31, endMonth:12].sort() - // // situations.instantYear | [beginDate: '1999-01-01T00:00:00Z', beginYear: 1999, beginDayOfYear: 1, beginDayOfMonth:1, beginMonth: 1, endDate: '1999-12-31T23:59:59Z', endYear: 1999, endDayOfMonth:31, endDayOfYear:365, endMonth:12].sort() - // // situations.instantPaleo | [beginDate: null, endDate: null, beginYear: -1000000000, endYear: -1000000000, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() - // // situations.instantNano | [beginDate: '2008-04-01T00:00:00Z', beginYear: 2008, beginDayOfYear: 92, beginDayOfMonth:1, beginMonth: 4, endDate: '2008-04-01T00:00:00Z', endYear: 2008, endDayOfYear: 92, endDayOfMonth:1, endMonth:4].sort() - // // situations.bounded | [beginDate: '1900-01-01T00:00:00Z', beginYear: 1900, beginDayOfYear: 1, beginDayOfMonth:1, beginMonth: 1, endDate: '2009-12-31T23:59:59Z', endYear: 2009, endDayOfYear:365, endDayOfMonth:31, endMonth:12].sort() // TODO does this assumption re end date make sense, really? - // // situations.paleoBounded | [beginDate: null, endDate: null, beginYear: -2000000000, endYear: -1000000000, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() - // // situations.ongoing | [beginDate: "1975-06-15T12:30:00Z", beginDayOfMonth:15, beginDayOfYear:166, beginMonth:6, beginYear:1975, endDate:null, endYear:null, endDayOfYear: null, endDayOfMonth: null, endMonth: null].sort() - // situations.empty | [beginDate: null, endDate: null, beginYear: null, endYear: null, beginDayOfYear: null, beginDayOfMonth:null, beginMonth: null, endDayOfYear: null, endDayOfMonth:null, endMonth:null].sort() - // } - def "temporal bounding with #testCase dates is prepared correctly"() { given: def bounding = TemporalBounding.newBuilder().setBeginDate(begin).setEndDate(end).build() From 04f543497106084388453a89361e647010ba481a Mon Sep 17 00:00:00 2001 From: Zeb Date: Fri, 26 Jun 2020 15:36:41 -0600 Subject: [PATCH 26/29] cleanup --- .../indexer/util/TransformationUtils.java | 70 ++++++------------- 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index b67818e03..e8bf6aaf4 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -360,94 +360,64 @@ private static Map prepareTemporalBounding(TemporalBoundingAnaly } private static Map prepareDatesForInstant(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { - String beginDate, endDate; - Long beginYear, endYear; - Integer beginDayOfYear, beginDayOfMonth, beginMonth; - Integer endDayOfYear, endDayOfMonth, endMonth; var result = new HashMap(); - if (!analysis.getInstantIndexable()) { - // paleo dates are not indexable, so don't add beginDate or endDate to the index - beginDate = null; - endDate = null; - } else { - beginDate = analysis.getInstantUtcDateTimeString(); - endDate = analysis.getInstantEndUtcDateTimeString(); + if (analysis.getInstantIndexable()) { + // paleo dates are not indexable, so only add beginDate or endDate to the index if instantIndexable + result.put("beginDate", analysis.getInstantUtcDateTimeString()); + result.put("endDate", analysis.getInstantEndUtcDateTimeString()); } - beginDayOfYear = 
analysis.getInstantDayOfYear(); - endDayOfYear = analysis.getInstantEndDayOfYear(); - beginMonth = analysis.getInstantMonth(); - endMonth = analysis.getInstantEndMonth(); - beginDayOfMonth = analysis.getInstantDayOfMonth(); - endDayOfMonth = analysis.getInstantEndDayOfMonth(); - beginYear = analysis.getInstantYear(); - endYear = analysis.getInstantYear(); - - result.put("beginDate", beginDate); - result.put("beginYear", beginYear); - result.put("beginDayOfYear", beginDayOfYear); - result.put("beginDayOfMonth", beginDayOfMonth); - result.put("beginMonth", beginMonth); - - result.put("endDate", endDate); - result.put("endYear", endYear); - result.put("endDayOfYear", endDayOfYear); - result.put("endDayOfMonth", endDayOfMonth); - result.put("endMonth", endMonth); + result.put("beginYear", analysis.getInstantYear()); + result.put("beginDayOfYear", analysis.getInstantDayOfYear()); + result.put("beginDayOfMonth", analysis.getInstantDayOfMonth()); + result.put("beginMonth", analysis.getInstantMonth()); + + result.put("endYear", analysis.getInstantYear()); + result.put("endDayOfYear", analysis.getInstantEndDayOfYear()); + result.put("endDayOfMonth", analysis.getInstantEndDayOfMonth()); + result.put("endMonth", analysis.getInstantEndMonth()); return result; } private static Map prepareBeginDate(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { var result = new HashMap(); - Integer beginDayOfYear, beginDayOfMonth, beginMonth; if (analysis.getBeginDescriptor() == VALID) { if (analysis.getBeginIndexable()) { result.put("beginDate", analysis.getBeginUtcDateTimeString()); } - beginDayOfYear = analysis.getBeginDayOfYear(); - beginDayOfMonth = analysis.getBeginDayOfMonth(); - beginMonth = analysis.getBeginMonth(); result.put("beginYear", analysis.getBeginYear()); - result.put("beginDayOfYear", beginDayOfYear); - result.put("beginDayOfMonth", beginDayOfMonth); - result.put("beginMonth", beginMonth); + result.put("beginDayOfYear", analysis.getBeginDayOfYear()); + result.put("beginDayOfMonth", analysis.getBeginDayOfMonth()); + result.put("beginMonth", analysis.getBeginMonth()); } return result; } private static Map prepareEndDate(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { var result = new HashMap(); - Integer endDayOfYear, endDayOfMonth, endMonth; if (analysis.getEndDescriptor() == VALID) { if (analysis.getEndIndexable()) { result.put("endDate", analysis.getEndUtcDateTimeString()); } - endDayOfYear = analysis.getEndDayOfYear(); - endDayOfMonth = analysis.getEndDayOfMonth(); - endMonth = analysis.getEndMonth(); result.put("endYear", analysis.getEndYear()); - result.put("endDayOfYear", endDayOfYear); - result.put("endDayOfMonth", endDayOfMonth); - result.put("endMonth", endMonth); + result.put("endDayOfYear", analysis.getEndDayOfYear()); + result.put("endDayOfMonth", analysis.getEndDayOfMonth()); + result.put("endMonth", analysis.getEndMonth()); } return result; } private static Map prepareDates(TemporalBounding bounding, TemporalBoundingAnalysis analysis) { - String beginDate, endDate; - Long beginYear, endYear; - Integer beginDayOfYear, beginDayOfMonth, beginMonth; - Integer endDayOfYear, endDayOfMonth, endMonth; var result = new HashMap(); // If bounding is actually an instant, set search fields accordingly - if (analysis.getRangeDescriptor() == TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { // distinguished getting begin and end date that were exactly the same (also described as instant) + if (analysis.getRangeDescriptor() == 
TimeRangeDescriptor.INSTANT && analysis.getBeginDescriptor() == UNDEFINED) { // begin and end dates that are exactly the same are also described as INSTANT, but in that case prepareBeginDate and prepareEndDate must be used to read values off the correct analysis fields return prepareDatesForInstant(bounding, analysis); } else { result.putAll(prepareBeginDate(bounding, analysis)); From 27f3b6c17b63948ad009b2bbec6da50858502721 Mon Sep 17 00:00:00 2001 From: Zeb Date: Tue, 30 Jun 2020 15:01:16 -0600 Subject: [PATCH 27/29] Minor cleanup --- .../org/cedar/onestop/indexer/util/TransformationUtils.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java index e8bf6aaf4..c7ef47b89 100644 --- a/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java +++ b/indexer/src/main/java/org/cedar/onestop/indexer/util/TransformationUtils.java @@ -18,10 +18,6 @@ import static org.cedar.schemas.avro.psi.ValidDescriptor.UNDEFINED; import static org.cedar.schemas.avro.psi.ValidDescriptor.VALID; -import org.cedar.onestop.kafka.common.util.DataUtils; - -// TODO import org.apache.kafka.streams.StreamsBuilder; - /** * This class contains utilities for transforming the contents of the Avro (schemas) records into the appropriate * corresponding Elasticsearch mapping format. @@ -65,7 +61,7 @@ public static Map reformatMessageForAnalysis(ParsedRecord record public static Map prepareIdentification(IdentificationAnalysis identification, RecordType recordType) { var result = new HashMap(); - var analysis = AvroUtils.avroToMap(identification); // TODO using map because I need javadocs on the IdentificationAnalysis object... + var analysis = AvroUtils.avroToMap(identification); // currently using a map because this couldn't be made to work with the IdentificationAnalysis object directly. Worth revisiting at some point.
if (analysis == null) { return result; From af15fa46d3f9bf1bff6ccd6594be18b11ddc9eed Mon Sep 17 00:00:00 2001 From: Zeb Date: Tue, 30 Jun 2020 15:28:20 -0600 Subject: [PATCH 28/29] Added dashboard for errors (requires manual import at the moment) --- .../src/main/resources/dashboards/errors.ndjson | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 elastic-common/src/main/resources/dashboards/errors.ndjson diff --git a/elastic-common/src/main/resources/dashboards/errors.ndjson b/elastic-common/src/main/resources/dashboards/errors.ndjson new file mode 100644 index 000000000..5c3152530 --- /dev/null +++ b/elastic-common/src/main/resources/dashboards/errors.ndjson @@ -0,0 +1,10 @@ +{"attributes":{"fields":"[{\"name\":\"_id\",\"type\":\"string\",\"esTypes\":[\"_id\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"esTypes\":[\"_index\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"esTypes\":[\"_source\"],\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"esTypes\":[\"_type\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"dataAccess.dataAccessExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"description.descriptionCharacters\",\"type\":\"number\",\"esTypes\":[\"short\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"description.descriptionExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"description.descriptionFleschKincaidReadingGradeLevel\",\"type\":\"number\",\"esTypes\":[\"half_float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"description.descriptionFleschReadingEaseScore\",\"type\":\"number\",\"esTypes\":[\"half_float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"errors.detail\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false,\"subType\":{\"nested\":{\"path\":\"errors\"}}},{\"name\":\"errors.source\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"nested\":{\"path\":\"errors\"}}},{\"name\":\"errors.title\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"nested\":{\"path\":\"errors\"}}},{\"name\":\"identification.doiExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"identification.doiString\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\
"readFromDocValues\":false},{\"name\":\"identification.doiString.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"identification.doiString\"}}},{\"name\":\"identification.fileIdentifierExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"identification.fileIdentifierString\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"identification.fileIdentifierString.keyword\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true,\"subType\":{\"multi\":{\"parent\":\"identification.fileIdentifierString\"}}},{\"name\":\"identification.hierarchyLevelNameExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"identification.isGranule\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"identification.parentIdentifierExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"identification.parentIdentifierString\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"internalParentIdentifier\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"spatialBounding.isValid\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"spatialBounding.spatialBoundingExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"spatialBounding.validationError\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"stagedDate\",\"type\":\"date\",\"esTypes\":[\"date\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.beginDescriptor\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.beginIndexable\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.beginPrecision\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.beginUtcDateTimeString\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.beginZoneSpecified\",\"type\":\"string\",\"esTypes\":[\"keywo
rd\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.endDescriptor\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.endIndexable\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.endPrecision\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.endUtcDateTimeString\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.endZoneSpecified\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.instantDescriptor\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.instantIndexable\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.instantPrecision\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.instantUtcDateTimeString\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.instantZoneSpecified\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"temporalBounding.rangeDescriptor\",\"type\":\"string\",\"esTypes\":[\"keyword\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"thumbnail.thumbnailExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.alternateTitleCharacters\",\"type\":\"number\",\"esTypes\":[\"short\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.alternateTitleExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.alternateTitleFleschKincaidReadingGradeLevel\",\"type\":\"number\",\"esTypes\":[\"half_float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.alternateTitleFleschReadingEaseScore\",\"type\":\"number\",\"esTypes\":[\"half_float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.titleCharacters\",\"type\":\"number\",\"esTypes\":[\"short\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.titleExists\",\"type\":\"boolean\",\"esTypes\":[\"boolean\"],\"count\":0,\"scripted\":false,\"searchab
le\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.titleFleschKincaidReadingGradeLevel\",\"type\":\"number\",\"esTypes\":[\"half_float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"titles.titleFleschReadingEaseScore\",\"type\":\"number\",\"esTypes\":[\"half_float\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]","timeFieldName":"stagedDate","title":"analysis_error*"},"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","migrationVersion":{"index-pattern":"7.6.0"},"references":[],"type":"index-pattern","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMjksMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Mismatched Identifiers [OneStop] [Indexing Errors]","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Mismatched Identifiers [OneStop] [Indexing Errors]\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"Labels\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":1},{\"type\":\"range\",\"from\":1,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":true,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}}],\"bucket\":{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"string\",\"params\":{}}}},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"filters\",\"schema\":\"group\",\"params\":{\"filters\":[{\"input\":{\"query\":\"identification.matchesIdentifiers:false\",\"language\":\"lucene\"},\"label\":\"Mismatched Identifiers\"}]}}]}"},"id":"061539c0-af45-11ea-a83e-2ff43ec9c891","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMzMsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Missing Identifiers [OneStop] [Indexing Errors]","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Missing Identifiers [OneStop] [Indexing Errors]\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"Labels\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":1},{\"type\":\"range\",\"from\":1,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":true,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}}],\"bucket\":{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"string\",\"params\":{}}}},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"filters\",\"schema\":\"group\",\"params\":{\"filters\":[{\"input\":{\"query\":\"identification.fileIdentifierExists:false AND identification.doiExists:false\",\"language\":\"lucene\"},\"label\":\"Missing Identifiers\"}]}}]}"},"id":"043cb3c0-af46-11ea-a83e-2ff43ec9c891","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMzQsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Number of Analyzed Records [OneStop] [Indexing Errors]","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Number of Analyzed Records [OneStop] [Indexing Errors]\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}}],\"bucket\":{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"string\",\"params\":{}}}},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}"},"id":"4d8e76e0-b017-11ea-a83e-2ff43ec9c891","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMzIsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Invalid Date [OneStop] [Indexing Errors]","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Invalid Date [OneStop] [Indexing Errors]\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to 
Red\",\"metricColorMode\":\"Labels\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":1},{\"type\":\"range\",\"from\":1,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":true,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}}],\"bucket\":{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"string\",\"params\":{}}}},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"filters\",\"schema\":\"group\",\"params\":{\"filters\":[{\"input\":{\"query\":\"temporalBounding.instantIndexable:false OR temporalBounding.endIndexable:false OR temporalBounding.beginIndexable:false\",\"language\":\"lucene\"},\"label\":\"Invalid Date\"}]}}]}"},"id":"52d195a0-af46-11ea-a83e-2ff43ec9c891","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMzAsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Invalid Geometry [OneStop] [Indexing Errors]","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Invalid Geometry [OneStop] [Indexing Errors]\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"Labels\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":1},{\"type\":\"range\",\"from\":1,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":true,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}}],\"bucket\":{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"string\",\"params\":{}}}},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"filters\",\"schema\":\"group\",\"params\":{\"filters\":[{\"input\":{\"query\":\"spatialBounding.isValid: false AND spatialBounding.spatialBoundingExists: true\",\"language\":\"lucene\"},\"label\":\"Invalid Geometry\"}]}}]}"},"id":"810213f0-af46-11ea-a83e-2ff43ec9c891","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMzEsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Record Ids [OneStop]","uiStateJSON":"{\"vis\":{\"params\":{\"sort\":{\"columnIndex\":null,\"direction\":null}}}}","version":1,"visState":"{\"title\":\"Record Ids 
[OneStop]\",\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMetricsAtAllLevels\":false,\"sort\":{\"columnIndex\":null,\"direction\":null},\"showTotal\":false,\"totalFunc\":\"sum\",\"percentageCol\":\"\",\"dimensions\":{\"metrics\":[{\"accessor\":0,\"format\":{\"id\":\"number\"},\"params\":{},\"label\":\"Count\",\"aggType\":\"count\"}],\"buckets\":[]}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"_id\",\"orderBy\":\"1\",\"order\":\"desc\",\"size\":25,\"otherBucket\":true,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}"},"id":"d729de80-b0c6-11ea-84ab-a34d141ed3e3","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-17T18:17:44.806Z","version":"WzE3MjMsMV0="} +{"attributes":{"description":"","kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[],\"indexRefName\":\"kibanaSavedObjectMeta.searchSourceJSON.index\"}"},"title":"Missing Title [OneStop] [Indexing Errors]","uiStateJSON":"{}","version":1,"visState":"{\"title\":\"Missing Title [OneStop] [Indexing Errors]\",\"type\":\"metric\",\"params\":{\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"Labels\",\"colorsRange\":[{\"type\":\"range\",\"from\":0,\"to\":1},{\"type\":\"range\",\"from\":1,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":true,\"subText\":\"\",\"fontSize\":60}},\"dimensions\":{\"metrics\":[{\"type\":\"vis_dimension\",\"accessor\":1,\"format\":{\"id\":\"number\",\"params\":{}}}],\"bucket\":{\"type\":\"vis_dimension\",\"accessor\":0,\"format\":{\"id\":\"string\",\"params\":{}}}},\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\"},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"filters\",\"schema\":\"group\",\"params\":{\"filters\":[{\"input\":{\"query\":\"titles.titleExists:false\",\"language\":\"lucene\"},\"label\":\"Missing Title\"}]}}]}"},"id":"30ec1050-af46-11ea-a83e-2ff43ec9c891","migrationVersion":{"visualization":"7.4.2"},"references":[{"id":"80f1f940-af44-11ea-a83e-2ff43ec9c891","name":"kibanaSavedObjectMeta.searchSourceJSON.index","type":"index-pattern"}],"type":"visualization","updated_at":"2020-06-16T22:08:50.511Z","version":"WzExMzUsMV0="} 
+{"attributes":{"description":"","hits":0,"kibanaSavedObjectMeta":{"searchSourceJSON":"{\"query\":{\"language\":\"kuery\",\"query\":\"\"},\"filter\":[]}"},"optionsJSON":"{\"hidePanelTitles\":false,\"useMargins\":true}","panelsJSON":"[{\"version\":\"7.6.2\",\"gridData\":{\"x\":0,\"y\":0,\"w\":12,\"h\":9,\"i\":\"295956ed-b142-4eec-abee-783b6f0d30c7\"},\"panelIndex\":\"295956ed-b142-4eec-abee-783b6f0d30c7\",\"embeddableConfig\":{},\"panelRefName\":\"panel_0\"},{\"version\":\"7.6.2\",\"gridData\":{\"x\":12,\"y\":0,\"w\":12,\"h\":9,\"i\":\"92458b33-5178-411f-a5a8-35a2f7244f52\"},\"panelIndex\":\"92458b33-5178-411f-a5a8-35a2f7244f52\",\"embeddableConfig\":{},\"panelRefName\":\"panel_1\"},{\"version\":\"7.6.2\",\"gridData\":{\"x\":24,\"y\":0,\"w\":12,\"h\":9,\"i\":\"ed1dfd4e-d0b2-4771-928b-8df896c74aa3\"},\"panelIndex\":\"ed1dfd4e-d0b2-4771-928b-8df896c74aa3\",\"embeddableConfig\":{},\"panelRefName\":\"panel_2\"},{\"version\":\"7.6.2\",\"gridData\":{\"x\":0,\"y\":9,\"w\":12,\"h\":9,\"i\":\"5dc8ee16-75d9-40a8-8e22-6b0522a7837b\"},\"panelIndex\":\"5dc8ee16-75d9-40a8-8e22-6b0522a7837b\",\"embeddableConfig\":{},\"panelRefName\":\"panel_3\"},{\"version\":\"7.6.2\",\"gridData\":{\"x\":12,\"y\":9,\"w\":12,\"h\":9,\"i\":\"b384c5ff-a3c9-48de-b223-6f2125ea3bcd\"},\"panelIndex\":\"b384c5ff-a3c9-48de-b223-6f2125ea3bcd\",\"embeddableConfig\":{},\"panelRefName\":\"panel_4\"},{\"version\":\"7.6.2\",\"gridData\":{\"x\":24,\"y\":9,\"w\":24,\"h\":18,\"i\":\"df92499f-0e6d-4b2d-a1ba-063a323ca5d8\"},\"panelIndex\":\"df92499f-0e6d-4b2d-a1ba-063a323ca5d8\",\"embeddableConfig\":{},\"panelRefName\":\"panel_5\"},{\"version\":\"7.6.2\",\"gridData\":{\"x\":0,\"y\":18,\"w\":12,\"h\":9,\"i\":\"ccf1e2bf-d986-43e9-aedd-066788044135\"},\"panelIndex\":\"ccf1e2bf-d986-43e9-aedd-066788044135\",\"embeddableConfig\":{},\"panelRefName\":\"panel_6\"}]","timeRestore":false,"title":"Indexing Errors [OneStop]","version":1},"id":"524a6ed0-af47-11ea-a83e-2ff43ec9c891","migrationVersion":{"dashboard":"7.3.0"},"references":[{"id":"061539c0-af45-11ea-a83e-2ff43ec9c891","name":"panel_0","type":"visualization"},{"id":"043cb3c0-af46-11ea-a83e-2ff43ec9c891","name":"panel_1","type":"visualization"},{"id":"4d8e76e0-b017-11ea-a83e-2ff43ec9c891","name":"panel_2","type":"visualization"},{"id":"52d195a0-af46-11ea-a83e-2ff43ec9c891","name":"panel_3","type":"visualization"},{"id":"810213f0-af46-11ea-a83e-2ff43ec9c891","name":"panel_4","type":"visualization"},{"id":"d729de80-b0c6-11ea-84ab-a34d141ed3e3","name":"panel_5","type":"visualization"},{"id":"30ec1050-af46-11ea-a83e-2ff43ec9c891","name":"panel_6","type":"visualization"}],"type":"dashboard","updated_at":"2020-06-17T18:18:36.234Z","version":"WzE3MjYsMV0="} +{"exportedCount":9,"missingRefCount":0,"missingReferences":[]} From d22a0c1bc0ee3a66cf4e08a6665da34a3c91c743 Mon Sep 17 00:00:00 2001 From: Arianna Jakositz Date: Thu, 2 Jul 2020 11:02:28 -0600 Subject: [PATCH 29/29] update to tagged release of schemas --- buildSrc/src/main/kotlin/utils.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildSrc/src/main/kotlin/utils.kt b/buildSrc/src/main/kotlin/utils.kt index 770b24936..8192ce34a 100644 --- a/buildSrc/src/main/kotlin/utils.kt +++ b/buildSrc/src/main/kotlin/utils.kt @@ -41,7 +41,7 @@ object Versions { const val SNAKE_YAML = "1.24" const val REACTOR_BOM = "Dysprosium-SR7" - const val ONESTOP_SCHEMAS: String = "1250-date-parsing-exception-SNAPSHOT" + const val ONESTOP_SCHEMAS: String = "0.6.0" } // data classes