From a8714ff1cd9bd6ffeadd083dfd71b2b84bda9a39 Mon Sep 17 00:00:00 2001 From: utas-raymondng Date: Wed, 11 Sep 2024 16:26:59 +1000 Subject: [PATCH] Change the converter logic to handle malform date --- .../esindexer/service/RankingServiceImpl.java | 16 +- .../service/StacCollectionMapperService.java | 76 ++- .../StacCollectionMapperServiceTests.java | 26 +- .../test/resources/canned/sample12_stac.json | 6 +- .../test/resources/canned/sample13_stac.json | 6 +- .../test/resources/canned/sample4_stac.json | 4 +- .../test/resources/canned/sample5_stac.json | 4 +- .../test/resources/canned/sample6_stac.json | 4 +- .../test/resources/canned/sample7_stac.json | 4 +- .../resources/canned/sample7_stac_no_es.json | 6 +- .../test/resources/canned/sample8_stac.json | 4 +- .../test/resources/canned/sample9_stac.json | 6 +- .../sample_abstract_citation_null_stac.json | 4 +- .../sample_incorrect_projection_stac.json | 6 +- .../resources/canned/sample_malform_date.xml | 465 ++++++++++++++++++ .../canned/sample_malform_date_stac.json | 87 ++++ .../sample_multiple_temporal1_stac.json | 4 +- .../sample_multiple_temporal2_stac.json | 10 +- 18 files changed, 675 insertions(+), 63 deletions(-) create mode 100644 indexer/src/test/resources/canned/sample_malform_date.xml create mode 100644 indexer/src/test/resources/canned/sample_malform_date_stac.json diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/RankingServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/RankingServiceImpl.java index d8f7f5f6..f153fbeb 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/RankingServiceImpl.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/RankingServiceImpl.java @@ -12,7 +12,7 @@ public class RankingServiceImpl implements RankingService { protected static Logger logger = LogManager.getLogger(RankingServiceImpl.class); public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) { - Integer total = 0; + int total = 0; /* * The implementation of this method can be adjusted @@ -31,27 +31,27 @@ public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) { * Total: 100 points * */ - if (stacCollectionModel.getTitle() != null && !stacCollectionModel.getTitle().equals("")) { + if (stacCollectionModel.getTitle() != null && !stacCollectionModel.getTitle().isBlank()) { logger.debug("Title found"); total += 15; } - if (stacCollectionModel.getDescription() != null && !stacCollectionModel.getDescription().equals("")) { + if (stacCollectionModel.getDescription() != null && !stacCollectionModel.getDescription().isBlank()) { logger.debug("Description found"); total += 15; } - if (stacCollectionModel.getExtent().getBbox() != null && stacCollectionModel.getExtent().getBbox().size() > 0) { + if (stacCollectionModel.getExtent().getBbox() != null && !stacCollectionModel.getExtent().getBbox().isEmpty()) { logger.debug("Extent found"); total += 10; } - if (stacCollectionModel.getExtent().getTemporal() != null && stacCollectionModel.getExtent().getTemporal().size() > 0) { + if (stacCollectionModel.getExtent().getTemporal() != null && !stacCollectionModel.getExtent().getTemporal().isEmpty()) { logger.debug("Temporal found"); total += 10; } - if (stacCollectionModel.getLinks() != null && stacCollectionModel.getLinks().size() > 0) { + if (stacCollectionModel.getLinks() != null && !stacCollectionModel.getLinks().isEmpty()) { if (stacCollectionModel.getLinks().size() <= 2) { logger.debug("Links found with size: " + stacCollectionModel.getLinks().size()); total += 10; @@ -64,7 +64,7 @@ public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) { } } - if (stacCollectionModel.getThemes() != null && stacCollectionModel.getThemes().size() > 0) { + if (stacCollectionModel.getThemes() != null && !stacCollectionModel.getThemes().isEmpty()) { if (stacCollectionModel.getThemes().size() <= 2) { logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size()); total += 10; @@ -77,7 +77,7 @@ public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) { } } - if (stacCollectionModel.getContacts() != null && stacCollectionModel.getContacts().size() > 0) { + if (stacCollectionModel.getContacts() != null && !stacCollectionModel.getContacts().isEmpty()) { logger.debug("Contacts found"); total += 10; } diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java b/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java index a222207e..7a600a44 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java @@ -21,9 +21,7 @@ import java.io.IOException; import java.math.BigDecimal; -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.time.ZonedDateTime; +import java.time.*; import java.time.format.DateTimeFormatter; import java.util.*; import java.util.function.Function; @@ -130,13 +128,13 @@ List createExtentTemporal(MDMetadataType source) { if (pair0.isEmpty()) { pair0 = safeGet(() -> timePeriodType.getBeginPosition().getValue().get(0)); } - pair0.ifPresent(pair -> temporalPair[0] = convertDateToZonedDateTime(this.mapUUID(source), pair)); + pair0.ifPresent(pair -> temporalPair[0] = convertDateToZonedDateTime(this.mapUUID(source), pair, true)); var pair1 = safeGet(() -> timePeriodType.getEnd().getTimeInstant().getTimePosition().getValue().get(0)); if (pair1.isEmpty()) { pair1 = safeGet(() -> timePeriodType.getEndPosition().getValue().get(0)); } - pair1.ifPresent(pair -> temporalPair[1] = convertDateToZonedDateTime(this.mapUUID(source), pair)); + pair1.ifPresent(pair -> temporalPair[1] = convertDateToZonedDateTime(this.mapUUID(source), pair, false)); } result.add(temporalPair); @@ -145,25 +143,65 @@ List createExtentTemporal(MDMetadataType source) { } return result; } - - private String convertDateToZonedDateTime(String uuid, String inputDateString) { - - String inputDateTimeString = inputDateString; - if (!inputDateString.contains("T")) { - inputDateTimeString += "T00:00:00"; - } - + /** + * If the date missing month / day / time then we will add it back by making it cover a range that is as wide as + * possible. So for example if only year then it will be first date of year and end date of that year. + * + * @param uuid - The uuid of the record + * @param dateStr - The date value in the XML + * @param isStartDate - Is it processing start date? + * @return - Well format date time string + */ + private String convertDateToZonedDateTime(String uuid, String dateStr, boolean isStartDate) { + ZonedDateTime utcZonedDateTime = null; + String convertedDateTime = null; try { - ZonedDateTime zonedDateTime = ZonedDateTime.parse(inputDateTimeString, TemporalUtils.TIME_FORMATTER.withZone(ZoneId.of(timeZoneId))); + // Case 1: Date and Time (e.g., "2024-09-10T10:15:30") + if (dateStr.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}")) { + // Do nothing + convertedDateTime = dateStr; + ZonedDateTime zt = ZonedDateTime.parse(convertedDateTime, TemporalUtils.TIME_FORMATTER.withZone(ZoneId.of(timeZoneId))); + utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC); + } + // Case 2: Full Date (e.g., "2024-09-10"), depends on it is start or end, try to cover the full range + else if (dateStr.matches("\\d{4}-\\d{2}-\\d{2}")) { + convertedDateTime = isStartDate ? dateStr + "T00:00:00" : dateStr + "T23:59:59"; + ZonedDateTime zt = ZonedDateTime.parse(convertedDateTime, TemporalUtils.TIME_FORMATTER.withZone(ZoneId.of(timeZoneId))); + utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC); + } + // Case 3: Year and Month (e.g., "2024-09"), depends on it is start or end, try to cover the full range + else if (dateStr.matches("\\d{4}-\\d{2}")) { + YearMonth yearMonth = YearMonth.parse(dateStr); + LocalDateTime ld = isStartDate ? + yearMonth.atDay(1).atTime(0, 0, 0) : + yearMonth.atEndOfMonth().atTime(23, 59, 59); + + ZonedDateTime zt = ld.atZone(ZoneId.of(timeZoneId)); + utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC); + } + // Case 4: Year only (e.g., "2024"), depends on it is start or end, try to cover the full range + else if (dateStr.matches("\\d{4}")) { + YearMonth yearMonth = isStartDate ? YearMonth.parse(dateStr + "-01") : YearMonth.parse(dateStr + "-12"); + LocalDateTime ld = isStartDate ? + yearMonth.atDay(1).atTime(0, 0, 0) : + yearMonth.atEndOfMonth().atTime(23, 59, 59); + + ZonedDateTime zt = ld.atZone(ZoneId.of(timeZoneId)); + utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC); + } - // Convert to UTC - ZonedDateTime utcZonedDateTime = zonedDateTime.withZoneSameInstant(ZoneOffset.UTC); - DateTimeFormatter outputFormatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME; - return utcZonedDateTime.format(outputFormatter); + // Convert to UTC + if(utcZonedDateTime != null) { + return utcZonedDateTime.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + } + else { + logger.warn("Unable to convert date to ISO_OFFSET_DATE_TIME: {} for record {}", dateStr, uuid); + return null; + } } catch (Exception e) { - logger.warn("Unable to convert date to ISO_OFFSET_DATE_TIME: {} for record {}", inputDateString, uuid); + logger.warn("Unable to convert date to ISO_OFFSET_DATE_TIME: {} for record {}", dateStr, uuid); return null; } } diff --git a/indexer/src/test/java/au/org/aodn/esindexer/service/StacCollectionMapperServiceTests.java b/indexer/src/test/java/au/org/aodn/esindexer/service/StacCollectionMapperServiceTests.java index 248160fb..d3544b0b 100644 --- a/indexer/src/test/java/au/org/aodn/esindexer/service/StacCollectionMapperServiceTests.java +++ b/indexer/src/test/java/au/org/aodn/esindexer/service/StacCollectionMapperServiceTests.java @@ -206,7 +206,8 @@ public void verifyLicenseCorrect() throws IOException, JSONException { var a = lastRequest.get().document().toString(); Map content3 = objectMapper.readValue(lastRequest.get().document().toString(), Map.class); String out3 = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(content3); - JSONAssert.assertEquals(objectMapper.readTree(expected3).toPrettyString(), + JSONAssert.assertEquals( + objectMapper.readTree(expected3).toPrettyString(), objectMapper.readTree(out3.strip()).toPrettyString(), JSONCompareMode.STRICT); } @@ -422,4 +423,25 @@ public void verifyAbstractCitationNullWorks() throws IOException, JSONException objectMapper.readTree(out.strip()).toPrettyString(), JSONCompareMode.STRICT ); - }} + } + /** + * The date field contains year only or year-month only. We need to handle this case. + * + * @throws IOException - Do not expect to throw + */ + @Test + public void verifyMalformDateTimeWorks() throws IOException, JSONException { + String xml = readResourceFile("classpath:canned/sample_malform_date.xml"); + String expected = readResourceFile("classpath:canned/sample_malform_date_stac.json"); + indexerService.indexMetadata(xml); + + Map content = objectMapper.readValue(lastRequest.get().document().toString(), Map.class); + String out = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(content); + log.info(out); + JSONAssert.assertEquals( + objectMapper.readTree(expected).toPrettyString(), + objectMapper.readTree(out.strip()).toPrettyString(), + JSONCompareMode.STRICT + ); + } +} diff --git a/indexer/src/test/resources/canned/sample12_stac.json b/indexer/src/test/resources/canned/sample12_stac.json index 95caa748..46c50aca 100644 --- a/indexer/src/test/resources/canned/sample12_stac.json +++ b/indexer/src/test/resources/canned/sample12_stac.json @@ -19,11 +19,11 @@ "temporal": [ [ "2012-04-15T14:00:00Z", - "2012-04-29T14:00:00Z" + "2012-04-30T13:59:59Z" ], [ "2012-04-15T14:00:00Z", - "2012-04-29T14:00:00Z" + "2012-04-30T13:59:59Z" ] ] }, @@ -73,7 +73,7 @@ "temporal": [ { "start": "2012-04-15T14:00:00Z", - "end": "2012-04-29T14:00:00Z" + "end": "2012-04-30T13:59:59Z" } ] }, diff --git a/indexer/src/test/resources/canned/sample13_stac.json b/indexer/src/test/resources/canned/sample13_stac.json index 6e2ef5c8..30eb7527 100644 --- a/indexer/src/test/resources/canned/sample13_stac.json +++ b/indexer/src/test/resources/canned/sample13_stac.json @@ -19,11 +19,11 @@ "temporal": [ [ "2016-11-17T13:00:00Z", - "2016-11-19T13:00:00Z" + "2016-11-20T12:59:59Z" ], [ "2016-11-17T13:00:00Z", - "2016-11-19T13:00:00Z" + "2016-11-20T12:59:59Z" ] ] }, @@ -69,7 +69,7 @@ "temporal": [ { "start": "2016-11-17T13:00:00Z", - "end": "2016-11-19T13:00:00Z" + "end": "2016-11-20T12:59:59Z" } ] }, diff --git a/indexer/src/test/resources/canned/sample4_stac.json b/indexer/src/test/resources/canned/sample4_stac.json index 2dabd783..cc362ff9 100644 --- a/indexer/src/test/resources/canned/sample4_stac.json +++ b/indexer/src/test/resources/canned/sample4_stac.json @@ -3,7 +3,7 @@ "description" : "This dataset contains the reconstructed time series of monthly mean aragonite, calcite and pH together with distribution of dissolved inorganic carbon (DIC), total alkalinity (ALK), sea surface temperature and salinity in the Australian region at a 1 degree resolution over the period 1870-2013.", "extent" : { "bbox" : [ [ 95.5, -44.5, 169.5, -0.5 ], [ 95.5, -44.5, 169.5, -0.5 ] ], - "temporal" : [ [ "1870-07-16T14:10:44Z", "2013-06-16T14:00:00Z" ], [ "1870-07-16T14:10:44Z", "2013-06-16T14:00:00Z" ] ] + "temporal" : [ [ "1870-07-16T14:10:44Z", "2013-06-17T13:59:59Z" ], [ "1870-07-16T14:10:44Z", "2013-06-17T13:59:59Z" ] ] }, "summaries" : { "score" : 95, @@ -28,7 +28,7 @@ }, "temporal" : [ { "start" : "1870-07-16T14:10:44Z", - "end" : "2013-06-16T14:00:00Z" + "end" : "2013-06-17T13:59:59Z" } ], "parameter_vocabs" : [ "ph (total scale) of the water body", "alkalinity", "carbon", "temperature", "salinity" ] }, diff --git a/indexer/src/test/resources/canned/sample5_stac.json b/indexer/src/test/resources/canned/sample5_stac.json index 6810c47d..1d918610 100644 --- a/indexer/src/test/resources/canned/sample5_stac.json +++ b/indexer/src/test/resources/canned/sample5_stac.json @@ -3,7 +3,7 @@ "description" : "The Bonney Coast (BONC) HF ocean radar system covers an area of the Bonney Coast, South Australia, which has a recurring annual upwelling feature near to the coast that significantly changes the ecosystem from one of warm water originating in Western Australia, to one dominated by cold upwelling water from off the continental shelf. The dynamics of this area and the relationship between ocean circulation, chemistry and sediments control the larval species and the higher marine species and ecosystems in which they forage. The data from this site provide linking observations between the Southern Ocean and NSW through processes that occur on weekly to El Nino time scales. The BONC HF ocean radar system consists of two SeaSonde crossed loop direction finding stations located at Nora Creina (37.329 S 139.850 E) and Blackfellows Cave (37.940 S 140.457 E). These radars operate at a frequency of 5.211 MHz, with a bandwidth of 50 KHz, a maximum range of 200 Km and a range resolution of 3 Km. Within the HF radar coverage area surface currents are measured. This site was decommissioned in March 2017.", "extent" : { "bbox" : [ [ 138.2, -39.4, 140.8, -37.0 ], [ 138.2, -39.4, 140.8, -37.0 ] ], - "temporal" : [ [ "2010-01-21T01:00:00Z", "2017-03-26T13:00:00Z" ], [ "2010-01-21T01:00:00Z", "2017-03-26T13:00:00Z" ] ] + "temporal" : [ [ "2010-01-21T01:00:00Z", "2017-03-27T12:59:59Z" ], [ "2010-01-21T01:00:00Z", "2017-03-27T12:59:59Z" ] ] }, "summaries" : { "score" : 90, @@ -27,7 +27,7 @@ }, "temporal" : [ { "start" : "2010-01-21T01:00:00Z", - "end" : "2017-03-26T13:00:00Z" + "end" : "2017-03-27T12:59:59Z" } ] }, "contacts" : [ { diff --git a/indexer/src/test/resources/canned/sample6_stac.json b/indexer/src/test/resources/canned/sample6_stac.json index b6ec0423..78ad5343 100644 --- a/indexer/src/test/resources/canned/sample6_stac.json +++ b/indexer/src/test/resources/canned/sample6_stac.json @@ -3,7 +3,7 @@ "description" : "This record describes the End of Voyage (EOV) data archive from the Marine National Facility (MNF) RV Investigator voyage IN2024_V01, titled \"Multidisciplinary Investigations of the Southern Ocean (MISO): linking physics, biogeochemistry, plankton, aerosols, clouds, and climate.\" The voyage took place between January 02, 2024 and March 05, 2024 (AEST), departing from Hobart and returning to Fremantle.\n\n For further information please refer to the voyage documentation links.\n\n Instruments used and data collected include:\n Regular measurements:\n Lowered ADCP (LADCP), Acoustic Doppler Current Profiler (ADCP; 75, 150 KHz ), Greenhouse Gas Analysers (Picarro), Cloud Condensation Nuclei counter (CCN), Condensation Particle Counters (CPC), Disdrometer, Radon sensor, Scanning Mobility Particle Sizers (SMPS), CTD, Hydrochemistry, Triaxus, Fisheries Echosounder (EK80), Multibeam Echosounder (EM710, EM122), Sub-bottom Profiler (SBP120), GPS Positioning System, Doppler Velocity Log, Thermosalinographs (TSG), Fluorometer, Oxygen Optode, pCO2, Multiangle Absorption Photometer (MAAP), Ozone Sensor, Nephelometer, Atmospheric Temperature, Humidity, Pressure, Wind and Rain sensors, Photosynthetically Active Radiation (PAR) sensor, Precision Infrared Radiometer (PIR), Precision Spectral Pyranometer (PSP), Starboard and Portside Radiometers, Air Sampler, Ultra Short BaseLine Underwater Positioning System (USBL), Weather Radar, Expendable Bathythermographs (XBTs).\n\n Voyage-specific measurements:\n Black Carbon sensor (Aethalometer), Mobility particle size spectrometer (MPSS), Bongo Net, Chemical Ionisation Mass Spectrometer (CIMS), Cloud Radar (BASTA), Fast Repetition Rate Chlorophyll-a Fluorometer (FRRf), Mini Micro-Pulse LIDAR (miniMPL), Micro Rain Radar (MRR), Neutral Cluster Air Ion Spectrometer (NAIS), Proton-Transfer-Reaction Mass Spectrometry (PTR-MS), Radiosondes, Cloud and Aerosol Backscatter Lidar (RMAN), Stabilised Platform, Mercury Analyser (Tekran), Time of Flight Aerosol Chemical Speciation Monitor (ToF-ACSM), Water Vapor Radiometer (WVR), Aerosol mass spectrometer (AMS), Core Argo floats, Biogeochemical (BGC) Argo floats, Near-surface Drifters, In situ pumps (ISPs), Ice Nucleating Particles (INPs), Ozone Sensor, Trace Metal Aerosol Sampling, Trace Metal CTD Rosette and Bottles, Organic Sulfur Sequential Chemical Analysis Robot (OSSCAR), Omics data and various biological data.\n\n The archive for the IN2024_V01 EOV raw data is curated by the CSIRO National Collections and Marine Infrastructure (NCMI) Information and Data Centre (IDC) in Hobart, with a permanent archive at the CSIRO Data Access Portal (https://data.csiro.au/), providing access to voyage participants and processors of the data collected on the voyage.\n\n All voyage documentation is available electronically to MNF support via the local network. Applications to access voyage documentation by non-CSIRO participants can be made via data-requests-hf@csiro.au.\n\n All processed data from this voyage are made publicly available through the MNF Data Trawler (in the related links).", "extent" : { "bbox" : [ [ 113.2462, -67.0026, 151.4171, -31.9323 ], [ 113.2462, -67.0026, 151.4171, -31.9323 ] ], - "temporal" : [ [ "2024-01-01T13:00:00Z", "2024-03-04T13:00:00Z" ], [ "2024-01-01T13:00:00Z", "2024-03-04T13:00:00Z" ] ] + "temporal" : [ [ "2024-01-01T13:00:00Z", "2024-03-05T12:59:59Z" ], [ "2024-01-01T13:00:00Z", "2024-03-05T12:59:59Z" ] ] }, "summaries" : { "score" : 100, @@ -27,7 +27,7 @@ }, "temporal" : [ { "start" : "2024-01-01T13:00:00Z", - "end" : "2024-03-04T13:00:00Z" + "end" : "2024-03-05T12:59:59Z" } ], "parameter_vocabs" : [ "ocean biota", "bathymetry", "density", "water pressure", "current", "temperature", "salinity" ], "platform_vocabs" : [ "research vessel" ] diff --git a/indexer/src/test/resources/canned/sample7_stac.json b/indexer/src/test/resources/canned/sample7_stac.json index 79a41328..5d950455 100644 --- a/indexer/src/test/resources/canned/sample7_stac.json +++ b/indexer/src/test/resources/canned/sample7_stac.json @@ -3,7 +3,7 @@ "description" : "Link to 12 3D photogrammetric models and underlying images to replicate the method presented in the publication titled “RapidBenthos – Automated segmentation and multi-view classification of coral reef communities from photogrammetric reconstruction”. These sites were selected to assess the performance of RapidBenthos in different environmental conditions and reef habitats, as they ranged from high visibility offshore reefs to turbid inshore reefs, spanned depths from 5 to 15 meters, and included a range of intra-reefal environments (i.e., reef front, flank, back, and lagoon).\n\n\n All sites were imaged using a standardized diver-rig photogrammetry workflow described by Gordon et al. (2023). High-resolution benthic images (5686 x 3217 pixels) were captured using two Nikon D850 DSLR cameras with 20 mm Nikkor prime lens shooting at 0.5 second intervals (full camera settings described in (Gordon et al., 2023). Cameras were housed in Nauticam underwater housings with 8-inch dome ports and were mounted on an aluminium rig at a distance of 57 cm between lenses (60 % overlap between adjacent images, Figure 1c). Each site was imaged by a single diver on SCUBA over a period of 10-15 minutes to capture approximately 3,000 photos. Nadiral and oblique imagery was captured at an altitude of approximately 1.5 m using a “lawn-mowing” swim pattern consisting of 5 longitudinal passes and an additional 4-8 perpendicular passes. The swim pattern and speed used ensured a minimum overlap of 80 and 60 % between temporally and spatially adjacent photos, respectively (Figure 1d). Six GPCs were distributed across the depth gradient of the site prior imaging to scale resultant models in X, Y, and Z axes (details provided in(Gordon et al., 2023). Depth was also recorded for each GPC to incorporate bathymetric information into 3D model building.\n\n\n The RapidBenthos workflow was applyed to each plots, segmenting and calssifying benthic constituents on the orthomosaics. This method resulted in extracting community compostion and colony-level metrics (i.e., colony planar-area and colony frequency). The significance of this research lies in devlopping a workflow that automatically extract community composition information from close-range photogrammetry in any coral reefs environment. We eveluated that our method was 195 time faster than manual segmentation and classification allowing to sustainably scale 3D photogrammetry mointoring, both in replication and size of reefs surveyed compared to manual data extraction.\n\n\n Due to the large data files, the data can be accessed on request.", "extent" : { "bbox" : [ [ 142.9417419434, -23.8933715259, 152.3920440674, -9.7347748815 ], [ 142.9417419434, -9.998605505, 142.9417419434, -9.998605505 ], [ 143.396987915, -9.7347748815, 143.396987915, -9.7347748815 ], [ 143.396987915, -9.8470978307, 143.396987915, -9.8470978307 ], [ 145.4428482056, -14.6852319114, 145.4428482056, -14.6852319114 ], [ 146.227684021, -16.86303383, 146.227684021, -16.86303383 ], [ 146.4995956421, -18.5720601133, 146.4995956421, -18.5720601133 ], [ 146.5195083618, -18.643642707, 146.5195083618, -18.643642707 ], [ 147.6466369629, -18.778916369, 147.6466369629, -18.778916369 ], [ 150.9700870514, -23.1967007098, 150.9700870514, -23.1967007098 ], [ 151.9247817993, -23.4456087744, 151.9247817993, -23.4456087744 ], [ 152.3920440674, -23.8933715259, 152.3920440674, -23.8933715259 ] ], - "temporal" : [ [ "2021-09-30T14:00:00Z", "2022-12-30T13:00:00Z" ], [ "2021-09-30T14:00:00Z", "2022-12-30T13:00:00Z" ] ] + "temporal" : [ [ "2021-09-30T14:00:00Z", "2022-12-31T12:59:59Z" ], [ "2021-09-30T14:00:00Z", "2022-12-31T12:59:59Z" ] ] }, "summaries" : { "score" : 70, @@ -57,7 +57,7 @@ }, "temporal" : [ { "start" : "2021-09-30T14:00:00Z", - "end" : "2022-12-30T13:00:00Z" + "end" : "2022-12-31T12:59:59Z" } ] }, "contacts" : [ { diff --git a/indexer/src/test/resources/canned/sample7_stac_no_es.json b/indexer/src/test/resources/canned/sample7_stac_no_es.json index e0f50980..bbcc92ba 100644 --- a/indexer/src/test/resources/canned/sample7_stac_no_es.json +++ b/indexer/src/test/resources/canned/sample7_stac_no_es.json @@ -79,11 +79,11 @@ "temporal": [ [ "2021-09-30T14:00:00Z", - "2022-12-30T13:00:00Z" + "2022-12-31T12:59:59Z" ], [ "2021-09-30T14:00:00Z", - "2022-12-30T13:00:00Z" + "2022-12-31T12:59:59Z" ] ] }, @@ -187,7 +187,7 @@ "temporal": [ { "start": "2021-09-30T14:00:00Z", - "end": "2022-12-30T13:00:00Z" + "end": "2022-12-31T12:59:59Z" } ] }, diff --git a/indexer/src/test/resources/canned/sample8_stac.json b/indexer/src/test/resources/canned/sample8_stac.json index e1e410cc..a299c5ab 100644 --- a/indexer/src/test/resources/canned/sample8_stac.json +++ b/indexer/src/test/resources/canned/sample8_stac.json @@ -3,7 +3,7 @@ "description" : "This record describes the End of Voyage (EOV) archive from the Marine National Facility (MNF) RV Investigator research voyage IN2019_V06, titled \"Tropical observations of atmospheric convection, biogenic emissions, ocean mixing, and processes generating intraseasonal SST variability.\" The voyage took place from Darwin (NT) to Darwin between October 19 and December 17, 2019 (AEST).\n\n For further information please refer to the voyage documentation links below.\n\n Instruments used and data collected include:\n Regular measurements:\n Acoustic Doppler Current Profiler (ADCP; 75, 150 KHz ), Lowered ADCP (LADCP), Disdrometer, Fisheries echosounder (EK60), Multibeam Echosounder (EM710, EM122), Sub-bottom Profiler (SBP120), Gravimeter, GPS Positioning System, Doppler Velocity Log, Atmospheric Temperature, Humidity, Pressure, Wind and Rain sensors, Photosynthetically Active Radiation (PAR) sensor, Precision Infrared Radiometer (PIR), Precision Spectral Pyranometer (PSP), Nephelometer, pCO2, Condensation Particle Counters (CPC), Cloud Condensation Nuclei counter (CCN), Multiangle Absorption Photometer (MAAP), Starboard and Portside Radiometers, Ozone sensors, Weather Radar, Greenhouse Gas Analysers (Aerodyne, Picarro), Infrared Sea Surface Temperature Autonomous Radiometer (ISAR), Fluorometer, Oxygen optode, Thermosalinographs (TSG), CTD, Hydrochemistry, Expendable Bathythermographs (XBTs).\n\n Voyage-specific measurements:\n AIRBOX (TSI 3772 Condensation Particle Counter (3772CPC), Black Carbon sensor (Aethalometer), Aerosol mass spectrometer (AMS), Chemical Ionisation Mass Spectrometer (CIMS), Cloud Radar (BASTA), Weather Station, Multi-AXis Differential Optical Absorption Spectrometer (MAX-DOAS), mini Micro-Pulse LIDAR (miniMPL), Neutral Cluster Air Ion Spectrometer (NAIS), Radon sensor, Cloud and Aerosol Backscatter Lidar (RMAN), Scanning Mobility Particle Sizers (SMPS), Sonic Anemometer, Greenhouse Gas Analyser (Fourier Transform Infrared (FTIR) spectrometer - Spectronus), Mercury Analyser (Tekran), Gas Chromatograph - Electron Capture Detector (uDirac), Volatility-Hygroscopicity Tandem Differential Mobility Analyser (VH-TDMA)), Radiosondes, Wave-powered Profiler (Wirewalker), Sea State cameras, Triaxus, ECO Triplet, Sound Velocity Profile (SVP).\n\n The archive for the IN2019_V06 EOV raw data is curated by the CSIRO NCMI Information and Data Centre (IDC) in Hobart, with a permanent archive at the CSIRO Data Access Portal (DAP, https://data.csiro.au/dap/), providing access to participants and processors of the data collected in the voyage.\n\n All voyage documentation is available electronically to MNF support via the local network. Access to voyage documentation for non-CSIRO participants can be made via NCMI_DataLibrarians@csiro.au.", "extent" : { "bbox" : [ [ 120.55, -14.68, 133.45, -10.8 ], [ 120.55, -14.68, 133.45, -10.8 ] ], - "temporal" : [ [ "2019-10-18T13:00:00Z", "2019-12-16T13:00:00Z" ], [ "2019-10-18T13:00:00Z", "2019-12-16T13:00:00Z" ] ] + "temporal" : [ [ "2019-10-18T13:00:00Z", "2019-12-17T12:59:59Z" ], [ "2019-10-18T13:00:00Z", "2019-12-17T12:59:59Z" ] ] }, "summaries" : { "score" : 1, @@ -26,7 +26,7 @@ }, "temporal" : [ { "start" : "2019-10-18T13:00:00Z", - "end" : "2019-12-16T13:00:00Z" + "end" : "2019-12-17T12:59:59Z" } ] }, "contacts" : [ { diff --git a/indexer/src/test/resources/canned/sample9_stac.json b/indexer/src/test/resources/canned/sample9_stac.json index e74d5d79..705b0c9f 100644 --- a/indexer/src/test/resources/canned/sample9_stac.json +++ b/indexer/src/test/resources/canned/sample9_stac.json @@ -25,11 +25,11 @@ "temporal": [ [ "2018-10-28T13:00:00Z", - "2019-11-19T13:00:00Z" + "2019-11-20T12:59:59Z" ], [ "2018-10-28T13:00:00Z", - "2019-11-19T13:00:00Z" + "2019-11-20T12:59:59Z" ] ] }, @@ -73,7 +73,7 @@ "temporal": [ { "start": "2018-10-28T13:00:00Z", - "end": "2019-11-19T13:00:00Z" + "end": "2019-11-20T12:59:59Z" } ] }, diff --git a/indexer/src/test/resources/canned/sample_abstract_citation_null_stac.json b/indexer/src/test/resources/canned/sample_abstract_citation_null_stac.json index 4381ffff..3463a3dc 100644 --- a/indexer/src/test/resources/canned/sample_abstract_citation_null_stac.json +++ b/indexer/src/test/resources/canned/sample_abstract_citation_null_stac.json @@ -3,7 +3,7 @@ "description" : "This record describes the Underway (UWY) data collected from the Marine National Facility (MNF) RV Investigator research voyage in2017_v01, titled \"Interactions of the Totten Glacier with the Southern Ocean through multiple glacial cycles.\" The voyage took place from Hobart (TAS) to Hobart between the 14th January and 5th March, 2017.\n\n Standard Underway data is continuously recorded, it consists of:\n (1) NAV: Navigation data: (dual GPS) Latitude, Longitude, Speed, Heading, course over ground, Gyros, and Doppler Log.\n (2) TSG: Thermosalinograph: Water Salinity, flow-rate, Temperature, Fluorescence, and pCO2.\n (3) MET: Atmospheric (port and starboard): Humidity, Wind speed and direction (vane and ultrasonic), Radiometer, Pyranometer, PAR, Air temperature, Air pressure, Rain, Ozone and Trace gases.\n\n Data are recorded at 5 second intervals. Near real-time data is available via the link below. This dataset will be processed and archived within the CSIRO Oceans & Atmosphere Data Centre in Hobart. Data is available in 5sec intervals in NetCDF, 10 sec and 5 min intervals in ascii format. Additional information regarding this dataset is contained in the cruise report for this voyage and/or the data processing report.", "extent" : { "bbox" : [ [ 113.7, -65.7, 147.5, -42.8 ], [ 113.7, -65.7, 147.5, -42.8 ] ], - "temporal" : [ [ "2017-01-13T13:00:00Z", "2016-03-03T13:00:00Z" ], [ "2017-01-13T13:00:00Z", "2016-03-03T13:00:00Z" ] ] + "temporal" : [ [ "2017-01-13T13:00:00Z", "2016-03-04T12:59:59Z" ], [ "2017-01-13T13:00:00Z", "2016-03-04T12:59:59Z" ] ] }, "summaries" : { "score" : 1, @@ -21,7 +21,7 @@ }, "temporal" : [ { "start" : "2017-01-13T13:00:00Z", - "end" : "2016-03-03T13:00:00Z" + "end" : "2016-03-04T12:59:59Z" } ] }, "contacts" : [ { diff --git a/indexer/src/test/resources/canned/sample_incorrect_projection_stac.json b/indexer/src/test/resources/canned/sample_incorrect_projection_stac.json index 167faa25..288a9db4 100644 --- a/indexer/src/test/resources/canned/sample_incorrect_projection_stac.json +++ b/indexer/src/test/resources/canned/sample_incorrect_projection_stac.json @@ -61,11 +61,11 @@ "temporal": [ [ "2003-03-31T14:00:00Z", - "2013-02-28T13:00:00Z" + "2013-03-01T12:59:59Z" ], [ "2003-03-31T14:00:00Z", - "2013-02-28T13:00:00Z" + "2013-03-01T12:59:59Z" ] ] }, @@ -1121,7 +1121,7 @@ "temporal": [ { "start": "2003-03-31T14:00:00Z", - "end": "2013-02-28T13:00:00Z" + "end": "2013-03-01T12:59:59Z" } ] }, diff --git a/indexer/src/test/resources/canned/sample_malform_date.xml b/indexer/src/test/resources/canned/sample_malform_date.xml new file mode 100644 index 00000000..ba96e7cb --- /dev/null +++ b/indexer/src/test/resources/canned/sample_malform_date.xml @@ -0,0 +1,465 @@ + + + + + + 10955a94-8737-08e4-e053-08114f8c5e74 + + + + + + + eng + + + utf8 + + + + + + + pointOfContact + + + + + CSIRO Oceans & Atmosphere - Hobart + + + + + + + data-requests-hf@csiro.au + + + + + + + + + CSIRO O&A, Information & Data Centre + + + Data Requests + + + + + + + + + + + 2015-03-06T12:10:44 + + + revision + + + + + + + ISO 19115-3:2018 + + + + + + + + + + + + WGS84 + + + + + + + + + + + Australian Fisheries Management Authority: Fisheries Spatial Management Through Time + + + AFMA closures: time series + + + + + + Marlin Record Number: 14472 + + + + + + + Anzlic Identifier: ANZCW0306014472 + + + + + + + Information relating to Australian fisheries closures imposed by AFMA (Australian Fisheries Management Authority) and gazetted through ComLaw have been collated and tabulated with mapable GIS references in an ORACLE data base. This database allows searching, viewing and GIS mapping of the Australian spatial fisheries management through time. Mapping views of fisheries closures can be created for time, fisheries and gear types. Changes in spatial management through time will be mapable using a time-series of mapping views. + The data is currently complete for the time-period of 2005 to Feb 2013. It is intended to continue this records as new gazettes are released as part of annual stock assessment updates. Non-fisheries closures (MPA, CMR) are also intended to be included but this is dependent on continued funding and demand for their use. + + Disclaimer: These data have been collated and made accessible for scientific research only, they may not be complete or representing the most up-to-date data. For the most recent spatial management arrangements for legal and commercial purposes, refer to the relevant current ComLaw documents or contact AFMA. + + + + onGoing + + + + + pointOfContact + + + + + CSIRO Oceans & Atmosphere - Hobart + + + + + + + Michael.Fuller@csiro.au + + + + + + + + + Fuller, Mike + + + Senior Experimental Scientist (Oceans & Atmosphere) + + + + + + + + + + + + + + + + + + boundaries + + + location + + + + + + + 67 + + + 180 + + + -71 + + + -8 + + + + + + + + 2005 + 2013-02 + + + + + + + + + + asNeeded + + + + + + + Global / Oceans | Indian Ocean + + + Global / Oceans | Pacific Ocean + + + Global / Oceans | Southern Ocean + + + Marine Features (Australia) | Australian EEZ + + + place + + + + + AODN Geographic Extent Names + + + AODN GEN + + + + + 2016-05-10T00:00:00 + + + publication + + + + + + + geonetwork.thesaurus.register.place.urn:aodn.org.au:geographicextents + + + + + + + + + + + Data is made available under a Creative Commons Attribution 4.0 International Licence, please see link. Data is supplied 'as is' without any warranty or guarantee except as required by law to be given to you. The data may not be free of error, comprehensive, current or appropriate for your particular purpose. You accept all risk and responsibility for its use. ATTRIBUTION STATEMENT: The dataset [Insert-dataset-name-here] downloaded on [Insert-DD-Mmm-YYYY-here] was provided by CSIRO. + + + + + + + eng + + + utf8 + + + + + Software for querying and extracting data from Oracle database + + + ATTRIBUTE STATEMENT: + Closure geometry, + closure start and end dates, + gear types associated with closures + link to relevant ComLaw gazette + + + + + + + + + + + DIGITAL - Database Files - Oracle + + + + + + + + + + + + + Oracle data-base: 11 inter-linked tables) + GIS shape files and other formats can be created on request. + + + + + + + + + + + + + https://creativecommons.org/licenses/by/4.0/ + + + WWW:LINK-1.0-http--link + + + Documentation Link + + + This work is licensed under a Creative Commons Attribution 4.0 International License + + + + + + + + + + + + + + + + + + + + + dataset + + + + + + + + + + + + + + publication + + + + + + + Refer to datasets. + + + + + + + + + + + + + + + + + publication + + + + + + + Data collation and tabulation complete for gazetted closures dated between 2005- Feb 2013; + Ongoing intention: + (i) update for annual stock assessments; + (ii) expand database to add non-fisheries closures (MPA/ CMR) currently available through CAPAD; + (iii) Web-accessibility of GIS files via a geo-server. + + + + + + + + + + + + Shapefiles obtained from AFMA. Closures gazetted under ComLaw -- details collated into a data base. Additional closures extracted from CommLaw documents + + + + + dataset + + + + + + diff --git a/indexer/src/test/resources/canned/sample_malform_date_stac.json b/indexer/src/test/resources/canned/sample_malform_date_stac.json new file mode 100644 index 00000000..d6f1366a --- /dev/null +++ b/indexer/src/test/resources/canned/sample_malform_date_stac.json @@ -0,0 +1,87 @@ +{ + "title" : "Australian Fisheries Management Authority: Fisheries Spatial Management Through Time", + "description" : "Information relating to Australian fisheries closures imposed by AFMA (Australian Fisheries Management Authority) and gazetted through ComLaw have been collated and tabulated with mapable GIS references in an ORACLE data base. This database allows searching, viewing and GIS mapping of the Australian spatial fisheries management through time. Mapping views of fisheries closures can be created for time, fisheries and gear types. Changes in spatial management through time will be mapable using a time-series of mapping views.\n The data is currently complete for the time-period of 2005 to Feb 2013. It is intended to continue this records as new gazettes are released as part of annual stock assessment updates. Non-fisheries closures (MPA, CMR) are also intended to be included but this is dependent on continued funding and demand for their use.\n\n Disclaimer: These data have been collated and made accessible for scientific research only, they may not be complete or representing the most up-to-date data. For the most recent spatial management arrangements for legal and commercial purposes, refer to the relevant current ComLaw documents or contact AFMA.", + "extent" : { + "bbox" : [ [ 67.0, -71.0, 180.0, -8.0 ], [ 67.0, -71.0, 180.0, -8.0 ] ], + "temporal" : [ [ "2004-12-31T13:00:00Z", "2013-02-28T12:59:59Z" ], [ "2004-12-31T13:00:00Z", "2013-02-28T12:59:59Z" ] ] + }, + "summaries" : { + "score" : 1, + "status" : "onGoing", + "credits" : [ ], + "statement" : "Shapefiles obtained from AFMA. Closures gazetted under ComLaw -- details collated into a data base. Additional closures extracted from CommLaw documents", + "revision" : "2015-03-06T12:10:44", + "update_frequency" : "other", + "proj:geometry" : { + "geometries" : [ { + "type" : "Polygon", + "coordinates" : [ [ [ 67, -71 ], [ 180, -71 ], [ 180, -8 ], [ 67, -8 ], [ 67, -71 ] ] ] + } ], + "type" : "GeometryCollection" + }, + "temporal" : [ { + "start" : "2004-12-31T13:00:00Z", + "end" : "2013-02-28T12:59:59Z" + } ] + }, + "contacts" : [ { + "roles" : [ "pointOfContact", "about" ], + "organization" : "CSIRO Oceans & Atmosphere - Hobart", + "name" : "Fuller, Mike", + "position" : "Senior Experimental Scientist (Oceans & Atmosphere)", + "emails" : [ ], + "addresses" : [ ], + "phones" : [ ], + "links" : [ ] + }, { + "roles" : [ "pointOfContact", "metadata" ], + "organization" : "CSIRO Oceans & Atmosphere - Hobart", + "name" : "CSIRO O&A, Information & Data Centre", + "position" : "Data Requests", + "emails" : [ ], + "addresses" : [ ], + "phones" : [ ], + "links" : [ ] + } ], + "languages" : [ { + "code" : "eng", + "name" : "English" + } ], + "links" : [ { + "href" : "https://creativecommons.org/licenses/by/4.0/", + "rel" : "related", + "type" : "text/html", + "title" : "Documentation Link" + } ], + "license" : "Data is made available under a Creative Commons Attribution 4.0 International Licence, please see link. Data is supplied 'as is' without any warranty or guarantee except as required by law to be given to you. The data may not be free of error, comprehensive, current or appropriate for your particular purpose. You accept all risk and responsibility for its use. ATTRIBUTION STATEMENT: The dataset [Insert-dataset-name-here] downloaded on [Insert-DD-Mmm-YYYY-here] was provided by CSIRO.", + "providers" : [ { + "name" : "CSIRO Oceans & Atmosphere - Hobart", + "roles" : [ "pointOfContact" ] + } ], + "themes" : [ { + "concepts" : [ { + "id" : "Global / Oceans | Indian Ocean", + "url" : "https://marlin.csiro.au/geonetwork/srv/eng/xml.keyword.get?thesaurus=register.place.urn:aodn.org.au:geographicextents&id=urn:aodn.org.au:geographicextents:concept:4" + }, { + "id" : "Global / Oceans | Pacific Ocean", + "url" : "https://marlin.csiro.au/geonetwork/srv/eng/xml.keyword.get?thesaurus=register.place.urn:aodn.org.au:geographicextents&id=urn:aodn.org.au:geographicextents:concept:5" + }, { + "id" : "Global / Oceans | Southern Ocean", + "url" : "https://marlin.csiro.au/geonetwork/srv/eng/xml.keyword.get?thesaurus=register.place.urn:aodn.org.au:geographicextents&id=urn:aodn.org.au:geographicextents:concept:6" + }, { + "id" : "Marine Features (Australia) | Australian EEZ", + "url" : "https://marlin.csiro.au/geonetwork/srv/eng/xml.keyword.get?thesaurus=register.place.urn:aodn.org.au:geographicextents&id=urn:aodn.org.au:geographicextents:concept:1001" + } ], + "scheme" : "place", + "description" : "AODN GEN", + "title" : "AODN Geographic Extent Names" + } ], + "id" : "10955a94-8737-08e4-e053-08114f8c5e74", + "search_suggestions" : { + "abstract_phrases" : [ ] + }, + "sci:citation" : "{\"suggestedCitation\":null,\"useLimitations\":null,\"otherConstraints\":[\"Data is made available under a Creative Commons Attribution 4.0 International Licence, please see link. Data is supplied 'as is' without any warranty or guarantee except as required by law to be given to you. The data may not be free of error, comprehensive, current or appropriate for your particular purpose. You accept all risk and responsibility for its use. ATTRIBUTION STATEMENT: The dataset [Insert-dataset-name-here] downloaded on [Insert-DD-Mmm-YYYY-here] was provided by CSIRO.\"]}", + "type" : "Collection", + "stac_version" : "1.0.0", + "stac_extensions" : [ "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", "https://stac-extensions.github.io/contacts/v0.1.1/schema.json", "https://stac-extensions.github.io/projection/v1.1.0/schema.json", "https://stac-extensions.github.io/language/v1.0.0/schema.json", "https://stac-extensions.github.io/themes/v1.0.0/schema.json" ] +} diff --git a/indexer/src/test/resources/canned/sample_multiple_temporal1_stac.json b/indexer/src/test/resources/canned/sample_multiple_temporal1_stac.json index 2fa2091c..6115d667 100644 --- a/indexer/src/test/resources/canned/sample_multiple_temporal1_stac.json +++ b/indexer/src/test/resources/canned/sample_multiple_temporal1_stac.json @@ -113,7 +113,7 @@ ], [ "1980-02-27T13:00:00Z", - "1982-02-27T13:00:00Z" + "1982-02-28T12:59:59Z" ], [ "1984-02-27T13:00:00Z", @@ -262,7 +262,7 @@ "temporal": [ { "start": "1980-02-27T13:00:00Z", - "end": "1982-02-27T13:00:00Z" + "end": "1982-02-28T12:59:59Z" }, { "start": "1984-02-27T13:00:00Z", diff --git a/indexer/src/test/resources/canned/sample_multiple_temporal2_stac.json b/indexer/src/test/resources/canned/sample_multiple_temporal2_stac.json index 935928be..020bec81 100644 --- a/indexer/src/test/resources/canned/sample_multiple_temporal2_stac.json +++ b/indexer/src/test/resources/canned/sample_multiple_temporal2_stac.json @@ -109,15 +109,15 @@ "temporal": [ [ "1980-02-27T13:00:00Z", - "1985-02-27T13:00:00Z" + "1985-02-28T12:59:59Z" ], [ "1980-02-27T13:00:00Z", - "1982-02-27T13:00:00Z" + "1982-02-28T12:59:59Z" ], [ "1984-02-27T13:00:00Z", - "1985-02-27T13:00:00Z" + "1985-02-28T12:59:59Z" ] ] }, @@ -262,11 +262,11 @@ "temporal": [ { "start": "1980-02-27T13:00:00Z", - "end": "1982-02-27T13:00:00Z" + "end": "1982-02-28T12:59:59Z" }, { "start": "1984-02-27T13:00:00Z", - "end": "1985-02-27T13:00:00Z" + "end": "1985-02-28T12:59:59Z" } ] },