Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gcmd proxied mapping parameter vocabs #136

Merged
merged 10 commits into from
Sep 18, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import au.org.aodn.esindexer.configuration.AppConstants;
import au.org.aodn.esindexer.exception.*;
import au.org.aodn.esindexer.utils.GcmdKeywordUtils;
import au.org.aodn.esindexer.utils.JaxbUtils;
import au.org.aodn.metadata.iso19115_3_2018.MDMetadataType;
import au.org.aodn.stac.model.StacCollectionModel;
Expand Down Expand Up @@ -41,6 +42,8 @@
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@Slf4j
@Service
Expand All @@ -57,7 +60,7 @@ public class IndexerServiceImpl implements IndexerService {
protected JaxbUtils<MDMetadataType> jaxbUtils;
protected RankingService rankingService;
protected VocabService vocabService;

protected GcmdKeywordUtils gcmdKeywordUtils;
protected static final long DEFAULT_BACKOFF_TIME = 3000L;

@Lazy
Expand All @@ -75,7 +78,8 @@ public IndexerServiceImpl(
ElasticsearchClient portalElasticsearchClient,
ElasticSearchIndexService elasticSearchIndexService,
StacCollectionMapperService stacCollectionMapperService,
VocabService vocabService
VocabService vocabService,
GcmdKeywordUtils gcmdKeywordUtils
) {
this.indexName = indexName;
this.tokensAnalyserName = tokensAnalyserName;
Expand All @@ -87,6 +91,7 @@ public IndexerServiceImpl(
this.elasticSearchIndexService = elasticSearchIndexService;
this.stacCollectionMapperService = stacCollectionMapperService;
this.vocabService = vocabService;
this.gcmdKeywordUtils = gcmdKeywordUtils;
}

public Hit<ObjectNode> getDocumentByUUID(String uuid) throws IOException {
Expand Down Expand Up @@ -171,9 +176,11 @@ protected StacCollectionModel getMappedMetadataValues(String metadataValues) thr
stacCollectionModel.getSummaries().setScore(score);

// parameter vocabs
List<String> mappedParameterVocabsFromGcmdKeywords = gcmdKeywordUtils.getMappedParameterVocabsFromGcmdKeywords(stacCollectionModel.getThemes());
List<String> processedParameterVocabs = vocabService.extractVocabLabelsFromThemes(stacCollectionModel.getThemes(), AppConstants.AODN_DISCOVERY_PARAMETER_VOCABS);

if (!processedParameterVocabs.isEmpty()) {
stacCollectionModel.getSummaries().setParameterVocabs(processedParameterVocabs);
stacCollectionModel.getSummaries().setParameterVocabs(Stream.concat(mappedParameterVocabsFromGcmdKeywords.stream(), processedParameterVocabs.stream()).distinct().collect(Collectors.toList()));
}

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package au.org.aodn.esindexer.utils;

import au.org.aodn.stac.model.ConceptModel;
import au.org.aodn.stac.model.ThemesModel;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import org.springframework.util.ResourceUtils;

import javax.annotation.PostConstruct;
import java.io.*;
import java.nio.file.Files;
import java.util.*;


@Slf4j
@Component
public class GcmdKeywordUtils {

protected Map<String, String> gcmdMapping = new HashMap<>();

@PostConstruct
public void init() {
loadCsvToMap("classpath:config_files/gcmd-mapping.csv");
}

private String getLastWord(String keyword) {
String result;
if (keyword.contains("|")) {
result = keyword.substring(keyword.lastIndexOf("|") + 1).strip();
} else if (keyword.contains(">")) {
result = keyword.substring(keyword.lastIndexOf(">") + 1).strip();
} else {
result = keyword.strip();
}
return result;
}


private static String readResourceFile(String path) throws IOException {
File f = ResourceUtils.getFile(path);
return new String(Files.readAllBytes(f.toPath()));
}

// Load the CSV file into a HashMap
private void loadCsvToMap(String path) {
try {
// Read the file as a single String
String fileContent = readResourceFile(path);

// Split the content into lines
String[] lines = fileContent.split("\\r?\\n");

// Process each line
for (String line : lines) {
// Split the line into key and value based on comma
String[] parts = line.split(",");
if (parts.length >= 2) {
String key = parts[0].trim();
String value = parts[1].trim();
gcmdMapping.put(key, value);
}
}
} catch (IOException e) {
log.error(e.getMessage());
}
}

protected List<String> extractGcmdKeywordLastWords(List<ThemesModel> themes) {
Set<String> keywords = new HashSet<>();
for (ThemesModel themesModel : themes) {
if ((themesModel.getTitle().toLowerCase().contains("gcmd") || themesModel.getTitle().toLowerCase().contains("global change master directory")) && !themesModel.getTitle().toLowerCase().contains("palaeo temporal coverage")) {
for (ConceptModel conceptModel : themesModel.getConcepts()) {
if (conceptModel.getId() != null && !conceptModel.getId().isEmpty()) {
keywords.add(getLastWord(conceptModel.getId().replace("\"", "")).toUpperCase());
}
}
}
}
return new ArrayList<>(keywords);
}

protected String getParameterVocabByGcmdKeywordLastWord(String gcmdKeywordLastWord) {
return gcmdMapping.getOrDefault(gcmdKeywordLastWord, "");
}

public List<String> getMappedParameterVocabsFromGcmdKeywords(List<ThemesModel> themes) {
Set<String> results = new HashSet<>();

List<String> gcmdKeywordLastWords = extractGcmdKeywordLastWords(themes);

if (!gcmdKeywordLastWords.isEmpty()) {
for (String gcmdKeywordLastWord : gcmdKeywordLastWords) {
String mappedParameterVocab = getParameterVocabByGcmdKeywordLastWord(gcmdKeywordLastWord);
if (!mappedParameterVocab.isEmpty() && !mappedParameterVocab.equalsIgnoreCase("uncategorised")) {
results.add(mappedParameterVocab.toLowerCase());
}
}
}

return new ArrayList<>(results);
}
}
Loading
Loading