From 83d5bab05f4fbd2d5228ea2184a183ca3cfd6857 Mon Sep 17 00:00:00 2001 From: Andre Date: Sat, 15 Jul 2023 14:46:25 +0200 Subject: [PATCH 01/63] src/index/IndexImpl: Replaced manual reading of a file to json with the fileToJson helper function. --- src/index/IndexImpl.cpp | 8 ++++---- src/parser/ParallelParseBuffer.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 95671dbdcb..860a4ca85a 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,8 +4,6 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) -#include "./IndexImpl.h" - #include #include #include @@ -27,6 +25,9 @@ #include #include +#include "./IndexImpl.h" +#include "util/json.h" + using std::array; // _____________________________________________________________________________ @@ -921,8 +922,7 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { json j; // if we have no settings, we still have to initialize some default // values if (!settingsFileName_.empty()) { - auto f = ad_utility::makeIfstream(settingsFileName_); - f >> j; + j = fileToJson(settingsFileName_); } if (j.find("prefixes-external") != j.end()) { diff --git a/src/parser/ParallelParseBuffer.h b/src/parser/ParallelParseBuffer.h index 6706e253ca..e5d21d7b63 100644 --- a/src/parser/ParallelParseBuffer.h +++ b/src/parser/ParallelParseBuffer.h @@ -10,6 +10,7 @@ #include #include "../util/Log.h" +#include "parser/TurtleParser.h" using std::array; using std::string; From 4a356566c81ed01069d10f9dd8e246e3de43b853 Mon Sep 17 00:00:00 2001 From: Andre Date: Sat, 15 Jul 2023 15:32:34 +0200 Subject: [PATCH 02/63] src/util/ConfigManager/ConfigManager: Had to make the getConfigOption function public. Otherwise I can't check the set state of ConfigOptions. 
--- src/util/ConfigManager/ConfigManager.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 2a8e13eeff..d48a76eaed 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -139,6 +139,16 @@ class ConfigManager { */ std::string printConfigurationDoc(bool printCurrentJsonConfiguration) const; + /* + @brief Return the underlying configuration option, if it's at the position + described by the `keys`. If there is no configuration option at that + place, an exception will be thrown. + + @param keys The keys for looking up the configuration option. + */ + const ConfigOption& getConfigurationOptionByNestedKeys( + const std::vector& keys) const; + private: // For testing. FRIEND_TEST(ConfigManagerTest, GetConfigurationOptionByNestedKeysTest); @@ -173,16 +183,6 @@ class ConfigManager { void addConfigOption(const std::vector& pathToOption, ConfigOption&& option); - /* - @brief Return the underlying configuration option, if it's at the position - described by the `keys`. If there is no configuration option at that - place, an exception will be thrown. - - @param keys The keys for looking up the configuration option. - */ - const ConfigOption& getConfigurationOptionByNestedKeys( - const std::vector& keys) const; - /* @brief Return string representation of a `std::vector`. */ From f1f49cdacddba281d1a04d7f68d583273bcc013b Mon Sep 17 00:00:00 2001 From: Andre Date: Sat, 15 Jul 2023 17:25:25 +0200 Subject: [PATCH 03/63] src/index/IndexImpl: Tried to integrate ConfigManager with readIndexBuilderSettingsFromFile. 
--- src/index/CMakeLists.txt | 2 +- src/index/IndexImpl.cpp | 213 ++++++++++++++++++++++++--------------- 2 files changed, 133 insertions(+), 82 deletions(-) diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt index d437fe4a13..2c73de55bf 100644 --- a/src/index/CMakeLists.txt +++ b/src/index/CMakeLists.txt @@ -17,4 +17,4 @@ add_library(index CompressedRelation.h CompressedRelation.cpp PatternCreator.h PatternCreator.cpp) -qlever_target_link_libraries(index util parser vocabulary compilationInfo ${STXXL_LIBRARIES}) +qlever_target_link_libraries(index util parser configManager vocabulary compilationInfo ${STXXL_LIBRARIES}) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 860a4ca85a..5cfa125348 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -5,6 +5,7 @@ // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) #include +#include #include #include #include @@ -21,11 +22,13 @@ #include #include #include +#include #include #include #include #include "./IndexImpl.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/json.h" using std::array; @@ -919,20 +922,68 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( // ___________________________________________________________________________ template void IndexImpl::readIndexBuilderSettingsFromFile() { - json j; // if we have no settings, we still have to initialize some default - // values + ad_utility::ConfigManager config{}; + + // TODO Write a description. + std::vector prefixesExternal; + config.createConfigOption>( + "prefixes-external", "", &prefixesExternal, std::vector{}); + + // TODO Write a description. + std::vector languagesInternal; + config.createConfigOption>( + "languages-internal", "", &prefixesExternal, std::vector{}); + + // TODO Write a description. + std::string lang; + config.createConfigOption( + std::vector{"locale", "language"}, "", &lang, + LOCALE_DEFAULT_LANG); + + // TODO Write a description. 
+ std::string country; + config.createConfigOption( + std::vector{"locale", "country"}, "", &country, + LOCALE_DEFAULT_COUNTRY); + + // TODO Write a description. + bool ignorePunctuation; + config.createConfigOption( + std::vector{"locale", "ignore-punctuation"}, "", + &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); + + // TODO Write a description. + bool asciiPrefixesOnly; + config.createConfigOption("ascii-prefixes-only", "", &asciiPrefixesOnly, + false); + + // TODO Write a description. + size_t numTriplesPerBatch; + config.createConfigOption("num-triples-per-batch", "", + &numTriplesPerBatch, 0); + + // TODO Write a description. + size_t parserBatchSize; + config.createConfigOption("parser-batch-size", "", &parserBatchSize, + 0); + + // TODO Write a description. + std::string parserIntegerOverflowBehavior; + config.createConfigOption("parser-integer-overflow-behavior", "", + &parserIntegerOverflowBehavior, + "overflowing-integers-throw"); + + // Set the options. if (!settingsFileName_.empty()) { - j = fileToJson(settingsFileName_); - } - - if (j.find("prefixes-external") != j.end()) { - vocab_.initializeExternalizePrefixes(j["prefixes-external"]); - configurationJson_["prefixes-external"] = j["prefixes-external"]; + config.parseConfig(fileToJson(settingsFileName_)); + } else { + config.parseConfig(json(json::value_t::object)); } - if (j.count("ignore-case")) { - LOG(ERROR) << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; - throw std::runtime_error("Deprecated key \"ignore-case\" in settings JSON"); + if (config.getConfigurationOptionByNestedKeys({"prefixes-external"}) + .wasSetAtRuntime()) { + vocab_.initializeExternalizePrefixes(prefixesExternal); + configurationJson_["prefixes-external"] = prefixesExternal; } /** @@ -942,47 +993,49 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { * locale setting. 
*/ - { - std::string lang = LOCALE_DEFAULT_LANG; - std::string country = LOCALE_DEFAULT_COUNTRY; - bool ignorePunctuation = LOCALE_DEFAULT_IGNORE_PUNCTUATION; - if (j.count("locale")) { - lang = std::string{j["locale"]["language"]}; - country = std::string{j["locale"]["country"]}; - ignorePunctuation = bool{j["locale"]["ignore-punctuation"]}; - } else { - LOG(INFO) << "Locale was not specified in settings file, default is " - "en_US" - << std::endl; - } - LOG(INFO) << "You specified \"locale = " << lang << "_" << country << "\" " - << "and \"ignore-punctuation = " << ignorePunctuation << "\"" - << std::endl; - - if (lang != LOCALE_DEFAULT_LANG || country != LOCALE_DEFAULT_COUNTRY) { - LOG(WARN) << "You are using Locale settings that differ from the default " - "language or country.\n\t" - << "This should work but is untested by the QLever team. If " - "you are running into unexpected problems,\n\t" - << "Please make sure to also report your used locale when " - "filing a bug report. Also note that changing the\n\t" - << "locale requires to completely rebuild the index\n"; - } - vocab_.setLocale(lang, country, ignorePunctuation); - textVocab_.setLocale(lang, country, ignorePunctuation); - configurationJson_["locale"]["language"] = lang; - configurationJson_["locale"]["country"] = country; - configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; + if (config.getConfigurationOptionByNestedKeys({"locale", "language"}) + .wasSetAtRuntime() != + config.getConfigurationOptionByNestedKeys({"locale", "country"}) + .wasSetAtRuntime() || + config.getConfigurationOptionByNestedKeys({"locale", "country"}) + .wasSetAtRuntime() != config + .getConfigurationOptionByNestedKeys( + {"locale", "ignore-punctuation"}) + .wasSetAtRuntime()) { + throw std::runtime_error(absl::StrCat( + "All three options under 'locale' must be set, or none of them.", + config.printConfigurationDoc(true))); } - if (j.find("languages-internal") != j.end()) { - 
vocab_.initializeInternalizedLangs(j["languages-internal"]); - configurationJson_["languages-internal"] = j["languages-internal"]; + LOG(INFO) << "You specified \"locale = " << lang << "_" << country << "\" " + << "and \"ignore-punctuation = " << ignorePunctuation << "\"" + << std::endl; + + if (lang != LOCALE_DEFAULT_LANG || country != LOCALE_DEFAULT_COUNTRY) { + LOG(WARN) << "You are using Locale settings that differ from the default " + "language or country.\n\t" + << "This should work but is untested by the QLever team. If " + "you are running into unexpected problems,\n\t" + << "Please make sure to also report your used locale when " + "filing a bug report. Also note that changing the\n\t" + << "locale requires to completely rebuild the index\n"; + } + vocab_.setLocale(lang, country, ignorePunctuation); + textVocab_.setLocale(lang, country, ignorePunctuation); + configurationJson_["locale"]["language"] = lang; + configurationJson_["locale"]["country"] = country; + configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; + + if (config.getConfigurationOptionByNestedKeys({"languages-internal"}) + .wasSetAtRuntime()) { + vocab_.initializeInternalizedLangs(languagesInternal); + configurationJson_["languages-internal"] = languagesInternal; } - if (j.count("ascii-prefixes-only")) { + + if (config.getConfigurationOptionByNestedKeys({"ascii-prefixes-only"}) + .wasSetAtRuntime()) { if constexpr (std::is_same_v, TurtleParserAuto>) { - bool v{j["ascii-prefixes-only"]}; - if (v) { + if (asciiPrefixesOnly) { LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; onlyAsciiTurtlePrefixes_ = true; } else { @@ -996,16 +1049,18 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { } } - if (j.count("num-triples-per-batch")) { - numTriplesPerBatch_ = size_t{j["num-triples-per-batch"]}; + if (config.getConfigurationOptionByNestedKeys({"num-triples-per-batch"}) + .wasSetAtRuntime()) { + numTriplesPerBatch_ = numTriplesPerBatch; LOG(INFO) << "You specified 
\"num-triples-per-batch = " << numTriplesPerBatch_ << "\", choose a lower value if the index builder runs out of memory" << std::endl; } - if (j.count("parser-batch-size")) { - parserBatchSize_ = size_t{j["parser-batch-size"]}; + if (config.getConfigurationOptionByNestedKeys({"parser-batch-size"}) + .wasSetAtRuntime()) { + parserBatchSize_ = parserBatchSize; LOG(INFO) << "Overriding setting parser-batch-size to " << parserBatchSize_ << " This might influence performance during index build." << std::endl; @@ -1018,39 +1073,35 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { std::vector allModes{overflowingIntegersThrow, overflowingIntegersBecomeDoubles, allIntegersBecomeDoubles}; - std::string key = "parser-integer-overflow-behavior"; - if (j.count(key)) { - auto value = j[key]; - if (value == overflowingIntegersThrow) { - LOG(INFO) << "Integers that cannot be represented by QLever will throw " - "an exception" - << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::Error; - } else if (value == overflowingIntegersBecomeDoubles) { - LOG(INFO) << "Integers that cannot be represented by QLever will be " - "converted to doubles" - << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::OverflowingToDouble; - } else if (value == allIntegersBecomeDoubles) { - LOG(INFO) << "All integers will be converted to doubles" << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::OverflowingToDouble; - } else { - AD_CONTRACT_CHECK(std::find(allModes.begin(), allModes.end(), value) == - allModes.end()); - LOG(ERROR) << "Invalid value for " << key << std::endl; - LOG(INFO) << "The currently supported values are " - << absl::StrJoin(allModes, ",") << std::endl; - } - } else { + if (parserIntegerOverflowBehavior == overflowingIntegersThrow) { + LOG(INFO) << "Integers that cannot be represented by QLever will throw " + "an exception" + << std::endl; 
turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::Error; - LOG(INFO) << "Integers that cannot be represented by QLever will throw an " - "exception (this is the default behavior)" + } else if (parserIntegerOverflowBehavior == + overflowingIntegersBecomeDoubles) { + LOG(INFO) << "Integers that cannot be represented by QLever will be " + "converted to doubles" << std::endl; + turtleParserIntegerOverflowBehavior_ = + TurtleParserIntegerOverflowBehavior::OverflowingToDouble; + } else if (parserIntegerOverflowBehavior == allIntegersBecomeDoubles) { + LOG(INFO) << "All integers will be converted to doubles" << std::endl; + turtleParserIntegerOverflowBehavior_ = + TurtleParserIntegerOverflowBehavior::OverflowingToDouble; + } else { + AD_CONTRACT_CHECK(std::find(allModes.begin(), allModes.end(), + parserIntegerOverflowBehavior) == + allModes.end()); + LOG(ERROR) << "Invalid value for parser-integer-overflow-behavior" + << std::endl; + LOG(INFO) << "The currently supported values are " + << absl::StrJoin(allModes, ",") << std::endl; } + + // Logging used configuration options. + LOG(INFO) << config.printConfigurationDoc(true); } // ___________________________________________________________________________ From e538477e357990f265aa9f2e8a23733db6c81579 Mon Sep 17 00:00:00 2001 From: Andre Date: Mon, 17 Jul 2023 12:16:31 +0200 Subject: [PATCH 04/63] src/index/Vocabulary: Added missing explicit template instantiation. 
--- src/index/Vocabulary.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp index 70f7821448..d9e00907b3 100644 --- a/src/index/Vocabulary.cpp +++ b/src/index/Vocabulary.cpp @@ -3,8 +3,6 @@ // Authors: Björn Buchhold , // Johannes Kalmbach (johannes.kalmbach@gmail.com) -#include "./Vocabulary.h" - #include #include @@ -17,6 +15,7 @@ #include "../util/Serializer/FileSerializer.h" #include "../util/json.h" #include "./ConstantsIndexBuilding.h" +#include "./Vocabulary.h" using std::string; @@ -318,6 +317,8 @@ template void RdfsVocabulary::buildCodebookForPrefixCompression< const std::vector&); template void RdfsVocabulary::initializeInternalizedLangs( const nlohmann::json&); +template void RdfsVocabulary::initializeInternalizedLangs< + std::vector>(const std::vector&); template void RdfsVocabulary::initializeExternalizePrefixes( const nlohmann::json& prefixes); template void RdfsVocabulary::initializeExternalizePrefixes< From db786b1fb3821d43dfc0c19e6e2306872885e31d Mon Sep 17 00:00:00 2001 From: Andre Date: Mon, 17 Jul 2023 14:43:09 +0200 Subject: [PATCH 05/63] src/util: Fixed cyclic dependency. 
--- src/engine/Server.h | 2 +- src/parser/CMakeLists.txt | 3 +-- src/parser/ParsedQuery.h | 2 +- src/parser/TurtleParser.h | 2 +- src/util/CMakeLists.txt | 4 ++-- src/{parser => util}/ParseException.cpp | 4 +++- src/{parser => util}/ParseException.h | 0 src/util/antlr/ANTLRErrorHandling.h | 2 +- test/AcceptHeaderTest.cpp | 2 +- test/ParseExceptionTest.cpp | 2 +- 10 files changed, 12 insertions(+), 11 deletions(-) rename src/{parser => util}/ParseException.cpp (97%) rename src/{parser => util}/ParseException.h (100%) diff --git a/src/engine/Server.h b/src/engine/Server.h index 10aa906c86..7abc7b5109 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -14,9 +14,9 @@ #include "engine/QueryExecutionTree.h" #include "engine/SortPerformanceEstimator.h" #include "index/Index.h" -#include "parser/ParseException.h" #include "parser/SparqlParser.h" #include "util/AllocatorWithLimit.h" +#include "util/ParseException.h" #include "util/Timer.h" #include "util/http/HttpServer.h" #include "util/http/streamable_body.h" diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 61a696d1d2..13775883fe 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -8,7 +8,6 @@ add_library(parser sparqlParser/SparqlQleverVisitor.cpp SparqlParser.h SparqlParser.cpp ParsedQuery.h ParsedQuery.cpp - ParseException.h TurtleParser.h TurtleParser.cpp Tokenizer.h Tokenizer.cpp ContextFileParser.cpp ContextFileParser.h @@ -20,7 +19,7 @@ add_library(parser GraphPatternOperation.cpp PropertyPath.h PropertyPath.cpp Alias.h data/SolutionModifiers.h data/LimitOffsetClause.h data/SparqlFilter.h data/SparqlFilter.cpp - data/OrderKey.h data/GroupKey.h ParseException.cpp SelectClause.cpp + data/OrderKey.h data/GroupKey.h SelectClause.cpp SelectClause.h GraphPatternOperation.cpp GraphPatternOperation.h # The `Variable.cpp` from the subdirectory is linked here because otherwise we get linking errors. 
GraphPattern.cpp GraphPattern.h ConstructClause.h data/VariableToColumnMapPrinters.cpp) diff --git a/src/parser/ParsedQuery.h b/src/parser/ParsedQuery.h index f892de253f..212564a6c9 100644 --- a/src/parser/ParsedQuery.h +++ b/src/parser/ParsedQuery.h @@ -15,7 +15,6 @@ #include "parser/ConstructClause.h" #include "parser/GraphPattern.h" #include "parser/GraphPatternOperation.h" -#include "parser/ParseException.h" #include "parser/PropertyPath.h" #include "parser/SelectClause.h" #include "parser/TripleComponent.h" @@ -31,6 +30,7 @@ #include "util/Generator.h" #include "util/HashMap.h" #include "util/OverloadCallOperator.h" +#include "util/ParseException.h" #include "util/StringUtils.h" using std::string; diff --git a/src/parser/TurtleParser.h b/src/parser/TurtleParser.h index 1af252d401..f4040564fa 100644 --- a/src/parser/TurtleParser.h +++ b/src/parser/TurtleParser.h @@ -26,7 +26,7 @@ #include #include -#include "parser/ParseException.h" +#include "util/ParseException.h" using std::string; diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index dc3330778b..2db9e31b4e 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,5 +1,5 @@ add_subdirectory(ConfigManager) add_subdirectory(http) add_library(util GeoSparqlHelpers.h GeoSparqlHelpers.cpp VisitMixin.h - antlr/ANTLRErrorHandling.cpp antlr/ANTLRErrorHandling.h Conversions.cpp ResetWhenMoved.h Date.cpp) -qlever_target_link_libraries(util parser) + antlr/ANTLRErrorHandling.cpp antlr/ANTLRErrorHandling.h Conversions.cpp ResetWhenMoved.h Date.cpp ParseException.cpp) +qlever_target_link_libraries(util) diff --git a/src/parser/ParseException.cpp b/src/util/ParseException.cpp similarity index 97% rename from src/parser/ParseException.cpp rename to src/util/ParseException.cpp index 278c8c4542..9e48df29e7 100644 --- a/src/parser/ParseException.cpp +++ b/src/util/ParseException.cpp @@ -3,8 +3,10 @@ // Author: Julian Mundhahs (mundhahj@informatik.uni-freiburg.de) #include -#include #include 
+#include + +#include "util/StringUtils.h" // ___________________________________________________________________________ std::string ExceptionMetadata::coloredError() const { diff --git a/src/parser/ParseException.h b/src/util/ParseException.h similarity index 100% rename from src/parser/ParseException.h rename to src/util/ParseException.h diff --git a/src/util/antlr/ANTLRErrorHandling.h b/src/util/antlr/ANTLRErrorHandling.h index 5008de8acc..54c8a95dd2 100644 --- a/src/util/antlr/ANTLRErrorHandling.h +++ b/src/util/antlr/ANTLRErrorHandling.h @@ -6,7 +6,7 @@ #pragma once -#include +#include #include diff --git a/test/AcceptHeaderTest.cpp b/test/AcceptHeaderTest.cpp index 54eda3357f..9337e6eb4f 100644 --- a/test/AcceptHeaderTest.cpp +++ b/test/AcceptHeaderTest.cpp @@ -3,7 +3,7 @@ // Author: Johannes Kalmbach #include -#include +#include #include diff --git a/test/ParseExceptionTest.cpp b/test/ParseExceptionTest.cpp index 8fe2863f4c..ca1e6162b8 100644 --- a/test/ParseExceptionTest.cpp +++ b/test/ParseExceptionTest.cpp @@ -5,8 +5,8 @@ #include #include "SparqlAntlrParserTestHelpers.h" -#include "parser/ParseException.h" #include "parser/SparqlParser.h" +#include "util/ParseException.h" #include "util/SourceLocation.h" TEST(ParseException, coloredError) { From 6e9cb52d17fe161ad4b94fde9b7417ee7dea0176 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 20 Jul 2023 09:20:51 +0200 Subject: [PATCH 06/63] src/index/IndexImpl: Tried to integrate the ConfigManager in readConfiguration. 
--- src/index/IndexImpl.cpp | 150 ++++++++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 46 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 5cfa125348..299eec1abe 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -798,18 +798,74 @@ void IndexImpl::writeConfiguration() const { // ___________________________________________________________________________ void IndexImpl::readConfiguration() { - auto f = ad_utility::makeIfstream(onDiskBase_ + CONFIGURATION_FILE); - f >> configurationJson_; - if (configurationJson_.find("git_hash") != configurationJson_.end()) { + ad_utility::ConfigManager config{}; + + // TODO Write a description. + std::string gitHash; + config.createConfigOption("git_hash", "", &gitHash, + "None given."); + + // TODO Write a description. + bool boolPrefixes; + config.createConfigOption("prefixes", "", &boolPrefixes, false); + + // TODO Write a description. + bool hasAllPermutations; + config.createConfigOption("has-all-permutations", "", + &hasAllPermutations, true); + + // TODO Write a description. + std::vector prefixesExternal; + config.createConfigOption>( + "prefixes-external", "", &prefixesExternal, std::vector{}); + + // TODO Write a description. + std::string lang; + config.createConfigOption( + std::vector{"locale", "language"}, "", &lang); + + // TODO Write a description. + std::string country; + config.createConfigOption( + std::vector{"locale", "country"}, "", &country); + + // TODO Write a description. + bool ignorePunctuation; + config.createConfigOption( + std::vector{"locale", "ignore-punctuation"}, "", + &ignorePunctuation); + + // TODO Write a description. + std::vector languagesInternal; + config.createConfigOption>( + "languages-internal", "", &languagesInternal, std::vector{}); + + // TODO Write a description. 
+ config.createConfigOption("num-predicates-normal", "", + &numPredicatesNormal_); + config.createConfigOption("num-subjects-normal", "", + &numSubjectsNormal_); + config.createConfigOption("num-objects-normal", "", + &numObjectsNormal_); + config.createConfigOption("num-triples-normal", "", + &numTriplesNormal_); + + config.parseConfig(fileToJson(onDiskBase_ + CONFIGURATION_FILE)); + + if (config.getConfigurationOptionByNestedKeys({"git_hash"}) + .wasSetAtRuntime()) { + configurationJson_["git_hash"] = gitHash; LOG(INFO) << "The git hash used to build this index was " - << std::string(configurationJson_["git_hash"]).substr(0, 6) - << std::endl; + << gitHash.substr(0, 6) << std::endl; } else { LOG(INFO) << "The index was built before git commit hashes were stored in " "the index meta data" << std::endl; } + // Slight problem here: I have no idea, what kind of value `"prefixes"` points + // to. So I had to guess. + /* if (configurationJson_.find("prefixes") != configurationJson_.end()) { if (configurationJson_["prefixes"]) { vector prefixes; @@ -822,60 +878,62 @@ void IndexImpl::readConfiguration() { vocab_.buildCodebookForPrefixCompression(std::vector()); } } - - if (configurationJson_.find("prefixes-external") != - configurationJson_.end()) { - vocab_.initializeExternalizePrefixes( - configurationJson_["prefixes-external"]); + */ + if (config.getConfigurationOptionByNestedKeys({"prefixes"}) + .wasSetAtRuntime()) { + configurationJson_["prefixes"] = boolPrefixes; + if (boolPrefixes) { + vector prefixes; + auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); + for (string prefix; std::getline(prefixFile, prefix);) { + prefixes.emplace_back(std::move(prefix)); + } + vocab_.buildCodebookForPrefixCompression(prefixes); + } else { + vocab_.buildCodebookForPrefixCompression(std::vector()); + } } - if (configurationJson_.count("ignore-case")) { - LOG(ERROR) << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; - throw std::runtime_error("Deprecated key 
\"ignore-case\" in index build"); + if (config.getConfigurationOptionByNestedKeys({"prefixes-external"}) + .wasSetAtRuntime()) { + vocab_.initializeExternalizePrefixes(prefixesExternal); + configurationJson_["prefixes-external"] = prefixesExternal; } - if (configurationJson_.count("locale")) { - std::string lang{configurationJson_["locale"]["language"]}; - std::string country{configurationJson_["locale"]["country"]}; - bool ignorePunctuation{configurationJson_["locale"]["ignore-punctuation"]}; - vocab_.setLocale(lang, country, ignorePunctuation); - textVocab_.setLocale(lang, country, ignorePunctuation); - } else { - LOG(ERROR) << "Key \"locale\" is missing in the metadata. This is probably " - "and old index build that is no longer supported by QLever. " - "Please rebuild your index\n"; - throw std::runtime_error( - "Missing required key \"locale\" in index build's metadata"); - } + configurationJson_["locale"]["language"] = lang; + configurationJson_["locale"]["country"] = country; + configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; + vocab_.setLocale(lang, country, ignorePunctuation); + textVocab_.setLocale(lang, country, ignorePunctuation); - if (configurationJson_.find("languages-internal") != - configurationJson_.end()) { - vocab_.initializeInternalizedLangs( - configurationJson_["languages-internal"]); + if (config.getConfigurationOptionByNestedKeys({"languages-internal"}) + .wasSetAtRuntime()) { + vocab_.initializeInternalizedLangs(languagesInternal); + configurationJson_["languages-internal"] = languagesInternal; } + // Once again, I can only guess, what kind of value should be at + // `"has-all-permutations"`. + /* if (configurationJson_.find("has-all-permutations") != configurationJson_.end() && configurationJson_["has-all-permutations"] == false) { // If the permutations simply don't exist, then we can never load them. 
loadAllPermutations_ = false; } + */ + if (config.getConfigurationOptionByNestedKeys({"has-all-permutations"}) + .wasSetAtRuntime() && + !hasAllPermutations) { + configurationJson_["has-all-permutations"] = false; + // If the permutations simply don't exist, then we can never load them. + loadAllPermutations_ = false; + } - auto loadRequestedDataMember = [this](std::string_view key, auto& target) { - auto it = configurationJson_.find(key); - if (it == configurationJson_.end()) { - throw std::runtime_error{absl::StrCat( - "The required key \"", key, - "\" was not found in the `meta-data.json`. Most likely this index " - "was built with an older version of QLever and should be rebuilt")}; - } - target = std::decay_t{*it}; - }; - - loadRequestedDataMember("num-predicates-normal", numPredicatesNormal_); - loadRequestedDataMember("num-subjects-normal", numSubjectsNormal_); - loadRequestedDataMember("num-objects-normal", numObjectsNormal_); - loadRequestedDataMember("num-triples-normal", numTriplesNormal_); + configurationJson_["num-predicates-normal"] = numPredicatesNormal_; + configurationJson_["num-subjects-normal"] = numSubjectsNormal_; + configurationJson_["num-objects-normal"] = numObjectsNormal_; + configurationJson_["num-triples-normal"] = numTriplesNormal_; } // ___________________________________________________________________________ @@ -932,7 +990,7 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { // TODO Write a description. std::vector languagesInternal; config.createConfigOption>( - "languages-internal", "", &prefixesExternal, std::vector{}); + "languages-internal", "", &languagesInternal, std::vector{}); // TODO Write a description. std::string lang; From 31afceef21d75203354f0ffe7413e3d2865d6ee4 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 20 Jul 2023 13:37:51 +0200 Subject: [PATCH 07/63] src/util/ConfigManager/ConfigManager: Renamed createConfigOption to addOption. 
--- benchmark/BenchmarkExamples.cpp | 16 ++-- benchmark/Usage.md | 2 +- src/util/ConfigManager/ConfigManager.h | 27 +++--- test/ConfigManagerTest.cpp | 109 ++++++++++++------------- 4 files changed, 74 insertions(+), 80 deletions(-) diff --git a/benchmark/BenchmarkExamples.cpp b/benchmark/BenchmarkExamples.cpp index 67d9d66a59..d871bcf9df 100644 --- a/benchmark/BenchmarkExamples.cpp +++ b/benchmark/BenchmarkExamples.cpp @@ -49,19 +49,19 @@ class ConfigOptions : public BenchmarkInterface { ConfigOptions() { ad_utility::ConfigManager& manager = getConfigManager(); - manager.createConfigOption("date", "The current date.", - &dateString_, "22.3.2023"); + manager.addOption("date", "The current date.", &dateString_, + "22.3.2023"); - manager.createConfigOption("numSigns", "The number of street signs.", - &numberOfStreetSigns_, 10); + manager.addOption("numSigns", "The number of street signs.", + &numberOfStreetSigns_, 10); - manager.createConfigOption>( + manager.addOption>( "CoinFlipTry", "The number of succesful coin flips.", &wonOnTryX_, std::vector{false, false, false, false, false}); - manager.createConfigOption({"Accounts", "Personal", "Steve"}, - "Steves saving account balance.", - &balanceOnStevesSavingAccount_, -41.9); + manager.addOption({"Accounts", "Personal", "Steve"}, + "Steves saving account balance.", + &balanceOnStevesSavingAccount_, -41.9); } }; diff --git a/benchmark/Usage.md b/benchmark/Usage.md index 181ef447da..f0a7e64127 100644 --- a/benchmark/Usage.md +++ b/benchmark/Usage.md @@ -134,7 +134,7 @@ Defining the configuration options and passing values to them. ### Adding options -Adding configuration options is done by adding configuration option to the private member variable `manager_`, accessible via a getter, by using the function `ConfigManager::createConfigOption`. That is best done in the constructor of your class. 
+Adding configuration options is done by adding configuration option to the private member variable `manager_`, accessible via a getter, by using the function `ConfigManager::addOption`. That is best done in the constructor of your class. In our system a configuration option is described by a handful of characteristics: diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 2a8e13eeff..d679f28dce 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -67,11 +67,11 @@ class ConfigManager { template requires ad_utility::isTypeContainedIn - void createConfigOption(const std::vector& pathToOption, - std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn, - std::optional defaultValue = - std::optional(std::nullopt)) { + void addOption(const std::vector& pathToOption, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + std::optional defaultValue = + std::optional(std::nullopt)) { /* We need a non-empty path to construct a ConfigOption object, the `verify...` function always throws an exception for this case. No need to duplicate the @@ -89,20 +89,19 @@ class ConfigManager { /* @brief Creates and adds a new configuration option, just like in the other - `createConfigOption`. But instead of a `pathToOption`, there is only an + `addOption`. But instead of a `pathToOption`, there is only an `optionName`, which describes a path only made out of this single string. 
*/ template requires ad_utility::isTypeContainedIn - void createConfigOption(std::string optionName, - std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn, - std::optional defaultValue = - std::optional(std::nullopt)) { - createConfigOption( - std::vector{std::move(optionName)}, optionDescription, - variableToPutValueOfTheOptionIn, std::move(defaultValue)); + void addOption(std::string optionName, std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + std::optional defaultValue = + std::optional(std::nullopt)) { + addOption(std::vector{std::move(optionName)}, + optionDescription, variableToPutValueOfTheOptionIn, + std::move(defaultValue)); } /* diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 191c680fcf..2aaf05dfca 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -46,12 +46,11 @@ TEST(ConfigManagerTest, GetConfigurationOptionByNestedKeysTest) { // Configuration options for testing. int notUsed; - config.createConfigOption( - {"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", ¬Used, - std::optional{42}); + config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", + ¬Used, std::optional{42}); - config.createConfigOption( - {"Shared_part", "Unique_part_2", "Sense_of_existence"}, "", ¬Used); + config.addOption({"Shared_part", "Unique_part_2", "Sense_of_existence"}, "", + ¬Used); // Where those two options added? ASSERT_EQ(config.configurationOptions_.size(), 2); @@ -78,12 +77,12 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { // Configuration options for testing. int notUsed; - config.createConfigOption( - {"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", ¬Used, 42); + config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, + "", ¬Used, 42); // Trying to add a configuration option with the same name at the same // place, should cause an error. 
- ASSERT_THROW(config.createConfigOption( + ASSERT_THROW(config.addOption( {"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", ¬Used, 42); , ad_utility::ConfigManagerOptionPathAlreadyinUseException); @@ -92,8 +91,8 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { Reason: The last key is used as the name for the to be created `ConfigOption`. An empty vector doesn't work with that. */ - ASSERT_ANY_THROW(config.createConfigOption(std::vector{}, - "", ¬Used, 42);); + ASSERT_ANY_THROW( + config.addOption(std::vector{}, "", ¬Used, 42);); /* Trying to add a configuration option with a path containing strings with @@ -102,7 +101,7 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { configuration grammar. Ergo, you can't set values, with such paths per short hand, which we don't want. */ - ASSERT_THROW(config.createConfigOption( + ASSERT_THROW(config.addOption( std::vector{"Shared part", "Sense_of_existence"}, "", ¬Used, 42); , ad_utility::NotValidShortHandNameException); @@ -116,14 +115,11 @@ TEST(ConfigManagerTest, ParseConfig) { int secondInt; int thirdInt; - config.createConfigOption( - std::vector{"depth_0", "Option_0"}, - "Must be set. Has no default value.", &firstInt); - config.createConfigOption({"depth_0", "depth_1", "Option_1"}, - "Must be set. Has no default value.", - &secondInt); - config.createConfigOption("Option_2", "Has a default value.", &thirdInt, - 2); + config.addOption(std::vector{"depth_0", "Option_0"}, + "Must be set. Has no default value.", &firstInt); + config.addOption({"depth_0", "depth_1", "Option_1"}, + "Must be set. Has no default value.", &secondInt); + config.addOption("Option_2", "Has a default value.", &thirdInt, 2); // For easier access to the options. auto getOption = [&config](const size_t& optionNumber) { @@ -171,10 +167,9 @@ TEST(ConfigManagerTest, ParseConfigExceptionTest) { // Add one option with default and one without. 
int notUsedInt; std::vector notUsedVector; - config.createConfigOption( - std::vector{"depth_0", "Without_default"}, - "Must be set. Has no default value.", ¬UsedInt); - config.createConfigOption>( + config.addOption(std::vector{"depth_0", "Without_default"}, + "Must be set. Has no default value.", ¬UsedInt); + config.addOption>( std::vector{"depth_0", "With_default"}, "Must not be set. Has default value.", ¬UsedVector, std::vector{40, 41}); @@ -232,71 +227,71 @@ TEST(ConfigManagerTest, ParseShortHandTest) { // Add integer options. int somePositiveNumberInt; - config.createConfigOption("somePositiveNumber", - "Must be set. Has no default value.", - &somePositiveNumberInt); + config.addOption("somePositiveNumber", + "Must be set. Has no default value.", + &somePositiveNumberInt); int someNegativNumberInt; - config.createConfigOption("someNegativNumber", - "Must be set. Has no default value.", - &someNegativNumberInt); + config.addOption("someNegativNumber", + "Must be set. Has no default value.", + &someNegativNumberInt); // Add integer list. std::vector someIntegerlistIntVector; - config.createConfigOption>( - "someIntegerlist", "Must be set. Has no default value.", - &someIntegerlistIntVector); + config.addOption>("someIntegerlist", + "Must be set. Has no default value.", + &someIntegerlistIntVector); // Add floating point options. float somePositiveFloatingPointFloat; - config.createConfigOption("somePositiveFloatingPoint", - "Must be set. Has no default value.", - &somePositiveFloatingPointFloat); + config.addOption("somePositiveFloatingPoint", + "Must be set. Has no default value.", + &somePositiveFloatingPointFloat); float someNegativFloatingPointFloat; - config.createConfigOption("someNegativFloatingPoint", - "Must be set. Has no default value.", - &someNegativFloatingPointFloat); + config.addOption("someNegativFloatingPoint", + "Must be set. Has no default value.", + &someNegativFloatingPointFloat); // Add floating point list. 
std::vector someFloatingPointListFloatVector; - config.createConfigOption>( - "someFloatingPointList", "Must be set. Has no default value.", - &someFloatingPointListFloatVector); + config.addOption>("someFloatingPointList", + "Must be set. Has no default value.", + &someFloatingPointListFloatVector); // Add boolean options. bool boolTrueBool; - config.createConfigOption( - "boolTrue", "Must be set. Has no default value.", &boolTrueBool); + config.addOption("boolTrue", "Must be set. Has no default value.", + &boolTrueBool); bool boolFalseBool; - config.createConfigOption( - "boolFalse", "Must be set. Has no default value.", &boolFalseBool); + config.addOption("boolFalse", "Must be set. Has no default value.", + &boolFalseBool); // Add boolean list. std::vector someBooleanListBoolVector; - config.createConfigOption>( - "someBooleanList", "Must be set. Has no default value.", - &someBooleanListBoolVector); + config.addOption>("someBooleanList", + "Must be set. Has no default value.", + &someBooleanListBoolVector); // Add string option. std::string myNameString; - config.createConfigOption( - "myName", "Must be set. Has no default value.", &myNameString); + config.addOption("myName", "Must be set. Has no default value.", + &myNameString); // Add string list. std::vector someStringListStringVector; - config.createConfigOption>( + config.addOption>( "someStringList", "Must be set. Has no default value.", &someStringListStringVector); // Add option with deeper level. std::vector deeperIntVector; - config.createConfigOption>( - {"depth", "here", "list"}, "Must be set. Has no default value.", - &deeperIntVector); + config.addOption>({"depth", "here", "list"}, + "Must be set. Has no default value.", + &deeperIntVector); // This one will not be changed, in order to test, that options, that are // not set at run time, are not changed. 
int noChangeInt; - config.createConfigOption("No_change", "", &noChangeInt, 10); + config.addOption("No_change", "", &noChangeInt, 10); // Set those. config.parseConfig(ad_utility::ConfigManager::parseShortHand( @@ -370,8 +365,8 @@ TEST(ConfigManagerTest, PrintConfigurationDocExistence) { // Can you print a non-empty one? int notUsed; - config.createConfigOption("WithDefault", "", ¬Used, 42); - config.createConfigOption("WithoutDefault", "", ¬Used); + config.addOption("WithDefault", "", ¬Used, 42); + config.addOption("WithoutDefault", "", ¬Used); ASSERT_NO_THROW(config.printConfigurationDoc(false)); ASSERT_NO_THROW(config.printConfigurationDoc(true)); } From 9d558103191e3c503db3d7863150520b89f5ffa9 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 20 Jul 2023 14:28:15 +0200 Subject: [PATCH 08/63] src/util/ConfigManager/ConfigManager: Replaced the choosing of default value existence with multiple function overloads. --- benchmark/BenchmarkExamples.cpp | 20 ++--- src/util/ConfigManager/ConfigManager.h | 109 ++++++++++++++++++++----- test/ConfigManagerTest.cpp | 99 +++++++++++----------- 3 files changed, 144 insertions(+), 84 deletions(-) diff --git a/benchmark/BenchmarkExamples.cpp b/benchmark/BenchmarkExamples.cpp index d871bcf9df..57bee0e9c7 100644 --- a/benchmark/BenchmarkExamples.cpp +++ b/benchmark/BenchmarkExamples.cpp @@ -13,6 +13,8 @@ #include "util/ConfigManager/ConfigOption.h" #include "util/Random.h" +using namespace std::string_literals; + namespace ad_benchmark { /* A typical problem in benchmarking is that the result of a computation is @@ -49,19 +51,17 @@ class ConfigOptions : public BenchmarkInterface { ConfigOptions() { ad_utility::ConfigManager& manager = getConfigManager(); - manager.addOption("date", "The current date.", &dateString_, - "22.3.2023"); + manager.addOption("date", "The current date.", &dateString_, "22.3.2023"s); - manager.addOption("numSigns", "The number of street signs.", - &numberOfStreetSigns_, 10); + manager.addOption("numSigns", 
"The number of street signs.", + &numberOfStreetSigns_, 10); - manager.addOption>( - "CoinFlipTry", "The number of succesful coin flips.", &wonOnTryX_, - std::vector{false, false, false, false, false}); + manager.addOption("CoinFlipTry", "The number of succesful coin flips.", + &wonOnTryX_, {false, false, false, false, false}); - manager.addOption({"Accounts", "Personal", "Steve"}, - "Steves saving account balance.", - &balanceOnStevesSavingAccount_, -41.9); + manager.addOption({"Accounts", "Personal", "Steve"}, + "Steves saving account balance.", + &balanceOnStevesSavingAccount_, -41.9f); } }; diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index d679f28dce..b07c4c1f7d 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -49,7 +49,8 @@ class ConfigManager { public: /* - @brief Creates and adds a new configuration option. + @brief Creates and adds a new configuration option without a default value. + This configuration option must always be set at runtime. @tparam OptionType The type of value, the configuration option can hold. @@ -60,31 +61,44 @@ class ConfigManager { @param variableToPutValueOfTheOptionIn The value held by the configuration option will be copied into this variable, whenever the value in the configuration option changes. - @param defaultValue A default value for the configuration option. If none is - given, signified by an empty optional, then a value for the configuration - option MUST be given at runtime. */ template requires ad_utility::isTypeContainedIn void addOption(const std::vector& pathToOption, std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn, - std::optional defaultValue = - std::optional(std::nullopt)) { - /* - We need a non-empty path to construct a ConfigOption object, the `verify...` - function always throws an exception for this case. No need to duplicate the - error code. 
- */ - if (pathToOption.empty()) { - verifyPathToConfigOption(pathToOption, ""); - } + OptionType* variableToPutValueOfTheOptionIn) { + addOptionImpl(pathToOption, optionDescription, + variableToPutValueOfTheOptionIn, + std::optional(std::nullopt)); + } - addConfigOption( - pathToOption, - ConfigOption(pathToOption.back(), optionDescription, - variableToPutValueOfTheOptionIn, defaultValue)); + /* + @brief Creates and adds a new configuration option with a default value. + Setting this option at runtime is optional and not required. + + @tparam OptionType The type of value, the configuration option can hold. + + @param pathToOption Describes a path in json, that points to the value held by + the configuration option. The last key in the vector is the name of the + configuration option. + @param optionDescription A description for the configuration option. + @param variableToPutValueOfTheOptionIn The value held by the configuration + option will be copied into this variable, whenever the value in the + configuration option changes. + @param defaultValue A default value for the configuration option. + */ + template DefaultValueType = OptionType> + requires ad_utility::isTypeContainedIn + void addOption(const std::vector& pathToOption, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + DefaultValueType defaultValue) { + addOptionImpl(pathToOption, optionDescription, + variableToPutValueOfTheOptionIn, + std::optional(std::move(defaultValue))); } /* @@ -93,12 +107,26 @@ class ConfigManager { `optionName`, which describes a path only made out of this single string. 
*/ template + requires ad_utility::isTypeContainedIn + void addOption(std::string optionName, std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn) { + addOption(std::vector{std::move(optionName)}, + optionDescription, variableToPutValueOfTheOptionIn); + } + + /* + @brief Creates and adds a new configuration option, just like in the other + `addOption`. But instead of a `pathToOption`, there is only an + `optionName`, which describes a path only made out of this single string. + */ + template DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn void addOption(std::string optionName, std::string_view optionDescription, OptionType* variableToPutValueOfTheOptionIn, - std::optional defaultValue = - std::optional(std::nullopt)) { + DefaultValueType defaultValue) { addOption(std::vector{std::move(optionName)}, optionDescription, variableToPutValueOfTheOptionIn, std::move(defaultValue)); @@ -194,5 +222,44 @@ class ConfigManager { "Configuration option 'x' was not set at runtime, using default value 'y'.". */ std::string getListOfNotChangedConfigOptionsWithDefaultValuesAsString() const; + + /* + @brief Creates and adds a new configuration option. + + @tparam OptionType The type of value, the configuration option can hold. + + @param pathToOption Describes a path in json, that points to the value held by + the configuration option. The last key in the vector is the name of the + configuration option. + @param optionDescription A description for the configuration option. + @param variableToPutValueOfTheOptionIn The value held by the configuration + option will be copied into this variable, whenever the value in the + configuration option changes. + @param defaultValue A default value for the configuration option. If none is + given, signified by an empty optional, then a value for the configuration + option MUST be given at runtime. 
+ */ + template + requires ad_utility::isTypeContainedIn + void addOptionImpl(const std::vector& pathToOption, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + std::optional defaultValue = + std::optional(std::nullopt)) { + /* + We need a non-empty path to construct a ConfigOption object, the `verify...` + function always throws an exception for this case. No need to duplicate the + error code. + */ + if (pathToOption.empty()) { + verifyPathToConfigOption(pathToOption, ""); + } + + addConfigOption( + pathToOption, + ConfigOption(pathToOption.back(), optionDescription, + variableToPutValueOfTheOptionIn, defaultValue)); + } }; } // namespace ad_utility diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 2aaf05dfca..bd9e474f0f 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -47,7 +47,7 @@ TEST(ConfigManagerTest, GetConfigurationOptionByNestedKeysTest) { int notUsed; config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", - ¬Used, std::optional{42}); + ¬Used, 42); config.addOption({"Shared_part", "Unique_part_2", "Sense_of_existence"}, "", ¬Used); @@ -77,12 +77,12 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { // Configuration options for testing. int notUsed; - config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, - "", ¬Used, 42); + config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", + ¬Used, 42); // Trying to add a configuration option with the same name at the same // place, should cause an error. - ASSERT_THROW(config.addOption( + ASSERT_THROW(config.addOption( {"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", ¬Used, 42); , ad_utility::ConfigManagerOptionPathAlreadyinUseException); @@ -92,7 +92,7 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { An empty vector doesn't work with that. 
*/ ASSERT_ANY_THROW( - config.addOption(std::vector{}, "", ¬Used, 42);); + config.addOption(std::vector{}, "", ¬Used, 42);); /* Trying to add a configuration option with a path containing strings with @@ -101,7 +101,7 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { configuration grammar. Ergo, you can't set values, with such paths per short hand, which we don't want. */ - ASSERT_THROW(config.addOption( + ASSERT_THROW(config.addOption( std::vector{"Shared part", "Sense_of_existence"}, "", ¬Used, 42); , ad_utility::NotValidShortHandNameException); @@ -115,11 +115,11 @@ TEST(ConfigManagerTest, ParseConfig) { int secondInt; int thirdInt; - config.addOption(std::vector{"depth_0", "Option_0"}, - "Must be set. Has no default value.", &firstInt); - config.addOption({"depth_0", "depth_1", "Option_1"}, - "Must be set. Has no default value.", &secondInt); - config.addOption("Option_2", "Has a default value.", &thirdInt, 2); + config.addOption(std::vector{"depth_0", "Option_0"}, + "Must be set. Has no default value.", &firstInt); + config.addOption({"depth_0", "depth_1", "Option_1"}, + "Must be set. Has no default value.", &secondInt); + config.addOption("Option_2", "Has a default value.", &thirdInt, 2); // For easier access to the options. auto getOption = [&config](const size_t& optionNumber) { @@ -167,12 +167,11 @@ TEST(ConfigManagerTest, ParseConfigExceptionTest) { // Add one option with default and one without. int notUsedInt; std::vector notUsedVector; - config.addOption(std::vector{"depth_0", "Without_default"}, - "Must be set. Has no default value.", ¬UsedInt); - config.addOption>( - std::vector{"depth_0", "With_default"}, - "Must not be set. Has default value.", ¬UsedVector, - std::vector{40, 41}); + config.addOption(std::vector{"depth_0", "Without_default"}, + "Must be set. Has no default value.", ¬UsedInt); + config.addOption(std::vector{"depth_0", "With_default"}, + "Must not be set. 
Has default value.", ¬UsedVector, + {40, 41}); // Should throw an exception, if we don't set all options, that must be set. ASSERT_THROW(config.parseConfig(nlohmann::json::parse(R"--({})--")), @@ -227,71 +226,65 @@ TEST(ConfigManagerTest, ParseShortHandTest) { // Add integer options. int somePositiveNumberInt; - config.addOption("somePositiveNumber", - "Must be set. Has no default value.", - &somePositiveNumberInt); + config.addOption("somePositiveNumber", "Must be set. Has no default value.", + &somePositiveNumberInt); int someNegativNumberInt; - config.addOption("someNegativNumber", - "Must be set. Has no default value.", - &someNegativNumberInt); + config.addOption("someNegativNumber", "Must be set. Has no default value.", + &someNegativNumberInt); // Add integer list. std::vector someIntegerlistIntVector; - config.addOption>("someIntegerlist", - "Must be set. Has no default value.", - &someIntegerlistIntVector); + config.addOption("someIntegerlist", "Must be set. Has no default value.", + &someIntegerlistIntVector); // Add floating point options. float somePositiveFloatingPointFloat; - config.addOption("somePositiveFloatingPoint", - "Must be set. Has no default value.", - &somePositiveFloatingPointFloat); + config.addOption("somePositiveFloatingPoint", + "Must be set. Has no default value.", + &somePositiveFloatingPointFloat); float someNegativFloatingPointFloat; - config.addOption("someNegativFloatingPoint", - "Must be set. Has no default value.", - &someNegativFloatingPointFloat); + config.addOption("someNegativFloatingPoint", + "Must be set. Has no default value.", + &someNegativFloatingPointFloat); // Add floating point list. std::vector someFloatingPointListFloatVector; - config.addOption>("someFloatingPointList", - "Must be set. Has no default value.", - &someFloatingPointListFloatVector); + config.addOption("someFloatingPointList", + "Must be set. Has no default value.", + &someFloatingPointListFloatVector); // Add boolean options. 
bool boolTrueBool; - config.addOption("boolTrue", "Must be set. Has no default value.", - &boolTrueBool); + config.addOption("boolTrue", "Must be set. Has no default value.", + &boolTrueBool); bool boolFalseBool; - config.addOption("boolFalse", "Must be set. Has no default value.", - &boolFalseBool); + config.addOption("boolFalse", "Must be set. Has no default value.", + &boolFalseBool); // Add boolean list. std::vector someBooleanListBoolVector; - config.addOption>("someBooleanList", - "Must be set. Has no default value.", - &someBooleanListBoolVector); + config.addOption("someBooleanList", "Must be set. Has no default value.", + &someBooleanListBoolVector); // Add string option. std::string myNameString; - config.addOption("myName", "Must be set. Has no default value.", - &myNameString); + config.addOption("myName", "Must be set. Has no default value.", + &myNameString); // Add string list. std::vector someStringListStringVector; - config.addOption>( - "someStringList", "Must be set. Has no default value.", - &someStringListStringVector); + config.addOption("someStringList", "Must be set. Has no default value.", + &someStringListStringVector); // Add option with deeper level. std::vector deeperIntVector; - config.addOption>({"depth", "here", "list"}, - "Must be set. Has no default value.", - &deeperIntVector); + config.addOption({"depth", "here", "list"}, + "Must be set. Has no default value.", &deeperIntVector); // This one will not be changed, in order to test, that options, that are // not set at run time, are not changed. int noChangeInt; - config.addOption("No_change", "", &noChangeInt, 10); + config.addOption("No_change", "", &noChangeInt, 10); // Set those. config.parseConfig(ad_utility::ConfigManager::parseShortHand( @@ -365,8 +358,8 @@ TEST(ConfigManagerTest, PrintConfigurationDocExistence) { // Can you print a non-empty one? 
int notUsed; - config.addOption("WithDefault", "", ¬Used, 42); - config.addOption("WithoutDefault", "", ¬Used); + config.addOption("WithDefault", "", ¬Used, 42); + config.addOption("WithoutDefault", "", ¬Used); ASSERT_NO_THROW(config.printConfigurationDoc(false)); ASSERT_NO_THROW(config.printConfigurationDoc(true)); } From e9c210b2892d27c6c6704635a7158f82003cd4c1 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 20 Jul 2023 15:44:57 +0200 Subject: [PATCH 09/63] src/util/ConfigManager/ConfigManager: Function for creating ConfigOptions now return a pointer to the created ConfigOption. --- src/util/ConfigManager/ConfigManager.cpp | 38 ++--- src/util/ConfigManager/ConfigManager.h | 93 ++++++----- test/ConfigManagerTest.cpp | 203 +++++++++-------------- 3 files changed, 143 insertions(+), 191 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index adac52852a..8e0163ac12 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,8 +2,6 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) -#include "util/ConfigManager/ConfigManager.h" - #include #include #include @@ -16,9 +14,11 @@ #include #include #include +#include #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" @@ -103,22 +103,8 @@ void ConfigManager::addConfigOption( // Add the configuration option. 
configurationOptions_.insert( - {createJsonPointerString(pathToOption), std::move(option)}); -} - -// ____________________________________________________________________________ -const ConfigOption& ConfigManager::getConfigurationOptionByNestedKeys( - const std::vector& keys) const { - // If there is an config option with that described location, then this should - // point to the configuration option. - const std::string ptr{createJsonPointerString(keys)}; - - if (configurationOptions_.contains(ptr)) { - return configurationOptions_.at(ptr); - } else { - throw NoConfigOptionFoundException(vectorOfKeysForJsonToString(keys), - printConfigurationDoc(true)); - } + {createJsonPointerString(pathToOption), + std::make_unique(std::move(option))}); } // ____________________________________________________________________________ @@ -224,7 +210,7 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { j.contains(configurationOptionJsonPosition)) { // This will throw an exception, if the json object can't be interpreted // with the wanted type. - option.setValueWithJson(j.at(configurationOptionJsonPosition)); + option->setValueWithJson(j.at(configurationOptionJsonPosition)); } /* @@ -232,7 +218,7 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { points to, that means, it doesn't have a default value, and needs to be set by the user at runtime, but wasn't. */ - if (!option.wasSet()) { + if (!option->wasSet()) { throw ConfigOptionWasntSetException(key); } } @@ -267,12 +253,12 @@ std::string ConfigManager::printConfigurationDoc( // We can only use the value, if we are sure, that the value was // initialized. configuratioOptionsVisualization[jsonOptionPointer] = - option.wasSet() ? option.getValueAsJson() - : "value was never initialized"; + option->wasSet() ? option->getValueAsJson() + : "value was never initialized"; } else { configuratioOptionsVisualization[jsonOptionPointer] = - option.hasDefaultValue() ? 
option.getDefaultValueAsJson() - : option.getDummyValueAsJson(); + option->hasDefaultValue() ? option->getDefaultValueAsJson() + : option->getDummyValueAsJson(); } } @@ -287,7 +273,7 @@ std::string ConfigManager::printConfigurationDoc( // itself. return absl::StrCat( "Location : ", pair.first, "\n", - static_cast(pair.second)); + static_cast(*pair.second)); }), "\n\n"); @@ -321,7 +307,7 @@ ConfigManager::getListOfNotChangedConfigOptionsWithDefaultValuesAsString() const { // For only looking at the configuration options in our map. auto onlyConfigurationOptionsView = std::views::transform( - configurationOptions_, [](const auto& pair) { return pair.second; }); + configurationOptions_, [](const auto& pair) { return *pair.second; }); // Returns true, if the `ConfigOption` has a default value and wasn't set at // runtime. diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index b07c4c1f7d..967308743f 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -45,7 +45,8 @@ class ConfigManager { The string key describes their location in the json object literal, by representing a json pointer in string form. */ - absl::flat_hash_map configurationOptions_; + absl::flat_hash_map> + configurationOptions_; public: /* @@ -61,16 +62,18 @@ class ConfigManager { @param variableToPutValueOfTheOptionIn The value held by the configuration option will be copied into this variable, whenever the value in the configuration option changes. + + @return A pointer to the newly created configuration option. 
*/ template requires ad_utility::isTypeContainedIn - void addOption(const std::vector& pathToOption, - std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn) { - addOptionImpl(pathToOption, optionDescription, - variableToPutValueOfTheOptionIn, - std::optional(std::nullopt)); + const ConfigOption* addOption(const std::vector& pathToOption, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn) { + return addOptionImpl(pathToOption, optionDescription, + variableToPutValueOfTheOptionIn, + std::optional(std::nullopt)); } /* @@ -87,49 +90,58 @@ class ConfigManager { option will be copied into this variable, whenever the value in the configuration option changes. @param defaultValue A default value for the configuration option. + + @return A pointer to the newly created configuration option. */ template DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn - void addOption(const std::vector& pathToOption, - std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn, - DefaultValueType defaultValue) { - addOptionImpl(pathToOption, optionDescription, - variableToPutValueOfTheOptionIn, - std::optional(std::move(defaultValue))); + const ConfigOption* addOption(const std::vector& pathToOption, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + DefaultValueType defaultValue) { + return addOptionImpl(pathToOption, optionDescription, + variableToPutValueOfTheOptionIn, + std::optional(std::move(defaultValue))); } /* @brief Creates and adds a new configuration option, just like in the other `addOption`. But instead of a `pathToOption`, there is only an `optionName`, which describes a path only made out of this single string. + + @return A pointer to the newly created configuration option. 
*/ template requires ad_utility::isTypeContainedIn - void addOption(std::string optionName, std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn) { - addOption(std::vector{std::move(optionName)}, - optionDescription, variableToPutValueOfTheOptionIn); + const ConfigOption* addOption(std::string optionName, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn) { + return addOption( + std::vector{std::move(optionName)}, optionDescription, + variableToPutValueOfTheOptionIn); } /* @brief Creates and adds a new configuration option, just like in the other `addOption`. But instead of a `pathToOption`, there is only an `optionName`, which describes a path only made out of this single string. + + @return A pointer to the newly created configuration option. */ template DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn - void addOption(std::string optionName, std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn, - DefaultValueType defaultValue) { - addOption(std::vector{std::move(optionName)}, - optionDescription, variableToPutValueOfTheOptionIn, - std::move(defaultValue)); + const ConfigOption* addOption(std::string optionName, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + DefaultValueType defaultValue) { + return addOption( + std::vector{std::move(optionName)}, optionDescription, + variableToPutValueOfTheOptionIn, std::move(defaultValue)); } /* @@ -167,8 +179,6 @@ class ConfigManager { std::string printConfigurationDoc(bool printCurrentJsonConfiguration) const; private: - // For testing. 
- FRIEND_TEST(ConfigManagerTest, GetConfigurationOptionByNestedKeysTest); FRIEND_TEST(ConfigManagerTest, ParseConfig); FRIEND_TEST(ConfigManagerTest, ParseConfigExceptionTest); FRIEND_TEST(ConfigManagerTest, ParseShortHandTest); @@ -200,16 +210,6 @@ class ConfigManager { void addConfigOption(const std::vector& pathToOption, ConfigOption&& option); - /* - @brief Return the underlying configuration option, if it's at the position - described by the `keys`. If there is no configuration option at that - place, an exception will be thrown. - - @param keys The keys for looking up the configuration option. - */ - const ConfigOption& getConfigurationOptionByNestedKeys( - const std::vector& keys) const; - /* @brief Return string representation of a `std::vector`. */ @@ -238,15 +238,18 @@ class ConfigManager { @param defaultValue A default value for the configuration option. If none is given, signified by an empty optional, then a value for the configuration option MUST be given at runtime. + + @return A pointer to the newly created configuration option. */ template requires ad_utility::isTypeContainedIn - void addOptionImpl(const std::vector& pathToOption, - std::string_view optionDescription, - OptionType* variableToPutValueOfTheOptionIn, - std::optional defaultValue = - std::optional(std::nullopt)) { + const ConfigOption* addOptionImpl( + const std::vector& pathToOption, + std::string_view optionDescription, + OptionType* variableToPutValueOfTheOptionIn, + std::optional defaultValue = + std::optional(std::nullopt)) { /* We need a non-empty path to construct a ConfigOption object, the `verify...` function always throws an exception for this case. No need to duplicate the @@ -260,6 +263,14 @@ class ConfigManager { pathToOption, ConfigOption(pathToOption.back(), optionDescription, variableToPutValueOfTheOptionIn, defaultValue)); + + /* + The `unqiue_ptr` was created, by creating a new `ConfigOption` via it's + move constructor. 
Which is why, we can't just return the `ConfigOption` + we created here. + */ + return configurationOptions_.at(createJsonPointerString(pathToOption)) + .get(); } }; } // namespace ad_utility diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index bd9e474f0f..c42eab4143 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -40,35 +40,6 @@ void checkOption(const ad_utility::ConfigOption& option, } } -TEST(ConfigManagerTest, GetConfigurationOptionByNestedKeysTest) { - ad_utility::ConfigManager config{}; - - // Configuration options for testing. - int notUsed; - - config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", - &notUsed, 42); - - config.addOption({"Shared_part", "Unique_part_2", "Sense_of_existence"}, "", - &notUsed); - - // Where those two options added? - ASSERT_EQ(config.configurationOptions_.size(), 2); - - checkOption(config.getConfigurationOptionByNestedKeys( - {"Shared_part", "Unique_part_1", "Sense_of_existence"}), - notUsed, true, 42); - checkOption(config.getConfigurationOptionByNestedKeys( - {"Shared_part", "Unique_part_2", "Sense_of_existence"}), - notUsed, false, 42); - - // Trying to get a configuration option, that does not exist, should cause - // an exception. - ASSERT_THROW( - config.getConfigurationOptionByNestedKeys({"Shared_part", "Getsbourgh"}), - ad_utility::NoConfigOptionFoundException); -} - /* The exceptions for adding configuration options. */ @@ -115,31 +86,22 @@ TEST(ConfigManagerTest, ParseConfig) { int secondInt; int thirdInt; - config.addOption(std::vector{"depth_0", "Option_0"}, - "Must be set. Has no default value.", &firstInt); - config.addOption({"depth_0", "depth_1", "Option_1"}, - "Must be set. Has no default value.", &secondInt); - config.addOption("Option_2", "Has a default value.", &thirdInt, 2); - - // For easier access to the options.
- auto getOption = [&config](const size_t& optionNumber) { - if (optionNumber == 0) { - return config.getConfigurationOptionByNestedKeys({"depth_0", "Option_0"}); - } else if (optionNumber == 1) { - return config.getConfigurationOptionByNestedKeys( - {"depth_0", "depth_1", "Option_1"}); - } else { - return config.getConfigurationOptionByNestedKeys({"Option_2"}); - } - }; + const ConfigOption* optionZero = + config.addOption(std::vector{"depth_0", "Option_0"}, + "Must be set. Has no default value.", &firstInt); + const ConfigOption* optionOne = + config.addOption({"depth_0", "depth_1", "Option_1"}, + "Must be set. Has no default value.", &secondInt); + const ConfigOption* optionTwo = + config.addOption("Option_2", "Has a default value.", &thirdInt, 2); // Does the option with the default already have a value? - checkOption(getOption(2), thirdInt, true, 2); + checkOption(*optionTwo, thirdInt, true, 2); // The other two should never have set the variable, that the internal pointer // points to. - checkOption(getOption(0), firstInt, false, 2); - checkOption(getOption(1), secondInt, false, 2); + checkOption(*optionZero, firstInt, false, 2); + checkOption(*optionOne, secondInt, false, 2); // The json for testing `parseConfig`. Sets all of the configuration // options. @@ -156,9 +118,9 @@ TEST(ConfigManagerTest, ParseConfig) { // Set and check. config.parseConfig(testJson); - checkOption(getOption(0), firstInt, true, 10); - checkOption(getOption(1), secondInt, true, 11); - checkOption(getOption(2), thirdInt, true, 12); + checkOption(*optionZero, firstInt, true, 10); + checkOption(*optionOne, secondInt, true, 11); + checkOption(*optionTwo, thirdInt, true, 12); } TEST(ConfigManagerTest, ParseConfigExceptionTest) { @@ -226,116 +188,109 @@ TEST(ConfigManagerTest, ParseShortHandTest) { // Add integer options. int somePositiveNumberInt; - config.addOption("somePositiveNumber", "Must be set. 
Has no default value.", - &somePositiveNumberInt); + const ConfigOption* somePositiveNumber = config.addOption( + "somePositiveNumber", "Must be set. Has no default value.", + &somePositiveNumberInt); int someNegativNumberInt; - config.addOption("someNegativNumber", "Must be set. Has no default value.", - &someNegativNumberInt); + const ConfigOption* someNegativNumber = config.addOption( + "someNegativNumber", "Must be set. Has no default value.", + &someNegativNumberInt); // Add integer list. std::vector someIntegerlistIntVector; - config.addOption("someIntegerlist", "Must be set. Has no default value.", - &someIntegerlistIntVector); + const ConfigOption* someIntegerlist = + config.addOption("someIntegerlist", "Must be set. Has no default value.", + &someIntegerlistIntVector); // Add floating point options. float somePositiveFloatingPointFloat; - config.addOption("somePositiveFloatingPoint", - "Must be set. Has no default value.", - &somePositiveFloatingPointFloat); + const ConfigOption* somePositiveFloatingPoint = config.addOption( + "somePositiveFloatingPoint", "Must be set. Has no default value.", + &somePositiveFloatingPointFloat); float someNegativFloatingPointFloat; - config.addOption("someNegativFloatingPoint", - "Must be set. Has no default value.", - &someNegativFloatingPointFloat); + const ConfigOption* someNegativFloatingPoint = config.addOption( + "someNegativFloatingPoint", "Must be set. Has no default value.", + &someNegativFloatingPointFloat); // Add floating point list. std::vector someFloatingPointListFloatVector; - config.addOption("someFloatingPointList", - "Must be set. Has no default value.", - &someFloatingPointListFloatVector); + const ConfigOption* someFloatingPointList = config.addOption( + "someFloatingPointList", "Must be set. Has no default value.", + &someFloatingPointListFloatVector); // Add boolean options. bool boolTrueBool; - config.addOption("boolTrue", "Must be set. 
Has no default value.", - &boolTrueBool); + const ConfigOption* boolTrue = config.addOption( + "boolTrue", "Must be set. Has no default value.", &boolTrueBool); bool boolFalseBool; - config.addOption("boolFalse", "Must be set. Has no default value.", - &boolFalseBool); + const ConfigOption* boolFalse = config.addOption( + "boolFalse", "Must be set. Has no default value.", &boolFalseBool); // Add boolean list. std::vector someBooleanListBoolVector; - config.addOption("someBooleanList", "Must be set. Has no default value.", - &someBooleanListBoolVector); + const ConfigOption* someBooleanList = + config.addOption("someBooleanList", "Must be set. Has no default value.", + &someBooleanListBoolVector); // Add string option. std::string myNameString; - config.addOption("myName", "Must be set. Has no default value.", - &myNameString); + const ConfigOption* myName = config.addOption( + "myName", "Must be set. Has no default value.", &myNameString); // Add string list. std::vector someStringListStringVector; - config.addOption("someStringList", "Must be set. Has no default value.", - &someStringListStringVector); + const ConfigOption* someStringList = + config.addOption("someStringList", "Must be set. Has no default value.", + &someStringListStringVector); // Add option with deeper level. std::vector deeperIntVector; - config.addOption({"depth", "here", "list"}, - "Must be set. Has no default value.", &deeperIntVector); + const ConfigOption* deeperIntVectorOption = + config.addOption({"depth", "here", "list"}, + "Must be set. Has no default value.", &deeperIntVector); // This one will not be changed, in order to test, that options, that are // not set at run time, are not changed. int noChangeInt; - config.addOption("No_change", "", &noChangeInt, 10); + const ConfigOption* noChange = + config.addOption("No_change", "", &noChangeInt, 10); // Set those. 
config.parseConfig(ad_utility::ConfigManager::parseShortHand( R"--(somePositiveNumber : 42, someNegativNumber : -42, someIntegerlist : [40, 41], somePositiveFloatingPoint : 4.2, someNegativFloatingPoint : -4.2, someFloatingPointList : [4.1, 4.2], boolTrue : true, boolFalse : false, someBooleanList : [true, false, true], myName : "Bernd", someStringList : ["t1", "t2"], depth : { here : {list : [7,8]}})--")); - checkOption( - config.getConfigurationOptionByNestedKeys({"somePositiveNumber"}), - somePositiveNumberInt, true, 42); - checkOption( - config.getConfigurationOptionByNestedKeys({"someNegativNumber"}), - someNegativNumberInt, true, -42); - - checkOption>( - config.getConfigurationOptionByNestedKeys({"someIntegerlist"}), - someIntegerlistIntVector, true, std::vector{40, 41}); - - checkOption( - config.getConfigurationOptionByNestedKeys({"somePositiveFloatingPoint"}), - somePositiveFloatingPointFloat, true, 4.2f); - checkOption( - config.getConfigurationOptionByNestedKeys({"someNegativFloatingPoint"}), - someNegativFloatingPointFloat, true, -4.2f); - - checkOption>( - config.getConfigurationOptionByNestedKeys({"someFloatingPointList"}), - someFloatingPointListFloatVector, true, {4.1f, 4.2f}); - - checkOption(config.getConfigurationOptionByNestedKeys({"boolTrue"}), - boolTrueBool, true, true); - checkOption(config.getConfigurationOptionByNestedKeys({"boolFalse"}), - boolFalseBool, true, false); - - checkOption>( - config.getConfigurationOptionByNestedKeys({"someBooleanList"}), - someBooleanListBoolVector, true, std::vector{true, false, true}); - - checkOption( - config.getConfigurationOptionByNestedKeys({"myName"}), myNameString, true, - std::string{"Bernd"}); - - checkOption>( - config.getConfigurationOptionByNestedKeys({"someStringList"}), - someStringListStringVector, true, std::vector{"t1", "t2"}); - - checkOption>( - config.getConfigurationOptionByNestedKeys({"depth", "here", "list"}), - deeperIntVector, true, std::vector{7, 8}); + 
checkOption(*somePositiveNumber, somePositiveNumberInt, true, 42); + checkOption(*someNegativNumber, someNegativNumberInt, true, -42); + + checkOption>(*someIntegerlist, someIntegerlistIntVector, + true, std::vector{40, 41}); + + checkOption(*somePositiveFloatingPoint, somePositiveFloatingPointFloat, + true, 4.2f); + checkOption(*someNegativFloatingPoint, someNegativFloatingPointFloat, + true, -4.2f); + + checkOption>(*someFloatingPointList, + someFloatingPointListFloatVector, true, + {4.1f, 4.2f}); + + checkOption(*boolTrue, boolTrueBool, true, true); + checkOption(*boolFalse, boolFalseBool, true, false); + + checkOption>(*someBooleanList, someBooleanListBoolVector, + true, std::vector{true, false, true}); + + checkOption(*myName, myNameString, true, std::string{"Bernd"}); + + checkOption>(*someStringList, + someStringListStringVector, true, + std::vector{"t1", "t2"}); + + checkOption>(*deeperIntVectorOption, deeperIntVector, true, + std::vector{7, 8}); // Is the "No Change" unchanged? - checkOption(config.getConfigurationOptionByNestedKeys({"No_change"}), - noChangeInt, true, 10); + checkOption(*noChange, noChangeInt, true, 10); // Multiple key value pairs with the same key are not allowed. AD_EXPECT_THROW_WITH_MESSAGE(ad_utility::ConfigManager::parseShortHand( From 969ee15996bdc719ccda229d80ac4d92a96aefaa Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 20 Jul 2023 16:07:12 +0200 Subject: [PATCH 10/63] Cleand up some code formatting. --- src/util/ConfigManager/ConfigManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 8e0163ac12..2fdc7e2d3a 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,6 +2,8 @@ // Chair of Algorithms and Data Structures. 
// Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) +#include "util/ConfigManager/ConfigManager.h" + #include #include #include @@ -18,7 +20,6 @@ #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" -#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" From 3a24fa3704c04cc9cd461da2c6fa4207b2466f54 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 20 Jul 2023 16:34:43 +0200 Subject: [PATCH 11/63] Cleaned up some code formatting. --- src/index/IndexImpl.cpp | 3 ++- src/index/Vocabulary.cpp | 3 ++- src/util/ParseException.cpp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 299eec1abe..3a01660185 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -27,7 +29,6 @@ #include #include -#include "./IndexImpl.h" #include "util/ConfigManager/ConfigManager.h" #include "util/json.h" diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp index d9e00907b3..5ac9a37b75 100644 --- a/src/index/Vocabulary.cpp +++ b/src/index/Vocabulary.cpp @@ -3,6 +3,8 @@ // Authors: Björn Buchhold , // Johannes Kalmbach (johannes.kalmbach@gmail.com) +#include "./Vocabulary.h" + #include #include @@ -15,7 +17,6 @@ #include "../util/Serializer/FileSerializer.h" #include "../util/json.h" #include "./ConstantsIndexBuilding.h" -#include "./Vocabulary.h" using std::string; diff --git a/src/util/ParseException.cpp b/src/util/ParseException.cpp index 70720d3bf7..be0f3b516d 100644 --- a/src/util/ParseException.cpp +++ b/src/util/ParseException.cpp @@ -2,10 +2,11 @@ // Chair of Algorithms and Data Structures. 
// Author: Julian Mundhahs (mundhahj@informatik.uni-freiburg.de) +#include "util/ParseException.h" + #include #include "util/Exception.h" -#include "util/ParseException.h" #include "util/StringUtils.h" // ___________________________________________________________________________ From b79454e5fd7e8f29317ae235d625fad2b2228137 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 08:30:54 +0200 Subject: [PATCH 12/63] src/index/IndexImpl: Added the correct default values to the configOptions. --- src/index/IndexImpl.cpp | 110 ++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 72 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index f1c34d21b7..879153e439 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -17,6 +17,7 @@ #include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" +#include "index/ConstantsIndexBuilding.h" #include "index/IndexFormatVersion.h" #include "index/PrefixHeuristic.h" #include "index/TriplesView.h" @@ -804,8 +805,7 @@ void IndexImpl::readConfiguration() { // TODO Write a description. std::string gitHash; - const ad_utility::ConfigOption* gitHashOption = config.addOption( - "git-hash", "", &gitHash, std::string{"None given."}); + config.addOption("git-hash", "", &gitHash, std::string{}); // TODO Write a description. bool boolPrefixes; @@ -814,16 +814,12 @@ void IndexImpl::readConfiguration() { // TODO Write a description. bool hasAllPermutations; - const ad_utility::ConfigOption* hasAllPermutationsOption = - config.addOption("has-all-permutations", "", &hasAllPermutations, - true); + config.addOption("has-all-permutations", "", &hasAllPermutations, true); // TODO Write a description. 
std::vector prefixesExternal; - const ad_utility::ConfigOption* prefixesExternalOption = - config.addOption>("prefixes-external", "", - &prefixesExternal, - std::vector{}); + config.addOption>( + "prefixes-external", "", &prefixesExternal, std::vector{}); // TODO Write a description. std::string lang; @@ -843,10 +839,8 @@ void IndexImpl::readConfiguration() { // TODO Write a description. std::vector languagesInternal; - const ad_utility::ConfigOption* languagesInternalOption = - config.addOption>("languages-internal", "", - &languagesInternal, - std::vector{}); + config.addOption>("languages-internal", "", + &languagesInternal, {"en"}); // TODO Write a description. config.addOption("num-predicates-normal", "", &numPredicatesNormal_); @@ -871,7 +865,7 @@ void IndexImpl::readConfiguration() { configurationJson_; config.parseConfig(fileToJson(onDiskBase_ + CONFIGURATION_FILE)); - if (gitHashOption->wasSetAtRuntime()) { + if (!gitHash.empty()) { configurationJson_["git-hash"] = gitHash; LOG(INFO) << "The git hash used to build this index was " << gitHash.substr(0, 6) << std::endl; @@ -939,10 +933,8 @@ void IndexImpl::readConfiguration() { } } - if (prefixesExternalOption->wasSetAtRuntime()) { - vocab_.initializeExternalizePrefixes(prefixesExternal); - configurationJson_["prefixes-external"] = prefixesExternal; - } + vocab_.initializeExternalizePrefixes(prefixesExternal); + configurationJson_["prefixes-external"] = prefixesExternal; configurationJson_["locale"]["language"] = lang; configurationJson_["locale"]["country"] = country; @@ -950,10 +942,8 @@ void IndexImpl::readConfiguration() { vocab_.setLocale(lang, country, ignorePunctuation); textVocab_.setLocale(lang, country, ignorePunctuation); - if (languagesInternalOption->wasSetAtRuntime()) { - vocab_.initializeInternalizedLangs(languagesInternal); - configurationJson_["languages-internal"] = languagesInternal; - } + vocab_.initializeInternalizedLangs(languagesInternal); + configurationJson_["languages-internal"] = 
languagesInternal; // Once again, I can only guess, what kind of value should be at // `"has-all-permutations"`. @@ -965,7 +955,7 @@ void IndexImpl::readConfiguration() { loadAllPermutations_ = false; } */ - if (hasAllPermutationsOption->wasSetAtRuntime() && !hasAllPermutations) { + if (!hasAllPermutations) { configurationJson_["has-all-permutations"] = false; // If the permutations simply don't exist, then we can never load them. loadAllPermutations_ = false; @@ -1025,17 +1015,13 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { // TODO Write a description. std::vector prefixesExternal; - const ad_utility::ConfigOption* prefixesExternalOption = - config.addOption>("prefixes-external", "", - &prefixesExternal, - std::vector{}); + config.addOption>( + "prefixes-external", "", &prefixesExternal, std::vector{}); // TODO Write a description. std::vector languagesInternal; - const ad_utility::ConfigOption* languagesInternalOption = - config.addOption>("languages-internal", "", - &languagesInternal, - std::vector{}); + config.addOption>("languages-internal", "", + &languagesInternal, {"en"}); // TODO Write a description. std::string lang; @@ -1058,20 +1044,17 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { // TODO Write a description. bool asciiPrefixesOnly; - const ad_utility::ConfigOption* asciiPrefixesOnlyOption = - config.addOption("ascii-prefixes-only", "", &asciiPrefixesOnly, - false); + config.addOption("ascii-prefixes-only", "", &asciiPrefixesOnly, false); // TODO Write a description. size_t numTriplesPerBatch; - const ad_utility::ConfigOption* numTriplesPerBatchOption = - config.addOption("num-triples-per-batch", "", &numTriplesPerBatch, - 0uL); + config.addOption("num-triples-per-batch", "", &numTriplesPerBatch, + static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); // TODO Write a description. 
size_t parserBatchSize; - const ad_utility::ConfigOption* parserBatchSizeOption = - config.addOption("parser-batch-size", "", &parserBatchSize, 0uL); + config.addOption("parser-batch-size", "", &parserBatchSize, + PARSER_BATCH_SIZE); // TODO Write a description. std::string parserIntegerOverflowBehavior; @@ -1086,10 +1069,8 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { config.parseConfig(json(json::value_t::object)); } - if (prefixesExternalOption->wasSetAtRuntime()) { - vocab_.initializeExternalizePrefixes(prefixesExternal); - configurationJson_["prefixes-external"] = prefixesExternal; - } + vocab_.initializeExternalizePrefixes(prefixesExternal); + configurationJson_["prefixes-external"] = prefixesExternal; /** * ICU uses two separate arguments for each Locale, the language ("en" or @@ -1125,41 +1106,26 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { configurationJson_["locale"]["country"] = country; configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; - if (languagesInternalOption->wasSetAtRuntime()) { - vocab_.initializeInternalizedLangs(languagesInternal); - configurationJson_["languages-internal"] = languagesInternal; - } + vocab_.initializeInternalizedLangs(languagesInternal); + configurationJson_["languages-internal"] = languagesInternal; - if (asciiPrefixesOnlyOption->wasSetAtRuntime()) { - if constexpr (std::is_same_v, TurtleParserAuto>) { - if (asciiPrefixesOnly) { - LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; - onlyAsciiTurtlePrefixes_ = true; - } else { - onlyAsciiTurtlePrefixes_ = false; - } + if constexpr (std::is_same_v, TurtleParserAuto>) { + if (asciiPrefixesOnly) { + LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; + onlyAsciiTurtlePrefixes_ = true; } else { - LOG(WARN) << "You specified the ascii-prefixes-only but a parser that is " - "not the Turtle stream parser. This means that this setting " - "is ignored." 
- << std::endl; + onlyAsciiTurtlePrefixes_ = false; } + } else { + LOG(WARN) << "You specified the ascii-prefixes-only but a parser that is " + "not the Turtle stream parser. This means that this setting " + "is ignored." + << std::endl; } - if (numTriplesPerBatchOption->wasSetAtRuntime()) { - numTriplesPerBatch_ = numTriplesPerBatch; - LOG(INFO) - << "You specified \"num-triples-per-batch = " << numTriplesPerBatch_ - << "\", choose a lower value if the index builder runs out of memory" - << std::endl; - } + numTriplesPerBatch_ = numTriplesPerBatch; - if (parserBatchSizeOption->wasSetAtRuntime()) { - parserBatchSize_ = parserBatchSize; - LOG(INFO) << "Overriding setting parser-batch-size to " << parserBatchSize_ - << " This might influence performance during index build." - << std::endl; - } + parserBatchSize_ = parserBatchSize; std::string overflowingIntegersThrow = "overflowing-integers-throw"; std::string overflowingIntegersBecomeDoubles = From 9482932c56e5e198a211e0c288600741e0711378 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 08:59:49 +0200 Subject: [PATCH 13/63] Cleaned up some code formatting. --- src/index/IndexImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 879153e439..35fbfdb08a 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -14,7 +16,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" From 4443ca3c5c7e4fbf44f246f1812bf2f5c1fc78bf Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 09:46:32 +0200 Subject: [PATCH 14/63] test/ConfigManagerTest: Shortend test code. 
--- test/ConfigManagerTest.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index c42eab4143..3411f3929f 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -17,6 +17,8 @@ #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/json.h" +using namespace std::string_literals; + namespace ad_utility { /* @@ -72,9 +74,8 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { configuration grammar. Ergo, you can't set values, with such paths per short hand, which we don't want. */ - ASSERT_THROW(config.addOption( - std::vector{"Shared part", "Sense_of_existence"}, "", - &notUsed, 42); + ASSERT_THROW(config.addOption({"Shared part"s, "Sense_of_existence"s}, "", + &notUsed, 42); , ad_utility::NotValidShortHandNameException); } @@ -87,7 +88,7 @@ TEST(ConfigManagerTest, ParseConfig) { int thirdInt; const ConfigOption* optionZero = - config.addOption(std::vector{"depth_0", "Option_0"}, + config.addOption({"depth_0"s, "Option_0"s}, "Must be set. Has no default value.", &firstInt); const ConfigOption* optionOne = config.addOption({"depth_0", "depth_1", "Option_1"}, @@ -129,9 +130,9 @@ TEST(ConfigManagerTest, ParseConfigExceptionTest) { // Add one option with default and one without. int notUsedInt; std::vector notUsedVector; - config.addOption(std::vector{"depth_0", "Without_default"}, + config.addOption({"depth_0"s, "Without_default"s}, "Must be set. Has no default value.", &notUsedInt); - config.addOption(std::vector{"depth_0", "With_default"}, + config.addOption({"depth_0"s, "With_default"s}, "Must not be set. Has default value.", &notUsedVector, {40, 41}); From 67830ad3d6dc2537b0c516fabc0b4a8b0decdbe2 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 10:08:33 +0200 Subject: [PATCH 15/63] src/index/IndexImpl: Shortend code for the creation of configOptions.
--- src/index/IndexImpl.cpp | 78 ++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index fcf1c30c79..bacbf47e98 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -32,6 +32,7 @@ #include "util/json.h" using std::array; +using namespace std::string_literals; // _____________________________________________________________________________ IndexImpl::IndexImpl(ad_utility::AllocatorWithLimit allocator) @@ -792,58 +793,52 @@ void IndexImpl::readConfiguration() { // TODO Write a description. std::string gitHash; - config.addOption("git-hash", "", &gitHash, std::string{}); + config.addOption("git-hash", "", &gitHash, {}); // TODO Write a description. bool boolPrefixes; const ad_utility::ConfigOption* prefixesOption = - config.addOption("prefixes", "", &boolPrefixes, false); + config.addOption("prefixes", "", &boolPrefixes, false); // TODO Write a description. bool hasAllPermutations; - config.addOption("has-all-permutations", "", &hasAllPermutations, true); + config.addOption("has-all-permutations", "", &hasAllPermutations, true); // TODO Write a description. std::vector prefixesExternal; - config.addOption>( - "prefixes-external", "", &prefixesExternal, std::vector{}); + config.addOption("prefixes-external", "", &prefixesExternal, {}); // TODO Write a description. std::string lang; - config.addOption(std::vector{"locale", "language"}, - "", &lang); + config.addOption({"locale"s, "language"s}, "", &lang); // TODO Write a description. std::string country; - config.addOption(std::vector{"locale", "country"}, - "", &country); + config.addOption({"locale"s, "country"s}, "", &country); // TODO Write a description. bool ignorePunctuation; - config.addOption( - std::vector{"locale", "ignore-punctuation"}, "", - &ignorePunctuation); + config.addOption({"locale"s, "ignore-punctuation"s}, "", &ignorePunctuation); // TODO Write a description. 
std::vector languagesInternal; - config.addOption>("languages-internal", "", - &languagesInternal, {"en"}); + config.addOption("languages-internal", "", &languagesInternal, {"en"}); // TODO Write a description. - config.addOption("num-predicates-normal", "", &numPredicatesNormal_); - config.addOption("num-subjects-normal", "", &numSubjectsNormal_); - config.addOption("num-objects-normal", "", &numObjectsNormal_); - config.addOption("num-triples-normal", "", &numTriplesNormal_); + config.addOption("num-predicates-normal", "", &numPredicatesNormal_); + config.addOption("num-subjects-normal", "", &numSubjectsNormal_); + config.addOption("num-objects-normal", "", &numObjectsNormal_); + config.addOption("num-triples-normal", "", &numTriplesNormal_); // TODO Make this cleaner, than just catching all the fields of the object. size_t indexFormatVersionPullRequestNumber; config.addOption( - std::vector{"index-format-version", "pull-request-number"}, + {"index-format-version"s, "pull-request-number"s}, "The number of the pull request that changed the index format most " "recently.", &indexFormatVersionPullRequestNumber); std::string indexFormatVersionDate; - config.addOption(std::vector{"index-format-version", "date"}, + config.addOption({"index-format-version"s, "date"s}, "The date of the last breaking change of the index format.", &indexFormatVersionDate); @@ -1001,56 +996,51 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { // TODO Write a description. std::vector prefixesExternal; - config.addOption>( - "prefixes-external", "", &prefixesExternal, std::vector{}); + config.addOption("prefixes-external", "", &prefixesExternal, {}); // TODO Write a description. std::vector languagesInternal; - config.addOption>("languages-internal", "", - &languagesInternal, {"en"}); + config.addOption("languages-internal", "", &languagesInternal, {"en"}); // TODO Write a description. 
std::string lang; - const ad_utility::ConfigOption* langOption = config.addOption( - std::vector{"locale", "language"}, "", &lang, - LOCALE_DEFAULT_LANG); + const ad_utility::ConfigOption* langOption = config.addOption( + {"locale"s, "language"s}, "", &lang, LOCALE_DEFAULT_LANG); // TODO Write a description. std::string country; - const ad_utility::ConfigOption* countryOption = config.addOption( - std::vector{"locale", "country"}, "", &country, - LOCALE_DEFAULT_COUNTRY); + const ad_utility::ConfigOption* countryOption = config.addOption( + {"locale"s, "country"s}, "", &country, LOCALE_DEFAULT_COUNTRY); // TODO Write a description. bool ignorePunctuation; const ad_utility::ConfigOption* ignorePunctuationOption = - config.addOption( - std::vector{"locale", "ignore-punctuation"}, "", - &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); + config.addOption({"locale"s, "ignore-punctuation"s}, "", + &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); // TODO Write a description. - config.addOption("ascii-prefixes-only", "", &onlyAsciiTurtlePrefixes_, - onlyAsciiTurtlePrefixes_); + config.addOption("ascii-prefixes-only", "", &onlyAsciiTurtlePrefixes_, + onlyAsciiTurtlePrefixes_); // TODO Write a description. - config.addOption("parallel-parsing", "", &useParallelParser_, - useParallelParser_); + config.addOption("parallel-parsing", "", &useParallelParser_, + useParallelParser_); // TODO Write a description. size_t numTriplesPerBatch; - config.addOption("num-triples-per-batch", "", &numTriplesPerBatch, - static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); + config.addOption("num-triples-per-batch", "", &numTriplesPerBatch, + static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); // TODO Write a description. size_t parserBatchSize; - config.addOption("parser-batch-size", "", &parserBatchSize, - PARSER_BATCH_SIZE); + config.addOption("parser-batch-size", "", &parserBatchSize, + PARSER_BATCH_SIZE); // TODO Write a description. 
std::string parserIntegerOverflowBehavior; - config.addOption("parser-integer-overflow-behavior", "", - &parserIntegerOverflowBehavior, - std::string{"overflowing-integers-throw"}); + config.addOption("parser-integer-overflow-behavior", "", + &parserIntegerOverflowBehavior, + "overflowing-integers-throw"s); // Set the options. if (!settingsFileName_.empty()) { From 7e3472d742b5b3101f5dcd0ae9fc953afb68b6ca Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 10:57:37 +0200 Subject: [PATCH 16/63] benchmark/BenchmarkExamples: Made the usage of addOption clearer. --- benchmark/BenchmarkExamples.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/BenchmarkExamples.cpp b/benchmark/BenchmarkExamples.cpp index 57bee0e9c7..c471867771 100644 --- a/benchmark/BenchmarkExamples.cpp +++ b/benchmark/BenchmarkExamples.cpp @@ -59,7 +59,7 @@ class ConfigOptions : public BenchmarkInterface { manager.addOption("CoinFlipTry", "The number of succesful coin flips.", &wonOnTryX_, {false, false, false, false, false}); - manager.addOption({"Accounts", "Personal", "Steve"}, + manager.addOption({"Accounts"s, "Personal"s, "Steve"s}, "Steves saving account balance.", &balanceOnStevesSavingAccount_, -41.9f); } From 207ea5e832e793d2576687fec685d05d46cea289 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 11:03:07 +0200 Subject: [PATCH 17/63] src/util/ConfigManager/ConfigManager: AddOption now returns a reference instead of a pointer. 
--- src/util/ConfigManager/ConfigManager.h | 29 ++++++----- test/ConfigManagerTest.cpp | 72 +++++++++++++------------- 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 967308743f..277087e16e 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -63,12 +63,13 @@ class ConfigManager { option will be copied into this variable, whenever the value in the configuration option changes. - @return A pointer to the newly created configuration option. + @return A reference to the newly created configuration option. This reference + will stay valid, even after adding more options. */ template requires ad_utility::isTypeContainedIn - const ConfigOption* addOption(const std::vector& pathToOption, + const ConfigOption& addOption(const std::vector& pathToOption, std::string_view optionDescription, OptionType* variableToPutValueOfTheOptionIn) { return addOptionImpl(pathToOption, optionDescription, @@ -91,13 +92,14 @@ class ConfigManager { configuration option changes. @param defaultValue A default value for the configuration option. - @return A pointer to the newly created configuration option. + @return A reference to the newly created configuration option. This reference + will stay valid, even after adding more options. */ template DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn - const ConfigOption* addOption(const std::vector& pathToOption, + const ConfigOption& addOption(const std::vector& pathToOption, std::string_view optionDescription, OptionType* variableToPutValueOfTheOptionIn, DefaultValueType defaultValue) { @@ -111,12 +113,13 @@ class ConfigManager { `addOption`. But instead of a `pathToOption`, there is only an `optionName`, which describes a path only made out of this single string. - @return A pointer to the newly created configuration option. 
+ @return A reference to the newly created configuration option. This reference + will stay valid, even after adding more options. */ template requires ad_utility::isTypeContainedIn - const ConfigOption* addOption(std::string optionName, + const ConfigOption& addOption(std::string optionName, std::string_view optionDescription, OptionType* variableToPutValueOfTheOptionIn) { return addOption( @@ -129,13 +132,14 @@ class ConfigManager { `addOption`. But instead of a `pathToOption`, there is only an `optionName`, which describes a path only made out of this single string. - @return A pointer to the newly created configuration option. + @return A reference to the newly created configuration option. This reference + will stay valid, even after adding more options. */ template DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn - const ConfigOption* addOption(std::string optionName, + const ConfigOption& addOption(std::string optionName, std::string_view optionDescription, OptionType* variableToPutValueOfTheOptionIn, DefaultValueType defaultValue) { @@ -239,12 +243,13 @@ class ConfigManager { given, signified by an empty optional, then a value for the configuration option MUST be given at runtime. - @return A pointer to the newly created configuration option. + @return A reference to the newly created configuration option. Will stay + valid, even after more options. */ template requires ad_utility::isTypeContainedIn - const ConfigOption* addOptionImpl( + const ConfigOption& addOptionImpl( const std::vector& pathToOption, std::string_view optionDescription, OptionType* variableToPutValueOfTheOptionIn, @@ -269,8 +274,8 @@ class ConfigManager { move constructor. Which is why, we can't just return the `ConfigOption` we created here. 
*/ - return configurationOptions_.at(createJsonPointerString(pathToOption)) - .get(); + return *configurationOptions_.at(createJsonPointerString(pathToOption)) + .get(); } }; } // namespace ad_utility diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 3411f3929f..416687f290 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -87,22 +87,22 @@ TEST(ConfigManagerTest, ParseConfig) { int secondInt; int thirdInt; - const ConfigOption* optionZero = + const ConfigOption& optionZero = config.addOption({"depth_0"s, "Option_0"s}, "Must be set. Has no default value.", &firstInt); - const ConfigOption* optionOne = + const ConfigOption& optionOne = config.addOption({"depth_0", "depth_1", "Option_1"}, "Must be set. Has no default value.", &secondInt); - const ConfigOption* optionTwo = + const ConfigOption& optionTwo = config.addOption("Option_2", "Has a default value.", &thirdInt, 2); // Does the option with the default already have a value? - checkOption(*optionTwo, thirdInt, true, 2); + checkOption(optionTwo, thirdInt, true, 2); // The other two should never have set the variable, that the internal pointer // points to. - checkOption(*optionZero, firstInt, false, 2); - checkOption(*optionOne, secondInt, false, 2); + checkOption(optionZero, firstInt, false, 2); + checkOption(optionOne, secondInt, false, 2); // The json for testing `parseConfig`. Sets all of the configuration // options. @@ -119,9 +119,9 @@ TEST(ConfigManagerTest, ParseConfig) { // Set and check. config.parseConfig(testJson); - checkOption(*optionZero, firstInt, true, 10); - checkOption(*optionOne, secondInt, true, 11); - checkOption(*optionTwo, thirdInt, true, 12); + checkOption(optionZero, firstInt, true, 10); + checkOption(optionOne, secondInt, true, 11); + checkOption(optionTwo, thirdInt, true, 12); } TEST(ConfigManagerTest, ParseConfigExceptionTest) { @@ -189,109 +189,109 @@ TEST(ConfigManagerTest, ParseShortHandTest) { // Add integer options. 
int somePositiveNumberInt; - const ConfigOption* somePositiveNumber = config.addOption( + const ConfigOption& somePositiveNumber = config.addOption( "somePositiveNumber", "Must be set. Has no default value.", &somePositiveNumberInt); int someNegativNumberInt; - const ConfigOption* someNegativNumber = config.addOption( + const ConfigOption& someNegativNumber = config.addOption( "someNegativNumber", "Must be set. Has no default value.", &someNegativNumberInt); // Add integer list. std::vector someIntegerlistIntVector; - const ConfigOption* someIntegerlist = + const ConfigOption& someIntegerlist = config.addOption("someIntegerlist", "Must be set. Has no default value.", &someIntegerlistIntVector); // Add floating point options. float somePositiveFloatingPointFloat; - const ConfigOption* somePositiveFloatingPoint = config.addOption( + const ConfigOption& somePositiveFloatingPoint = config.addOption( "somePositiveFloatingPoint", "Must be set. Has no default value.", &somePositiveFloatingPointFloat); float someNegativFloatingPointFloat; - const ConfigOption* someNegativFloatingPoint = config.addOption( + const ConfigOption& someNegativFloatingPoint = config.addOption( "someNegativFloatingPoint", "Must be set. Has no default value.", &someNegativFloatingPointFloat); // Add floating point list. std::vector someFloatingPointListFloatVector; - const ConfigOption* someFloatingPointList = config.addOption( + const ConfigOption& someFloatingPointList = config.addOption( "someFloatingPointList", "Must be set. Has no default value.", &someFloatingPointListFloatVector); // Add boolean options. bool boolTrueBool; - const ConfigOption* boolTrue = config.addOption( + const ConfigOption& boolTrue = config.addOption( "boolTrue", "Must be set. Has no default value.", &boolTrueBool); bool boolFalseBool; - const ConfigOption* boolFalse = config.addOption( + const ConfigOption& boolFalse = config.addOption( "boolFalse", "Must be set. 
Has no default value.", &boolFalseBool); // Add boolean list. std::vector someBooleanListBoolVector; - const ConfigOption* someBooleanList = + const ConfigOption& someBooleanList = config.addOption("someBooleanList", "Must be set. Has no default value.", &someBooleanListBoolVector); // Add string option. std::string myNameString; - const ConfigOption* myName = config.addOption( + const ConfigOption& myName = config.addOption( "myName", "Must be set. Has no default value.", &myNameString); // Add string list. std::vector someStringListStringVector; - const ConfigOption* someStringList = + const ConfigOption& someStringList = config.addOption("someStringList", "Must be set. Has no default value.", &someStringListStringVector); // Add option with deeper level. std::vector deeperIntVector; - const ConfigOption* deeperIntVectorOption = + const ConfigOption& deeperIntVectorOption = config.addOption({"depth", "here", "list"}, "Must be set. Has no default value.", &deeperIntVector); // This one will not be changed, in order to test, that options, that are // not set at run time, are not changed. int noChangeInt; - const ConfigOption* noChange = + const ConfigOption& noChange = config.addOption("No_change", "", &noChangeInt, 10); // Set those. 
config.parseConfig(ad_utility::ConfigManager::parseShortHand( R"--(somePositiveNumber : 42, someNegativNumber : -42, someIntegerlist : [40, 41], somePositiveFloatingPoint : 4.2, someNegativFloatingPoint : -4.2, someFloatingPointList : [4.1, 4.2], boolTrue : true, boolFalse : false, someBooleanList : [true, false, true], myName : "Bernd", someStringList : ["t1", "t2"], depth : { here : {list : [7,8]}})--")); - checkOption(*somePositiveNumber, somePositiveNumberInt, true, 42); - checkOption(*someNegativNumber, someNegativNumberInt, true, -42); + checkOption(somePositiveNumber, somePositiveNumberInt, true, 42); + checkOption(someNegativNumber, someNegativNumberInt, true, -42); - checkOption>(*someIntegerlist, someIntegerlistIntVector, - true, std::vector{40, 41}); + checkOption>(someIntegerlist, someIntegerlistIntVector, true, + std::vector{40, 41}); - checkOption(*somePositiveFloatingPoint, somePositiveFloatingPointFloat, + checkOption(somePositiveFloatingPoint, somePositiveFloatingPointFloat, true, 4.2f); - checkOption(*someNegativFloatingPoint, someNegativFloatingPointFloat, + checkOption(someNegativFloatingPoint, someNegativFloatingPointFloat, true, -4.2f); - checkOption>(*someFloatingPointList, + checkOption>(someFloatingPointList, someFloatingPointListFloatVector, true, {4.1f, 4.2f}); - checkOption(*boolTrue, boolTrueBool, true, true); - checkOption(*boolFalse, boolFalseBool, true, false); + checkOption(boolTrue, boolTrueBool, true, true); + checkOption(boolFalse, boolFalseBool, true, false); - checkOption>(*someBooleanList, someBooleanListBoolVector, + checkOption>(someBooleanList, someBooleanListBoolVector, true, std::vector{true, false, true}); - checkOption(*myName, myNameString, true, std::string{"Bernd"}); + checkOption(myName, myNameString, true, std::string{"Bernd"}); - checkOption>(*someStringList, + checkOption>(someStringList, someStringListStringVector, true, std::vector{"t1", "t2"}); - checkOption>(*deeperIntVectorOption, deeperIntVector, true, + 
checkOption>(deeperIntVectorOption, deeperIntVector, true, std::vector{7, 8}); // Is the "No Change" unchanged? - checkOption(*noChange, noChangeInt, true, 10); + checkOption(noChange, noChangeInt, true, 10); // Multiple key value pairs with the same key are not allowed. AD_EXPECT_THROW_WITH_MESSAGE(ad_utility::ConfigManager::parseShortHand( From b2e58d4ca902419ba4b6213529bc4d5f720319f0 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 14:17:27 +0200 Subject: [PATCH 18/63] src/util/ConfigManager/ConfigManager: Improved element access in implementation by usage of std::views. --- src/util/ConfigManager/ConfigManager.cpp | 57 +++++++++++++++++------- src/util/ConfigManager/ConfigManager.h | 7 +++ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 2fdc7e2d3a..6ebf189424 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,13 +2,13 @@ // Chair of Algorithms and Data Structures. 
// Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) -#include "util/ConfigManager/ConfigManager.h" - #include #include #include #include +#include +#include #include #include #include @@ -17,20 +17,43 @@ #include #include #include +#include #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" #include "util/ConfigManager/generated/ConfigShorthandLexer.h" #include "util/ConfigManager/generated/ConfigShorthandParser.h" #include "util/Exception.h" +#include "util/Forward.h" #include "util/StringUtils.h" #include "util/antlr/ANTLRErrorHandling.h" #include "util/json.h" namespace ad_utility { +static auto getDereferencedConfigurationOptionsViewImpl( + auto& configurationOptions) { + return std::views::transform(AD_FWD(configurationOptions), [](auto& pair) { + // Make sure, that there is no null pointer. + AD_CORRECTNESS_CHECK(pair.second); + + // Return a dereferenced reference. + return std::tie(pair.first, *pair.second); + }); +} + +// ____________________________________________________________________________ +auto ConfigManager::getDereferencedConfigurationOptionsView() { + return getDereferencedConfigurationOptionsViewImpl(configurationOptions_); +} + +// ____________________________________________________________________________ +auto ConfigManager::getDereferencedConfigurationOptionsView() const { + return getDereferencedConfigurationOptionsViewImpl(configurationOptions_); +} // ____________________________________________________________________________ std::string ConfigManager::createJsonPointerString( @@ -204,22 +227,22 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { an exception, if a configuration option was given a value of the wrong type, or if it HAD to be set, but wasn't. 
*/ - for (auto& [key, option] : configurationOptions_) { + for (auto&& [key, option] : getDereferencedConfigurationOptionsView()) { // Set the option, if possible, with the pointer to the position of the // current configuration in json. if (const nlohmann::json::json_pointer configurationOptionJsonPosition{key}; j.contains(configurationOptionJsonPosition)) { // This will throw an exception, if the json object can't be interpreted // with the wanted type. - option->setValueWithJson(j.at(configurationOptionJsonPosition)); + option.setValueWithJson(j.at(configurationOptionJsonPosition)); } /* If the option hasn't set the variable, that it's internal variable pointer - points to, that means, it doesn't have a default value, and needs to be set - by the user at runtime, but wasn't. + points to, that means, it doesn't have a default value, and needs to be + set by the user at runtime, but wasn't. */ - if (!option->wasSet()) { + if (!option.wasSet()) { throw ConfigOptionWasntSetException(key); } } @@ -245,7 +268,7 @@ std::string ConfigManager::printConfigurationDoc( - The default value of the configuration option. - An example value, of the correct type. */ - for (const auto& [path, option] : configurationOptions_) { + for (const auto& [path, option] : getDereferencedConfigurationOptionsView()) { // Pointer to the position of this option in // `configuratioOptionsVisualization`. const nlohmann::json::json_pointer jsonOptionPointer{path}; @@ -254,12 +277,12 @@ std::string ConfigManager::printConfigurationDoc( // We can only use the value, if we are sure, that the value was // initialized. configuratioOptionsVisualization[jsonOptionPointer] = - option->wasSet() ? option->getValueAsJson() - : "value was never initialized"; + option.wasSet() ? option.getValueAsJson() + : "value was never initialized"; } else { configuratioOptionsVisualization[jsonOptionPointer] = - option->hasDefaultValue() ? 
option->getDefaultValueAsJson() - : option->getDummyValueAsJson(); + option.hasDefaultValue() ? option.getDefaultValueAsJson() + : option.getDummyValueAsJson(); } } @@ -268,13 +291,13 @@ std::string ConfigManager::printConfigurationDoc( // List the configuration options themselves. const std::string& listOfConfigurationOptions = ad_utility::lazyStrJoin( - std::views::transform(configurationOptions_, + std::views::transform(getDereferencedConfigurationOptionsView(), [](const auto& pair) { // Add the location of the option and the option // itself. return absl::StrCat( - "Location : ", pair.first, "\n", - static_cast(*pair.second)); + "Location : ", std::get<0>(pair), "\n", + static_cast(std::get<1>(pair))); }), "\n\n"); @@ -307,8 +330,8 @@ std::string ConfigManager::getListOfNotChangedConfigOptionsWithDefaultValuesAsString() const { // For only looking at the configuration options in our map. - auto onlyConfigurationOptionsView = std::views::transform( - configurationOptions_, [](const auto& pair) { return *pair.second; }); + auto onlyConfigurationOptionsView = + std::views::values(getDereferencedConfigurationOptionsView()); // Returns true, if the `ConfigOption` has a default value and wasn't set at // runtime. diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 277087e16e..6a223a3b47 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -277,5 +277,12 @@ class ConfigManager { return *configurationOptions_.at(createJsonPointerString(pathToOption)) .get(); } + + /* + @brief Provide a range of tuples, that hold references to the key value pairs + in `configurationOptions_`, but with the pointer dereferenced. 
+ */ + auto getDereferencedConfigurationOptionsView(); + auto getDereferencedConfigurationOptionsView() const; }; } // namespace ad_utility From 86ee1f0392b0b90f7e02489d959182bf5807fbff Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 14:21:45 +0200 Subject: [PATCH 19/63] src/util/ConfigManager/ConfigManager: Improved consistency of examples for addOption. --- test/ConfigManagerTest.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 416687f290..8ff6843149 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -50,13 +50,14 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { // Configuration options for testing. int notUsed; - config.addOption({"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", - ¬Used, 42); + config.addOption({"Shared_part"s, "Unique_part_1"s, "Sense_of_existence"s}, + "", ¬Used, 42); // Trying to add a configuration option with the same name at the same // place, should cause an error. ASSERT_THROW(config.addOption( - {"Shared_part", "Unique_part_1", "Sense_of_existence"}, "", ¬Used, 42); + {"Shared_part"s, "Unique_part_1"s, "Sense_of_existence"s}, "", ¬Used, + 42); , ad_utility::ConfigManagerOptionPathAlreadyinUseException); /* @@ -91,7 +92,7 @@ TEST(ConfigManagerTest, ParseConfig) { config.addOption({"depth_0"s, "Option_0"s}, "Must be set. Has no default value.", &firstInt); const ConfigOption& optionOne = - config.addOption({"depth_0", "depth_1", "Option_1"}, + config.addOption({"depth_0"s, "depth_1"s, "Option_1"s}, "Must be set. Has no default value.", &secondInt); const ConfigOption& optionTwo = config.addOption("Option_2", "Has a default value.", &thirdInt, 2); @@ -247,7 +248,7 @@ TEST(ConfigManagerTest, ParseShortHandTest) { // Add option with deeper level. 
std::vector deeperIntVector; const ConfigOption& deeperIntVectorOption = - config.addOption({"depth", "here", "list"}, + config.addOption({"depth"s, "here"s, "list"s}, "Must be set. Has no default value.", &deeperIntVector); // This one will not be changed, in order to test, that options, that are From ba7eceeedc7b9ef69c9d92747a13550c11c998cc Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 14:31:25 +0200 Subject: [PATCH 20/63] Cleaned up some code formatting. --- src/util/ConfigManager/ConfigManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 6ebf189424..1a18655c3c 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,6 +2,8 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) +#include "util/ConfigManager/ConfigManager.h" + #include #include #include @@ -21,7 +23,6 @@ #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" -#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" From adf88dcd5caa367b237bdb18da1b5d594532fef2 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 14:50:09 +0200 Subject: [PATCH 21/63] src/index/IndexImpl: Adjusted to changes in API. 
--- src/index/IndexImpl.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index bacbf47e98..5600f59404 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,8 +4,6 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) -#include "./IndexImpl.h" - #include #include #include @@ -13,6 +11,7 @@ #include #include +#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" @@ -797,7 +796,7 @@ void IndexImpl::readConfiguration() { // TODO Write a description. bool boolPrefixes; - const ad_utility::ConfigOption* prefixesOption = + const ad_utility::ConfigOption& prefixesOption = config.addOption("prefixes", "", &boolPrefixes, false); // TODO Write a description. @@ -901,7 +900,7 @@ void IndexImpl::readConfiguration() { } } */ - if (prefixesOption->wasSetAtRuntime()) { + if (prefixesOption.wasSetAtRuntime()) { configurationJson_["prefixes"] = boolPrefixes; if (boolPrefixes) { vector prefixes; @@ -1004,17 +1003,17 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { // TODO Write a description. std::string lang; - const ad_utility::ConfigOption* langOption = config.addOption( + const ad_utility::ConfigOption& langOption = config.addOption( {"locale"s, "language"s}, "", &lang, LOCALE_DEFAULT_LANG); // TODO Write a description. std::string country; - const ad_utility::ConfigOption* countryOption = config.addOption( + const ad_utility::ConfigOption& countryOption = config.addOption( {"locale"s, "country"s}, "", &country, LOCALE_DEFAULT_COUNTRY); // TODO Write a description. 
bool ignorePunctuation; - const ad_utility::ConfigOption* ignorePunctuationOption = + const ad_utility::ConfigOption& ignorePunctuationOption = config.addOption({"locale"s, "ignore-punctuation"s}, "", &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); @@ -1059,9 +1058,9 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { * locale setting. */ - if (langOption->wasSetAtRuntime() != countryOption->wasSetAtRuntime() || - countryOption->wasSetAtRuntime() != - ignorePunctuationOption->wasSetAtRuntime()) { + if (langOption.wasSetAtRuntime() != countryOption.wasSetAtRuntime() || + countryOption.wasSetAtRuntime() != + ignorePunctuationOption.wasSetAtRuntime()) { throw std::runtime_error(absl::StrCat( "All three options under 'locale' must be set, or none of them.", config.printConfigurationDoc(true))); From b959473823f30df79951592c313d11c966b60795 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 14:51:39 +0200 Subject: [PATCH 22/63] src/index/IndexImpl: Deleted uneeded comments. --- src/index/IndexImpl.cpp | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 5600f59404..6bf1b008bf 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -884,22 +884,6 @@ void IndexImpl::readConfiguration() { "Incompatible index format, see log message for details"}; } - // Slight problem here: I have no idea, what kind of value `"prefixes"` points - // to. So I had to guess. 
- /* - if (configurationJson_.find("prefixes") != configurationJson_.end()) { - if (configurationJson_["prefixes"]) { - vector prefixes; - auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); - for (string prefix; std::getline(prefixFile, prefix);) { - prefixes.emplace_back(std::move(prefix)); - } - vocab_.buildCodebookForPrefixCompression(prefixes); - } else { - vocab_.buildCodebookForPrefixCompression(std::vector()); - } - } - */ if (prefixesOption.wasSetAtRuntime()) { configurationJson_["prefixes"] = boolPrefixes; if (boolPrefixes) { @@ -926,16 +910,6 @@ void IndexImpl::readConfiguration() { vocab_.initializeInternalizedLangs(languagesInternal); configurationJson_["languages-internal"] = languagesInternal; - // Once again, I can only guess, what kind of value should be at - // `"has-all-permutations"`. - /* - if (configurationJson_.find("has-all-permutations") != - configurationJson_.end() && - configurationJson_["has-all-permutations"] == false) { - // If the permutations simply don't exist, then we can never load them. - loadAllPermutations_ = false; - } - */ if (!hasAllPermutations) { configurationJson_["has-all-permutations"] = false; // If the permutations simply don't exist, then we can never load them. From a021fc5098c4c2ab2f47f662203a0522e24d8f30 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 14:55:17 +0200 Subject: [PATCH 23/63] Cleaned up code formatting. 
--- src/index/IndexImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 6bf1b008bf..2cf55e0758 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" From 8f8c8a32fd793daf9c5e967f1e2671c001f5bb8d Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 15:24:18 +0200 Subject: [PATCH 24/63] src/util/ConfigManager/ConfigManager: Replaced specific hash map with ad_utility:hash_map. --- src/util/ConfigManager/ConfigManager.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 6a223a3b47..e2767cb6db 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -26,6 +26,7 @@ #include "util/ConfigManager/generated/ConfigShorthandLexer.h" #include "util/Exception.h" #include "util/Forward.h" +#include "util/HashMap.h" #include "util/TypeTraits.h" #include "util/json.h" @@ -45,7 +46,7 @@ class ConfigManager { The string key describes their location in the json object literal, by representing a json pointer in string form. */ - absl::flat_hash_map> + ad_utility::HashMap> configurationOptions_; public: From 3559c406975bdcba59d64ce1af45a6187e3e431b Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 17:51:38 +0200 Subject: [PATCH 25/63] src/util/ConfigManager/ConfigManager: Improved helper function name. 
--- src/util/ConfigManager/ConfigManager.cpp | 22 ++++++++++------------ src/util/ConfigManager/ConfigManager.h | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 1a18655c3c..b40eb7a9a3 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,8 +2,6 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) -#include "util/ConfigManager/ConfigManager.h" - #include #include #include @@ -23,6 +21,7 @@ #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" @@ -35,8 +34,7 @@ #include "util/json.h" namespace ad_utility { -static auto getDereferencedConfigurationOptionsViewImpl( - auto& configurationOptions) { +static auto configurationOptionsImpl(auto& configurationOptions) { return std::views::transform(AD_FWD(configurationOptions), [](auto& pair) { // Make sure, that there is no null pointer. 
AD_CORRECTNESS_CHECK(pair.second); @@ -47,13 +45,13 @@ static auto getDereferencedConfigurationOptionsViewImpl( } // ____________________________________________________________________________ -auto ConfigManager::getDereferencedConfigurationOptionsView() { - return getDereferencedConfigurationOptionsViewImpl(configurationOptions_); +auto ConfigManager::configurationOptions() { + return configurationOptionsImpl(configurationOptions_); } // ____________________________________________________________________________ -auto ConfigManager::getDereferencedConfigurationOptionsView() const { - return getDereferencedConfigurationOptionsViewImpl(configurationOptions_); +auto ConfigManager::configurationOptionsView() const { + return configurationOptionsImpl(configurationOptions_); } // ____________________________________________________________________________ @@ -228,7 +226,7 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { an exception, if a configuration option was given a value of the wrong type, or if it HAD to be set, but wasn't. */ - for (auto&& [key, option] : getDereferencedConfigurationOptionsView()) { + for (auto&& [key, option] : configurationOptions()) { // Set the option, if possible, with the pointer to the position of the // current configuration in json. if (const nlohmann::json::json_pointer configurationOptionJsonPosition{key}; @@ -269,7 +267,7 @@ std::string ConfigManager::printConfigurationDoc( - The default value of the configuration option. - An example value, of the correct type. */ - for (const auto& [path, option] : getDereferencedConfigurationOptionsView()) { + for (const auto& [path, option] : configurationOptionsView()) { // Pointer to the position of this option in // `configuratioOptionsVisualization`. const nlohmann::json::json_pointer jsonOptionPointer{path}; @@ -292,7 +290,7 @@ std::string ConfigManager::printConfigurationDoc( // List the configuration options themselves. 
const std::string& listOfConfigurationOptions = ad_utility::lazyStrJoin( - std::views::transform(getDereferencedConfigurationOptionsView(), + std::views::transform(configurationOptionsView(), [](const auto& pair) { // Add the location of the option and the option // itself. @@ -332,7 +330,7 @@ ConfigManager::getListOfNotChangedConfigOptionsWithDefaultValuesAsString() const { // For only looking at the configuration options in our map. auto onlyConfigurationOptionsView = - std::views::values(getDereferencedConfigurationOptionsView()); + std::views::values(configurationOptionsView()); // Returns true, if the `ConfigOption` has a default value and wasn't set at // runtime. diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index e2767cb6db..3a76625b6b 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -283,7 +283,7 @@ class ConfigManager { @brief Provide a range of tuples, that hold references to the key value pairs in `configurationOptions_`, but with the pointer dereferenced. */ - auto getDereferencedConfigurationOptionsView(); - auto getDereferencedConfigurationOptionsView() const; + auto configurationOptions(); + auto configurationOptionsView() const; }; } // namespace ad_utility From b8114e0ea363681584329544f8e609adee1c1d15 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 17:56:08 +0200 Subject: [PATCH 26/63] src/util/ConfigManager/ConfigManager: Got rid of uneeded forwarding. --- src/util/ConfigManager/ConfigManager.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index b40eb7a9a3..ce925072fc 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,6 +2,8 @@ // Chair of Algorithms and Data Structures. 
// Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) +#include "util/ConfigManager/ConfigManager.h" + #include #include #include @@ -21,21 +23,19 @@ #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" -#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" #include "util/ConfigManager/generated/ConfigShorthandLexer.h" #include "util/ConfigManager/generated/ConfigShorthandParser.h" #include "util/Exception.h" -#include "util/Forward.h" #include "util/StringUtils.h" #include "util/antlr/ANTLRErrorHandling.h" #include "util/json.h" namespace ad_utility { static auto configurationOptionsImpl(auto& configurationOptions) { - return std::views::transform(AD_FWD(configurationOptions), [](auto& pair) { + return std::views::transform(configurationOptions, [](auto& pair) { // Make sure, that there is no null pointer. AD_CORRECTNESS_CHECK(pair.second); From 05af73ef90e1aabfa44a99da9ede6b24fec6de62 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 21 Jul 2023 18:58:41 +0200 Subject: [PATCH 27/63] src/util/ConfigManager/ConfigManager: Fixed a missspelling from a previous function renaming. 
--- src/util/ConfigManager/ConfigManager.cpp | 8 ++++---- src/util/ConfigManager/ConfigManager.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index ce925072fc..4d5bb8807b 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -50,7 +50,7 @@ auto ConfigManager::configurationOptions() { } // ____________________________________________________________________________ -auto ConfigManager::configurationOptionsView() const { +auto ConfigManager::configurationOptions() const { return configurationOptionsImpl(configurationOptions_); } @@ -267,7 +267,7 @@ std::string ConfigManager::printConfigurationDoc( - The default value of the configuration option. - An example value, of the correct type. */ - for (const auto& [path, option] : configurationOptionsView()) { + for (const auto& [path, option] : configurationOptions()) { // Pointer to the position of this option in // `configuratioOptionsVisualization`. const nlohmann::json::json_pointer jsonOptionPointer{path}; @@ -290,7 +290,7 @@ std::string ConfigManager::printConfigurationDoc( // List the configuration options themselves. const std::string& listOfConfigurationOptions = ad_utility::lazyStrJoin( - std::views::transform(configurationOptionsView(), + std::views::transform(configurationOptions(), [](const auto& pair) { // Add the location of the option and the option // itself. @@ -330,7 +330,7 @@ ConfigManager::getListOfNotChangedConfigOptionsWithDefaultValuesAsString() const { // For only looking at the configuration options in our map. auto onlyConfigurationOptionsView = - std::views::values(configurationOptionsView()); + std::views::values(configurationOptions()); // Returns true, if the `ConfigOption` has a default value and wasn't set at // runtime. 
diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 3a76625b6b..4d2952a839 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -284,6 +284,6 @@ class ConfigManager { in `configurationOptions_`, but with the pointer dereferenced. */ auto configurationOptions(); - auto configurationOptionsView() const; + auto configurationOptions() const; }; } // namespace ad_utility From 3e3c740692ebb98a4422371c3484ddcca80dea33 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 11:00:28 +0200 Subject: [PATCH 28/63] src/util/ConfigManager/ConfigManager: Sub managers can now be created for better organization. --- src/util/ConfigManager/ConfigExceptions.cpp | 7 +- src/util/ConfigManager/ConfigManager.cpp | 165 +++++--- src/util/ConfigManager/ConfigManager.h | 73 ++-- test/ConfigManagerTest.cpp | 426 +++++++++++++++++++- 4 files changed, 579 insertions(+), 92 deletions(-) diff --git a/src/util/ConfigManager/ConfigExceptions.cpp b/src/util/ConfigManager/ConfigExceptions.cpp index 1a53c83c98..70ce2436b1 100644 --- a/src/util/ConfigManager/ConfigExceptions.cpp +++ b/src/util/ConfigManager/ConfigExceptions.cpp @@ -2,13 +2,13 @@ // Chair of Algorithms and Data Structures. 
// Author: Andre Schlegel (June of 2023, schlegea@informatik.uni-freiburg.de) -#include "util/ConfigManager/ConfigExceptions.h" - #include #include #include +#include "util/ConfigManager/ConfigExceptions.h" + namespace ad_utility { //_____________________________________________________________________________ std::string& ExceptionWithMessage::getMessage() { return message_; } @@ -63,7 +63,8 @@ ConfigManagerOptionPathAlreadyinUseException:: std::string_view pathToOption, std::string_view allPathsCurrentlyInUse) { getMessage() = absl::StrCat( - "Key error: There is already a configuration option with the path '", + "Key error: There is already a configuration option/manager with the " + "path '", pathToOption, "'\n", allPathsCurrentlyInUse, "\n"); } diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 4d5bb8807b..8e9fb00540 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,8 +2,6 @@ // Chair of Algorithms and Data Structures. 
// Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) -#include "util/ConfigManager/ConfigManager.h" - #include #include #include @@ -20,38 +18,82 @@ #include #include #include +#include #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" #include "util/ConfigManager/generated/ConfigShorthandLexer.h" #include "util/ConfigManager/generated/ConfigShorthandParser.h" #include "util/Exception.h" +#include "util/HashMap.h" #include "util/StringUtils.h" #include "util/antlr/ANTLRErrorHandling.h" #include "util/json.h" namespace ad_utility { -static auto configurationOptionsImpl(auto& configurationOptions) { - return std::views::transform(configurationOptions, [](auto& pair) { - // Make sure, that there is no null pointer. - AD_CORRECTNESS_CHECK(pair.second); +// ____________________________________________________________________________ +auto ConfigManager::configurationOptionsImpl(auto& configurationOptions, + std::string_view pathPrefix) { + std::vector> collectedOptions; - // Return a dereferenced reference. - return std::tie(pair.first, *pair.second); - }); + std::ranges::for_each( + configurationOptions, + [&collectedOptions, &pathPrefix](auto pair) { + const std::string& pathToCurrentEntry = + absl::StrCat(pathPrefix, std::get<0>(pair)); + + std::visit( + [&collectedOptions, &pathToCurrentEntry](T& var) { + // A normal `ConfigOption` can be directly added. For a + // `ConfigManager` we have to recursively collect the options. 
+ if constexpr (std::is_same_v, ConfigOption>) { + collectedOptions.push_back( + std::make_pair(pathToCurrentEntry, &var)); + } else { + AD_CORRECTNESS_CHECK( + (std::is_same_v, ConfigManager>)); + ad_utility::appendVector( + collectedOptions, + var.configurationOptions(pathToCurrentEntry)); + } + }, + std::get<1>(pair)); + }, + [&pathPrefix](auto& pair) { + // Make sure, that there is no null pointer. + AD_CORRECTNESS_CHECK(pair.second != nullptr); + + // An empty sub manager tends to point to a logic error on the user + // side. + if (const ConfigManager* ptr = + std::get_if(pair.second.get()); + ptr != nullptr && ptr->configurationOptions_.empty()) { + throw std::runtime_error(absl::StrCat( + "The sub manager at '", pathPrefix, std::get<0>(pair), + "' is empty. Either fill it, or delete it.")); + } + + // Return a dereferenced reference. + return std::tie(pair.first, *pair.second); + }); + + return collectedOptions; } // ____________________________________________________________________________ -auto ConfigManager::configurationOptions() { - return configurationOptionsImpl(configurationOptions_); +std::vector> +ConfigManager::configurationOptions(std::string_view pathPrefix) { + return configurationOptionsImpl(configurationOptions_, pathPrefix); } // ____________________________________________________________________________ -auto ConfigManager::configurationOptions() const { - return configurationOptionsImpl(configurationOptions_); +const std::vector> +ConfigManager::configurationOptions(std::string_view pathPrefix) const { + return configurationOptionsImpl(configurationOptions_, pathPrefix); } // ____________________________________________________________________________ @@ -80,41 +122,32 @@ std::string ConfigManager::createJsonPointerString( } // ____________________________________________________________________________ -void ConfigManager::verifyPathToConfigOption( - const std::vector& pathToOption, - std::string_view optionName) const { +void 
ConfigManager::verifyPath(const std::vector& path) const { // We need at least a name in the path. - if (pathToOption.empty()) { + if (path.empty()) { throw std::runtime_error( - "The vector 'pathToOption' is empty, which is not allowed. We need at " - "least a name for a working path to a configuration option."); + "The vector 'path' is empty, which is not allowed. We need at least a " + "name for a working path to a configuration option, or manager."); } /* - The last entry in the path is the name of the configuration option. If it - isn't, something has gone wrong. - */ - AD_CORRECTNESS_CHECK(pathToOption.back() == optionName); - - /* - A string must be a valid `NAME` in the short hand. Otherwise, the option can't + A string must be a valid `NAME` in the short hand. Otherwise, an option can't get accessed with the short hand. */ - if (auto failedKey = - std::ranges::find_if_not(pathToOption, isNameInShortHand); - failedKey != pathToOption.end()) { + if (auto failedKey = std::ranges::find_if_not(path, isNameInShortHand); + failedKey != path.end()) { /* One of the keys failed. `failedKey` is an iterator pointing to the key. */ - throw NotValidShortHandNameException( - *failedKey, vectorOfKeysForJsonToString(pathToOption)); + throw NotValidShortHandNameException(*failedKey, + vectorOfKeysForJsonToString(path)); } - // Is there already a configuration option with the same identifier at the - // same location? - if (configurationOptions_.contains(createJsonPointerString(pathToOption))) { + // Is there already a configuration option/manager with the same identifier at + // the same location? 
+ if (configurationOptions_.contains(createJsonPointerString(path))) { throw ConfigManagerOptionPathAlreadyinUseException( - vectorOfKeysForJsonToString(pathToOption), printConfigurationDoc(true)); + vectorOfKeysForJsonToString(path), printConfigurationDoc(true)); } } @@ -122,12 +155,28 @@ void ConfigManager::verifyPathToConfigOption( void ConfigManager::addConfigOption( const std::vector& pathToOption, ConfigOption&& option) { // Is the path valid? - verifyPathToConfigOption(pathToOption, option.getIdentifier()); + verifyPath(pathToOption); // Add the configuration option. - configurationOptions_.insert( - {createJsonPointerString(pathToOption), - std::make_unique(std::move(option))}); + configurationOptions_.emplace( + createJsonPointerString(pathToOption), + std::make_unique(std::move(option))); +} + +// ____________________________________________________________________________ +ConfigManager& ConfigManager::addSubManager( + const std::vector& path) { + // Is the path valid? + verifyPath(path); + + // The path in json format. + const std::string jsonPath = createJsonPointerString(path); + + // Add the configuration manager. + configurationOptions_.emplace( + jsonPath, std::make_unique(ConfigManager{})); + + return std::get(*configurationOptions_.at(jsonPath)); } // ____________________________________________________________________________ @@ -175,6 +224,12 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { */ const auto& jFlattend = j.flatten(); + // All the configuration options together with their paths. + std::vector> allConfigOption = + configurationOptions(""); + ad_utility::HashMap allConfigOptionHashMap( + allConfigOption.begin(), allConfigOption.end()); + /* We can skip the following check, if `j` is empty. 
Note: Even if the JSON object is empty, its flattened version contains a single dummy entry, so @@ -193,8 +248,8 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { // Only returns true, if the given pointer is the path to a // configuration option. auto isPointerToConfigurationOption = - [this](const nlohmann::json::json_pointer& ptr) { - return configurationOptions_.contains(ptr.to_string()); + [&allConfigOptionHashMap](const nlohmann::json::json_pointer& ptr) { + return allConfigOptionHashMap.contains(ptr.to_string()); }; /* @@ -226,7 +281,9 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { an exception, if a configuration option was given a value of the wrong type, or if it HAD to be set, but wasn't. */ - for (auto&& [key, option] : configurationOptions()) { + for (auto&& [key, option] : std::views::transform( + allConfigOption, + [](auto& pair) { return std::tie(pair.first, *pair.second); })) { // Set the option, if possible, with the pointer to the position of the // current configuration in json. if (const nlohmann::json::json_pointer configurationOptionJsonPosition{key}; @@ -250,10 +307,12 @@ void ConfigManager::parseConfig(const nlohmann::json& j) { // ____________________________________________________________________________ std::string ConfigManager::printConfigurationDoc( bool printCurrentJsonConfiguration) const { - // Handeling, for when there are no configuration options. - if (configurationOptions_.empty()) { - return "No configuration options were defined."; - } + // All the configuration options together with their paths. + const std::vector> allConfigOption = + configurationOptions(""); + auto allConfigOptionDereferencedView = std::views::transform( + allConfigOption, + [](auto& pair) { return std::tie(pair.first, *pair.second); }); // Setup for printing the locations of the option in json format, so that // people can easier understand, where everything is. 
@@ -267,7 +326,7 @@ std::string ConfigManager::printConfigurationDoc( - The default value of the configuration option. - An example value, of the correct type. */ - for (const auto& [path, option] : configurationOptions()) { + for (const auto& [path, option] : allConfigOptionDereferencedView) { // Pointer to the position of this option in // `configuratioOptionsVisualization`. const nlohmann::json::json_pointer jsonOptionPointer{path}; @@ -290,7 +349,7 @@ std::string ConfigManager::printConfigurationDoc( // List the configuration options themselves. const std::string& listOfConfigurationOptions = ad_utility::lazyStrJoin( - std::views::transform(configurationOptions(), + std::views::transform(allConfigOptionDereferencedView, [](const auto& pair) { // Add the location of the option and the option // itself. @@ -328,9 +387,16 @@ std::string ConfigManager::vectorOfKeysForJsonToString( std::string ConfigManager::getListOfNotChangedConfigOptionsWithDefaultValuesAsString() const { + // All the configuration options together with their paths. + const std::vector> allConfigOption = + configurationOptions(""); + auto allConfigOptionDereferencedView = std::views::transform( + allConfigOption, + [](auto& pair) { return std::tie(pair.first, *pair.second); }); + // For only looking at the configuration options in our map. auto onlyConfigurationOptionsView = - std::views::values(configurationOptions()); + std::views::values(allConfigOptionDereferencedView); // Returns true, if the `ConfigOption` has a default value and wasn't set at // runtime. 
@@ -355,4 +421,5 @@ ConfigManager::getListOfNotChangedConfigOptionsWithDefaultValuesAsString() return ad_utility::lazyStrJoin(unchangedFromDefaultConfigOptions, "\n"); } + } // namespace ad_utility diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 4d2952a839..2940aea33e 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -17,6 +16,7 @@ #include #include #include +#include #include #include @@ -37,7 +37,8 @@ Manages a bunch of `ConfigOption`s. */ class ConfigManager { /* - The added configuration options. + The added configuration options. Configuration managers are used by the user + to describe a json object literal more explicitly. A configuration option tends to be placed like a key value pair in a json object. For example: `{"object 1" : [{"object 2" : { "The configuration option @@ -46,8 +47,9 @@ class ConfigManager { The string key describes their location in the json object literal, by representing a json pointer in string form. */ - ad_utility::HashMap> - configurationOptions_; + using HashMapEntry = + std::unique_ptr>; + ad_utility::HashMap configurationOptions_; public: /* @@ -149,13 +151,25 @@ class ConfigManager { variableToPutValueOfTheOptionIn, std::move(defaultValue)); } + /* + @brief Creates and adds a new configuration manager with a prefix path for + it's internally held configuration options and managers. + + @param pathToOption Describes a path in json, which will be a prefix to all + the other paths held in the newly created ConfigManager. + + @return A reference to the newly created configuration manager. This reference + will stay valid, even after adding more options. + */ + ConfigManager& addSubManager(const std::vector& path); + /* @brief Sets the configuration options based on the given json. 
@param j There will be an exception thrown, if: - `j` doesn't contain values for all configuration options, that must be set at runtime. - - Same, if there are values for configuration options, that do not exist. + - If there are values for configuration options, that do not exist. - `j` is anything but a json object literal. */ void parseConfig(const nlohmann::json& j); @@ -187,6 +201,7 @@ class ConfigManager { FRIEND_TEST(ConfigManagerTest, ParseConfig); FRIEND_TEST(ConfigManagerTest, ParseConfigExceptionTest); FRIEND_TEST(ConfigManagerTest, ParseShortHandTest); + FRIEND_TEST(ConfigManagerTest, CheckForBrokenPaths); /* @brief Creates the string representation of a valid `nlohmann::json` pointer @@ -196,15 +211,12 @@ class ConfigManager { const std::vector& keys); /* - @brief Verifies, that the given path is a valid path for an option, with this - name. If not, throws exceptions. + @brief Verifies, that the given path is a valid path for an option/manager. If + not, throws exceptions. - @param pathToOption Describes a path in json, that points to the value held by - the configuration option. - @param optionName The identifier of the `ConfigOption`. + @param pathToOption Describes a path in json. */ - void verifyPathToConfigOption(const std::vector& pathToOption, - std::string_view optionName) const; + void verifyPath(const std::vector& path) const; /* @brief Adds a configuration option, that can be accessed with the given path. @@ -256,15 +268,7 @@ class ConfigManager { OptionType* variableToPutValueOfTheOptionIn, std::optional defaultValue = std::optional(std::nullopt)) { - /* - We need a non-empty path to construct a ConfigOption object, the `verify...` - function always throws an exception for this case. No need to duplicate the - error code. 
- */ - if (pathToOption.empty()) { - verifyPathToConfigOption(pathToOption, ""); - } - + verifyPath(pathToOption); addConfigOption( pathToOption, ConfigOption(pathToOption.back(), optionDescription, @@ -275,15 +279,30 @@ class ConfigManager { move constructor. Which is why, we can't just return the `ConfigOption` we created here. */ - return *configurationOptions_.at(createJsonPointerString(pathToOption)) - .get(); + return std::get( + *configurationOptions_.at(createJsonPointerString(pathToOption))); } /* - @brief Provide a range of tuples, that hold references to the key value pairs - in `configurationOptions_`, but with the pointer dereferenced. + @brief A vector to all the configuratio options, held by this manager, + represented with their json paths and reference to them. Options held by a sub + manager, are also included with the path to the sub manager as prefix. + + @param pathPrefix This prefix will be added to all configuration option json + paths, that will be returned. + */ + std::vector> configurationOptions( + std::string_view pathPrefix = ""); + const std::vector> configurationOptions( + std::string_view pathPrefix = "") const; + + /* + @brief The implementation for `configurationOptions`. + + @param pathPrefix This prefix will be added to all configuration option json + paths, that will be returned. */ - auto configurationOptions(); - auto configurationOptions() const; + static auto configurationOptionsImpl(auto& configurationOptions, + std::string_view pathPrefix = ""); }; } // namespace ad_utility diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 8ff6843149..fb376102ca 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -5,6 +5,7 @@ #include +#include #include #include #include @@ -80,7 +81,39 @@ TEST(ConfigManagerTest, CreateConfigurationOptionExceptionTest) { , ad_utility::NotValidShortHandNameException); } -TEST(ConfigManagerTest, ParseConfig) { +/* +The exceptions for adding sub managers. 
+*/ +TEST(ConfigManagerTest, addSubManagerExceptionTest) { + ad_utility::ConfigManager config{}; + + // Sub manager for testing. Empty sub manager are not allowed. + int notUsed; + config + .addSubManager({"Shared_part"s, "Unique_part_1"s, "Sense_of_existence"s}) + .addOption("ignore", "", ¬Used); + + // Trying to add a sub manager with the same name at the same place, should + // cause an error. + ASSERT_THROW(config.addSubManager( + {"Shared_part"s, "Unique_part_1"s, "Sense_of_existence"s}), + ad_utility::ConfigManagerOptionPathAlreadyinUseException); + + // An empty vector that should cause an exception. + ASSERT_ANY_THROW(config.addSubManager(std::vector{});); + + /* + Trying to add a sub manager with a path containing strings with + spaces should cause an error. + Reason: A string with spaces in it, can't be read by the short hand + configuration grammar. Ergo, you can't set values, with such paths per + short hand, which we don't want. + */ + ASSERT_THROW(config.addSubManager({"Shared part"s, "Sense_of_existence"s}); + , ad_utility::NotValidShortHandNameException); +} + +TEST(ConfigManagerTest, ParseConfigNoSubManager) { ad_utility::ConfigManager config{}; // Adding the options. @@ -108,14 +141,14 @@ TEST(ConfigManagerTest, ParseConfig) { // The json for testing `parseConfig`. Sets all of the configuration // options. const nlohmann::json testJson(nlohmann::json::parse(R"--({ -"depth_0": { - "Option_0": 10, - "depth_1": { - "Option_1": 11 - } -}, -"Option_2": 12 -})--")); + "depth_0": { + "Option_0": 10, + "depth_1": { + "Option_1": 11 + } + }, + "Option_2": 12 + })--")); // Set and check. config.parseConfig(testJson); @@ -125,7 +158,239 @@ TEST(ConfigManagerTest, ParseConfig) { checkOption(optionTwo, thirdInt, true, 12); } -TEST(ConfigManagerTest, ParseConfigExceptionTest) { +TEST(ConfigManagerTest, ParseConfigWithSubManager) { + // Parse the given configManager with the given json and check, that all the + // configOption were set correctly. 
+ auto parseAndCheck = + [](const nlohmann::json& j, ConfigManager& m, + const std::vector>& wantedValues) { + m.parseConfig(j); + + std::ranges::for_each( + wantedValues, [](const std::pair& wantedValue) -> void { + ASSERT_EQ(*wantedValue.first, wantedValue.second); + }); + }; + + // Simple manager, with only one sub manager and no recursion. + ad_utility::ConfigManager managerWithOneSubNoRecursion{}; + ad_utility::ConfigManager& managerSteve = + managerWithOneSubNoRecursion.addSubManager({"personal"s, "Steve"s}); + int steveId; + managerSteve.addOption("Id", "", &steveId, 4); + int steveInfractions; + managerSteve.addOption("Infractions", "", &steveInfractions, 6); + + parseAndCheck(nlohmann::json::parse(R"--({ + "personal": { + "Steve": { + "Id": 40, "Infractions" : 60 + } + } + })--"), + managerWithOneSubNoRecursion, + {{&steveId, 40}, {&steveInfractions, 60}}); + + // Adding configuration options to the top level manager. + int amountOfPersonal; + managerWithOneSubNoRecursion.addOption("AmountOfPersonal", "", + &amountOfPersonal, 0); + + parseAndCheck( + nlohmann::json::parse(R"--({ + "AmountOfPersonal" : 1, + "personal": { + "Steve": { + "Id": 30, "Infractions" : 70 + } + } + })--"), + managerWithOneSubNoRecursion, + {{&amountOfPersonal, 1}, {&steveId, 30}, {&steveInfractions, 70}}); + + // Simple manager, with multiple sub manager and no recursion. 
+ ad_utility::ConfigManager managerWithMultipleSubNoRecursion{}; + ad_utility::ConfigManager& managerDave = + managerWithMultipleSubNoRecursion.addSubManager({"personal"s, "Dave"s}); + ad_utility::ConfigManager& managerJanice = + managerWithMultipleSubNoRecursion.addSubManager({"personal"s, "Janice"s}); + int daveId; + managerDave.addOption("Id", "", &daveId, 7); + int janiceId; + managerJanice.addOption("Id", "", &janiceId, 11); + int daveInfractions; + managerDave.addOption("Infractions", "", &daveInfractions, 1); + int janiceInfractions; + managerJanice.addOption("Infractions", "", &janiceInfractions, 143); + + parseAndCheck(nlohmann::json::parse(R"--({ + "personal": { + "Dave": { + "Id": 4, "Infractions" : 0 + }, + "Janice": { + "Id": 0, "Infractions" : 6 + } + } + })--"), + managerWithMultipleSubNoRecursion, + {{&daveId, 4}, + {&daveInfractions, 0}, + {&janiceId, 0}, + {&janiceInfractions, 6}}); + + // Adding configuration options to the top level manager. + managerWithMultipleSubNoRecursion.addOption("AmountOfPersonal", "", + &amountOfPersonal, 0); + + parseAndCheck(nlohmann::json::parse(R"--({ + "AmountOfPersonal" : 1, + "personal": { + "Dave": { + "Id": 6, "Infractions" : 2 + }, + "Janice": { + "Id": 2, "Infractions" : 8 + } + } + })--"), + managerWithMultipleSubNoRecursion, + {{&amountOfPersonal, 1}, + {&daveId, 6}, + {&daveInfractions, 2}, + {&janiceId, 2}, + {&janiceInfractions, 8}}); + + // Complex manager with recursion. 
+ ad_utility::ConfigManager managerWithRecursion{}; + ad_utility::ConfigManager& managerDepth1 = + managerWithRecursion.addSubManager({"depth1"s}); + ad_utility::ConfigManager& managerDepth2 = + managerDepth1.addSubManager({"depth2"s}); + + ad_utility::ConfigManager& managerAlex = + managerDepth2.addSubManager({"personal"s, "Alex"s}); + int alexId; + managerAlex.addOption("Id", "", &alexId, 8); + int alexInfractions; + managerAlex.addOption("Infractions", "", &alexInfractions, 4); + + ad_utility::ConfigManager& managerPeter = + managerDepth2.addSubManager({"personal"s, "Peter"s}); + int peterId; + managerPeter.addOption("Id", "", &peterId, 8); + int peterInfractions; + managerPeter.addOption("Infractions", "", &peterInfractions, 4); + + parseAndCheck(nlohmann::json::parse(R"--({ + "depth1": { + "depth2": { + "personal": { + "Alex": { + "Id": 4, "Infractions" : 0 + }, + "Peter": { + "Id": 0, "Infractions" : 6 + } + } + } + } + })--"), + managerWithRecursion, + {{&alexId, 4}, + {&alexInfractions, 0}, + {&peterId, 0}, + {&peterInfractions, 6}}); + + // Add an option to `managerDepth2`. + int someOptionAtDepth2; + managerDepth2.addOption("someOption", "", &someOptionAtDepth2, 7); + + parseAndCheck(nlohmann::json::parse(R"--({ + "depth1": { + "depth2": { + "someOption" : 9, + "personal": { + "Alex": { + "Id": 6, "Infractions" : 2 + }, + "Peter": { + "Id": 2, "Infractions" : 8 + } + } + } + } + })--"), + managerWithRecursion, + {{&someOptionAtDepth2, 9}, + {&alexId, 6}, + {&alexInfractions, 2}, + {&peterId, 2}, + {&peterInfractions, 8}}); + + // Add an option to `managerDepth1`. 
+ int someOptionAtDepth1; + managerDepth1.addOption("someOption", "", &someOptionAtDepth1, 10); + + parseAndCheck(nlohmann::json::parse(R"--({ + "depth1": { + "someOption" : 3, + "depth2": { + "someOption" : 7, + "personal": { + "Alex": { + "Id": 4, "Infractions" : 0 + }, + "Peter": { + "Id": 0, "Infractions" : 6 + } + } + } + } + })--"), + managerWithRecursion, + {{&someOptionAtDepth1, 3}, + {&someOptionAtDepth2, 7}, + {&alexId, 4}, + {&alexInfractions, 0}, + {&peterId, 0}, + {&peterInfractions, 6}}); + + // Add a second sub manager to `managerDepth1`. + int someOptionInSecondSubManagerAtDepth1; + managerDepth1.addSubManager({"random"s}) + .addOption("someOption", "", &someOptionInSecondSubManagerAtDepth1, 1); + + parseAndCheck(nlohmann::json::parse(R"--({ + "depth1": { + "random": { + "someOption" : 8 + }, + "someOption" : 1, + "depth2": { + "someOption" : 5, + "personal": { + "Alex": { + "Id": 2, "Infractions" : -2 + }, + "Peter": { + "Id": -2, "Infractions" : 4 + } + } + } + } + })--"), + managerWithRecursion, + {{&someOptionInSecondSubManagerAtDepth1, 8}, + {&someOptionAtDepth1, 1}, + {&someOptionAtDepth2, 5}, + {&alexId, 2}, + {&alexInfractions, -2}, + {&peterId, -2}, + {&peterInfractions, 4}}); +} + +TEST(ConfigManagerTest, ParseConfigExceptionWithoutSubManagerTest) { ad_utility::ConfigManager config{}; // Add one option with default and one without. @@ -144,11 +409,13 @@ TEST(ConfigManagerTest, ParseConfigExceptionTest) { // Should throw an exception, if we try set an option, that isn't there. 
AD_EXPECT_THROW_WITH_MESSAGE( config.parseConfig(nlohmann::json::parse( - R"--({"depth_0":{"Without_default":42, "with_default" : [39]}})--")), + R"--({"depth_0":{"Without_default":42, "with_default" : + [39]}})--")), ::testing::ContainsRegex(R"('/depth_0/with_default')")); AD_EXPECT_THROW_WITH_MESSAGE( config.parseConfig(nlohmann::json::parse( - R"--({"depth_0":{"Without_default":42, "test_string" : "test"}})--")), + R"--({"depth_0":{"Without_default":42, "test_string" : + "test"}})--")), ::testing::ContainsRegex(R"('/depth_0/test_string')")); /* @@ -158,7 +425,8 @@ TEST(ConfigManagerTest, ParseConfigExceptionTest) { */ AD_EXPECT_THROW_WITH_MESSAGE( config.parseConfig(nlohmann::json::parse( - R"--({"depth_0":{"Without_default":42, "With_default" : {"value" : 4}}})--")), + R"--({"depth_0":{"Without_default":42, "With_default" : {"value" : + 4}}})--")), ::testing::ContainsRegex(R"('/depth_0/With_default/value')")); // Parsing with a non json object literal is not allowed. @@ -185,6 +453,119 @@ TEST(ConfigManagerTest, ParseConfigExceptionTest) { ConfigManagerParseConfigNotJsonObjectLiteralException); } +TEST(ConfigManagerTest, ParseConfigExceptionWithSubManagerTest) { + ad_utility::ConfigManager config{}; + + // Empty sub managers are not allowed. + ad_utility::ConfigManager& m1 = config.addSubManager({"some"s, "manager"s}); + AD_EXPECT_THROW_WITH_MESSAGE( + config.parseConfig(nlohmann::json::parse(R"--({})--")), + ::testing::ContainsRegex(R"('/some/manager')")); + int notUsedInt; + config.addOption("Ignore", "Must not be set. Has default value.", ¬UsedInt, + 41); + AD_EXPECT_THROW_WITH_MESSAGE( + config.parseConfig(nlohmann::json::parse(R"--({})--")), + ::testing::ContainsRegex(R"('/some/manager')")); + + // Add one option with default and one without. + std::vector notUsedVector; + m1.addOption({"depth_0"s, "Without_default"s}, + "Must be set. Has no default value.", ¬UsedInt); + m1.addOption({"depth_0"s, "With_default"s}, + "Must not be set. 
Has default value.", ¬UsedVector, {40, 41}); + + // Should throw an exception, if we don't set all options, that must be set. + ASSERT_THROW(config.parseConfig(nlohmann::json::parse(R"--({})--")), + ad_utility::ConfigOptionWasntSetException); + + // Should throw an exception, if we try set an option, that isn't there. + AD_EXPECT_THROW_WITH_MESSAGE( + config.parseConfig(nlohmann::json::parse( + R"--({"some":{ "manager": {"depth_0":{"Without_default":42, + "with_default" : [39]}}}})--")), + ::testing::ContainsRegex(R"('/some/manager/depth_0/with_default')")); + AD_EXPECT_THROW_WITH_MESSAGE( + config.parseConfig(nlohmann::json::parse( + R"--({"some":{ "manager": {"depth_0":{"Without_default":42, + "test_string" : "test"}}}})--")), + ::testing::ContainsRegex(R"('/some/manager/depth_0/test_string')")); + + /* + Should throw an exception, if we try set an option with a value, that we + already know, can't be valid, regardless of the actual internal type of the + configuration option. That is, it's neither an array, nor a primitive. + */ + AD_EXPECT_THROW_WITH_MESSAGE( + config.parseConfig(nlohmann::json::parse( + R"--({"some":{ "manager": {"depth_0":{"Without_default":42, + "With_default" : {"value" : 4}}}}})--")), + ::testing::ContainsRegex( + R"('/some/manager/depth_0/With_default/value')")); + + // Repeat all those tests, but with a second sub manager added to the first + // one. + ad_utility::ConfigManager config2{}; + + // Empty sub managers are not allowed. + ad_utility::ConfigManager& config2m1 = + config2.addSubManager({"some"s, "manager"s}); + ad_utility::ConfigManager& config2m2 = + config2m1.addSubManager({"some"s, "manager"s}); + AD_EXPECT_THROW_WITH_MESSAGE( + config2.parseConfig(nlohmann::json::parse(R"--({})--")), + ::testing::ContainsRegex(R"('/some/manager/some/manager')")); + config2.addOption("Ignore", "Must not be set. 
Has default value.", + &notUsedInt, 41); + AD_EXPECT_THROW_WITH_MESSAGE( + config2.parseConfig(nlohmann::json::parse(R"--({})--")), + ::testing::ContainsRegex(R"('/some/manager/some/manager')")); + config2m1.addOption("Ignore", "Must not be set. Has default value.", + &notUsedInt, 41); + AD_EXPECT_THROW_WITH_MESSAGE( + config2.parseConfig(nlohmann::json::parse(R"--({})--")), + ::testing::ContainsRegex(R"('/some/manager/some/manager')")); + + // Add one option with default and one without. + config2m2.addOption({"depth_0"s, "Without_default"s}, + "Must be set. Has no default value.", &notUsedInt); + config2m2.addOption({"depth_0"s, "With_default"s}, + "Must not be set. Has default value.", &notUsedVector, + {40, 41}); + + // Should throw an exception, if we don't set all options, that must be set. + ASSERT_THROW(config2.parseConfig(nlohmann::json::parse(R"--({})--")), + ad_utility::ConfigOptionWasntSetException); + + // Should throw an exception, if we try set an option, that isn't there. + AD_EXPECT_THROW_WITH_MESSAGE( + config2.parseConfig(nlohmann::json::parse( + R"--({"some":{ "manager": {"some":{ "manager": + {"depth_0":{"Without_default":42, "with_default" : [39]}}}}}})--")), + ::testing::ContainsRegex( + R"('/some/manager/some/manager/depth_0/with_default')")); + AD_EXPECT_THROW_WITH_MESSAGE( + config2.parseConfig(nlohmann::json::parse( + R"--({"some":{ "manager": {"some":{ "manager": + {"depth_0":{"Without_default":42, "test_string" : + "test"}}}}}})--")), + ::testing::ContainsRegex( + R"('/some/manager/some/manager/depth_0/test_string')")); + + /* + Should throw an exception, if we try set an option with a value, that we + already know, can't be valid, regardless of the actual internal type of the + configuration option. That is, it's neither an array, nor a primitive. 
+ */ + AD_EXPECT_THROW_WITH_MESSAGE( + config2.parseConfig(nlohmann::json::parse( + R"--({"some":{ "manager": {"some":{ "manager": + {"depth_0":{"Without_default":42, "With_default" : {"value" : + 4}}}}}}})--")), + ::testing::ContainsRegex( + R"('/some/manager/some/manager/depth_0/With_default/value')")); +} + TEST(ConfigManagerTest, ParseShortHandTest) { ad_utility::ConfigManager config{}; @@ -319,5 +700,24 @@ TEST(ConfigManagerTest, PrintConfigurationDocExistence) { config.addOption("WithoutDefault", "", &notUsed); ASSERT_NO_THROW(config.printConfigurationDoc(false)); ASSERT_NO_THROW(config.printConfigurationDoc(true)); + + ad_utility::ConfigManager& subMan = + config.addSubManager({"Just"s, "some"s, "sub-manager"}); + subMan.addOption("WithDefault", "", &notUsed, 42); + subMan.addOption("WithoutDefault", "", &notUsed); + ASSERT_NO_THROW(config.printConfigurationDoc(false)); + ASSERT_NO_THROW(config.printConfigurationDoc(true)); + + // Printing with an empty sub manager should never be possible. + subMan.addSubManager({"Just"s, "some"s, "other"s, "sub-manager"}); + AD_EXPECT_THROW_WITH_MESSAGE( + config.printConfigurationDoc(false), + ::testing::ContainsRegex( + R"('/Just/some/sub-manager/Just/some/other/sub-manager')")); + AD_EXPECT_THROW_WITH_MESSAGE( + config.printConfigurationDoc(true), + ::testing::ContainsRegex( + R"('/Just/some/sub-manager/Just/some/other/sub-manager')")); } + } // namespace ad_utility From 1ec1d13e61c4afacdaaff07e8c430eb3a8ded1d3 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 11:04:12 +0200 Subject: [PATCH 29/63] Cleaned up code formatting. 
--- src/util/ConfigManager/ConfigExceptions.cpp | 4 ++-- src/util/ConfigManager/ConfigManager.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/util/ConfigManager/ConfigExceptions.cpp b/src/util/ConfigManager/ConfigExceptions.cpp index 70ce2436b1..185ef601dd 100644 --- a/src/util/ConfigManager/ConfigExceptions.cpp +++ b/src/util/ConfigManager/ConfigExceptions.cpp @@ -2,13 +2,13 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (June of 2023, schlegea@informatik.uni-freiburg.de) +#include "util/ConfigManager/ConfigExceptions.h" + #include #include #include -#include "util/ConfigManager/ConfigExceptions.h" - namespace ad_utility { //_____________________________________________________________________________ std::string& ExceptionWithMessage::getMessage() { return message_; } diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 8e9fb00540..0fbd9a0d1f 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -2,6 +2,8 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (March of 2023, schlegea@informatik.uni-freiburg.de) +#include "util/ConfigManager/ConfigManager.h" + #include #include #include @@ -22,7 +24,6 @@ #include "util/Algorithm.h" #include "util/ConfigManager/ConfigExceptions.h" -#include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/ConfigManager/ConfigShorthandVisitor.h" #include "util/ConfigManager/ConfigUtil.h" From eaecacce58ecdf0424ab77ee976011c6c23320d9 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 13:53:53 +0200 Subject: [PATCH 30/63] src/index/IndexImpl: No longer write to jsonConfiguration in readConfiguration. 
--- src/index/IndexImpl.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 2cf55e0758..d6403607b1 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,8 +4,6 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) -#include "./IndexImpl.h" - #include #include #include @@ -13,6 +11,7 @@ #include #include +#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" @@ -845,10 +844,9 @@ void IndexImpl::readConfiguration() { // One of the options can't be done with `ConfigManager`. ad_utility::makeIfstream(onDiskBase_ + CONFIGURATION_FILE) >> configurationJson_; - config.parseConfig(fileToJson(onDiskBase_ + CONFIGURATION_FILE)); + config.parseConfig(configurationJson_); if (!gitHash.empty()) { - configurationJson_["git-hash"] = gitHash; LOG(INFO) << "The git hash used to build this index was " << gitHash.substr(0, 6) << std::endl; } else { @@ -886,7 +884,6 @@ void IndexImpl::readConfiguration() { } if (prefixesOption.wasSetAtRuntime()) { - configurationJson_["prefixes"] = boolPrefixes; if (boolPrefixes) { vector prefixes; auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); @@ -900,27 +897,16 @@ void IndexImpl::readConfiguration() { } vocab_.initializeExternalizePrefixes(prefixesExternal); - configurationJson_["prefixes-external"] = prefixesExternal; - configurationJson_["locale"]["language"] = lang; - configurationJson_["locale"]["country"] = country; - configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; vocab_.setLocale(lang, country, ignorePunctuation); textVocab_.setLocale(lang, country, ignorePunctuation); vocab_.initializeInternalizedLangs(languagesInternal); - configurationJson_["languages-internal"] = languagesInternal; if (!hasAllPermutations) { - 
configurationJson_["has-all-permutations"] = false; // If the permutations simply don't exist, then we can never load them. loadAllPermutations_ = false; } - - configurationJson_["num-predicates-normal"] = numPredicatesNormal_; - configurationJson_["num-subjects-normal"] = numSubjectsNormal_; - configurationJson_["num-objects-normal"] = numObjectsNormal_; - configurationJson_["num-triples-normal"] = numTriplesNormal_; } // ___________________________________________________________________________ From e87e61c5133955df3babf81fe286dfaaa7aedf04 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 14:07:21 +0200 Subject: [PATCH 31/63] src/index/IndexImpl: Improved setting of configurationJson_ in readConfiguration via helper function. --- src/index/IndexImpl.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index d6403607b1..be8fa2f1c8 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -842,8 +842,7 @@ void IndexImpl::readConfiguration() { &indexFormatVersionDate); // One of the options can't be done with `ConfigManager`. - ad_utility::makeIfstream(onDiskBase_ + CONFIGURATION_FILE) >> - configurationJson_; + configurationJson_ = fileToJson(onDiskBase_ + CONFIGURATION_FILE); config.parseConfig(configurationJson_); if (!gitHash.empty()) { From 5a2fbc9fac23d28541043e9987ffb46c75b273f3 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 16:08:25 +0200 Subject: [PATCH 32/63] src/index/IndexImpl: Got rid of unneeded wasSetAtRuntime()-check. --- src/index/IndexImpl.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index be8fa2f1c8..b2f87f5081 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -796,8 +796,7 @@ void IndexImpl::readConfiguration() { // TODO Write a description. 
bool boolPrefixes; - const ad_utility::ConfigOption& prefixesOption = - config.addOption("prefixes", "", &boolPrefixes, false); + config.addOption("prefixes", "", &boolPrefixes, false); // TODO Write a description. bool hasAllPermutations; @@ -841,7 +840,6 @@ void IndexImpl::readConfiguration() { "The date of the last breaking change of the index format.", &indexFormatVersionDate); - // One of the options can't be done with `ConfigManager`. configurationJson_ = fileToJson(onDiskBase_ + CONFIGURATION_FILE); config.parseConfig(configurationJson_); @@ -882,17 +880,15 @@ void IndexImpl::readConfiguration() { "Incompatible index format, see log message for details"}; } - if (prefixesOption.wasSetAtRuntime()) { - if (boolPrefixes) { - vector prefixes; - auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); - for (string prefix; std::getline(prefixFile, prefix);) { - prefixes.emplace_back(std::move(prefix)); - } - vocab_.buildCodebookForPrefixCompression(prefixes); - } else { - vocab_.buildCodebookForPrefixCompression(std::vector()); + if (boolPrefixes) { + vector prefixes; + auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); + for (string prefix; std::getline(prefixFile, prefix);) { + prefixes.emplace_back(std::move(prefix)); } + vocab_.buildCodebookForPrefixCompression(prefixes); + } else { + vocab_.buildCodebookForPrefixCompression(std::vector()); } vocab_.initializeExternalizePrefixes(prefixesExternal); From da5584a39ddb10994584a9254e2ddfb6d25fe810 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 16:14:30 +0200 Subject: [PATCH 33/63] src/index/IndexImpl: Local config manager now directly writes to numTriplesPerBatch_ and parserBatchSize_. 
--- src/index/IndexImpl.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index b2f87f5081..734a91ce92 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -982,13 +982,11 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { useParallelParser_); // TODO Write a description. - size_t numTriplesPerBatch; - config.addOption("num-triples-per-batch", "", &numTriplesPerBatch, + config.addOption("num-triples-per-batch", "", &numTriplesPerBatch_, static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); // TODO Write a description. - size_t parserBatchSize; - config.addOption("parser-batch-size", "", &parserBatchSize, + config.addOption("parser-batch-size", "", &parserBatchSize_, PARSER_BATCH_SIZE); // TODO Write a description. @@ -1052,10 +1050,6 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { LOG(INFO) << WARNING_PARALLEL_PARSING << std::endl; } - numTriplesPerBatch_ = numTriplesPerBatch; - - parserBatchSize_ = parserBatchSize; - std::string overflowingIntegersThrow = "overflowing-integers-throw"; std::string overflowingIntegersBecomeDoubles = "overflowing-integers-become-doubles"; From a022ccb992dc9f8bf283c2ce626972dbd9d6579e Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 16:42:15 +0200 Subject: [PATCH 34/63] src/index/IndexImpl: Index version will now be checked before parsing. --- src/index/IndexImpl.cpp | 85 ++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 734a91ce92..02d50a98fd 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -828,19 +828,52 @@ void IndexImpl::readConfiguration() { config.addOption("num-objects-normal", "", &numObjectsNormal_); config.addOption("num-triples-normal", "", &numTriplesNormal_); - // TODO Make this cleaner, than just catching all the fields of the object. 
- size_t indexFormatVersionPullRequestNumber; - config.addOption( - {"index-format-version"s, "pull-request-number"s}, - "The number of the pull request that changed the index format most " - "recently.", - &indexFormatVersionPullRequestNumber); - std::string indexFormatVersionDate; - config.addOption({"index-format-version"s, "date"s}, - "The date of the last breaking change of the index format.", - &indexFormatVersionDate); - configurationJson_ = fileToJson(onDiskBase_ + CONFIGURATION_FILE); + + /* + Because an out of date index format version can cause the parsing for + configuration options to fail, we have to manually check it before parsing. + + For example: Old configuration option could have been deleted. Trying to set + those, would cause an error, before we could actually parse the index format + version. + */ + if (configurationJson_.find("index-format-version") != + configurationJson_.end()) { + auto indexFormatVersion = static_cast( + configurationJson_["index-format-version"]); + const auto& currentVersion = qlever::indexFormatVersion; + if (indexFormatVersion != currentVersion) { + if (indexFormatVersion.date_.toBits() > currentVersion.date_.toBits()) { + LOG(ERROR) << "The version of QLever you are using is too old for this " + "index. Please use a version of QLever that is " + "compatible with this index" + " (PR = " + << indexFormatVersion.prNumber_ << ", Date = " + << indexFormatVersion.date_.toStringAndType().first << ")." + << std::endl; + } else { + LOG(ERROR) << "The index is too old for this version of QLever. " + "We recommend that you rebuild the index and start the " + "server with the current master. Alternatively start the " + "engine with a version of QLever that is compatible with " + "this index (PR = " + << indexFormatVersion.prNumber_ << ", Date = " + << indexFormatVersion.date_.toStringAndType().first << ")." 
+ << std::endl; + } + throw std::runtime_error{ + "Incompatible index format, see log message for details"}; + } + } else { + LOG(ERROR) << "This index was built before versioning was introduced for " + "QLever's index format. Please rebuild your index using the " + "current version of QLever." + << std::endl; + throw std::runtime_error{ + "Incompatible index format, see log message for details"}; + } + config.parseConfig(configurationJson_); if (!gitHash.empty()) { @@ -852,34 +885,6 @@ void IndexImpl::readConfiguration() { << std::endl; } - // Is the index format version up to date? - auto indexFormatVersion = qlever::IndexFormatVersion{ - indexFormatVersionPullRequestNumber, - DateOrLargeYear::parseXsdDate(indexFormatVersionDate)}; - const auto& currentVersion = qlever::indexFormatVersion; - if (indexFormatVersion != currentVersion) { - if (indexFormatVersion.date_.toBits() > currentVersion.date_.toBits()) { - LOG(ERROR) << "The version of QLever you are using is too old for this " - "index. Please use a version of QLever that is " - "compatible with this index" - " (PR = " - << indexFormatVersion.prNumber_ << ", Date = " - << indexFormatVersion.date_.toStringAndType().first << ")." - << std::endl; - } else { - LOG(ERROR) << "The index is too old for this version of QLever. " - "We recommend that you rebuild the index and start the " - "server with the current master. Alternatively start the " - "engine with a version of QLever that is compatible with " - "this index (PR = " - << indexFormatVersion.prNumber_ << ", Date = " - << indexFormatVersion.date_.toStringAndType().first << ")." 
- << std::endl; - } - throw std::runtime_error{ - "Incompatible index format, see log message for details"}; - } - if (boolPrefixes) { vector prefixes; auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); From de0ad8b5520092004c96ec1a857f6750bdff830d Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 17:08:30 +0200 Subject: [PATCH 35/63] src/index/IndexImpl: Fixed mistake, where the values needed for the index format version weren't registered in the config manager. --- src/index/IndexImpl.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 02d50a98fd..2116f1ffeb 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -828,6 +828,18 @@ void IndexImpl::readConfiguration() { config.addOption("num-objects-normal", "", &numObjectsNormal_); config.addOption("num-triples-normal", "", &numTriplesNormal_); + // TODO Make this cleaner, than just catching all the fields of the object. + size_t indexFormatVersionPullRequestNumber; + config.addOption( + {"index-format-version"s, "pull-request-number"s}, + "The number of the pull request that changed the index format most " + "recently.", + &indexFormatVersionPullRequestNumber); + std::string indexFormatVersionDate; + config.addOption({"index-format-version"s, "date"s}, + "The date of the last breaking change of the index format.", + &indexFormatVersionDate); + configurationJson_ = fileToJson(onDiskBase_ + CONFIGURATION_FILE); /* From 5954e06bd7206c353fd0818e7f147897c8689297 Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 17:12:59 +0200 Subject: [PATCH 36/63] Cleaned up code formatting. 
--- src/index/IndexImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 2116f1ffeb..544c99beac 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" From c631b210cbbaf5330d9b24aaf18ca993b474162c Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 28 Jul 2023 17:18:21 +0200 Subject: [PATCH 37/63] Github was being buggy. From 852723432039a61a40b8ecc36c1d5194ae61fb18 Mon Sep 17 00:00:00 2001 From: Andre Date: Sun, 27 Aug 2023 10:44:34 +0200 Subject: [PATCH 38/63] src/index/IndexImpl: Replaced the manual check for the states of the options under cli argument locale with a validator. --- src/index/IndexImpl.cpp | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 544c99beac..e5525e111b 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,8 +4,6 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) -#include "./IndexImpl.h" - #include #include #include @@ -13,6 +11,7 @@ #include #include +#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" @@ -977,20 +976,33 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { // TODO Write a description. 
std::string lang; - const ad_utility::ConfigOption& langOption = config.addOption( - {"locale"s, "language"s}, "", &lang, LOCALE_DEFAULT_LANG); + decltype(auto) langOption = config.addOption({"locale"s, "language"s}, "", + &lang, LOCALE_DEFAULT_LANG); // TODO Write a description. std::string country; - const ad_utility::ConfigOption& countryOption = config.addOption( + decltype(auto) countryOption = config.addOption( {"locale"s, "country"s}, "", &country, LOCALE_DEFAULT_COUNTRY); // TODO Write a description. bool ignorePunctuation; - const ad_utility::ConfigOption& ignorePunctuationOption = + decltype(auto) ignorePunctuationOption = config.addOption({"locale"s, "ignore-punctuation"s}, "", &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); + // Validator for the entries under `locale`. Either they all must use the + // default value, or all must be set at runtime. + config.addOptionValidator( + [](const ad_utility::ConfigOption& langOpt, + const ad_utility::ConfigOption& countryOpt, + const ad_utility::ConfigOption& ignorePunctuationOpt) { + return langOpt.wasSetAtRuntime() == countryOpt.wasSetAtRuntime() && + countryOpt.wasSetAtRuntime() == + ignorePunctuationOpt.wasSetAtRuntime(); + }, + "All three options under 'locale' must be set, or none of them.", + langOption, countryOption, ignorePunctuationOption); + // TODO Write a description. config.addOption("ascii-prefixes-only", "", &onlyAsciiTurtlePrefixes_, onlyAsciiTurtlePrefixes_); @@ -1030,14 +1042,6 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { * locale setting. 
*/ - if (langOption.wasSetAtRuntime() != countryOption.wasSetAtRuntime() || - countryOption.wasSetAtRuntime() != - ignorePunctuationOption.wasSetAtRuntime()) { - throw std::runtime_error(absl::StrCat( - "All three options under 'locale' must be set, or none of them.", - config.printConfigurationDoc(true))); - } - LOG(INFO) << "You specified \"locale = " << lang << "_" << country << "\" " << "and \"ignore-punctuation = " << ignorePunctuation << "\"" << std::endl; From 8c833b6a45a8ae675e854c636bdf1608d548ca19 Mon Sep 17 00:00:00 2001 From: Andre Date: Sun, 27 Aug 2023 10:57:17 +0200 Subject: [PATCH 39/63] src/index/IndexImpl: Added TODO for possible validator candidate. --- src/index/IndexImpl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index e5525e111b..adf8957af8 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1097,6 +1097,8 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::OverflowingToDouble; } else { + // TODO This can maybe be replaced with a validator, if the logging of the + // information is not needed. AD_CONTRACT_CHECK(std::find(allModes.begin(), allModes.end(), parserIntegerOverflowBehavior) == allModes.end()); From a581945df507c93f1347b6bc24fa8d8356828f3d Mon Sep 17 00:00:00 2001 From: Andre Date: Sun, 27 Aug 2023 10:59:37 +0200 Subject: [PATCH 40/63] Cleaned up code formatting. 
--- src/index/IndexImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index adf8957af8..83c4b2af55 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" From 1471823cb2e1597b13d0ad04114c8d6ac31761ef Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 1 Sep 2023 10:40:21 +0200 Subject: [PATCH 41/63] src/index/IndexImpl: Added sub manager for easier organization. --- src/index/IndexImpl.cpp | 48 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 95bb882155..bf30eb8f11 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,8 +4,6 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) -#include "./IndexImpl.h" - #include #include #include @@ -13,6 +11,7 @@ #include #include +#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" @@ -807,17 +806,18 @@ void IndexImpl::readConfiguration() { std::vector prefixesExternal; config.addOption("prefixes-external", "", &prefixesExternal, {}); + decltype(auto) localeManager = config.addSubManager({"locale"s}); // TODO Write a description. std::string lang; - config.addOption({"locale"s, "language"s}, "", &lang); + localeManager.addOption("language", "", &lang); // TODO Write a description. 
std::string country; - config.addOption({"locale"s, "country"s}, "", &country); + localeManager.addOption("country", "", &country); // TODO Write a description. bool ignorePunctuation; - config.addOption({"locale"s, "ignore-punctuation"s}, "", &ignorePunctuation); + localeManager.addOption("ignore-punctuation", "", &ignorePunctuation); // TODO Write a description. std::vector languagesInternal; @@ -829,17 +829,22 @@ void IndexImpl::readConfiguration() { config.addOption("num-objects-normal", "", &numObjectsNormal_); config.addOption("num-triples-normal", "", &numTriplesNormal_); - // TODO Make this cleaner, than just catching all the fields of the object. + /* + We check those options manually below, but add the options anyway for + documentation and parsing purpose. (A config manager doesn't allow any + options to be passed, that are not registered within him.) + */ + decltype(auto) indexFormatVersionManager = + config.addSubManager({"index-format-version"s}); size_t indexFormatVersionPullRequestNumber; - config.addOption( - {"index-format-version"s, "pull-request-number"s}, - "The number of the pull request that changed the index format most " - "recently.", - &indexFormatVersionPullRequestNumber); + indexFormatVersionManager.addOption("pull-request-number", + "The number of the pull request that " + "changed the index format most recently.", + &indexFormatVersionPullRequestNumber); std::string indexFormatVersionDate; - config.addOption({"index-format-version"s, "date"s}, - "The date of the last breaking change of the index format.", - &indexFormatVersionDate); + indexFormatVersionManager.addOption( + "date", "The date of the last breaking change of the index format.", + &indexFormatVersionDate); configurationJson_ = fileToJson(onDiskBase_ + CONFIGURATION_FILE); @@ -975,25 +980,26 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { std::vector languagesInternal; config.addOption("languages-internal", "", &languagesInternal, {"en"}); + decltype(auto) 
localeManager = config.addSubManager({"locale"s}); // TODO Write a description. std::string lang; - decltype(auto) langOption = config.addOption({"locale"s, "language"s}, "", - &lang, LOCALE_DEFAULT_LANG); + decltype(auto) langOption = + localeManager.addOption("language", "", &lang, LOCALE_DEFAULT_LANG); // TODO Write a description. std::string country; - decltype(auto) countryOption = config.addOption( - {"locale"s, "country"s}, "", &country, LOCALE_DEFAULT_COUNTRY); + decltype(auto) countryOption = + localeManager.addOption("country", "", &country, LOCALE_DEFAULT_COUNTRY); // TODO Write a description. bool ignorePunctuation; decltype(auto) ignorePunctuationOption = - config.addOption({"locale"s, "ignore-punctuation"s}, "", - &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); + localeManager.addOption("ignore-punctuation", "", &ignorePunctuation, + LOCALE_DEFAULT_IGNORE_PUNCTUATION); // Validator for the entries under `locale`. Either they all must use the // default value, or all must be set at runtime. - config.addOptionValidator( + localeManager.addOptionValidator( [](const ad_utility::ConfigOption& langOpt, const ad_utility::ConfigOption& countryOpt, const ad_utility::ConfigOption& ignorePunctuationOpt) { From 14ed6d5b8dce3166caaa0412cb874ddc4a6ed5be Mon Sep 17 00:00:00 2001 From: Andre Date: Fri, 1 Sep 2023 10:46:33 +0200 Subject: [PATCH 42/63] Cleaned up code formatting. 
--- src/index/IndexImpl.cpp | 3 ++- src/util/ConfigManager/ConfigExceptions.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index bf30eb8f11..61e101acf7 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" diff --git a/src/util/ConfigManager/ConfigExceptions.cpp b/src/util/ConfigManager/ConfigExceptions.cpp index de482076c6..f11dd1527f 100644 --- a/src/util/ConfigManager/ConfigExceptions.cpp +++ b/src/util/ConfigManager/ConfigExceptions.cpp @@ -2,13 +2,13 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (June of 2023, schlegea@informatik.uni-freiburg.de) +#include "util/ConfigManager/ConfigExceptions.h" + #include #include #include -#include "util/ConfigManager/ConfigExceptions.h" - namespace ad_utility { //_____________________________________________________________________________ std::string& ExceptionWithMessage::getMessage() { return message_; } From e367e96d4b308717298abf9985c77495300e3744 Mon Sep 17 00:00:00 2001 From: Andre Date: Tue, 3 Oct 2023 10:31:49 +0200 Subject: [PATCH 43/63] Fixed code formatting. 
--- src/index/IndexImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 595b599785..3f6808d9a1 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" From ad104adcd8e3a8766c147a1ca77dd47affdee779 Mon Sep 17 00:00:00 2001 From: Andre Date: Mon, 6 Nov 2023 11:58:22 +0100 Subject: [PATCH 44/63] src/index/IndexImpl.cpp: Added description to the validator. --- src/index/IndexImpl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 3f6808d9a1..5f0ce971b1 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1026,6 +1026,7 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { ignorePunctuationOpt.wasSetAtRuntime(); }, "All three options under 'locale' must be set, or none of them.", + "All three options under 'locale' must be set, or none of them.", langOption, countryOption, ignorePunctuationOption); // TODO Write a description. From cb0d2b1c95eddbd5965158eac41b0a9a6a70ed82 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 23 Nov 2023 12:17:51 +0100 Subject: [PATCH 45/63] Added some comments. 
--- src/index/IndexImpl.cpp | 78 ++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 12d7f7978a..67899382ee 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -991,30 +991,42 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( void IndexImpl::readIndexBuilderSettingsFromFile() { ad_utility::ConfigManager config{}; - // TODO Write a description. std::vector prefixesExternal; - config.addOption("prefixes-external", "", &prefixesExternal, {}); + config.addOption("prefixes-external", + "Literals or IRIs that start with any of these prefixes " + "will be stored in the external vocabulary. For example " + "`[\"<\"] will externalize all IRIs", + &prefixesExternal, {}); - // TODO Write a description. std::vector languagesInternal; - config.addOption("languages-internal", "", &languagesInternal, {"en"}); - + config.addOption("languages-internal", + "Literals with one of these langauge tag will be stored in " + "the internal vocabulary by default", + &languagesInternal, {"en"}); + + // TODO It would be nice to add a description to this + // submanager directly, e.g. "The locale used for all operations that depend + // on the lexicographical order of strings, e.g. ORDER BY" decltype(auto) localeManager = config.addSubManager({"locale"s}); - // TODO Write a description. + + // Should be self-explanatory with the default value. std::string lang; decltype(auto) langOption = localeManager.addOption("language", "", &lang, LOCALE_DEFAULT_LANG); - // TODO Write a description. + // Should be self-explanatory with the default value. std::string country; decltype(auto) countryOption = localeManager.addOption("country", "", &country, LOCALE_DEFAULT_COUNTRY); - // TODO Write a description. 
bool ignorePunctuation; - decltype(auto) ignorePunctuationOption = - localeManager.addOption("ignore-punctuation", "", &ignorePunctuation, - LOCALE_DEFAULT_IGNORE_PUNCTUATION); + decltype(auto) ignorePunctuationOption = localeManager.addOption( + "ignore-punctuation", + "If set to true, then punctuation characters will only be considered on " + "the last level of comparisons. This will for example lead to the order " + "\"aa\", \"a.a\", \"ab\" (the first two are basically equal and the dot " + "is only used as a tie break)", + &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); // Validator for the entries under `locale`. Either they all must use the // default value, or all must be set at runtime. @@ -1030,20 +1042,36 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { "All three options under 'locale' must be set, or none of them.", langOption, countryOption, ignorePunctuationOption); - // TODO Write a description. - config.addOption("ascii-prefixes-only", "", &onlyAsciiTurtlePrefixes_, - onlyAsciiTurtlePrefixes_); - - // TODO Write a description. - config.addOption("parallel-parsing", "", &useParallelParser_, - useParallelParser_); - - // TODO Write a description. - config.addOption("num-triples-per-batch", "", &numTriplesPerBatch_, - static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); - - // TODO Write a description. - config.addOption("parser-batch-size", "", &parserBatchSize_, + config.addOption( + "ascii-prefixes-only", + "Activate a faster parsing mode that is relaxed in two ways: 1. It " + "doesn't work if certain corner cases of the Turtle specification are " + "used (e.g. certain non-alphanumeric non-ascii characters in prefixes " + "and IRIs). 2. It allows certain patterns that are actually not valid " + "turtle, for example spaces in IRIs. 
As parsing is not a bottleneck " + "anymore, we recommend setting this to `false` and making sure that the " + "input is valid according to the official RDF Turtle specification", + &onlyAsciiTurtlePrefixes_, onlyAsciiTurtlePrefixes_); + + config.addOption( + "parallel-parsing", + "Enable the parallel parser, which assumes the following properties of " + "the Turtle input: 1. All prefix definitions are at the beginning of the " + "file, 2. All ends of triple blocks (denoted by a dot) are followed by a " + "newline (possibly with other whitespace inbetween), and a dot followed " + "by a newline always denotes the end of a triple block (especially there " + "are no multiline literals). This is true for most reasonably formatted " + "turtle files", + &useParallelParser_, useParallelParser_); + + config.addOption( + "num-triples-per-batch", + "The batch size of the first phase of the index build. Lower values will " + "reduce the RAM consumption of this phase while a too low value might " + "hurt the performance of the index builder", + &numTriplesPerBatch_, static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); + + config.addOption("parser-batch-size", "The internal batch size of the turtle parser. Typically there is no need to change this parameter.", &parserBatchSize_, PARSER_BATCH_SIZE); // TODO Write a description. 
From 286ebb69409b99f5c0771acdb695326e9030a9cd Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 23 Nov 2023 15:44:47 +0100 Subject: [PATCH 46/63] Added some preliminary descriptions to the config manager in IndexImpl.cpp --- src/index/IndexImpl.cpp | 70 +++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 67899382ee..1eb4515ecb 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1071,14 +1071,35 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { "hurt the performance of the index builder", &numTriplesPerBatch_, static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); - config.addOption("parser-batch-size", "The internal batch size of the turtle parser. Typically there is no need to change this parameter.", &parserBatchSize_, - PARSER_BATCH_SIZE); + config.addOption("parser-batch-size", + "The internal batch size of the turtle parser. Typically " + "there is no need to change this parameter.", + &parserBatchSize_, PARSER_BATCH_SIZE); - // TODO Write a description. std::string parserIntegerOverflowBehavior; - config.addOption("parser-integer-overflow-behavior", "", - &parserIntegerOverflowBehavior, - "overflowing-integers-throw"s); + decltype(auto) overflowOption = config.addOption( + "parser-integer-overflow-behavior", + "QLever stores all integer values with a fixed number of bits. This " + "option configures the behavior when an integer in the turtle input " + "cannot be represented by QLever. 
Note that this doesn't affect the " + "behavior of overflows during the query processing", + &parserIntegerOverflowBehavior, "overflowing-integers-throw"s); + + using OverflowMap = + ad_utility::HashMap; + const OverflowMap overflowMap = []() -> OverflowMap { + using enum TurtleParserIntegerOverflowBehavior; + return {{"overflowing-integers-throw", Error}, + {"overflowing-integers-become-doubles", OverflowingToDouble}, + {"all-integers-become-doubles", AllToDouble}}; + }(); + config.addValidator( + [&overflowMap](std::string_view input) -> bool { + return overflowMap.contains(input); + }, + "value must be one of " + + ad_utility::lazyStrJoin(std::views::keys(overflowMap), ", "), + "dummy description for the overflow behavior validator", overflowOption); // Set the options. if (!settingsFileName_.empty()) { @@ -1127,41 +1148,8 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { LOG(INFO) << WARNING_PARALLEL_PARSING << std::endl; } - std::string overflowingIntegersThrow = "overflowing-integers-throw"; - std::string overflowingIntegersBecomeDoubles = - "overflowing-integers-become-doubles"; - std::string allIntegersBecomeDoubles = "all-integers-become-doubles"; - std::vector allModes{overflowingIntegersThrow, - overflowingIntegersBecomeDoubles, - allIntegersBecomeDoubles}; - if (parserIntegerOverflowBehavior == overflowingIntegersThrow) { - LOG(INFO) << "Integers that cannot be represented by QLever will throw " - "an exception" - << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::Error; - } else if (parserIntegerOverflowBehavior == - overflowingIntegersBecomeDoubles) { - LOG(INFO) << "Integers that cannot be represented by QLever will be " - "converted to doubles" - << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::OverflowingToDouble; - } else if (parserIntegerOverflowBehavior == allIntegersBecomeDoubles) { - LOG(INFO) << "All integers will be converted to doubles" << 
std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::OverflowingToDouble; - } else { - // TODO This can maybe be replaced with a validator, if the logging of the - // information is not needed. - AD_CONTRACT_CHECK(std::find(allModes.begin(), allModes.end(), - parserIntegerOverflowBehavior) == - allModes.end()); - LOG(ERROR) << "Invalid value for parser-integer-overflow-behavior" - << std::endl; - LOG(INFO) << "The currently supported values are " - << absl::StrJoin(allModes, ",") << std::endl; - } + turtleParserIntegerOverflowBehavior_ = + overflowMap.at(parserIntegerOverflowBehavior); // Logging used configuration options. LOG(INFO) << config.printConfigurationDoc(true); From 046c41f6746e689c9b2b7ff0413424b44bd95ea1 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 7 Dec 2023 10:55:25 +0100 Subject: [PATCH 47/63] Merge in the master, and see for myself what is going on there. --- src/index/IndexImpl.cpp | 9 ++++++--- src/util/ConfigManager/ConfigManager.h | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 6d467df5ea..f9f9a1cec7 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -23,15 +23,14 @@ #include "parser/ParallelParseBuffer.h" #include "util/BatchedPipeline.h" #include "util/CachingMemoryResource.h" -#include "util/CompressionUsingZstd/ZstdWrapper.h" #include "util/ConfigManager/ConfigManager.h" #include "util/ConfigManager/ConfigOption.h" #include "util/Date.h" #include "util/HashMap.h" #include "util/Serializer/FileSerializer.h" #include "util/TupleHelpers.h" -#include "util/json.h" #include "util/TypeTraits.h" +#include "util/json.h" using std::array; using namespace ad_utility::memory_literals; @@ -1117,7 +1116,11 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { overflowMap.at(parserIntegerOverflowBehavior); // Logging used configuration options. 
- LOG(INFO) << config.printConfigurationDoc(true); + LOG(INFO) + << "Printing the configuration from the settings json file (including " + "implictly defaulted values). For a detailed description of this " + "configuration call `IndexBuilderMain --help`:\n" + << config.printConfigurationDoc(false) << std::endl; } // ___________________________________________________________________________ diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index bbc95facba..76e97859bc 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -204,7 +204,7 @@ class ConfigManager { will stay valid, even after adding more options. */ template DefaultValueType = OptionType> + std::convertible_to DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn ConstConfigOptionProxy addOption( @@ -245,7 +245,7 @@ class ConfigManager { will stay valid, even after adding more options. */ template DefaultValueType = OptionType> + std::convertible_to DefaultValueType = OptionType> requires ad_utility::isTypeContainedIn ConstConfigOptionProxy addOption( From c22c89bcc492ba6a1f39498724bcda5c5d47d2d8 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 7 Dec 2023 13:15:27 +0100 Subject: [PATCH 48/63] src/util/ConfigManager/ConfigManager: Wrote two public helper functions for printConfigurationDoc, so that user have more control over the representation. 
--- src/util/ConfigManager/ConfigManager.cpp | 33 +++++++++++++++---- src/util/ConfigManager/ConfigManager.h | 15 ++++++++- test/ConfigManagerTest.cpp | 19 ++++++++--- .../PrintConfigurationDocComparisonString.h | 21 ++++++------ 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 807500de32..4ad5dce618 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -774,12 +774,34 @@ auto ConfigManager::getValidatorAssignment() const return assignment; } +// ____________________________________________________________________________ +std::string ConfigManager::printConfigurationDocJson() const { + return generateConfigurationDocJson("").dump(2); +} + +// ____________________________________________________________________________ +std::string ConfigManager::printConfigurationDocDetailedList() const { + /* + This works, because sub managers are not allowed to be empty. (This + invariant is checked by the helper function for walking over the hash map + entries, that is used by the `generateConfigurationDocDetailedList` helper + function.) So, the only way for a valid lack of configuration options to be + true, is on the top level. A.k.a. the object, on which + `printConfigurationDocDetailedList` was called. + */ + if (configurationOptions_.empty()) { + return "No configuration options were defined."; + } + + return generateConfigurationDocDetailedList("", getValidatorAssignment()); +} + // ____________________________________________________________________________ std::string ConfigManager::printConfigurationDoc(bool detailed) const { /* This works, because sub managers are not allowed to be empty. (This invariant is checked by the helper function for walking over the hash map - entries, that is used by the `generateConfigurationDoc...` helper + entries, that is used by the `printConfigurationDoc...` helper functions.) 
So, the only way for a valid lack of configuration options to be true, is on the top level. A.k.a. the object, on which `printConfigurationDoc` was called. @@ -789,16 +811,15 @@ std::string ConfigManager::printConfigurationDoc(bool detailed) const { } // We always print the configuration doc json. - const std::string& configurationDocJsonString{absl::StrCat( - "Configuration:\n", generateConfigurationDocJson("").dump(2))}; + const std::string& configurationDocJsonString{ + absl::StrCat("Configuration:\n", printConfigurationDocJson())}; if (!detailed) { return configurationDocJsonString; } - return absl::StrCat( - configurationDocJsonString, "\n\n", - generateConfigurationDocDetailedList("", getValidatorAssignment())); + return absl::StrCat(configurationDocJsonString, "\n\n", + printConfigurationDocDetailedList()); } // ____________________________________________________________________________ diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 76e97859bc..0776622000 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -292,6 +292,19 @@ class ConfigManager { */ static nlohmann::json parseShortHand(const std::string& shortHandString); + /* + @brief Generate a string containing a json representation of the current + config manager configuration. + */ + std::string printConfigurationDocJson() const; + + /* + @brief Create a detailed list about the configuration options, with their + types, values, default values, etc. shown and organized by the sub managers, + that hold them. Validators are also printed. + */ + std::string printConfigurationDocDetailedList() const; + /* @brief Returns a string containing a json configuration and, optionally, the string representations of all added configuration options, togehter with the @@ -779,7 +792,7 @@ class ConfigManager { /* @brief Create a detailed list about the configuration options, with their - types, values, default values, etc. 
shown and organized by the sub managers, + types, values, default values, etc. shown and organized by the sub managers, that hold them. Validator invariant descriptions will be printed according to `ConfigurationDocValidatorAssignment`. diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index a14eb9eb3d..c9c4ccf82b 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -2682,9 +2682,20 @@ TEST(ConfigManagerTest, PrintConfigurationDocComparison) { doubleArgumentValidatorSecondArgument); // Finally, check, if the expected and actual output is the same. - assertStringEqual(exampleConfigManagerExpectedNotDetailedString, - topManager.printConfigurationDoc(false)); - assertStringEqual(exampleConfigManagerExpectedDetailedString, - topManager.printConfigurationDoc(true)); + assertStringEqual(exampleConfigManagerExpectedprintConfigurationDocJsonString, + topManager.printConfigurationDocJson()); + assertStringEqual( + exampleConfigManagerExpectedprintConfigurationDocDetailedListString, + topManager.printConfigurationDocDetailedList()); + assertStringEqual( + absl::StrCat("Configuration:\n", + exampleConfigManagerExpectedprintConfigurationDocJsonString), + topManager.printConfigurationDoc(false)); + assertStringEqual( + absl::StrCat( + "Configuration:\n", + exampleConfigManagerExpectedprintConfigurationDocJsonString, "\n\n", + exampleConfigManagerExpectedprintConfigurationDocDetailedListString), + topManager.printConfigurationDoc(true)); } } // namespace ad_utility diff --git a/test/util/PrintConfigurationDocComparisonString.h b/test/util/PrintConfigurationDocComparisonString.h index f669706e57..67a75591b3 100644 --- a/test/util/PrintConfigurationDocComparisonString.h +++ b/test/util/PrintConfigurationDocComparisonString.h @@ -2,6 +2,9 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (December of 2023, // schlegea@informatik.uni-freiburg.de) +#include + +#include #pragma once @@ -15,14 +18,11 @@ be! 
*/ // The strings to compare against. -#include - -#include constexpr std::string_view emptyConfigManagerExpectedString = "No configuration options were defined."; -constexpr std::string_view exampleConfigManagerExpectedNotDetailedString = - R"--(Configuration: -{ +constexpr std::string_view + exampleConfigManagerExpectedprintConfigurationDocJsonString = + R"--({ "booleanWithoutDescriptionWithoutDefaultValueWithoutValidator": "[must be specified]", "booleanWithoutDescriptionWithoutDefaultValueWithValidator": "[must be specified]", "booleanWithoutDescriptionWithDefaultValueWithKeepDefaultValueWithoutValidator": true, @@ -509,10 +509,9 @@ constexpr std::string_view exampleConfigManagerExpectedNotDetailedString = } })--"; -inline const std::string& exampleConfigManagerExpectedDetailedString = - absl::StrCat(exampleConfigManagerExpectedNotDetailedString, R"--( - -Option 'booleanWithoutDescriptionWithoutDefaultValueWithoutValidator' [boolean] +constexpr std::string_view + exampleConfigManagerExpectedprintConfigurationDocDetailedListString = + R"--(Option 'booleanWithoutDescriptionWithoutDefaultValueWithoutValidator' [boolean] Value: [must be specified] Option 'booleanWithoutDescriptionWithoutDefaultValueWithValidator' [boolean] @@ -1680,4 +1679,4 @@ Sub manager 'subManager' Value: [must be specified] Required invariants: - - Validator for configuration options doubleArgumentValidatorFirstArgument, doubleArgumentValidatorSecondArgument.)--"); + - Validator for configuration options doubleArgumentValidatorFirstArgument, doubleArgumentValidatorSecondArgument.)--"; From 3ed1eabe68b744ff3ecc0a4dd0dbcb2d464ec7a5 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 7 Dec 2023 17:08:22 +0100 Subject: [PATCH 49/63] src/index/IndexImpl::readIndexBuilderSettingsFromFile: Moved the creation of the config manager into its own file. 
--- src/index/IndexImpl.cpp | 96 ++++++++++++++++++++++------------------- src/index/IndexImpl.h | 37 ++++++++++++++++ 2 files changed, 88 insertions(+), 45 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 61629b8094..1e52cd376e 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,15 +4,15 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) -#include "./IndexImpl.h" - #include #include #include #include +#include #include #include +#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" @@ -29,7 +29,6 @@ #include "util/HashMap.h" #include "util/Serializer/FileSerializer.h" #include "util/TupleHelpers.h" -#include "util/json.h" #include "util/TypeTraits.h" #include "util/json.h" @@ -953,21 +952,22 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( } // ___________________________________________________________________________ -void IndexImpl::readIndexBuilderSettingsFromFile() { +std::pair> +IndexImpl::generateConfigManagerForIndexBuilderSettings() { + auto variables{std::make_unique()}; ad_utility::ConfigManager config{}; - std::vector prefixesExternal; config.addOption("prefixes-external", "Literals or IRIs that start with any of these prefixes " "will be stored in the external vocabulary. For example " "`[\"<\"] will externalize all IRIs", - &prefixesExternal, {}); + &variables->prefixesExternal_, {}); - std::vector languagesInternal; config.addOption("languages-internal", "Literals with one of these langauge tag will be stored in " "the internal vocabulary by default", - &languagesInternal, {"en"}); + &variables->languagesInternal_, {"en"}); // TODO It would be nice to add a description to this // submanager directly, e.g. 
"The locale used for all operations that depend @@ -975,23 +975,20 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { decltype(auto) localeManager = config.addSubManager({"locale"s}); // Should be self-explanatory with the default value. - std::string lang; - decltype(auto) langOption = - localeManager.addOption("language", "", &lang, LOCALE_DEFAULT_LANG); + decltype(auto) langOption = localeManager.addOption( + "language", "", &variables->localLang_, LOCALE_DEFAULT_LANG); // Should be self-explanatory with the default value. - std::string country; - decltype(auto) countryOption = - localeManager.addOption("country", "", &country, LOCALE_DEFAULT_COUNTRY); + decltype(auto) countryOption = localeManager.addOption( + "country", "", &variables->localCountry_, LOCALE_DEFAULT_COUNTRY); - bool ignorePunctuation; decltype(auto) ignorePunctuationOption = localeManager.addOption( "ignore-punctuation", "If set to true, then punctuation characters will only be considered on " "the last level of comparisons. This will for example lead to the order " "\"aa\", \"a.a\", \"ab\" (the first two are basically equal and the dot " "is only used as a tie break)", - &ignorePunctuation, LOCALE_DEFAULT_IGNORE_PUNCTUATION); + &variables->localIgnorePunctuation_, LOCALE_DEFAULT_IGNORE_PUNCTUATION); // Validator for the entries under `locale`. Either they all must use the // default value, or all must be set at runtime. @@ -1041,31 +1038,33 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { "there is no need to change this parameter.", &parserBatchSize_, PARSER_BATCH_SIZE); - std::string parserIntegerOverflowBehavior; decltype(auto) overflowOption = config.addOption( "parser-integer-overflow-behavior", "QLever stores all integer values with a fixed number of bits. This " "option configures the behavior when an integer in the turtle input " "cannot be represented by QLever. 
Note that this doesn't affect the " "behavior of overflows during the query processing", - &parserIntegerOverflowBehavior, "overflowing-integers-throw"s); - - using OverflowMap = - ad_utility::HashMap; - const OverflowMap overflowMap = []() -> OverflowMap { - using enum TurtleParserIntegerOverflowBehavior; - return {{"overflowing-integers-throw", Error}, - {"overflowing-integers-become-doubles", OverflowingToDouble}, - {"all-integers-become-doubles", AllToDouble}}; - }(); + &variables->parserIntegerOverflowBehavior_, + "overflowing-integers-throw"s); + config.addValidator( - [&overflowMap](std::string_view input) -> bool { - return overflowMap.contains(input); + [](std::string_view input) -> bool { + return turtleParserIntegerOverflowBehaviorMap_.contains(input); }, "value must be one of " + - ad_utility::lazyStrJoin(std::views::keys(overflowMap), ", "), + ad_utility::lazyStrJoin( + std::views::keys(turtleParserIntegerOverflowBehaviorMap_), ", "), "dummy description for the overflow behavior validator", overflowOption); + return std::make_pair(std::move(config), std::move(variables)); +} + +// ___________________________________________________________________________ +void IndexImpl::readIndexBuilderSettingsFromFile() { + auto [config, + configVariablesPointer]{generateConfigManagerForIndexBuilderSettings()}; + auto& configVariables{*configVariablesPointer}; + // Set the options. 
if (!settingsFileName_.empty()) { config.parseConfig(fileToJson(settingsFileName_)); @@ -1073,8 +1072,8 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { config.parseConfig(json(json::value_t::object)); } - vocab_.initializeExternalizePrefixes(prefixesExternal); - configurationJson_["prefixes-external"] = prefixesExternal; + vocab_.initializeExternalizePrefixes(configVariables.prefixesExternal_); + configurationJson_["prefixes-external"] = configVariables.prefixesExternal_; /** * ICU uses two separate arguments for each Locale, the language ("en" or @@ -1083,11 +1082,13 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { * locale setting. */ - LOG(INFO) << "You specified \"locale = " << lang << "_" << country << "\" " - << "and \"ignore-punctuation = " << ignorePunctuation << "\"" - << std::endl; + LOG(INFO) << "You specified \"locale = " << configVariables.localLang_ << "_" + << configVariables.localCountry_ << "\" " + << "and \"ignore-punctuation = " + << configVariables.localIgnorePunctuation_ << "\"" << std::endl; - if (lang != LOCALE_DEFAULT_LANG || country != LOCALE_DEFAULT_COUNTRY) { + if (configVariables.localLang_ != LOCALE_DEFAULT_LANG || + configVariables.localCountry_ != LOCALE_DEFAULT_COUNTRY) { LOG(WARN) << "You are using Locale settings that differ from the default " "language or country.\n\t" << "This should work but is untested by the QLever team. If " @@ -1096,14 +1097,18 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { "filing a bug report. 
Also note that changing the\n\t" << "locale requires to completely rebuild the index\n"; } - vocab_.setLocale(lang, country, ignorePunctuation); - textVocab_.setLocale(lang, country, ignorePunctuation); - configurationJson_["locale"]["language"] = lang; - configurationJson_["locale"]["country"] = country; - configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; - - vocab_.initializeInternalizedLangs(languagesInternal); - configurationJson_["languages-internal"] = languagesInternal; + vocab_.setLocale(configVariables.localLang_, configVariables.localCountry_, + configVariables.localIgnorePunctuation_); + textVocab_.setLocale(configVariables.localLang_, + configVariables.localCountry_, + configVariables.localIgnorePunctuation_); + configurationJson_["locale"]["language"] = configVariables.localLang_; + configurationJson_["locale"]["country"] = configVariables.localCountry_; + configurationJson_["locale"]["ignore-punctuation"] = + configVariables.localIgnorePunctuation_; + + vocab_.initializeInternalizedLangs(configVariables.languagesInternal_); + configurationJson_["languages-internal"] = configVariables.languagesInternal_; if (onlyAsciiTurtlePrefixes_) { LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; @@ -1114,7 +1119,8 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { } turtleParserIntegerOverflowBehavior_ = - overflowMap.at(parserIntegerOverflowBehavior); + turtleParserIntegerOverflowBehaviorMap_.at( + configVariables.parserIntegerOverflowBehavior_); // Logging used configuration options. 
LOG(INFO) diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 94761b36f3..6b93c3f466 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -43,6 +43,7 @@ #include "engine/idTable/CompressedExternalIdTable.h" #include "util/CancellationHandle.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/MemorySize/MemorySize.h" using ad_utility::BufferedVector; @@ -631,6 +632,42 @@ class IndexImpl { void writeConfiguration() const; void readConfiguration(); + // Assigns the entries of the enum `TurtleParserIntegerOverflowBehavior` to + // their string representation. + inline static const ad_utility::HashMap + turtleParserIntegerOverflowBehaviorMap_{ + {"overflowing-integers-throw", + TurtleParserIntegerOverflowBehavior::Error}, + {"overflowing-integers-become-doubles", + TurtleParserIntegerOverflowBehavior::OverflowingToDouble}, + {"all-integers-become-doubles", + TurtleParserIntegerOverflowBehavior::AllToDouble}}; + + /* + Some of the variables, that will be set by the `ConfigManager` generated by + `generateConfigManagerForIndexBuilderSettings`, after its parse function was + called. The remaining variables, it writes to, are member variables of + `IndexImpl`. + */ + struct IndexBuilderSettingsVariables { + std::vector prefixesExternal_; + std::vector languagesInternal_; + std::string localLang_; + std::string localCountry_; + bool localIgnorePunctuation_; + std::string parserIntegerOverflowBehavior_; + }; + + /* + @brief Generate the `ConfigManager`, and some of the variables it writes to, + that describes the index builder settings. The remaining variables, it writes + to, are member variables of `IndexImpl`. 
+ */ + std::pair> + generateConfigManagerForIndexBuilderSettings(); + // initialize the index-build-time settings for the vocabulary void readIndexBuilderSettingsFromFile(); From 158422ac79e0c54edac8c4e63efbb1b64c391575 Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 7 Dec 2023 18:01:32 +0100 Subject: [PATCH 50/63] src/index/IndexBuilderMain: The '-h' cli parameter now also prints the configuration options for the index builder. --- src/index/Index.cpp | 5 +++++ src/index/Index.h | 5 +++++ src/index/IndexBuilderMain.cpp | 3 ++- src/index/IndexImpl.cpp | 9 ++++++++- src/index/IndexImpl.h | 13 ++++++++++--- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 29db81f326..068680d75c 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -325,3 +325,8 @@ size_t Index::getResultSizeOfScan(const TripleComponent& col0String, const Permutation::Enum& permutation) const { return pimpl_->getResultSizeOfScan(col0String, col1String, permutation); } + +// ____________________________________________________________________________ +std::string Index::getConfigurationDocForIndexBuilder() { + return pimpl_->getConfigurationDocForIndexBuilder(); +} diff --git a/src/index/Index.h b/src/index/Index.h index 58d13d5eab..69ab519836 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -281,4 +281,9 @@ class Index { // requires including the rather expensive `IndexImpl.h` header IndexImpl& getImpl() { return *pimpl_; } [[nodiscard]] const IndexImpl& getImpl() const { return *pimpl_; } + + /* + @brief Print the detailed documentation of the options for the index builder. 
+ */ + std::string getConfigurationDocForIndexBuilder(); }; diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp index d7ad6364c6..803952a7b1 100644 --- a/src/index/IndexBuilderMain.cpp +++ b/src/index/IndexBuilderMain.cpp @@ -139,7 +139,8 @@ int main(int argc, char** argv) { try { po::store(po::parse_command_line(argc, argv, boostOptions), optionsMap); if (optionsMap.count("help")) { - std::cout << boostOptions << '\n'; + std::cout << boostOptions << '\n' + << index.getConfigurationDocForIndexBuilder() << '\n'; return EXIT_SUCCESS; } po::notify(optionsMap); diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 1e52cd376e..02075e5f3c 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -4,6 +4,8 @@ // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) // 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +#include "./IndexImpl.h" + #include #include #include @@ -12,7 +14,6 @@ #include #include -#include "./IndexImpl.h" #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "index/ConstantsIndexBuilding.h" @@ -1059,6 +1060,12 @@ IndexImpl::generateConfigManagerForIndexBuilderSettings() { return std::make_pair(std::move(config), std::move(variables)); } +// ___________________________________________________________________________ +std::string IndexImpl::getConfigurationDocForIndexBuilder() { + return generateConfigManagerForIndexBuilderSettings() + .first.printConfigurationDoc(true); +} + // ___________________________________________________________________________ void IndexImpl::readIndexBuilderSettingsFromFile() { auto [config, diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 6b93c3f466..4db372080e 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -678,9 +678,16 @@ class IndexImpl { void deleteTemporaryFile(const string& path); public: - // Count the number of "QLever-internal" triples (predicate ql:langtag or - // predicate starts 
with @) and all other triples (that were actually part of - // the input). + /* + @brief Print the detailed documentation of the options for the index builder. + */ + std::string getConfigurationDocForIndexBuilder(); + + /* + Count the number of "QLever-internal" triples (predicate ql:langtag or + predicate starts with @) and all other triples (that were actually part of the + input). + */ NumNormalAndInternal numTriples() const; // The index contains several triples that are not part of the "actual" From bdefed31ce03c5e53bde8c201761f4c396807432 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Tue, 12 Dec 2023 11:45:49 +0100 Subject: [PATCH 51/63] Refactor workflows + revert to standard `apt` (#1185) 1. For a while now, we used `apt-spy` because `apt` was not working reliably within GitHub's workflows. Recently, it was `apt-spy` that was unreliable and so we are reverting back to `apt` 2. Use the occasion to factor out separate `action.yml` "sub"-workflow for installing a compiler on Ubuntu and for installing dependencies on Ubuntu, which are used at various places in our workflows --- .github/workflows/check_index_version.yml | 18 +++------ .github/workflows/code-coverage.yml | 29 ++++---------- .../install-compiler-ubuntu/action.yml | 37 +++++++++++++++++ .../install-dependencies-ubuntu/action.yml | 32 +++++++++++++++ .github/workflows/native-build-conan.yml | 9 ++--- .github/workflows/native-build.yml | 40 ++++--------------- .github/workflows/upload-sonarcloud.yml | 22 +++------- 7 files changed, 99 insertions(+), 88 deletions(-) create mode 100644 .github/workflows/install-compiler-ubuntu/action.yml create mode 100644 .github/workflows/install-dependencies-ubuntu/action.yml diff --git a/.github/workflows/check_index_version.yml b/.github/workflows/check_index_version.yml index 896ab0ec7b..a58f4c11d2 100644 --- a/.github/workflows/check_index_version.yml +++ b/.github/workflows/check_index_version.yml @@ -36,18 +36,12 @@ jobs: ref: 'master' - name: 
Install dependencies - run: | - sudo gem install apt-spy2 && sudo apt-spy2 fix --commit --launchpad --country=US - sudo apt-get update - sudo apt-get install -y libicu-dev tzdata gcc-10 libzstd-dev libjemalloc-dev - - name: Install boost Ubuntu 22.04 - run: sudo add-apt-repository -y ppa:mhier/libboost-latest && sudo apt update && sudo apt install -y libboost1.81-all-dev libboost-url1.81-dev - - name: Install gcc 11 - run : sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt update && sudo apt install -y gcc-11 g++-11 - - - name: Python dependencies - run: sudo apt-get install python3-yaml unzip pkg-config python3-icu - + uses: ./.github/workflows/install-dependencies-ubuntu + - name: Install compiler + uses: ./.github/workflows/install-compiler-ubuntu + with: + compiler: ${{matrix.compiler}} + compiler-version: ${{matrix.compiler-version}} - name: Configure CMake Master working-directory: ${{github.workspace}}/master run: cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} -DCMAKE_TOOLCHAIN_FILE="$(pwd)/toolchains/${{matrix.compiler}}${{matrix.compiler-version}}.cmake" -DADDITIONAL_COMPILER_FLAGS="${{matrix.warnings}} ${{matrix.asan-flags}} ${{matrix.ubsan-flags}}" -DUSE_PARALLEL=true -DRUN_EXPENSIVE_TESTS=true -DENABLE_EXPENSIVE_CHECKS=true diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index c1bce45aea..e1df7528d6 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -42,32 +42,19 @@ jobs: submodules: "recursive" - name: Install dependencies - run: | - sudo gem install apt-spy2 && sudo apt-spy2 fix --commit --launchpad --country=US - sudo apt-get update - - name: Install clang 16 - # The sed command fixes a bug in `llvm.sh` in combination with the latest version of - # `apt-key`. Without it the GPG key for the llvm repository is downloaded but deleted - # immediately after.
+ uses: ./.github/workflows/install-dependencies-ubuntu + - name: Install compiler + uses: ./.github/workflows/install-compiler-ubuntu + with: + compiler: "clang" + compiler-version: "16" + - name: Install coverage tools run: | - wget https://apt.llvm.org/llvm.sh - sudo chmod +x llvm.sh - sed 's/apt-key del/echo/' llvm.sh -iy - sudo ./llvm.sh 16 - sudo apt install -y clang-16 llvm-16 + sudo apt install -y llvm-16 - name: Show path run: | which llvm-profdata-16 which llvm-cov-16 - - name: Install dependencies - run: | - sudo gem install apt-spy2 - sudo apt-spy2 fix --commit --launchpad --country=US - sudo add-apt-repository -y ppa:mhier/libboost-latest - sudo apt-get update - sudo apt-get install -y libicu-dev tzdata libzstd-dev libjemalloc-dev libboost1.81-all-dev libboost-url1.81-dev - - name: Python dependencies - run: sudo apt-get install python3-yaml unzip pkg-config python3-icu python3-pip - name: Create build directory run: mkdir ${{github.workspace}}/build - name: Configure CMake diff --git a/.github/workflows/install-compiler-ubuntu/action.yml b/.github/workflows/install-compiler-ubuntu/action.yml new file mode 100644 index 0000000000..6a13cbe852 --- /dev/null +++ b/.github/workflows/install-compiler-ubuntu/action.yml @@ -0,0 +1,37 @@ +name: "Install various versions of gcc and clang" +description: "dummy description" +inputs: + compiler: + description: "Must be `gcc` or `clang`" + required: true + compiler-version: + description: "the version of the compiler (must be major version)" + required: true + +runs: + using: "composite" + steps: + - name: Add PPA for GCC + if : inputs.compiler == 'gcc' + run : sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt update + shell: bash + + - name: Install gcc + if : inputs.compiler == 'gcc' + run : sudo apt install -y gcc-${{inputs.compiler-version}} g++-${{inputs.compiler-version}} + shell: bash + + - name: Install clang + if : inputs.compiler == 'clang' + # The sed command fixes a bug in `llvm.sh` in 
combination with the latest version of + # `apt-key`. Without it the GPG key for the llvm repository is downloaded but deleted + # immediately after. + run: | + wget https://apt.llvm.org/llvm.sh + sudo chmod +x llvm.sh + sed 's/apt-key del/echo/' llvm.sh -iy + sudo ./llvm.sh ${{inputs.compiler-version}} + sudo apt install -y clang-${{inputs.compiler-version}} + shell: bash + +# TODO Add assertion for unsupported compilers and versions. diff --git a/.github/workflows/install-dependencies-ubuntu/action.yml b/.github/workflows/install-dependencies-ubuntu/action.yml new file mode 100644 index 0000000000..b5c248dfed --- /dev/null +++ b/.github/workflows/install-dependencies-ubuntu/action.yml @@ -0,0 +1,32 @@ +name: "Install dependencies via apt" +description: "dummy description" + +inputs: + install-third-party-libraries: + description: "Should be set to false for conan builds" + required: true + default: "true" + +runs: + using: "composite" + steps: + - name: Install basic compiler + run: | + sudo apt-get update + sudo apt-get install -y build-essential + shell: bash + + - name: Install third-party libraries + if: inputs.install-third-party-libraries == 'true' + run: | + sudo apt-get install -y libicu-dev tzdata libzstd-dev libjemalloc-dev + shell: bash + + - name: Install boost from PPA + if: inputs.install-third-party-libraries == 'true' + run: sudo add-apt-repository -y ppa:mhier/libboost-latest && sudo apt update && sudo apt install -y libboost1.81-all-dev libboost-url1.81-dev + shell: bash + + - name: Install Python packages for E2E tests + run: sudo apt-get install python3-yaml unzip pkg-config python3-icu + shell: bash diff --git a/.github/workflows/native-build-conan.yml b/.github/workflows/native-build-conan.yml index 7fd3cb873a..695c364e00 100644 --- a/.github/workflows/native-build-conan.yml +++ b/.github/workflows/native-build-conan.yml @@ -31,13 +31,10 @@ jobs: submodules: 'recursive' - name: Install dependencies - run: | - sudo gem install apt-spy2 && 
sudo apt-spy2 fix --commit --launchpad --country=US - sudo apt-get update - sudo apt-get install build-essential + uses: ./.github/workflows/install-dependencies-ubuntu + with: + install-third-party-libraries: "false" - - name: Python dependencies - run: sudo apt-get install python3-yaml unzip pkg-config python3-icu - name: Create build directory run: mkdir ${{github.workspace}}/build - name: Install and run conan diff --git a/.github/workflows/native-build.yml b/.github/workflows/native-build.yml index 316f92793f..ca86e221be 100644 --- a/.github/workflows/native-build.yml +++ b/.github/workflows/native-build.yml @@ -59,40 +59,14 @@ jobs: steps: - uses: actions/checkout@v3 with: - submodules: 'recursive' - + submodules: 'recursive' - name: Install dependencies - run: | - sudo gem install apt-spy2 && sudo apt-spy2 fix --commit --launchpad --country=US - sudo apt-get update - sudo apt-get install -y libicu-dev tzdata gcc-10 libzstd-dev libjemalloc-dev - - name: Install boost Ubuntu 22.04 - run: sudo add-apt-repository -y ppa:mhier/libboost-latest && sudo apt update && sudo apt install -y libboost1.81-all-dev libboost-url1.81-dev - - name: Install gcc 11 - run : sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt update && sudo apt install -y gcc-11 g++-11 - if : matrix.compiler == 'gcc' && matrix.compiler-version == 11 - - name: Install gcc 12 - run : sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt update && sudo apt install -y gcc-12 g++-12 - if : matrix.compiler == 'gcc' && matrix.compiler-version == 12 - - - name: Install gcc 13 - run : sudo add-apt-repository ppa:ubuntu-toolchain-r/test && sudo apt update && sudo apt install -y gcc-13 g++-13 - if : matrix.compiler == 'gcc' && matrix.compiler-version == 13 - - - name: Install clang - # The sed command fixes a bug in `llvm.sh` in combination with the latest version of - # `apt-key`. Without it the GPG key for the llvm repository is downloaded but deleted - # immediately after. 
- run: | - wget https://apt.llvm.org/llvm.sh - sudo chmod +x llvm.sh - sed 's/apt-key del/echo/' llvm.sh -iy - sudo ./llvm.sh ${{matrix.compiler-version}} - sudo apt install -y clang-${{matrix.compiler-version}} - if : matrix.compiler == 'clang' - - - name: Python dependencies - run: sudo apt-get install python3-yaml unzip pkg-config python3-icu + uses: ./.github/workflows/install-dependencies-ubuntu + - name: Install compiler + uses: ./.github/workflows/install-compiler-ubuntu + with: + compiler: ${{matrix.compiler}} + compiler-version: ${{matrix.compiler-version}} - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. diff --git a/.github/workflows/upload-sonarcloud.yml b/.github/workflows/upload-sonarcloud.yml index cb48a6cdc3..71d63ef93e 100644 --- a/.github/workflows/upload-sonarcloud.yml +++ b/.github/workflows/upload-sonarcloud.yml @@ -112,22 +112,12 @@ jobs: - name: Install sonar-scanner and build-wrapper uses: SonarSource/sonarcloud-github-c-cpp@v2 - name: Install dependencies - run: | - sudo gem install apt-spy2 - sudo apt-spy2 fix --commit --launchpad --country=US - sudo apt-get update - sudo apt-get install -y libicu-dev tzdata gcc-10 libzstd-dev libjemalloc-dev - - name: Install boost Ubuntu 22.04 - run: sudo add-apt-repository -y ppa:mhier/libboost-latest && sudo apt update && sudo apt install -y libboost1.81-all-dev libboost-url1.81-dev - - name: Install clang 16 - # The sed command fixes a bug in `llvm.sh` in combination with the latest version of - # `apt-key`. Without it the GPG key for the llvm repository is downloaded but deleted - # immediately after. 
- run: | - wget https://apt.llvm.org/llvm.sh - sudo chmod +x llvm.sh - sed 's/apt-key del/echo/' llvm.sh -iy - sudo ./llvm.sh 16 all + uses: ./.github/workflows/install-dependencies-ubuntu + - name: Install compiler + uses: ./.github/workflows/install-compiler-ubuntu + with: + compiler: ${{env.compiler}} + compiler-version: ${{env.compiler-version}} - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. From 5bfa1b4e6e26ba68f3703447cd907eaf345c68cf Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Tue, 12 Dec 2023 11:47:15 +0100 Subject: [PATCH 52/63] Make sonarcloud happy by making an `std::move` explicit (#1179) --- src/engine/GroupBy.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 33151ca3a6..811ac6dd22 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -848,8 +848,7 @@ void GroupBy::extractValues( } }; - std::visit(visitor, std::forward( - expressionResult)); + std::visit(visitor, std::move(expressionResult)); } // _____________________________________________________________________________ From f45fece4a4774c62a9cbb87e4c57eb1fd50a35a2 Mon Sep 17 00:00:00 2001 From: schlegan <43997908+schlegan@users.noreply.github.com> Date: Tue, 12 Dec 2023 13:48:36 +0100 Subject: [PATCH 53/63] Benchmark::ResultTable : Add helper functions for column-based operations (#1174) Add some helper functions for adding statistics columns (that is columns with results that were not measured but computed from other columns) to the result table of a benchmark run. 
--- benchmark/util/ResultTableColumnOperations.h | 100 ++++ test/BenchmarkMeasurementContainerTest.cpp | 42 +- test/CMakeLists.txt | 4 +- test/ResultTableColumnOperationsTest.cpp | 431 ++++++++++++++++++ .../BenchmarkMeasurementContainerHelpers.cpp | 38 ++ .../BenchmarkMeasurementContainerHelpers.h | 43 ++ test/util/CMakeLists.txt | 2 +- 7 files changed, 617 insertions(+), 43 deletions(-) create mode 100644 benchmark/util/ResultTableColumnOperations.h create mode 100644 test/ResultTableColumnOperationsTest.cpp create mode 100644 test/util/BenchmarkMeasurementContainerHelpers.cpp create mode 100644 test/util/BenchmarkMeasurementContainerHelpers.h diff --git a/benchmark/util/ResultTableColumnOperations.h b/benchmark/util/ResultTableColumnOperations.h new file mode 100644 index 0000000000..d961867812 --- /dev/null +++ b/benchmark/util/ResultTableColumnOperations.h @@ -0,0 +1,100 @@ +// Copyright 2022, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Andre Schlegel (November of 2023, +// schlegea@informatik.uni-freiburg.de) + +#pragma once + +#include "../benchmark/infrastructure/BenchmarkMeasurementContainer.h" +#include "util/Exception.h" +#include "util/TypeTraits.h" + +/* +For doing a column based operations, that is, on all the entries. +For example: Adding two columns together, calculating speed up between the +entries of two columns, etc. +*/ +namespace ad_benchmark { + +/* +Column number together with the type of value, that can be found inside the +column. Note, that **all** entries in the column must have the same type, +because of `ResultTable::getEntry`. 
+*/ +template +requires ad_utility::isTypeContainedIn +struct ColumnNumWithType { + using ColumnType = Type; + const size_t columnNum_; +}; + +template +requires(sizeof...(ColumnInputTypes) > 0) void generateColumnWithColumnInput( + ResultTable* const table, + ad_utility::InvocableWithSimilarReturnType< + ColumnReturnType, const ColumnInputTypes&...> auto&& generator, + const ColumnNumWithType& columnToPutResultIn, + const ColumnNumWithType&... inputColumns) { + // Using a column more than once is the sign of an error. + std::array allColumnNums{ + {inputColumns.columnNum_...}}; + std::ranges::sort(allColumnNums); + AD_CONTRACT_CHECK(std::ranges::adjacent_find(allColumnNums) == + allColumnNums.end()); + + // Fill the result column. + for (size_t row = 0; row < table->numRows(); row++) { + table->setEntry( + row, columnToPutResultIn.columnNum_, + std::invoke(generator, table->getEntry( + row, inputColumns.columnNum_)...)); + } +} + +/* +@brief Vector addition with `ResultTable` columns. +*/ +template >... ColumnInputTypes> +requires(sizeof...(ColumnInputTypes) > 1) void sumUpColumns( + ResultTable* const table, + const ColumnNumWithType& columnToPutResultIn, + const ColumnInputTypes&... columnsToSumUp) { + // We can simply pass this to `generateColumnWithColumnInput`. + generateColumnWithColumnInput( + table, + [](const ColumnInputTypes::ColumnType&... values) -> ColumnReturnType { + return (values + ...); + }, + columnToPutResultIn, columnsToSumUp...); +} + +/* +@brief Reads two floating point columns, calculates the relative speedup between +their entries and writes it in a third column. + +@param columnToCalculateFor, columnToCompareAgainst The columns, with which +the question "How much faster than the entries of `columnToCompareAgainst` +are the entries of `columnToCalculateFor`?" is answered. +@param columnToPlaceResultIn This is where the speedup calculation results +will be placed in.
+*/ +inline void calculateSpeedupOfColumn( + ResultTable* const table, + const ColumnNumWithType& columnToPlaceResultIn, + const ColumnNumWithType& columnToCalculateFor, + const ColumnNumWithType& columnToCompareAgainst) { + // We can simply pass this to `generateColumnWithColumnInput`. + generateColumnWithColumnInput( + table, + [](const float compareAgainst, const float calculateFor) -> float { + /* + Speedup calculations only makes sense, if ALL values are bigger than + 0. + */ + AD_CONTRACT_CHECK(compareAgainst > 0.f && calculateFor > 0.f); + return compareAgainst / calculateFor; + }, + columnToPlaceResultIn, columnToCompareAgainst, columnToCalculateFor); +} +} // namespace ad_benchmark diff --git a/test/BenchmarkMeasurementContainerTest.cpp b/test/BenchmarkMeasurementContainerTest.cpp index b05d375e73..b8ddf41901 100644 --- a/test/BenchmarkMeasurementContainerTest.cpp +++ b/test/BenchmarkMeasurementContainerTest.cpp @@ -10,6 +10,7 @@ #include #include "../benchmark/infrastructure/BenchmarkMeasurementContainer.h" +#include "../test/util/BenchmarkMeasurementContainerHelpers.h" #include "util/Exception.h" using namespace std::chrono_literals; @@ -83,47 +84,6 @@ TEST(BenchmarkMeasurementContainerTest, ResultGroup) { ASSERT_EQ(table.getEntry(1, 0), rowNames.at(1)); } -/* -@brief Call the function with each of the alternatives in -`ad_benchmark::ResultTable::EntryType`, except `std::monostate`, as template -parameter. - -@tparam Function The loop body should be a templated function, with one -`typename` template argument and no more. It also shouldn't take any function -arguments. Should be passed per deduction. -*/ -template -static void doForTypeInResultTableEntryType(Function function) { - ad_utility::forEachTypeInTemplateType( - [&function]() { - // `std::monostate` is not important for these kinds of tests. 
- if constexpr (!ad_utility::isSimilar) { - function.template operator()(); - } - }); -} - -// Helper function for creating `ad_benchmark::ResultTable::EntryType` dummy -// values. -template -requires ad_utility::isTypeContainedIn -static Type createDummyValueEntryType() { - if constexpr (ad_utility::isSimilar) { - return 4.2f; - } else if constexpr (ad_utility::isSimilar) { - return "test"s; - } else if constexpr (ad_utility::isSimilar) { - return true; - } else if constexpr (ad_utility::isSimilar) { - return 17361644613946UL; - } else if constexpr (ad_utility::isSimilar) { - return -42; - } else { - // Not a supported type. - AD_FAIL(); - } -} - TEST(BenchmarkMeasurementContainerTest, ResultTable) { // Looks, if the general form is correct. auto checkForm = [](const ResultTable& table, const std::string& name, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a5798e7155..41ff9577e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -335,7 +335,9 @@ addLinkAndDiscoverTest(ConfigUtilTest configManager) addLinkAndDiscoverTest(RandomTest) -addLinkAndDiscoverTest(BenchmarkMeasurementContainerTest benchmark) +addLinkAndDiscoverTest(BenchmarkMeasurementContainerTest benchmark testUtil) + +addLinkAndDiscoverTest(ResultTableColumnOperationsTest benchmark testUtil) addLinkAndDiscoverTest(FindUndefRangesTest engine) diff --git a/test/ResultTableColumnOperationsTest.cpp b/test/ResultTableColumnOperationsTest.cpp new file mode 100644 index 0000000000..c260439811 --- /dev/null +++ b/test/ResultTableColumnOperationsTest.cpp @@ -0,0 +1,431 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Andre Schlegel (November of 2023, +// schlegea@informatik.uni-freiburg.de) + +#include + +#include +#include +#include +#include + +#include "../benchmark/infrastructure/BenchmarkMeasurementContainer.h" +#include "../benchmark/util/ResultTableColumnOperations.h" +#include "../test/util/BenchmarkMeasurementContainerHelpers.h" +#include "../test/util/GTestHelpers.h" +#include "gmock/gmock.h" +#include "util/Exception.h" +#include "util/Random.h" +#include "util/TypeTraits.h" + +namespace ad_benchmark { +// How many rows should the test tables have? +constexpr size_t NUM_ROWS = 10; + +// Does `T` support addition? +template +concept SupportsAddition = requires(T a, T b) { + { a + b } -> std::same_as; +}; + +/* +@brief Create a table for testing purpose. + +@param numRows, numColumns How many rows and columns the created table should +have. +@param columnsWithDummyValues The designated columns are filled with dummy +values via `createDummyValueEntryType`. +*/ +template +static ResultTable createTestTable( + const size_t numRows, const size_t numColumns, + const ColumnNumWithType&... columnsWithDummyValues) { + ResultTable table("", std::vector(numRows, ""), + std::vector(numColumns, "")); + for (size_t i = 0; i < table.numRows(); i++) { + (table.setEntry(i, columnsWithDummyValues.columnNum_, + createDummyValueEntryType()), + ...); + } + return table; +} + +// Compare the column of a `ResultTable` with a `std::vector`. +template +static void compareToColumn( + const std::vector& expectedContent, + const ResultTable& tableToCompareAgainst, + const ColumnNumWithType& columnsToCompareAgainst, + ad_utility::source_location l = ad_utility::source_location::current()) { + // For generating better messages, when failing a test. + auto trace{generateLocationTrace(l, "compareToColumn")}; + + // Compare every entry with the fitting comparsion function. 
+ AD_CONTRACT_CHECK(expectedContent.size() == tableToCompareAgainst.numRows()); + for (size_t i = 0; i < expectedContent.size(); i++) { + if constexpr (std::floating_point) { + ASSERT_FLOAT_EQ(expectedContent.at(i), + tableToCompareAgainst.getEntry( + i, columnsToCompareAgainst.columnNum_)); + } else if constexpr (ad_utility::isSimilar) { + ASSERT_STREQ(expectedContent.at(i).c_str(), + tableToCompareAgainst + .getEntry(i, columnsToCompareAgainst.columnNum_) + .c_str()); + } else { + ASSERT_EQ(expectedContent.at(i), + tableToCompareAgainst.getEntry( + i, columnsToCompareAgainst.columnNum_)); + } + } +} + +/* +@brief Test the general exception cases for a function of +`ResultTableColumnOperations`, that takes two input columns. + +@param callTransform Lambda, that transforms the call arguments into arguments +for the function, that you want to test. Must have the signature `ResultTable* +,const ColumnNumWithType& columnToPutResultIn, const +ColumnNumWithType inputColumnOne, const +ColumnNumWithType inputColumnTwo`. +*/ +static void generalExceptionTestTwoInputColumns( + const auto& callTransform, + ad_utility::source_location l = ad_utility::source_location::current()) { + // For generating better messages, when failing a test. + auto trace{generateLocationTrace(l, "generalExceptionTestTwoInputColumns")}; + + doForTypeInResultTableEntryType([&callTransform]() { + // A call with a `ResultTable`, who has no rows, is valid. + auto table{createTestTable(0, 3, ColumnNumWithType{0}, + ColumnNumWithType{1})}; + ASSERT_NO_THROW(callTransform(&table, ColumnNumWithType{2}, + ColumnNumWithType{1}, + ColumnNumWithType{0})); + + // A call, in which the result column is also an input column, is valid. + table = createTestTable(NUM_ROWS, 3, ColumnNumWithType{0}, + ColumnNumWithType{1}); + ASSERT_NO_THROW(callTransform(&table, ColumnNumWithType{1}, + ColumnNumWithType{1}, + ColumnNumWithType{0})); + + // Exception tests. + // A column contains more than 1 type. 
+ table = createTestTable(std::variant_size_v - 1, 3); + doForTypeInResultTableEntryType([row = 0, &table]() mutable { + table.setEntry(row++, 0, createDummyValueEntryType()); + }); + ASSERT_ANY_THROW(callTransform(&table, ColumnNumWithType{1}, + ColumnNumWithType{0}, + ColumnNumWithType{2})); + + // Wrong input column type. + table = createTestTable(NUM_ROWS, 3, ColumnNumWithType{0}, + ColumnNumWithType{1}); + doForTypeInResultTableEntryType([&table, + &callTransform]() { + if constexpr (!ad_utility::isSimilar) { + ASSERT_ANY_THROW(callTransform(&table, ColumnNumWithType{2}, + ColumnNumWithType{1}, + ColumnNumWithType{0})); + } + }); + + // Column is outside boundaries. + table = createTestTable(NUM_ROWS, 3, ColumnNumWithType{0}, + ColumnNumWithType{1}, ColumnNumWithType{2}); + ASSERT_ANY_THROW(callTransform(&table, ColumnNumWithType{10}, + ColumnNumWithType{1}, + ColumnNumWithType{2})); + ASSERT_ANY_THROW(callTransform(&table, ColumnNumWithType{0}, + ColumnNumWithType{10}, + ColumnNumWithType{2})); + ASSERT_ANY_THROW(callTransform(&table, ColumnNumWithType{0}, + ColumnNumWithType{1}, + ColumnNumWithType{20})); + + // One column is used more than once as an input. + table = createTestTable(NUM_ROWS, 2, ColumnNumWithType{0}, + ColumnNumWithType{1}); + ASSERT_ANY_THROW(callTransform(&table, ColumnNumWithType{1}, + ColumnNumWithType{0}, + ColumnNumWithType{0})); + }); +} + +/* +@brief Test the general exception cases for a function of +`ResultTableColumnOperations`, that takes an unlimited number of input columns +and has a minimum of two input columns. + +@param callTransform Lambda, that transforms the call arguments into arguments +for the function, that you want to test. Must have the signature `ResultTable* +,const ColumnNumWithType& columnToPutResultIn, const +ColumnNumWithType&... inputColumns`. 
+*/ +static void generalExceptionTestUnlimitedInputColumns( + const auto& callTransform, + ad_utility::source_location l = ad_utility::source_location::current()) { + // For generating better messages, when failing a test. + auto trace{ + generateLocationTrace(l, "generalExceptionTestUnlimitedInputColumns")}; + + // We can pass a lot to `generalExceptionTestTwoInputColumns`. + generalExceptionTestTwoInputColumns(callTransform); + + doForTypeInResultTableEntryType([&callTransform]() { + // Column is outside boundaries. + ResultTable table{createTestTable( + NUM_ROWS, 4, ColumnNumWithType{0}, ColumnNumWithType{1}, + ColumnNumWithType{2}, ColumnNumWithType{3})}; + ASSERT_ANY_THROW( + callTransform(&table, ColumnNumWithType{10}, ColumnNumWithType{1}, + ColumnNumWithType{2}, ColumnNumWithType{3})); + ASSERT_ANY_THROW( + callTransform(&table, ColumnNumWithType{0}, ColumnNumWithType{10}, + ColumnNumWithType{2}, ColumnNumWithType{3})); + ASSERT_ANY_THROW( + callTransform(&table, ColumnNumWithType{0}, ColumnNumWithType{1}, + ColumnNumWithType{20}, ColumnNumWithType{3})); + ASSERT_ANY_THROW( + callTransform(&table, ColumnNumWithType{0}, ColumnNumWithType{1}, + ColumnNumWithType{2}, ColumnNumWithType{30})); + }); +} + +TEST(ResultTableColumnOperations, generateColumnWithColumnInput) { + // How many rows should the test table have? + constexpr size_t NUM_ROWS = 10; + + // A lambda, that copies one column into another. + auto columnCopyLambda = [](const auto& d) { return d; }; + + doForTypeInResultTableEntryType([&NUM_ROWS, &columnCopyLambda]() { + // Single parameter operators. + // Two columns. Transcribe column 0 into column 1. + ResultTable table{createTestTable(NUM_ROWS, 2, ColumnNumWithType{0})}; + generateColumnWithColumnInput(&table, columnCopyLambda, + ColumnNumWithType{1}, + ColumnNumWithType{0}); + compareToColumn(std::vector(NUM_ROWS, createDummyValueEntryType()), + table, ColumnNumWithType{1}); + + // Double parameter operators.
+ // Different cases for different `T`. + table = createTestTable(NUM_ROWS, 3, ColumnNumWithType{0}, + ColumnNumWithType{1}); + if constexpr (ad_utility::isSimilar) { + // Do XOR on the column content. + generateColumnWithColumnInput( + &table, [](const T& a, const T& b) { return a != b; }, + ColumnNumWithType{2}, ColumnNumWithType{0}, + ColumnNumWithType{1}); + compareToColumn(std::vector(NUM_ROWS, false), table, + ColumnNumWithType{2}); + } else if constexpr (SupportsAddition) { + // Just do addition. + generateColumnWithColumnInput( + &table, [](const T& a, const T& b) { return a + b; }, + ColumnNumWithType{2}, ColumnNumWithType{0}, + ColumnNumWithType{1}); + compareToColumn( + std::vector(NUM_ROWS, createDummyValueEntryType() + + createDummyValueEntryType()), + table, ColumnNumWithType{2}); + } else { + // We need more cases! + AD_FAIL(); + }; + }); + + // Exception tests. + generalExceptionTestUnlimitedInputColumns([](ResultTable* table, + const auto& columnToPutResultIn, + const auto&... inputColumns) { + generateColumnWithColumnInput( + table, + [](const auto& firstInput, const auto&...) { return firstInput; }, + columnToPutResultIn, inputColumns...); + }); +} + +TEST(ResultTableColumnOperations, SumUpColumns) { + // Normal tests. + doForTypeInResultTableEntryType([]() { + // We only do tests on types, that support addition. + if constexpr (SupportsAddition) { + // Minimal amount of columns. + ResultTable table{createTestTable(NUM_ROWS, 3, ColumnNumWithType{0}, + ColumnNumWithType{1})}; + sumUpColumns(&table, ColumnNumWithType{2}, ColumnNumWithType{1}, + ColumnNumWithType{0}); + compareToColumn( + std::vector(NUM_ROWS, createDummyValueEntryType() + + createDummyValueEntryType()), + table, ColumnNumWithType{2}); + + // Test with more columns. 
+ table = createTestTable(NUM_ROWS, 10, ColumnNumWithType{0}, + ColumnNumWithType{1}, ColumnNumWithType{2}, + ColumnNumWithType{3}, ColumnNumWithType{4}, + ColumnNumWithType{5}, ColumnNumWithType{6}, + ColumnNumWithType{7}, ColumnNumWithType{8}); + sumUpColumns(&table, ColumnNumWithType{9}, ColumnNumWithType{0}, + ColumnNumWithType{1}, ColumnNumWithType{2}, + ColumnNumWithType{3}, ColumnNumWithType{4}, + ColumnNumWithType{5}, ColumnNumWithType{6}, + ColumnNumWithType{7}, ColumnNumWithType{8}); + compareToColumn( + std::vector(NUM_ROWS, createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType() + + createDummyValueEntryType()), + table, ColumnNumWithType{9}); + } + }); + + // Exception tests. + generalExceptionTestUnlimitedInputColumns([](ResultTable* table, + const auto& columnToPutResultIn, + const auto&... inputColumns) { + sumUpColumns(table, columnToPutResultIn, inputColumns...); + }); +} + +TEST(ResultTableColumnOperations, calculateSpeedupOfColumn) { + // Fill two columns so, that they have the desired speedup. + auto fillColumnsForSpeedup = [](ResultTable* table, const float wantedSpeedup, + const ColumnNumWithType& columnOne, + const ColumnNumWithType& columnTwo) { + /* + For the math: + wantedSpeedup = columnTwo / columnOne + <=> wantedSpeeup * columnOne = columnTwo + */ + for (size_t row = 0; row < table->numRows(); row++) { + table->setEntry(row, columnOne.columnNum_, static_cast(row + 1)); + table->setEntry(row, columnTwo.columnNum_, (row + 1) * wantedSpeedup); + } + }; + + // Test things for a range of speedups. + std::ranges::for_each( + std::array{2.f, 16.f, 73.696f, 4.2f}, + [&fillColumnsForSpeedup](const float wantedSpeedup, + ad_utility::source_location l = + ad_utility::source_location::current()) { + // For generating better messages, when failing a test. 
+ auto trace{generateLocationTrace(l, "testRangeOfSpeedups")}; + ResultTable table{createTestTable(NUM_ROWS, 10)}; + + /* + Needed for exception test, to choose a random entry inside a column. + Note: The described range is inclusive. + */ + ad_utility::SlowRandomIntGenerator rowGenerator( + 0, table.numRows() - 1); + + // Test things trough for all possible input and output columns. + for (size_t outputColumn = 0; outputColumn < table.numColumns(); + outputColumn++) { + for (size_t firstInputColumn = 0; + firstInputColumn < table.numColumns(); firstInputColumn++) { + for (size_t secondInputColumn = 0; + secondInputColumn < table.numColumns(); secondInputColumn++) { + // The same column repeated as input is not allowed. + if (firstInputColumn == secondInputColumn) { + continue; + } + + // Test, if things are calculated as wanted. + fillColumnsForSpeedup(&table, wantedSpeedup, {firstInputColumn}, + {secondInputColumn}); + calculateSpeedupOfColumn(&table, {outputColumn}, + {firstInputColumn}, {secondInputColumn}); + compareToColumn(std::vector(NUM_ROWS, wantedSpeedup), table, + ColumnNumWithType{outputColumn}); + + // Exception test, if input values, that are smaller than 0, are + // not allowed. + const size_t firstInputColumnRow{rowGenerator()}; + const size_t secondInputColumnRow{rowGenerator()}; + const float firstInputColumnOldEntry{ + table.getEntry(firstInputColumnRow, firstInputColumn)}; + const float secondInputColumnOldEntry{table.getEntry( + secondInputColumnRow, secondInputColumn)}; + auto doExceptionTest = [&table, &outputColumn, &firstInputColumn, + &secondInputColumn]() { + ASSERT_ANY_THROW(calculateSpeedupOfColumn( + &table, {outputColumn}, {firstInputColumn}, + {secondInputColumn})); + }; + + // The actual test. + // Only an error in the first column. 
+ table.setEntry(firstInputColumnRow, firstInputColumn, + -firstInputColumnOldEntry); + doExceptionTest(); + table.setEntry(firstInputColumnRow, firstInputColumn, 0); + doExceptionTest(); + + // Only an error in the second column. + table.setEntry(firstInputColumnRow, firstInputColumn, + firstInputColumnOldEntry); + table.setEntry(secondInputColumnRow, secondInputColumn, + -secondInputColumnOldEntry); + doExceptionTest(); + table.setEntry(secondInputColumnRow, secondInputColumn, 0); + doExceptionTest(); + + // Error in both the input columns. + table.setEntry(firstInputColumnRow, firstInputColumn, 0); + table.setEntry(secondInputColumnRow, secondInputColumn, 0); + doExceptionTest(); + table.setEntry(firstInputColumnRow, firstInputColumn, + -firstInputColumnOldEntry); + table.setEntry(secondInputColumnRow, secondInputColumn, 0); + doExceptionTest(); + table.setEntry(firstInputColumnRow, firstInputColumn, 0); + table.setEntry(secondInputColumnRow, secondInputColumn, + -secondInputColumnOldEntry); + doExceptionTest(); + table.setEntry(firstInputColumnRow, firstInputColumn, + -firstInputColumnOldEntry); + table.setEntry(secondInputColumnRow, secondInputColumn, + -secondInputColumnOldEntry); + doExceptionTest(); + } + } + } + }); + + // General exception tests. + generalExceptionTestTwoInputColumns( + []( + ResultTable* table, const auto& columnToPutResultIn, + const ColumnNumWithType& firstInputColumns, + const ColumnNumWithType& secondInputColumns) { + /* + Unlike the other functions, `calculateSpeedupOfColumn` only works with measured execution times. + So, whenever the inputs are not of type `float`, we pass the + responsibility to a trivial function.
+ */ + if constexpr (std::same_as && + std::same_as) { + calculateSpeedupOfColumn(table, columnToPutResultIn, + firstInputColumns, secondInputColumns); + } else { + sumUpColumns(table, columnToPutResultIn, firstInputColumns, + secondInputColumns); + }; + }); +} +} // namespace ad_benchmark diff --git a/test/util/BenchmarkMeasurementContainerHelpers.cpp b/test/util/BenchmarkMeasurementContainerHelpers.cpp new file mode 100644 index 0000000000..b551b17d8f --- /dev/null +++ b/test/util/BenchmarkMeasurementContainerHelpers.cpp @@ -0,0 +1,38 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Andre Schlegel (November of 2023, +// schlegea@informatik.uni-freiburg.de) + +#include "../test/util/BenchmarkMeasurementContainerHelpers.h" + +using namespace std::string_literals; + +// ____________________________________________________________________________ +template +requires ad_utility::isTypeContainedIn +Type createDummyValueEntryType() { + if constexpr (ad_utility::isSimilar) { + return 4.2f; + } else if constexpr (ad_utility::isSimilar) { + return "test"s; + } else if constexpr (ad_utility::isSimilar) { + return true; + } else if constexpr (ad_utility::isSimilar) { + return 17361644613946UL; + } else if constexpr (ad_utility::isSimilar) { + return -42; + } else { + // Not a supported type. + AD_FAIL(); + } +} + +/* +Explicit instantiation for all types in `ad_benchmark::ResultTable::EntryType`. 
+*/ +template bool createDummyValueEntryType(); +template std::string createDummyValueEntryType(); +template int createDummyValueEntryType(); +template size_t createDummyValueEntryType(); +template float createDummyValueEntryType(); diff --git a/test/util/BenchmarkMeasurementContainerHelpers.h b/test/util/BenchmarkMeasurementContainerHelpers.h new file mode 100644 index 0000000000..956cd19e48 --- /dev/null +++ b/test/util/BenchmarkMeasurementContainerHelpers.h @@ -0,0 +1,43 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Andre Schlegel (November of 2023, +// schlegea@informatik.uni-freiburg.de) + +#pragma once + +#include +#include +#include +#include + +#include "../benchmark/infrastructure/BenchmarkMeasurementContainer.h" +#include "util/TypeTraits.h" + +/* +@brief Generate a dummy value of the given type. Used for generating test values +for `ad_benchmark::ResultTable`. +*/ +template +requires ad_utility::isTypeContainedIn +Type createDummyValueEntryType(); + +/* +@brief Call the lambda with each of the alternatives in +`ad_benchmark::ResultTable::EntryType`, except `std::monostate`, as template +parameter. + +@tparam Function The loop body should be a templated function, with one +`typename` template argument and no more. It also shouldn't take any function +arguments. Should be passed per deduction. +*/ +template +static void doForTypeInResultTableEntryType(Function function) { + ad_utility::forEachTypeInTemplateType( + [&function]() { + // `std::monostate` is not important for these kinds of tests. 
+ if constexpr (!ad_utility::isSimilar) { + function.template operator()(); + } + }); +} diff --git a/test/util/CMakeLists.txt b/test/util/CMakeLists.txt index b2281b7b35..65352c0ba6 100644 --- a/test/util/CMakeLists.txt +++ b/test/util/CMakeLists.txt @@ -1,3 +1,3 @@ -add_library(testUtil IdTableHelpers.cpp ValidatorHelpers.cpp RandomTestHelpers.cpp IndexTestHelpers.cpp) +add_library(testUtil IdTableHelpers.cpp ValidatorHelpers.cpp RandomTestHelpers.cpp BenchmarkMeasurementContainerHelpers.cpp IndexTestHelpers.cpp) # TODO Once there is more support for it, we should be able to do this cheaper with modules. qlever_target_link_libraries(testUtil engine) From 0820c96fff6c5c6d14c8109465801ee4934f2187 Mon Sep 17 00:00:00 2001 From: Fabian Krause <29677855+kcaliban@users.noreply.github.com> Date: Tue, 12 Dec 2023 15:56:30 +0100 Subject: [PATCH 54/63] For GROUP BY optimization, use vector offsets to access aggregation data. (#1181) In the hash map-based GROUP BY optimization, the hash map now only stores a mapping from ID->size_t where the size_t is an index into a vector that stores the actual data. This optimization will make the support for multiple different aggregates much more efficient. 
--- src/engine/GroupBy.cpp | 196 ++++++++++++++++++++--------------------- src/engine/GroupBy.h | 92 +++++++++++++------ test/GroupByTest.cpp | 63 ++++++++++--- 3 files changed, 208 insertions(+), 143 deletions(-) diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 811ac6dd22..281d7193b9 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -384,11 +384,9 @@ ResultTable GroupBy::computeResult() { size_t outWidth = idTable.numColumns(); if (hashMapOptimizationParams.has_value()) { - CALL_FIXED_SIZE( - (std::array{outWidth, aggregates.size(), - hashMapOptimizationParams->numAggregates_}), - &GroupBy::computeGroupByForHashMapOptimization, this, &idTable, - hashMapOptimizationParams->aggregateAliases_, subresult->idTable(), + computeGroupByForHashMapOptimization( + &idTable, hashMapOptimizationParams->aggregateAliases_, + hashMapOptimizationParams->numAggregates_, subresult->idTable(), hashMapOptimizationParams->subtreeColumnIndex_, &localVocab); return {std::move(idTable), resultSortedOn(), std::move(localVocab)}; @@ -725,13 +723,6 @@ GroupBy::checkIfHashMapOptimizationPossible(std::vector& aliases) { return std::nullopt; } - // TODO remove this as soon as we have a better implementation - // for multiple aggregates - // Only allow up to 5 aliases for now (because of CallFixedSize) - if (aliases.size() > 5) { - return std::nullopt; - } - // Get pointers to all aggregate expressions and their parents size_t numAggregates = 0; std::vector aliasesWithAggregateInfo; @@ -745,20 +736,13 @@ GroupBy::checkIfHashMapOptimizationPossible(std::vector& aliases) { if (!foundAggregates.has_value()) return std::nullopt; for (auto& aggregate : foundAggregates.value()) { - aggregate.hashMapIndex_ = numAggregates++; + aggregate.aggregateDataIndex = numAggregates++; } aliasesWithAggregateInfo.emplace_back(alias._expression, alias._outCol, foundAggregates.value()); } - // TODO remove this as soon as we have a better implementation - // for multiple aggregates 
- // Only allow up to 5 aggregates for now (because of CallFixedSize) - if (numAggregates > 5) { - return std::nullopt; - } - const Variable& groupByVariable = _groupByVariables.front(); auto child = _subtree->getRootOperation()->getChildren().at(0); auto columnIndex = child->getVariableColumn(groupByVariable); @@ -852,52 +836,41 @@ void GroupBy::extractValues( } // _____________________________________________________________________________ -template sparqlExpression::VectorWithMemoryLimit -GroupBy::extractValuesDirectlyFromMap( - size_t beginIndex, size_t endIndex, - const ad_utility::HashMapWithMemoryLimit>& - map, - IdTable* resultTable, size_t hashMapIndex, size_t outCol) { - // Store in a vector such that we can copy it to - // `_previousResultsFromSameGroup`. +GroupBy::getHashMapAggregationResults( + IdTable* resultTable, const HashMapAggregationData& aggregationData, + size_t dataIndex, size_t beginIndex, size_t endIndex) { sparqlExpression::VectorWithMemoryLimit aggregateResults( getExecutionContext()->getAllocator()); + aggregateResults.resize(endIndex - beginIndex); - // Store aggregate results in table and in vector. 
decltype(auto) groupValues = resultTable->getColumn(0); - for (size_t j = beginIndex; j < endIndex; j++) { - Id val = groupValues[j]; - auto& aggregateData = map.at(val).at(hashMapIndex); - auto aggregateResult = aggregateData.calculateResult(); - (*resultTable)(j, outCol) = aggregateResult; - aggregateResults.push_back(aggregateResult); - } + auto& aggregateDataVector = + aggregationData.getAggregationDataVector(dataIndex); + + auto op = [&aggregationData, &aggregateDataVector](Id val) { + auto index = aggregationData.getIndex(val); + auto& aggregateData = aggregateDataVector.at(index); + return aggregateData.calculateResult(); + }; + + std::ranges::transform(groupValues.begin() + beginIndex, + groupValues.begin() + endIndex, + aggregateResults.begin(), op); return aggregateResults; } // _____________________________________________________________________________ -template void GroupBy::substituteAllAggregates( - std::vector& info, - sparqlExpression::EvaluationContext& evaluationContext, - const ad_utility::HashMapWithMemoryLimit>& - map, + std::vector& info, size_t beginIndex, + size_t endIndex, const HashMapAggregationData& aggregationData, IdTable* resultTable) { // Substitute in the results of all aggregates of `info`. 
for (auto& aggregate : info) { - sparqlExpression::VectorWithMemoryLimit aggregateResults( - getExecutionContext()->getAllocator()); - - // Get all aggregate results as a vector - decltype(auto) groupValues = resultTable->getColumn(0); - for (size_t j = evaluationContext._beginIndex; - j < evaluationContext._endIndex; j++) { - Id val = groupValues[j]; - auto& aggregateData = map.at(val).at(aggregate.hashMapIndex_); - aggregateResults.push_back(aggregateData.calculateResult()); - } + auto aggregateResults = getHashMapAggregationResults( + resultTable, aggregationData, aggregate.aggregateDataIndex, beginIndex, + endIndex); // Substitute the resulting vector as a literal auto newExpression = std::make_unique( @@ -911,28 +884,48 @@ void GroupBy::substituteAllAggregates( } // _____________________________________________________________________________ -template -void GroupBy::createResultFromHashMap( - IdTable* result, - const ad_utility::HashMapWithMemoryLimit>& - map, - std::vector& aggregateAliases, - LocalVocab* localVocab) { - // Sort by groupBy column +std::vector GroupBy::HashMapAggregationData::getHashEntries( + std::span ids) { + std::vector hashEntries; + hashEntries.reserve(ids.size()); + + for (auto& val : ids) { + auto [iterator, wasAdded] = map_.try_emplace(val, getNumberOfGroups()); + + hashEntries.push_back(iterator->second); + } + + for (auto& aggregation : aggregationData_) + aggregation.resize(getNumberOfGroups()); + + return hashEntries; +} + +// _____________________________________________________________________________ +[[nodiscard]] std::vector +GroupBy::HashMapAggregationData::getSortedGroupColumn() const { std::vector sortedKeys; - for (const auto& val : map) { + sortedKeys.reserve(map_.size()); + // TODO: use ranges::to + for (const auto& val : map_) { sortedKeys.push_back(val.first); } std::ranges::sort(sortedKeys); + return sortedKeys; +} +// _____________________________________________________________________________ +void 
GroupBy::createResultFromHashMap( + IdTable* result, const HashMapAggregationData& aggregationData, + std::vector& aggregateAliases, + LocalVocab* localVocab) { // Create result table, filling in the group values, since they might be // required in evaluation - // TODO use views::enumerate - size_t rowIndex = 0; - for (const auto& val : sortedKeys) { - result->emplace_back(); - (*result)(rowIndex++, 0) = val; - } + auto sortedKeys = aggregationData.getSortedGroupColumn(); + size_t numberOfGroups = aggregationData.getNumberOfGroups(); + result->resize(numberOfGroups); + + std::ranges::copy(sortedKeys, result->getColumn(0).begin()); // Initialize evaluation context sparqlExpression::EvaluationContext evaluationContext( @@ -948,9 +941,9 @@ void GroupBy::createResultFromHashMap( size_t blockSize = 65536; - for (size_t i = 0; i < map.size(); i += blockSize) { + for (size_t i = 0; i < numberOfGroups; i += blockSize) { evaluationContext._beginIndex = i; - evaluationContext._endIndex = std::min(i + blockSize, map.size()); + evaluationContext._endIndex = std::min(i + blockSize, numberOfGroups); for (auto& alias : aggregateAliases) { auto& info = alias.aggregateInfo_; @@ -959,10 +952,15 @@ void GroupBy::createResultFromHashMap( if (info.size() == 1 && !info.at(0).parentAndIndex_.has_value()) { auto& aggregate = info.at(0); - // Store results in table - auto aggregateResults = extractValuesDirectlyFromMap( - evaluationContext._beginIndex, evaluationContext._endIndex, map, - result, aggregate.hashMapIndex_, alias.outCol_); + // Get aggregate results + auto aggregateResults = getHashMapAggregationResults( + result, aggregationData, aggregate.aggregateDataIndex, + evaluationContext._beginIndex, evaluationContext._endIndex); + + // Copy to result table + decltype(auto) outValues = result->getColumn(alias.outCol_); + std::ranges::copy(aggregateResults, + outValues.begin() + evaluationContext._beginIndex); // Copy the result so that future aliases may reuse it 
evaluationContext._previousResultsFromSameGroup.at(alias.outCol_) = @@ -972,7 +970,9 @@ void GroupBy::createResultFromHashMap( } else { // Substitute in the results of all aggregates contained in the // expression of the current alias, if `info` is non-empty. - substituteAllAggregates(info, evaluationContext, map, result); + substituteAllAggregates(info, evaluationContext._beginIndex, + evaluationContext._endIndex, aggregationData, + result); // Evaluate top-level alias expression sparqlExpression::ExpressionResult expressionResult = @@ -991,12 +991,13 @@ void GroupBy::createResultFromHashMap( } // _____________________________________________________________________________ -template void GroupBy::computeGroupByForHashMapOptimization( IdTable* result, std::vector& aggregateAliases, - const IdTable& subresult, size_t columnIndex, LocalVocab* localVocab) { - ad_utility::HashMapWithMemoryLimit> map( - getExecutionContext()->getAllocator()); + size_t numAggregates, const IdTable& subresult, size_t columnIndex, + LocalVocab* localVocab) { + // Initialize aggregation data + HashMapAggregationData aggregationData(getExecutionContext()->getAllocator(), + numAggregates); // Initialize evaluation context sparqlExpression::EvaluationContext evaluationContext( @@ -1016,25 +1017,11 @@ void GroupBy::computeGroupByForHashMapOptimization( auto currentBlockSize = evaluationContext._endIndex - evaluationContext._beginIndex; - // Perform HashMap lookup for all groups in current block - std::vector*> hashEntries; - - auto getId = [&subresult, &evaluationContext, &columnIndex](size_t j) { - return subresult(evaluationContext._beginIndex + j, columnIndex); - }; - - // Create elements in map - for (size_t j = 0; j < currentBlockSize; ++j) { - auto id = getId(j); - map.try_emplace(id); - } - - // Get pointers to values - // (valid for this iteration as no new elements added) - for (size_t j = 0; j < currentBlockSize; ++j) { - auto id = getId(j); - hashEntries.emplace_back(&(map.at(id))); 
- } + // Perform HashMap lookup once for all groups in current block + auto groupValues = + subresult.getColumn(columnIndex) + .subspan(evaluationContext._beginIndex, currentBlockSize); + auto hashEntries = aggregationData.getHashEntries(groupValues); // TODO use views::enumerate for (auto& aggregateAlias : aggregateAliases) { @@ -1044,8 +1031,12 @@ void GroupBy::computeGroupByForHashMapOptimization( sparqlExpression::ExpressionResult expressionResult = exprChildren[0]->evaluate(&evaluationContext); + auto& aggregationDataVector = aggregationData.getAggregationDataVector( + aggregate.aggregateDataIndex); + auto visitor = [¤tBlockSize, &evaluationContext, &hashEntries, - &aggregate]( + &aggregationDataVector]< + sparqlExpression::SingleExpressionResult T>( T&& singleResult) mutable { auto generator = sparqlExpression::detail::makeGenerator( std::forward(singleResult), currentBlockSize, @@ -1054,12 +1045,13 @@ void GroupBy::computeGroupByForHashMapOptimization( using NVG = sparqlExpression::detail::NumericValueGetter; auto hashEntryIndex = 0; + for (const auto& val : generator) { sparqlExpression::detail::NumericValue numVal = NVG()(val, &evaluationContext); - auto& aggregateData = - hashEntries[hashEntryIndex]->at(aggregate.hashMapIndex_); + auto vectorOffset = hashEntries[hashEntryIndex]; + auto& aggregateData = aggregationDataVector.at(vectorOffset); aggregateData.increment(numVal); @@ -1072,8 +1064,8 @@ void GroupBy::computeGroupByForHashMapOptimization( } } - createResultFromHashMap( - result, map, aggregateAliases, localVocab); + createResultFromHashMap(result, aggregationData, aggregateAliases, + localVocab); } // _____________________________________________________________________________ diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index 198544965d..0686f4ca9c 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -179,8 +179,7 @@ class GroupBy : public Operation { }; using KeyType = ValueId; - template - using ValueType = std::array; + 
using ValueType = size_t; // Stores information required for substitution of an expression in an // expression tree. @@ -202,16 +201,16 @@ class GroupBy : public Operation { sparqlExpression::SparqlExpression* expr_; // The index in the `std::array` of the Hash Map where results of this // aggregate are stored. - size_t hashMapIndex_; + size_t aggregateDataIndex; // The parent expression of this aggregate, and the index this expression // appears in the parents' children, so that it may be substituted away. std::optional parentAndIndex_ = std::nullopt; HashMapAggregateInformation( - sparqlExpression::SparqlExpression* expr, size_t hashMapIndex, + sparqlExpression::SparqlExpression* expr, size_t aggregateDataIndex, std::optional parentAndIndex = std::nullopt) : expr_{expr}, - hashMapIndex_{hashMapIndex}, + aggregateDataIndex{aggregateDataIndex}, parentAndIndex_{parentAndIndex} { AD_CONTRACT_CHECK(expr != nullptr); } @@ -240,17 +239,66 @@ class GroupBy : public Operation { // Create result IdTable by using a HashMap mapping groups to aggregation data // and subsequently calling `createResultFromHashMap`. - template void computeGroupByForHashMapOptimization( IdTable* result, std::vector& aggregateAliases, - const IdTable& subresult, size_t columnIndex, LocalVocab* localVocab); + size_t numAggregates, const IdTable& subresult, size_t columnIndex, + LocalVocab* localVocab); + + // Stores the map which associates Ids with vector offsets and + // the vectors containing the aggregation data. + class HashMapAggregationData { + public: + HashMapAggregationData(ad_utility::AllocatorWithLimit alloc, + size_t numAggregates) + : map_{ad_utility::HashMapWithMemoryLimit(alloc)}, + aggregationData_{ + std::vector>(numAggregates)} { + AD_CONTRACT_CHECK(numAggregates > 0); + } + + // Returns a vector containing the offsets for all ids of `ids`, + // inserting entries if necessary. + std::vector getHashEntries(std::span ids); + + // Return the index of `id`. 
+ [[nodiscard]] size_t getIndex(Id id) const { return map_.at(id); } + + // Get vector containing the aggregation data at `aggregationDataIndex`. + std::vector& getAggregationDataVector( + size_t aggregationDataIndex) { + return aggregationData_.at(aggregationDataIndex); + } + + // Get vector containing the aggregation data at `aggregationDataIndex`, + // but const. + [[nodiscard]] const std::vector& + getAggregationDataVector(size_t aggregationDataIndex) const { + return aggregationData_.at(aggregationDataIndex); + } + + // Get the values of the grouped column in ascending order. + [[nodiscard]] std::vector getSortedGroupColumn() const; + + // Returns the number of groups. + [[nodiscard]] size_t getNumberOfGroups() const { return map_.size(); } + + private: + // Maps `Id` to vector offsets. + ad_utility::HashMapWithMemoryLimit map_; + // Stores the actual aggregation data. + std::vector> aggregationData_; + }; + + // Returns the aggregation results between `beginIndex` and `endIndex` + // of the aggregates stored at `dataIndex`, + // based on the groups stored in the first column of `resultTable` + sparqlExpression::VectorWithMemoryLimit getHashMapAggregationResults( + IdTable* resultTable, const HashMapAggregationData& aggregationData, + size_t dataIndex, size_t beginIndex, size_t endIndex); // Sort the HashMap by key and create result table. 
- template void createResultFromHashMap( - IdTable* result, - const ad_utility::HashMapWithMemoryLimit>& map, + IdTable* result, const HashMapAggregationData& aggregationData, std::vector& aggregateAliases, LocalVocab* localVocab); @@ -259,7 +307,6 @@ class GroupBy : public Operation { // - Runtime parameter is set // - Child operation is SORT // - All aggregates are AVG - // - Maximum 5 aliases and 5 aggregates // - Only one grouped variable std::optional checkIfHashMapOptimizationPossible( std::vector& aggregates); @@ -272,25 +319,12 @@ class GroupBy : public Operation { sparqlExpression::EvaluationContext& evaluationContext, IdTable* resultTable, LocalVocab* localVocab, size_t outCol); - // In case of aliases that contain an aggregate at the top-level of the - // expression tree, e.g. "AVG(?y) as ?avg", we can directly store the values - // calculated in our HashMap in the result table. - template - sparqlExpression::VectorWithMemoryLimit extractValuesDirectlyFromMap( - size_t beginIndex, size_t endIndex, - const ad_utility::HashMapWithMemoryLimit>& map, - IdTable* resultTable, size_t hashMapIndex, size_t outCol); - // Substitute the results for all aggregates in `info`. The values of the // grouped variable should be at column 0 in `groupValues`. - template - void substituteAllAggregates( - std::vector& info, - sparqlExpression::EvaluationContext& evaluationContext, - const ad_utility::HashMapWithMemoryLimit>& map, - IdTable* resultTable); + void substituteAllAggregates(std::vector& info, + size_t beginIndex, size_t endIndex, + const HashMapAggregationData& aggregationData, + IdTable* resultTable); // Check if an expression is of a certain type. 
template diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index 9b34acaa0d..07410cdac0 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -555,7 +555,7 @@ TEST_F(GroupByOptimizations, checkIfHashMapOptimizationPossible) { ASSERT_EQ(aggregateAlias.expr_.getPimpl(), avgXPimpl.getPimpl()); // Check aggregate info is correct auto aggregateInfo = aggregateAlias.aggregateInfo_[0]; - ASSERT_EQ(aggregateInfo.hashMapIndex_, 0); + ASSERT_EQ(aggregateInfo.aggregateDataIndex, 0); ASSERT_FALSE(aggregateInfo.parentAndIndex_.has_value()); ASSERT_EQ(aggregateInfo.expr_, avgXPimpl.getPimpl()); } @@ -603,7 +603,9 @@ TEST_F(GroupByOptimizations, correctResultForHashMapOptimizationNonTrivial) { /* Setup query: SELECT ?x (AVG(?y) as ?avg) (?avg + ((2 * AVG(?y)) * AVG(4 * ?y)) as ?complexAvg) - (5.0 as ?const) WHERE { + (5.0 as ?const) (42.0 as ?const2) (13.37 as ?const3) + (?const + ?const2 + ?const3 + AVG(?y) + AVG(?y) + AVG(?y) as ?sth) + WHERE { ?z ?x . ?z ?y } GROUP BY ?x @@ -619,10 +621,11 @@ TEST_F(GroupByOptimizations, correctResultForHashMapOptimizationNonTrivial) { std::vector sortedColumns = {1}; Tree sortedJoin = makeExecutionTree(qec, join, sortedColumns); + // (AVG(?y) as ?avg) Variable varAvg{"?avg"}; - SparqlExpressionPimpl avgYPimpl = makeAvgPimpl(varY); + // (?avg + ((2 * AVG(?y)) * AVG(4 * ?y)) as ?complexAvg) auto fourTimesYExpr = makeMultiplyExpression(makeLiteralDoubleExpr(4.0), makeVariableExpression(varY)); auto avgFourTimesYExpr = @@ -640,13 +643,43 @@ TEST_F(GroupByOptimizations, correctResultForHashMapOptimizationNonTrivial) { std::move(avgY_plus_twoTimesAvgY_times_avgFourTimesYExpr), "(?avg + ((2 * AVG(?y)) * AVG(4 * ?y)) as ?complexAvg)"); + // (5.0 as ?const) (42.0 as ?const2) (13.37 as ?const3) + Variable varConst = Variable{"?const"}; SparqlExpressionPimpl constantFive = makeLiteralDoublePimpl(5.0); + Variable varConst2 = Variable{"?const2"}; + SparqlExpressionPimpl constantFortyTwo = makeLiteralDoublePimpl(42.0); + Variable varConst3 
= Variable{"?const3"}; + SparqlExpressionPimpl constantLeet = makeLiteralDoublePimpl(13.37); + + // (?const + ?const2 + ?const3 + AVG(?y) + AVG(?y) + AVG(?y) as ?sth) + auto constPlusConst2 = makeAddExpression(makeVariableExpression(varConst), + makeVariableExpression(varConst2)); + auto constPlusConst2PlusConst3 = makeAddExpression( + std::move(constPlusConst2), makeVariableExpression(varConst3)); + auto avgY1 = + std::make_unique(false, makeVariableExpression(varY)); + auto constPusConst2PlusConst3PlusAvgY = + makeAddExpression(std::move(constPlusConst2PlusConst3), std::move(avgY1)); + auto avgY2 = + std::make_unique(false, makeVariableExpression(varY)); + auto constPlusConst2PlusConst3PlusAvgYPlusAvgY = makeAddExpression( + std::move(constPusConst2PlusConst3PlusAvgY), std::move(avgY2)); + auto avgY3 = + std::make_unique(false, makeVariableExpression(varY)); + auto constPlusEtc = makeAddExpression( + std::move(constPlusConst2PlusConst3PlusAvgYPlusAvgY), std::move(avgY3)); + SparqlExpressionPimpl constPlusEtcPimpl( + std::move(constPlusEtc), + "?const + ?const2 + ?const3 + AVG(?y) + AVG(?y) + AVG(?y)"); std::vector aliasesAvgY{ Alias{avgYPimpl, varAvg}, Alias{avgY_plus_twoTimesAvgY_times_avgFourTimesYPimpl, Variable{"?complexAvg"}}, - Alias{constantFive, Variable{"?const"}}}; + Alias{constantFive, varConst}, + Alias{constantFortyTwo, varConst2}, + Alias{constantLeet, varConst3}, + Alias{constPlusEtcPimpl, Variable{"?sth"}}}; // Clear cache, calculate result without optimization RuntimeParameters().set<"use-group-by-hash-map-optimization">(false); @@ -701,6 +734,7 @@ TEST_F(GroupByOptimizations, checkIfJoinWithFullScan) { ASSERT_EQ(optimizedAggregateData->subtreeColumnIndex_, 0); } +// _____________________________________________________________________________ TEST_F(GroupByOptimizations, computeGroupByForJoinWithFullScan) { { // One of the invalid cases from the previous test. 
@@ -728,8 +762,8 @@ TEST_F(GroupByOptimizations, computeGroupByForJoinWithFullScan) { source_location l = source_location::current()) { auto trace = generateLocationTrace(l); - // Set up a `VALUES` clause with three values for `?x`, two of which (`` - // and ``) actually appear in the test knowledge graph. + // Set up a `VALUES` clause with three values for `?x`, two of which + // (`` and ``) actually appear in the test knowledge graph. parsedQuery::SparqlValues sparqlValues; sparqlValues._variables.push_back(varX); sparqlValues._values.emplace_back(std::vector{TripleComponent{""}}); @@ -779,6 +813,7 @@ TEST_F(GroupByOptimizations, computeGroupByForJoinWithFullScan) { } } +// _____________________________________________________________________________ TEST_F(GroupByOptimizations, computeGroupByForSingleIndexScan) { // Assert that a GROUP BY, that is constructed from the given arguments, // can not perform the `OptimizedAggregateOnIndexScanChild` optimization. @@ -845,6 +880,7 @@ TEST_F(GroupByOptimizations, computeGroupByForSingleIndexScan) { } } +// _____________________________________________________________________________ TEST_F(GroupByOptimizations, computeGroupByForFullIndexScan) { // Assert that a GROUP BY which is constructed from the given arguments // can not perform the `GroupByForSingleIndexScan2` optimization. @@ -923,6 +959,7 @@ auto make = [](auto&&... args) -> SparqlExpression::Ptr { return std::make_unique(AD_FWD(args)...); }; } // namespace +// _____________________________________________________________________________ TEST(GroupBy, GroupedVariableInExpressions) { parsedQuery::SparqlValues input; using TC = TripleComponent; @@ -934,8 +971,8 @@ TEST(GroupBy, GroupedVariableInExpressions) { // // Note: The values are chosen such that the results are all integers. // Otherwise we would get into trouble with floating point comparisons. A - // check with a similar query but with non-integral inputs and results can be - // found in the E2E tests. 
+ // check with a similar query but with non-integral inputs and results can + // be found in the E2E tests. Variable varA = Variable{"?a"}; Variable varB = Variable{"?b"}; @@ -985,6 +1022,7 @@ TEST(GroupBy, GroupedVariableInExpressions) { EXPECT_EQ(table, expected); } +// _____________________________________________________________________________ TEST(GroupBy, AliasResultReused) { parsedQuery::SparqlValues input; using TC = TripleComponent; @@ -996,8 +1034,8 @@ TEST(GroupBy, AliasResultReused) { // // Note: The values are chosen such that the results are all integers. // Otherwise we would get into trouble with floating point comparisons. A - // check with a similar query but with non-integral inputs and results can be - // found in the E2E tests. + // check with a similar query but with non-integral inputs and results can + // be found in the E2E tests. Variable varA = Variable{"?a"}; Variable varB = Variable{"?b"}; @@ -1049,9 +1087,10 @@ TEST(GroupBy, AliasResultReused) { } // namespace -// Expressions in HAVING clauses are converted to special internal aliases. Test -// the combination of parsing and evaluating such queries. +// _____________________________________________________________________________ TEST(GroupBy, AddedHavingRows) { + // Expressions in HAVING clauses are converted to special internal aliases. + // Test the combination of parsing and evaluating such queries. auto query = "SELECT ?x (COUNT(?y) as ?count) WHERE {" " VALUES (?x ?y) {(0 1) (0 3) (0 5) (1 4) (1 3) } }" From a363d10c6da78cb9da0d6e45899d579f0379644e Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Tue, 12 Dec 2023 23:46:13 +0100 Subject: [PATCH 55/63] Push to Docker Hub as `adfreiburg/qlever` (#1192) So far, the Docker images were pushed to Docker Hub as `/qlever`, to make it easier to check whether everything works as it should. Everything seems to work as it should and so now we push as the official `adfreiburg/qlever`. 
--- .github/workflows/docker-publish.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index e15bb83f57..2a1da90645 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -58,7 +58,7 @@ jobs: # We have to explicitly add the "qlever:latest" tag for it to work correctly, # see e.g. https://stackoverflow.com/questions/27643017/do-i-need-to-manually-tag-latest-when-pushing-to-docker-public-repository tags: > - ${{ secrets.DOCKERHUB_USERNAME }}/qlever:${{ github.ref_name == 'master' && format('pr-{0}', steps.pr.outputs.pr_num) || github.ref_name }}, - ${{ secrets.DOCKERHUB_USERNAME }}/qlever:commit-${{ steps.sha.outputs.sha_short }}, - ${{ secrets.DOCKERHUB_USERNAME }}/qlever:latest + adfreiburg/qlever:${{ github.ref_name == 'master' && format('pr-{0}', steps.pr.outputs.pr_num) || github.ref_name }}, + adfreiburg/qlever:commit-${{ steps.sha.outputs.sha_short }}, + adfreiburg/qlever:latest From d852aa439100e4bd07dc8649119d8e5606d08e43 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Wed, 13 Dec 2023 13:57:55 +0100 Subject: [PATCH 56/63] Make sure that no exceptions get lost in the parallel Turtle parser (#1187) So far, the parallel Turtle parser had the problem that certain exceptions disappeared. After #1124, two exceptions happened more often, namely that the `FILE_BUFFER_SIZE` was not large enough (it was reduced from 100M to 10M), and an overflow of the sort key buffer (its preallocation was increased significantly to save time). So far, both exceptions had the effect that the parsing hung forever, either because batches were still being produced but not processed, or batches were still being processed but not produced. All exceptions are now caught and appear in the log. The potential overflow of the sort key buffer is now fixed. 
The `FILE_BUFFER_SIZE` can still be too small (and it happens for `UniProt` and `OSM Planet`), but now the exception shows in the log and ends the index build. --- src/index/StringSortComparator.h | 4 +- src/parser/TurtleParser.cpp | 157 ++++++++++++++++++++++--------- src/parser/TurtleParser.h | 71 +++++++++----- src/util/TaskQueue.h | 118 ++++++++--------------- test/TaskQueueTest.cpp | 22 +---- test/TimerTest.cpp | 13 ++- test/TurtleParserTest.cpp | 94 ++++++++++++++++-- 7 files changed, 303 insertions(+), 176 deletions(-) diff --git a/src/index/StringSortComparator.h b/src/index/StringSortComparator.h index 1c7f146cbe..7fadbe0d98 100644 --- a/src/index/StringSortComparator.h +++ b/src/index/StringSortComparator.h @@ -188,10 +188,10 @@ class LocaleManager { std::vector sortKeyBuffer; // The actual computation of the sort key is very expensive, so we first // allocate a buffer that is typically large enough to store the sort key. - sortKeyBuffer.resize(50 * s.size()); + static constexpr size_t maxBufferSize = std::numeric_limits::max(); + sortKeyBuffer.resize(std::min(50 * s.size(), maxBufferSize)); static_assert(sizeof(uint8_t) == sizeof(std::string::value_type)); static constexpr auto intMax = std::numeric_limits::max(); - AD_CORRECTNESS_CHECK(sortKeyBuffer.size() <= static_cast(intMax)); auto sz = col.getSortKey(utf16, sortKeyBuffer.data(), static_cast(sortKeyBuffer.size())); AD_CONTRACT_CHECK(sz >= 0); diff --git a/src/parser/TurtleParser.cpp b/src/parser/TurtleParser.cpp index 72d95cc7dc..a6f412a4b4 100644 --- a/src/parser/TurtleParser.cpp +++ b/src/parser/TurtleParser.cpp @@ -3,12 +3,15 @@ // Author: Johannes Kalmbach(joka921) // -#include -#include -#include +#include "parser/TurtleParser.h" #include +#include "parser/RdfEscaping.h" +#include "util/Conversions.h" +#include "util/OnDestructionDontThrowDuringStackUnwinding.h" + +using namespace std::chrono_literals; // _______________________________________________________________ template bool 
TurtleParser::statement() { @@ -817,68 +820,119 @@ bool TurtleStreamParser::getLine(TurtleTriple* triple) { return true; } +// We will use the following trick: For a batch that is forwarded to the +// parallel parser, we will first increment `numBatchesTotal_` and then call +// the following lambda after the batch has completely been parsed and the +// result pushed to the `tripleCollector_`. We thus get the invariant that +// `batchIdx_ +// == numBatchesTotal_` iff all batches that have been inserted to the +// `parallelParser_` have been fully processed. After the last batch we will +// push another call to this lambda to the `parallelParser_` which will then +// finish the `tripleCollector_` as soon as all batches have been computed. template -void TurtleParallelParser::initialize(const string& filename) { - fileBuffer_.open(filename); - if (auto batch = fileBuffer_.getNextBlock(); !batch) { - LOG(WARN) << "Empty input to the TURTLE parser, is this what you intended?" - << std::endl; - batch.emplace(); - } else { - TurtleStringParser declarationParser{}; - declarationParser.setInputStream(std::move(*batch)); - while (declarationParser.parseDirectiveManually()) { - } - this->prefixMap_ = std::move(declarationParser.getPrefixMap()); - auto remainder = declarationParser.getUnparsedRemainder(); - remainingBatchFromInitialization_.clear(); - remainingBatchFromInitialization_.reserve(remainder.size()); - std::copy(remainder.begin(), remainder.end(), - std::back_inserter(remainingBatchFromInitialization_)); +void TurtleParallelParser::finishTripleCollectorIfLastBatch() { + if (batchIdx_.fetch_add(1) == numBatchesTotal_) { + tripleCollector_.finish(); } +} + +// __________________________________________________________________________________ +template +void TurtleParallelParser::parseBatch(size_t parsePosition, + auto batch) { + try { + TurtleStringParser parser; + parser.prefixMap_ = this->prefixMap_; + parser.setPositionOffset(parsePosition); + 
parser.setInputStream(std::move(batch)); + // TODO: raise error message if a prefix parsing fails; + std::vector triples = parser.parseAndReturnAllTriples(); + + tripleCollector_.push([triples = std::move(triples), this]() mutable { + triples_ = std::move(triples); + }); + finishTripleCollectorIfLastBatch(); + } catch (std::exception& e) { + tripleCollector_.pushException(std::current_exception()); + parallelParser_.finish(); + } +}; - // This lambda fetches all the unparsed blocks of triples from the input - // file and feeds them to the parallel parsers. - auto feedBatches = [&, first = true, parsePosition = 0ull]() mutable { - decltype(remainingBatchFromInitialization_) inputBatch; +// _______________________________________________________________________ +template +void TurtleParallelParser::feedBatchesToParser( + auto remainingBatchFromInitialization) { + bool first = true; + size_t parsePosition = 0; + auto cleanup = + ad_utility::makeOnDestructionDontThrowDuringStackUnwinding([this] { + // Wait until everything has been parsed and then also finish the + // triple collector. + parallelParser_.push([this] { finishTripleCollectorIfLastBatch(); }); + parallelParser_.finish(); + }); + decltype(remainingBatchFromInitialization) inputBatch; + try { while (true) { if (first) { - inputBatch = std::move(remainingBatchFromInitialization_); + inputBatch = std::move(remainingBatchFromInitialization); first = false; } else { auto nextOptional = fileBuffer_.getNextBlock(); if (!nextOptional) { - // Wait until everything has been parsed. - parallelParser_.finish(); - // Wait until all the parsed triples have been picked up. 
- tripleCollector_.finish(); return; } inputBatch = std::move(nextOptional.value()); } auto batchSize = inputBatch.size(); - auto parseBatch = [this, parsePosition, - batch = std::move(inputBatch)]() mutable { - TurtleStringParser parser; - parser.prefixMap_ = this->prefixMap_; - parser.setPositionOffset(parsePosition); - parser.setInputStream(std::move(batch)); - // TODO: raise error message if a prefix parsing fails; - // TODO: handle exceptions in threads; - std::vector triples = parser.parseAndReturnAllTriples(); - - tripleCollector_.push([triples = std::move(triples), this]() mutable { - triples_ = std::move(triples); - }); + auto parseThisBatch = [this, parsePosition, + batch = std::move(inputBatch)]() mutable { + return parseBatch(parsePosition, std::move(batch)); }; parsePosition += batchSize; - parallelParser_.push(parseBatch); + numBatchesTotal_.fetch_add(1); + if (sleepTimeForTesting_ > 0ms) { + std::this_thread::sleep_for(sleepTimeForTesting_); + } + bool stillActive = parallelParser_.push(parseThisBatch); + if (!stillActive) { + return; + } } + } catch (std::exception& e) { + tripleCollector_.pushException(std::current_exception()); + } +}; + +// _______________________________________________________________________ +template +void TurtleParallelParser::initialize(const string& filename) { + ParallelBuffer::BufferType remainingBatchFromInitialization; + fileBuffer_.open(filename); + if (auto batch = fileBuffer_.getNextBlock(); !batch) { + LOG(WARN) << "Empty input to the TURTLE parser, is this what you intended?" 
+ << std::endl; + } else { + TurtleStringParser declarationParser{}; + declarationParser.setInputStream(std::move(batch.value())); + while (declarationParser.parseDirectiveManually()) { + } + this->prefixMap_ = std::move(declarationParser.getPrefixMap()); + auto remainder = declarationParser.getUnparsedRemainder(); + remainingBatchFromInitialization.reserve(remainder.size()); + std::ranges::copy(remainder, + std::back_inserter(remainingBatchFromInitialization)); + } + + auto feedBatches = [this, firstBatch = std::move( + remainingBatchFromInitialization)]() mutable { + return feedBatchesToParser(std::move(firstBatch)); }; parseFuture_ = std::async(std::launch::async, feedBatches); } +// _______________________________________________________________________ template bool TurtleParallelParser::getLine(TurtleTriple* triple) { // If the current batch is out of triples_ get the next batch of triples. @@ -886,7 +940,7 @@ bool TurtleParallelParser::getLine(TurtleTriple* triple) { // contains no triples. 
(Theoretically this might happen, and it is safer this // way) while (triples_.empty()) { - auto optionalTripleTask = tripleCollector_.popManually(); + auto optionalTripleTask = tripleCollector_.pop(); if (!optionalTripleTask) { // Everything has been parsed return false; @@ -901,12 +955,13 @@ bool TurtleParallelParser::getLine(TurtleTriple* triple) { return true; } +// _______________________________________________________________________ template std::optional> TurtleParallelParser::getBatch() { // we need a while in case there is a batch that contains no triples // (this should be rare, // TODO warn about this while (triples_.empty()) { - auto optionalTripleTask = tripleCollector_.popManually(); + auto optionalTripleTask = tripleCollector_.pop(); if (!optionalTripleTask) { // everything has been parsed return std::nullopt; @@ -917,6 +972,18 @@ std::optional> TurtleParallelParser::getBatch() { return std::move(triples_); } +// __________________________________________________________ +template +TurtleParallelParser::~TurtleParallelParser() { + ad_utility::ignoreExceptionIfThrows( + [this] { + parallelParser_.finish(); + tripleCollector_.finish(); + parseFuture_.get(); + }, + "During the destruction of a TurtleParallelParser"); +} + // Explicit instantiations template class TurtleParser; template class TurtleParser; diff --git a/src/parser/TurtleParser.h b/src/parser/TurtleParser.h index 220955c504..dd6bb2a55e 100644 --- a/src/parser/TurtleParser.h +++ b/src/parser/TurtleParser.h @@ -4,29 +4,30 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include #include #include #include #include +#include "absl/strings/str_cat.h" +#include "global/Constants.h" +#include "gtest/gtest.h" +#include "index/ConstantsIndexBuilding.h" +#include "parser/ParallelBuffer.h" +#include "parser/Tokenizer.h" +#include "parser/TokenizerCtre.h" +#include 
"parser/TripleComponent.h" +#include "parser/data/BlankNode.h" +#include "sys/mman.h" +#include "util/Exception.h" +#include "util/ExceptionHandling.h" +#include "util/File.h" +#include "util/HashMap.h" +#include "util/Log.h" #include "util/ParseException.h" +#include "util/TaskQueue.h" +#include "util/ThreadSafeQueue.h" using std::string; @@ -564,7 +565,14 @@ class TurtleParallelParser : public TurtleParser { using Triple = std::array; // Default construction needed for tests TurtleParallelParser() = default; - explicit TurtleParallelParser(const string& filename) { + + // If the `sleepTimeForTesting` is set, then after the initialization the + // parser will sleep for the specified time before parsing each batch s.t. + // certain corner cases can be tested. + explicit TurtleParallelParser(const string& filename, + std::chrono::milliseconds sleepTimeForTesting = + std::chrono::milliseconds{0}) + : sleepTimeForTesting_(sleepTimeForTesting) { LOG(DEBUG) << "Initialize parallel Turtle Parsing from uncompressed file or " "stream " @@ -582,8 +590,6 @@ class TurtleParallelParser : public TurtleParser { void printAndResetQueueStatistics() override { LOG(TIMING) << parallelParser_.getTimeStatistics() << '\n'; parallelParser_.resetTimers(); - LOG(TIMING) << tripleCollector_.getTimeStatistics() << '\n'; - tripleCollector_.resetTimers(); } void initialize(const string& filename); @@ -593,7 +599,23 @@ class TurtleParallelParser : public TurtleParser { return 0; } + // The destructor has to clean up all the parallel structures that might be + // still running in the background, especially when it is called before the + // parsing has finished (e.g. in case of an exception in the code that uses + // the parser). + ~TurtleParallelParser() override; + private: + // The documentation for this is in the `.cpp` file, because it closely + // interacts with the functions next to it. 
+ void finishTripleCollectorIfLastBatch(); + // Parse the single `batch` and push the result to the `tripleCollector_`. + void parseBatch(size_t parsePosition, auto batch); + // Read all the batches from the file and feed them to the parallel parser + // threads. The argument is the first batch which might have been leftover + // from the initialization phase where the prefixes are parsed. + void feedBatchesToParser(auto remainingBatchFromInitialization); + using TurtleParser::tok_; using TurtleParser::triples_; using TurtleParser::isParserExhausted_; @@ -604,12 +626,17 @@ class TurtleParallelParser : public TurtleParser { ParallelBufferWithEndRegex fileBuffer_{bufferSize_, "\\.[\\t ]*([\\r\\n]+)"}; - ad_utility::TaskQueue tripleCollector_{ - QUEUE_SIZE_AFTER_PARALLEL_PARSING, 0, "triple collector"}; + ad_utility::data_structures::ThreadSafeQueue> + tripleCollector_{QUEUE_SIZE_AFTER_PARALLEL_PARSING}; ad_utility::TaskQueue parallelParser_{ QUEUE_SIZE_BEFORE_PARALLEL_PARSING, NUM_PARALLEL_PARSER_THREADS, "parallel parser"}; std::future parseFuture_; + // The parallel parsers need to know when the last batch has been parsed, s.t. + // the parser threads can be destroyed. The following two members are needed + // for keeping track of this condition. 
+ std::atomic batchIdx_ = 0; + std::atomic numBatchesTotal_ = 0; - ParallelBuffer::BufferType remainingBatchFromInitialization_; + std::chrono::milliseconds sleepTimeForTesting_; }; diff --git a/src/util/TaskQueue.h b/src/util/TaskQueue.h index 13013de01f..fd37cfa4cf 100644 --- a/src/util/TaskQueue.h +++ b/src/util/TaskQueue.h @@ -4,15 +4,18 @@ #ifndef QLEVER_TASKQUEUE_H #define QLEVER_TASKQUEUE_H +#include + #include #include #include #include #include -#include "./Exception.h" -#include "./Timer.h" -#include "./jthread.h" +#include "util/Exception.h" +#include "util/ThreadSafeQueue.h" +#include "util/Timer.h" +#include "util/jthread.h" namespace ad_utility { /** @@ -29,19 +32,15 @@ class TaskQueue { using Task = std::function; using Timer = ad_utility::Timer; using AtomicMs = std::atomic; + using Queue = ad_utility::data_structures::ThreadSafeQueue; - std::vector _threads; - std::queue _queuedTasks; - size_t _queueMaxSize = 1; - // CV to notify that a new task has been added to the queue - std::condition_variable _newTaskWasPushed; - // CV to notify that a task was finished by a thread. - std::condition_variable _workerHasFinishedTask; - std::mutex _queueMutex; - std::atomic _shutdownQueue = false; - std::string _name; + std::atomic isFinished_ = false; + size_t queueMaxSize_ = 1; + Queue queuedTasks_{queueMaxSize_}; + std::vector threads_; + std::string name_; // Keep track of the time spent waiting in the push/pop operation - AtomicMs _pushTime = 0, _popTime = 0; + AtomicMs pushTime_ = 0, popTime_ = 0; public: /// Construct from the maximum size of the queue, and the number of worker @@ -60,69 +59,43 @@ class TaskQueue { /// workers are at least as fast as the "pusher", but the pusher is faster /// sometimes (which the queue can then accomodate). 
TaskQueue(size_t maxQueueSize, size_t numThreads, std::string name = "") - : _queueMaxSize{maxQueueSize}, _name{std::move(name)} { - AD_CONTRACT_CHECK(_queueMaxSize > 0); - _threads.reserve(numThreads); + : queueMaxSize_{maxQueueSize}, name_{std::move(name)} { + AD_CONTRACT_CHECK(queueMaxSize_ > 0); + threads_.reserve(numThreads); for (size_t i = 0; i < numThreads; ++i) { - _threads.emplace_back(&TaskQueue::function_for_thread, this); + threads_.emplace_back(&TaskQueue::function_for_thread, this); } } /// Add a task to the queue for Execution. Blocks until there is at least /// one free spot in the queue. - void push(Task t) { + /// Note: If the execution of the task throws, `std::terminate` will be + /// called. + bool push(Task t) { // the actual logic - auto action = [&, this] { - std::unique_lock l{_queueMutex}; - _workerHasFinishedTask.wait( - l, [&] { return _queuedTasks.size() < _queueMaxSize; }); - _queuedTasks.push(std::move(t)); - _newTaskWasPushed.notify_one(); - }; - - // If TrackTimes==true, measure the time and add it to _pushTime, + auto action = [&, this] { return queuedTasks_.push(std::move(t)); }; + + // If TrackTimes==true, measure the time and add it to pushTime_, // else only perform the pushing. - executeAndUpdateTimer(action, _pushTime); + return executeAndUpdateTimer(action, pushTime_); } // Blocks until all tasks have been computed. After a call to finish, no more // calls to push are allowed. void finish() { - std::unique_lock l{_queueMutex}; - - // empty queue and _shutdownQueue set is the way of signalling the - // destruction to the threads; - _shutdownQueue = true; - // Wait not only until the queue is empty, but also until the tasks are - // actually performed and the threads have joined. - l.unlock(); - _newTaskWasPushed.notify_all(); - for (auto& thread : _threads) { - if (thread.joinable()) { - thread.join(); - } + if (isFinished_.exchange(true)) { + // There was a previous call to `finish()` , so we don't need to do + // anything. 
The atomic exchange is required to not have a data race on + // the `threads_` variable. + return; } - } - - std::optional popManually() { - auto action = [&, this]() -> std::optional { - std::unique_lock l{_queueMutex}; - _newTaskWasPushed.wait( - l, [&] { return !_queuedTasks.empty() || _shutdownQueue; }); - if (_shutdownQueue && _queuedTasks.empty()) { - return std::nullopt; - } - auto task = std::move(_queuedTasks.front()); - _queuedTasks.pop(); - _workerHasFinishedTask.notify_one(); - return task; - }; - return executeAndUpdateTimer(action, _popTime); + queuedTasks_.finish(); + threads_.clear(); } void resetTimers() requires TrackTimes { - _pushTime = 0; - _popTime = 0; + pushTime_ = 0; + popTime_ = 0; } // Execute the callable f of type F. If TrackTimes==true, add the passed time @@ -132,13 +105,9 @@ class TaskQueue { template decltype(auto) executeAndUpdateTimer(F&& f, AtomicMs& duration) { if constexpr (TrackTimes) { - struct T { - ad_utility::Timer _t{ad_utility::Timer::Started}; - AtomicMs& _target; - T(AtomicMs& target) : _target(target) {} - ~T() { _target += _t.msecs().count(); } - }; - T timeHandler{duration}; + ad_utility::Timer t{ad_utility::Timer::Started}; + auto cleanup = + absl::Cleanup{[&duration, &t] { duration += t.msecs().count(); }}; return f(); } else { return f(); @@ -147,9 +116,9 @@ class TaskQueue { // __________________________________________________________________________ std::string getTimeStatistics() const requires TrackTimes { - return "Time spent waiting in queue " + _name + ": " + - std::to_string(_pushTime) + "ms (push), " + - std::to_string(_popTime) + "ms (pop)"; + return "Time spent waiting in queue " + name_ + ": " + + std::to_string(pushTime_) + "ms (push), " + + std::to_string(popTime_) + "ms (pop)"; } ~TaskQueue() { finish(); } @@ -157,13 +126,8 @@ class TaskQueue { private: // _________________________________________________________________________ void function_for_thread() { - while (true) { - auto optionalTask = 
popManually(); - if (!optionalTask) { - return; - } - // perform the task without actually holding the lock. - (*optionalTask)(); + while (auto task = queuedTasks_.pop()) { + task.value()(); } } }; diff --git a/test/TaskQueueTest.cpp b/test/TaskQueueTest.cpp index 8a579eb6ce..2c9a4f6d8c 100644 --- a/test/TaskQueueTest.cpp +++ b/test/TaskQueueTest.cpp @@ -46,24 +46,6 @@ TEST(TaskQueue, SimpleSumWithWait) { ASSERT_EQ(result, 500500); } -TEST(TaskQueue, SimpleSumWithManualPop) { - std::atomic result; - // 0 threads, so we have to pickUp the threads manually from another thread. - ad_utility::TaskQueue q{10, 0}; - auto future = std::async(std::launch::async, [&] { - for (size_t i = 0; i <= 1000; ++i) { - q.push([&result, i] { result += i; }); - } - q.finish(); - }); - while (true) { - auto taskAsOptional = q.popManually(); - if (!taskAsOptional) { - break; - } - // execute the task; - (*taskAsOptional)(); - } - future.get(); - ASSERT_EQ(result, 500500); +TEST(TaskQueue, ThrowOnMaxQueueSizeZero) { + EXPECT_ANY_THROW((ad_utility::TaskQueue{0, 5})); } diff --git a/test/TimerTest.cpp b/test/TimerTest.cpp index a92e53d00e..4a0f462ffd 100644 --- a/test/TimerTest.cpp +++ b/test/TimerTest.cpp @@ -13,7 +13,7 @@ using ad_utility::Timer; using namespace std::chrono_literals; // On macOS the timer seems to work, but the `sleep_for` is too imprecise. -#ifndef __APPLE__ +// That's why we have deactivated all the tests via `GTEST_SKIP` on macOS. 
void testTime(Timer::Duration duration, std::chrono::milliseconds msecs, std::chrono::milliseconds expected) { @@ -32,6 +32,9 @@ void testTime(const ad_utility::Timer& timer, } TEST(Timer, BasicWorkflow) { +#ifdef __APPLE__ + GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#endif Timer t{Timer::Started}; ASSERT_TRUE(t.isRunning()); std::this_thread::sleep_for(10ms); @@ -82,6 +85,9 @@ TEST(Timer, BasicWorkflow) { } TEST(Timer, InitiallyStopped) { +#ifdef __APPLE__ + GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#endif Timer t{Timer::Stopped}; ASSERT_FALSE(t.isRunning()); ASSERT_EQ(t.value(), Timer::Duration::zero()); @@ -96,6 +102,9 @@ TEST(Timer, InitiallyStopped) { } TEST(TimeBlockAndLog, TimeBlockAndLog) { +#ifdef __APPLE__ + GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#endif std::string s; { auto callback = [&s](std::chrono::milliseconds msecs, @@ -107,5 +116,3 @@ TEST(TimeBlockAndLog, TimeBlockAndLog) { } ASSERT_THAT(s, ::testing::MatchesRegex("message: 2[5-9]")); } - -#endif diff --git a/test/TurtleParserTest.cpp b/test/TurtleParserTest.cpp index 4797b7d6b9..691145cfc5 100644 --- a/test/TurtleParserTest.cpp +++ b/test/TurtleParserTest.cpp @@ -647,20 +647,22 @@ std::vector parseFromFile(const std::string& filename, // `useBatchInterface` argument) and possible additional args, and run this // function for all the different parsers that can read from a file (stream and // parallel parser, with all the combinations of the different tokenizers). +auto forAllParallelParsers(const auto& function, const auto&... args) { + function.template operator()>(true, args...); + function.template operator()>(false, args...); + function.template operator()>(true, + args...); + function.template operator()>(false, + args...); +} auto forAllParsers(const auto& function, const auto&... 
args) { function.template operator()>(true, args...); function.template operator()>(false, args...); - function.template operator()>(true, args...); - function.template operator()>(false, args...); - function.template operator()>(true, args...); function.template operator()>(false, args...); - function.template operator()>(true, - args...); - function.template operator()>(false, - args...); + forAllParallelParsers(function, args...); } TEST(TurtleParserTest, TurtleStreamAndParallelParser) { @@ -760,3 +762,81 @@ TEST(TurtleParserTest, multilineComments) { sortTriples(expected); forAllParsers(testWithParser, input, expected); } + +// Test that exceptions during the turtle parsing are properly propagated to the +// calling code. This is especially important for the parallel parsers where the +// actual parsing happens on background threads. +TEST(TurtleParserTest, exceptionPropagation) { + std::string filename{"turtleParserExceptionPropagation.dat"}; + FILE_BUFFER_SIZE() = 1000; + auto testWithParser = [&](bool useBatchInterface, + std::string_view input) { + { + auto of = ad_utility::makeOfstream(filename); + of << input; + } + AD_EXPECT_THROW_WITH_MESSAGE( + (parseFromFile(filename, useBatchInterface)), + ::testing::ContainsRegex("Parse error")); + ad_utility::deleteFile(filename); + }; + forAllParsers(testWithParser, " ."); +} + +// Test that exceptions in the batched reading of the input file are properly +// propagated. +TEST(TurtleParserTest, exceptionPropagationFileBufferReading) { + std::string filename{"turtleParserEmptyInput.dat"}; + auto testWithParser = [&](bool useBatchInterface, + std::string_view input) { + { + auto of = ad_utility::makeOfstream(filename); + of << input; + } + AD_EXPECT_THROW_WITH_MESSAGE( + (parseFromFile(filename, useBatchInterface)), + ::testing::ContainsRegex("Please increase the FILE_BUFFER_SIZE")); + ad_utility::deleteFile(filename); + }; + // Deliberately chosen s.t. 
the first triple fits in a block, but the second + // one doesn't. + FILE_BUFFER_SIZE() = 40; + forAllParallelParsers(testWithParser, + " . \n " + " ."); +} + +// Test that the parallel parser's destructor can be run quickly and without +// blocking, even when there are still lots of blocks in the pipeline that are +// currently being parsed. +TEST(TurtleParserTest, stopParsingOnOutsideFailure) { +#ifdef __APPLE__ + GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#endif + std::string filename{"turtleParserStopParsingOnOutsideFailure.dat"}; + auto testWithParser = [&]( + [[maybe_unused]] bool useBatchInterface, + std::string_view input) { + { + auto of = ad_utility::makeOfstream(filename); + of << input; + } + ad_utility::Timer t{ad_utility::Timer::Stopped}; + { + [[maybe_unused]] Parser parserChild{filename, 10ms}; + t.cont(); + } + EXPECT_LE(t.msecs(), 20ms); + }; + const std::string input = []() { + std::string singleBlock = " . \n "; + std::string longBlock; + longBlock.reserve(200 * singleBlock.size()); + for ([[maybe_unused]] size_t i : ad_utility::integerRange(200ul)) { + longBlock.append(singleBlock); + } + return longBlock; + }(); + FILE_BUFFER_SIZE() = 40; + forAllParallelParsers(testWithParser, input); +} From 96572ac7b5f43239e9a73b609acf10836c5a984f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Thu, 14 Dec 2023 10:08:20 +0100 Subject: [PATCH 57/63] Update CTRE and embed via FetchContent (#1199) In the future we should manage more (or even all) of our git dependencies via FetchContent, as it is much simpler than working with git submodules. 
--- CMakeLists.txt | 17 +- src/parser/RdfEscaping.cpp | 2 +- src/parser/TokenizerCtre.h | 2 +- src/parser/data/BlankNode.cpp | 3 +- src/parser/data/Iri.cpp | 2 +- .../data/VariableToColumnMapPrinters.cpp | 3 +- src/util/Conversions.cpp | 3 +- src/util/CtreHelpers.h | 2 +- src/util/Date.cpp | 48 +- src/util/Date.h | 3 - src/util/GeoSparqlHelpers.cpp | 9 +- src/util/ParseableDuration.h | 22 +- src/util/http/HttpUtils.cpp | 4 +- src/util/http/websocket/WebSocketSession.cpp | 3 +- test/ServiceTest.cpp | 13 +- third_party/ctre/include/ctre/ctre.h | 14569 ---------------- 16 files changed, 62 insertions(+), 14643 deletions(-) delete mode 100644 third_party/ctre/include/ctre/ctre.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ed405e4f88..ae2dcf4014 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,6 +47,7 @@ endif () ############################################################################### ##### Essential settings ##### ############################################################################### +include(FetchContent) ################################ # GTEST AND GMOCK @@ -185,8 +186,11 @@ include_directories(third_party/json/) ################################ # CTRE, Compile-Time-Regular-Expressions ################################ -# Header only, nothing to include -include_directories(third_party/ctre/include) +FetchContent_Declare( + ctre + GIT_REPOSITORY https://github.com/hanickadot/compile-time-regular-expressions.git + GIT_TAG b3d7788b559e34d985c8530c3e0e7260b67505a6 # v3.8.1 +) ################################ # ABSEIL @@ -253,6 +257,13 @@ add_subdirectory(third_party/re2 EXCLUDE_FROM_ALL) target_compile_options(re2 PRIVATE -Wno-unused-but-set-variable) include_directories(SYSTEM third_party/re2) + +################################ +# Apply FetchContent +################################ +FetchContent_MakeAvailable(ctre) +include_directories(${ctre_SOURCE_DIR}/single-header) + # reinstate original flags including all warnings 
set(CMAKE_CXX_FLAGS "${LOCAL_CXX_BACKUP_FLAGS}") @@ -307,7 +318,7 @@ add_definitions(-DLOGLEVEL=${LOG_LEVEL_${LOGLEVEL}}) ################################################## # Precompiled headers set(PRECOMPILED_HEADER_FILES_ENGINE src/util/HashMap.h src/engine/Operation.h src/engine/QueryExecutionTree.h) -set(PRECOMPILED_HEADER_FILES_PARSER src/engine/sparqlExpressions/AggregateExpression.h third_party/ctre/include/ctre/ctre.h third_party/antlr4/runtime/Cpp/runtime/src/antlr4-runtime.h) +set(PRECOMPILED_HEADER_FILES_PARSER src/engine/sparqlExpressions/AggregateExpression.h ${ctre_SOURCE_DIR}/single-header/ctre-unicode.hpp third_party/antlr4/runtime/Cpp/runtime/src/antlr4-runtime.h) add_subdirectory(src/parser) target_precompile_headers(parser PRIVATE ${PRECOMPILED_HEADER_FILES_PARSER}) diff --git a/src/parser/RdfEscaping.cpp b/src/parser/RdfEscaping.cpp index ffc7d287c2..8f89012636 100644 --- a/src/parser/RdfEscaping.cpp +++ b/src/parser/RdfEscaping.cpp @@ -6,9 +6,9 @@ #include #include -#include #include +#include #include #include diff --git a/src/parser/TokenizerCtre.h b/src/parser/TokenizerCtre.h index 85b5aeaccd..4ee6f12f2e 100644 --- a/src/parser/TokenizerCtre.h +++ b/src/parser/TokenizerCtre.h @@ -4,10 +4,10 @@ #pragma once -#include #include #include +#include #include #include "parser/Tokenizer.h" diff --git a/src/parser/data/BlankNode.cpp b/src/parser/data/BlankNode.cpp index bc7e8c2502..1b1c7f17a0 100644 --- a/src/parser/data/BlankNode.cpp +++ b/src/parser/data/BlankNode.cpp @@ -4,10 +4,9 @@ #include "parser/data/BlankNode.h" +#include #include -#include "ctre/ctre.h" - // _____________________________________________________________________________ BlankNode::BlankNode(bool generated, std::string label) : _generated{generated}, _label{std::move(label)} { diff --git a/src/parser/data/Iri.cpp b/src/parser/data/Iri.cpp index e0db562217..340f6fad4d 100644 --- a/src/parser/data/Iri.cpp +++ b/src/parser/data/Iri.cpp @@ -4,7 +4,7 @@ #include 
"parser/data/Iri.h" -#include "ctre/ctre.h" +#include // ____________________________________________________________________________ Iri::Iri(std::string str) : _string{std::move(str)} { AD_CONTRACT_CHECK(ctre::match<"(?:@[a-zA-Z]+(?:-(?:[a-zA-Z]|\\d)+)*@)?" diff --git a/src/parser/data/VariableToColumnMapPrinters.cpp b/src/parser/data/VariableToColumnMapPrinters.cpp index ef656b7182..fbf7808533 100644 --- a/src/parser/data/VariableToColumnMapPrinters.cpp +++ b/src/parser/data/VariableToColumnMapPrinters.cpp @@ -2,7 +2,8 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Kalmbach -#include "ctre/ctre.h" +#include + #include "engine/ExportQueryExecutionTrees.h" #include "global/Constants.h" #include "index/Index.h" diff --git a/src/util/Conversions.cpp b/src/util/Conversions.cpp index fe8d7882db..0ea52a8d0c 100644 --- a/src/util/Conversions.cpp +++ b/src/util/Conversions.cpp @@ -5,7 +5,6 @@ #include "util/Conversions.h" #include -#include #include #include #include @@ -13,6 +12,8 @@ #include #include +#include + #include "../global/Constants.h" #include "../parser/TokenizerCtre.h" #include "./Exception.h" diff --git a/src/util/CtreHelpers.h b/src/util/CtreHelpers.h index a223f07e5e..9536a4bbbc 100644 --- a/src/util/CtreHelpers.h +++ b/src/util/CtreHelpers.h @@ -4,7 +4,7 @@ #pragma once -#include "ctre/ctre.h" +#include /// Helper function for ctre: concatenation of fixed_strings template diff --git a/src/util/Date.cpp b/src/util/Date.cpp index be2ab27723..afc5e078b1 100644 --- a/src/util/Date.cpp +++ b/src/util/Date.cpp @@ -4,6 +4,11 @@ #include "util/Date.h" +#include +#include + +#include + #include "util/Log.h" // ____________________________________________________________________________________________________ @@ -86,25 +91,6 @@ std::pair DateOrLargeYear::toStringAndType() const { AD_FAIL(); } -// Convert a CTRE `match` to an integer. The behavior is undefined if -// the `match` cannot be completely converted to an integer. 
-// We need this for `int64_t` as well as plain `int` because both these types -// are used in the date representations. -template -static int64_t toInt64(const auto& match) { - int64_t result = 0; - const auto& s = match.template get(); - std::from_chars(s.data(), s.data() + s.size(), result); - return result; -} -template -static int toInt(const auto& match) { - int64_t result = toInt64(match); - AD_CORRECTNESS_CHECK(result >= std::numeric_limits::min()); - AD_CORRECTNESS_CHECK(result <= std::numeric_limits::max()); - return static_cast(result); -} - // Regex objects with explicitly named groups to parse dates and times. constexpr static ctll::fixed_string dateRegex{ R"((?-?\d{4,})-(?\d{2})-(?\d{2}))"}; @@ -120,7 +106,7 @@ static Date::TimeZone parseTimeZone(const auto& match) { } else if (!match.template get<"tzHours">()) { return Date::NoTimeZone{}; } - int tz = toInt<"tzHours">(match); + int tz = match.template get<"tzHours">().to_number(); if (match.template get<"tzSign">() == "-") { tz *= -1; } @@ -197,11 +183,11 @@ DateOrLargeYear DateOrLargeYear::parseXsdDatetime(std::string_view dateString) { throw DateParseException{absl::StrCat( "The value ", dateString, " cannot be parsed as an `xsd:dateTime`.")}; } - int64_t year = toInt64<"year">(match); - int month = toInt<"month">(match); - int day = toInt<"day">(match); - int hour = toInt<"hour">(match); - int minute = toInt<"minute">(match); + int64_t year = match.template get<"year">().to_number(); + int month = match.template get<"month">().to_number(); + int day = match.template get<"day">().to_number(); + int hour = match.template get<"hour">().to_number(); + int minute = match.template get<"minute">().to_number(); double second = std::strtod(match.get<"second">().data(), nullptr); return makeDateOrLargeYear(dateString, year, month, day, hour, minute, second, parseTimeZone(match)); @@ -216,9 +202,9 @@ DateOrLargeYear DateOrLargeYear::parseXsdDate(std::string_view dateString) { throw 
DateParseException{absl::StrCat( "The value ", dateString, " cannot be parsed as an `xsd:date`.")}; } - int64_t year = toInt64<"year">(match); - int month = toInt<"month">(match); - int day = toInt<"day">(match); + int64_t year = match.template get<"year">().to_number(); + int month = match.template get<"month">().to_number(); + int day = match.template get<"day">().to_number(); return makeDateOrLargeYear(dateString, year, month, day, -1, 0, 0.0, parseTimeZone(match)); } @@ -233,7 +219,7 @@ DateOrLargeYear DateOrLargeYear::parseGYear(std::string_view dateString) { throw DateParseException{absl::StrCat( "The value ", dateString, " cannot be parsed as an `xsd:gYear`.")}; } - int64_t year = toInt64<"year">(match); + int64_t year = match.template get<"year">().to_number(); return makeDateOrLargeYear(dateString, year, 0, 0, -1, 0, 0.0, parseTimeZone(match)); } @@ -249,8 +235,8 @@ DateOrLargeYear DateOrLargeYear::parseGYearMonth(std::string_view dateString) { throw DateParseException{absl::StrCat( "The value ", dateString, " cannot be parsed as an `xsd:gYearMonth`.")}; } - int64_t year = toInt64<"year">(match); - int month = toInt<"month">(match); + int64_t year = match.template get<"year">().to_number(); + int month = match.template get<"month">().to_number(); return makeDateOrLargeYear(dateString, year, month, 0, -1, 0, 0.0, parseTimeZone(match)); } diff --git a/src/util/Date.h b/src/util/Date.h index adb2bab949..89e2d0db4f 100644 --- a/src/util/Date.h +++ b/src/util/Date.h @@ -11,9 +11,6 @@ #include #include -#include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" -#include "ctre/ctre.h" #include "global/Constants.h" #include "util/CtreHelpers.h" #include "util/NBitInteger.h" diff --git a/src/util/GeoSparqlHelpers.cpp b/src/util/GeoSparqlHelpers.cpp index 4e90911c3e..51e2af711d 100644 --- a/src/util/GeoSparqlHelpers.cpp +++ b/src/util/GeoSparqlHelpers.cpp @@ -4,16 +4,15 @@ #include "./GeoSparqlHelpers.h" +#include + #include +#include #include #include 
-#include -#include #include -#include "./Exception.h" -#include "absl/strings/charconv.h" -#include "ctre/ctre.h" +#include "util/Exception.h" namespace ad_utility { diff --git a/src/util/ParseableDuration.h b/src/util/ParseableDuration.h index db1adc9bf4..83b3f51cd2 100644 --- a/src/util/ParseableDuration.h +++ b/src/util/ParseableDuration.h @@ -6,9 +6,9 @@ #define QLEVER_PARSEABLEDURATION_H #include -#include #include +#include #include #include "util/Exception.h" @@ -59,28 +59,28 @@ class ParseableDuration { static ParseableDuration fromString(std::string_view arg) { using namespace std::chrono; if (auto m = ctre::match(arg)) { - auto amount = m.template get<1>().to_view(); auto unit = m.template get<2>().to_view(); - auto toDuration = [](std::string_view amount) { - return duration_cast(OriginalDuration{ - boost::lexical_cast(amount)}); + auto toDuration = [&m]() { + auto amount = m.template get<1>() + .template to_number(); + return duration_cast(OriginalDuration{amount}); }; if (unit == "ns") { - return toDuration.template operator()(amount); + return toDuration.template operator()(); } else if (unit == "us") { - return toDuration.template operator()(amount); + return toDuration.template operator()(); } else if (unit == "ms") { - return toDuration.template operator()(amount); + return toDuration.template operator()(); } else if (unit == "s") { - return toDuration.template operator()(amount); + return toDuration.template operator()(); } else if (unit == "min") { - return toDuration.template operator()(amount); + return toDuration.template operator()(); } else { // Verify unit was checked exhaustively AD_CORRECTNESS_CHECK(unit == "h"); - return toDuration.template operator()(amount); + return toDuration.template operator()(); } } throw std::runtime_error{absl::StrCat( diff --git a/src/util/http/HttpUtils.cpp b/src/util/http/HttpUtils.cpp index ed194cd277..93ac1d9294 100644 --- a/src/util/http/HttpUtils.cpp +++ b/src/util/http/HttpUtils.cpp @@ -5,7 +5,7 @@ 
#include "./HttpUtils.h" -#include "ctre/ctre.h" +#include // TODO: Which other implementations that are currently still in `HttpUtils.h` // should we move here, to `HttpUtils.cpp`? @@ -27,7 +27,7 @@ Url::Url(std::string_view url) { "URL \"", url, "\" malformed, must match regex ", urlRegexString)); } protocol_ = - match.get<1>().to_string() == "http" ? Protocol::HTTP : Protocol::HTTPS; + match.get<1>().to_view() == "http" ? Protocol::HTTP : Protocol::HTTPS; host_ = match.get<2>().to_string(); port_ = match.get<4>().to_string(); if (port_.empty()) { diff --git a/src/util/http/websocket/WebSocketSession.cpp b/src/util/http/websocket/WebSocketSession.cpp index 3949669106..066de8bb78 100644 --- a/src/util/http/websocket/WebSocketSession.cpp +++ b/src/util/http/websocket/WebSocketSession.cpp @@ -4,9 +4,8 @@ #include "WebSocketSession.h" -#include - #include +#include #include #include "util/Algorithm.h" diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index 2279394827..2e473216a6 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -3,19 +3,14 @@ // Author: Hannah Bast #include -#include +#include #include -#include "./HttpTestHelpers.h" -#include "./IndexTestHelpers.h" -#include "./util/IdTableHelpers.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" -#include "absl/strings/str_split.h" -#include "ctre/ctre.h" +#include "IndexTestHelpers.h" #include "engine/Service.h" #include "parser/GraphPatternOperation.h" +#include "util/IdTableHelpers.h" #include "util/http/HttpUtils.h" // Fixture that sets up a test index and a factory for producing mocks for the @@ -57,7 +52,7 @@ class ServiceTest : public ::testing::Test { // NOTE: The first three are hard-coded in `Service::computeResult`, but // the host and port of the endpoint are derived from the IRI, so the last // two checks are non-trivial. 
- EXPECT_EQ(method, http::verb::post); + EXPECT_EQ(method, boost::beast::http::verb::post); EXPECT_EQ(contentTypeHeader, "application/sparql-query"); EXPECT_EQ(acceptHeader, "text/tab-separated-values"); EXPECT_EQ(url.asString(), expectedUrl); diff --git a/third_party/ctre/include/ctre/ctre.h b/third_party/ctre/include/ctre/ctre.h deleted file mode 100644 index 18acf2e4c8..0000000000 --- a/third_party/ctre/include/ctre/ctre.h +++ /dev/null @@ -1,14569 +0,0 @@ -/* - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ---- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. 
-*/ -#ifndef CTRE_V2__CTRE_UNICODE__HPP -#define CTRE_V2__CTRE_UNICODE__HPP - -#ifndef CTRE_V2__CTRE__HPP -#define CTRE_V2__CTRE__HPP - -#ifndef CTRE_V2__CTRE__LITERALS__HPP -#define CTRE_V2__CTRE__LITERALS__HPP - -#ifndef CTRE_V2__CTLL__HPP -#define CTRE_V2__CTLL__HPP - -#ifndef CTLL__PARSER__HPP -#define CTLL__PARSER__HPP - -#ifndef CTLL__FIXED_STRING__GPP -#define CTLL__FIXED_STRING__GPP - -#include -#include -#include -#include - -namespace ctll { - -struct length_value_t { -uint32_t value; -uint8_t length; -}; - -constexpr length_value_t length_and_value_of_utf8_code_point(uint8_t first_unit) noexcept { -if ((first_unit & 0b1000'0000) == 0b0000'0000) return {static_cast(first_unit), 1}; -else if ((first_unit & 0b1110'0000) == 0b1100'0000) return {static_cast(first_unit & 0b0001'1111), 2}; -else if ((first_unit & 0b1111'0000) == 0b1110'0000) return {static_cast(first_unit & 0b0000'1111), 3}; -else if ((first_unit & 0b1111'1000) == 0b1111'0000) return {static_cast(first_unit & 0b0000'0111), 4}; -else if ((first_unit & 0b1111'1100) == 0b1111'1000) return {static_cast(first_unit & 0b0000'0011), 5}; -else if ((first_unit & 0b1111'1100) == 0b1111'1100) return {static_cast(first_unit & 0b0000'0001), 6}; -else return {0, 0}; -} - -constexpr char32_t value_of_trailing_utf8_code_point(uint8_t unit, bool & correct) noexcept { -if ((unit & 0b1100'0000) == 0b1000'0000) return unit & 0b0011'1111; -else { -correct = false; -return 0; -} -} - -constexpr length_value_t length_and_value_of_utf16_code_point(uint16_t first_unit) noexcept { -if ((first_unit & 0b1111110000000000) == 0b1101'1000'0000'0000) return {static_cast(first_unit & 0b0000001111111111), 2}; -else return {first_unit, 1}; -} - -template struct fixed_string { -char32_t content[N] = {}; -size_t real_size{0}; -bool correct_flag{true}; -template constexpr fixed_string(const T (&input)[N+1]) noexcept { -if constexpr (std::is_same_v) { -#if CTRE_STRING_IS_UTF8 -size_t out{0}; -for (size_t i{0}; i < N; ++i) { -if ((i 
== (N-1)) && (input[i] == 0)) break; -length_value_t info = length_and_value_of_utf8_code_point(input[i]); -switch (info.length) { -case 6: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 5: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 4: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 3: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 2: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 1: -content[out++] = static_cast(info.value); -real_size++; -break; -default: -correct_flag = false; -return; -} -} -#else -for (size_t i{0}; i < N; ++i) { -content[i] = static_cast(input[i]); -if ((i == (N-1)) && (input[i] == 0)) break; -real_size++; -} -#endif -#if __cpp_char8_t -} else if constexpr (std::is_same_v) { -size_t out{0}; -for (size_t i{0}; i < N; ++i) { -if ((i == (N-1)) && (input[i] == 0)) break; -length_value_t info = length_and_value_of_utf8_code_point(input[i]); -switch (info.length) { -case 6: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 5: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 4: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 3: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 2: -if (++i < N) info.value = (info.value << 6) | value_of_trailing_utf8_code_point(input[i], correct_flag); -[[fallthrough]]; -case 1: 
-content[out++] = static_cast(info.value); -real_size++; -break; -default: -correct_flag = false; -return; -} -} -#endif -} else if constexpr (std::is_same_v) { -size_t out{0}; -for (size_t i{0}; i < N; ++i) { -length_value_t info = length_and_value_of_utf16_code_point(input[i]); -if (info.length == 2) { -if (++i < N) { -if ((input[i] & 0b1111'1100'0000'0000) == 0b1101'1100'0000'0000) { -content[out++] = (info.value << 10) | (input[i] & 0b0000'0011'1111'1111); -} else { -correct_flag = false; -break; -} -} -} else { -if ((i == (N-1)) && (input[i] == 0)) break; -content[out++] = info.value; -} -} -real_size = out; -} else if constexpr (std::is_same_v || std::is_same_v) { -for (size_t i{0}; i < N; ++i) { -content[i] = input[i]; -if ((i == (N-1)) && (input[i] == 0)) break; -real_size++; -} -} -} -constexpr fixed_string(const fixed_string & other) noexcept { -for (size_t i{0}; i < N; ++i) { -content[i] = other.content[i]; -} -real_size = other.real_size; -correct_flag = other.correct_flag; -} -constexpr bool correct() const noexcept { -return correct_flag; -} -constexpr size_t size() const noexcept { -return real_size; -} -constexpr const char32_t * begin() const noexcept { -return content; -} -constexpr const char32_t * end() const noexcept { -return content + size(); -} -constexpr char32_t operator[](size_t i) const noexcept { -return content[i]; -} -template constexpr bool is_same_as(const fixed_string & rhs) const noexcept { -if (real_size != rhs.size()) return false; -for (size_t i{0}; i != real_size; ++i) { -if (content[i] != rhs[i]) return false; -} -return true; -} -constexpr operator std::basic_string_view() const noexcept { -return std::basic_string_view{content, size()}; -} -}; - -template <> class fixed_string<0> { -static constexpr char32_t empty[1] = {0}; -public: -template constexpr fixed_string(const T *) noexcept { - -} -constexpr fixed_string(std::initializer_list) noexcept { - -} -constexpr fixed_string(const fixed_string &) noexcept { - -} 
-constexpr bool correct() const noexcept { -return true; -} -constexpr size_t size() const noexcept { -return 0; -} -constexpr const char32_t * begin() const noexcept { -return empty; -} -constexpr const char32_t * end() const noexcept { -return empty + size(); -} -constexpr char32_t operator[](size_t) const noexcept { -return 0; -} -constexpr operator std::basic_string_view() const noexcept { -return std::basic_string_view{empty, 0}; -} -}; - -template fixed_string(const CharT (&)[N]) -> fixed_string; -template fixed_string(fixed_string) -> fixed_string; - -} - -#if CTLL_CNTTP_COMPILER_CHECK -#define CTLL_FIXED_STRING ctll::fixed_string -#else -#define CTLL_FIXED_STRING const auto & -#endif - -#endif - -#ifndef CTLL__TYPE_STACK__HPP -#define CTLL__TYPE_STACK__HPP - -#ifndef CTLL__UTILITIES__HPP -#define CTLL__UTILITIES__HPP - -#include - -#define CTLL_CNTTP_COMPILER_CHECK (__cpp_nontype_template_parameter_class || (__cpp_nontype_template_args >= 201911L) || (__cpp_nontype_template_args >= 201411L && __clang_major__ >= 12)) - -#ifdef _MSC_VER -#define CTLL_FORCE_INLINE __forceinline -#else -#define CTLL_FORCE_INLINE __attribute__((always_inline)) -#endif - -namespace ctll { - -template struct conditional_helper; - -template <> struct conditional_helper { -template using type = A; -}; - -template <> struct conditional_helper { -template using type = B; -}; - -template using conditional = typename conditional_helper::template type; - -} - -#endif - -namespace ctll { - -template struct list { }; - -struct _nothing { }; - -using empty_list = list<>; - -// calculate size of list content -template constexpr auto size(list) noexcept { return sizeof...(Ts); } - - -// check if the list is empty -template constexpr bool empty(list) noexcept { return false; } -constexpr bool empty(empty_list) { return true; } - -// concat two lists together left to right -template constexpr auto concat(list, list) noexcept -> list { return {}; } - -// push something to the front of a list 
-template constexpr auto push_front(T, list) noexcept -> list { return {}; } - -// pop element from the front of a list -template constexpr auto pop_front(list) noexcept -> list { return {}; } -constexpr auto pop_front(empty_list) -> empty_list; - -// pop element from the front of a list and return new typelist too -template struct list_pop_pair { -Front front{}; -List list{}; -constexpr list_pop_pair() = default; -}; - -template constexpr auto pop_and_get_front(list, T = T()) noexcept -> list_pop_pair> { return {}; } -template constexpr auto pop_and_get_front(empty_list, T = T()) noexcept -> list_pop_pair { return {}; } - -// return front of the list -template constexpr auto front(list, T = T()) noexcept -> Head { return {}; } -template constexpr auto front(empty_list, T = T()) noexcept -> T { return {}; } - -// set operations -template struct item_matcher { -struct not_selected { -template friend constexpr auto operator+(list, not_selected) -> list; -}; -template struct wrapper { -template friend constexpr auto operator+(list, wrapper) -> list; -}; - -static constexpr auto check(T) { return std::true_type{}; } -static constexpr auto check(...) { return std::false_type{}; } -static constexpr auto select(T) { return not_selected{}; } -template static constexpr auto select(Y) { return wrapper{}; } -}; - -template constexpr bool exists_in(T, list) noexcept { -return (item_matcher::check(Ts{}) || ... || false); -} - -template constexpr auto add_item(T item, list l) noexcept { -if constexpr (exists_in(item, l)) { -return l; -} else { -return list{}; -} -} - -template constexpr auto remove_item(T, list) noexcept { -item_matcher matcher; -return decltype((list<>{} + ... 
+ matcher.select(Ts{}))){}; -} - -} - -#endif - -#ifndef CTLL__GRAMMARS__HPP -#define CTLL__GRAMMARS__HPP - -namespace ctll { - -// terminal type representing symbol / character of any type -template struct term { -static constexpr auto value = v; -}; - -// epsilon = nothing on input tape -// also used as an command for parsing means "do nothing" -struct epsilon { -static constexpr auto value = '-'; -}; - -// empty_stack_symbol = nothing on stack -struct empty_stack_symbol {}; - -// push is alias to list -template using push = list; - -// accept/reject type for controlling output of LL1 machine -struct accept { constexpr explicit operator bool() noexcept { return true; } }; -struct reject { constexpr explicit operator bool() noexcept { return false; } }; - -// action type, every action item in grammar must inherit from -struct action { -struct action_tag { }; -}; - -// move one character forward and pop it from stack command -struct pop_input { -struct pop_input_tag { }; -}; - -// additional overloads for type list -template constexpr auto push_front(pop_input, list) -> list { return {}; } - -template constexpr auto push_front(epsilon, list) -> list { return {}; } - -template constexpr auto push_front(list, list) -> list { return {}; } - -template constexpr auto pop_front_and_push_front(T item, list l) { -return push_front(item, pop_front(l)); -} - -// SPECIAL matching types for nicer grammars - -// match any term -struct anything { -constexpr inline anything() noexcept { } -template constexpr anything(term) noexcept; -}; - -// match range of term A-B -template struct range { -constexpr inline range() noexcept { } -//template constexpr range(term) noexcept requires (A <= V) && (V <= B); -template > constexpr inline range(term) noexcept; -}; - -#ifdef __EDG__ -template struct contains { -static constexpr bool value = ((Set == V) || ... 
|| false); -}; -#endif - -// match terms defined in set -template struct set { -constexpr inline set() noexcept { } -#ifdef __EDG__ -template ::value>> constexpr inline set(term) noexcept; -#else -template > constexpr inline set(term) noexcept; -#endif -}; - -// match terms not defined in set -template struct neg_set { -constexpr inline neg_set() noexcept { } - -#ifdef __EDG__ -template ::value>> constexpr inline neg_set(term) noexcept; -#else -template > constexpr inline neg_set(term) noexcept; -#endif -}; - -// AUGMENTED grammar which completes user-defined grammar for all other cases -template struct augment_grammar: public Grammar { -// start nonterminal is defined in parent type -using typename Grammar::_start; - -// grammar rules are inherited from Grammar parent type -using Grammar::rule; - -// term on stack and on input means pop_input; -template static constexpr auto rule(term, term) -> ctll::pop_input; - -// if the type on stack (range, set, neg_set, anything) is constructible from the terminal => pop_input -template static constexpr auto rule(Expected, term) -> std::enable_if_t>, ctll::pop_input>; - -// empty stack and empty input means we are accepting -static constexpr auto rule(empty_stack_symbol, epsilon) -> ctll::accept; - -// not matching anything else => reject -static constexpr auto rule(...) 
-> ctll::reject; - -// start stack is just a list; -using start_stack = list; -}; - -} - -#endif - -#ifndef CTLL__ACTIONS__HPP -#define CTLL__ACTIONS__HPP - -namespace ctll { -struct empty_subject { }; - -struct empty_actions { -// dummy operator so using Actions::operator() later will not give error -template static constexpr auto apply(Action, InputSymbol, Subject subject) { -return subject; -} -}; - -template struct identity: public Actions { -using Actions::apply; -// allow empty_subject to exists -template constexpr static auto apply(Action, term, empty_subject) -> empty_subject { return {}; } -template constexpr static auto apply(Action, epsilon, empty_subject) -> empty_subject { return {}; } -}; - -template struct ignore_unknown: public Actions { -using Actions::apply; -// allow flow thru unknown actions -template constexpr static auto apply(Action, term, Subject) -> Subject { return {}; } -template constexpr static auto apply(Action, epsilon, Subject) -> Subject { return {}; } -}; -} - -#endif - -#include - -namespace ctll { - -enum class decision { -reject, -accept, -undecided -}; - -struct placeholder { }; - -template using index_placeholder = placeholder; - -#if CTLL_CNTTP_COMPILER_CHECK -template struct parser { // in c++20 -#else -template struct parser { -#endif - -#ifdef __GNUC__ // workaround to GCC bug -#if CTLL_CNTTP_COMPILER_CHECK -static constexpr auto _input = input; // c++20 mode -#else -static constexpr auto & _input = input; // c++17 mode -#endif -#else -static constexpr auto _input = input; // everyone else -#endif - -using Actions = ctll::conditional, identity>; -using grammar = augment_grammar; - -template struct results { -constexpr inline CTLL_FORCE_INLINE operator bool() const noexcept { -return Decision == decision::accept; -} - -#ifdef __GNUC__ // workaround to GCC bug -#if CTLL_CNTTP_COMPILER_CHECK -static constexpr auto _input = input; // c++20 mode -#else -static constexpr auto & _input = input; // c++17 mode -#endif -#else 
-static constexpr auto _input = input; // everyone else -#endif - -using output_type = Subject; - -constexpr auto operator+(placeholder) const noexcept { -if constexpr (Decision == decision::undecided) { -// parse for current char (RPos) with previous stack and subject :) -return parser::template decide({}, {}); -} else { -// if there is decision already => just push it to the end of fold expression -return *this; -} -} -}; - -template static constexpr auto get_current_term() noexcept { -if constexpr (Pos < input.size()) { -constexpr auto value = input[Pos]; -if constexpr (value <= static_cast(std::numeric_limits::max())) { -return term(value)>{}; -} else { -return term{}; -} - -} else { -// return epsilon if we are past the input -return epsilon{}; -} -} -template static constexpr auto get_previous_term() noexcept { -if constexpr (Pos == 0) { -// there is no previous character on input if we are on start -return epsilon{}; -} else if constexpr ((Pos-1) < input.size()) { -constexpr auto value = input[Pos-1]; -if constexpr (value <= static_cast(std::numeric_limits::max())) { -return term(value)>{}; -} else { -return term{}; -} -} else { -return epsilon{}; -} -} -// if rule is accept => return true and subject -template -static constexpr auto move(ctll::accept, Terminal, Stack, Subject) noexcept { -return typename parser::template results(); -} -// if rule is reject => return false and subject -template -static constexpr auto move(ctll::reject, Terminal, Stack, Subject) noexcept { -return typename parser::template results(); -} -// if rule is pop_input => move to next character -template -static constexpr auto move(ctll::pop_input, Terminal, Stack, Subject) noexcept { -return typename parser::template results(); -} -// if rule is string => push it to the front of stack -template -static constexpr auto move(push string, Terminal, Stack stack, Subject subject) noexcept { -return decide(push_front(string, stack), subject); -} -// if rule is epsilon (empty string) => 
continue -template -static constexpr auto move(epsilon, Terminal, Stack stack, Subject subject) noexcept { -return decide(stack, subject); -} -// if rule is string with current character at the beginning (term) => move to next character -// and push string without the character (quick LL(1)) -template -static constexpr auto move(push, Content...>, term, Stack stack, Subject) noexcept { -constexpr auto _input = input; -return typename parser::template results(), stack)), Subject, decision::undecided>(); -} -// if rule is string with any character at the beginning (compatible with current term) => move to next character -// and push string without the character (quick LL(1)) -template -static constexpr auto move(push, term, Stack stack, Subject) noexcept { -constexpr auto _input = input; -return typename parser::template results(), stack)), Subject, decision::undecided>(); -} -// decide if we need to take action or move -template static constexpr auto decide(Stack previous_stack, Subject previous_subject) noexcept { -// each call means we pop something from stack -auto top_symbol = decltype(ctll::front(previous_stack, empty_stack_symbol()))(); -// gcc pedantic warning -[[maybe_unused]] auto stack = decltype(ctll::pop_front(previous_stack))(); - -// in case top_symbol is action type (apply it on previous subject and get new one) -if constexpr (std::is_base_of_v) { -auto subject = Actions::apply(top_symbol, get_previous_term(), previous_subject); - -// in case that semantic action is error => reject input -if constexpr (std::is_same_v) { -return typename parser::template results(); -} else { -return decide(stack, subject); -} -} else { -// all other cases are ordinary for LL(1) parser -auto current_term = get_current_term(); -auto rule = decltype(grammar::rule(top_symbol,current_term))(); -return move(rule, current_term, stack, previous_subject); -} -} - -// trampolines with folded expression -template static constexpr auto trampoline_decide(Subject, 
std::index_sequence) noexcept { -// parse everything for first char and than for next and next ... -// Pos+1 is needed as we want to finish calculation with epsilons on stack -auto v = (decide<0, typename grammar::start_stack, Subject>({}, {}) + ... + index_placeholder()); -return v; -} - -template static constexpr auto trampoline_decide(Subject subject = {}) noexcept { -// there will be no recursion, just sequence long as the input -return trampoline_decide(subject, std::make_index_sequence()); -} - -template using output = decltype(trampoline_decide()); -template static inline constexpr bool correct_with = trampoline_decide(); - -}; - -} // end of ctll namespace - -#endif - -#endif - -#ifndef CTRE__PCRE_ACTIONS__HPP -#define CTRE__PCRE_ACTIONS__HPP - -#ifndef CTRE__PCRE__HPP -#define CTRE__PCRE__HPP - -// THIS FILE WAS GENERATED BY DESATOMAT TOOL, DO NOT MODIFY THIS FILE - -namespace ctre { - -struct pcre { - -// NONTERMINALS: -struct a {}; -struct b {}; -struct backslash {}; -struct backslash_range {}; -struct block {}; -struct block_name2 {}; -struct block_name {}; -struct c {}; -struct class_named_name {}; -struct content2 {}; -struct content {}; -struct content_in_capture {}; -struct d {}; -struct e {}; -struct f {}; -struct g {}; -struct h {}; -struct hexdec_repeat {}; -struct i {}; -struct j {}; -struct k {}; -struct l {}; -struct m {}; -struct mod {}; -struct mod_opt {}; -struct n {}; -struct number2 {}; -struct number {}; -struct o {}; -struct property_name2 {}; -struct property_name {}; -struct property_value2 {}; -struct property_value {}; -struct range {}; -struct repeat {}; -struct s {}; using _start = s; -struct set2a {}; -struct set2b {}; -struct string2 {}; - -// 'action' types: -struct class_digit: ctll::action {}; -struct class_horizontal_space: ctll::action {}; -struct class_named_alnum: ctll::action {}; -struct class_named_alpha: ctll::action {}; -struct class_named_ascii: ctll::action {}; -struct class_named_blank: ctll::action {}; -struct 
class_named_cntrl: ctll::action {}; -struct class_named_digit: ctll::action {}; -struct class_named_graph: ctll::action {}; -struct class_named_lower: ctll::action {}; -struct class_named_print: ctll::action {}; -struct class_named_punct: ctll::action {}; -struct class_named_space: ctll::action {}; -struct class_named_upper: ctll::action {}; -struct class_named_word: ctll::action {}; -struct class_named_xdigit: ctll::action {}; -struct class_non_horizontal_space: ctll::action {}; -struct class_non_vertical_space: ctll::action {}; -struct class_nondigit: ctll::action {}; -struct class_nonnewline: ctll::action {}; -struct class_nonspace: ctll::action {}; -struct class_nonword: ctll::action {}; -struct class_space: ctll::action {}; -struct class_vertical_space: ctll::action {}; -struct class_word: ctll::action {}; -struct create_hexdec: ctll::action {}; -struct create_number: ctll::action {}; -struct finish_hexdec: ctll::action {}; -struct look_finish: ctll::action {}; -struct make_alternate: ctll::action {}; -struct make_atomic: ctll::action {}; -struct make_back_reference: ctll::action {}; -struct make_capture: ctll::action {}; -struct make_capture_with_name: ctll::action {}; -struct make_lazy: ctll::action {}; -struct make_optional: ctll::action {}; -struct make_possessive: ctll::action {}; -struct make_property: ctll::action {}; -struct make_property_negative: ctll::action {}; -struct make_range: ctll::action {}; -struct make_relative_back_reference: ctll::action {}; -struct make_sequence: ctll::action {}; -struct negate_class_named: ctll::action {}; -struct prepare_capture: ctll::action {}; -struct push_assert_begin: ctll::action {}; -struct push_assert_end: ctll::action {}; -struct push_assert_subject_begin: ctll::action {}; -struct push_assert_subject_end: ctll::action {}; -struct push_assert_subject_end_with_lineend: ctll::action {}; -struct push_character: ctll::action {}; -struct push_character_alarm: ctll::action {}; -struct push_character_anything: 
ctll::action {}; -struct push_character_escape: ctll::action {}; -struct push_character_formfeed: ctll::action {}; -struct push_character_newline: ctll::action {}; -struct push_character_null: ctll::action {}; -struct push_character_return_carriage: ctll::action {}; -struct push_character_tab: ctll::action {}; -struct push_empty: ctll::action {}; -struct push_hexdec: ctll::action {}; -struct push_name: ctll::action {}; -struct push_not_word_boundary: ctll::action {}; -struct push_number: ctll::action {}; -struct push_property_name: ctll::action {}; -struct push_property_value: ctll::action {}; -struct push_word_boundary: ctll::action {}; -struct repeat_ab: ctll::action {}; -struct repeat_at_least: ctll::action {}; -struct repeat_exactly: ctll::action {}; -struct repeat_plus: ctll::action {}; -struct repeat_star: ctll::action {}; -struct reset_capture: ctll::action {}; -struct set_combine: ctll::action {}; -struct set_make: ctll::action {}; -struct set_make_negative: ctll::action {}; -struct set_start: ctll::action {}; -struct start_atomic: ctll::action {}; -struct start_lookahead_negative: ctll::action {}; -struct start_lookahead_positive: ctll::action {}; - -// (q)LL1 function: -using _others = ctll::neg_set<'!','$','\x28','\x29','*','+',',','-','.','/',':','<','=','>','?','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','a','b','c','d','e','f','g','h','0','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>; -static constexpr auto rule(s, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(s, ctll::term<'['>) -> ctll::push; -static constexpr auto rule(s, ctll::term<'\x28'>) -> ctll::push; -static constexpr auto rule(s, ctll::term<'^'>) -> ctll::push; -static constexpr auto rule(s, ctll::term<'$'>) -> ctll::push; -static constexpr auto rule(s, 
ctll::set<'!',',','-','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(s, _others) -> ctll::push; -static constexpr auto rule(s, ctll::term<'.'>) -> ctll::push; -static constexpr auto rule(s, ctll::term<'|'>) -> ctll::push; -static constexpr auto rule(s, ctll::epsilon) -> ctll::push; -static constexpr auto rule(s, ctll::set<'\x29','*','+','?','\x7B','\x7D'>) -> ctll::reject; - -static constexpr auto rule(a, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(a, ctll::term<'['>) -> ctll::push; -static constexpr auto rule(a, ctll::term<'\x28'>) -> ctll::push; -static constexpr auto rule(a, ctll::term<'^'>) -> ctll::push; -static constexpr auto rule(a, ctll::term<'$'>) -> ctll::push; -static constexpr auto rule(a, ctll::set<'!',',','-','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(a, _others) -> ctll::push; -static constexpr auto rule(a, ctll::term<'.'>) -> ctll::push; -static constexpr auto rule(a, ctll::term<'\x29'>) -> ctll::push; -static constexpr auto rule(a, ctll::epsilon) -> ctll::push; -static constexpr auto rule(a, ctll::set<'*','+','?','\x7B','|','\x7D'>) -> ctll::reject; - -static constexpr auto rule(b, ctll::term<','>) -> ctll::push; -static constexpr auto rule(b, ctll::term<'\x7D'>) -> ctll::push; - -static constexpr auto rule(backslash, ctll::term<'d'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'h'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'H'>) -> ctll::push; 
-static constexpr auto rule(backslash, ctll::term<'V'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'D'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'N'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'S'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'W'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'s'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'v'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'w'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'g'>) -> ctll::push, m>; -static constexpr auto rule(backslash, ctll::term<'p'>) -> ctll::push, property_name, ctll::term<'\x7D'>, make_property>; -static constexpr auto rule(backslash, ctll::term<'P'>) -> ctll::push, property_name, ctll::term<'\x7D'>, make_property_negative>; -static constexpr auto rule(backslash, ctll::term<'u'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'x'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'A'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'z'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'Z'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::set<'$','\x28','\x29','*','+','-','.','/','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'a'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'e'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'f'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'n'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'0'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'r'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'t'>) -> ctll::push; 
-static constexpr auto rule(backslash, ctll::term<'B'>) -> ctll::push; -static constexpr auto rule(backslash, ctll::term<'b'>) -> ctll::push; - -static constexpr auto rule(backslash_range, ctll::term<'u'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'x'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::set<'$','\x28','\x29','*','+','-','.','/','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'a'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'e'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'f'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'n'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'0'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'r'>) -> ctll::push; -static constexpr auto rule(backslash_range, ctll::term<'t'>) -> ctll::push; - -static constexpr auto rule(block, ctll::term<'\\'>) -> ctll::push>; -static constexpr auto rule(block, ctll::term<'['>) -> ctll::push>; -static constexpr auto rule(block, ctll::term<'?'>) -> ctll::push; -static constexpr auto rule(block, ctll::term<'\x28'>) -> ctll::push>; -static constexpr auto rule(block, ctll::term<'^'>) -> ctll::push>; -static constexpr auto rule(block, ctll::term<'$'>) -> ctll::push>; -static constexpr auto rule(block, ctll::set<'!',',','-','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push>; -static constexpr auto rule(block, _others) -> ctll::push>; -static constexpr auto rule(block, ctll::term<'.'>) -> ctll::push>; -static constexpr auto rule(block, ctll::term<'|'>) -> ctll::push>; -static constexpr auto rule(block, 
ctll::term<'\x29'>) -> ctll::push; -static constexpr auto rule(block, ctll::set<'*','+','\x7B','\x7D'>) -> ctll::reject; - -static constexpr auto rule(block_name2, ctll::set<'>','\x7D'>) -> ctll::epsilon; -static constexpr auto rule(block_name2, ctll::set<'0','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(block_name, ctll::set<'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'>) -> ctll::push; - -static constexpr auto rule(c, ctll::term<'['>) -> ctll::push, i, range, set_start, set2b, set_make, ctll::term<']'>>; -static constexpr auto rule(c, ctll::term<'\\'>) -> ctll::push>; -static constexpr auto rule(c, ctll::set<'!','$','\x28','\x29','*','+',',','.','/',':','<','=','>','?','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','0','t','u','v','w','x','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push>; -static constexpr auto rule(c, _others) -> ctll::push>; -static constexpr auto rule(c, ctll::term<'^'>) -> ctll::push>; -static constexpr auto rule(c, ctll::set<'-',']'>) -> ctll::reject; - -static constexpr auto rule(class_named_name, ctll::term<'x'>) -> ctll::push, ctll::term<'i'>, ctll::term<'g'>, ctll::term<'i'>, ctll::term<'t'>, class_named_xdigit>; -static constexpr auto rule(class_named_name, ctll::term<'d'>) -> ctll::push, ctll::term<'g'>, ctll::term<'i'>, ctll::term<'t'>, class_named_digit>; -static constexpr auto rule(class_named_name, ctll::term<'b'>) -> ctll::push, ctll::term<'a'>, 
ctll::term<'n'>, ctll::term<'k'>, class_named_blank>; -static constexpr auto rule(class_named_name, ctll::term<'c'>) -> ctll::push, ctll::term<'t'>, ctll::term<'r'>, ctll::term<'l'>, class_named_cntrl>; -static constexpr auto rule(class_named_name, ctll::term<'w'>) -> ctll::push, ctll::term<'r'>, ctll::term<'d'>, class_named_word>; -static constexpr auto rule(class_named_name, ctll::term<'l'>) -> ctll::push, ctll::term<'w'>, ctll::term<'e'>, ctll::term<'r'>, class_named_lower>; -static constexpr auto rule(class_named_name, ctll::term<'s'>) -> ctll::push, ctll::term<'a'>, ctll::term<'c'>, ctll::term<'e'>, class_named_space>; -static constexpr auto rule(class_named_name, ctll::term<'u'>) -> ctll::push, ctll::term<'p'>, ctll::term<'e'>, ctll::term<'r'>, class_named_upper>; -static constexpr auto rule(class_named_name, ctll::term<'g'>) -> ctll::push, ctll::term<'a'>, ctll::term<'p'>, ctll::term<'h'>, class_named_graph>; -static constexpr auto rule(class_named_name, ctll::term<'a'>) -> ctll::push; -static constexpr auto rule(class_named_name, ctll::term<'p'>) -> ctll::push; - -static constexpr auto rule(content2, ctll::term<'\x29'>) -> ctll::epsilon; -static constexpr auto rule(content2, ctll::epsilon) -> ctll::epsilon; -static constexpr auto rule(content2, ctll::term<'|'>) -> ctll::push; - -static constexpr auto rule(content, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(content, ctll::term<'['>) -> ctll::push; -static constexpr auto rule(content, ctll::term<'\x28'>) -> ctll::push; -static constexpr auto rule(content, ctll::term<'^'>) -> ctll::push; -static constexpr auto rule(content, ctll::term<'$'>) -> ctll::push; -static constexpr auto rule(content, ctll::set<'!',',','-','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> 
ctll::push; -static constexpr auto rule(content, _others) -> ctll::push; -static constexpr auto rule(content, ctll::term<'.'>) -> ctll::push; -static constexpr auto rule(content, ctll::set<'\x29','*','+','?','\x7B','|','\x7D'>) -> ctll::reject; - -static constexpr auto rule(content_in_capture, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'['>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'\x28'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'^'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'$'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::set<'!',',','-','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(content_in_capture, _others) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'.'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'|'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::term<'\x29'>) -> ctll::push; -static constexpr auto rule(content_in_capture, ctll::set<'*','+','?','\x7B','\x7D'>) -> ctll::reject; - -static constexpr auto rule(d, ctll::term<'<'>) -> ctll::push'>, content_in_capture, make_capture_with_name, ctll::term<'\x29'>>; -static constexpr auto rule(d, ctll::term<':'>) -> ctll::push>; -static constexpr auto rule(d, ctll::term<'>'>) -> ctll::push>; -static constexpr auto rule(d, ctll::term<'!'>) -> ctll::push>; -static constexpr auto rule(d, ctll::term<'='>) -> ctll::push>; - -static constexpr auto rule(e, ctll::term<'d'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'h'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'H'>) -> 
ctll::push; -static constexpr auto rule(e, ctll::term<'V'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'D'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'N'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'S'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'W'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'s'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'v'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'w'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'p'>) -> ctll::push, property_name, ctll::term<'\x7D'>, make_property>; -static constexpr auto rule(e, ctll::term<'P'>) -> ctll::push, property_name, ctll::term<'\x7D'>, make_property_negative>; -static constexpr auto rule(e, ctll::term<'u'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'x'>) -> ctll::push; -static constexpr auto rule(e, ctll::set<'$','\x28','\x29','*','+','-','.','/','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'a'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'e'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'f'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'n'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'0'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'r'>) -> ctll::push; -static constexpr auto rule(e, ctll::term<'t'>) -> ctll::push; - -static constexpr auto rule(f, ctll::term<'d'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'h'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'H'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'V'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'D'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'N'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'S'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'W'>) -> ctll::push; -static constexpr auto rule(f, 
ctll::term<'s'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'v'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'w'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'p'>) -> ctll::push, property_name, ctll::term<'\x7D'>, make_property>; -static constexpr auto rule(f, ctll::term<'P'>) -> ctll::push, property_name, ctll::term<'\x7D'>, make_property_negative>; -static constexpr auto rule(f, ctll::term<'u'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'x'>) -> ctll::push; -static constexpr auto rule(f, ctll::set<'$','\x28','\x29','*','+','-','.','/','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'a'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'e'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'f'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'n'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'0'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'r'>) -> ctll::push; -static constexpr auto rule(f, ctll::term<'t'>) -> ctll::push; - -static constexpr auto rule(g, ctll::term<'s'>) -> ctll::push, ctll::term<'i'>, ctll::term<'i'>, class_named_ascii>; -static constexpr auto rule(g, ctll::term<'l'>) -> ctll::push; - -static constexpr auto rule(h, ctll::term<'r'>) -> ctll::push, ctll::term<'n'>, ctll::term<'t'>, class_named_print>; -static constexpr auto rule(h, ctll::term<'u'>) -> ctll::push, ctll::term<'c'>, ctll::term<'t'>, class_named_punct>; - -static constexpr auto rule(hexdec_repeat, ctll::term<'\x7D'>) -> ctll::epsilon; -static constexpr auto rule(hexdec_repeat, ctll::set<'0','A','B','C','D','E','F','a','b','c','d','e','f','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(i, ctll::term<'^'>) -> ctll::push, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'x'>) -> ctll::push, ctll::term<'i'>, ctll::term<'g'>, ctll::term<'i'>, ctll::term<'t'>, class_named_xdigit, 
ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'d'>) -> ctll::push, ctll::term<'g'>, ctll::term<'i'>, ctll::term<'t'>, class_named_digit, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'b'>) -> ctll::push, ctll::term<'a'>, ctll::term<'n'>, ctll::term<'k'>, class_named_blank, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'c'>) -> ctll::push, ctll::term<'t'>, ctll::term<'r'>, ctll::term<'l'>, class_named_cntrl, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'w'>) -> ctll::push, ctll::term<'r'>, ctll::term<'d'>, class_named_word, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'l'>) -> ctll::push, ctll::term<'w'>, ctll::term<'e'>, ctll::term<'r'>, class_named_lower, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'s'>) -> ctll::push, ctll::term<'a'>, ctll::term<'c'>, ctll::term<'e'>, class_named_space, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'u'>) -> ctll::push, ctll::term<'p'>, ctll::term<'e'>, ctll::term<'r'>, class_named_upper, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'g'>) -> ctll::push, ctll::term<'a'>, ctll::term<'p'>, ctll::term<'h'>, class_named_graph, ctll::term<':'>, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'a'>) -> ctll::push, ctll::term<']'>>; -static constexpr auto rule(i, ctll::term<'p'>) -> ctll::push, ctll::term<']'>>; - -static constexpr auto rule(j, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(j, ctll::set<'!','$','\x28','\x29','*','+',',','.','/',':','<','=','>','?','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','0','t','u','v','w','x','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static 
constexpr auto rule(j, _others) -> ctll::push; -static constexpr auto rule(j, ctll::set<'-','[',']'>) -> ctll::reject; - -static constexpr auto rule(k, ctll::term<'\x7B'>) -> ctll::push, push_hexdec, hexdec_repeat, ctll::term<'\x7D'>, finish_hexdec>; -static constexpr auto rule(k, ctll::set<'0','A','B','C','D','E','F','a','b','c','d','e','f','1','2','3','4','5','6','7','8','9'>) -> ctll::push, push_hexdec, ctll::set<'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f'>, push_hexdec, ctll::set<'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f'>, push_hexdec, finish_hexdec>; - -static constexpr auto rule(l, ctll::term<'\x7B'>) -> ctll::push, push_hexdec, hexdec_repeat, ctll::term<'\x7D'>, finish_hexdec>; -static constexpr auto rule(l, ctll::set<'0','A','B','C','D','E','F','a','b','c','d','e','f','1','2','3','4','5','6','7','8','9'>) -> ctll::push, push_hexdec, finish_hexdec>; - -static constexpr auto rule(m, ctll::set<'0','1','2','3','4','5','6','7','8','9'>) -> ctll::push, make_back_reference>; -static constexpr auto rule(m, ctll::term<'-'>) -> ctll::push, make_relative_back_reference>; -static constexpr auto rule(m, ctll::set<'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'>) -> ctll::push, make_back_reference>; - -static constexpr auto rule(mod, ctll::set<'!','$','\x28','\x29',',','-','.','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','|','0','1','2','3','4','5','6','7','8','9'>) -> ctll::epsilon; -static constexpr auto rule(mod, ctll::epsilon) -> ctll::epsilon; -static constexpr auto rule(mod, _others) -> ctll::epsilon; -static 
constexpr auto rule(mod, ctll::term<'?'>) -> ctll::push; -static constexpr auto rule(mod, ctll::term<'+'>) -> ctll::push; -static constexpr auto rule(mod, ctll::set<'*','\x7B','\x7D'>) -> ctll::reject; - -static constexpr auto rule(mod_opt, ctll::set<'!','$','\x28','\x29',',','-','.','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','|','0','1','2','3','4','5','6','7','8','9'>) -> ctll::epsilon; -static constexpr auto rule(mod_opt, ctll::epsilon) -> ctll::epsilon; -static constexpr auto rule(mod_opt, _others) -> ctll::epsilon; -static constexpr auto rule(mod_opt, ctll::term<'?'>) -> ctll::push; -static constexpr auto rule(mod_opt, ctll::set<'*','+','\x7B','\x7D'>) -> ctll::reject; - -static constexpr auto rule(n, ctll::set<'0','1','2','3','4','5','6','7','8','9'>) -> ctll::push, mod>; -static constexpr auto rule(n, ctll::term<'\x7D'>) -> ctll::push; - -static constexpr auto rule(number2, ctll::set<',','\x7D'>) -> ctll::epsilon; -static constexpr auto rule(number2, ctll::set<'0','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(number, ctll::set<'0','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(o, ctll::term<'p'>) -> ctll::push, ctll::term<'a'>, class_named_alpha>; -static constexpr auto rule(o, ctll::term<'n'>) -> ctll::push, ctll::term<'m'>, class_named_alnum>; - -static constexpr auto rule(property_name2, ctll::term<'\x7D'>) -> ctll::epsilon; -static constexpr auto rule(property_name2, ctll::term<'='>) -> ctll::push; -static constexpr auto rule(property_name2, 
ctll::set<'0','.','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(property_name, ctll::set<'0','.','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(property_value2, ctll::term<'\x7D'>) -> ctll::epsilon; -static constexpr auto rule(property_value2, ctll::set<'0','.','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(property_value, ctll::set<'0','.','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; - -static constexpr auto rule(range, ctll::set<'!','$','\x28','\x29','*','+',',','.','/',':','<','=','>','?','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','\x7B','|','\x7D','0','1','2','3','4','5','6','7','8','9'>) -> ctll::epsilon; -static constexpr auto rule(range, ctll::epsilon) -> ctll::epsilon; -static constexpr auto rule(range, _others) -> ctll::epsilon; -static constexpr auto rule(range, ctll::term<'-'>) -> ctll::push; - -static constexpr auto 
rule(repeat, ctll::set<'!','$','\x28','\x29',',','-','.','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','|','0','1','2','3','4','5','6','7','8','9'>) -> ctll::epsilon; -static constexpr auto rule(repeat, ctll::epsilon) -> ctll::epsilon; -static constexpr auto rule(repeat, _others) -> ctll::epsilon; -static constexpr auto rule(repeat, ctll::term<'?'>) -> ctll::push; -static constexpr auto rule(repeat, ctll::term<'\x7B'>) -> ctll::push; -static constexpr auto rule(repeat, ctll::term<'+'>) -> ctll::push; -static constexpr auto rule(repeat, ctll::term<'*'>) -> ctll::push; -static constexpr auto rule(repeat, ctll::term<'\x7D'>) -> ctll::reject; - -static constexpr auto rule(set2a, ctll::term<']'>) -> ctll::epsilon; -static constexpr auto rule(set2a, ctll::term<'['>) -> ctll::push, i, range, set_start, set2b>; -static constexpr auto rule(set2a, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(set2a, ctll::set<'!','$','\x28','\x29','*','+',',','.','/',':','<','=','>','?','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','0','t','u','v','w','x','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(set2a, _others) -> ctll::push; -static constexpr auto rule(set2a, ctll::term<'-'>) -> ctll::reject; - -static constexpr auto rule(set2b, ctll::term<']'>) -> ctll::epsilon; -static constexpr auto rule(set2b, ctll::term<'['>) -> ctll::push, i, range, set_combine, set2b>; -static constexpr auto rule(set2b, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(set2b, 
ctll::set<'!','$','\x28','\x29','*','+',',','.','/',':','<','=','>','?','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','^','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','0','t','u','v','w','x','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(set2b, _others) -> ctll::push; -static constexpr auto rule(set2b, ctll::term<'-'>) -> ctll::reject; - -static constexpr auto rule(string2, ctll::set<'\x29','|'>) -> ctll::epsilon; -static constexpr auto rule(string2, ctll::epsilon) -> ctll::epsilon; -static constexpr auto rule(string2, ctll::term<'\\'>) -> ctll::push; -static constexpr auto rule(string2, ctll::term<'['>) -> ctll::push; -static constexpr auto rule(string2, ctll::term<'\x28'>) -> ctll::push; -static constexpr auto rule(string2, ctll::term<'^'>) -> ctll::push; -static constexpr auto rule(string2, ctll::term<'$'>) -> ctll::push; -static constexpr auto rule(string2, ctll::set<'!',',','-','/',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push; -static constexpr auto rule(string2, _others) -> ctll::push; -static constexpr auto rule(string2, ctll::term<'.'>) -> ctll::push; -static constexpr auto rule(string2, ctll::set<'*','+','?','\x7B','\x7D'>) -> ctll::reject; - -}; - -} - -#endif //CTRE__PCRE__HPP - -#ifndef CTRE__ATOMS__HPP -#define CTRE__ATOMS__HPP - -#ifndef CTRE__ATOMS_CHARACTERS__HPP -#define CTRE__ATOMS_CHARACTERS__HPP - -#ifndef CTRE__UTILITY__HPP -#define CTRE__UTILITY__HPP - -#define CTRE_CNTTP_COMPILER_CHECK (__cpp_nontype_template_parameter_class || (__cpp_nontype_template_args >= 201911L) || (__cpp_nontype_template_args >= 201411L && __clang_major__ >= 12)) - -#if __GNUC__ > 
9 -#if __has_cpp_attribute(likely) -#define CTRE_LIKELY [[likely]] -#else -#define CTRE_LIKELY -#endif - -#if __has_cpp_attribute(unlikely) -#define CTRE_UNLIKELY [[unlikely]] -#else -#define CTRE_UNLIKELY -#endif -#else -#define CTRE_LIKELY -#define CTRE_UNLIKELY -#endif - -#ifdef _MSC_VER -#define CTRE_FORCE_INLINE __forceinline -#define CTRE_FLATTEN -#else -#define CTRE_FORCE_INLINE inline __attribute__((always_inline)) -#define CTRE_FLATTEN __attribute__((flatten)) -#endif - -#endif - -#include - -namespace ctre { - -// sfinae check for types here - -template class MatchesCharacter { -template static auto test(CharT c) -> decltype(Y::match_char(c), std::true_type()); -template static auto test(...) -> std::false_type; -public: -template static inline constexpr bool value = decltype(test(std::declval()))(); -}; - -template struct character { -template CTRE_FORCE_INLINE static constexpr bool match_char(CharT value) noexcept { -return value == V; -} -}; - -template struct negative_set { -template CTRE_FORCE_INLINE static constexpr bool match_char(CharT value) noexcept { -return !(Content::match_char(value) || ... || false); -} -}; - -template struct set { -template CTRE_FORCE_INLINE static constexpr bool match_char(CharT value) noexcept { -return (Content::match_char(value) || ... || false); -} -}; - -template struct enumeration : set...> { }; - -template struct negate { -template CTRE_FORCE_INLINE static constexpr bool match_char(CharT value) noexcept { -return !(Content::match_char(value) || ... 
|| false); -} -}; - -template struct char_range { -template CTRE_FORCE_INLINE static constexpr bool match_char(CharT value) noexcept { -return (value >= A) && (value <= B); -} -}; - -using word_chars = set, char_range<'a','z'>, char_range<'0','9'>, character<'_'> >; - -using space_chars = enumeration<' ', '\t', '\n', '\v', '\f', '\r'>; - -using vertical_space_chars = enumeration< -(char)0x000A, // Linefeed (LF) -(char)0x000B, // Vertical tab (VT) -(char)0x000C, // Form feed (FF) -(char)0x000D, // Carriage return (CR) -(char32_t)0x0085, // Next line (NEL) -(char32_t)0x2028, // Line separator -(char32_t)0x2029 // Paragraph separator ->; - -using horizontal_space_chars = enumeration< -(char)0x0009, // Horizontal tab (HT) -(char)0x0020, // Space -(char32_t)0x00A0, // Non-break space -(char32_t)0x1680, // Ogham space mark -(char32_t)0x180E, // Mongolian vowel separator -(char32_t)0x2000, // En quad -(char32_t)0x2001, // Em quad -(char32_t)0x2002, // En space -(char32_t)0x2003, // Em space -(char32_t)0x2004, // Three-per-em space -(char32_t)0x2005, // Four-per-em space -(char32_t)0x2006, // Six-per-em space -(char32_t)0x2007, // Figure space -(char32_t)0x2008, // Punctuation space -(char32_t)0x2009, // Thin space -(char32_t)0x200A, // Hair space -(char32_t)0x202F, // Narrow no-break space -(char32_t)0x205F, // Medium mathematical space -(char32_t)0x3000 // Ideographic space ->; - -using alphanum_chars = set, char_range<'a','z'>, char_range<'0','9'> >; - -using alpha_chars = set, char_range<'a','z'> >; - -using xdigit_chars = set, char_range<'a','f'>, char_range<'0','9'> >; - -using punct_chars -= enumeration<'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', ',', '-', -'.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', -'^', '_', '`', '{', '|', '}', '~'>; - -using digit_chars = char_range<'0','9'>; - -using ascii_chars = char_range<'\x00','\x7F'>; - -} - -#endif - -#include - -namespace ctre { - -// special helpers for matching -struct accept { }; -struct 
reject { }; -struct start_mark { }; -struct end_mark { }; -struct end_cycle_mark { }; -struct end_lookahead_mark { }; -template struct numeric_mark { }; - -struct any { }; - -// actual AST of regexp -template struct string { }; -template struct select { }; -template struct sequence { }; -struct empty { }; - -template struct repeat { }; -template using plus = repeat<1,0,Content...>; -template using star = repeat<0,0,Content...>; - -template struct lazy_repeat { }; -template using lazy_plus = lazy_repeat<1,0,Content...>; -template using lazy_star = lazy_repeat<0,0,Content...>; - -template struct possessive_repeat { }; -template using possessive_plus = possessive_repeat<1,0,Content...>; -template using possessive_star = possessive_repeat<0,0,Content...>; - -template using optional = repeat<0,1,Content...>; -template using lazy_optional = lazy_repeat<0,1,Content...>; - -template struct capture { }; - -template struct capture_with_name { }; - -template struct back_reference { }; -template struct back_reference_with_name { }; - -template struct look_start { }; - -template struct lookahead_positive { }; -template struct lookahead_negative { }; - -struct atomic_start { }; - -template struct atomic_group { }; - -template struct boundary { }; -template struct not_boundary { }; - -using word_boundary = boundary; -using not_word_boundary = not_boundary; - -struct assert_subject_begin { }; -struct assert_subject_end { }; -struct assert_subject_end_line{ }; -struct assert_line_begin { }; -struct assert_line_end { }; - -} - -#endif - -#ifndef CTRE__ATOMS_UNICODE__HPP -#define CTRE__ATOMS_UNICODE__HPP - -// master branch is not including unicode db (for now) -#ifndef H_COR3NTIN_UNICODE_SYNOPSYS -#define H_COR3NTIN_UNICODE_SYNOPSYS - -#include - -namespace uni -{ -enum class category; -enum class property; -enum class version : unsigned char; -enum class script ; -enum class block; - -struct script_extensions_view { -constexpr script_extensions_view(char32_t c); - -struct sentinel 
{}; -struct iterator { - -constexpr iterator(char32_t c); -constexpr script operator*() const; - -constexpr iterator& operator++(int); - -constexpr iterator operator++(); - -constexpr bool operator==(sentinel) const; -constexpr bool operator!=(sentinel) const; - -private: -char32_t m_c; -script m_script; -int idx = 1; -}; - -constexpr iterator begin() const; -constexpr sentinel end() const; - -private: -char32_t c; -}; - -struct numeric_value { - -constexpr double value() const; -constexpr long long numerator() const; -constexpr int denominator() const; -constexpr bool is_valid() const; - -protected: -constexpr numeric_value() = default; -constexpr numeric_value(long long n, int16_t d); - -long long _n = 0; -int16_t _d = 0; -friend constexpr numeric_value cp_numeric_value(char32_t cp); -}; - -constexpr category cp_category(char32_t cp); -constexpr script cp_script(char32_t cp); -constexpr script_extensions_view cp_script_extensions(char32_t cp); -constexpr version cp_age(char32_t cp); -constexpr block cp_block(char32_t cp); -constexpr bool cp_is_valid(char32_t cp); -constexpr bool cp_is_assigned(char32_t cp); -constexpr bool cp_is_ascii(char32_t cp); -constexpr numeric_value cp_numeric_value(char32_t cp); - -template