diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt index d06778e2d7..728a46c4c8 100644 --- a/src/index/CMakeLists.txt +++ b/src/index/CMakeLists.txt @@ -6,4 +6,4 @@ add_library(index DocsDB.cpp FTSAlgorithms.cpp PrefixHeuristic.cpp CompressedRelation.cpp PatternCreator.cpp) -qlever_target_link_libraries(index util parser vocabulary compilationInfo ${STXXL_LIBRARIES}) +qlever_target_link_libraries(index util parser vocabulary compilationInfo configManager ${STXXL_LIBRARIES}) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 1e46755cf9..b1ee4e3541 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -293,3 +293,8 @@ size_t Index::getResultSizeOfScan(const TripleComponent& col0String, const Permutation::Enum& permutation) const { return pimpl_->getResultSizeOfScan(col0String, col1String, permutation); } + +// ____________________________________________________________________________ +std::string Index::getConfigurationDocForIndexBuilder() { + return pimpl_->getConfigurationDocForIndexBuilder(); +} diff --git a/src/index/Index.h b/src/index/Index.h index b446516b08..7e5df24d25 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -250,4 +250,9 @@ class Index { // requires including the rather expensive `IndexImpl.h` header IndexImpl& getImpl() { return *pimpl_; } [[nodiscard]] const IndexImpl& getImpl() const { return *pimpl_; } + + /* + @brief Print the detailed documentation of the options for the index builder. + */ + std::string getConfigurationDocForIndexBuilder(); }; diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp index 0bff9fdd97..7a9eca16fb 100644 --- a/src/index/IndexBuilderMain.cpp +++ b/src/index/IndexBuilderMain.cpp @@ -139,7 +139,8 @@ int main(int argc, char** argv) { try { po::store(po::parse_command_line(argc, argv, boostOptions), optionsMap); if (optionsMap.count("help")) { - std::cout << boostOptions << '\n'; + std::cout << boostOptions << '\n' + << index.getConfigurationDocForIndexBuilder() << '\n'; return EXIT_SUCCESS; } po::notify(optionsMap); diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 9af973dfdc..45df74a557 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -9,12 +9,14 @@ #include #include #include +#include #include #include #include "CompilationInfo.h" #include "absl/strings/str_join.h" #include "engine/AddCombinedRowToTable.h" +#include "index/ConstantsIndexBuilding.h" #include "index/IndexFormatVersion.h" #include "index/PrefixHeuristic.h" #include "index/TriplesView.h" @@ -22,12 +24,16 @@ #include "parser/ParallelParseBuffer.h" #include "util/BatchedPipeline.h" #include "util/CachingMemoryResource.h" +#include "util/ConfigManager/ConfigManager.h" +#include "util/ConfigManager/ConfigOption.h" +#include "util/Date.h" #include "util/HashMap.h" #include "util/JoinAlgorithms/JoinAlgorithms.h" #include "util/Serializer/FileSerializer.h" #include "util/ThreadSafeQueue.h" #include "util/TupleHelpers.h" #include "util/TypeTraits.h" +#include "util/json.h" using std::array; using namespace ad_utility::memory_literals; @@ -893,18 +899,75 @@ void IndexImpl::writeConfiguration() const { // ___________________________________________________________________________ void IndexImpl::readConfiguration() { - auto f = ad_utility::makeIfstream(onDiskBase_ + CONFIGURATION_FILE); - f >> configurationJson_; - if (configurationJson_.find("git-hash") != configurationJson_.end()) { - LOG(INFO) << "The git hash used to build this index was " - << std::string(configurationJson_["git-hash"]).substr(0, 6) - << std::endl; - } else { - LOG(INFO) << "The index was built before git commit hashes were stored in " - "the index meta data" - << std::endl; - } + ad_utility::ConfigManager config{}; + + // TODO Write a description. + std::string gitHash; + config.addOption("git-hash", "", &gitHash, {}); + + // TODO Write a description. + bool boolPrefixes; + config.addOption("prefixes", "", &boolPrefixes, false); + // TODO Write a description. + bool hasAllPermutations; + config.addOption("has-all-permutations", "", &hasAllPermutations, true); + + // TODO Write a description. + std::vector prefixesExternal; + config.addOption("prefixes-external", "", &prefixesExternal, {}); + + decltype(auto) localeManager = config.addSubManager({"locale"s}); + // TODO Write a description. + std::string lang; + localeManager.addOption("language", "", &lang); + + // TODO Write a description. + std::string country; + localeManager.addOption("country", "", &country); + + // TODO Write a description. + bool ignorePunctuation; + localeManager.addOption("ignore-punctuation", "", &ignorePunctuation); + + // TODO Write a description. + std::vector languagesInternal; + config.addOption("languages-internal", "", &languagesInternal, {"en"}); + + // TODO Write a description. + config.addOption("num-predicates-normal", "", &numPredicatesNormal_); + // These might be missing if there are only two permutations. + config.addOption("num-subjects-normal", "", &numSubjectsNormal_, 0UL); + config.addOption("num-objects-normal", "", &numObjectsNormal_, 0UL); + config.addOption("num-triples-normal", "", &numTriplesNormal_); + + /* + We check those options manually below, but add the options anyway for + documentation and parsing purpose. (A config manager doesn't allow any + options to be passed, that are not registered within him.) + */ + decltype(auto) indexFormatVersionManager = + config.addSubManager({"index-format-version"s}); + size_t indexFormatVersionPullRequestNumber; + indexFormatVersionManager.addOption("pull-request-number", + "The number of the pull request that " + "changed the index format most recently.", + &indexFormatVersionPullRequestNumber); + std::string indexFormatVersionDate; + indexFormatVersionManager.addOption( + "date", "The date of the last breaking change of the index format.", + &indexFormatVersionDate); + + configurationJson_ = fileToJson(onDiskBase_ + CONFIGURATION_FILE); + + /* + Because an out of date index format version can cause the parsing for + configuration options to fail, we have to manually check it before parsing. + + For example: Old configuration option could have been deleted. Trying to set + those, would cause an error, before we could actually parse the index format + version. + */ if (configurationJson_.find("index-format-version") != configurationJson_.end()) { auto indexFormatVersion = static_cast( @@ -941,76 +1004,40 @@ void IndexImpl::readConfiguration() { "Incompatible index format, see log message for details"}; } - if (configurationJson_.find("prefixes") != configurationJson_.end()) { - if (configurationJson_["prefixes"]) { - vector prefixes; - auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); - for (string prefix; std::getline(prefixFile, prefix);) { - prefixes.emplace_back( - RdfEscaping::unescapeNewlinesAndBackslashes(prefix)); - } - vocab_.buildCodebookForPrefixCompression(prefixes); - } else { - vocab_.buildCodebookForPrefixCompression(std::vector()); - } - } - - if (configurationJson_.find("prefixes-external") != - configurationJson_.end()) { - vocab_.initializeExternalizePrefixes( - configurationJson_["prefixes-external"]); - } + config.parseConfig(configurationJson_); - if (configurationJson_.count("ignore-case")) { - LOG(ERROR) << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; - throw std::runtime_error("Deprecated key \"ignore-case\" in index build"); + if (!gitHash.empty()) { + LOG(INFO) << "The git hash used to build this index was " + << gitHash.substr(0, 6) << std::endl; + } else { + LOG(INFO) << "The index was built before git commit hashes were stored in " + "the index meta data" + << std::endl; } - if (configurationJson_.count("locale")) { - std::string lang{configurationJson_["locale"]["language"]}; - std::string country{configurationJson_["locale"]["country"]}; - bool ignorePunctuation{configurationJson_["locale"]["ignore-punctuation"]}; - vocab_.setLocale(lang, country, ignorePunctuation); - textVocab_.setLocale(lang, country, ignorePunctuation); + if (boolPrefixes) { + vector prefixes; + auto prefixFile = ad_utility::makeIfstream(onDiskBase_ + PREFIX_FILE); + for (string prefix; std::getline(prefixFile, prefix);) { + prefixes.emplace_back( + RdfEscaping::unescapeNewlinesAndBackslashes(prefix)); + } + vocab_.buildCodebookForPrefixCompression(prefixes); } else { - LOG(ERROR) << "Key \"locale\" is missing in the metadata. This is probably " - "and old index build that is no longer supported by QLever. " - "Please rebuild your index\n"; - throw std::runtime_error( - "Missing required key \"locale\" in index build's metadata"); + vocab_.buildCodebookForPrefixCompression(std::vector()); } - if (configurationJson_.find("languages-internal") != - configurationJson_.end()) { - vocab_.initializeInternalizedLangs( - configurationJson_["languages-internal"]); - } + vocab_.initializeExternalizePrefixes(prefixesExternal); - auto loadDataMember = [this]( - std::string_view key, Target& target, - std::optional> - defaultValue = std::nullopt) { - auto it = configurationJson_.find(key); - if (it == configurationJson_.end()) { - if (defaultValue.has_value()) { - target = std::move(defaultValue.value()); - } else { - throw std::runtime_error{absl::StrCat( - "The required key \"", key, - "\" was not found in the `meta-data.json`. Most likely this index " - "was built with an older version of QLever and should be rebuilt")}; - } - } else { - target = Target{*it}; - } - }; + vocab_.setLocale(lang, country, ignorePunctuation); + textVocab_.setLocale(lang, country, ignorePunctuation); - loadDataMember("has-all-permutations", loadAllPermutations_, true); - loadDataMember("num-predicates-normal", numPredicatesNormal_); - // These might be missing if there are only two permutations. - loadDataMember("num-subjects-normal", numSubjectsNormal_, 0); - loadDataMember("num-objects-normal", numObjectsNormal_, 0); - loadDataMember("num-triples-normal", numTriplesNormal_); + vocab_.initializeInternalizedLangs(languagesInternal); + + if (!hasAllPermutations) { + // If the permutations simply don't exist, then we can never load them. + loadAllPermutations_ = false; + } // Compute unique ID for this index. // @@ -1062,24 +1089,135 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( return result; } +// ___________________________________________________________________________ +std::pair> +IndexImpl::generateConfigManagerForIndexBuilderSettings() { + auto variables{std::make_unique()}; + ad_utility::ConfigManager config{}; + + config.addOption("prefixes-external", + "Literals or IRIs that start with any of these prefixes " + "will be stored in the external vocabulary. For example " + "`[\"<\"] will externalize all IRIs", + &variables->prefixesExternal_, {}); + + config.addOption("languages-internal", + "Literals with one of these langauge tag will be stored in " + "the internal vocabulary by default", + &variables->languagesInternal_, {"en"}); + + // TODO It would be nice to add a description to this + // submanager directly, e.g. "The locale used for all operations that depend + // on the lexicographical order of strings, e.g. ORDER BY" + decltype(auto) localeManager = config.addSubManager({"locale"s}); + + // Should be self-explanatory with the default value. + decltype(auto) langOption = localeManager.addOption( + "language", "", &variables->localLang_, LOCALE_DEFAULT_LANG); + + // Should be self-explanatory with the default value. + decltype(auto) countryOption = localeManager.addOption( + "country", "", &variables->localCountry_, LOCALE_DEFAULT_COUNTRY); + + decltype(auto) ignorePunctuationOption = localeManager.addOption( + "ignore-punctuation", + "If set to true, then punctuation characters will only be considered on " + "the last level of comparisons. This will for example lead to the order " + "\"aa\", \"a.a\", \"ab\" (the first two are basically equal and the dot " + "is only used as a tie break)", + &variables->localIgnorePunctuation_, LOCALE_DEFAULT_IGNORE_PUNCTUATION); + + // Validator for the entries under `locale`. Either they all must use the + // default value, or all must be set at runtime. + localeManager.addOptionValidator( + [](const ad_utility::ConfigOption& langOpt, + const ad_utility::ConfigOption& countryOpt, + const ad_utility::ConfigOption& ignorePunctuationOpt) { + return langOpt.wasSetAtRuntime() == countryOpt.wasSetAtRuntime() && + countryOpt.wasSetAtRuntime() == + ignorePunctuationOpt.wasSetAtRuntime(); + }, + "All three options under 'locale' must be set, or none of them.", + "All three options under 'locale' must be set, or none of them.", + langOption, countryOption, ignorePunctuationOption); + + config.addOption( + "ascii-prefixes-only", + "Activate a faster parsing mode that is relaxed in two ways: 1. It " + "doesn't work if certain corner cases of the Turtle specification are " + "used (e.g. certain non-alphanumeric non-ascii characters in prefixes " + "and IRIs). 2. It allows certain patterns that are actually not valid " + "turtle, for example spaces in IRIs. As parsing is not a bottleneck " + "anymore, we recommend setting this to `false` and making sure that the " + "input is valid according to the official RDF Turtle specification", + &onlyAsciiTurtlePrefixes_, onlyAsciiTurtlePrefixes_); + + config.addOption( + "parallel-parsing", + "Enable the parallel parser, which assumes the following properties of " + "the Turtle input: 1. All prefix definitions are at the beginning of the " + "file, 2. All ends of triple blocks (denoted by a dot) are followed by a " + "newline (possibly with other whitespace inbetween), and a dot followed " + "by a newline always denotes the end of a triple block (especially there " + "are no multiline literals). This is true for most reasonably formatted " + "turtle files", + &useParallelParser_, useParallelParser_); + + config.addOption( + "num-triples-per-batch", + "The batch size of the first phase of the index build. Lower values will " + "reduce the RAM consumption of this phase while a too low value might " + "hurt the performance of the index builder", + &numTriplesPerBatch_, static_cast(NUM_TRIPLES_PER_PARTIAL_VOCAB)); + + config.addOption("parser-batch-size", + "The internal batch size of the turtle parser. Typically " + "there is no need to change this parameter.", + &parserBatchSize_, PARSER_BATCH_SIZE); + + decltype(auto) overflowOption = config.addOption( + "parser-integer-overflow-behavior", + "QLever stores all integer values with a fixed number of bits. This " + "option configures the behavior when an integer in the turtle input " + "cannot be represented by QLever. Note that this doesn't affect the " + "behavior of overflows during the query processing", + &variables->parserIntegerOverflowBehavior_, + "overflowing-integers-throw"s); + + config.addValidator( + [](std::string_view input) { + return turtleParserIntegerOverflowBehaviorMap_.contains(input); + }, + "value must be one of " + + ad_utility::lazyStrJoin( + std::views::keys(turtleParserIntegerOverflowBehaviorMap_), ", "), + "dummy description for the overflow behavior validator", overflowOption); + + return {std::move(config), std::move(variables)}; +} + +// ___________________________________________________________________________ +std::string IndexImpl::getConfigurationDocForIndexBuilder() { + return generateConfigManagerForIndexBuilderSettings() + .first.printConfigurationDoc(true); +} + // ___________________________________________________________________________ void IndexImpl::readIndexBuilderSettingsFromFile() { - json j; // if we have no settings, we still have to initialize some default - // values - if (!settingsFileName_.empty()) { - auto f = ad_utility::makeIfstream(settingsFileName_); - f >> j; - } + auto [config, + configVariablesPointer]{generateConfigManagerForIndexBuilderSettings()}; + auto& configVariables{*configVariablesPointer}; - if (j.find("prefixes-external") != j.end()) { - vocab_.initializeExternalizePrefixes(j["prefixes-external"]); - configurationJson_["prefixes-external"] = j["prefixes-external"]; + // Set the options. + if (!settingsFileName_.empty()) { + config.parseConfig(fileToJson(settingsFileName_)); + } else { + config.parseConfig(json(json::value_t::object)); } - if (j.count("ignore-case")) { - LOG(ERROR) << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; - throw std::runtime_error("Deprecated key \"ignore-case\" in settings JSON"); - } + vocab_.initializeExternalizePrefixes(configVariables.prefixesExternal_); + configurationJson_["prefixes-external"] = configVariables.prefixesExternal_; /** * ICU uses two separate arguments for each Locale, the language ("en" or @@ -1088,111 +1226,47 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { * locale setting. */ - { - std::string lang = LOCALE_DEFAULT_LANG; - std::string country = LOCALE_DEFAULT_COUNTRY; - bool ignorePunctuation = LOCALE_DEFAULT_IGNORE_PUNCTUATION; - if (j.count("locale")) { - lang = std::string{j["locale"]["language"]}; - country = std::string{j["locale"]["country"]}; - ignorePunctuation = bool{j["locale"]["ignore-punctuation"]}; - } else { - LOG(INFO) << "Locale was not specified in settings file, default is " - "en_US" - << std::endl; - } - LOG(INFO) << "You specified \"locale = " << lang << "_" << country << "\" " - << "and \"ignore-punctuation = " << ignorePunctuation << "\"" - << std::endl; - - if (lang != LOCALE_DEFAULT_LANG || country != LOCALE_DEFAULT_COUNTRY) { - LOG(WARN) << "You are using Locale settings that differ from the default " - "language or country.\n\t" - << "This should work but is untested by the QLever team. If " - "you are running into unexpected problems,\n\t" - << "Please make sure to also report your used locale when " - "filing a bug report. Also note that changing the\n\t" - << "locale requires to completely rebuild the index\n"; - } - vocab_.setLocale(lang, country, ignorePunctuation); - textVocab_.setLocale(lang, country, ignorePunctuation); - configurationJson_["locale"]["language"] = lang; - configurationJson_["locale"]["country"] = country; - configurationJson_["locale"]["ignore-punctuation"] = ignorePunctuation; + if (configVariables.localLang_ != LOCALE_DEFAULT_LANG || + configVariables.localCountry_ != LOCALE_DEFAULT_COUNTRY) { + LOG(WARN) << "You are using Locale settings that differ from the default " + "language or country.\n\t" + << "This should work but is untested by the QLever team. If " + "you are running into unexpected problems,\n\t" + << "Please make sure to also report your used locale when " + "filing a bug report. Also note that changing the\n\t" + << "locale requires to completely rebuild the index\n"; } + vocab_.setLocale(configVariables.localLang_, configVariables.localCountry_, + configVariables.localIgnorePunctuation_); + textVocab_.setLocale(configVariables.localLang_, + configVariables.localCountry_, + configVariables.localIgnorePunctuation_); + configurationJson_["locale"]["language"] = configVariables.localLang_; + configurationJson_["locale"]["country"] = configVariables.localCountry_; + configurationJson_["locale"]["ignore-punctuation"] = + configVariables.localIgnorePunctuation_; + + vocab_.initializeInternalizedLangs(configVariables.languagesInternal_); + configurationJson_["languages-internal"] = configVariables.languagesInternal_; - if (j.find("languages-internal") != j.end()) { - vocab_.initializeInternalizedLangs(j["languages-internal"]); - configurationJson_["languages-internal"] = j["languages-internal"]; - } - if (j.count("ascii-prefixes-only")) { - onlyAsciiTurtlePrefixes_ = static_cast(j["ascii-prefixes-only"]); - } if (onlyAsciiTurtlePrefixes_) { LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; } - if (j.count("parallel-parsing")) { - useParallelParser_ = static_cast(j["parallel-parsing"]); - } if (useParallelParser_) { LOG(INFO) << WARNING_PARALLEL_PARSING << std::endl; } - if (j.count("num-triples-per-batch")) { - numTriplesPerBatch_ = size_t{j["num-triples-per-batch"]}; - LOG(INFO) - << "You specified \"num-triples-per-batch = " << numTriplesPerBatch_ - << "\", choose a lower value if the index builder runs out of memory" - << std::endl; - } - - if (j.count("parser-batch-size")) { - parserBatchSize_ = size_t{j["parser-batch-size"]}; - LOG(INFO) << "Overriding setting parser-batch-size to " << parserBatchSize_ - << " This might influence performance during index build." - << std::endl; - } + turtleParserIntegerOverflowBehavior_ = + turtleParserIntegerOverflowBehaviorMap_.at( + configVariables.parserIntegerOverflowBehavior_); - std::string overflowingIntegersThrow = "overflowing-integers-throw"; - std::string overflowingIntegersBecomeDoubles = - "overflowing-integers-become-doubles"; - std::string allIntegersBecomeDoubles = "all-integers-become-doubles"; - std::vector allModes{overflowingIntegersThrow, - overflowingIntegersBecomeDoubles, - allIntegersBecomeDoubles}; - std::string key = "parser-integer-overflow-behavior"; - if (j.count(key)) { - auto value = static_cast(j[key]); - if (value == overflowingIntegersThrow) { - LOG(INFO) << "Integers that cannot be represented by QLever will throw " - "an exception" - << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::Error; - } else if (value == overflowingIntegersBecomeDoubles) { - LOG(INFO) << "Integers that cannot be represented by QLever will be " - "converted to doubles" - << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::OverflowingToDouble; - } else if (value == allIntegersBecomeDoubles) { - LOG(INFO) << "All integers will be converted to doubles" << std::endl; - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::OverflowingToDouble; - } else { - AD_CONTRACT_CHECK(std::ranges::find(allModes, value) == allModes.end()); - LOG(ERROR) << "Invalid value for " << key << std::endl; - LOG(INFO) << "The currently supported values are " - << absl::StrJoin(allModes, ",") << std::endl; - } - } else { - turtleParserIntegerOverflowBehavior_ = - TurtleParserIntegerOverflowBehavior::Error; - LOG(INFO) << "Integers that cannot be represented by QLever will throw an " - "exception (this is the default behavior)" - << std::endl; - } + // Logging used configuration options. + LOG(INFO) + << "Printing the configuration from the settings json file (including " + "implictly defaulted values). For a detailed description of this " + "configuration call `IndexBuilderMain --help`:\n" + << config.printConfigurationDoc(false) << std::endl; } // ___________________________________________________________________________ diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 9f5a83f967..bcc5aa1c60 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -44,6 +44,7 @@ #include "engine/idTable/CompressedExternalIdTable.h" #include "util/CancellationHandle.h" +#include "util/ConfigManager/ConfigManager.h" #include "util/MemorySize/MemorySize.h" using ad_utility::BufferedVector; @@ -621,6 +622,41 @@ class IndexImpl { void writeConfiguration() const; void readConfiguration(); + // Assigns the entries of the enum `TurtleParserIntegerOverflowBehavior` to + // their string representation. + inline static const auto turtleParserIntegerOverflowBehaviorMap_{ + []() -> ad_utility::HashMap { + using enum TurtleParserIntegerOverflowBehavior; + return {{"overflowing-integers-throw", Error}, + {"overflowing-integers-become-doubles", OverflowingToDouble}, + {"all-integers-become-doubles", AllToDouble}}; + }()}; + + /* + Some of the variables, that will be set by the `ConfigManager` generated by + `generateConfigManagerForIndexBuilderSettings`, after its parse function was + called. The remaining variables, it writes to, are member variables of + `IndexImpl`. + */ + struct IndexBuilderSettingsVariables { + std::vector prefixesExternal_; + std::vector languagesInternal_; + std::string localLang_; + std::string localCountry_; + bool localIgnorePunctuation_; + std::string parserIntegerOverflowBehavior_; + }; + + /* + @brief Generate the `ConfigManager`, and some of the variables it writes to, + that describes the index builder settings. The remaining variables, it writes + to, are member variables of `IndexImpl`. + */ + std::pair> + generateConfigManagerForIndexBuilderSettings(); + // initialize the index-build-time settings for the vocabulary void readIndexBuilderSettingsFromFile(); @@ -631,9 +667,16 @@ class IndexImpl { void deleteTemporaryFile(const string& path); public: - // Count the number of "QLever-internal" triples (predicate ql:langtag or - // predicate starts with @) and all other triples (that were actually part of - // the input). + /* + @brief Print the detailed documentation of the options for the index builder. + */ + std::string getConfigurationDocForIndexBuilder(); + + /* + Count the number of "QLever-internal" triples (predicate ql:langtag or + predicate starts with @) and all other triples (that were actually part of the + input). + */ NumNormalAndInternal numTriples() const; // The index contains several triples that are not part of the "actual" diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp index 13699dfcaf..9a1131e446 100644 --- a/src/index/Vocabulary.cpp +++ b/src/index/Vocabulary.cpp @@ -323,6 +323,8 @@ template void RdfsVocabulary::buildCodebookForPrefixCompression< const std::vector&); template void RdfsVocabulary::initializeInternalizedLangs( const nlohmann::json&); +template void RdfsVocabulary::initializeInternalizedLangs< + std::vector>(const std::vector&); template void RdfsVocabulary::initializeExternalizePrefixes( const nlohmann::json& prefixes); template void RdfsVocabulary::initializeExternalizePrefixes< diff --git a/src/parser/ParallelParseBuffer.h b/src/parser/ParallelParseBuffer.h index 6ad203a499..278335b2bc 100644 --- a/src/parser/ParallelParseBuffer.h +++ b/src/parser/ParallelParseBuffer.h @@ -10,6 +10,7 @@ #include #include "../util/Log.h" +#include "parser/TurtleParser.h" using std::array; using std::string; diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index a4b7ab2813..548d0696e6 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -768,12 +768,34 @@ auto ConfigManager::getValidatorAssignment() const return assignment; } +// ____________________________________________________________________________ +std::string ConfigManager::printConfigurationDocJson() const { + return generateConfigurationDocJson("").dump(2); +} + +// ____________________________________________________________________________ +std::string ConfigManager::printConfigurationDocDetailedList() const { + /* + This works, because sub managers are not allowed to be empty. (This + invariant is checked by the helper function for walking over the hash map + entries, that is used by the `generateConfigurationDocDetailedList` helper + function.) So, the only way for a valid lack of configuration options to be + true, is on the top level. A.k.a. the object, on which + `printConfigurationDocDetailedList` was called. + */ + if (configurationOptions_.empty()) { + return "No configuration options were defined."; + } + + return generateConfigurationDocDetailedList("", getValidatorAssignment()); +} + // ____________________________________________________________________________ std::string ConfigManager::printConfigurationDoc(bool detailed) const { /* This works, because sub managers are not allowed to be empty. (This invariant is checked by the helper function for walking over the hash map - entries, that is used by the `generateConfigurationDoc...` helper + entries, that is used by the `printConfigurationDoc...` helper functions.) So, the only way for a valid lack of configuration options to be true, is on the top level. A.k.a. the object, on which `printConfigurationDoc` was called. @@ -783,9 +805,9 @@ std::string ConfigManager::printConfigurationDoc(bool detailed) const { } // We always print the configuration doc json. - const std::string& configurationDocJsonString{ - insertThousandSeparator(absl::StrCat( - "Configuration:\n", generateConfigurationDocJson("").dump(2)))}; + const std::string& configurationDocJsonString{absl::StrCat( + "Configuration:\n", + insertThousandSeparator<'.'>(printConfigurationDocJson(), ' '))}; if (!detailed) { return configurationDocJsonString; @@ -793,9 +815,7 @@ std::string ConfigManager::printConfigurationDoc(bool detailed) const { return absl::StrCat( configurationDocJsonString, "\n\n", - insertThousandSeparator<'.'>( - generateConfigurationDocDetailedList("", getValidatorAssignment()), - ' ')); + insertThousandSeparator<'.'>(printConfigurationDocDetailedList(), ' ')); } // ____________________________________________________________________________ diff --git a/src/util/ConfigManager/ConfigManager.h b/src/util/ConfigManager/ConfigManager.h index 298f5bc3bd..b20123f330 100644 --- a/src/util/ConfigManager/ConfigManager.h +++ b/src/util/ConfigManager/ConfigManager.h @@ -291,6 +291,19 @@ class ConfigManager { */ static nlohmann::json parseShortHand(const std::string& shortHandString); + /* + @brief Generate a string containing a json representation of the current + config manager configuration. + */ + std::string printConfigurationDocJson() const; + + /* + @brief Create a detailed list about the configuration options, with their + types, values, default values, etc. shown and organized by the sub managers, + that hold them. Validators are also printed. + */ + std::string printConfigurationDocDetailedList() const; + /* @brief Returns a string containing a json configuration and, optionally, the string representations of all added configuration options, togehter with the @@ -774,7 +787,7 @@ class ConfigManager { /* @brief Create a detailed list about the configuration options, with their - types, values, default values, etc. shown and organized by the sub managers, + types, values, default values, etc. shown and organized by the sub managers, that hold them. Validator invariant descriptions will be printed according to `ConfigurationDocValidatorAssignment`. diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 3c5131ee91..6967250c06 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -2682,9 +2682,20 @@ TEST(ConfigManagerTest, PrintConfigurationDocComparison) { doubleArgumentValidatorSecondArgument); // Finally, check, if the expected and actual output is the same. - assertStringEqual(exampleConfigManagerExpectedNotDetailedString, - topManager.printConfigurationDoc(false)); - assertStringEqual(exampleConfigManagerExpectedDetailedString, - topManager.printConfigurationDoc(true)); + assertStringEqual(exampleConfigManagerExpectedprintConfigurationDocJsonString, + topManager.printConfigurationDocJson()); + assertStringEqual( + exampleConfigManagerExpectedprintConfigurationDocDetailedListString, + topManager.printConfigurationDocDetailedList()); + assertStringEqual( + absl::StrCat("Configuration:\n", + exampleConfigManagerExpectedprintConfigurationDocJsonString), + topManager.printConfigurationDoc(false)); + assertStringEqual( + absl::StrCat( + "Configuration:\n", + exampleConfigManagerExpectedprintConfigurationDocJsonString, "\n\n", + exampleConfigManagerExpectedprintConfigurationDocDetailedListString), + topManager.printConfigurationDoc(true)); } } // namespace ad_utility::ConfigManagerImpl diff --git a/test/util/PrintConfigurationDocComparisonString.h b/test/util/PrintConfigurationDocComparisonString.h index f669706e57..67a75591b3 100644 --- a/test/util/PrintConfigurationDocComparisonString.h +++ b/test/util/PrintConfigurationDocComparisonString.h @@ -2,6 +2,9 @@ // Chair of Algorithms and Data Structures. // Author: Andre Schlegel (December of 2023, // schlegea@informatik.uni-freiburg.de) +#include + +#include #pragma once @@ -15,14 +18,11 @@ be! */ // The strings to compare against. -#include - -#include constexpr std::string_view emptyConfigManagerExpectedString = "No configuration options were defined."; -constexpr std::string_view exampleConfigManagerExpectedNotDetailedString = - R"--(Configuration: -{ +constexpr std::string_view + exampleConfigManagerExpectedprintConfigurationDocJsonString = + R"--({ "booleanWithoutDescriptionWithoutDefaultValueWithoutValidator": "[must be specified]", "booleanWithoutDescriptionWithoutDefaultValueWithValidator": "[must be specified]", "booleanWithoutDescriptionWithDefaultValueWithKeepDefaultValueWithoutValidator": true, @@ -509,10 +509,9 @@ constexpr std::string_view exampleConfigManagerExpectedNotDetailedString = } })--"; -inline const std::string& exampleConfigManagerExpectedDetailedString = - absl::StrCat(exampleConfigManagerExpectedNotDetailedString, R"--( - -Option 'booleanWithoutDescriptionWithoutDefaultValueWithoutValidator' [boolean] +constexpr std::string_view + exampleConfigManagerExpectedprintConfigurationDocDetailedListString = + R"--(Option 'booleanWithoutDescriptionWithoutDefaultValueWithoutValidator' [boolean] Value: [must be specified] Option 'booleanWithoutDescriptionWithoutDefaultValueWithValidator' [boolean] @@ -1680,4 +1679,4 @@ Sub manager 'subManager' Value: [must be specified] Required invariants: - - Validator for configuration options doubleArgumentValidatorFirstArgument, doubleArgumentValidatorSecondArgument.)--"); + - Validator for configuration options doubleArgumentValidatorFirstArgument, doubleArgumentValidatorSecondArgument.)--";