diff --git a/meson.build b/meson.build index 1f4b5954..2cdfbd51 100644 --- a/meson.build +++ b/meson.build @@ -19,7 +19,7 @@ if static_linkage endif endif -libzim_dep = dependency('libzim', version : '>=8.0.0', static:static_linkage) +libzim_dep = dependency('libzim', version : '>=9.1.0', static:static_linkage) with_xapian_support = compiler.has_header_symbol('zim/zim.h', 'LIBZIM_WITH_XAPIAN') find_library_in_compiler = meson.version().version_compare('>=0.31.0') diff --git a/src/zimcheck/checks.cpp b/src/zimcheck/checks.cpp index 38e51931..446544b0 100644 --- a/src/zimcheck/checks.cpp +++ b/src/zimcheck/checks.cpp @@ -14,30 +14,10 @@ #include #include #include +#include #include #include -// Specialization of std::hash needed for our unordered_map. Can be removed in c++14 -namespace std { - template <> struct hash { - size_t operator() (const LogTag &t) const { return size_t(t); } - }; -} - -// Specialization of std::hash needed for our unordered_map. Can be removed in c++14 -namespace std { - template <> struct hash { - size_t operator() (const TestType &t) const { return size_t(t); } - }; -} - -// Specialization of std::hash needed for our unordered_map. Can be removed in c++14 -namespace std { - template <> struct hash { - size_t operator() (const MsgId &msgid) const { return size_t(msgid); } - }; -} - namespace { @@ -113,6 +93,11 @@ SortedMsgParams sortedMsgParams(const MsgParams& msgParams) return SortedMsgParams(msgParams.begin(), msgParams.end()); } +bool areAliases(const zim::Item& i1, const zim::Item& i2) +{ + return i1.getClusterIndex() == i2.getClusterIndex() && i1.getBlobIndex() == i2.getBlobIndex(); +} + } // unnamed namespace namespace JSON @@ -487,15 +472,22 @@ void ArticleChecker::detect_redundant_articles() progress.report(); auto l = it.second; while ( !l.empty() ) { - const auto e1 = archive.getEntryByPath(l.front()); + // The way we have constructed `l`, e1 MUST BE an item + const auto e1 = archive.getEntryByPath(l.front()).getItem(); l.pop_front(); if ( !l.empty() ) { - // The way we have constructed `l`, e1 MUST BEĀ an item - const std::string s1 = e1.getItem().getData(); + std::optional s1; decltype(l) articlesDifferentFromE1; for(auto other : l) { - auto e2 = archive.getEntryByPath(other); - std::string s2 = e2.getItem().getData(); + // The way we have constructed `l`, e2 MUST BE an item + const auto e2 = archive.getEntryByPath(other).getItem(); + if (areAliases(e1, e2)) { + continue; + } + if (!s1) { + s1 = e1.getData(); + } + std::string s2 = e2.getData(); if (s1 != s2 ) { articlesDifferentFromE1.push_back(other); continue;