diff --git a/src/book.cpp b/src/book.cpp index cf340c04a..56f96c738 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -77,8 +77,8 @@ void Book::update(const zim::Archive& archive) { m_flavour = getMetaFlavour(archive); m_tags = getMetaTags(archive); m_category = getCategoryFromTags(); - m_articleCount = getArchiveArticleCount(archive); - m_mediaCount = getArchiveMediaCount(archive); + m_articleCount = archive.getArticleCount(); + m_mediaCount = archive.getMediaCount(); m_size = static_cast(getArchiveFileSize(archive)) << 10; m_illustrations.clear(); diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 6fceeb97a..e5d468ee8 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -105,46 +105,6 @@ bool getArchiveFavicon(const zim::Archive& archive, unsigned size, return false; } -// should this be in libzim -unsigned int getArchiveMediaCount(const zim::Archive& archive) { - std::map counterMap = parseArchiveCounter(archive); - unsigned int counter = 0; - - for (auto &pair:counterMap) { - if (startsWith(pair.first, "image/") || - startsWith(pair.first, "video/") || - startsWith(pair.first, "audio/")) { - counter += pair.second; - } - } - - return counter; -} - -unsigned int getArchiveArticleCount(const zim::Archive& archive) { - // [HACK] - // getArticleCount() returns different things depending of the "version" of the zim. - // On old zim (<=6), it returns the number of entry in `A` namespace - // On recent zim (>=7), it returns: - // - the number of entry in `C` namespace (==getEntryCount) if no frontArticleIndex is present - // - the number of front article if a frontArticleIndex is present - // The use case >=7 without frontArticleIndex is pretty rare so we don't care - // We can detect if we are reading a zim <= 6 by checking if we have a newNamespaceScheme. - if (archive.hasNewNamespaceScheme()) { - //The articleCount is "good" - return archive.getArticleCount(); - } else { - // We have to parse the `M/Counter` metadata - unsigned int counter = 0; - for(const auto& pair:parseArchiveCounter(archive)) { - if (startsWith(pair.first, "text/html")) { - counter += pair.second; - } - } - return counter; - } -} - unsigned int getArchiveFileSize(const zim::Archive& archive) { return archive.getFilesize() / 1024; } @@ -165,14 +125,4 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path } throw zim::EntryNotFound("Cannot find entry for non empty path"); } - -MimeCounterType parseArchiveCounter(const zim::Archive& archive) { - try { - auto counterContent = archive.getMetadata("Counter"); - return parseMimetypeCounter(counterContent); - } catch (zim::EntryNotFound& e) { - return {}; - } -} - } // kiwix diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index 5b159df34..cefbc66c1 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -51,9 +51,6 @@ namespace kiwix zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry); zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path); - - MimeCounterType parseArchiveCounter(const zim::Archive& archive); - } #endif diff --git a/src/tools/otherTools.cpp b/src/tools/otherTools.cpp index ab5bf5874..13b2172e8 100644 --- a/src/tools/otherTools.cpp +++ b/src/tools/otherTools.cpp @@ -288,67 +288,6 @@ bool kiwix::convertStrToBool(const std::string& value) throw std::domain_error(ss.str()); } -namespace -{ -// The counter metadata format is a list of item separated by a `;` : -// item0;item1;item2 -// Each item is a "tuple" mimetype=number. -// However, the mimetype may contains parameters: -// text/html;raw=true;foo=bar -// So the final format may be complex to parse: -// key0=value0;key1;foo=bar=value1;key2=value2 - -typedef kiwix::MimeCounterType::value_type MimetypeAndCounter; - -std::string readFullMimetypeAndCounterString(std::istream& in) -{ - std::string mtcStr, params; - getline(in, mtcStr, ';'); - if ( mtcStr.find('=') == std::string::npos ) - { - do - { - if ( !getline(in, params, ';' ) ) - return std::string(); - mtcStr += ";" + params; - } - while ( std::count(params.begin(), params.end(), '=') != 2 ); - } - return mtcStr; -} - -MimetypeAndCounter parseASingleMimetypeCounter(const std::string& s) -{ - const std::string::size_type k = s.find_last_of("="); - if ( k != std::string::npos ) - { - const std::string mimeType = s.substr(0, k); - std::istringstream counterSS(s.substr(k+1)); - unsigned int counter; - if (counterSS >> counter && counterSS.eof()) - return MimetypeAndCounter{mimeType, counter}; - } - return MimetypeAndCounter{"", 0}; -} - -} // unnamed namespace - -kiwix::MimeCounterType kiwix::parseMimetypeCounter(const std::string& counterData) -{ - kiwix::MimeCounterType counters; - std::istringstream ss(counterData); - - while (ss) - { - const std::string mtcStr = readFullMimetypeAndCounterString(ss); - const MimetypeAndCounter mtc = parseASingleMimetypeCounter(mtcStr); - if ( !mtc.first.empty() ) - counters.insert(mtc); - } - - return counters; -} - std::string kiwix::gen_date_str() { auto now = std::time(0); diff --git a/src/tools/otherTools.h b/src/tools/otherTools.h index c0920d7bf..e00a35858 100644 --- a/src/tools/otherTools.h +++ b/src/tools/otherTools.h @@ -45,9 +45,6 @@ namespace kiwix const std::string& tagName); bool convertStrToBool(const std::string& value); - using MimeCounterType = std::map; - MimeCounterType parseMimetypeCounter(const std::string& counterData); - std::string gen_date_str(); std::string gen_uuid(const std::string& s); diff --git a/test/counterParsing.cpp b/test/counterParsing.cpp deleted file mode 100644 index c4c99e32b..000000000 --- a/test/counterParsing.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (C) 2019 Matthieu Gautier - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and - * NON-INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include "gtest/gtest.h" -#include -#include -#include -#include - -namespace kiwix { -using CounterType = std::map; -CounterType parseMimetypeCounter(const std::string& counterData); -}; - -using namespace kiwix; -#define parse parseMimetypeCounter - -namespace -{ -TEST(ParseCounterTest, simpleMimeType) -{ - { - std::string counterStr = ""; - CounterType counterMap = {}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "foo=1"; - CounterType counterMap = {{"foo", 1}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "foo=1;text/html=50;"; - CounterType counterMap = {{"foo", 1}, {"text/html", 50}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } -} - -TEST(ParseCounterTest, paramMimeType) -{ - { - std::string counterStr = "text/html;raw=true=1"; - CounterType counterMap = {{"text/html;raw=true", 1}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "foo=1;text/html;raw=true=50;bar=2"; - CounterType counterMap = {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "foo=1;text/html;raw=true;param=value=50;bar=2"; - CounterType counterMap = {{"foo", 1}, {"text/html;raw=true;param=value", 50}, {"bar", 2}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "foo=1;text/html;raw=true=50;bar=2"; - CounterType counterMap = {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "application/javascript=8;text/html=3;application/warc-headers=28364;text/html;raw=true=6336;text/css=47;text/javascript=98;image/png=968;image/webp=24;application/json=3694;image/gif=10274;image/jpeg=1582;font/woff2=25;text/plain=284;application/atom+xml=247;application/x-www-form-urlencoded=9;video/mp4=9;application/x-javascript=7;application/xml=1;image/svg+xml=5"; - CounterType counterMap = { - {"application/javascript", 8}, - {"text/html", 3}, - {"application/warc-headers", 28364}, - {"text/html;raw=true", 6336}, - {"text/css", 47}, - {"text/javascript", 98}, - {"image/png", 968}, - {"image/webp", 24}, - {"application/json", 3694}, - {"image/gif", 10274}, - {"image/jpeg", 1582}, - {"font/woff2", 25}, - {"text/plain", 284}, - {"application/atom+xml", 247}, - {"application/x-www-form-urlencoded", 9}, - {"video/mp4", 9}, - {"application/x-javascript", 7}, - {"application/xml", 1}, - {"image/svg+xml", 5} - }; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } -} - -TEST(ParseCounterTest, wrongType) -{ - CounterType empty = {}; - { - std::string counterStr = "text/html"; - ASSERT_EQ(parse(counterStr), empty) << counterStr; - } - { - std::string counterStr = "text/html="; - ASSERT_EQ(parse(counterStr), empty) << counterStr; - } - { - std::string counterStr = "text/html=foo"; - ASSERT_EQ(parse(counterStr), empty) << counterStr; - } - { - std::string counterStr = "text/html=123foo"; - ASSERT_EQ(parse(counterStr), empty) << counterStr; - } - { - std::string counterStr = "text/html=50;foo"; - CounterType counterMap = {{"text/html", 50}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } - { - std::string counterStr = "text/html;foo=20"; - ASSERT_EQ(parse(counterStr), empty) << counterStr; - } - { - std::string counterStr = "text/html;foo=20;"; - ASSERT_EQ(parse(counterStr), empty) << counterStr; - } - { - std::string counterStr = "text/html=50;;foo"; - CounterType counterMap = {{"text/html", 50}}; - ASSERT_EQ(parse(counterStr), counterMap) << counterStr; - } -} - -}; diff --git a/test/meson.build b/test/meson.build index 90b7ce892..ac432ae00 100644 --- a/test/meson.build +++ b/test/meson.build @@ -2,7 +2,6 @@ tests = [ 'library', 'regex', 'tagParsing', - 'counterParsing', 'stringTools', 'pathTools', 'kiwixserve',