Skip to content

Commit

Permalink
Remove htmlcxx dependency and use libtidy with tinyxml2 to parse html
Browse files Browse the repository at this point in the history
  • Loading branch information
Sude- committed May 9, 2024
1 parent 1866f4c commit 1c0ab29
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 155 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
run: |
sudo apt -y update
sudo apt -y install ninja-build build-essential libcurl4-openssl-dev libboost-regex-dev \
libjsoncpp-dev librhash-dev libtinyxml2-dev libhtmlcxx-dev \
libjsoncpp-dev librhash-dev libtinyxml2-dev libtidy-dev \
libboost-system-dev libboost-filesystem-dev libboost-program-options-dev \
libboost-date-time-dev libboost-iostreams-dev help2man cmake \
pkg-config zlib1g-dev qtwebengine5-dev
Expand Down
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ find_package(Boost
)
find_package(CURL 7.55.0 REQUIRED)
find_package(Jsoncpp REQUIRED)
find_package(Htmlcxx REQUIRED)
find_package(Tinyxml2 REQUIRED)
find_package(Rhash REQUIRED)
find_package(Threads REQUIRED)
find_package(ZLIB REQUIRED)
find_package(Tidy REQUIRED)

file(GLOB SRC_FILES
main.cpp
Expand Down Expand Up @@ -111,22 +111,22 @@ target_include_directories(${PROJECT_NAME}
PRIVATE ${CURL_INCLUDE_DIRS}
PRIVATE ${OAuth_INCLUDE_DIRS}
PRIVATE ${Jsoncpp_INCLUDE_DIRS}
PRIVATE ${Htmlcxx_INCLUDE_DIRS}
PRIVATE ${Tinyxml2_INCLUDE_DIRS}
PRIVATE ${Rhash_INCLUDE_DIRS}
PRIVATE ${ZLIB_INCLUDE_DIRS}
PRIVATE ${Tidy_INCLUDE_DIRS}
)

target_link_libraries(${PROJECT_NAME}
PRIVATE ${Boost_LIBRARIES}
PRIVATE ${CURL_LIBRARIES}
PRIVATE ${OAuth_LIBRARIES}
PRIVATE ${Jsoncpp_LIBRARIES}
PRIVATE ${Htmlcxx_LIBRARIES}
PRIVATE ${Tinyxml2_LIBRARIES}
PRIVATE ${Rhash_LIBRARIES}
PRIVATE ${CMAKE_THREAD_LIBS_INIT}
PRIVATE ${ZLIB_LIBRARIES}
PRIVATE ${Tidy_LIBRARIES}
)

# Check if libatomic is needed in order to use std::atomic, and add
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ It uses the same API as GOG Galaxy which doesn't have Linux support at the momen
* [libcurl](https://curl.haxx.se/libcurl/) >= 7.55.0
* [librhash](https://github.com/rhash/RHash)
* [jsoncpp](https://github.com/open-source-parsers/jsoncpp)
* [htmlcxx](http://htmlcxx.sourceforge.net/)
* [libtidy](https://www.html-tidy.org/)
* [tinyxml2](https://github.com/leethomason/tinyxml2)
* [boost](http://www.boost.org/) (regex, date-time, system, filesystem, program-options, iostreams)
* [zlib](https://www.zlib.net/)
Expand All @@ -22,7 +22,7 @@ It uses the same API as GOG Galaxy which doesn't have Linux support at the momen
## Debian/Ubuntu

# apt install build-essential libcurl4-openssl-dev libboost-regex-dev \
libjsoncpp-dev librhash-dev libtinyxml2-dev libhtmlcxx-dev \
libjsoncpp-dev librhash-dev libtinyxml2-dev libtidy-dev \
libboost-system-dev libboost-filesystem-dev libboost-program-options-dev \
libboost-date-time-dev libboost-iostreams-dev help2man cmake \
pkg-config zlib1g-dev qtwebengine5-dev ninja-build
Expand All @@ -35,7 +35,7 @@ It uses the same API as GOG Galaxy which doesn't have Linux support at the momen
## Fedora
```
sudo dnf install cmake make gcc gcc-c++ glibc tinyxml2-devel rhash-devel \
htmlcxx-devel tinyxml-devel jsoncpp-devel liboauth-devel libcurl-devel \
libtidy-devel tinyxml-devel jsoncpp-devel liboauth-devel libcurl-devel \
boost-devel help2man
```
### Build and Install
Expand Down
54 changes: 0 additions & 54 deletions cmake/FindHtmlcxx.cmake

This file was deleted.

33 changes: 33 additions & 0 deletions cmake/FindTidy.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# - Try to find tidy
#
# Once done this will define
#  Tidy_FOUND - System has tidy (both the header and the library were located)
#  Tidy_INCLUDE_DIRS - The tidy include directories
#  Tidy_LIBRARIES - The libraries needed to use tidy

# pkg-config is only used to provide search hints, so it is optional.
# pkg_check_modules() is only defined when PkgConfig was found.
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
  pkg_check_modules(PC_TIDY QUIET tidy)
endif()

find_path(TIDY_INCLUDE_DIR tidy.h
  HINTS
    ${PC_TIDY_INCLUDEDIR}
    ${PC_TIDY_INCLUDE_DIRS}
  PATHS
    ${PC_TIDY_INCLUDE_DIRS}
  )

find_library(TIDY_LIBRARY tidy
  HINTS
    ${PC_TIDY_LIBDIR}
    ${PC_TIDY_LIBRARY_DIRS}
  PATHS
    ${PC_TIDY_LIBRARY_DIRS}
  )

mark_as_advanced(TIDY_INCLUDE_DIR TIDY_LIBRARY)

# Sets Tidy_FOUND only when BOTH the library and the header were found, and
# makes find_package(Tidy REQUIRED) fail at configure time otherwise instead
# of failing later at compile or link time.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Tidy
  REQUIRED_VARS TIDY_LIBRARY TIDY_INCLUDE_DIR
  )

if(Tidy_FOUND)
  set(Tidy_INCLUDE_DIRS ${TIDY_INCLUDE_DIR})
  set(Tidy_LIBRARIES ${TIDY_LIBRARY})
endif(Tidy_FOUND)
3 changes: 3 additions & 0 deletions include/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <json/json.h>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <curl/curl.h>
#include <tinyxml2.h>

typedef struct
{
Expand Down Expand Up @@ -104,6 +105,8 @@ namespace Util
}
Json::Value readJsonFile(const std::string& path);
std::string transformGamename(const std::string& gamename);
std::string htmlToXhtml(const std::string& html);
tinyxml2::XMLNode* nextXMLNode(tinyxml2::XMLNode* node);
}

#endif // UTIL_H
1 change: 0 additions & 1 deletion include/website.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class Website
private:
CURL* curlhandle;
bool IsloggedInSimple();
bool IsLoggedInComplex(const std::string& email);
std::map<std::string, std::string> getTagsFromJson(const Json::Value& json);
int retries;
std::string LoginGetAuthCode(const std::string& email, const std::string& password);
Expand Down
34 changes: 10 additions & 24 deletions src/downloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
#include <boost/date_time/posix_time/posix_time.hpp>
#include <tinyxml2.h>
#include <json/json.h>
#include <htmlcxx/html/ParserDom.h>
#include <htmlcxx/html/Uri.h>
#include <termios.h>
#include <algorithm>
#include <thread>
Expand Down Expand Up @@ -1630,30 +1628,18 @@ std::string Downloader::getSerialsFromJSON(const Json::Value& json)
}
else
{
htmlcxx::HTML::ParserDom parser;
tree<htmlcxx::HTML::Node> dom = parser.parseTree(cdkey);
tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
tree<htmlcxx::HTML::Node>::iterator end = dom.end();
for (; it != end; ++it)
std::string xhtml = Util::htmlToXhtml(cdkey);
tinyxml2::XMLDocument doc;
doc.Parse(xhtml.c_str());
tinyxml2::XMLNode* node = doc.FirstChildElement("html");
while(node)
{
std::string tag_text;
if (it->tagName() == "span")
{
for (unsigned int j = 0; j < dom.number_of_children(it); ++j)
{
tree<htmlcxx::HTML::Node>::iterator span_it = dom.child(it, j);
if (!span_it->isTag() && !span_it->isComment())
tag_text = span_it->text();
}
}
tinyxml2::XMLElement *element = node->ToElement();
const char* text = element->GetText();
if (text)
serials << text << std::endl;

if (!tag_text.empty())
{
boost::regex expression("^\\h+|\\h+$");
std::string text = boost::regex_replace(tag_text, expression, "");
if (!text.empty())
serials << text << std::endl;
}
node = Util::nextXMLNode(node);
}
}

Expand Down
49 changes: 48 additions & 1 deletion src/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <tinyxml2.h>
#include <json/json.h>
#include <fstream>
#include <sys/ioctl.h>
#include <tidy.h>
#include <tidybuffio.h>

/*
Create filepath from specified directory and path
Expand Down Expand Up @@ -1007,3 +1008,49 @@ std::string Util::transformGamename(const std::string& gamename)

return gamename_transformed;
}

std::string Util::htmlToXhtml(const std::string& html)
{
std::string xhtml;
TidyBuffer buffer = {0, 0, 0, 0, 0};
int rc = -1;
TidyDoc doc = tidyCreate();

tidyOptSetBool(doc, TidyXhtmlOut, yes);
tidyOptSetBool(doc, TidyForceOutput, yes);
tidyOptSetInt(doc, TidyWrapLen, 0);
tidyOptSetInt(doc, TidyShowInfo, 0);
tidyOptSetInt(doc, TidyShowWarnings, 0);
rc = tidyParseString(doc, html.c_str());
if ( rc >= 0 )
rc = tidyCleanAndRepair(doc);
if ( rc >= 0 )
rc = tidySaveBuffer(doc, &buffer);

xhtml = std::string((char*)buffer.bp, buffer.size);

tidyBufFree(&buffer);
tidyRelease(doc);

return xhtml;
}

/*
    Get the element that follows the given node in a depth-first,
    document-order walk over elements only.

    node - current position in the tree, may be nullptr

    Returns the first child element if there is one, otherwise the next
    sibling element, otherwise the next sibling element of the nearest
    ancestor that has one. Returns nullptr when node is nullptr or when
    there are no more elements to visit.
*/
tinyxml2::XMLNode* Util::nextXMLNode(tinyxml2::XMLNode* node)
{
    if (node == nullptr)
        return nullptr;

    // Descend first
    if (tinyxml2::XMLElement* child = node->FirstChildElement())
        return child;

    // No children: take the next sibling of this node, or of the closest
    // ancestor that has one
    for (tinyxml2::XMLNode* current = node; current != nullptr; current = current->Parent())
    {
        if (tinyxml2::XMLElement* sibling = current->NextSiblingElement())
            return sibling;
    }

    // Reached the end
    return nullptr;
}
Loading

0 comments on commit 1c0ab29

Please sign in to comment.