Skip to content

Commit

Permalink
Merge pull request #299 from mattmundell/fast-xml-parser-11.0
Browse files Browse the repository at this point in the history
Add a fast memory-only XML parser (9.0)
  • Loading branch information
mattmundell authored Nov 25, 2019
2 parents b69ef29 + 0648f40 commit 35922a4
Show file tree
Hide file tree
Showing 7 changed files with 709 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added
- Add option to set finished hosts in OSP targets [#298](https://github.com/greenbone/gvm-libs/pull/298)
- Add a fast memory-only XML parser [#299](https://github.com/greenbone/gvm-libs/pull/299)

### Fixed
- Fix sigsegv when no plugin_feed_info.inc file present. [#278](https://github.com/greenbone/gvm-libs/pull/278)
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ configure_file (VERSION.in ${CMAKE_BINARY_DIR}/VERSION @ONLY)

enable_testing ()

# Umbrella target: building "tests" builds the unit test executables listed
# under DEPENDS.
add_custom_target (
  tests
  DEPENDS
    array-test
    xmlutils-test)

## Program

if (NOT SKIP_SRC)
Expand Down
3 changes: 0 additions & 3 deletions base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,6 @@ add_test (array-test array-test)

# Link the array-test executable against cgreen, GLib and the linker
# hardening flags.
target_link_libraries (array-test cgreen ${GLIB_LDFLAGS} ${LINKER_HARDENING_FLAGS})

add_custom_target (tests
DEPENDS array-test)


## Install

Expand Down
29 changes: 27 additions & 2 deletions util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ pkg_check_modules (LIBSSH REQUIRED libssh>=0.6.0)
# kb needs hiredis, version 0.10.1 or newer; configuration fails if
# pkg-config cannot find it (REQUIRED).
pkg_check_modules (REDIS REQUIRED hiredis>=0.10.1)

# The fast XML parser needs libxml2 (pkg-config module libxml-2.0, version
# 2.0 or newer); configuration fails if it is missing (REQUIRED).
pkg_check_modules (LIBXML2 REQUIRED libxml-2.0>=2.0)

# Build the NVTICACHE name from the project version and hand it to the
# compiler as the string macro NVTICACHE_STR.
set (NVTICACHE_STR "nvticache${PROJECT_VERSION}")
add_definitions (-DNVTICACHE_STR="${NVTICACHE_STR}")
Expand Down Expand Up @@ -131,7 +134,8 @@ if (BUILD_WITH_LDAP)
endif (NOT LIBLDAP)
endif (BUILD_WITH_LDAP)

# Header search paths for GLib, GPGME, libgcrypt and libxml2.  This is a
# directory-scope setting, so it applies to the targets defined in this
# directory.
include_directories (${GLIB_INCLUDE_DIRS} ${GPGME_INCLUDE_DIRS} ${GCRYPT_INCLUDE_DIRS}
${LIBXML2_INCLUDE_DIRS})

set (FILES authutils.c compressutils.c fileutils.c gpgmeutils.c kb.c ldaputils.c
nvticache.c radiusutils.c serverutils.c sshutils.c uuidutils.c
Expand Down Expand Up @@ -160,9 +164,30 @@ if (BUILD_SHARED)
${GIO_LDFLAGS} ${GPGME_LDFLAGS} ${ZLIB_LDFLAGS}
${RADIUS_LDFLAGS} ${LIBSSH_LDFLAGS} ${GNUTLS_LDFLAGS}
${GCRYPT_LDFLAGS} ${LDAP_LDFLAGS} ${REDIS_LDFLAGS}
${UUID_LDFLAGS} ${LINKER_HARDENING_FLAGS})
${LIBXML2_LDFLAGS} ${UUID_LDFLAGS}
${LINKER_HARDENING_FLAGS})
endif (BUILD_SHARED)


## Tests

# Unit test executable for xmlutils.  EXCLUDE_FROM_ALL keeps it out of the
# default build; it is only built through the test targets below.
add_executable (xmlutils-test
                EXCLUDE_FROM_ALL
                xmlutils_tests.c)

# Register the test with CTest.  The NAME/COMMAND signature is used so that
# the command is resolved from the xmlutils-test target to the path of the
# built executable; the legacy "add_test (name command)" form does not
# resolve target names.
add_test (NAME xmlutils-test COMMAND xmlutils-test)

# The test links against cgreen plus the same library flags as the shared
# library.
target_link_libraries (xmlutils-test cgreen
                       ${GLIB_LDFLAGS} ${GIO_LDFLAGS} ${GPGME_LDFLAGS} ${ZLIB_LDFLAGS}
                       ${RADIUS_LDFLAGS} ${LIBSSH_LDFLAGS} ${GNUTLS_LDFLAGS}
                       ${GCRYPT_LDFLAGS} ${LDAP_LDFLAGS} ${REDIS_LDFLAGS}
                       ${LIBXML2_LDFLAGS} ${UUID_LDFLAGS}
                       ${LINKER_HARDENING_FLAGS})

# Convenience target that builds only the xmlutils test executable.
add_custom_target (tests-xmlutils
                   DEPENDS xmlutils-test)


## Install
# Generate libgvm_util.pc in the top-level build directory from its template.
# @ONLY restricts substitution to @VAR@ references, leaving ${...} untouched.
configure_file (libgvm_util.pc.in ${CMAKE_BINARY_DIR}/libgvm_util.pc @ONLY)

Expand Down
246 changes: 246 additions & 0 deletions util/xmlutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <fcntl.h> /* for fcntl, F_SETFL, O_NONBLOCK */
#include <glib.h> /* for g_free, GSList, g_markup_parse_context_free */
#include <glib/gtypes.h> /* for GPOINTER_TO_INT, GINT_TO_POINTER, gsize */
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <string.h> /* for strcmp, strerror, strlen */
#include <time.h> /* for time, time_t */
#include <unistd.h> /* for ssize_t */
Expand Down Expand Up @@ -1566,3 +1568,247 @@ find_element_in_xml_file (gchar *file_path, gchar *find_element,
return search_data.found;
}
#undef XML_FILE_BUFFER_SIZE


/* The new faster parser that uses libxml2. */

/**
 * @brief Read an XML element tree from a string.
 *
 * The string is parsed in memory with libxml2.  Before parsing, libxml2 is
 * told to allocate through GLib (g_malloc, g_realloc, g_strdup, g_free).
 *
 * Caller must not free string until caller is finished using element.
 *
 * @param[in]   string   NUL-terminated input string.
 * @param[out]  element  Location for parsed element tree, or NULL if not
 *                       required.  If given, set to NULL on failure.
 *                       Free with element_free.
 *
 * @return 0 success, -1 read error (string is NULL or too long for libxml2),
 *         -2 parse error, -4 setup error.  -3 (XML ended prematurely) is
 *         part of the documented code set but is not returned by this
 *         implementation.
 */
int
parse_element (const gchar *string, element_t *element)
{
  xmlDocPtr doc;
  gsize length;

  LIBXML_TEST_VERSION

  if (element)
    *element = NULL;

  /* Without input there is nothing to read; strlen (NULL) would crash. */
  if (string == NULL)
    return -1;

  /* xmlReadMemory takes the size as an int, so refuse anything that would
   * be truncated by the conversion. */
  length = strlen (string);
  if (length > G_MAXINT)
    return -1;

  if (xmlMemSetup (g_free, g_malloc, g_realloc, g_strdup))
    return -4;

  doc = xmlReadMemory (string, (int) length, "noname.xml", NULL, 0);
  if (doc == NULL)
    return -2;

  if (element)
    *element = xmlDocGetRootElement (doc);
  else
    /* Nobody will get a handle to the tree, so release it here instead of
     * leaking the whole document. */
    xmlFreeDoc (doc);

  return 0;
}

/**
 * @brief Free the whole tree that an element belongs to.
 *
 * The document owning the element is released, so every node of that
 * document, including ancestors and siblings of element, is freed too.
 * Passing NULL does nothing.
 *
 * @param[in]  element  Any element of the tree to free, or NULL.
 */
void
element_free (element_t element)
{
  if (element == NULL)
    return;

  assert (element->doc);
  xmlFreeDoc (element->doc);
}

/**
 * @brief Get the name of an element.
 *
 * @param[in]  element  Element, may be NULL.
 *
 * @return Name of the element.  The empty string if element is NULL or is
 *         not an element node (XML_ELEMENT_NODE).
 */
const gchar *
element_name (element_t element)
{
  if (element == NULL || element->type != XML_ELEMENT_NODE)
    return "";

  return (const gchar *) element->name;
}

/**
 * @brief Find the first direct child of an element with a given name.
 *
 * Children are visited in document order and compared by node name.
 *
 * @param[in]  element  Parent element.  Must not be NULL.
 * @param[in]  name     Name to look for.
 *
 * @return First child whose name equals name, else NULL.
 */
static element_t
find_child (element_t element, const gchar *name)
{
  const xmlChar *wanted = (const xmlChar *) name;
  xmlNode *candidate = element->children;

  /* Walk the sibling list until a name matches or the list runs out. */
  while (candidate && xmlStrcmp (candidate->name, wanted) != 0)
    candidate = candidate->next;

  return candidate;
}

/**
 * @brief Get a child of an element.
 *
 * If name contains a ':' followed by more characters, the part after the
 * first ':' is tried first, because libxml2 leaves the namespace out of the
 * node name when the namespace is defined.  If that finds nothing, or if
 * there is no such part, the lookup is done with the full name.
 *
 * @param[in]  element  Element, may be NULL.
 * @param[in]  name     Name of the child, optionally with a namespace
 *                      prefix.
 *
 * @return Child if found, else NULL.  NULL if element is NULL.
 */
element_t
element_child (element_t element, const gchar *name)
{
  const gchar *colon;
  const gchar *local_name;
  element_t match;

  if (element == NULL)
    return NULL;

  colon = strchr (name, ':');
  if (colon == NULL)
    /* Plain name, a single lookup is enough. */
    return find_child (element, name);

  /* An empty local part is never searched for on its own, because that
   * would match text nodes.  In that case only the full name (namespace
   * plus colon) is used, for glib compatibility. */
  local_name = colon + 1;
  if (local_name[0] != '\0')
    {
      match = find_child (element, local_name);
      if (match)
        return match;
    }

  /* Fall back to the name exactly as given. */
  return find_child (element, name);
}

/**
 * @brief Get the text of an element.
 *
 * For a non-NULL element the result is always a string: when libxml2
 * reports no text for the element's children, a newly allocated empty
 * string is returned instead.  A caller that has NULL checked element
 * therefore does not need to NULL check the return.
 *
 * @param[in]  element  Element, may be NULL.
 *
 * @return NULL if element is NULL, else the text.  Caller must g_free.
 */
gchar *
element_text (element_t element)
{
  xmlChar *content;

  if (element == NULL)
    return NULL;

  content = xmlNodeListGetString (element->doc, element->xmlChildrenNode, 1);
  if (content == NULL)
    {
      /* No text: hand back an owned empty string rather than NULL. */
      content = xmlMalloc (1);
      content[0] = '\0';
    }

  return (gchar *) content;
}

/**
 * @brief Get an attribute of an element.
 *
 * If name contains a ':' followed by more characters, the part after the
 * first ':' is looked up first, because libxml2 leaves the namespace out of
 * the name when the namespace is defined.  If that finds nothing, or if
 * there is no such part, the lookup is done with the full name.
 *
 * @param[in]  element  Element, may be NULL.
 * @param[in]  name     Name of the attribute, optionally with a namespace
 *                      prefix.
 *
 * @return Attribute value if found, else NULL.  NULL if element is NULL.
 *         Caller must g_free.
 */
gchar *
element_attribute (element_t element, const gchar *name)
{
  const xmlChar *full_name;
  const gchar *colon;

  if (element == NULL)
    return NULL;

  full_name = (const xmlChar *) name;

  /* Only try the local part when there is one; a name that ends in ':' is
   * looked up as given, for glib compatibility. */
  colon = strchr (name, ':');
  if (colon && colon[1] != '\0')
    {
      xmlChar *value = xmlGetProp (element, (const xmlChar *) (colon + 1));

      if (value)
        return (gchar *) value;
    }

  /* No namespace, or nothing found without it: use the full name. */
  return (gchar *) xmlGetProp (element, full_name);
}

/**
 * @brief Get the first child of an element.
 *
 * The child is returned as stored in the tree, whatever its node type.
 *
 * @param[in]  element  Element, may be NULL.
 *
 * @return First child if there is one, else NULL.  NULL if element is NULL.
 */
element_t
element_first_child (element_t element)
{
  return element ? element->children : NULL;
}

/**
 * @brief Get the next sibling of an element.
 *
 * The sibling is returned as stored in the tree, whatever its node type.
 *
 * @param[in]  element  Element, may be NULL.
 *
 * @return Next sibling if there is one, else NULL.  NULL if element is NULL.
 */
element_t
element_next (element_t element)
{
  return element ? element->next : NULL;
}
30 changes: 30 additions & 0 deletions util/xmlutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,39 @@ int xml_count_entities (entities_t);
void
xml_string_append (GString *, const char *, ...);


/* XML file utilities */

int
find_element_in_xml_file (gchar *, gchar *, GHashTable *);


/* The new faster parser that uses libxml2. */

/* Handle to one node of a parsed tree: a pointer to libxml2's
 * struct _xmlNode. */
typedef struct _xmlNode *element_t;

/* Parse an XML string into an element tree.  Returns 0 on success and a
 * negative value on failure.  Free the tree with element_free. */
int
parse_element (const gchar *, element_t *);

/* Free the entire tree that the element is part of, including ancestors. */
void
element_free (element_t);

/* Name of the element, or "" if it is NULL or not an element node. */
const gchar *
element_name (element_t);

/* Value of the named attribute, or NULL if not found.  Caller must g_free. */
gchar *
element_attribute (element_t, const gchar *);

/* Text of the element; NULL only if the element is NULL.
 * Caller must g_free. */
gchar *
element_text (element_t);

/* First child with the given name, or NULL if there is none. */
element_t
element_child (element_t, const gchar *);

/* First child of the element, or NULL if there is none. */
element_t
element_first_child (element_t);

/* Next sibling of the element, or NULL if there is none. */
element_t
element_next (element_t);

#endif /* not _GVM_XMLUTILS_H */
Loading

0 comments on commit 35922a4

Please sign in to comment.