Skip to content

Commit

Permalink
Merge pull request #259 from jelu/psl
Browse files Browse the repository at this point in the history
PSL TLD list
  • Loading branch information
jelu authored Jan 13, 2022
2 parents 0bb6688 + 2880f93 commit 74b942d
Show file tree
Hide file tree
Showing 21 changed files with 20,998 additions and 18 deletions.
4 changes: 2 additions & 2 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Priority: optional
Maintainer: Jerry Lundström <lundstrom.jerry@gmail.com>
Build-Depends: debhelper (>= 10), build-essential, automake, autoconf,
libpcap-dev, libproc-pid-file-perl, netbase, libgeoip-dev, pkg-config,
libmaxminddb-dev, libdnswire-dev, libuv1-dev
libmaxminddb-dev, libdnswire-dev, libuv1-dev, python3 (>= 3.5)
Standards-Version: 3.9.4
Homepage: https://www.dns-oarc.net/oarc/data/dsc
Vcs-Git: https://github.com/DNS-OARC/dsc.git
Expand All @@ -14,7 +14,7 @@ Package: dsc
Architecture: any
Conflicts: dsc-statistics-collector
Depends: ${shlibs:Depends}, ${perl:Depends}, ${misc:Depends},
libproc-pid-file-perl
libproc-pid-file-perl, ${python3:Depends}
Description: DNS Statistics Collector
DNS Statistics Collector (DSC) is a tool used for collecting and exploring
statistics from busy DNS servers. It uses a distributed architecture with
Expand Down
2 changes: 2 additions & 0 deletions rpm/dsc.spec
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ BuildRequires: libtool
BuildRequires: pkgconfig
BuildRequires: dnswire-devel
BuildRequires: libuv-devel
BuildRequires: python3
Requires: perl
Requires: perl(Proc::PID::File)
Requires: python3

%description
DNS Statistics Collector (DSC) is a tool used for collecting and exploring
Expand Down
16 changes: 12 additions & 4 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ AM_CFLAGS = -I$(srcdir) \
$(libmaxminddb_CFLAGS) \
$(libdnswire_CFLAGS) $(libuv_CFLAGS)

EXTRA_DIST = dsc.sh dsc.conf.sample.in dsc.1.in dsc.conf.5.in
EXTRA_DIST = dsc.sh dsc.conf.sample.in dsc.1.in dsc.conf.5.in \
dsc-psl-convert.1.in

etcdir = $(sysconfdir)/dsc
etc_DATA = dsc.conf.sample

bin_PROGRAMS = dsc
dist_bin_SCRIPTS = dsc-psl-convert
dsc_SOURCES = asn_index.c certain_qnames_index.c client_index.c \
client_subnet_index.c compat.c config_hooks.c country_index.c daemon.c \
dns_ip_version_index.c dns_message.c dns_protocol.c dns_source_port_index.c \
Expand All @@ -24,7 +26,7 @@ dsc_SOURCES = asn_index.c certain_qnames_index.c client_index.c \
parse_conf.c pcap.c qclass_index.c qname_index.c qnamelen_index.c \
qr_aa_bits_index.c qtype_index.c query_classification_index.c rcode_index.c \
rd_bit_index.c server_ip_addr_index.c tc_bit_index.c tld_index.c \
transport_index.c xmalloc.c response_time_index.c \
transport_index.c xmalloc.c response_time_index.c tld_list.c \
ext/base64.c ext/lookup3.c \
pcap_layers/pcap_layers.c \
pcap-thread/pcap_thread.c \
Expand All @@ -38,13 +40,13 @@ dist_dsc_SOURCES = asn_index.h base64.h certain_qnames_index.h client_index.h \
parse_conf.h pcap.h qclass_index.h qname_index.h qnamelen_index.h \
qr_aa_bits_index.h qtype_index.h query_classification_index.h rcode_index.h \
rd_bit_index.h server_ip_addr_index.h syslog_debug.h tc_bit_index.h \
tld_index.h transport_index.h xmalloc.h response_time_index.h \
tld_index.h transport_index.h xmalloc.h response_time_index.h tld_list.h \
pcap_layers/byteorder.h pcap_layers/pcap_layers.h \
pcap-thread/pcap_thread.h \
dnstap.h input_mode.h knowntlds.inc
dsc_LDADD = $(PTHREAD_LIBS) $(libmaxminddb_LIBS) \
$(libdnswire_LIBS) $(libuv_LIBS)
man1_MANS = dsc.1
man1_MANS = dsc.1 dsc-psl-convert.1
man5_MANS = dsc.conf.5

dsc.conf.sample: dsc.conf.sample.in Makefile
Expand All @@ -59,6 +61,12 @@ dsc.1: dsc.1.in Makefile
-e 's,[@]etcdir[@],$(etcdir),g' \
< $(srcdir)/dsc.1.in > dsc.1

# Generate the dsc-psl-convert(1) man page from its template by replacing the
# @PACKAGE_VERSION@, @PACKAGE_URL@ and @PACKAGE_BUGREPORT@ placeholders with
# the corresponding make variables. The template is read from $(srcdir) and
# the result is written to the current (build) directory.
# Makefile is listed as a prerequisite, so the page is regenerated whenever
# the Makefile itself is newer than the page.
# NOTE(review): the placeholders are spelled [@]NAME[@] in the sed patterns,
# presumably so configure does not substitute the patterns themselves - confirm.
dsc-psl-convert.1: dsc-psl-convert.1.in Makefile
sed -e 's,[@]PACKAGE_VERSION[@],$(PACKAGE_VERSION),g' \
-e 's,[@]PACKAGE_URL[@],$(PACKAGE_URL),g' \
-e 's,[@]PACKAGE_BUGREPORT[@],$(PACKAGE_BUGREPORT),g' \
< $(srcdir)/dsc-psl-convert.1.in > dsc-psl-convert.1

dsc.conf.5: dsc.conf.5.in Makefile
sed -e 's,[@]PACKAGE_VERSION[@],$(PACKAGE_VERSION),g' \
-e 's,[@]PACKAGE_URL[@],$(PACKAGE_URL),g' \
Expand Down
33 changes: 33 additions & 0 deletions src/config_hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "response_time_index.h"
#include "input_mode.h"
#include "dnstap.h"
#include "tld_list.h"

#include "knowntlds.inc"

Expand Down Expand Up @@ -675,3 +676,35 @@ int load_knowntlds(const char* file)

return 1;
}

/*
 * Load a TLD list from `file` and register every entry with tld_list_add().
 *
 * The file is read line by line. Each line is cut at the first CR or LF and
 * lower-cased up to that point. Lines whose first character is '#' are
 * treated as comments and skipped; lines that are empty once the line ending
 * has been removed are skipped as well, so an empty name is never registered.
 *
 * Returns 1 on success, or 0 if the file could not be opened (the failure is
 * logged with LOG_ERR).
 */
int load_tld_list(const char* file)
{
    FILE*  fp;
    char * buffer = 0, *p;
    size_t bufsize = 0;

    if (!(fp = fopen(file, "r"))) {
        dsyslogf(LOG_ERR, "unable to open %s", file);
        return 0;
    }

    // getline() (re)allocates buffer as needed; it is released once after the loop
    while (getline(&buffer, &bufsize, fp) > 0 && buffer) {
        for (p = buffer; *p; p++) {
            if (*p == '\r' || *p == '\n') {
                // strip the line ending and anything after it
                *p = 0;
                break;
            }
            // tolower() is only defined for EOF and values representable as
            // unsigned char; a plain char may be negative for bytes >= 0x80
            *p = (char)tolower((unsigned char)*p);
        }
        if (!buffer[0] || buffer[0] == '#') {
            // blank line or comment
            continue;
        }
        tld_list_add(buffer);
    }
    free(buffer);
    fclose(fp);

    dsyslogf(LOG_INFO, "loaded TLD list from %s", file);

    return 1;
}
1 change: 1 addition & 0 deletions src/config_hooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,6 @@ int set_response_time_max_seconds(const char* s);
int set_response_time_max_sec_mode(const char* s);
int set_response_time_bucket_size(const char* s);
int load_knowntlds(const char* file);
int load_tld_list(const char* file);

#endif /* __dsc_config_hooks_h */
34 changes: 34 additions & 0 deletions src/dns_message.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "dns_message.h"
#include "xmalloc.h"
#include "syslog_debug.h"
#include "tld_list.h"

#include "null_index.h"
#include "qtype_index.h"
Expand Down Expand Up @@ -398,6 +399,39 @@ const char* dns_message_QnameToNld(const char* qname, int nld)
t = e;
if (0 == strcmp(t, ".arpa"))
dotcount--;
if (have_tld_list) {
// Use TLD list to find labels that are the "TLD"
const char *lt = 0, *ot = t;
while (t > qname) {
t--;
if ('.' == *t) {
if (0 == state) {
int r = tld_list_find(t + 1);
if (r & 1) {
// this is a tld
lt = t;
}
if (!r || !(r & 2)) {
// no more children
if (lt) {
// reset to what we last found
t = lt;
dotcount++;
state = 1;
} else {
// or reset
t = ot;
state = 0;
}
break;
}
}
state = 1;
} else {
state = 0;
}
}
}
while (t > qname && dotcount < nld) {
t--;
if ('.' == *t) {
Expand Down
96 changes: 96 additions & 0 deletions src/dsc-psl-convert
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python3
# Copyright (c) 2008-2022, OARC, Inc.
# Copyright (c) 2007-2008, Internet Systems Consortium, Inc.
# Copyright (c) 2003-2007, The Measurement Factory, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import sys
import os
import io
import re
import argparse
from encodings import idna

# Command-line interface: one optional positional argument naming the PSL file
# and two boolean switches. The parsed result is kept in the module-level
# `args` namespace, which the rest of the script reads.
parser = argparse.ArgumentParser(
    description='Convert Public Suffix List (PSL) to DSC TLD List (stdout)',
    epilog='See `man dsc-psl-convert` for more information',
)
parser.add_argument(
    'fn',
    metavar='PSL',
    type=str,
    nargs='?',
    help='specify the PSL to use or use system publicsuffix if exists, "-" will read from stdin',
)
parser.add_argument(
    '--all',
    action='store_true',
    help='include all of PSL, as default it will stop after ICANN domains',
)
parser.add_argument(
    '--no-skip-idna-err',
    action='store_true',
    help='fail if idna.ToASCII() fails, default is to ignore these errors',
)
args = parser.parse_args()


def dn2ascii(dn):
    """Convert a dotted domain name to its ASCII (IDNA) form.

    Every label of `dn` is passed through idna.ToASCII() and the results are
    joined with dots again.

    Returns the converted name as a str. If a label cannot be converted, the
    exception is propagated when `args.no_skip_idna_err` is set; otherwise
    None is returned so the caller can skip the name.
    """
    converted = []
    for label in dn.split('.'):
        try:
            converted.append(idna.ToASCII(label).decode('utf-8'))
        except Exception:
            if args.no_skip_idna_err:
                # strict mode: let the conversion error reach the caller
                raise
            return None
    return '.'.join(converted)


# No PSL given on the command line: fall back to a system-installed copy,
# trying the known publicsuffix data directories in order.
if not args.fn:
    for e in ['/usr/share/publicsuffix', '/usr/local/share/publicsuffix']:
        e += '/public_suffix_list.dat'
        if os.path.isfile(e):
            args.fn = e
            break

if not args.fn:
    parser.error('No installed PSL file found, please specify one')

# Open the input as UTF-8 text; "-" selects stdin.
f = None
try:
    if args.fn == "-":
        f = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    else:
        f = open(args.fn, 'r', encoding='utf-8')
except Exception as e:
    parser.exit(1, "Unable to open %r: %s\n" % (args.fn, e))

# Raw string so that \!, \s and \. reach the regex engine without relying on
# Python passing unknown string escapes through (which triggers a warning).
# The pattern itself is unchanged: it captures a leading run of characters
# that contains at least one dot, which means names without a dot and lines
# starting with '!', '/' or whitespace never match.
r = re.compile(r'^([^\!\s(?://)]+\.[^\s(?://)]+)')

# The with-statement closes the input once all lines have been processed.
with f:
    for l in f:
        # Unless --all was given, stop at the end of the ICANN section.
        if not args.all and '===END ICANN DOMAINS===' in l:
            break
        # Drop wildcard prefixes so "*.example" is handled as "example".
        l = l.replace('*.', '')
        m = r.search(l)
        if m:
            dn = dn2ascii(m.group(1))
            if dn is None:
                # conversion failed and errors are being skipped
                continue
            print(dn)
Loading

0 comments on commit 74b942d

Please sign in to comment.