-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinkcheckerrc
245 lines (221 loc) · 7.48 KB
/
linkcheckerrc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# Sample configuration file; see the linkcheckerrc(5) man page or
# execute linkchecker -h for help on these options.
# Commandline or GUI options override these settings.
##################### output configuration ##########################
[output]
# enable debug messages; see 'linkchecker -h' for valid debug names
#debug=all
# print status output
#status=1
# change the logging type
#log=xml
# turn on/off --verbose
#verbose=1
# turn on/off --warnings
#warnings=0
# turn on/off --quiet
#quiet=1
# additional file output
#fileoutput = text, html, gml, sql
##################### logger configuration ##########################
# Note that the logger configuration is ignored by the linkchecker-gui
# program. Results in the GUI can be saved to a file with the command
# File -> Save results.
#
# logger output part names:
# all For all parts
# realurl The full url link
# result Valid or invalid, with messages
# extern 1 or 0, only in some logger types reported
# base <base href=...>
# name <a href=...>name</a> and <img alt="name">
# parenturl The referrer URL if there is any
# info Some additional info, e.g. FTP welcome messages
# warning Warnings
# dltime Download time
# checktime Check time
# url The original url name, can be relative
# intro The blurb at the beginning, "starting at ..."
# outro The blurb at the end, "found x errors ..."
# stats Statistics including URL lengths and contents.
# each Logger can have separate configuration parameters
# standard text logger
[text]
#filename=linkchecker-out.txt
#parts=all
# colors for the various parts, syntax is <color> or <type>;<color>
# type can be bold, light, blink, invert
# color can be default, black, red, green, yellow, blue, purple, cyan, white,
# Black, Red, Green, Yellow, Blue, Purple, Cyan, White
#colorparent=white
#colorurl=default
#colorname=default
#colorreal=cyan
#colorbase=purple
#colorvalid=bold;green
#colorinvalid=bold;red
#colorinfo=default
#colorwarning=bold;yellow
#colordltime=default
#colorreset=default
# GML logger
[gml]
#filename=linkchecker-out.gml
#parts=all
# valid encodings are listed in http://docs.python.org/library/codecs.html#standard-encodings
# default encoding is iso-8859-15
#encoding=utf_16
# DOT logger
[dot]
#filename=linkchecker-out.dot
#parts=all
# default encoding is ascii since the original DOT format does not
# support other charsets
#encoding=iso-8859-15
# CSV logger
[csv]
#filename=linkchecker-out.csv
#separator=,
#quotechar="
#parts=all
# SQL logger
[sql]
#filename=linkchecker-out.sql
#dbname=linksdb
#separator=;
#parts=all
# HTML logger
[html]
#filename=linkchecker-out.html
# colors for the various parts
#colorbackground=#fff7e5
#colorurl=#dcd5cf
#colorborder=#000000
#colorlink=#191c83
#colorwarning=#e0954e
#colorerror=#db4930
#colorok=#3ba557
#parts=all
# blacklist logger
[blacklist]
#filename=~/.linkchecker/blacklist
# custom xml logger
[xml]
#encoding=iso-8859-1
# GraphXML logger
[gxml]
#encoding=iso-8859-1
# Sitemap logger
[sitemap]
#priority=0.7
#frequency=weekly
##################### checking options ##########################
[checking]
# number of threads
#threads=100
# connection timeout in seconds
#timeout=60
# check anchors?
anchors=1
#recursionlevel=1
# supply a regular expression for which warnings are printed if found
# in any HTML files.
#warningregex=(Oracle DB Error|Page Not Found|badsite\.example\.com)
# Basic NNTP server. Overrides NNTP_SERVER environment variable.
# warn if size info exceeds given maximum of bytes
#warnsizebytes=2000
#nntpserver=
# check HTML or CSS syntax locally with HTML tidy or cssutils, falling
# back to the W3C online validator
checkhtml=1
checkcss=0
# scan URL content for viruses with ClamAV
#scanvirus=1
# ClamAV config file
#clamavconf=/etc/clamav/clamd.conf
# Send and store cookies
#cookies=1
# parse a cookiefile for initial cookie data
#cookiefile=/path/to/cookies.txt
# User-Agent header string to send to HTTP web servers
#useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
# Pause the given number of seconds between two subsequent connection
# requests to the same host.
#pause=0
# When checking finishes, write a memory dump to a temporary file.
# The memory dump is written both when checking finishes normally
# and when checking gets canceled.
# The memory dump only works if the python-meliae package is installed.
# Otherwise a warning is printed to install it.
#debugmemory=0
# When checking absolute URLs inside local files, the given root directory
# is used as base URL.
# Note that the given directory must have URL syntax, so it must use a slash
# to join directories instead of a backslash.
# And the given directory must end with a slash.
# Unix example:
#localwebroot=/var/www/
# Windows example:
#localwebroot=/C|/public_html/
# Check that SSL certificates are at least the given number of days valid.
# The number must not be negative.
# If the number of days is zero a warning is printed only for certificates
# that are already expired.
# The default number of days is 14.
#sslcertwarndays=14
# Stop checking new URLs after the given number of seconds. Same as if the
# user hits Ctrl-C after X seconds.
#maxrunseconds=600
# Maximum number of URLs to check. New URLs will not be queued after the
# given number of URLs is checked.
#maxnumurls=153
##################### filtering options ##########################
[filtering]
#ignore=
# ignore everything with 'lconline' in the URL name
# lconline
# and ignore everything with 'bookmark' in the URL name
# bookmark
# and ignore all mailto: URLs
# ^mailto:
# do not recurse into the following URLs
ignore=(^mailto:|^https://www.linkedin.com/|^https://plus.google.com/u/0|\.asc$|bootstrap\.min\.js$)
#nofollow=
# just an example
# http://www\.example\.com/bla
# Ignore specified warnings (see linkchecker -h for the list of
# recognized warnings). Add a comma-separated list of warnings here
# that prevent a valid URL from being logged. Note that the warning
# will be logged in invalid URLs.
#ignorewarnings=url-unicode-domain,anchor-not-found
ignorewarnings=http-robots-denied,https-certificate-error
# Regular expression to add more URLs recognized as internal links.
# Default is that URLs given on the command line are internal.
#internlinks=^http://www\.example\.net/
##################### password authentication ##########################
[authentication]
# WARNING: if you store passwords in this configuration entry, make sure the
# configuration file is not readable by other users.
# Different user/password pairs for different URLs can be provided.
# Entries are a triple (URL regular expression, username, password),
# separated by whitespace.
# If the regular expression matches, the given user/password pair is used
# for authentication. The commandline options -u,-p match every link
# and therefore override the entries given here. The first match wins.
# At the moment, authentication is used for http[s] and ftp links.
#entry=
# Note that passwords are optional. If any passwords are stored here,
# this file should not readable by other users.
# ^https?://www\.example\.com/~calvin/ calvin mypass
# ^ftp://www\.example\.com/secret/ calvin
# if the website requires a login the URL and optionally the user and
# password CGI fieldnames can be provided.
#loginurl=http://www.example.com/
# The name of the user and password CGI field
#loginuserfield=login
#loginpasswordfield=password
# Optionally any additional CGI name/value pairs. Note that the default
# values are submitted automatically.
#loginextrafields=
# name1:value1
# name 2:value 2