-
-
Notifications
You must be signed in to change notification settings - Fork 53
/
Copy pathfilter_model.py
36 lines (36 loc) · 2.79 KB
/
filter_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Remove endpoints that end with any of the given strings.
# The list is empty by default; ready-made extension lists, grouped by
# category, are provided as commented-out lines further down in this file.
# NOTE(review): the examples carry a leading "*" (glob style) - confirm how
# the code consuming this list interprets it.
# Example:
# exclude_endswith = ["*.jpg", "*.png", "*.ico", "*.pdf", "*.css", "*.gif"]
exclude_endswith = []
##################################
# Remove endpoints that contain any of the given strings at any position.
# The list is empty by default.
# Example:
# exclude_contain = ["product", "Scripts", "a/h/", "about", "article/", "assets/", "bd/", "blog", "comment", "item/", "es/", "ru/", "eg/", "fr/", "uk/", "ua/", "tw/", "search", "global/", "hk/", "in/", "it/", "tr/", "us/", "vn/", "en/"]
exclude_contain = []
##################################
# Remove any endpoints that 'match' one of these regular expressions.
# The list is empty by default.
# Write the patterns as raw strings (r"...") so that sequences such as \d
# reach the regex engine instead of being read as string escapes.
# NOTE(review): whether the consumer anchors the pattern at the start of the
# endpoint or searches anywhere in it is not visible here - confirm.
# Example:
# remove_regex = [r"^(\d*)-(\d*)", r"^(\d*)\.html$", r"^(\d*)$"]
remove_regex = []
##################################
# Extension filters, arranged by category.
#sheet
#exclude_endswith = ["*.ods", "*.xls", "*.xlsx", "*.csv", "*.ics", "*.vcf"]
#image
#exclude_endswith = ["*.3dm", "*.3ds", "*.max", "*.bmp", "*.dds", "*.gif", "*.jpg", "*.jpeg", "*.png", "*.psd", "*.xcf", "*.tga", "*.thm", "*.tif", "*.tiff", "*.yuv", "*.ai", "*.eps", "*.ps", "*.svg", "*.dwg", "*.dxf", "*.gpx", "*.kml", "*.kmz", "*.webp"]
#video
#exclude_endswith = ["*.3g2", "*.3gp", "*.aaf", "*.asf", "*.avchd", "*.avi", "*.drc", "*.flv", "*.m2v", "*.m4p", "*.m4v", "*.mkv", "*.mng", "*.mov", "*.mp2", "*.mp4", "*.mpe", "*.mpeg", "*.mpg", "*.mpv", "*.mxf", "*.nsv", "*.ogg", "*.ogv", "*.ogm", "*.qt", "*.rm", "*.rmvb", "*.roq", "*.srt", "*.svi", "*.vob", "*.webm", "*.wmv", "*.yuv"]
#audio
#exclude_endswith = ["*.aac", "*.aiff", "*.ape", "*.au", "*.flac", "*.gsm", "*.it", "*.m3u", "*.m4a", "*.mid", "*.mod", "*.mp3", "*.mpa", "*.pls", "*.ra", "*.s3m", "*.sid", "*.wav", "*.wma", "*.xm"]
#archive
#exclude_endswith = ["*.7z", "*.a", "*.apk", "*.ar", "*.bz2", "*.cab", "*.cpio", "*.deb", "*.dmg", "*.egg", "*.gz", "*.iso", "*.jar", "*.lha", "*.mar", "*.pea", "*.rar", "*.rpm", "*.s7z", "*.shar", "*.tar", "*.tbz2", "*.tgz", "*.tlz", "*.war", "*.whl", "*.xpi", "*.zip", "*.zipx", "*.xz", "*.pak"]
#exec
#exclude_endswith = ["*.exe", "*.msi", "*.bin", "*.command", "*.sh", "*.bat", "*.crx"]
#code
#exclude_endswith = ["*.c", "*.cc", "*.class", "*.clj", "*.cpp", "*.cs", "*.cxx", "*.el", "*.go", "*.h", "*.java", "*.lua", "*.m", "*.m4", "*.php", "*.pl", "*.po", "*.py", "*.rb", "*.rs", "*.sh", "*.swift", "*.vb", "*.vcxproj", "*.xcodeproj", "*.xml", "*.diff", "*.patch", "*.html", "*.js"]
#web
#exclude_endswith = ["*.html", "*.htm", "*.css", "*.js", "*.jsx", "*.less", "*.scss", "*.wasm", "*.php"]
#font
#exclude_endswith = ["*.eot", "*.otf", "*.ttf", "*.woff", "*.woff2"]
#text
#exclude_endswith = ["*.doc", "*.docx", "*.ebook", "*.log", "*.md", "*.msg", "*.odt", "*.org", "*.pages", "*.pdf", "*.rtf", "*.rst", "*.tex", "*.txt", "*.wpd", "*.wps", "*.ppt", "*.odp", "*.mobi", "*.epub", "*.azw1", "*.azw3", "*.azw4", "*.azw6", "*.azw", "*.cbr", "*.cbz"]