-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(plugin): Add new plugin ua-restriction for bot spider restriction #4587
Changes from 8 commits
f2a8bf1
830b621
bfe2d7c
cb6d5d8
8be944e
dc331cd
7ea7c87
0c6dcb2
420e676
f7a61bd
9ace564
1ba8c12
a0fb6dd
6837ed1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
-- | ||
-- Licensed to the Apache Software Foundation (ASF) under one or more | ||
-- contributor license agreements. See the NOTICE file distributed with | ||
-- this work for additional information regarding copyright ownership. | ||
-- The ASF licenses this file to You under the Apache License, Version 2.0 | ||
-- (the "License"); you may not use this file except in compliance with | ||
-- the License. You may obtain a copy of the License at | ||
-- | ||
-- http://www.apache.org/licenses/LICENSE-2.0 | ||
-- | ||
-- Unless required by applicable law or agreed to in writing, software | ||
-- distributed under the License is distributed on an "AS IS" BASIS, | ||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
-- See the License for the specific language governing permissions and | ||
-- limitations under the License. | ||
-- | ||
local ipairs = ipairs | ||
local core = require("apisix.core") | ||
local stringx = require('pl.stringx') | ||
local type = type | ||
local str_strip = stringx.strip | ||
local re_find = ngx.re.find | ||
|
||
-- Result codes returned by match_user_agent. | ||
-- The numeric order matters: _M.access rejects a request when the result | ||
-- is greater than MATCH_ALLOW, i.e. only for MATCH_DENY. | ||
local MATCH_NONE = 0 | ||
local MATCH_ALLOW = 1 | ||
local MATCH_DENY = 2 | ||
|
||
-- Cache in front of match_user_agent. _M.access calls it with the | ||
-- User-Agent string, conf, match_user_agent and the arguments to forward. | ||
-- NOTE(review): ttl is presumably in seconds and count the maximum number | ||
-- of cached entries -- confirm against core.lrucache. | ||
local lrucache_useragent = core.lrucache.new({ ttl = 300, count = 4096 }) | ||
|
||
-- JSON schema of the plugin configuration.
--   message:   body of the rejection response (1 to 1024 characters).
--   allowlist: regex rules; a matching User-Agent is classified as allowed.
--   denylist:  regex rules; a matching User-Agent is classified as denied.
-- At least one of the two lists is required (see `anyOf`).
-- Every rule is passed to ngx.re.find by match_user_agent, so list entries
-- must be non-empty strings; anything else is rejected at configuration
-- time instead of failing while a request is being processed.
local schema = {
    type = "object",
    properties = {
        message = {
            type = "string",
            minLength = 1,
            maxLength = 1024,
            default = "Not allowed"
        },
        allowlist = {
            type = "array",
            minItems = 1,
            items = {
                type = "string",
                minLength = 1,
            }
        },
        denylist = {
            type = "array",
            minItems = 1,
            items = {
                type = "string",
                minLength = 1,
            }
        },
    },
    anyOf = {
        {required = {"allowlist"}},
        {required = {"denylist"}},
    },
    additionalProperties = false,
}
|
||
-- Plugin name, exposed as _M.name. | ||
local plugin_name = "ua-restriction" | ||
|
||
-- Module table returned at the end of this file. It carries the plugin | ||
-- metadata together with the configuration schema defined above. | ||
-- NOTE(review): the meaning of `priority` is defined by the plugin | ||
-- framework, not by this file -- confirm the chosen value there. | ||
local _M = { | ||
version = 0.1, | ||
priority = 2999, | ||
name = plugin_name, | ||
schema = schema, | ||
} | ||
|
||
-- Returns true when `user_agent` matches at least one regex in `rules`.
-- If ngx.re.find reports an error for a rule (for example an invalid
-- pattern), the error is logged and the rule is treated as a non-match,
-- so the remaining rules are still evaluated.
local function match_any_rule(user_agent, rules)
    for _, rule in ipairs(rules) do
        local from, _, err = re_find(user_agent, rule, "jo")
        if from then
            return true
        end

        if err then
            core.log.error("failed to match User-Agent against rule [", rule,
                           "]: ", err)
        end
    end

    return false
end


-- Classifies one User-Agent value against the plugin configuration.
-- @param user_agent  string, a single User-Agent header value
-- @param conf        plugin configuration (optional `allowlist`/`denylist`)
-- @return MATCH_ALLOW if an allowlist rule matches, otherwise MATCH_DENY if
--         a denylist rule matches, otherwise MATCH_NONE
local function match_user_agent(user_agent, conf)
    -- surrounding whitespace is not part of the value to be matched
    user_agent = str_strip(user_agent)

    -- the allowlist is consulted first, so it wins when both lists match
    if conf.allowlist and match_any_rule(user_agent, conf.allowlist) then
        return MATCH_ALLOW
    end

    if conf.denylist and match_any_rule(user_agent, conf.denylist) then
        return MATCH_DENY
    end

    return MATCH_NONE
end
|
||
-- Validates a plugin configuration against `schema`.
-- @param conf  plugin configuration table
-- @return true on success; false plus the validation error otherwise
function _M.check_schema(conf)
    local valid, reason = core.schema.check(schema, conf)
    if valid then
        return true
    end

    return false, reason
end
|
||
-- Access handler: answers 403 with the configured message when a
-- User-Agent header value is classified as denied; in every other case it
-- returns nothing.
-- @param conf  plugin configuration
-- @param ctx   request context handed to core.request.header
function _M.access(conf, ctx)
    local header_value = core.request.header(ctx, "User-Agent")

    -- no User-Agent header at all: nothing to check
    if not header_value then
        return
    end

    local verdict = MATCH_NONE
    if type(header_value) ~= "table" then
        verdict = lrucache_useragent(header_value, conf, match_user_agent, header_value, conf)
    else
        -- the header value is a table of values: check each string entry
        -- and stop at the first one that is denied
        for _, ua in ipairs(header_value) do
            if type(ua) == "string" then
                verdict = lrucache_useragent(ua, conf, match_user_agent, ua, conf)
                -- MATCH_DENY is the only result code above MATCH_ALLOW
                if verdict > MATCH_ALLOW then
                    break
                end
            end
        end
    end

    if verdict > MATCH_ALLOW then
        return 403, { message = conf.message }
    end
end
|
||
return _M |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,128 @@ | ||||||
--- | ||||||
title: ua-restriction | ||||||
--- | ||||||
|
||||||
<!-- | ||||||
# | ||||||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||||||
# contributor license agreements. See the NOTICE file distributed with | ||||||
# this work for additional information regarding copyright ownership. | ||||||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||||||
# (the "License"); you may not use this file except in compliance with | ||||||
# the License. You may obtain a copy of the License at | ||||||
# | ||||||
# http://www.apache.org/licenses/LICENSE-2.0 | ||||||
# | ||||||
# Unless required by applicable law or agreed to in writing, software | ||||||
# distributed under the License is distributed on an "AS IS" BASIS, | ||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
# See the License for the specific language governing permissions and | ||||||
# limitations under the License. | ||||||
# | ||||||
--> | ||||||
|
||||||
## Summary | ||||||
|
||||||
- [**Name**](#name) | ||||||
- [**Attributes**](#attributes) | ||||||
- [**How To Enable**](#how-to-enable) | ||||||
- [**Test Plugin**](#test-plugin) | ||||||
- [**Disable Plugin**](#disable-plugin) | ||||||
|
||||||
## Name | ||||||
|
||||||
The `ua-restriction` plugin restricts access to a Service or a Route by checking the request's `User-Agent` header against an `allowlist` and/or a `denylist`. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Broken sentence? What does the "by either |
||||||
|
||||||
## Attributes | ||||||
|
||||||
| Name | Type | Requirement | Default | Valid | Description | | ||||||
| --------- | ------------- | ----------- | ------- | ----- | ---------------------------------------- | | ||||||
| allowlist | array[string] | optional | | | List of regular expressions matching the User-Agent values to allow. | | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| denylist | array[string] | optional | | | List of regular expressions matching the User-Agent values to deny. | | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| message | string | optional | Not allowed | [1, 1024] | Message returned when a request is rejected; its length must be within [1, 1024]. | | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should clarify that |
||||||
|
||||||
At least one of `allowlist` and `denylist` must be configured. When both are configured, they are checked in this order: allowlist -> denylist. | ||||||
|
||||||
The message can be user-defined. | ||||||
|
||||||
## How To Enable | ||||||
|
||||||
Create a Route or Service object and enable the `ua-restriction` plugin on it. | ||||||
|
||||||
```shell | ||||||
curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' | ||||||
{ | ||||||
"uri": "/index.html", | ||||||
"upstream": { | ||||||
"type": "roundrobin", | ||||||
"nodes": { | ||||||
"127.0.0.1:1980": 1 | ||||||
} | ||||||
}, | ||||||
"plugins": { | ||||||
"ua-restriction": { | ||||||
"allowlist": [ | ||||||
"my-bot1", | ||||||
"(Baiduspider)/(\\d+)\\.(\\d+)" | ||||||
], | ||||||
"denylist": [ | ||||||
"my-bot2", | ||||||
"(Twitterspider)/(\\d+)\\.(\\d+)" | ||||||
] | ||||||
} | ||||||
} | ||||||
}' | ||||||
``` | ||||||
|
||||||
By default, `{"message":"Not allowed"}` is returned when a request is rejected. To use a custom message, set `message` in the plugin configuration. | ||||||
|
||||||
```json | ||||||
"plugins": { | ||||||
"ua-restriction": { | ||||||
"denylist": [ | ||||||
"my-bot2", | ||||||
"(Twitterspider)/(\\d+)\\.(\\d+)" | ||||||
], | ||||||
"message": "Do you want to do something bad?" | ||||||
} | ||||||
} | ||||||
``` | ||||||
|
||||||
## Test Plugin | ||||||
|
||||||
Request with a normal User-Agent: | ||||||
|
||||||
```shell | ||||||
$ curl http://127.0.0.1:9080/index.html -i | ||||||
HTTP/1.1 200 OK | ||||||
... | ||||||
``` | ||||||
|
||||||
Request with a bot User-Agent: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
```shell | ||||||
$ curl http://127.0.0.1:9080/index.html -i --header 'User-Agent: Twitterspider/2.0' | ||||||
HTTP/1.1 403 Forbidden | ||||||
``` | ||||||
|
||||||
## Disable Plugin | ||||||
|
||||||
To disable the `ua-restriction` plugin, delete the corresponding JSON configuration | ||||||
from the plugin configuration. There is no need to restart the service; | ||||||
the change takes effect immediately: | ||||||
|
||||||
```shell | ||||||
$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' | ||||||
{ | ||||||
"uri": "/index.html", | ||||||
"plugins": {}, | ||||||
"upstream": { | ||||||
"type": "roundrobin", | ||||||
"nodes": { | ||||||
"39.97.63.215:80": 1 | ||||||
} | ||||||
} | ||||||
}' | ||||||
``` | ||||||
|
||||||
The `ua-restriction` plugin is now disabled. The same method works for other plugins. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
--- | ||
title: ua-restriction | ||
--- | ||
|
||
<!-- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
--> | ||
|
||
## 目录 | ||
|
||
- [**名字**](#名字) | ||
- [**属性**](#属性) | ||
- [**如何启用**](#如何启用) | ||
- [**测试插件**](#测试插件) | ||
- [**禁用插件**](#禁用插件) | ||
|
||
## 名字 | ||
|
||
`ua-restriction` 可以通过将指定的 `User-Agent` 列入白名单或黑名单的方式,来限制对服务或路由的访问。 | ||
|
||
## 属性 | ||
|
||
| 参数名 | 类型 | 可选项 | 默认值 | 有效值 | 描述 | | ||
| --------- | ------------- | ------ | ------ | ------ | -------------------------------- | | ||
| allowlist | array[string] | 可选 | | | 加入白名单的 User-Agent | | ||
| denylist | array[string] | 可选 | | | 加入黑名单的 User-Agent | | ||
| message | string | 可选 | Not allowed | [1, 1024] | 在未允许的 User-Agent 访问的情况下返回的信息 | | ||
|
||
白名单或黑名单可以同时启用,此插件对 User-Agent 的检查先后顺序依次如下:白名单、黑名单。`message`可以由用户自定义。 | ||
|
||
## 如何启用 | ||
|
||
下面是一个示例,在指定的 route 上开启了 `ua-restriction` 插件: | ||
|
||
```shell | ||
curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' | ||
{ | ||
"uri": "/index.html", | ||
"upstream": { | ||
"type": "roundrobin", | ||
"nodes": { | ||
"127.0.0.1:1980": 1 | ||
} | ||
}, | ||
"plugins": { | ||
"ua-restriction": { | ||
"allowlist": [ | ||
"my-bot1", | ||
"(Baiduspider)/(\\d+)\\.(\\d+)" | ||
], | ||
"denylist": [ | ||
"my-bot2", | ||
"(Twitterspider)/(\\d+)\\.(\\d+)" | ||
] | ||
} | ||
} | ||
}' | ||
``` | ||
|
||
当未允许的 User-Agent 访问时,默认返回`{"message":"Not allowed"}`。如果你想使用自定义的`message`,可以在插件部分进行配置: | ||
|
||
```json | ||
"plugins": { | ||
"ua-restriction": { | ||
"denylist": [ | ||
"my-bot2", | ||
"(Twitterspider)/(\\d+)\\.(\\d+)" | ||
], | ||
"message": "Do you want to do something bad?" | ||
} | ||
} | ||
``` | ||
|
||
## 测试插件 | ||
|
||
通过正常的 UA 访问: | ||
|
||
```shell | ||
$ curl http://127.0.0.1:9080/index.html --header 'User-Agent: YourApp/2.0.0' | ||
HTTP/1.1 200 OK | ||
``` | ||
|
||
通过爬虫 User-Agent 访问: | ||
|
||
```shell | ||
$ curl http://127.0.0.1:9080/index.html --header 'User-Agent: Twitterspider/2.0' | ||
HTTP/1.1 403 Forbidden | ||
``` | ||
|
||
## 禁用插件 | ||
|
||
当你想去掉 `ua-restriction` 插件的时候,很简单,在插件的配置中把对应的 json 配置删除即可,无须重启服务,即刻生效: | ||
|
||
```shell | ||
$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' | ||
{ | ||
"uri": "/index.html", | ||
"plugins": {}, | ||
"upstream": { | ||
"type": "roundrobin", | ||
"nodes": { | ||
"39.97.63.215:80": 1 | ||
} | ||
} | ||
}' | ||
``` | ||
|
||
现在就已移除 `ua-restriction` 插件,其它插件的开启和移除也类似。 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
minProperties
is useless?