From db4174f8cd3b8e86cc4a0d475934421f1a43325c Mon Sep 17 00:00:00 2001 From: traky Date: Sat, 8 Feb 2025 13:49:48 +0800 Subject: [PATCH 1/2] improve docs --- docs/en/latest/plugins/ua-restriction.md | 137 +++++++++++++--------- docs/zh/latest/plugins/ua-restriction.md | 140 ++++++++++++++--------- 2 files changed, 173 insertions(+), 104 deletions(-) diff --git a/docs/en/latest/plugins/ua-restriction.md b/docs/en/latest/plugins/ua-restriction.md index 4eee1043639b..f7afbbd789f2 100644 --- a/docs/en/latest/plugins/ua-restriction.md +++ b/docs/en/latest/plugins/ua-restriction.md @@ -4,7 +4,7 @@ keywords: - Apache APISIX - API Gateway - UA restriction -description: This document contains information about the Apache APISIX ua-restriction Plugin, which allows you to restrict access to a Route or Service based on the User-Agent header with an allowlist and a denylist. +description: The ua-restriction Plugin supports restricting access to upstream resources by configuring either an allowlist or a denylist of user agents. Restricting user agents helps prevent web crawlers from overloading upstream resources and degrading service quality. --- -## Description + + + -The `ua-restriction` Plugin allows you to restrict access to a Route or Service based on the `User-Agent` header with an `allowlist` and a `denylist`. +## Description -A common scenario is to set crawler rules. `User-Agent` is the identity of the client when sending requests to the server, and the user can allow or deny some crawler request headers in the `ua-restriction` Plugin. +The `ua-restriction` Plugin supports restricting access to upstream resources by configuring either an allowlist or a denylist of user agents. A common use case is to prevent web crawlers from overloading the upstream resources and causing service degradation. 
## Attributes | Name | Type | Required | Default | Valid values | Description | |----------------|---------------|----------|--------------|-------------------------|---------------------------------------------------------------------------------| -| bypass_missing | boolean | False | false | | When set to `true`, bypasses the check when the `User-Agent` header is missing. | -| allowlist | array[string] | False | | | List of allowed `User-Agent` headers. | -| denylist | array[string] | False | | | List of denied `User-Agent` headers. | -| message | string | False | "Not allowed" | | Message with the reason for denial to be added to the response. | +| bypass_missing | boolean | False | false | | If `true`, bypasses the user agent restriction check when the `User-Agent` header is missing. | +| allowlist | array[string] | False | | | List of user agents to allow. Supports regular expressions. At least one of the `allowlist` and `denylist` should be configured, but they cannot be configured at the same time. | +| denylist | array[string] | False | | | List of user agents to deny. Supports regular expressions. At least one of the `allowlist` and `denylist` should be configured, but they cannot be configured at the same time. | +| message | string | False | "Not allowed" | | Message returned when the user agent is denied access. | -:::note +## Examples -`allowlist` and `denylist` can't be configured at the same time. - -::: - -## Enable Plugin - -You can enable the Plugin on a Route or a Service as shown below: +The examples below demonstrate how you can configure `ua-restriction` for different scenarios. 
:::note + You can fetch the `admin_key` from `config.yaml` and save to an environment variable with the following command: ```bash @@ -60,65 +57,103 @@ admin_key=$(yq '.deployment.admin.admin_key[0].key' conf/config.yaml | sed 's/"/ ::: +### Reject Web Crawlers and Customize Error Message + +The following example demonstrates how you can configure the Plugin to fend off unwanted web crawlers and customize the rejection message. + +Create a Route and configure the Plugin to block specific crawlers from accessing resources with a customized message: + ```shell -curl http://127.0.0.1:9180/apisix/admin/routes/1 -H "X-API-KEY: $admin_key" -X PUT -d ' -{ - "uri": "/index.html", - "upstream": { - "type": "roundrobin", - "nodes": { - "127.0.0.1:1980": 1 - } - }, +curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "id": "ua-restriction-route", + "uri": "/anything", "plugins": { - "ua-restriction": { - "bypass_missing": true, - "denylist": [ - "my-bot2", - "(Twitterspider)/(\\d+)\\.(\\d+)" - ], - "message": "Do you want to do something bad?" - } + "ua-restriction": { + "bypass_missing": false, + "denylist": [ + "(Baiduspider)/(\\d+)\\.(\\d+)", + "bad-bot-1" + ], + "message": "Access denied" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "httpbin.org:80": 1 + } } -}' + }' ``` -## Example usage - Send a request to the route: ```shell -curl http://127.0.0.1:9080/index.html -i +curl -i "http://127.0.0.1:9080/anything" ``` You should receive an `HTTP/1.1 200 OK` response. 
-Now if the `User-Agent` header is in the `denylist` i.e the bot User-Agent: +Send another request to the Route with a disallowed user agent: ```shell -curl http://127.0.0.1:9080/index.html --header 'User-Agent: Twitterspider/2.0' +curl -i "http://127.0.0.1:9080/anything" -H 'User-Agent: Baiduspider/5.0' ``` You should receive an `HTTP/1.1 403 Forbidden` response with the following message: ```text -{"message":"Do you want to do something bad?"} +{"message":"Access denied"} ``` -## Delete Plugin +### Bypass UA Restriction Checks + +The following example demonstrates how to configure the Plugin so that requests without a `User-Agent` header bypass the UA restriction check. -To remove the `ua-restriction` Plugin, you can delete the corresponding JSON configuration from the Plugin configuration. APISIX will automatically reload and you do not have to restart for this to take effect. +Create a Route with the following configuration: ```shell -curl http://127.0.0.1:9180/apisix/admin/routes/1 -H "X-API-KEY: $admin_key" -X PUT -d ' -{ - "uri": "/index.html", - "plugins": {}, +curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "id": "ua-restriction-route", + "uri": "/anything", + "plugins": { + "ua-restriction": { + "bypass_missing": true, + "allowlist": [ + "good-bot-1" + ], + "message": "Access denied" + } + }, "upstream": { - "type": "roundrobin", - "nodes": { - "127.0.0.1:1980": 1 - } + "type": "roundrobin", + "nodes": { + "httpbin.org:80": 1 + } } -}' + }' ``` + +Send a request to the Route without modifying the user agent, so that curl sends its default `User-Agent`, which is not in the allowlist: + +```shell +curl -i "http://127.0.0.1:9080/anything" +``` + +You should receive an `HTTP/1.1 403 Forbidden` response with the following message: + +```text +{"message":"Access denied"} +``` + +Send another request to the Route without the `User-Agent` header (a header with no value tells curl to omit it): + +```shell +curl -i "http://127.0.0.1:9080/anything" -H 'User-Agent: ' +``` + +You should receive an `HTTP/1.1 200 OK` response. 
diff --git a/docs/zh/latest/plugins/ua-restriction.md b/docs/zh/latest/plugins/ua-restriction.md index f5ccc6514ef5..cd71ba181f17 100644 --- a/docs/zh/latest/plugins/ua-restriction.md +++ b/docs/zh/latest/plugins/ua-restriction.md @@ -26,30 +26,26 @@ description: 本文介绍了 Apache APISIX ua-restriction 插件的使用方法 # --> -## 描述 + + + -`ua-restriction` 插件可以通过将指定 `User-Agent` 列入白名单或黑名单的方式来限制对服务或路由的访问。 +## 描述 -一种常见的场景是用来设置爬虫规则。`User-Agent` 是客户端在向服务器发送请求时的身份标识,用户可以将一些爬虫程序的请求头列入 `ua-restriction` 插件的白名单或黑名单中。 +`ua-restriction` 插件支持通过配置用户代理的允许列表或拒绝列表来限制对上游资源的访问。一个常见的用例是防止网络爬虫使上游资源过载并导致服务质量下降。 ## 属性 | 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 | | --------- | ------------- | ------ | ------ | ------ | -------------------------------- | -| allowlist | array[string] | 否 | | | 加入白名单的 `User-Agent`。 | -| denylist | array[string] | 否 | | | 加入黑名单的 `User-Agent`。 | -| message | string | 否 | "Not allowed" | | 当未允许的 `User-Agent` 访问时返回的信息。 | -| bypass_missing | boolean | 否 | false | | 当设置为 `true` 时,如果 `User-Agent` 请求头不存在或格式有误时,将绕过检查。 | - -:::note +|bypass_missing|boolean|否|false||如果为 true,则在缺少 `User-Agent` 标头时绕过用户代理限制检查。| +|allowlist|array[string]|否||| 要允许的用户代理列表。支持正则表达式。应配置 `allowlist` 和 `denylist` 中至少一个,但不能同时配置。| +|denylist|array[string]|否||| 要拒绝的用户代理列表。支持正则表达式。应配置 `allowlist` 和 `denylist` 中至少一个,但不能同时配置。| +|message|string|否| "Not allowed" || 拒绝用户代理访问时返回的消息。| -`allowlist` 和 `denylist` 不可以同时启用。 +## 示例 -::: - -## 启用插件 - -以下示例展示了如何在指定路由上启用并配置 `ua-restriction` 插件: +以下示例演示了如何针对不同场景配置 `ua-restriction`。 :::note @@ -61,65 +57,103 @@ admin_key=$(yq '.deployment.admin.admin_key[0].key' conf/config.yaml | sed 's/"/ ::: +### 拒绝网络爬虫并自定义错误消息 + +以下示例演示了如何配置插件以抵御不需要的网络爬虫并自定义拒绝消息。 + +创建路由并配置插件以使用自定义消息阻止特定爬虫访问资源: + ```shell -curl http://127.0.0.1:9180/apisix/admin/routes/1 -H "X-API-KEY: $admin_key" -X PUT -d ' -{ - "uri": "/index.html", - 
"upstream": { - "type": "roundrobin", - "nodes": { - "127.0.0.1:1980": 1 - } - }, +curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "id": "ua-restriction-route", + "uri": "/anything", "plugins": { - "ua-restriction": { - "bypass_missing": true, - "denylist": [ - "my-bot2", - "(Twitterspider)/(\\d+)\\.(\\d+)" - ], - "message": "Do you want to do something bad?" - } + "ua-restriction": { + "bypass_missing": false, + "denylist": [ + "(Baiduspider)/(\\d+)\\.(\\d+)", + "bad-bot-1" + ], + "message": "Access denied" + } + }, + "upstream": { + "type": "roundrobin", + "nodes": { + "httpbin.org:80": 1 + } } -}' + }' ``` -## 测试插件 - -通过上述命令启用插件后,你可以先发起一个简单的请求测试: +向路由发送请求: ```shell -curl http://127.0.0.1:9080/index.html -i +curl -i "http://127.0.0.1:9080/anything" ``` -你应当收到 `HTTP/1.1 200 OK` 的响应,表示请求成功。 +您应该收到 `HTTP/1.1 200 OK` 响应。 -接下来,请求的同时指定处于 `denylist` 中的 `User-Agent`,如 `Twitterspider/2.0`: +使用不允许的用户代理向路由发送另一个请求: ```shell -curl http://127.0.0.1:9080/index.html --header 'User-Agent: Twitterspider/2.0' +curl -i "http://127.0.0.1:9080/anything" -H 'User-Agent: Baiduspider/5.0' ``` -你应当收到 `HTTP/1.1 403 Forbidden` 的响应和以下报错,表示请求失败,代表插件生效: +您应该收到 `HTTP/1.1 403 Forbidden` 响应,其中包含以下消息: ```text -{"message":"Do you want to do something bad?"} +{"message":"Access denied"} ``` -## 删除插件 +### 绕过 UA 限制检查 + +以下示例说明如何配置插件,使缺少 `User-Agent` 标头的请求绕过 UA 限制检查。 -当你需要禁用 `ua-restriction` 插件时,可以通过以下命令删除相应的 JSON 配置,APISIX 将会自动重新加载相关配置,无需重启服务: +创建如下路由: ```shell -curl http://127.0.0.1:9180/apisix/admin/routes/1 -H "X-API-KEY: $admin_key" -X PUT -d ' -{ - "uri": "/index.html", - "plugins": {}, +curl "http://127.0.0.1:9180/apisix/admin/routes" -X PUT \ + -H "X-API-KEY: ${admin_key}" \ + -d '{ + "id": "ua-restriction-route", + "uri": "/anything", + "plugins": { + "ua-restriction": { + "bypass_missing": true, + "allowlist": [ + "good-bot-1" + ], + "message": "Access denied" + } + }, "upstream": { + 
"type": "roundrobin", + "nodes": { + "httpbin.org:80": 1 + } } -}' + }' +``` + +向路由发送一个请求而不修改用户代理: + +```shell +curl -i "http://127.0.0.1:9080/anything" ``` + +您应该收到一个 `HTTP/1.1 403 Forbidden` 响应,其中包含以下消息: + +```text +{"message":"Access denied"} +``` + +向路由发送另一个不带 `User-Agent` 标头的请求: + +```shell +curl -i "http://127.0.0.1:9080/anything" -H 'User-Agent: ' +``` + +您应该收到一个 `HTTP/1.1 200 OK` 响应。 From 0605ac4366fc0a32580ed375f31c607f00cdc9de Mon Sep 17 00:00:00 2001 From: traky Date: Sat, 8 Feb 2025 13:52:51 +0800 Subject: [PATCH 2/2] lint --- docs/zh/latest/plugins/ua-restriction.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/zh/latest/plugins/ua-restriction.md b/docs/zh/latest/plugins/ua-restriction.md index cd71ba181f17..13eae7b215ef 100644 --- a/docs/zh/latest/plugins/ua-restriction.md +++ b/docs/zh/latest/plugins/ua-restriction.md @@ -38,10 +38,10 @@ The `ua-restriction` Plugin supports restricting access to upstream resources th | 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 | | --------- | ------------- | ------ | ------ | ------ | -------------------------------- | -|bypass_missing|boolean|否|false||如果为 true,则在缺少 `User-Agent` 标头时绕过用户代理限制检查。| -|allowlist|array[string]|否||| 要允许的用户代理列表。支持正则表达式。应配置 `allowlist` 和 `denylist` 中至少一个,但不能同时配置。| -|denylist|array[string]|否||| 要拒绝的用户代理列表。支持正则表达式。应配置 `allowlist` 和 `denylist` 中至少一个,但不能同时配置。| -|message|string|否| "Not allowed" || 拒绝用户代理访问时返回的消息。| +| bypass_missing | boolean | 否 | false | | 如果为 true,则在缺少 `User-Agent` 标头时绕过用户代理限制检查。| +| allowlist | array[string] | 否 | | | 要允许的用户代理列表。支持正则表达式。应配置 `allowlist` 和 `denylist` 中至少一个,但不能同时配置。| +| denylist | array[string] | 否 | | | 要拒绝的用户代理列表。支持正则表达式。应配置 `allowlist` 和 `denylist` 中至少一个,但不能同时配置。| +| message | string | 否 | "Not allowed" | | 拒绝用户代理访问时返回的消息。| ## 示例