A Benchmark Report on Elasticsearch with Different Shard Settings

Abstract

To investigate the frequent indexing timeouts and request rejections on the current cluster, we reproduced the production cluster environment and index settings, generated random test data with a load-testing tool, and benchmarked bulk writes against the existing 850-shard index, an index with half as many shards (425), and an index with far fewer shards (50). Using different concurrency levels and bulk batch sizes, we observed indexing throughput and recorded write-queue backlogs, in order to analyze how shard count and batch size affect write performance and to decide on the follow-up optimization plan.

Benchmark Environment

Elasticsearch version 7.7.1, 57 nodes in total, of which 3 are dedicated master nodes and 3 are coordinating nodes; each node runs with a 31 GB JVM heap.
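
For reference, node roles and heap settings of a cluster like this can be confirmed with the _cat/nodes API; the column list below is just one convenient choice, not something taken from the original test:

GET _cat/nodes?v&h=name,node.role,master,heap.max,heap.percent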

Benchmark Procedure

Single Index with 850 Shards

Index Definition

PUT idx-xxxx-xxxxxx
{
    "aliases" : {
      "alias-xxxx-xxxxxx" : { }
    },
    "mappings" : {
      "dynamic" : "strict",
      "_routing" : {
        "required" : true
      },
      "_source" : {
        "excludes" : [
          "isExtract*",
          "batchNo"
        ]
      },
      "properties" : {
        "addxxxx" : {
          "type" : "text",
          "term_vector" : "with_positions_offsets"
        },
        "clxxxx" : {
          "type" : "byte"
        },
        "contxxxx" : {
          "type" : "text",
          "boost" : 4.0,
          "term_vector" : "with_positions_offsets"
        },
        "conxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "con1xxxx" : {
          "type" : "text",
          "boost" : 16.0,
          "term_vector" : "with_positions_offsets",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "normalizer" : "keyword_normalizer"
            }
          },
          "analyzer" : "name_analyzer",
          "search_analyzer" : "keyword_analyzer"
        },
        "contSxxxx" : {
          "type" : "long",
          "index" : false,
          "doc_values" : false
        },
        "contSxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "contTxxxx" : {
          "type" : "short"
        },
        "crtxxxx" : {
          "type" : "date",
          "ignore_malformed" : true,
          "format" : "yyyyMMddHHmmss"
        },
        "duration" : {
          "type" : "long",
          "index" : false,
          "doc_values" : false
        },
        "largeTxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "md5" : {
          "type" : "keyword",
          "index" : false,
          "doc_values" : false
        },
        "orderxxxx" : {
          "type" : "alias",
          "path" : "contName.keyword"
        },
        "ownxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "ownxxxxxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "ownxxxxxxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "ownxxxxxxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "parenxxxxxxxxxx" : {
          "type" : "keyword"
        },
        "pathxx" : {
          "type" : "text",
          "boost" : 8.0,
          "term_vector" : "with_positions_offsets",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "path_analyzer"
        },
        "presexxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "presexxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "presxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "prixxxxxx" : {
          "type" : "short",
          "index" : false
        },
        "search_xxxxxx" : {
          "type" : "alias",
          "path" : "contName"
        },
        "servixxxxxx" : {
          "type" : "byte"
        },
        "shotxxxxxx" : {
          "type" : "date",
          "ignore_malformed" : true,
          "format" : "yyyyMMddHHmmss"
        },
        "xxxxxxlThuxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "tagxxxxxx" : {
          "type" : "text",
          "term_vector" : "with_positions_offsets"
        },
        "thumxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "xxxxxxpdxxxxxx" : {
          "type" : "date",
          "ignore_malformed" : true,
          "format" : "yyyyMMddHHmmss"
        },
        "xxxxxxderAcxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "xxxxxxerAccouxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "xxxxxxerxxxxxxID" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "xxxxxxderNxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        }
      }
    },
    "settings" : {
      "index" : {
        "max_ngram_diff" : "50",
        "refresh_interval" : "1s",
        "number_of_shards" : "850",

        "analysis" : {
          "normalizer" : {
            "keyword_normalizer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom"
            }
          },
          "analyzer" : {
            "keyword_analyzer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom",
              "tokenizer" : "keyword"
            },
            "name_analyzer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom",
              "tokenizer" : "name_tokenizer"
            },
            "path_analyzer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom",
              "tokenizer" : "path_tokenizer"
            }
          },
          "tokenizer" : {
            "name_tokenizer" : {
              "type" : "ngram",
              "min_gram" : "1",
              "max_gram" : "5"
            },
            "path_tokenizer" : {
              "pattern" : "/",
              "type" : "pattern"
            }
          }
        },
        "number_of_replicas" : "1"

      }
    }
  }
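
The con1xxxx and pathxx text fields above use the custom name_analyzer (1-5 character ngrams) and path_analyzer, so a single value expands into many terms at index time. The expansion can be inspected with the _analyze API; a quick sketch, reusing a file name from the test data as sample text:

POST idx-xxxx-xxxxxx/_analyze
{
  "analyzer" : "name_analyzer",
  "text" : "mmexport1625925135032.jpg"
}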

Sample Document

POST idx-owncloud-img/_doc/1?routing=1
{
  "ownerxxxxxx" : "002#######0oV",
  "serxxxxxx" : 1,
  "tagxxxxxx" : "",
  "contxxxxxx" : "",
  "xxxxxxAccoxxxxxxe" : "1",
  "presxxxxxx" : "",
  "conxxxxxx" : "jpg",
  "xxxxxxerBxxxxxx" : "6#######573",
  "ownerxxxxxxx" : "13#######62",
  "presxxxxxxL" : "",
  "duxxxxxx" : 0,
  "paxxxxxx" : "00##########################################043",
  "crtxxxxxx" : "20#######45",
  "pxxxxxxtCatxxxxxx" : "001############################043",
  "sxxxxxxThumxxxxxx" : "http://downl#################################################961",
  "uxxxxxxerAxxxxxxt" : "1##############2",
  "uxxxxxxderAccoxxxxxxe" : "1",
  "uxxxxxxderxxxxxxID" : "0#####################V",
  "lxxxxxxhumxxxxxxl" : "http://d###################################D961",
  "thxxxxxxl" : "http://do###############################################################61",
  "axxxxxxss" : "",
  "uxxxxxxm" : "20##############8",
  "cxxxxxx" : 3,
  "coxxxxxx" : 1,
  "prxxxxxx" : 10,
  "coxxxxxx" : "0###################################cm",
  "co2xxxxxx" : 5##############8,
  "shoxxxxxx" : "20##############4",
  "contxxxxxx" : "mm##############g",
  "presxxxxxx" : "",
  "oxxxxxxBmpxxxxxx" : "6#######3",
  "md5" : "7##############1E"
}
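
Because _routing is required, all documents that share a routing value land on a single primary shard out of the 850. Which shard a given routing value maps to can be checked with the _search_shards API; a minimal sketch using the sample document's routing value:

GET idx-owncloud-img/_search_shards?routing=1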

loadgen Configuration

root@loadgen:/opt/loadgen# cat loadgen.yml
statsd:
  enabled: false
  host: 192.168.3.98
  port: 8125
  namespace: loadgen.
variables:
  - name: ip
    type: file
    path: dict/ip.txt
  - name: message
    type: file
    path: dict/nginx.log
#  - name: user
#    type: file
#    path: dict/user.txt
  - name: id
    type: sequence
  - name: uuid
    type: uuid
  - name: now_local
    type: now_local
  - name: now_utc
    type: now_utc
  - name: now_unix
    type: now_unix
  - name: suffix
    type: range
    from: 12
    to: 12
  - name: bool
    type: range
    from: 0
    to: 1
requests:
  - request:
      method: POST
      runtime_variables:
        batch_no: id
      runtime_body_line_variables:
        routing_no: uuid
      basic_auth:
        username: elastic
        password: ####
      url: https://xxx.elasticsearch.xxx.cn:9243/_bulk
      body_repeat_times: 50
      body: |
        { "create" : { "_index" : "idx-xxxxxx-xxxxxx","_type":"_doc", "_id" : "$[[uuid]]" , "routing" : "$[[routing_no]]" } }
        { "ownerxxxxxx" : "0011WsjCK0oV", "servxxxxxx" : $[[bool]], "tagxxxxxx" : "", "contxxxxxx" : "", "ownexxxxxxunxxxxxx" : "$[[bool]]", "prxxxxxxentLxxxxxx" : "", "conxxxxxx" : "jpg", "uxxxxxxexxxxxxID" : "$[[id]]", "owxxxxxxccxxxxxxt" : "$[[routing_no]]", "prxxxxxxtUxxxxxxL" : "", "durxxxxxxn" : 0, "paxxxxxx" : "00019700101000000001/0011WsjCK0oV00019700101000000043", "crxxxxxx" : "$[[id]]", "paxxxxxxntxxxxxxogIxxxxxx" : "0011WsjCK0oV00019700101000000043", "sxxxxxxThumxxxxxx" : "http://xxx.xxx.cn:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=0&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961", "xxxxxxderAxxxxxxnt" : "$[[routing_no]]", "upxxxxxxerAcxxxxxxtype" : "$[[bool]]", "uploaderNDUserID" : "$[[uuid]]", "largeThumbnail" : "http://xxx.xxx.cn:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=1&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961", "xxxxxxil" : "http://download.xxx.xxx.com:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=2&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961", "adxxxxxx" : "", "upxxxxxx" : "$[[now_unix]]", "cxxxxxx" : 3, "contxxxxxxe" : $[[bool]], "prixxxxxx" : 10, "conxxxxxx" : "0011WsjCK0oV06320210812125345tcm", "contxxxxxx" : $[[id]], "shoxxxxxx" : "$[[id]]", "contxxxxxxe" : "mmexport1625925135032.jpg", "prxxxxxxtHxxxxxx" : "", "oxxxxxxrBmxxxxxxID" : "$[[id]]", "md5" : "$[[uuid]]" }

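With body_repeat_times: 50, every bulk request carries 50 action/document pairs. As I understand loadgen's variable handling (an assumption, not stated in the report), runtime_body_line_variables regenerates routing_no for each repeated pair, while runtime_variables evaluates batch_no once per request. A single rendered pair would look roughly like this (abridged, with made-up values):

{ "create" : { "_index" : "idx-xxxxxx-xxxxxx", "_type" : "_doc", "_id" : "c0a8016e-1111-...", "routing" : "c0a8016e-2222-..." } }
{ "ownerxxxxxx" : "0011WsjCK0oV", "servxxxxxx" : 1, "durxxxxxxn" : 0, "upxxxxxx" : "1628744025", ..., "md5" : "c0a8016e-1111-..." }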

Running the Test

Enable gzip compression for the request traffic and run the benchmark:

root@loadgen:/opt/loadgen# ./loadgen-linux-amd64 -config loadgen.yml -d 6000 -c 100  -compress
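
The flags, as I understand loadgen's usage (an assumption, not taken from the report): -d is the run duration in seconds, -c the number of concurrent clients, and -compress gzip-compresses the request bodies. The same invocation, annotated:

# -config   the loadgen.yml shown above
# -d        run duration in seconds (6000 s)
# -c        number of concurrent clients (100)
# -compress gzip-compress the request bodies
./loadgen-linux-amd64 -config loadgen.yml -d 6000 -c 100 -compress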

1 replica, batch size 50, concurrency 100

0 replicas, batch size 50, concurrency 100

0 replicas, batch size 50, concurrency 200

The write queue is already heavily backlogged and requests are being rejected:
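
Queue depth and rejection counts of this kind can be watched while the test runs with the _cat thread pool API; the column list and sort order below are just one convenient choice:

GET _cat/thread_pool/write?v&h=node_name,name,active,queue,rejected,completed&s=rejected:desc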

1 replica, batch size 50, concurrency 200

1 replica, batch size 50, concurrency 400

1 replica, batch size 50, concurrency 800

1 replica, batch size 500, concurrency 100

1 replica, batch size 2000, concurrency 100

1 replica, batch size 5000, concurrency 100

1 replica, batch size 5000, concurrency 200

Single Index with 425 Shards

Index Definition

PUT idx-xxxxxx-xxxxxx-425
{
    "aliases" : {
      "alias-xxxxxx-xxxxxx" : { }
    },
    "mappings" : {
      "dynamic" : "strict",
      "_routing" : {
        "required" : true
      },
      "_source" : {
        "excludes" : [
          "isExtract*",
          "batchNo"
        ]
      },
      "properties" : {
        "addxxxxxx" : {
          "type" : "text",
          "term_vector" : "with_positions_offsets"
        },
        "cxxxxxx" : {
          "type" : "byte"
        },
        "coxxxxxxc" : {
          "type" : "text",
          "boost" : 4.0,
          "term_vector" : "with_positions_offsets"
        },
        "coxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "conxxxxxxe" : {
          "type" : "text",
          "boost" : 16.0,
          "term_vector" : "with_positions_offsets",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "normalizer" : "keyword_normalizer"
            }
          },
          "analyzer" : "name_analyzer",
          "search_analyzer" : "keyword_analyzer"
        },
        "coxxxxxxze" : {
          "type" : "long",
          "index" : false,
          "doc_values" : false
        },
        "conxxxxxxfix" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "coxxxxxxpe" : {
          "type" : "short"
        },
        "cxxxxxxm" : {
          "type" : "date",
          "ignore_malformed" : true,
          "format" : "yyyyMMddHHmmss"
        },
        "duxxxxxxon" : {
          "type" : "long",
          "index" : false,
          "doc_values" : false
        },
        "laxxxxxxbnail" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "md5" : {
          "type" : "keyword",
          "index" : false,
          "doc_values" : false
        },
        "ordxxxxxxNamxxxxxx" : {
          "type" : "alias",
          "path" : "contName.keyword"
        },
        "oxxxxxxccoxxxxxxt" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "owxxxxxxcounxxxxxxpe" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "owxxxxxxpUsxxxxxxD" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "oxxxxxxDUsexxxxxxD" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "pxxxxxxtalxxxxxxD" : {
          "type" : "keyword"
        },
        "patxxxxxx" : {
          "type" : "text",
          "boost" : 8.0,
          "term_vector" : "with_positions_offsets",
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "path_analyzer"
        },
        "prxxxxxxntHxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "prxxxxxxntLxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "prxxxxxxURxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "pxxxxxxity" : {
          "type" : "short",
          "index" : false
        },
        "sxxxxxxch_nxxxxxxe" : {
          "type" : "alias",
          "path" : "contName"
        },
        "sexxxxxxeTxxxxxxe" : {
          "type" : "byte"
        },
        "sxxxxxxTm" : {
          "type" : "date",
          "ignore_malformed" : true,
          "format" : "yyyyMMddHHmmss"
        },
        "smxxxxxxThuxxxxxxl" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "taxxxxxxa" : {
          "type" : "text",
          "term_vector" : "with_positions_offsets"
        },
        "txxxxxxnaxxxxxx" : {
          "type" : "keyword",
          "boost" : 8.0,
          "index" : false,
          "doc_values" : false
        },
        "uxxxxxxm" : {
          "type" : "date",
          "ignore_malformed" : true,
          "format" : "yyyyMMddHHmmss"
        },
        "upxxxxxxdexxxxxxount" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "upxxxxxxrAcxxxxxxpe" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "upxxxxxxmpUsxxxxxx" : {
          "type" : "keyword",
          "doc_values" : false
        },
        "uxxxxxxerNDxxxxxxD" : {
          "type" : "keyword",
          "doc_values" : false
        }
      }
    },
    "settings" : {
      "index" : {
        "max_ngram_diff" : "50",
        "refresh_interval" : "1s",
        "number_of_shards" : "425",

        "analysis" : {
          "normalizer" : {
            "keyword_normalizer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom"
            }
          },
          "analyzer" : {
            "keyword_analyzer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom",
              "tokenizer" : "keyword"
            },
            "name_analyzer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom",
              "tokenizer" : "name_tokenizer"
            },
            "path_analyzer" : {
              "filter" : [
                "lowercase"
              ],
              "type" : "custom",
              "tokenizer" : "path_tokenizer"
            }
          },
          "tokenizer" : {
            "name_tokenizer" : {
              "type" : "ngram",
              "min_gram" : "1",
              "max_gram" : "5"
            },
            "path_tokenizer" : {
              "pattern" : "/",
              "type" : "pattern"
            }
          }
        },
        "number_of_replicas" : "1"

      }
    }
  }

1 replica, batch size 50, concurrency 100

1 replica, batch size 50, concurrency 200

1 replica, batch size 50, concurrency 400

1 replica, batch size 50, concurrency 800

1 replica, batch size 500, concurrency 100

1 replica, batch size 2000, concurrency 100

1 replica, batch size 5000, concurrency 100

Single Index with 50 Shards

1 replica, batch size 50, concurrency 100

1 replica, batch size 500, concurrency 100

1 replica, batch size 1000, concurrency 100

1 replica, batch size 5000, concurrency 100

Via the Gateway: Single Index with 425 Shards

1 replica, batch size 50, concurrency 400 > 200

1 replica, batch size 500, concurrency 100

1 replica, batch size 500, concurrency 200

1 replica, batch size 500, concurrency 400

1 replica, batch size 5000, concurrency 100

1 replica, batch size 5000, concurrency 200

1 replica, batch size 5000, concurrency 400

Via the Gateway: Single Index with 850 Shards

1 replica, batch size 50, concurrency 400

1 replica, batch size 500, concurrency 400

1 replica, batch size 5000, concurrency 400

Benchmark Results

Indexes | Shards | Replicas | Batch size | Concurrency | Avg write throughput (eps)
------- | ------ | -------- | ---------- | ----------- | --------------------------
1 | 850 | 1 | 50 | 100 | 10,000
1 | 850 | 0 | 50 | 100 | 30,000
1 | 850 | 0 | 50 | 200 | 40,000
1 | 850 | 1 | 50 | 200 | 18,000
1 | 850 | 1 | 50 | 400 | 27,500
1 | 850 | 1 | 50 | 800 | 29,700
1 | 850 | 1 | 500 | 100 | 30,187
1 | 850 | 1 | 2000 | 100 | 68,000
1 | 850 | 1 | 5000 | 100 | 98,915
1 | 850 | 1 | 5000 | 200 | 78,462
1 | 425 | 1 | 50 | 100 | 12,695
1 | 425 | 1 | 500 | 100 | 46,818
1 | 425 | 1 | 2000 | 100 | 100,000
1 | 425 | 1 | 5000 | 100 | 130,000
1 | 50 | 1 | 50 | 100 | 32,987
1 | 50 | 1 | 500 | 100 | 96,207
1 | 50 | 1 | 1000 | 100 | 147,719
1 | 50 | 1 | 5000 | 100 | 156,961

Through the gateway node in asynchronous bulk-merging mode:

Indexes | Shards | Replicas | Batch size | Concurrency | Avg write throughput (eps)
------- | ------ | -------- | ---------- | ----------- | --------------------------
1 | 425 | 1 | 50 | 100 | 500
1 | 425 | 1 | 50 | 200 | 1,000
1 | 425 | 1 | 50 | 400 | 2,000
1 | 425 | 1 | 500 | 100 | 4,800
1 | 425 | 1 | 500 | 200 | 9,350
1 | 425 | 1 | 500 | 400 | 17,000
1 | 425 | 1 | 5000 | 100 | 50,000
1 | 425 | 1 | 5000 | 200 | 100,000
1 | 425 | 1 | 5000 | 400 | 175,000
1 | 850 | 1 | 50 | 400 | 2,000
1 | 850 | 1 | 500 | 400 | 18,800
1 | 850 | 1 | 5000 | 400 | 137,000

Conclusion

With a large number of shards (850 or 425), the write thread pools can fill up and start rejecting requests even at a concurrency of only 100, and this happens more easily when each bulk request contains few documents. With the same mapping, the 50-shard index delivered roughly twice the throughput of the 425-shard index and three times that of the 850-shard index, and its write thread pools showed essentially no backlog, or cleared any backlog quickly. The more documents per bulk request, the higher the indexing efficiency. Even though the index uses routing, an index with an extremely large shard count still suffers severe bulk-forwarding overhead. We therefore recommend splitting the oversized index into several sub-indexes, either by business dimension or along the current routing dimension, and keeping each index to no more than 20 shards.
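
A minimal sketch of what such a split could look like, using the legacy index template API that ships with 7.7.1; the template name, index pattern, alias, and the choice of 20 shards are illustrative assumptions rather than settings taken from this test:

PUT _template/tmpl-xxxx-split
{
  "index_patterns" : [ "idx-xxxx-split-*" ],
  "settings" : {
    "index.number_of_shards" : 20,
    "index.number_of_replicas" : 1,
    "index.refresh_interval" : "1s"
  },
  "aliases" : {
    "alias-xxxx-split" : { }
  }
}

Writers would then address one concrete sub-index per business unit or per routing bucket (for example idx-xxxx-split-007 for routing values that hash to bucket 7), keeping every index at 20 shards or fewer while the shared alias still allows searching across all sub-indexes.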
