Skip to content

麻烦的ES查询

使用script

将time字段按年-月-日的形式作为key进行聚合(效率低)

JSON
GET es_xltest_log/_search
{
    "size": 0,
    "aggs": {
        "timeNum": {
            "terms": {
                "script": {
                  "source": """
                  return doc['time'].getValue().getYear()+"-"+doc['time'].getValue().getMonthValue()+"-"+doc['time'].getValue().getDayOfMonth();
                  """
                }
            }
        }
    }
}

时间分段查询

自定义间隔时长,需要使用fixed_interval。 calendar_interval只支持单位时间查询,比如1d、1h,如果查询2d、2h等,会报错。 interval参数后续版本移除,不要使用,可用fixed_interval和calendar_interval代替。

JSON
GET es_xltest_log/_search
{
    "query": {
        "bool": {
            "must": [{
                "range": {
                    "time": {
                        "gte": 1004065479577,
                        "lte": 1904065479577
                    }
                }
            }, {
                "term": {
                    "xltestType": {
                        "value": 5
                    }
                }
            }{
                "match_phrase": {
                    "xltestName": {
                        "query": "Win"
                    }
                }
            }, {
                "match_phrase": {
                    "networks.ip": {
                        "query": "29"
                    }
                }
            }, {
                "terms": {
                    "gid": ["00000000-0000-0000-0000-000000000002", "00000000-0000-0000-0000-000000000001"]
                }
            }],
            "should": [{
                "match_phrase": {
                    "name": {
                        "query": "lo"
                    }
                }
            }, {
                "match_phrase": {
                    "fullName": {
                        "query": "lo"
                    }
                }
            }],
            "minimum_should_match": 1
        }
    },
    "size": 0,
    "aggs": {
        "test": {
            "date_histogram": {
                "field": "time",
                "fixed_interval": "1d"
            }
        }
    }
}

指定返回字段

_source参数可以指定返回的字段

SQL
SELECT xltestGuid FROM es_xltest_log;
JSON
GET es_xltest_log/_search
{
  "_source": "xltestGuid", 
  "size":0
}
JSON
GET es_xltest_log/_search
{
  "_source": ["xltestGuid", "time"], 
  "size":0
}

SQL中的distinct

collapse参数可以起到与SQL中类似distinct的效果。但更像MySQL中的group by语句。(返回的条数不大于dinstinct(xltestGuid)的数量,且全字段返回) 以MySQL为例

SQL
SELECT * FROM es_xltest_log group by xltestGuid;
JSON
GET es_xltest_log/_search
{
  "collapse": {
    "field": "xltestGuid"
  }
}

SQL中的count() + group by

聚合操作中的cardinality效果与SQL中的count + group by效果相同

SQL
SELECT COUNT(xltestGuid) FROM es_xltest_log GROUP BY xltestGuid;
JSON
GET es_xltest_log/_search
{
  "aggs":{
    "count":{
      "cardinality": {
        "field": "xltestGuid"
      }
    }
  }
}

嵌套聚合查询

SQL
SELECT networks.ip,distinct(xltestGuid),name from es_xltest_log limit 1000 offset 0;
SELECT count(*) AS _count, xltestGuid FROM es_xltest_log GROUP BY xltestGuid ORDER BY _count desc limit 1000 offset 0;

以上两条SQL可转换为以下ES查询,但是查询出的结果和聚合查出的结果,不一定对应。

JSON
GET es_xltest_log/_search
{
    "_source": ["networks.ip","xltestGuid","name"], 
    "collapse": {
      "field": "xltestGuid"
    }, 
    "size": 1000,
    "aggs": {
        "xltestGuidCount": {
          "terms": {
            "field": "xltestGuid",
            "size": 1000, 
            "order": {
              "_count": "desc"
            }
          }
        }
    }
}

尝试将ES语句修改为嵌套的聚合操作,则可以对应上。

JSON
GET es_xltest_log/_search
{
    "size": 0,
    "aggs": {
        "xltestGuidCount": {
          "terms": {
            "field": "xltestGuid",
            "order": {
              "_count": "desc"
            }
          },
          "aggs":{
            "networks":{
              "terms": {
                "field": "networks.ip.keyword"
              }
            },
            "epNames":{
              "terms": {
                "field": "name.keyword"
              }
            }
          }
        }
    }
}

变相实现多字段group by

有通过script实现的方式,但是性能代价较高,官方推荐使用copy_to(jpountz提供了demo)。 因业务原因,可以使用两层aggregation变相实现多字段group by。

SQL
SELECT count(1) as _count, xltestType, xltestName from es_xltest_log group by xltestType, xltestName;
JSON
GET es_xltest_log/_search
{
    "size": 0,
    "aggs": {
        "xltestGuidCount": {
          "terms": {
            "field": "xltestType",
            "order": {
              "_count": "desc"
            },
            "size":65535
          },
          "aggs":{
            "xltestNames":{
              "terms": {
                "field": "xltestName.keyword",
                "order": {
                  "_count": "desc"
                }, 
                "size": 65535
              }
            }
          }
        }
    }
}

局部更新document

SQL
UPDATE es_xltest_log SET name='localhost.xltest-pc', finder=1 WHERE id = 'mA7AeXUBeiRdcTrq3UWM'
JSON
POST es_xltest_log/_doc/mA7AeXUBeiRdcTrq3UWM/_update
{
   "doc" : {
      "name" : "localhost.xltest-pc",
      "finder" : 1
   }
}

使用update by query进行局部更新

SQL
UPDATE es_xltest_log SET gid = '00000000-0000-0000-0000-000000000002' WHERE gid = '1' and xltestGuid = '092d5689-b4f9-471e-adc2-d9a812f9a128';
JSON
POST es_xltest_log/_update_by_query?scroll_size=65535
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "gid": {
              "value": "1"
            }
          }
        },{
          "term": {
            "xltestGuid": {
              "value": "092d5689-b4f9-471e-adc2-d9a812f9a128"
            }
          }
        }
      ]
    }
  },
  "script": {
    "source": "ctx._source.gid = '00000000-0000-0000-0000-000000000002'",
    "lang": "painless"
  }
}