麻烦的ES查询
使用script
将time字段按年-月-日的形式作为key进行聚合(效率低)
JSON
GET es_xltest_log/_search
{
"size": 0,
"aggs": {
"timeNum": {
"terms": {
"script": {
"source": """
return doc['time'].getValue().getYear()+"-"+doc['time'].getValue().getMonthValue()+"-"+doc['time'].getValue().getDayOfMonth();
"""
}
}
}
}
}
时间分段查询
自定义间隔时长,需要使用fixed_interval。 calendar_interval只支持单位时间查询,比如1d、1h,如果查询2d、2h等,会报错。 interval参数后续版本移除,不要使用,可用fixed_interval和calendar_interval代替。
JSON
GET es_xltest_log/_search
{
"query": {
"bool": {
"must": [{
"range": {
"time": {
"gte": 1004065479577,
"lte": 1904065479577
}
}
}, {
"term": {
"xltestType": {
"value": 5
}
}
},{
"match_phrase": {
"xltestName": {
"query": "Win"
}
}
}, {
"match_phrase": {
"networks.ip": {
"query": "29"
}
}
}, {
"terms": {
"gid": ["00000000-0000-0000-0000-000000000002", "00000000-0000-0000-0000-000000000001"]
}
}],
"should": [{
"match_phrase": {
"name": {
"query": "lo"
}
}
}, {
"match_phrase": {
"fullName": {
"query": "lo"
}
}
}],
"minimum_should_match": 1
}
},
"size": 0,
"aggs": {
"test": {
"date_histogram": {
"field": "time",
"fixed_interval": "1d"
}
}
}
}
指定返回字段
_source参数可以指定返回的字段
SQL
SELECT xltestGuid FROM es_xltest_log;
JSON
GET es_xltest_log/_search
{
"_source": "xltestGuid",
"size":0
}
JSON
GET es_xltest_log/_search
{
"_source": ["xltestGuid", "time"],
"size":0
}
SQL中的distinct
collapse参数可以起到与SQL中类似distinct的效果。但更像MySQL中的group by语句。(返回的条数不大于dinstinct(xltestGuid)的数量,且全字段返回) 以MySQL为例
SQL
SELECT * FROM es_xltest_log group by xltestGuid;
JSON
GET es_xltest_log/_search
{
"collapse": {
"field": "xltestGuid"
}
}
SQL中的count() + group by
聚合操作中的cardinality效果与SQL中的count + group by效果相同
SQL
SELECT COUNT(xltestGuid) FROM es_xltest_log GROUP BY xltestGuid;
JSON
GET es_xltest_log/_search
{
"aggs":{
"count":{
"cardinality": {
"field": "xltestGuid"
}
}
}
}
嵌套聚合查询
SQL
SELECT networks.ip,distinct(xltestGuid),name from es_xltest_log limit 1000 offset 0;
SELECT count(*) AS _count, xltestGuid FROM es_xltest_log GROUP BY xltestGuid ORDER BY _count desc limit 1000 offset 0;
以上两条SQL可转换为以下ES查询,但是查询出的结果和聚合查出的结果,不一定对应。
JSON
GET es_xltest_log/_search
{
"_source": ["networks.ip","xltestGuid","name"],
"collapse": {
"field": "xltestGuid"
},
"size": 1000,
"aggs": {
"xltestGuidCount": {
"terms": {
"field": "xltestGuid",
"size": 1000,
"order": {
"_count": "desc"
}
}
}
}
}
尝试将ES语句修改为嵌套的聚合操作,则可以对应上。
JSON
GET es_xltest_log/_search
{
"size": 0,
"aggs": {
"xltestGuidCount": {
"terms": {
"field": "xltestGuid",
"order": {
"_count": "desc"
}
},
"aggs":{
"networks":{
"terms": {
"field": "networks.ip.keyword"
}
},
"epNames":{
"terms": {
"field": "name.keyword"
}
}
}
}
}
}
变相实现多字段group by
有通过script实现的方式,但是性能代价较高,官方推荐使用copy_to(jpountz提供了demo)。 因业务原因,可以使用两层aggregation变相实现多字段group by。
SQL
SELECT count(1) as _count, xltestType, xltestName from es_xltest_log group by xltestType, xltestName;
JSON
GET es_xltest_log/_search
{
"size": 0,
"aggs": {
"xltestGuidCount": {
"terms": {
"field": "xltestType",
"order": {
"_count": "desc"
},
"size":65535
},
"aggs":{
"xltestNames":{
"terms": {
"field": "xltestName.keyword",
"order": {
"_count": "desc"
},
"size": 65535
}
}
}
}
}
}
局部更新document
SQL
UPDATE es_xltest_log SET name='localhost.xltest-pc', finder=1 WHERE id = 'mA7AeXUBeiRdcTrq3UWM'
JSON
POST es_xltest_log/_doc/mA7AeXUBeiRdcTrq3UWM/_update
{
"doc" : {
"name" : "localhost.xltest-pc",
"finder" : 1
}
}
使用update by query进行局部更新
SQL
UPDATE es_xltest_log SET gid = '00000000-0000-0000-0000-000000000002' WHERE gid = '1' and xltestGuid = '092d5689-b4f9-471e-adc2-d9a812f9a128';
JSON
POST es_xltest_log/_update_by_query?scroll_size=65535
{
"query": {
"bool": {
"must": [
{
"term": {
"gid": {
"value": "1"
}
}
},{
"term": {
"xltestGuid": {
"value": "092d5689-b4f9-471e-adc2-d9a812f9a128"
}
}
}
]
}
},
"script": {
"source": "ctx._source.gid = '00000000-0000-0000-0000-000000000002'",
"lang": "painless"
}
}