I have a bucket (named data
) and want to find out how to define index(es) to optimize these two queries:
SELECT COUNT(1) AS size FROM data WHERE type='FishingTrip' AND (SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10))
and
SELECT SUM(count) as count FROM data WHERE type='Catch' AND SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10)
I already have an index for type
:
CREATE INDEX `def_type` ON `data`(`type`)
and I have tried (by guessing…) to create one over the type, date, and count:
CREATE INDEX `def_type_date_count` ON `data`(`type`,`date`,`count`)
… but is this the best way of doing it? I can see that the “FishingTrip” query runs around 175ms whereas the “Catch” query runs around 15ms… - although I would have thought that the later was the “heaviest” due to the SUM() function. The count
field does not exist on the FishingTrip - could that be a reason?
How do I find out how to best translate the “Explain” to the index I need to create?
For reference the output of Explain for FishingTrip is:
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan2",
"covers": [
"cover ((`data`.`type`))",
"cover ((`data`.`date`))",
"cover ((`data`.`count`))",
"cover ((meta(`data`).`id`))"
],
"index": "def_type_date_count",
"index_id": "9f49efa27e901715",
"index_projection": {
"entry_keys": [
0,
1
]
},
"keyspace": "data",
"namespace": "default",
"spans": [
{
"range": [
{
"high": "\"FishingTrip\"",
"inclusion": 3,
"low": "\"FishingTrip\""
},
{
"inclusion": 0,
"low": "null"
}
]
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`data`.`type`)) = \"FishingTrip\") and (substr0(date_add_str(now_str(), (-300), \"day\"), 0, 10) < substr0(cover ((`data`.`date`)), 0, 10)))"
},
{
"#operator": "InitialGroup",
"aggregates": [
"count(1)"
],
"group_keys": []
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"count(1)"
],
"group_keys": []
},
{
"#operator": "FinalGroup",
"aggregates": [
"count(1)"
],
"group_keys": []
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "size",
"expr": "count(1)"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT COUNT(1) AS size FROM data WHERE type='FishingTrip' AND (SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10))"
}
and for Catch it is:
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan2",
"covers": [
"cover ((`data`.`type`))",
"cover ((`data`.`date`))",
"cover ((`data`.`count`))",
"cover ((meta(`data`).`id`))"
],
"index": "def_type_date_count",
"index_id": "9f49efa27e901715",
"index_projection": {
"entry_keys": [
0,
1,
2
]
},
"keyspace": "data",
"namespace": "default",
"spans": [
{
"range": [
{
"high": "\"Catch\"",
"inclusion": 3,
"low": "\"Catch\""
},
{
"inclusion": 0,
"low": "null"
},
{
"inclusion": 0,
"low": "null"
}
]
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`data`.`type`)) = \"Catch\") and (substr0(date_add_str(now_str(), (-300), \"day\"), 0, 10) < substr0(cover ((`data`.`date`)), 0, 10)))"
},
{
"#operator": "InitialGroup",
"aggregates": [
"sum(cover ((`data`.`count`)))"
],
"group_keys": []
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"sum(cover ((`data`.`count`)))"
],
"group_keys": []
},
{
"#operator": "FinalGroup",
"aggregates": [
"sum(cover ((`data`.`count`)))"
],
"group_keys": []
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "count",
"expr": "sum(cover ((`data`.`count`)))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT SUM(count) AS count FROM data WHERE type='Catch' AND (SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10))"
}