Following this nice advice from @vsr1 in "How do I create an index to cope with UNNEST'ed queries?", I ended up with a query that finds the last edited or created document of a specific type:
SELECT
IFMISSINGORNULL(du.modified, d.revisioninfo.created) AS modified,
IFMISSINGORNULL(du.modifiedby, d.revisioninfo.createdby) AS modifiedby,
d.revisioninfo.modifiedcount
FROM data AS d
LEFT UNNEST TO_ARRAY(d.revisioninfo.updates) AS du
WHERE d.type='FishingTrip'
ORDER BY modified DESC limit 1;
It uses this index:
CREATE INDEX `def_type_modified` ON `data`(`type`,(`revisioninfo`.`modifiedcount`),(`revisioninfo`.`created`),(`revisioninfo`.`createdby`),(`revisioninfo`.`updates`))
as one can see from the EXPLAIN output:
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan2",
"covers": [
"cover ((`d`.`type`))",
"cover (((`d`.`revisioninfo`).`modifiedcount`))",
"cover (((`d`.`revisioninfo`).`created`))",
"cover (((`d`.`revisioninfo`).`createdby`))",
"cover (((`d`.`revisioninfo`).`updates`))",
"cover ((meta(`d`).`id`))"
],
"index": "def_type_modified",
"index_id": "587679f61576574c",
"index_projection": {
"entry_keys": [
0,
1,
2,
3,
4
]
},
"keyspace": "data",
"namespace": "default",
"spans": [
{
"exact": true,
"range": [
{
"high": "\"FishingTrip\"",
"inclusion": 3,
"low": "\"FishingTrip\""
}
]
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Unnest",
"as": "du",
"expr": "to_array(cover (((`d`.`revisioninfo`).`updates`)))",
"outer": true
}
]
}
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "(cover ((`d`.`type`)) = \"FishingTrip\")"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "modified",
"expr": "ifmissingornull((`du`.`modified`), cover (((`d`.`revisioninfo`).`created`)))"
},
{
"as": "modifiedby",
"expr": "ifmissingornull((`du`.`modifiedby`), cover (((`d`.`revisioninfo`).`createdby`)))"
},
{
"expr": "cover (((`d`.`revisioninfo`).`modifiedcount`))"
}
]
}
]
}
}
]
},
{
"#operator": "Order",
"limit": "1",
"sort_terms": [
{
"desc": true,
"expr": "`modified`"
}
]
},
{
"#operator": "Limit",
"expr": "1"
},
{
"#operator": "FinalProject"
}
]
},
"text": "SELECT\n IFMISSINGORNULL(du.modified, d.revisioninfo.created) AS modified,\n IFMISSINGORNULL(du.modifiedby, d.revisioninfo.createdby) AS modifiedby,\n d.revisioninfo.modifiedcount\nFROM data AS d\nLEFT UNNEST TO_ARRAY(d.revisioninfo.updates) AS du\nWHERE d.type='FishingTrip'\nORDER BY modified DESC limit 1;"
}
However, now I have started testing with “live” data — and the above query takes around 1 second when I have 25,000–30,000 documents.
I suspect that the ORDER BY modified clause could be the culprit — but I'm not sure, only guessing, since "modified" is a computed alias and not directly part of the index. If I remove the ORDER BY, the response comes back in milliseconds!
I have tried to create the index like this (purely an attempt to “unfold” the query):
CREATE INDEX `def_type_modified` ON `data`(`type`,(`revisioninfo`.`modifiedcount`),(`revisioninfo`.`created`),(`revisioninfo`.`createdby`),(`revisioninfo`.`updates`),IFMISSINGORNULL(TO_ARRAY(d.revisioninfo.updates).modified, d.revisioninfo.created))
But it does not speed up the query…
How can I improve this?