I’ve stumbled across something that I can’t find mentioned in the documentation, but apologies if I’ve just missed it.
If I create an index with definition:
CREATE INDEX `Idx_Core_User_n`
ON `core_north-europe`(`docType`,`role`,`establishmentId`)
WHERE `docType`="User" USING GSI
…and get the EXPLAIN plan for query:
SELECT META().id
FROM `core_north-europe`
WHERE `docType`="User"
AND `establishmentId`="FaS87F40jUWzPQtVFkFGaQ"
…I get:
[
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"covers": [
"cover ((`core_north-europe`.`docType`))",
"cover ((`core_north-europe`.`role`))",
"cover ((`core_north-europe`.`establishmentId`))",
"cover ((meta(`core_north-europe`).`id`))"
],
"filter_covers": {
"cover ((`core_north-europe`.`docType`))": "User"
},
"index": "Idx_Core_User_n",
"index_id": "943ea07e4d48bf37",
"keyspace": "core_north-europe",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"successor(\"User\")"
],
"Inclusion": 1,
"Low": [
"\"User\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`core_north-europe`.`docType`)) = \"User\") and (cover ((`core_north-europe`.`establishmentId`)) = \"FaS87F40jUWzPQtVFkFGaQ\"))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`core_north-europe`).`id`))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT META().id\n FROM `core_north-europe`\n WHERE `docType`=\"User\"\n AND `establishmentId`=\"FaS87F40jUWzPQtVFkFGaQ\""
}
]
This executes in 150ms with a result count of 11.
If however I include a value for the ‘role’ attribute in the query:
SELECT META().id
FROM `core_north-europe`
WHERE `docType`="User"
AND `establishmentId`="FaS87F40jUWzPQtVFkFGaQ" AND `role`="TeachingStaff"
…I get:
[
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"covers": [
"cover ((`core_north-europe`.`docType`))",
"cover ((`core_north-europe`.`role`))",
"cover ((`core_north-europe`.`establishmentId`))",
"cover ((meta(`core_north-europe`).`id`))"
],
"filter_covers": {
"cover ((`core_north-europe`.`docType`))": "User"
},
"index": "Idx_Core_User_n",
"index_id": "943ea07e4d48bf37",
"keyspace": "core_north-europe",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"\"User\"",
"\"TeachingStaff\"",
"\"FaS87F40jUWzPQtVFkFGaQ\""
],
"Inclusion": 3,
"Low": [
"\"User\"",
"\"TeachingStaff\"",
"\"FaS87F40jUWzPQtVFkFGaQ\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "(((cover ((`core_north-europe`.`docType`)) = \"User\") and (cover ((`core_north-europe`.`establishmentId`)) = \"FaS87F40jUWzPQtVFkFGaQ\")) and (cover ((`core_north-europe`.`role`)) = \"TeachingStaff\"))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`core_north-europe`).`id`))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT META().id\n FROM `core_north-europe`\n WHERE `docType`=\"User\"\n AND `establishmentId`=\"FaS87F40jUWzPQtVFkFGaQ\" AND `role`=\"TeachingStaff\""
}
]
…which executes in 18ms with a result count of 8.
The main difference here is the range scan. In the latter, all query params are included, whereas in the former only ‘User’ is included in the range. From what I can see, these query params are only included in the range based on them lining up with the index definition.
For example, if I changed the order of the attributes to include in the index definition as follows (‘establishmentId’ first):
CREATE INDEX `Idx_Core_User_n`
ON `core_north-europe`(`docType`,`establishmentId`,`role`)
WHERE `docType`="User" USING GSI
…I can now run the first query again:
SELECT META().id
FROM `core_north-europe`
WHERE `docType`="User"
AND `establishmentId`="FaS87F40jUWzPQtVFkFGaQ"
…with EXPLAIN:
[
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"covers": [
"cover ((`core_north-europe`.`docType`))",
"cover ((`core_north-europe`.`establishmentId`))",
"cover ((`core_north-europe`.`role`))",
"cover ((meta(`core_north-europe`).`id`))"
],
"filter_covers": {
"cover ((`core_north-europe`.`docType`))": "User"
},
"index": "Idx_Core_User_n",
"index_id": "8df51bceebda4650",
"keyspace": "core_north-europe",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"\"User\"",
"successor(\"FaS87F40jUWzPQtVFkFGaQ\")"
],
"Inclusion": 1,
"Low": [
"\"User\"",
"\"FaS87F40jUWzPQtVFkFGaQ\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`core_north-europe`.`docType`)) = \"User\") and (cover ((`core_north-europe`.`establishmentId`)) = \"FaS87F40jUWzPQtVFkFGaQ\"))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`core_north-europe`).`id`))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT META().id\n FROM `core_north-europe`\n WHERE `docType`=\"User\"\n AND `establishmentId`=\"FaS87F40jUWzPQtVFkFGaQ\""
}
]
…and execution in 20ms.
That’s 7.5x faster just by swapping the order of the index attributes to better suit one of my most executed queries.
Is there anything documented to let people know that they need to be thinking about this?
Thanks,
Fraser