I have the following N1QL query that takes me 10 seconds
SELECT multimedia,titles.title,publicationDate
FROM data
WHERE (sectionId="90"
OR sectionId="93"
OR sectionId="94")
AND publicationDate <= '2021-09-15 16:45:04'
AND published=TRUE
AND type="news"
AND META().id LIKE "editorial::%"
ORDER BY publicationDate DESC
LIMIT 500
The slowness is due to “ORDER BY”. without ORDER BY it takes only 60ms.
It is a bucket with 1300000 items. 100% RAM resident
Index contains 1000000 item and weighs 186.6 MB, 100% RAM resident ratio, 0 requests/seg
Enterprise Edition 6.6.2 build 9598. CPU and RAM idle servers. No load.
The index:
CREATE INDEX `data-idx` ON
`data`(`sectionId`,`publicationDate` DESC,`type`,`published`,`isAmazonOnsite`,`updatedOn`)
WHERE ((META().`id`) LIKE "editorial::%") WITH { "num_replica":1 }
The Plan do a IndexScan3 (#itemsOut - 157043 (quick)) → but need to make a fetch of 157043 (slow) → filter od 157043 (slow) → to finally make the ORDER (quick) → LIMIT (quick)
If the “publicationDate DESC” field is already indexed, why doesn’t the indexScan3 return the data already filtered, sorted and limited to 500. without fetching to the data service?
How could it improve the speed?
{
"#operator": "Sequence",
"#stats": {
"#phaseSwitches": 1,
"execTime": "17.781µs"
},
"~children": [
{
"#operator": "Authorize",
"#stats": {
"#phaseSwitches": 3,
"execTime": "14.58µs",
"servTime": "1.806162ms"
},
"privileges": {
"List": [
{
"Target": "default:data",
"Priv": 7
}
]
},
"~child": {
"#operator": "Sequence",
"#stats": {
"#phaseSwitches": 1,
"execTime": "3.83µs"
},
"~children": [
{
"#operator": "Sequence",
"#stats": {
"#phaseSwitches": 1,
"execTime": "2.21µs"
},
"~children": [
{
"#operator": "IndexScan3",
"#stats": {
"#itemsOut": 157043,
"#phaseSwitches": 628175,
"execTime": "297.608181ms",
"kernTime": "9.287087961s",
"servTime": "57.198871ms"
},
"index": "data-idx-editorial-portadillas",
"index_id": "6f1e7d080213883f",
"index_projection": {
"primary_key": true
},
"keyspace": "data",
"namespace": "default",
"spans": [
{
"exact": true,
"range": [
{
"high": "\"90\"",
"inclusion": 3,
"low": "\"90\""
},
{
"high": "\"2021-09-15 16:45:04\"",
"inclusion": 2,
"low": "null"
},
{
"high": "\"news\"",
"inclusion": 3,
"low": "\"news\""
},
{
"high": "true",
"inclusion": 3,
"low": "true"
}
]
},
{
"exact": true,
"range": [
{
"high": "\"93\"",
"inclusion": 3,
"low": "\"93\""
},
{
"high": "\"2021-09-15 16:45:04\"",
"inclusion": 2,
"low": "null"
},
{
"high": "\"news\"",
"inclusion": 3,
"low": "\"news\""
},
{
"high": "true",
"inclusion": 3,
"low": "true"
}
]
},
{
"exact": true,
"range": [
{
"high": "\"94\"",
"inclusion": 3,
"low": "\"94\""
},
{
"high": "\"2021-09-15 16:45:04\"",
"inclusion": 2,
"low": "null"
},
{
"high": "\"news\"",
"inclusion": 3,
"low": "\"news\""
},
{
"high": "true",
"inclusion": 3,
"low": "true"
}
]
}
],
"using": "gsi",
"#time_normal": "00:00.354",
"#time_absolute": 0.354807052
},
{
"#operator": "Fetch",
"#stats": {
"#itemsIn": 157043,
"#itemsOut": 157043,
"#phaseSwitches": 647807,
"execTime": "351.800071ms",
"kernTime": "1.04608118s",
"servTime": "8.273478909s"
},
"keyspace": "data",
"namespace": "default",
"#time_normal": "00:08.625",
"#time_absolute": 8.62527898
},
{
"#operator": "Sequence",
"#stats": {
"#phaseSwitches": 1,
"execTime": "1.24µs"
},
"~children": [
{
"#operator": "Filter",
"#stats": {
"#itemsIn": 157043,
"#itemsOut": 157043,
"#phaseSwitches": 628175,
"execTime": "9.078187605s",
"kernTime": "617.803648ms"
},
"condition": "((((((((`data`.`sectionId`) = \"90\") or ((`data`.`sectionId`) = \"93\")) or ((`data`.`sectionId`) = \"94\")) and ((`data`.`publicationDate`) <= \"2021-09-15 16:45:04\")) and ((`data`.`published`) = true)) and ((`data`.`type`) = \"news\")) and ((meta(`data`).`id`) like \"editorial::%\"))",
"#time_normal": "00:09.078",
"#time_absolute": 9.078187605
},
{
"#operator": "InitialProject",
"#stats": {
"#itemsIn": 157043,
"#itemsOut": 157043,
"#phaseSwitches": 628175,
"execTime": "3.452829516s",
"kernTime": "6.243215938s"
},
"result_terms": [
{
"expr": "(`data`.`multimedia`)"
},
{
"expr": "((`data`.`titles`).`title`)"
},
{
"expr": "(`data`.`publicationDate`)"
}
],
"#time_normal": "00:03.452",
"#time_absolute": 3.452829516
}
],
"#time_normal": "00:00.000",
"#time_absolute": 0.00000124
}
],
"#time_normal": "00:00.000",
"#time_absolute": 0.00000221
},
{
"#operator": "Order",
"#stats": {
"#itemsIn": 157043,
"#itemsOut": 500,
"#phaseSwitches": 314591,
"execTime": "937.039252ms",
"kernTime": "8.760740031s"
},
"limit": "500",
"sort_terms": [
{
"desc": true,
"expr": "(`data`.`publicationDate`)"
}
],
"#time_normal": "00:00.937",
"#time_absolute": 0.937039252
},
{
"#operator": "Limit",
"#stats": {
"#itemsIn": 500,
"#itemsOut": 500,
"#phaseSwitches": 1001,
"execTime": "154.614µs"
},
"expr": "500",
"#time_normal": "00:00.000",
"#time_absolute": 0.000154614
},
{
"#operator": "FinalProject",
"#stats": {
"#itemsIn": 500,
"#itemsOut": 500,
"#phaseSwitches": 1501,
"execTime": "354.527µs",
"kernTime": "9.370922ms"
},
"#time_normal": "00:00.000",
"#time_absolute": 0.000354527
}
],
"#time_normal": "00:00.000",
"#time_absolute": 0.00000383
},
"#time_normal": "00:00.001",
"#time_absolute": 0.001820742
},
{
"#operator": "Stream",
"#stats": {
"#itemsIn": 500,
"#itemsOut": 500,
"#phaseSwitches": 1003,
"execTime": "8.182236ms",
"kernTime": "9.701608428s"
},
"#time_normal": "00:00.008",
"#time_absolute": 0.008182235999999999
}
],
"~versions": [
"6.5.0-N1QL",
"6.6.2-9598-enterprise"
],
"#time_normal": "00:00.000",
"#time_absolute": 0.000017780999999999998
}
A curiosity:
- If I filter with OR disjunctions: 'WHERE (sectionId=“90” OR sectionId=“93” OR sectionId=“94”)'
takes 10 seconds - If I filter without OR disjunctions: 'WHERE (sectionId=“90”)'
take only 40ms(because it returns the 500 results directly from the IndexScan3 )
# 40ms
SELECT multimedia,titles.title,publicationDate
FROM data
WHERE (sectionId="90")
AND publicationDate <= '2021-09-15 16:45:04'
AND published=TRUE
AND type="news"
AND META().id LIKE "editorial::%"
ORDER BY publicationDate DESC
LIMIT 500
Notes:
- I need the ORDER and the ORs
- I could create as leading key “publicationDate DESC” and improve a lot the time of this query, but the reality is that this index is also used in more types of queries, And the users can select the fields by which he wants to sort or search.
- A “Covering Index” (all the fields specified in the query) improves the times, but it is not an option either, because some queries return free text fields and descriptions (for which we do not filter) that would make the index gigantic.
Thanks