This is related to another question: Why does N1QL not use my new index...? - but slightly different.
In the same database I have another query that uses an array of values. When running on all data the query takes 1,5-5 seconds which is way longer than I would expect (and that will cause problems for users’ experience).
This is an example of a query running on all data:
SELECT specieskey,(SUM(IFMISSINGORNULL(d.hoursfished,0))*60 + SUM(IFMISSINGORNULL(d.minutesfished,0))) minutesfished,COUNT(*) count
FROM data d
UNNEST targetspecies AS specieskey
WHERE d.type='FishingTrip' AND d.userkey is valued
AND specieskey IN ['12','1','2','13','25','4','49','38','6','7','41']
AND d.locationtype is valued
AND d.statslocation is valued
AND d.statspublic
AND d.year is valued
AND d.month is valued
GROUP BY specieskey
And this is an example of the “same” query running for a specific user:
SELECT specieskey,(SUM(IFMISSINGORNULL(d.hoursfished,0))*60 + SUM(IFMISSINGORNULL(d.minutesfished,0))) minutesfished,COUNT(*) count
FROM data d
UNNEST targetspecies AS specieskey
WHERE d.type='FishingTrip' AND d.userkey='2124DEFEC111BA8FC1257ED20034B387'
AND specieskey IN ['12','1','2','13','25','4','49','38','6','7','41']
AND d.locationtype is valued
AND d.statslocation is valued
AND d.statspublic is valued
AND d.year is valued
AND d.month is valued
GROUP BY specieskey
The latter runs fast (10-30ms).
This is the index I have created:
CREATE INDEX stats_trip_species_time
ON data (type,userkey,targetspecies,locationtype,statslocation,statspublic,year,hoursfished,minutesfished)
WHERE type='FishingTrip'
AND userkey is valued
AND targetspecies is valued
AND locationtype is valued
AND statslocation is valued
AND statspublic is valued
AND year is valued
AND month is valued
And this is the “Explain” - which I with my limited knowledge think looks Ok?
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan3",
"as": "d",
"covers": [
"cover ((`d`.`type`))",
"cover ((`d`.`userkey`))",
"cover ((`d`.`targetspecies`))",
"cover ((`d`.`locationtype`))",
"cover ((`d`.`statslocation`))",
"cover ((`d`.`statspublic`))",
"cover ((`d`.`year`))",
"cover ((`d`.`hoursfished`))",
"cover ((`d`.`minutesfished`))",
"cover ((meta(`d`).`id`))"
],
"filter_covers": {
"cover (((`d`.`locationtype`) is valued))": true,
"cover (((`d`.`month`) is valued))": true,
"cover (((`d`.`statslocation`) is valued))": true,
"cover (((`d`.`statspublic`) is valued))": true,
"cover (((`d`.`targetspecies`) is valued))": true,
"cover (((`d`.`userkey`) is valued))": true,
"cover (((`d`.`year`) is valued))": true,
"cover ((`d`.`type`))": "FishingTrip"
},
"index": "stats_trip_species_time",
"index_id": "a8ccbdbe70c0dc9c",
"index_projection": {
"entry_keys": [
0,
1,
2,
3,
4,
5,
6,
7,
8
]
},
"keyspace": "data",
"namespace": "default",
"spans": [
{
"range": [
{
"high": "\"FishingTrip\"",
"inclusion": 3,
"low": "\"FishingTrip\""
},
{
"inclusion": 0,
"low": "null"
},
{
"high": "{}",
"inclusion": 1,
"low": "[]"
},
{
"inclusion": 0,
"low": "null"
},
{
"inclusion": 0,
"low": "null"
},
{
"inclusion": 1,
"low": "true"
},
{
"inclusion": 0,
"low": "null"
}
]
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Unnest",
"as": "specieskey",
"expr": "cover ((`d`.`targetspecies`))"
}
]
}
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((((((((cover ((`d`.`type`)) = \"FishingTrip\") and cover (((`d`.`userkey`) is valued))) and (`specieskey` in [\"12\", \"1\", \"2\", \"13\", \"25\", \"4\", \"49\", \"38\", \"6\", \"7\", \"41\"])) and cover (((`d`.`locationtype`) is valued))) and cover (((`d`.`statslocation`) is valued))) and cover ((`d`.`statspublic`))) and cover (((`d`.`year`) is valued))) and cover (((`d`.`month`) is valued)))"
},
{
"#operator": "InitialGroup",
"aggregates": [
"count(*)",
"sum(ifmissingornull(cover ((`d`.`hoursfished`)), 0))",
"sum(ifmissingornull(cover ((`d`.`minutesfished`)), 0))"
],
"group_keys": [
"`specieskey`"
]
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"count(*)",
"sum(ifmissingornull(cover ((`d`.`hoursfished`)), 0))",
"sum(ifmissingornull(cover ((`d`.`minutesfished`)), 0))"
],
"group_keys": [
"`specieskey`"
]
},
{
"#operator": "FinalGroup",
"aggregates": [
"count(*)",
"sum(ifmissingornull(cover ((`d`.`hoursfished`)), 0))",
"sum(ifmissingornull(cover ((`d`.`minutesfished`)), 0))"
],
"group_keys": [
"`specieskey`"
]
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "`specieskey`"
},
{
"as": "minutesfished",
"expr": "((sum(ifmissingornull(cover ((`d`.`hoursfished`)), 0)) * 60) + sum(ifmissingornull(cover ((`d`.`minutesfished`)), 0)))"
},
{
"as": "count",
"expr": "count(*)"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT specieskey,(SUM(IFMISSINGORNULL(d.hoursfished,0))*60 + SUM(IFMISSINGORNULL(d.minutesfished,0))) minutesfished,COUNT(*) count \nFROM data d \nUNNEST targetspecies AS specieskey \nWHERE d.type='FishingTrip' AND d.userkey is valued \nAND specieskey IN ['12','1','2','13','25','4','49','38','6','7','41'] \nAND d.locationtype is valued \nAND d.statslocation is valued \nAND d.statspublic \nAND d.year is valued \nAND d.month is valued \nGROUP BY specieskey"
}
Any advice on how I can improve the response times for the “all” query is much appreciated. Do I have to do anything in particular to handle the array of species keys? Thanks in advance!
Running on Couchbase Server 6.0.0 (build 1693) Community Edition.