I’m seeing a very large difference in the execution time of a parameterized SELECT statement depending on how I pass in the argument, where the argument is an array for an IN
clause.
If I POST (to http://[IP]:8093/query/service):
statement=SELECT META().id FROM `core_north-europe` WHERE docType="Group" AND groupProviderId IN ["ij8NG2_K20-gsy7zkz1jeg"] ORDER BY LOWER(name) OFFSET 0 LIMIT 20;
…I get results as expected, with the following metrics:
"metrics": {
"elapsedTime": "25.9944ms",
"executionTime": "25.9944ms",
"resultCount": 14,
"resultSize": 1008,
"sortCount": 14
}
If I change this query to a parameterized one (so that the query plan can be cached) by POSTing:
statement=SELECT META().id FROM `core_north-europe` WHERE docType="Group" AND groupProviderId IN $1 ORDER BY LOWER(name) OFFSET 0 LIMIT 20;&args=[["ij8NG2_K20-gsy7zkz1jeg"]]
…I get the same results, except it’s taken ~350x longer to execute:
"metrics": {
"elapsedTime": "9.6844907s",
"executionTime": "9.6844907s",
"resultCount": 14,
"resultSize": 1008,
"sortCount": 14
}
Here’s the EXPLAIN plans from both, respectively:
"requestID": "783eba9f-fd6d-4112-b015-29a609494bff",
"signature": "json",
"results": [
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "DistinctScan",
"scan": {
"#operator": "IndexScan",
"covers": [
"cover ((`core_north-europe`.`docType`))",
"cover ((`core_north-europe`.`groupProviderId`))",
"cover ((`core_north-europe`.`members`))",
"cover ((distinct (array `m` for `m` in (`core_north-europe`.`members`) end)))",
"cover ((`core_north-europe`.`name`))",
"cover ((meta(`core_north-europe`).`id`))"
],
"filter_covers": {
"cover ((`core_north-europe`.`docType`))": "Group"
},
"index": "Idx_Core_Group_n",
"index_id": "724d8b622ec24de4",
"keyspace": "core_north-europe",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"\"Group\"",
"successor(\"ij8NG2_K20-gsy7zkz1jeg\")"
],
"Inclusion": 1,
"Low": [
"\"Group\"",
"\"ij8NG2_K20-gsy7zkz1jeg\""
]
}
}
],
"using": "gsi"
}
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`core_north-europe`.`docType`)) = \"Group\") and (cover ((`core_north-europe`.`groupProviderId`)) in [\"ij8NG2_K20-gsy7zkz1jeg\"]))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`core_north-europe`).`id`))"
}
]
}
]
}
}
]
},
{
"#operator": "Order",
"limit": "20",
"offset": "0",
"sort_terms": [
{
"expr": "lower(cover ((`core_north-europe`.`name`)))"
}
]
},
{
"#operator": "Offset",
"expr": "0"
},
{
"#operator": "Limit",
"expr": "20"
},
{
"#operator": "FinalProject"
}
]
},
"text": "SELECT META().id FROM `core_north-europe` WHERE docType=\"Group\" AND groupProviderId IN [\"ij8NG2_K20-gsy7zkz1jeg\"] ORDER BY LOWER(name) OFFSET 0 LIMIT 20;"
}
]
"requestID": "fe6d2df5-a24e-46f8-9971-ca28da3986ce",
"signature": "json",
"results": [
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "DistinctScan",
"scan": {
"#operator": "IndexScan",
"covers": [
"cover ((`core_north-europe`.`docType`))",
"cover ((`core_north-europe`.`groupProviderId`))",
"cover ((`core_north-europe`.`members`))",
"cover ((distinct (array `m` for `m` in (`core_north-europe`.`members`) end)))",
"cover ((`core_north-europe`.`name`))",
"cover ((meta(`core_north-europe`).`id`))"
],
"filter_covers": {
"cover ((`core_north-europe`.`docType`))": "Group"
},
"index": "Idx_Core_Group_n",
"index_id": "724d8b622ec24de4",
"keyspace": "core_north-europe",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"successor(\"Group\")"
],
"Inclusion": 0,
"Low": [
"\"Group\"",
"null"
]
}
}
],
"using": "gsi"
}
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`core_north-europe`.`docType`)) = \"Group\") and (cover ((`core_north-europe`.`groupProviderId`)) in $1))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`core_north-europe`).`id`))"
}
]
}
]
}
}
]
},
{
"#operator": "Order",
"limit": "20",
"offset": "0",
"sort_terms": [
{
"expr": "lower(cover ((`core_north-europe`.`name`)))"
}
]
},
{
"#operator": "Offset",
"expr": "0"
},
{
"#operator": "Limit",
"expr": "20"
},
{
"#operator": "FinalProject"
}
]
},
"text": "SELECT META().id FROM `core_north-europe` WHERE docType=\"Group\" AND groupProviderId IN $1 ORDER BY LOWER(name) OFFSET 0 LIMIT 20;"
}
]
However, if I change the statements slightly, and rather than passing array arguments into an IN
, I pass a single argument into an equals comparison, things look much better, like so:
statement=SELECT META().id FROM `core_north-europe` WHERE docType="Group" AND groupProviderId="ij8NG2_K20-gsy7zkz1jeg" ORDER BY LOWER(name) OFFSET 0 LIMIT 20;
"metrics": {
"elapsedTime": "28.0058ms",
"executionTime": "28.0058ms",
"resultCount": 14,
"resultSize": 1008,
"sortCount": 14
}
…and:
statement=SELECT META().id FROM `core_north-europe` WHERE docType="Group" AND groupProviderId=$1 ORDER BY LOWER(name) OFFSET 0 LIMIT 20;&args=["ij8NG2_K20-gsy7zkz1jeg"]
"metrics": {
"elapsedTime": "26.003ms",
"executionTime": "26.003ms",
"resultCount": 14,
"resultSize": 1008,
"sortCount": 14
}
Is there something going on with the query engine parsing out array arguments for parameterized queries that explains the big difference?
I’m running 4.5.1-2806 Enterprise Edition (build-2806) on Windows Server 2012.