Trying to get an index to work for a nested array field, but running Explain on the query doesn’t use the Index.
What am I doing wrong? (I’m on couchbase version 4.5.1)
Explain seems to be using only the primary index
Index:
[1]
create index appid_idx_1 ON test_bucket (ARRAY app.value.id for app IN OBJECT_PAIRS(apps) end) using gsi
EXPLAIN SELECT * FROM test_bucket WHERE ANY app IN OBJECT_PAIRS(apps) SATISFIES app.value.id = ‘featureid0000001’ END
[2]
create index feature_userid_2_idx ON test_bucket (ARRAY (ARRAY (ARRAY feature.featureUserIdentifier.id FOR feature IN enrollInfo.features end) FOR enrollInfo IN app.value.enrollmentInfoList end) FOR app IN OBJECT_PAIRS(apps) end) using gsi
EXPLAIN SELECT * FROM test_bucket WHERE ANY app IN OBJECT_PAIRS(apps) SATISFIES ANY enrollInfo IN app.value.enrollmentInfoList SATISFIES ANY feature IN enrollInfo.features SATISFIES feature.featureUserIdentifier.id = ‘featureid0000001’ END END END
You need to add DISTINCT or ALL keyword before ARRAY in CREATE INDEX
create index appid_idx_1 ON test_bucket (DISTINCT ARRAY app.value.id for app IN OBJECT_PAIRS(apps) end) ;
Instead of OBJECT_PAIRS() you can use OBJECT_VALUES(), then you can access app.id
If you are looking "id":"featureid0000001" any where in apps try WITHIN
create index ix1 on default (DISTINCT ARRAY v.id FOR v WITHIN apps END);
select * from default ANY v WITHIN apps SATISFIES v.id = "featureid0000001" END;
Thanks @vsr1, based on this I have added a index for the nested fields, now explain looks good and is hitting the index.
But when I run this query, it takes a lot of time (25seconds). I have 200K documents
Is that the max performance I can get?
create index feature_userid_2_idx ON dummy_bucket (DISTINCT ARRAY (DISTINCT ARRAY (DISTINCT ARRAY feature.featureUserIdentifier FOR feature IN enrollInfo.features end) FOR enrollInfo IN app.enrollmentInfoList end) FOR app IN OBJECT_VALUES(dummy_bucket.apps) end) using gsi
explain:
EXPLAIN SELECT *, meta(dummy_bucket) FROM dummy_bucket WHERE ANY app IN OBJECT_VALUES(dummy_bucket.apps) SATISFIES ANY enrollInfo IN app.enrollmentInfoList SATISFIES ANY feature IN enrollInfo.features SATISFIES feature.featureUserIdentifier.domain = “dummy” AND feature.featureUserIdentifier.type = “type0001” AND feature.featureUserIdentifier.id = “dummmy00001” END END END
explain output:
[
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "DistinctScan",
"scan": {
"#operator": "IndexScan",
"index": "feature_userid_10_idx",
"index_id": "899efc6667426b37",
"keyspace": "dummy_bucket",
"namespace": "default",
"spans": [
{
"Range": {
"Inclusion": 0,
"Low": [
"null"
]
}
}
],
"using": "gsi"
}
},
{
"#operator": "Fetch",
"keyspace": "dummy_bucket",
"namespace": "default"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "any `app` in object_values((`dummy_bucket`.`apps`)) satisfies any `enrollInfo` in (`app`.`enrollmentInfoList`) satisfies any `feature` in (`enrollInfo`.`features`) satisfies ((((((`feature`.`featureUserIdentifier`).`domain`) = \"dummy\")) and (((`feature`.`featureUserIdentifier`).`type`) = \"type0001\")) and (((`feature`.`featureUserIdentifier`).`id`) = \"dummmy00001\")) end end end"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "self",
"star": true
},
{
"expr": "meta((`dummy_bucket`.`notificationBucket`))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT *, meta(notificationBucket) FROM `dummy_bucket` WHERE ANY app IN OBJECT_VALUES(`dummy_bucket`.apps) SATISFIES ANY enrollInfo IN app.enrollmentInfoList SATISFIES ANY feature IN enrollInfo.features SATISFIES feature.featureUserIdentifier.domain = \"dummy\" AND feature.featureUserIdentifier.type = \"type0001\" AND feature.featureUserIdentifier.id = \"dummmy00001\" END END END"
}
]
Index key is feature.featureUserIdentifier which is object, the relational operator in predicate needs to be on feature.featureUserIdentifier without that query needs to do complete indexScan. If feature.featureUserIdentifier.type and feature.featureUserIdentifier.domain are fixed values try the following,
CREATE INDEX ix2 ON default (DISTINCT ARRAY (DISTINCT ARRAY (DISTINCT ARRAY feature.featureUserIdentifier.id FOR feature IN enrollInfo.features WHEN feature.featureUserIdentifier.type = "type0001" AND feature.featureUserIdentifier.domain = "domain" END) FOR enrollInfo IN app.enrollmentInfoList END) FOR app IN OBJECT_VALUES(apps) END);
SELECT *, meta(d) FROM default AS d
WHERE ANY app IN OBJECT_VALUES(d.apps) SATISFIES (ANY enrollInfo IN app.enrollmentInfoList SATISFIES (ANY feature IN enrollInfo.features SATISFIES feature.featureUserIdentifier.domain = "domain" AND feature.featureUserIdentifier.type = "type0001" AND feature.featureUserIdentifier.id = "dummmy00001" END) END) END;