Hi there,
I have documents with nested arrays (array of legs contained in array of segments):
{
"type": "test",
"id": "CUZ-LIM",
"segments": [{
"legs": [
{
"flightCode": {
"carrierCode": "LA",
"flightNumber": 2062
},
"arrival": "2018-10-08T19:43:00.000Z",
"arrivalStation": "LIM",
"departure": "2018-10-08T18:11:00.000Z",
"departureStation": "CUZ",
"key": "LA2062 CUZ#20181008T1811~LIM#20181008T1943"
}
]
}]
}
Note this is a simplified version of the actual documents, which are considerably bigger.
I have been trying to create an index and query that both are covered in order to provide decent performance, since my bucket has several hundreds of thousands of documents and fetches to the data service would make the query very slow.
The query must allow to filter by several properties of the leg arbitrarily, i.e. that I may filter by departureStation
only, departureStation
and arrivalStation
together, and so forth depending on the criteria given by the user to create the query.
I have finally succeeded with an index like this:
CREATE INDEX `nested-array-1-prop-idx` ON `index-test`(
type,
ALL ARRAY
(
ALL ARRAY leg.departureStation FOR leg IN segment.legs END
)
FOR segment IN segments END
)
WHERE (`type` = 'test')
And some examples of queries that cover the index:
SELECT meta(`index-test`).id
FROM `index-test`
WHERE type = 'test'
AND ANY segment IN segments SATISFIES
(ANY leg IN segment.legs SATISFIES leg.departureStation = 'CUZ' END)
END
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "DistinctScan",
"scan": {
"#operator": "IndexScan2",
"covers": [
"cover ((`index-test`.`type`))",
"cover ((all (array (all (array (`leg`.`departureStation`) for `leg` in (`segment`.`legs`) end)) for `segment` in (`index-test`.`segments`) end)))",
"cover ((meta(`index-test`).`id`))"
],
"filter_covers": {
"cover ((`index-test`.`type`))": "test",
"cover (any `segment` in (`index-test`.`segments`) satisfies any `leg` in (`segment`.`legs`) satisfies ((`leg`.`departureStation`) = \"CUZ\") end end)": true
},
"index": "nested-array-1-prop-idx",
"index_id": "2ad8c4a634728f0c",
"index_projection": {
"entry_keys": [
0
],
"primary_key": true
},
"keyspace": "index-test",
"namespace": "default",
"spans": [
{
"exact": true,
"range": [
{
"high": "\"test\"",
"inclusion": 3,
"low": "\"test\""
},
{
"high": "\"CUZ\"",
"inclusion": 3,
"low": "\"CUZ\""
}
]
}
],
"using": "gsi"
}
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`index-test`.`type`)) = \"test\") and cover (any `segment` in (`index-test`.`segments`) satisfies any `leg` in (`segment`.`legs`) satisfies ((`leg`.`departureStation`) = \"CUZ\") end end))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`index-test`).`id`))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT meta(`index-test`).id\nFROM `index-test`\nWHERE type = 'test'\nAND ANY segment IN segments SATISFIES\n (ANY leg IN segment.legs SATISFIES leg.departureStation = 'CUZ' END)\nEND"
}
Or:
SELECT meta(`index-test`).id
FROM `index-test`
WHERE type = 'test'
AND ANY segment IN segments SATISFIES
ANY leg IN segment.legs
SATISFIES leg.departureStation IS NOT MISSING
AND leg.arrivalStation = 'LIM'
AND flightCode.carrierCode = 'LA'
AND flightCode.flightNumber = 2062
END
END
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "DistinctScan",
"scan": {
"#operator": "IndexScan2",
"covers": [
"cover ((`index-test`.`type`))",
"cover ((all (array (all (array (`leg`.`departureStation`) for `leg` in (`segment`.`legs`) end)) for `segment` in (`index-test`.`segments`) end)))",
"cover ((meta(`index-test`).`id`))"
],
"filter_covers": {
"cover ((`index-test`.`type`))": "test",
"cover (any `segment` in (`index-test`.`segments`) satisfies any `leg` in (`segment`.`legs`) satisfies (((((`leg`.`departureStation`) is not missing) and ((`leg`.`arrivalStation`) = \"LIM\")) and (((`index-test`.`flightCode`).`carrierCode`) = \"LA\")) and (((`index-test`.`flightCode`).`flightNumber`) = 2062)) end end)": true,
"cover (any `segment` in (`index-test`.`segments`) satisfies any `leg` in (`segment`.`legs`) satisfies (((`leg`.`departureStation`) is not missing) and ((`leg`.`arrivalStation`) = \"LIM\") and (((`index-test`.`flightCode`).`carrierCode`) = \"LA\") and (((`index-test`.`flightCode`).`flightNumber`) = 2062)) end end)": true
},
"index": "nested-array-1-prop-idx",
"index_id": "2ad8c4a634728f0c",
"index_projection": {
"entry_keys": [
0
],
"primary_key": true
},
"keyspace": "index-test",
"namespace": "default",
"spans": [
{
"exact": true,
"range": [
{
"high": "\"test\"",
"inclusion": 3,
"low": "\"test\""
},
{
"inclusion": 1,
"low": "null"
}
]
}
],
"using": "gsi"
}
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((cover ((`index-test`.`type`)) = \"test\") and cover (any `segment` in (`index-test`.`segments`) satisfies any `leg` in (`segment`.`legs`) satisfies (((((`leg`.`departureStation`) is not missing) and ((`leg`.`arrivalStation`) = \"LIM\")) and (((`index-test`.`flightCode`).`carrierCode`) = \"LA\")) and (((`index-test`.`flightCode`).`flightNumber`) = 2062)) end end))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "cover ((meta(`index-test`).`id`))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "SELECT meta(`index-test`).id\nFROM `index-test`\nWHERE type = 'test'\nAND ANY segment IN segments SATISFIES\n ANY leg IN segment.legs\n SATISFIES leg.departureStation IS NOT MISSING\n AND leg.arrivalStation = 'LIM'\n AND flightCode.carrierCode = 'LA'\n AND flightCode.flightNumber = 2062\n END\nEND"
}
Note that in my index definition I’m only indexing leg.departureStation
and the query is covered as long as leg.departureStation
is provided in the query, with a value or with IS NOT MISSING
.
I’m wondering if there is a clearer or more intuitive way of achieving the same goal.
I’m using Couchbase 5.0.
Cheers.