Hello!
I am using Couchbase 4.1 CE. I have finished converting a location query that would return me a feed of information based on some distances, supposedly minimizing the number of items it needs to run on by filtering by min/max lat and lng, ordered by a sum of weight (a pre-calculated one and another one handled in real-time).
Much probably the following query may perform much better, but since I don’t have such knowledge in Couchbase, it is being such a great challenge to work with such queries. I am all open to suggestions.
One of the issues I’ve noticed is probably the need for recalculation the same two passed values at least two times. Of course one another important one is the lack of indexes, that I’ve preferred to only create at the end.
Document feed (user_account::1::feed::26):
{
"related_id": "item::5",
"_type": "feed",
"account_id": "user_account::1",
"total_weight": 0,
"created_at": 1474553980,
"action_type": "EXAMPLE_ACTION",
"_id": 26
}
Related document (item::5):
{
"title": "Testing",+
"type": "ITEM_TYPE",
"location": [
{
"lat": 38.7628539,
"lng": -9.1170388
}
],
"created_by": "user_account::1",
"_id": 5,
"_type": "item",
"created_at": 1474543977
}
My current query:
SELECT itemEntry, relatedItemDetails, distance, weight_distance, itemEntry.total_weight FROM (
SELECT itemEntry, relatedItemDetails, distance, weight_distance
FROM default itemEntry
NEST default relatedItemDetails ON KEYS (itemEntry.related_id)
/* get weight based on distance, and sum the ALIAS of it to total_weight when ordering */
LET distance = CASE
WHEN relatedItemDetails[0].location[0] IS NOT MISSING AND
relatedItemDetails[0].location[0].lat BETWEEN -0.89932160591873 AND 0.89932160591873 AND
relatedItemDetails[0].location[0].lng BETWEEN -0.89932160591873 AND 0.89932160591873
THEN (acos(sin( 0 )*sin(radians(relatedItemDetails[0].location[0].lat)) + cos( 0 )*cos(radians(relatedItemDetails[0].location[0].lat))*cos(radians(relatedItemDetails[0].location[0].lng)-( 0 ))) * 6371)
ELSE null
END,
weight_distance = CASE
WHEN relatedItemDetails[0].location[0] IS NOT MISSING AND
relatedItemDetails[0].location[0].lat BETWEEN -0.89932160591873 AND 0.89932160591873 AND
relatedItemDetails[0].location[0].lng BETWEEN -0.89932160591873 AND 0.89932160591873
THEN ((acos(sin( 0 )*sin(radians(relatedItemDetails[0].location[0].lat)) + cos( 0 )*cos(radians(relatedItemDetails[0].location[0].lat))*cos(radians(relatedItemDetails[0].location[0].lng)-( 0 ))) * 6371) / 100)
-
(acos(sin( 0 )*sin(radians(relatedItemDetails[0].location[0].lat)) + cos( 0 )*cos(radians(relatedItemDetails[0].location[0].lat))*cos(radians(relatedItemDetails[0].location[0].lng)-( 0 ))) * 6371)
ELSE 0
END
WHERE itemEntry._type = "feed" AND
itemEntry.account_id = user_account::5
) feedEntry
WHERE (feedEntry.distance <= 100 OR feedEntry.distance IS NULL)
EXPLAIN of the current SELECT:
[0] => stdClass Object
(
[#operator] => Sequence
[~children] => Array
(
[0] => stdClass Object
(
[#operator] => Sequence
[~children] => Array
(
[0] => stdClass Object
(
[#operator] => Sequence
[~children] => Array
(
[0] => stdClass Object
(
[#operator] => IndexScan
[index] => idx_type_accountid
[keyspace] => default
[namespace] => default
[spans] => Array
(
[0] => stdClass Object
(
[Range] => stdClass Object
(
[High] => Array
(
[0] => "feed"
[1] => $dkAccountId
)
[Inclusion] => 3
[Low] => Array
(
[0] => "feed"
[1] => $dkAccountId
)
)
)
)
[using] => gsi
)
[1] => stdClass Object
(
[#operator] => Parallel
[~child] => stdClass Object
(
[#operator] => Sequence
[~children] => Array
(
[0] => stdClass Object
(
[#operator] => Fetch
[as] => itemEntry
[keyspace] => default
[namespace] => default
)
[1] => stdClass Object
(
[#operator] => Nest
[as] => relatedItemDetails
[keyspace] => default
[namespace] => default
[on_keys] => (`itemEntry`.`related_id`)
)
[2] => stdClass Object
(
[#operator] => Let
[bindings] => Array
(
[0] => stdClass Object
(
[expr] => case when ((((((`relatedItemDetails`[0]).`location`)[0]) is not missing) and (((((`relatedItemDetails`[0]).`location`)[0]).`lat`) between $minLat and $maxLat)) and (((((`relatedItemDetails`[0]).`location`)[0]).`lng`) between $minLng and $maxLng)) then (acos(((sin($lat) * sin(radians(((((`relatedItemDetails`[0]).`location`)[0]).`lat`)))) + ((cos($lat) * cos(radians(((((`relatedItemDetails`[0]).`location`)[0]).`lat`)))) * cos((radians(((((`relatedItemDetails`[0]).`location`)[0]).`lng`)) - $lng))))) * $earthRadius) else null end
[variable] => distance
)
[1] => stdClass Object
(
[expr] => case when ((((((`relatedItemDetails`[0]).`location`)[0]) is not missing) and (((((`relatedItemDetails`[0]).`location`)[0]).`lat`) between $minLat and $maxLat)) and (((((`relatedItemDetails`[0]).`location`)[0]).`lng`) between $minLng and $maxLng)) then (((acos(((sin($lat) * sin(radians(((((`relatedItemDetails`[0]).`location`)[0]).`lat`)))) + ((cos($lat) * cos(radians(((((`relatedItemDetails`[0]).`location`)[0]).`lat`)))) * cos((radians(((((`relatedItemDetails`[0]).`location`)[0]).`lng`)) - $lng))))) * $earthRadius) / 100) - (acos(((sin($lat) * sin(radians(((((`relatedItemDetails`[0]).`location`)[0]).`lat`)))) + ((cos($lat) * cos(radians(((((`relatedItemDetails`[0]).`location`)[0]).`lat`)))) * cos((radians(((((`relatedItemDetails`[0]).`location`)[0]).`lng`)) - $lng))))) * $earthRadius)) else 0 end
[variable] => weight_distance
)
)
)
[3] => stdClass Object
(
[#operator] => Filter
[condition] => (((`itemEntry`.`_type`) = "feed") and ((`itemEntry`.`account_id`) = $dkAccountId))
)
[4] => stdClass Object
(
[#operator] => InitialProject
[result_terms] => Array
(
[0] => stdClass Object
(
[expr] => `itemEntry`
)
[1] => stdClass Object
(
[expr] => `relatedItemDetails`
)
[2] => stdClass Object
(
[expr] => `distance`
)
[3] => stdClass Object
(
[expr] => `weight_distance`
)
)
)
)
)
)
)
)
[1] => stdClass Object
(
[#operator] => Alias
[as] => feedEntry
)
[2] => stdClass Object
(
[#operator] => Parallel
[~child] => stdClass Object
(
[#operator] => Sequence
[~children] => Array
(
[0] => stdClass Object
(
[#operator] => Filter
[condition] => (((`feedEntry`.`distance`) <= $maxRadius) or ((`feedEntry`.`distance`) is null))
)
[1] => stdClass Object
(
[#operator] => InitialProject
[result_terms] => Array
(
[0] => stdClass Object
(
[expr] => (`feedEntry`.`itemEntry`)
)
[1] => stdClass Object
(
[expr] => (`feedEntry`.`relatedItemDetails`)
)
[2] => stdClass Object
(
[expr] => (`feedEntry`.`distance`)
)
[3] => stdClass Object
(
[expr] => (`feedEntry`.`weight_distance`)
)
[4] => stdClass Object
(
[expr] => ((`feedEntry`.`itemEntry`).`total_weight`)
)
)
)
)
)
)
)
)
[1] => stdClass Object
(
[#operator] => Order
[sort_terms] => Array
(
[0] => stdClass Object
(
[expr] => ((`feedEntry`.`total_weight`) + (`feedEntry`.`weight_distance`))
)
[1] => stdClass Object
(
[desc] => 1
[expr] => ((`feedEntry`.`itemEntry`).`created_at`)
)
)
)
[2] => stdClass Object
(
[#operator] => FinalProject
)
)
)
I feel that the topic is a bit extent, but as this may be a core feature of our system, we really need to get it done well.
I am looking for your feedback!
Thanks