Hello, i’m super recent (as of today) to CouchBase.
I have a bucket with events. Each event looks like:
{
"createdOn": "2018-04-30T17:25:17Z",
"eventType": "identification",
"payload": {
"device": "device 1",
"identificationDistance": "0.5",
"ingestionID": "4472375037",
"personIdentified": false,
"personalID": "Bastian",
"time": "2018-04-30T17:25:17Z"
}
}
I’m trying to build a query to filter events, by grouping by person (payload.personalID) and return the top most recent event per group.
I searched the forums and found some similar questions, from which i deduced this query:
SELECT MAX([db1.statedb.payload.time, db1])[1]
FROM statedb AS db1
JOIN statedb AS db2 ON KEYS META(db1).id
GROUP BY db1.payload.personalID
ORDER BY db1.payload.personalID
but i have some questions:
a) is the query correct ?
b) what does MAX([db1.statedb.payload.time, db1])[1]
do exactly ? Is is calculating the max between the time and the document object ?
c) To run the query i created a primary index on statedb since i was getting errors about missing indexes.
I understand why (i think) but what indexes would i need to optimise the query ? Its taking me 10.76s for 20k documents.
The indexes i have at the moment:
[
{
"indexes": {
"datastore_id": "http://127.0.0.1:8091",
"id": "46c36c45b9980d91",
"index_key": [
"`eventType`",
"(`payload`.`time`)",
"(`payload`.`personalID`)"
],
"keyspace_id": "statedb",
"name": "idxIdentifiedPersonsByTime",
"namespace_id": "default",
"state": "online",
"using": "gsi"
}
},
{
"indexes": {
"datastore_id": "http://127.0.0.1:8091",
"id": "1522257852e3685f",
"index_key": [
"(distinct ((`payload`.`personalID`)))"
],
"keyspace_id": "statedb",
"name": "idxUniquePersons",
"namespace_id": "default",
"state": "online",
"using": "gsi"
}
},
{
"indexes": {
"datastore_id": "http://127.0.0.1:8091",
"id": "14ca0ca86fc2d2e2",
"index_key": [
"((`db1`.`payload`).`personalID`)"
],
"keyspace_id": "statedb",
"name": "idx1",
"namespace_id": "default",
"state": "online",
"using": "gsi"
}
},
{
"indexes": {
"datastore_id": "http://127.0.0.1:8091",
"id": "231a41e303dc1851",
"index_key": [
"`eventType`"
],
"keyspace_id": "statedb",
"name": "idxEventType",
"namespace_id": "default",
"state": "online",
"using": "gsi"
}
}
]
query plan:
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "PrimaryScan",
"index": "statedb-primary-index",
"keyspace": "statedb",
"namespace": "default",
"using": "gsi"
},
{
"#operator": "Fetch",
"as": "db1",
"keyspace": "statedb",
"namespace": "default"
},
{
"#operator": "Join",
"as": "db2",
"keyspace": "statedb",
"namespace": "default",
"on_keys": "(meta(`db1`).`id`)"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialGroup",
"aggregates": [
"max([(((`db1`.`statedb`).`payload`).`time`), `db1`])"
],
"group_keys": [
"((`db1`.`payload`).`personalID`)"
]
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"max([(((`db1`.`statedb`).`payload`).`time`), `db1`])"
],
"group_keys": [
"((`db1`.`payload`).`personalID`)"
]
},
{
"#operator": "FinalGroup",
"aggregates": [
"max([(((`db1`.`statedb`).`payload`).`time`), `db1`])"
],
"group_keys": [
"((`db1`.`payload`).`personalID`)"
]
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "(max([(((`db1`.`statedb`).`payload`).`time`), `db1`])[1])"
}
]
}
]
}
}
]
},
{
"#operator": "Order",
"sort_terms": [
{
"expr": "((`db1`.`payload`).`personalID`)"
}
]
},
{
"#operator": "FinalProject"
}
]
},
"text": "SELECT MAX([db1.statedb.payload.time, db1])[1]\nFROM statedb AS db1 \nJOIN statedb AS db2 ON KEYS META(db1).id\nGROUP BY db1.payload.personalID\nORDER BY db1.payload.personalID"
}
Thanks!