Hey @vsr1,
Let me give you all information like structure of JSON in CB,INDEX,QUERY and NOs of records
N0. of record in CB= 300 millions
JSON Structure is below:
{
“default”: {
“active”: true,
“avatar_hierarchy”: [
{
“id”: 41730
},
{
“id”: 41738
},
{
“id”: 41768
},
{
“id”: 41824
},
{
“id”: 1010001
},
{
“id”: 41988
},
{
“id”: 42706
},
{
“id”: 215851
}
],
“category_hierarchy”: [
{
“id”: 4032
},
{
“id”: 4003
},
{
“id”: 4004
},
{
“id”: 4038
}
],
“client”: “hca”,
“content”: “Well if you can ever get an answer you might get what you need in times of need.”,
“engagement_status”: null,
“event_time”: “2017-01-06T00:00:00.000Z”,
“feedback_score”: -1,
“feedback_score_sentiment”: “negative”,
“id”: 100000001,
“location_id”: 0,
“location_name”: null,
“location_profile_picture_url”: null,
“provider_gender”: “M”,
“provider_id”: 1010001,
“provider_name”: “Randy, MD”,
“provider_profile_picture_url”: “051b23f9539b4a1d7.jpg”,
“source_hierarchy”: [
{
“id”: 100000
},
{
“id”: 100001
},
{
“id”: 100032
},
{
“id”: 100171
}
],
“source_id”: 100171,
“source_name”: “Google Plus”,
“source_score”: 0,
“survey_type”: “FEEDBACK”,
“task_escalated”: false,
“task_id”: 150224,
“task_open_for_days”: 7,
“task_owner_id”: “keith@abct.in”,
“task_owner_name”: “Rigdon, Keith”,
“task_response_type”: “O”,
“task_status_code”: “C”
}
}
secondary index:
CREATE INDEX filter ON default (client,feedback_score_sentiment,task_owner_name,DISTINCT ARRAY v.id FOR v IN avatar_hierarchy END, event_time);
Query
select d.* from default AS d
where d.event_time between “2017-01-01T00:00:00.000Z” and “2017-05-20T00:00:00.000Z”
and d.client= “hca”
AND
ANY a IN d.avatar_hierarchy SATISFIES a.id IN [41730] END AND
ANY s IN d.source_hierarchy SATISFIES s.id IN [100000,100001] END and
d.feedback_score_sentiment in [“positive”,“negative”,“unscored”]
and d.task_owner_name is null
order by d.provider_name desc limit 10 offset 10;
Explain is look like:;
[
{
“plan”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “DistinctScan”,
“scan”: {
“#operator”: “IndexScan”,
“index”: “filter”,
“index_id”: “9a45a6e6abd8d4a9”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“"hca"”,
“"negative"”,
“null”
],
“Inclusion”: 3,
“Low”: [
“"hca"”,
“"negative"”,
“null”
]
}
},
{
“Range”: {
“High”: [
“"hca"”,
“"positive"”,
“null”
],
“Inclusion”: 3,
“Low”: [
“"hca"”,
“"positive"”,
“null”
]
}
},
{
“Range”: {
“High”: [
“"hca"”,
“"unscored"”,
“null”
],
“Inclusion”: 3,
“Low”: [
“"hca"”,
“"unscored"”,
“null”
]
}
}
],
“using”: “gsi”
}
},
{
“#operator”: “Fetch”,
“as”: “d”,
“keyspace”: “default”,
“namespace”: “default”
},
{
“#operator”: “Parallel”,
“~child”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “Filter”,
“condition”: “(((((((d
.event_time
) between "2017-01-01T00:00:00.000Z" and "2017-05-20T00:00:00.000Z") and ((d
.client
) = "hca")) and any a
in (d
.avatar_hierarchy
) satisfies ((a
.id
) in [41730]) end) and any s
in (d
.source_hierarchy
) satisfies ((s
.id
) in [100000, 100001]) end) and ((d
.feedback_score_sentiment
) in ["positive", "negative", "unscored"])) and ((d
.task_owner_name
) is null))”
},
{
“#operator”: “InitialProject”,
“result_terms”: [
{
“expr”: “d
”,
“star”: true
}
]
}
]
}
}
]
},
{
“#operator”: “Order”,
“limit”: “10”,
“offset”: “10”,
“sort_terms”: [
{
“desc”: true,
“expr”: “(d
.provider_name
)”
}
]
},
{
“#operator”: “Offset”,
“expr”: “10”
},
{
“#operator”: “Limit”,
“expr”: “10”
},
{
“#operator”: “FinalProject”
}
]
},
“text”: “select d.* from default AS d \nwhere d.event_time between "2017-01-01T00:00:00.000Z" and "2017-05-20T00:00:00.000Z"\nand d.client= "hca"\n AND\nANY a IN d.avatar_hierarchy SATISFIES a.id IN [41730] END AND\nANY s IN d.source_hierarchy SATISFIES s.id IN [100000,100001] END and\nd.feedback_score_sentiment in ["positive","negative","unscored"]\nand d.task_owner_name is null\norder by d.provider_name desc limit 10 offset 10;”
}
]
------------------------------------problem------------------
It taking hell out of time to return the Result and some time i need to interrupter to stop them.
Kindly guide me to make them optimized in either way,we need to test them so that we can proceed further to production or actual development.
Thanks