Hi Team, n @keshav_m,@geraldss
Currently i’m using CB v 4.5 with linux 16.04 and having approx 1 million of records, with enable FTS as well.
Below is the structure of doc(small snippet)
{
“ACTIVE_FLAG”: true,
“ANONYMOUS_FLAG”: false,
“TYPE”: “DOCUMENT”,
“AUTHOR”: {
“AUTHOR_ID”: “Jazzy4md”,
“AUTHOR_NAME”: “Jazzy4md”
},
“AUTHOR_POST_FLAG”: false,
“AVGSCORE”: [
{
“CATEGORY_ID”: 4000,
“INSIGHT”: 2,
“SCORE”: -1
},
{
“CATEGORY_ID”: 4001,
“INSIGHT”: 3,
“SCORE”: 0
},
{
“CATEGORY_ID”: 4002,
“INSIGHT”: 4,
“SCORE”: 1
},
{
“CATEGORY_ID”: 4003,
“INSIGHT”: 3,
“SCORE”: 0
},
{
“CATEGORY_ID”: 4004,
“INSIGHT”: 4,
“SCORE”: 1
},
{
“CATEGORY_ID”: 4005,
“INSIGHT”: 3,
“SCORE”: 0
}
],
“CONTENTS”: [
{
“ACTIVE_FLAG”: true,
“CREATED_BY”: “thread86@bfountain.com”,
“CREATED_TIME”: “2016-03-08T04:05:17Z”,
“ID”: 95218520,
“MD5”: “a6105c0a611b41b08f1209506350279e”,
“MODELS”: “*”,
“MODIFIED_BY”: “thread86@bfountain.com”,
“MODIFIED_TIME”: “2016-03-08T09:35:17Z”,
“NLP_FLAG”: false,
“OPTION_ID”: 1000100000,
“PHI_FLAG”: false,
“PREDEFINED”: false,
“PROFANITY_FLAG”: false,
“PUBLISHING_API_STATUS”: “H”,
“QUESTION_ID”: 1000000075,
“RESPONSE”: “yes”,
“RESPONSE_EN”: “yes”,
“STATUS”: “Q”,
“WORD_COUNT”: 0
}]
}
so we put most of statics data on AVGSCORE,
so create index is:
cbq> CREATE INDEX idx_ns_t ON default (DISTINCT ARRAY i.CATEGORY_ID FOR i IN AVGSCORE END, AVGSCORE,PERSON_ID) WHERE MENTION_TIME>=‘2001-01-01’ AND (array_length(AVGSCORE) >0) and TYPE=“DOCUMENT”;
and select query is below:
SELECT avg(i.SCORE) FROM default t use index(idx_ns_t) UNNEST t.AVGSCORE AS i WHERE (i.CATEGORY_ID =4000) AND (array_length(t.AVGSCORE) >0) AND (t.MENTION_TIME>=‘2001-01-01’) and t.TYPE=“DOCUMENT”;
but its very much slow ,time to execute is near a minute. as well some time it give me
“msg”: “Index scan timed out - cause: Index scan timed out”.
below is the explain as well,please let me know ,how can i optimized this query
explain SELECT avg(i.SCORE) FROM default t UNNEST t.AVGSCORE AS i WHERE (i.CATEGORY_ID =4000) AND (array_length(t.AVGSCORE) >0) AND (t.MENTION_TIME>=‘2001-01-01’) and t.TYPE=“DOCUMENT”;
{
“requestID”: “7a73401d-edad-4dab-a2db-230b8d1fef30”,
“signature”: “json”,
“results”: [
{
“plan”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “DistinctScan”,
“scan”: {
“#operator”: “IndexScan”,
“covers”: [
“cover ((distinct (array (i
.CATEGORY_ID
) for i
in (t
.AVGSCORE
) end)))”,
“cover ((t
.AVGSCORE
))”,
“cover ((t
.PERSON_ID
))”,
“cover ((meta(t
).id
))”
],
“filter_covers”: {
“cover (("2001-01-01" \u003c= (t
.MENTION_TIME
)))”: true,
“cover ((0 \u003c array_length((t
.AVGSCORE
))))”: true,
“cover ((t
.TYPE
))”: “DOCUMENT”
},
“index”: “idx_ns_t”,
“index_id”: “66b173f89fd66046”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“4000”
],
“Inclusion”: 3,
“Low”: [
“4000”
]
}
}
],
“using”: “gsi”
}
},
{
“#operator”: “Parallel”,
“~child”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “Unnest”,
“as”: “i”,
“expr”: “cover ((t
.AVGSCORE
))”
},
{
“#operator”: “Filter”,
“condition”: “(((((i
.CATEGORY_ID
) = 4000) and cover ((0 \u003c array_length((t
.AVGSCORE
))))) and cover (("2001-01-01" \u003c= (t
.MENTION_TIME
)))) and (cover ((t
.TYPE
)) = "DOCUMENT"))”
},
{
“#operator”: “InitialGroup”,
“aggregates”: [
“avg((i
.SCORE
))”
],
“group_keys”:
}
]
}
},
{
“#operator”: “IntermediateGroup”,
“aggregates”: [
“avg((i
.SCORE
))”
],
“group_keys”:
},
{
“#operator”: “FinalGroup”,
“aggregates”: [
“avg((i
.SCORE
))”
],
“group_keys”:
},
{
“#operator”: “Parallel”,
“~child”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “InitialProject”,
“result_terms”: [
{
“expr”: “avg((i
.SCORE
))”
}
]
},
{
“#operator”: “FinalProject”
}
]
}
}
]
},
“text”: “SELECT avg(i.SCORE) FROM default t UNNEST t.AVGSCORE AS i WHERE (i.CATEGORY_ID =4000) AND (array_length(t.AVGSCORE) \u003e0) AND (t.MENTION_TIME\u003e=‘2001-01-01’) and t.TYPE="DOCUMENT"”
}
],
“status”: “success”,
“metrics”: {
“elapsedTime”: “10.377563ms”,
“executionTime”: “10.339567ms”,
“resultCount”: 1,
“resultSize”: 4622
}