Very slow query with index based on variations

I have the following index:
CREATE INDEX `cleanup_flex_old_docs` ON `data1`((split((meta().`id`), "::")[0]),(meta().`id`)) WHERE ((split((meta().`id`), "::")[3]) = "eventsiteeditor-service") WITH { "defer_build"=true }

This query does not timeout (very fast response):
query: SELECT count(1) FROM data1 WHERE (split((meta().`id`), "::")[3]) = "eventsiteeditor-service" and (split((meta().`id`), "::")[0]) is not null

This one does:
query: SELECT count(1) FROM data1 WHERE (split((meta().`id`), "::")[3]) = "eventsiteeditor-service"

The explanations on the queries:

  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((split((meta(`data1`).`id`), \"::\")[0]))",
            "cover ((meta(`data1`).`id`))",
            "cover ((meta(`data1`).`id`))"
          ],
          "filter_covers": {
            "cover ((split((meta(`data1`).`id`), \"::\")[3]))": "eventsiteeditor-service"
          },
          "index": "cleanup_flex_old_docs",
          "index_id": "7c50f6ee1044af57",
          "keyspace": "data1",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "Inclusion": 0,
                "Low": [
                  "null",
                  "null"
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Filter",
                "condition": "((cover ((split((meta(`data1`).`id`), \"::\")[3])) = \"eventsiteeditor-service\") and (cover ((split((meta(`data1`).`id`), \"::\")[0])) is not null))"
              },
              {
                "#operator": "InitialGroup",
                "aggregates": [
                  "count(1)"
                ],
                "group_keys": []
              }
            ]
          }
        },
        {
          "#operator": "IntermediateGroup",
          "aggregates": [
            "count(1)"
          ],
          "group_keys": []
        },
        {
          "#operator": "FinalGroup",
          "aggregates": [
            "count(1)"
          ],
          "group_keys": []
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "count(1)"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "SELECT count(1) FROM data1  WHERE (split((meta().`id`), \"::\")[3]) = \"eventsiteeditor-service\" and (split((meta().`id`), \"::\")[0]) is not null"
  }
]```

and:
```[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((meta(`data1`).`id`))",
            "cover ((meta(`data1`).`id`))"
          ],
          "index": "metaid_idx_1",
          "index_id": "d3b5a4866def0c28",
          "keyspace": "data1",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "Inclusion": 0,
                "Low": [
                  "null"
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Filter",
                "condition": "((split(cover ((meta(`data1`).`id`)), \"::\")[3]) = \"eventsiteeditor-service\")"
              },
              {
                "#operator": "InitialGroup",
                "aggregates": [
                  "count(1)"
                ],
                "group_keys": []
              }
            ]
          }
        },
        {
          "#operator": "IntermediateGroup",
          "aggregates": [
            "count(1)"
          ],
          "group_keys": []
        },
        {
          "#operator": "FinalGroup",
          "aggregates": [
            "count(1)"
          ],
          "group_keys": []
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "count(1)"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "SELECT count(1) FROM data1  WHERE (split((meta().`id`), \"::\")[3]) = \"eventsiteeditor-service\""
  }
]```

The above Query can’t use the index cleanup_flex_old_docs because leading index key not present as part of WHERE clause. So it using metaid_idx_1.
You can try the following query.

SELECT count(split((meta().id), "::")[0]) FROM data1 WHERE (split((meta().id), "::")[3]) = "eventsiteeditor-service"

NOTE: Indexes uses b-tree and leading key needs to be present as part of predicate. COUNT(expr) means count non null values. So we automatically rewrite some causes query as WHERE … AND expr IS NOT NULL.

1 Like