LEFT OUTER JOIN + WHERE clause

Hi @geraldss, manusyone here (I can’t login in the couchbase forums for some reason…).

We’re currently using Couchbase Server Community (4.5.0). Let me know if you need more info :slight_smile: thanks!

Try Couchbase 4.5.1.

@manusyone, As @geraldss suggested it is working correctly for above query.

Are you looking applying (order.docType=“Order” OR order.docType IS MISSING) AND order.clientId=“9281ae36-a418-4ea3-93f0-bfd7b1a38248”) as part of JOIN and when not matched you want to left document and project right document as null ?
In that case drop and re-create indexes and try following.

CREATE INDEX idx_storeType ON myBucket(status, docId) WHERE docType = "Store";
CREATE INDEX idx_orderStoreId ON myBucket( "Store::" || storeId );

SELECT s.status, o.clientId, o.status as orderStatus, s.docId
FROM myBucket s LEFT OUTER JOIN myBucket o ON KEY ("Store::" || o.storeId) FOR s
LET o = CASE WHEN ((o.docType="Order" OR o.docType IS MISSING) AND o.clientId="9281ae36-a418-4ea3-93f0-bfd7b1a38248") THEN o ELSE MISSING END
WHERE s.docType="Store" AND s.status="published";
1 Like

@vsr1, yes, that is exactly what I want. But I am a bit confused on the indices you’ve created:

  • Should in the first index be WHERE docType="Order"? And do you mean clientId instead of docId?
  • What’s the rationale for the first index taking into account the first question?
  • What if I want to make additional filters (some optional) for the orders? Does that mean I’ll have to create more indices?

I think I am having a hard time to understand what’s the criteria for index creation… I would greatly appreciate your help!

@manusyone, First index idx_storeType is used for LEFT side JOIN of bucket (i.e. s (s means store, o order from your query)). Second index idx_orderStoreId is used for ON clause due to index join.

If query has predicates on LEFT side of bucket and you can move those to index.
In case of LEFT OUTER JOIN any predicates on right side bucket will eliminates NULL projected items. Above we have moved them to LET and projected as NULL.

The following article will walk through steps Designing Index for Query in Couchbase N1QL.

Hey @vsr1 , thank you for your help.

Following the steps you’ve indicated returned a lot more results (1512 vs the expected 30), with different document types:

[
  {
    "docId": "0af38eef-9d68-4852-a9c5-baed11f0162f",
    "docType": "DiscountCode",
    "status": "published",
    "orderStatus": "bound",
    "clientId": "2a4dedb0-f6b7-4232-bb55-3933b4e33976"
  },
  {
    "docId": "0af38eef-9d68-4852-a9c5-baed11f0162f",
    "docType": "DiscountCode",
    "status": "published",
    "orderStatus": "bound",
    "clientId": "e1a85700-43f0-420a-a3cc-bcc672ce2bf6"
  },
  {
    "docId": "0af38eef-9d68-4852-a9c5-baed11f0162f",
    "docType": "DiscountCode",
    "status": "published",
    "orderStatus": "bound",
    "clientId": "32dcc414-f177-41d3-a29a-d7ec96d7dc56"
  },

Here’s the EXPLAIN:

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((`s`.`status`))",
            "cover ((`s`.`docId`))",
            "cover ((meta(`s`).`id`))"
          ],
          "filter_covers": {
            "cover ((`s`.`docType`))": "Store"
          },
          "index": "idx_storeType",
          "index_id": "9a16908dea855a31",
          "keyspace": "myBucket",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "High": [
                  "successor(\"published\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"published\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "IndexJoin",
                "as": "o",
                "for": "s",
                "keyspace": "myBucket",
                "namespace": "default",
                "on_key": "(\"Store::\" || (`o`.`storeId`))",
                "outer": true,
                "scan": {
                  "index": "idx_storeId",
                  "index_id": "7593bec3ba5d1a53",
                  "using": "gsi"
                }
              },
              {
                "#operator": "Let",
                "bindings": [
                  {
                    "expr": "case when ((((`o`.`docType`) = \"Order\") or ((`o`.`docType`) is missing)) and ((`o`.`userId`) = \"9281ae36-a418-4ea3-93f0-bfd7b1a38248\")) then `o` else null end",
                    "var": "o"
                  }
                ]
              },
              {
                "#operator": "Filter",
                "condition": "((cover ((`s`.`docType`)) = \"Store\") and (cover ((`s`.`status`)) = \"published\"))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "cover ((`s`.`status`))"
                  },
                  {
                    "expr": "(`o`.`userId`)"
                  },
                  {
                    "as": "orderStatus",
                    "expr": "(`o`.`status`)"
                  },
                  {
                    "expr": "cover ((`s`.`docId`))"
                  },
                  {
                    "expr": "(`o`.`docType`)"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "SELECT s.status, o.userId, o.status as orderStatus, s.docId, o.docType\nFROM myBucket s \nLEFT OUTER JOIN myBucket o ON KEY (\"Store::\" || o.storeId) FOR s\nLET o = CASE WHEN ((o.docType=\"Order\" OR o.docType IS MISSING) AND o.userId=\"9281ae36-a418-4ea3-93f0-bfd7b1a38248\") THEN o ELSE MISSING END\nWHERE s.docType=\"Store\" AND s.status=\"published\";"
  }
]

Anything that I am still missing?

@manusyone, Try the following query.

CREATE INDEX idx_storeType ON default(status, docId) WHERE docType = "Store";
CREATE INDEX idx_orderClientId ON default( clientId,docType, status, storeId ) WHERE ((`docType` IS MISSING) OR (`docType` = "Order"));
    SELECT o[0].docType, s.status, o[0].clientId, o[0].status as orderStatus, s.docId
    FROM default s
    LET o = ARRAY v FOR v IN (SELECT o.docType, o.clientId, o.status, o.storeId FROM default o WHERE (o.docType IS MISSING OR o.docType="Order") AND o.clientId="9281ae36-a418-4ea3-93f0-bfd7b1a38248") WHEN  ("Store::" || v.storeId) = meta(s).id END
    WHERE s.docType="Store" AND s.status="published";

In above query o has array of orders same clientId and storeId in case if you have multiple orders for same client on same store. You can choose how to project it.

1 Like

Hello @vsr1.

I’ve tried the code above, and I now get all of the stores as supposed to, but with it I never get any orders, i.e. the variable o is always an empty array.

Here are some results:

[
  {
    "docId": "04750480-63f3-4fa9-bb61-22d128ac3c08",
    "status": "published"
  },
  {
    "docId": "0af38eef-9d68-4852-a9c5-baed11f0162f",
    "status": "published"
  },
  {
    "docId": "0c05dd94-a6b1-489a-af0c-53b96e0c8375",
    "status": "published"
  },

Here’s the EXPLAIN:

  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((`s`.`status`))",
            "cover ((`s`.`docId`))",
            "cover ((meta(`s`).`id`))"
          ],
          "filter_covers": {
            "cover ((`s`.`docType`))": "Store"
          },
          "index": "idx_storeType",
          "index_id": "91601f6e14dc3506",
          "keyspace": "default",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "High": [
                  "successor(\"published\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"published\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Let",
                "bindings": [
                  {
                    "expr": "array `v` for `v` in (select (`o`.`docType`), (`o`.`clientId`), (`o`.`status`), (`o`.`storeId`) from `default` as `o` where ((((`o`.`docType`) is missing) or ((`o`.`docType`) = \"Order\")) and ((`o`.`clientId`) = \"6019c9bc-548f-4859-907b-49cd0a5fa9ba\"))) when ((\"Store::\" || (`v`.`storeId`)) = cover ((meta(`s`).`id`))) end",
                    "var": "o"
                  }
                ]
              },
              {
                "#operator": "Filter",
                "condition": "((cover ((`s`.`docType`)) = \"Store\") and (cover ((`s`.`status`)) = \"published\"))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "((`o`[0]).`docType`)"
                  },
                  {
                    "expr": "cover ((`s`.`status`))"
                  },
                  {
                    "expr": "((`o`[0]).`clientId`)"
                  },
                  {
                    "as": "orderStatus",
                    "expr": "((`o`[0]).`status`)"
                  },
                  {
                    "expr": "cover ((`s`.`docId`))"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "SELECT o[0].docType, s.status, o[0].clientId, o[0].status as orderStatus, s.docId\nFROM default s\nLET o = ARRAY v FOR v IN (SELECT o.docType, o.clientId, o.status, o.storeId FROM default o WHERE (o.docType IS MISSING OR o.docType=\"Order\") AND o.clientId=\"6019c9bc-548f-4859-907b-49cd0a5fa9ba\") WHEN  (\"Store::\" || v.storeId) = meta(s).id END\nWHERE s.docType=\"Store\" AND s.status=\"published\";"
  }
]

I may be wrong, but it seems like the second index is not being used?

PS: In our case, a client can only have one order per store (bad analogy, I know), so we don’t need to process orders into an array.

The second index used by this query. Execute this stand alone and see what results you get.

SELECT o.docType, o.clientId, o.status, o.storeId FROM default o WHERE (o.docType IS MISSING OR o.docType="Order") AND o.clientId="9281ae36-a418-4ea3-93f0-bfd7b1a38248"

Oh. Please change bucket names all the places. I have used default

No worries, I do a quick find and replace. Here are some results (there are 45 in total now):

[
  {
    "docType": "Order",
    "storeId": "3a7059c1-41cb-4693-88c3-ea50628d8af2",
    "status": "pending",
    "clientId": "6019c9bc-548f-4859-907b-49cd0a5fa9ba"
  },
  {
    "docType": "Order",
    "storeId": "0af38eef-9d68-4852-a9c5-baed11f0162f",
    "status": "started",
    "clientId": "6019c9bc-548f-4859-907b-49cd0a5fa9ba"
  },
  {
    "docType": "Order",
    "storeId": "0ee73e49-3c1b-41f8-bd39-1e1ed256ada6",
    "status": "started",
    "clientId": "6019c9bc-548f-4859-907b-49cd0a5fa9ba"
  },

EXPLAIN here:

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((`o`.`clientId`))",
            "cover ((`o`.`docType`))",
            "cover ((`o`.`status`))",
            "cover ((`o`.`storeId`))",
            "cover ((meta(`o`).`id`))"
          ],
          "index": "idx_orderClientId",
          "index_id": "57dc17b601f59883",
          "keyspace": "default",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "High": [
                  "successor(\"6019c9bc-548f-4859-907b-49cd0a5fa9ba\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"6019c9bc-548f-4859-907b-49cd0a5fa9ba\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Filter",
                "condition": "(((cover ((`o`.`docType`)) is missing) or (cover ((`o`.`docType`)) = \"Order\")) and (cover ((`o`.`clientId`)) = \"6019c9bc-548f-4859-907b-49cd0a5fa9ba\"))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "cover ((`o`.`docType`))"
                  },
                  {
                    "expr": "cover ((`o`.`clientId`))"
                  },
                  {
                    "expr": "cover ((`o`.`status`))"
                  },
                  {
                    "expr": "cover ((`o`.`storeId`))"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "SELECT o.docType, o.clientId, o.status, o.missionId \nFROM default o \nWHERE (o.docType IS MISSING OR o.docType=\"Order\") AND o.clientId=\"6019c9bc-548f-4859-907b-49cd0a5fa9ba\""
  }
]

Can you project meta(s).id from yesterday’s query.

Make sure WHEN clause matches meta(s).id relation of storeId

Sorry, does that mean add meta(s).id to the SELECT statement?

Yes, temporarly to outer select until we know why it is not matching

Ran this query:

SELECT meta(s).id, o[0].docType, s.status, o[0].clientId, o[0].status as orderStatus, s.docId
FROM default s
LET o = ARRAY v FOR v IN (SELECT o.docType, o.clientId, o.status, o.storeId FROM default o WHERE (o.docType IS MISSING OR o.docType="Order") AND o.clientId="9281ae36-a418-4ea3-93f0-bfd7b1a38248") WHEN  ("Store::" || v.storeId) = meta(s).id END
WHERE s.docType="Store" AND s.status="published";

Here are some results:

[
  {
    "docId": "04750480-63f3-4fa9-bb61-22d128ac3c08",
    "id": "Store::04750480-63f3-4fa9-bb61-22d128ac3c08",
    "status": "published"
  },
  {
    "docId": "0af38eef-9d68-4852-a9c5-baed11f0162f",
    "id": "Store::0af38eef-9d68-4852-a9c5-baed11f0162f",
    "status": "published"
  },
  {
    "docId": "0c05dd94-a6b1-489a-af0c-53b96e0c8375",
    "id": "Store::0c05dd94-a6b1-489a-af0c-53b96e0c8375",
    "status": "published"
  },
  {
    "docId": "0ee73e49-3c1b-41f8-bd39-1e1ed256ada6",
    "id": "Store::0ee73e49-3c1b-41f8-bd39-1e1ed256ada6",
    "status": "published"
  },

And the EXPLAIN

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((`s`.`status`))",
            "cover ((`s`.`docId`))",
            "cover ((meta(`s`).`id`))"
          ],
          "filter_covers": {
            "cover ((`s`.`docType`))": "Store"
          },
          "index": "idx_storeType",
          "index_id": "91601f6e14dc3506",
          "keyspace": "default",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "High": [
                  "successor(\"published\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"published\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Let",
                "bindings": [
                  {
                    "expr": "array `v` for `v` in (select (`o`.`docType`), (`o`.`clientId`), (`o`.`status`), (`o`.`storeId`) from `default` as `o` where ((((`o`.`docType`) is missing) or ((`o`.`docType`) = \"Order\")) and ((`o`.`clientId`) = \"9281ae36-a418-4ea3-93f0-bfd7b1a38248\"))) when ((\"Store::\" || (`v`.`storeId`)) = cover ((meta(`s`).`id`))) end",
                    "var": "o"
                  }
                ]
              },
              {
                "#operator": "Filter",
                "condition": "((cover ((`s`.`docType`)) = \"Store\") and (cover ((`s`.`status`)) = \"published\"))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "cover ((meta(`s`).`id`))"
                  },
                  {
                    "expr": "((`o`[0]).`docType`)"
                  },
                  {
                    "expr": "cover ((`s`.`status`))"
                  },
                  {
                    "expr": "((`o`[0]).`clientId`)"
                  },
                  {
                    "as": "orderStatus",
                    "expr": "((`o`[0]).`status`)"
                  },
                  {
                    "expr": "cover ((`s`.`docId`))"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "\nSELECT meta(s).id, o[0].docType, s.status, o[0].clientId, o[0].status as orderStatus, s.docId\nFROM default s\nLET o = ARRAY v FOR v IN (SELECT o.docType, o.clientId, o.status, o.storeId FROM default o WHERE (o.docType IS MISSING OR o.docType=\"Order\") AND o.clientId=\"9281ae36-a418-4ea3-93f0-bfd7b1a38248\") WHEN  (\"Store::\" || v.storeId) = meta(s).id END\nWHERE s.docType=\"Store\" AND s.status=\"published\";"
  }
]

If it’s more preferable for you, I’d be happy to have a call with you in order to sort this out!

It looks like meta().id of the docType=“Store” storeId of docType=“Order” doesn’t match. You might used UUID() which never generates same. Please check those and you should be good.

If you want verify use the following sample data with clientId=“9281ae36-a418-4ea3-93f0-bfd7b1a38248” in query.

insert into default values ("Store::001", {"status":"published", "docType":"Store"}), ("Store::002", {"status":"published", "docType":"Store"}),("Store::003", {"status":"published", "docType":"Store"}),("Store::004", {"status":"published", "docType":"Store"}), ("Store::007", {"status":"published", "docType":"Store"});
insert into default values ("Orders::001", {"docType":"Order", "clientId":"9281ae36-a418-4ea3-93f0-bfd7b1a38248","status":"closed","storeId":"001"}), ("Orders::002", {"docType":"order", "clientId":"9281ae36-a418-4ea3-93f0-bfd7b1a38248","status":"closed","storeId":"002"}),("Orders::003", {"clientId":"9281ae36-a418-4ea3-93f0-bfd7b1a38248","status":"closed","storeId":"003"}),("Orders::004", {"docType":"Order", "clientId":"8281ae36-a418-4ea3-93f0-bfd7b1a38248","status":"closed","storeId":"004"}),("Orders::005", {"docType":"Order", "clientId":"9281ae36-a418-4ea3-93f0-bfd7b1a38248","status":"closed","storeId":"005"}), ("Orders::101", {"docType":"ppOrder", "clientId":"9281ae36-a418-4ea3-93f0-bfd7b1a38248","status":"closed","storeId":"001"});

@vsr1, It doesn’t since the meta().id also has a prefix (e.g. “Store::xxxxx”), whilst the ids inside the documents only have the id part (i.e. xxxxxx). For that reason we perform the concatenation on the query with the prefix.

If I take one of the storeIds from the results and I make the following query:

SELECT o.docType, o.clientId, o.status, o.storeId 
FROM default o 
WHERE o.docType="Order" AND o.storeId="04750480-63f3-4fa9-bb61-22d128ac3c08"

Here are some results:

[
  {
    "docType": "Order",
    "storeId": "04750480-63f3-4fa9-bb61-22d128ac3c08",
    "status": "started",
    "clientId": "834be7eb-377c-4c54-8970-8c75fc703550"
  },
  {
    "docType": "Order",
    "storeId": "04750480-63f3-4fa9-bb61-22d128ac3c08",
    "status": "started",
    "clientId": "7af9ab9a-79ad-4246-bfb7-feb2269e1ead"
  },

If I do the reverse (join Orders with Stores), then I am able to get the correct result, which should mean that the ids are matching… Right?

Please run the full query with clientId = “6019c9bc-548f-4859-907b-49cd0a5fa9ba”

(I’ve used another id, removed that client meanwhile):
Query:

SELECT o.docType, o.clientId, o.status, o.storeId 
FROM default o 
WHERE o.docType="Order" 
AND o.storeId="04750480-63f3-4fa9-bb61-22d128ac3c08"
AND o.clientId="834be7eb-377c-4c54-8970-8c75fc703550"

Result:

[
  {
    "docType": "Order",
    "storeId": "04750480-63f3-4fa9-bb61-22d128ac3c08",
    "status": "started",
    "clientId": "834be7eb-377c-4c54-8970-8c75fc703550"
  }
]

and the EXPLAIN:

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "covers": [
            "cover ((`o`.`clientId`))",
            "cover ((`o`.`docType`))",
            "cover ((`o`.`status`))",
            "cover ((`o`.`storeId`))",
            "cover ((meta(`o`).`id`))"
          ],
          "index": "idx_orderClientId",
          "index_id": "57dc17b601f59883",
          "keyspace": "default",
          "namespace": "default",
          "spans": [
            {
              "Range": {
                "High": [
                  "\"834be7eb-377c-4c54-8970-8c75fc703550\"",
                  "successor(\"Order\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"834be7eb-377c-4c54-8970-8c75fc703550\"",
                  "\"Order\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Filter",
                "condition": "(((cover ((`o`.`docType`)) = \"Order\") and (cover ((`o`.`storeId`)) = \"04750480-63f3-4fa9-bb61-22d128ac3c08\")) and (cover ((`o`.`clientId`)) = \"834be7eb-377c-4c54-8970-8c75fc703550\"))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "cover ((`o`.`docType`))"
                  },
                  {
                    "expr": "cover ((`o`.`clientId`))"
                  },
                  {
                    "expr": "cover ((`o`.`status`))"
                  },
                  {
                    "expr": "cover ((`o`.`storeId`))"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "SELECT o.docType, o.clientId, o.status, o.storeId \nFROM default o \nWHERE o.docType=\"Order\" AND o.storeId=\"04750480-63f3-4fa9-bb61-22d128ac3c08\" AND o.clientId=\"834be7eb-377c-4c54-8970-8c75fc703550\""
  }
]