Hello, we’ve run into an issue when one of our servers, written in C++ using libcouchbase 3.3.8, lost its connection to our Couchbase cluster (Community Edition 6.6.0 build 7909).
The server issues multiple lcb_get calls, each with a set timeout, every second. Upon losing the connection, the get calls started to time out, as expected. But at the same time the server’s memory usage started to grow without bound, going from a stable 4GB to 20GB over two days, after which we killed it manually (the connection never recovered, but that was a network issue on our side).
I was able to reproduce the issue using the latest Community Edition server and the official instancepool example with a slight modification:
# on a debian(linux) host with docker
docker run -it --privileged --name=test-container -v /var/run/docker.sock:/var/run/docker.sock -v `pwd`:/sources debian:bullseye bash
# install required packages
apt update && apt -y install vim g++ valgrind wget gnupg git docker.io
# download sources and switch to 3.3.8 tag
cd /tmp && git clone https://github.com/couchbase/libcouchbase.git && cd libcouchbase && git checkout 3.3.8
# create patch
cat > instancepool.patch << EOF
diff --git a/example/instancepool/main.cc b/example/instancepool/main.cc
index 52244526..e725f10a 100644
--- a/example/instancepool/main.cc
+++ b/example/instancepool/main.cc
@@ -20,15 +20,18 @@
#include <vector>
#include <cstring>
#include <cstdlib>
+#include <signal.h>
using namespace lcb;
extern "C" {
static void get_callback(lcb_INSTANCE *instance, int, const lcb_RESPBASE *rb)
{
+ lcb_STATUS rc;
const lcb_RESPGET *rg = reinterpret_cast< const lcb_RESPGET * >(rb);
- if (lcb_respget_status(rg) != LCB_SUCCESS) {
- fprintf(stderr, "%p: Couldn't get key", instance);
+ rc = lcb_respget_status(rg);
+ if (rc != LCB_SUCCESS) {
+ fprintf(stderr, "%p: Couldn't get key: %s\n", instance, lcb_strerror_short(rc));
} else {
const char *key, *value;
size_t nkey, nvalue;
@@ -51,26 +54,36 @@ class MyPool : public Pool
// care about
fprintf(stderr, "Initializing %p\n", instance);
lcb_install_callback(instance, LCB_CALLBACK_GET, get_callback);
+ lcb_U32 tmo = 50 * 1000;
+ lcb_cntl(instance, LCB_CNTL_SET, LCB_CNTL_OP_TIMEOUT, &tmo);
}
};
+static int running = 1;
+static void sigint_handler(int unused)
+{
+ running = 0;
+}
+
extern "C" {
static void *pthr_func(void *arg)
{
Pool *pool = reinterpret_cast< Pool * >(arg);
- lcb_CMDGET *gcmd;
- lcb_cmdget_create(&gcmd);
- lcb_cmdget_key(gcmd, "foo", 3);
// Get an instance to use
lcb_INSTANCE *instance = pool->pop();
- // Issue the command
- lcb_get(instance, NULL, gcmd);
- lcb_cmdget_destroy(gcmd);
+ while (running) {
+ lcb_CMDGET *gcmd;
+ lcb_cmdget_create(&gcmd);
+ lcb_cmdget_key(gcmd, "foo", 3);
+ // Issue the command
+ lcb_get(instance, NULL, gcmd);
+ lcb_cmdget_destroy(gcmd);
- // Wait for the command to complete
- lcb_wait(instance, LCB_WAIT_DEFAULT);
+ // Wait for the command to complete
+ lcb_wait(instance, LCB_WAIT_DEFAULT);
+ }
// Release back to pool
pool->push(instance);
@@ -95,7 +108,7 @@ int main(int argc, char *argv[])
lcb_createopts_credentials(options, argv[3], strlen(argv[3]), argv[2], strlen(argv[2]));
}
- pool = new MyPool(options, 5);
+ pool = new MyPool(options, NUM_WORKERS);
err = pool->connect();
if (err != LCB_SUCCESS) {
@@ -103,6 +116,13 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
+ /* setup CTRL-C handler */
+ struct sigaction action;
+ sigemptyset(&action.sa_mask);
+ action.sa_handler = sigint_handler;
+ action.sa_flags = 0;
+ sigaction(SIGINT, &action, NULL);
+
for (size_t ii = 0; ii < NUM_WORKERS; ii++) {
pthread_create(&workers[ii], NULL, pthr_func, pool);
}
@@ -117,3 +137,4 @@ int main(int argc, char *argv[])
lcb_createopts_destroy(options);
return 0;
}
+
EOF
# apply patch
patch example/instancepool/main.cc instancepool.patch
# add couchbase repo
wget -q https://packages.couchbase.com/clients/c/repos/deb/couchbase.key -O- | apt-key add -
echo "deb https://packages.couchbase.com/clients/c/repos/deb/debian11 bullseye bullseye/main" >> /etc/apt/sources.list
apt update && apt install -y libcouchbase3 libcouchbase-dev libcouchbase-dbg
# compile modified test
g++ -std=c++17 -o /tmp/couchbase-instancepool example/instancepool/main.cc example/instancepool/pool.cc -lcouchbase -lpthread
# start and setup couchbase server
docker run -d --name db -p 8091-8096:8091-8096 -p 11210-11211:11210-11211 couchbase:community-7.2.2
sleep 10 # make sure server is initialized
docker exec db couchbase-cli cluster-init --cluster-name test-cluster --cluster-username Administrator --cluster-password 123456
sleep 10 # make sure cluster is initialized
docker exec db couchbase-cli bucket-create --cluster localhost --username Administrator --password 123456 --bucket test-bucket --bucket-type couchbase --bucket-ramsize 512
db_ip=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' db)
# start test
/tmp/couchbase-instancepool couchbase://${db_ip}/test-bucket 123456 Administrator
# in another terminal stop the DB container
docker stop db
After running for roughly two hours, memory usage grows from the initial ~20MB to ~600MB (RSS is reported in kilobytes):
$ ps -p 6381 -o %mem,rss,cmd
%MEM RSS CMD
1.8 609152 /tmp/couchbase-instancepool couchbase://172.17.0.3/test-bucket 123456 Administrator
One thing I noticed is that when the database is brought back up (`docker start db`) and then taken down again (`docker stop db`), the memory usage does not start to grow immediately; it takes a while. It looks to me like some kind of buffer (maybe a retry buffer) is filling up. Even so, I don’t think it should grow indefinitely, as it does.