Hi @makeawish
Eventing can be ideal to do this it is similar to the enrichment example (refer to Data Enrichment | Couchbase Docs).
For you specific needs I made an Eventing Function to generate a test set of 1M test documents like this
KEY: 00000fa3-5680-407b-9a0f-af9a17fb5baf
{
"_id": "00000fa3-5680-407b-9a0f-af9a17fb5baf",
"_type": "Residential",
"Changes": [61.86272836625059, 62.68936110399503, 8.105578035303562],
"Images": [83.07921141724484, 76.44777315916298, 3.523482294843383],
"Record": {
"Address": {
"Latitude": 42.900352929601496,
"Longitude": -109.21260185980817,
"other_address": 0.3689035282651276
},
"other_record": 0.7962077930432989
}
}
These 1,000,000 documents were made via this one time Eventing function which is single threaded so I only got 7K test docs per seconds. But here I don’t care about speed just making a test set.
Now deploy this function (from Everything) let run until the ‘test’ bucket has 1M docs and we have 1 processed mutation then undeploy it FYI I named the Eventing Function “gen_geo_docs”
// source bucket 'control'
// bucket alias test_bkt to bucket 'test' in r+w mode
// 1 worker, script timeout at 600 sec.
// mutate 'gen_geo_docs::1' in bucket 'control' to make 1M docs
function randFloat(min,max) {
return Math.random() * (max - min) + min;
}
function createUUID() {
return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {http://192.168.3.150:8091/ui/index.html#/eventing/summary?scenarioZoom=minute
var r = Math.random() * 16 | 0, v = c == 'x' ? r : (r & 0x3 | 0x8);
return v.toString(16);
});
}
function OnUpdate(doc, meta) {
if (meta.id !== "gen_geo_docs::1") return;
var DOCS_TO_MAKE = 1000000;
for (var i = 0; i < DOCS_TO_MAKE; i++) {
if (i % 10000 == 0) {
log('generated ' + i + ' docs');
}
var newdoc = {
"_id": createUUID(),
"_type": "Residential",
"Changes": [randFloat(0, 100), randFloat(0, 100), randFloat(0, 100)],
"Images": [randFloat(0, 100), randFloat(0, 100), randFloat(0, 100)],
"Record": {
"Address": {
"Latitude": randFloat(10.0, 50.0),
"Longitude": randFloat(-122.9, -100.2),
"other_address": Math.random()
},
"other_record": Math.random()
}
};
try {
test_bkt[newdoc._id] = newdoc;
} catch (e) {
log('ERROR on '+i,e);
}
}
}
Okay so we have our test set ready to enrich so we have documents like the following
KEY: 00000fa3-5680-407b-9a0f-af9a17fb5baf
{
"_id": "00000fa3-5680-407b-9a0f-af9a17fb5baf",
"_type": "Residential",
"Changes": [61.86272836625059, 62.68936110399503, 8.105578035303562],
"Images": [83.07921141724484, 76.44777315916298, 3.523482294843383],
"Record": {
"Address": {
"Latitude": 42.900352929601496,
"Longitude": -109.21260185980817,
"other_address": 0.3689035282651276
},
"other_record": 0.7962077930432989
}
}
And we want to transform them into something new by adding your geo field.
"geo": [-109.21260185980817,42.900352929601496],
To the existing document (we could delete the OLD info from the address too if we want.
KEY: 00000fa3-5680-407b-9a0f-af9a17fb5baf
{
"_id": "00000fa3-5680-407b-9a0f-af9a17fb5baf",
"_type": "Residential",
"Changes": [61.86272836625059, 62.68936110399503, 8.105578035303562],
"Images": [83.07921141724484, 76.44777315916298, 3.523482294843383],
"Record": {
"Address": {
"Latitude": 42.900352929601496,
"Longitude": -109.21260185980817,
"other_address": 0.3689035282651276
},
"other_record": 0.7962077930432989
},
"geo": [-109.21260185980817,42.900352929601496],
}
So we make a new Eventing function with 8 workers and we can process 45K docs a seconds I will call this Eventing Function “enrich_geo_docs”. Note this Eventing Function without comments is just five (5) lines of code.
function OnUpdate(doc, meta) {
if (doc._type !== "Residential") return;
// make new lon,lat property
doc.geo = [ doc.Record.Longitude, doc.Record.Latitude ];
// optional delete unneeded properties
// delete doc.Record.Longitude;
// delete doc.Record.Latitude;
// for 6.5+ update source bucket directly
test_bkt[meta.id] = doc;
}
Now deploy the above (make sure the test generator is undeployed) it let it run in about 25 seconds it will have enriched 1M docs and our set will contain items as follows:
KEY: 00000fa3-5680-407b-9a0f-af9a17fb5baf
{
"_id": "00000fa3-5680-407b-9a0f-af9a17fb5baf",
"_type": "Residential",
"Changes": [61.86272836625059, 62.68936110399503, 8.105578035303562],
"Images": [83.07921141724484, 76.44777315916298, 3.523482294843383],
"Record": {
"Address": {
"Latitude": 42.900352929601496,
"Longitude": -109.21260185980817,
"other_address": 0.3689035282651276
},
"other_record": 0.7962077930432989
},
"geo": [-109.21260185980817, 42.900352929601496]
}
If I alter the function again
// make new lon,lat property (comment doc.geo = .., and uncomment the delete ... statements)
// doc.geo = [ doc.Record.Address.Longitude, doc.Record.Address.Latitude ];
// optional delete unneeded properties
delete doc.Record.Address.Longitude;
delete doc.Record.Address.Latitude;
And undeploy then (re) deploy the function and in 25 seconds we get the following:
KEY: 00000fa3-5680-407b-9a0f-af9a17fb5baf
{
"_id": "00000fa3-5680-407b-9a0f-af9a17fb5baf",
"_type": "Residential",
"Changes": [61.86272836625059, 62.68936110399503, 8.105578035303562],
"Images": [83.07921141724484, 76.44777315916298, 3.523482294843383],
"Record": {
"Address": {
"other_address": 0.3689035282651276
},
"other_record": 0.7962077930432989
},
"geo": [-109.21260185980817, 42.900352929601496]
}
Note, if you leave eventing functions running they will auto update or enrich all new documents.
You could put further protections like checking for field existence, etc. or short circuit if doc.geo already exists. I would imagine you want a final Eventing function like:
function OnUpdate(doc, meta) {
if (doc._type !== "Residential") return;
if (doc.geo) return; // do not redo what we have already done.
// make new lon,lat property
doc.geo = [ doc.Record.Address.Longitude, doc.Record.Address.Latitude ];
// optional delete unneeded properties
delete doc.Record.Address.Longitude;
delete doc.Record.Address.Latitude;
// for 6.5+ update source bucket directly
test_bkt[meta.id] = doc;
}
Hope this helps
Jon Strabala