I have a large data set in the format:
// Sample input: an array of records, each mapping string keys to numeric values.
data = [{ a: 12, b: 8 }, { a: 2, c: 4, d: 14 }, { c: 2, e: 4, f: 14 }]
What I want is a single object that maps every key in the dataset (here a-f) to the sum of its values across all entries, like this:
{ a: 14, b: 8, c: 6, d: 14, e: 4, f: 14 }
I can get the desired result like this:
/**
 * Adds two numbers; used as the reducer when totalling a key's values.
 * @param {number} a - Running total.
 * @param {number} b - Next value to add.
 * @returns {number} The sum of `a` and `b`.
 */
function sum(a, b) {
  return a + b;
}

/**
 * Totals the values stored under key `n` across the global `data` array.
 * Every call scans the whole array, so calling it once per key costs
 * (number of keys) x (number of records) lookups.
 * @param {string} n - Key to total.
 * @returns {Object} Single-entry object `{ [n]: total }`; the total is 0
 *   when no record contains the key.
 */
function countTotal(n) {
  // Keep only the records that actually carry this key.
  const ndata = data.filter((i) => Object.keys(i).includes(n));
  // Seed the reduction with 0 so a key that appears in no record yields 0
  // instead of throwing on an empty array.
  const cnt = ndata.map((i) => i[n]).reduce(sum, 0);
  return { [n]: cnt };
}

// One single-entry object per key, merged into the final result object.
let names = 'abcdef'.split('');
let res = names.map((n) => countTotal(n));
res = Object.assign({}, ...res);
My problem is that this takes a long time for the actual data set I have (which is quite large). Is there a way to do this more efficiently?
The code below creates a large dummy dataset that approximates the real dataset.
// Pool of random base-36 key names (at most 5 characters each), deduplicated.
let dummy_names = [];
for (let i = 0; i < 2000; i++) {
  // (Math.random() + 1).toString(36) looks like "1.xxxxx…"; take the first
  // five characters after the "1." prefix.
  dummy_names.push((Math.random() + 1).toString(36).slice(2, 7));
}
dummy_names = [...new Set(dummy_names)];

// Collects every key name that ends up in at least one record.
// NOTE(review): `names` is assigned here, not declared — this presumably
// reuses a `names` binding declared by earlier code; confirm before running
// this snippet on its own in strict mode.
names = new Set();

/**
 * Builds one random record: a handful of keys drawn from `dummy_names`,
 * each mapped to a random integer in [0, 20).
 * Side effect: every chosen key is added to the `names` set.
 * @returns {Object} The generated record.
 */
function makeResponses() {
  const responses = {};
  // The upper bound is re-rolled on every iteration, so the loop runs at
  // least once and at most seven times; duplicate picks overwrite each other.
  for (let j = 0; j <= Math.floor(Math.random() * 7); j++) {
    const idx = Math.floor(Math.random() * dummy_names.length);
    const inam = dummy_names[idx];
    names.add(inam);
    responses[inam] = Math.floor(Math.random() * 20);
  }
  return responses;
}

// 20,000 random records standing in for the real dataset.
let data = [];
for (let i = 0; i < 20000; i++) {
  data.push(makeResponses());
}
P粉976488015 2024-02-18 15:15:02
I would use a helper object to keep track of the running sums while looping through the objects in the array.
The most important thing is to look at each value only once to keep the complexity low (in big-O terms). There are many ways to iterate; I don't know whether a for loop or .forEach is faster.
Here is a rough solution:
/**
 * Sums the values of every key across an array of records in a single pass.
 * Each key/value pair is visited exactly once.
 * @param {Object[]} records - Objects mapping keys to numeric values.
 * @returns {Object} An object mapping each key seen to the total of its values.
 */
function sumByKey(records) {
  const totals = {};
  for (const record of records) {
    for (const [key, value] of Object.entries(record)) {
      // Call hasOwnProperty via the prototype: a data key that is itself
      // named "hasOwnProperty" would otherwise shadow the method on `totals`.
      if (Object.prototype.hasOwnProperty.call(totals, key)) {
        totals[key] += value;
      } else {
        totals[key] = value;
      }
    }
  }
  return totals;
}

const data = [{a: 12, b: 8}, {a: 2, c: 4, d: 14}, {c: 2, e: 4, f: 14}];
const sums = sumByKey(data);
console.log(sums);