Performance Comparison of Array, Map, and Object Queries in Node.js
A few months ago, I submitted a PR #13378 to Nest to optimize the performance of the WsAdapter message handler lookup, simply changing from iterating through an array to using a Map. In fact, there is a lot of code in Nest that queries by iterating through arrays, most of which are executed only once during startup, so the performance impact is minimal. However, the code in WsAdapter is executed every time a message is received, which has a certain impact on performance. When I submitted this PR, I didn’t do performance testing - I just felt that Map query performance should be faster than iterating through an array, but I didn’t know how much faster. It wasn’t until recently when someone asked me in the PR if I had done performance testing and whether I had compared the query performance of Array, Map, and Object with different data volumes that I realized things might not be as simple as I thought. So I wrote a simple benchmark program to test.
Test Code
const { performance } = require("perf_hooks");
function generateKey() {
const length = Math.floor(Math.random() * 10) + 5;
return Math.random()
.toString(36)
.substring(2, length + 2);
}
function generateTestData(size) {
const keys = new Set();
const data = [];
for (let i = 0; i < size; i++) {
let key = generateKey();
while (keys.has(key)) {
key = generateKey();
}
keys.add(key);
data.push({ key, value: `value${i}` });
}
return data;
}
function prepareDataStructures(data) {
const arr = data;
const map = new Map(data.map((item) => [item.key, item]));
const obj = Object.fromEntries(data.map((item) => [item.key, item]));
return { arr, map, obj };
}
function runQuery(structure, key, { arr, map, obj }) {
switch (structure) {
case "array":
return arr.find((item) => item.key === key);
case "map":
return map.get(key);
case "object":
return obj[key];
}
}
function runBenchmark(dataSize, iterations) {
const data = generateTestData(dataSize);
const structures = prepareDataStructures(data);
const structureTypes = ["array", "map", "object"];
const results = {};
const queryKeys = Array.from(
{ length: iterations },
() => structures.arr[Math.floor(Math.random() * dataSize)].key
);
structureTypes.forEach((structure) => {
const start = performance.now();
queryKeys.forEach((key) => runQuery(structure, key, structures));
const end = performance.now();
results[structure] = end - start;
});
console.log(`Data size: ${dataSize}, Iterations: ${iterations}`);
structureTypes.forEach((structure) => {
console.log(`${structure}: ${results[structure]}ms`);
});
console.log();
}
const dataSizes = [10, 100, 1000, 1100, 10000, 100000, 1000000];
const iterations = 10000;
dataSizes.forEach((size) => runBenchmark(size, iterations));
Test Results
Test environment: Node.js v20.10.0
Data size: 10, Iterations: 10000
array: 4.13458251953125ms
map: 0.550999641418457ms
object: 0.47237491607666016ms
Data size: 100, Iterations: 10000
array: 2.8057079315185547ms
map: 0.23449993133544922ms
object: 0.6267919540405273ms
Data size: 1000, Iterations: 10000
array: 10.07683277130127ms
map: 0.3053750991821289ms
object: 1.0291252136230469ms
Data size: 1100, Iterations: 10000
array: 10.884374618530273ms
map: 0.2077503204345703ms
object: 0.16404247283935547ms
Data size: 10000, Iterations: 10000
array: 142.96483325958252ms
map: 1.2331666946411133ms
object: 0.2637920379638672ms
Data size: 100000, Iterations: 10000
array: 1674.5742092132568ms
map: 1.2257919311523438ms
object: 0.8130826950073242ms
Data size: 1000000, Iterations: 10000
array: 17534.821665763855ms
map: 2.955416679382324ms
object: 0.935333251953125ms
Conclusion
It’s quite obvious that at any data volume, the query performance of Map and Object is far superior to Array, which is not surprising. However, there’s another unexpected finding in the test results: the performance difference between Map and Object is quite significant. When the data volume is less than 1000, Map performs better than Object. But when the data volume exceeds 1000, Map’s performance gradually becomes inferior to Object. From the test results, we can see that at a data volume of 1100, the query performance of both Map and Object is actually better than at 1000. This suggests that when the data volume exceeds 1000, both Map and Object undergo internal optimization, but Map’s optimization is not as effective as Object’s.
We can draw a simple conclusion: use Map when the data volume is small, and use Object when the data volume is large. However, this conclusion is not absolute and may also be affected by the key values. In the test, all key values were randomly generated with varying lengths. If you’re interested, you can test the performance when key is a number, when key values have repeated prefixes, etc.
The number of message handlers in Nest’s WsAdapter is generally not very large, so simply switching to Map can improve query performance by ten times.
Comments