| #!/usr/bin/env python3 |
| # Copyright 2023 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| # |
| # Processes the raw output from containers_memory_usage into CSV files. Each CSV |
| # file contains the results for all tested container types for a given key and |
| # value type. |
| # |
| # Usage: |
| # $ out/release/containers_memory_benchmark &> output.txt |
| # $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results |
| |
| import argparse |
| from collections.abc import Sequence |
| import csv |
| import os.path |
| import re |
| import sys |
| from typing import Optional |
| |
| |
# Line patterns recognized in the benchmark output. All are applied with
# re.match(), so they are anchored at the start of the (stripped) line.
#
# Section header naming the container type under test, e.g.
# "===== base::flat_map =====".
_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
# Iteration marker; `iter` is converted to int and passed to did_iterate().
_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
# Allocation record; `alloc_addr` is kept as an opaque string key and `size` is
# converted to int before being passed to did_alloc().
_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
# Free record; carries only the address, which is passed to did_free().
_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')
| |
| |
class ContainerStatsProcessor:
  """Tracks live heap usage for one container type across iterations.

  Allocation and free events are folded into a running byte count. Whenever a
  new iteration is reported (or the stats are flushed explicitly), the running
  count is recorded against the iteration number that was current until then.
  """

  def __init__(self, name: str):
    # e.g. base::flat_map
    self._name = name
    # current number of elements in the container; None until the first
    # iteration has been reported.
    self._n: Optional[int] = None
    # map of address to size for currently active allocations. Needed because
    # the free handler only records an address, and not a size.
    self._addr_to_size: dict[str, int] = {}
    # running count of the number of bytes needed at the current iteration
    self._running_size = 0
    # map of container size to number of bytes used to store a container of that
    # size. Keys are expected to be contiguous from 0 to the total iteration
    # count.
    self._data: dict[int, int] = {}

  @property
  def name(self) -> str:
    """The container type name this processor was created with."""
    return self._name

  @property
  def data(self) -> dict[int, int]:
    """Map of iteration number to bytes in use, for flushed iterations."""
    return self._data

  def did_alloc(self, addr: str, size: int):
    """Records an allocation of `size` bytes at `addr`."""
    self._addr_to_size[addr] = size
    self._running_size += size

  def did_free(self, addr: str):
    """Records that the allocation at `addr` was freed.

    Raises:
      KeyError: if `addr` is not a currently tracked allocation.
    """
    size = self._addr_to_size.pop(addr)
    self._running_size -= size

  def did_iterate(self, n: int):
    """Closes out the previous iteration (if any) and starts iteration `n`."""
    self.flush_current_iteration_if_needed()
    self._n = n

  def flush_current_iteration_if_needed(self):
    """Records the running byte count against the current iteration.

    Does nothing if no iteration has been reported yet; otherwise finalizing a
    container that never iterated would store a bogus `None` key in `data`.
    """
    if self._n is None:
      return
    self._data[self._n] = self._running_size
| |
| |
class TestCaseProcessor:
  """Collects per-container stats for one test case and writes them to a CSV.

  A test case is identified by its name (e.g. int -> std::string); the CSV has
  one column per container type and one row per iteration number.
  """

  def __init__(self, name: str):
    # e.g. int -> std::string
    self._name = name
    # containers for which all allocation data has been processed and finalized.
    self._finalized_stats: list[ContainerStatsProcessor] = []
    # the current container being processed.
    self._current_container_stats: Optional[ContainerStatsProcessor] = None

  @property
  def current_container_stats(self) -> 'Optional[ContainerStatsProcessor]':
    """The container currently receiving events, or None if there is none."""
    return self._current_container_stats

  def did_begin_container_stats(self, container_type: str):
    """Finalizes the current container, if any, and starts a new one."""
    self._finalize_current_container_stats_if_needed()
    self._current_container_stats = ContainerStatsProcessor(container_type)

  def did_finish_container_stats(self, output_dir: str):
    """Finalizes the current container and writes `<name>.csv` to output_dir.

    Args:
      output_dir: directory the CSV file is written into.

    Raises:
      ValueError: if the finalized containers do not all cover the same set of
        iteration numbers. Nothing is written in that case.
    """
    self._finalize_current_container_stats_if_needed()
    # Validate before opening the file so a mismatch does not leave a partial
    # CSV behind.
    keys = self._common_keys()
    with open(
        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
    ) as f:
      writer = csv.writer(f)
      # First the column headers...
      writer.writerow(
          ['size'] + [stats.name for stats in self._finalized_stats]
      )
      # ...then one row per iteration, one column per container.
      for key in keys:
        writer.writerow(
            [key] + [stats.data[key] for stats in self._finalized_stats]
        )

  def _common_keys(self) -> list:
    """Returns the sorted iteration keys shared by all finalized containers.

    In theory, all processed containers should have the same keys. This is
    checked explicitly (rather than with `assert`, which is stripped under -O)
    and against the first container even when its data is empty.
    """
    expected = None
    for stats in self._finalized_stats:
      keys = sorted(stats.data)
      if expected is None:
        expected = keys
      elif keys != expected:
        raise ValueError(
            f'{self._name}: container {stats.name} recorded {len(keys)} '
            f'iteration(s), which does not match {self._finalized_stats[0].name}'
            f' ({len(expected)} iteration(s))'
        )
    return expected or []

  def _finalize_current_container_stats_if_needed(self):
    """Flushes the current container and moves it to the finalized list."""
    if self._current_container_stats:
      self._current_container_stats.flush_current_iteration_if_needed()
      self._finalized_stats.append(self._current_container_stats)
      self._current_container_stats = None
| |
| |
def _require_container_stats(processor, line_number: int, line: str):
  """Returns the container stats `line` applies to, or raises ValueError."""
  stats = processor.current_container_stats if processor is not None else None
  if stats is None:
    raise ValueError(
        f'line {line_number}: {line!r} appears before any container header'
    )
  return stats


def main(argv: Sequence[str]) -> None:
  """Reads benchmark output from stdin and writes one CSV per test case.

  Args:
    argv: the full command line, including the program name at index 0.

  Raises:
    ValueError: if a container header appears before any test case line, or an
      iteration/alloc/free record appears before any container header.
  """
  parser = argparse.ArgumentParser(
      description='Processes raw output from containers_memory_usage into CSVs.'
  )
  parser.add_argument(
      '-o', help='directory to write CSV files to', required=True
  )
  # Parse the arguments that were actually passed in, rather than implicitly
  # re-reading sys.argv.
  args = parser.parse_args(argv[1:])

  # Make sure the destination exists so the first CSV write does not fail.
  os.makedirs(args.o, exist_ok=True)

  # It would be nicer to use a ContextManager, but that complicates splitting up
  # the input and iterating through it. This is "good enough".
  processor: Optional[TestCaseProcessor] = None

  for line_number, raw_line in enumerate(sys.stdin, start=1):
    line = raw_line.strip()
    # Any line containing '->' starts a new test case (e.g. int -> std::string).
    if '->' in line:
      if processor is not None:
        processor.did_finish_container_stats(args.o)
      processor = TestCaseProcessor(line)
      continue

    match = _HEADER_RE.match(line)
    if match:
      if processor is None:
        raise ValueError(
            f'line {line_number}: {line!r} appears before any test case'
        )
      processor.did_begin_container_stats(match.group('name'))
      continue

    match = _ITER_RE.match(line)
    if match:
      stats = _require_container_stats(processor, line_number, line)
      stats.did_iterate(int(match.group('iter')))
      continue

    match = _ALLOC_RE.match(line)
    if match:
      stats = _require_container_stats(processor, line_number, line)
      stats.did_alloc(match.group('alloc_addr'), int(match.group('size')))
      continue

    match = _FREED_RE.match(line)
    if match:
      stats = _require_container_stats(processor, line_number, line)
      stats.did_free(match.group('freed_addr'))
      continue

    # Anything else is unrelated output; ignore it.

  # Write out the last test case, which has no following '->' line to close it.
  if processor is not None:
    processor.did_finish_container_stats(args.o)
| |
| |
# Script entry point: run main() with the full command line only when this file
# is executed directly, not when it is imported.
if __name__ == '__main__':
  main(sys.argv)