base/containers/analyze_containers_memory_benchmark.py - chromium/src - Git at Google

 #!/usr/bin/env python3
 # Copyright 2023 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 #
 # Processes the raw output from containers_memory_benchmark into CSV files.
 # Each CSV file contains the results for all tested container types for a
 # given key and value type.
 #
 # Usage:
 # $ out/release/containers_memory_benchmark &> output.txt
 # $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results

 import argparse
 from collections.abc import Sequence
 import csv
 import os.path
 import re
 import sys
 from typing import Optional


 # Line formats recognized in the benchmark output. main() applies each pattern
 # with match(), i.e. anchored at the start of the whitespace-stripped line.
 # Container section header, e.g. "===== base::flat_map =====".
 _HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
 # Iteration marker; `iter` is forwarded to did_iterate() as an int.
 _ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
 # Allocation event: the address (kept as an opaque string) and its byte size.
 _ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
 # Free event: only the address is reported, not the size.
 _FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')


 class ContainerStatsProcessor:
   """Tracks the bytes allocated on behalf of one container type.

   Allocation and free events update a running byte total. Whenever a new
   iteration begins (or the container is finalized), the running total is
   recorded against the element count of the iteration that just ended.
   """

   def __init__(self, name: str):
     # e.g. base::flat_map
     self._name = name
     # current number of elements in the container; None until the first
     # iteration marker has been seen.
     self._n: Optional[int] = None
     # map of address to size for currently active allocations. Needed because
     # the free handler only records an address, and not a size.
     self._addr_to_size: dict[str, int] = {}
     # running count of the number of bytes needed at the current iteration
     self._running_size = 0
     # map of container size to number of bytes used to store a container of that
     # size. Keys are expected to be contiguous from 0 to the total iteration
     # count.
     self._data: dict[int, int] = {}

   @property
   def name(self) -> str:
     """The container type name this processor was created with."""
     return self._name

   @property
   def data(self) -> dict:
     """Map of container size to the bytes recorded for that size."""
     return self._data

   def did_alloc(self, addr: str, size: int) -> None:
     """Records a new live allocation of `size` bytes at `addr`."""
     self._addr_to_size[addr] = size
     self._running_size += size

   def did_free(self, addr: str) -> None:
     """Records that the allocation at `addr` was released.

     Raises:
       KeyError: if `addr` is not a currently tracked allocation.
     """
     size = self._addr_to_size.pop(addr)
     self._running_size -= size

   def did_iterate(self, n: int) -> None:
     """Closes out the previous iteration (if any) and starts iteration `n`."""
     self.flush_current_iteration_if_needed()
     self._n = n

   def flush_current_iteration_if_needed(self) -> None:
     """Stores the running byte total under the current iteration's size.

     Does nothing when no iteration marker has been seen yet, so a container
     without any iterations never gains a bogus `None` entry in `data`.
     """
     if self._n is None:
       return
     self._data[self._n] = self._running_size


 class TestCaseProcessor:
   """Accumulates container stats for one test case and writes them as a CSV.

   A test case corresponds to one key/value type pair (e.g. int -> std::string)
   and holds the stats of every container type processed for that pair.
   """

   def __init__(self, name: str):
     # e.g. int -> std::string
     self._name = name
     # containers for which all allocation data has been processed and finalized.
     self._finalized_stats: list[ContainerStatsProcessor] = []
     # the current container being processed.
     self._current_container_stats: Optional[ContainerStatsProcessor] = None

   @property
   def current_container_stats(self):
     """The container currently receiving events, or None if there is none."""
     return self._current_container_stats

   def did_begin_container_stats(self, container_type: str):
     """Finalizes any in-progress container and starts tracking a new one."""
     self._finalize_current_container_stats_if_needed()
     self._current_container_stats = ContainerStatsProcessor(container_type)

   def did_finish_container_stats(self, output_dir: str):
     """Writes `<output_dir>/<test case name>.csv` for all processed containers.

     The first column is the container size; each remaining column holds the
     bytes recorded for one container type, in the order they were processed.

     Raises:
       ValueError: if the containers did not all record the same set of sizes.
     """
     self._finalize_current_container_stats_if_needed()
     # Validate before opening the file so that inconsistent input does not
     # leave a partially written CSV behind.
     keys = self._common_keys()
     with open(
         os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
     ) as f:
       writer = csv.writer(f)
       # First the column headers...
       writer.writerow(
           ['size'] + [stats.name for stats in self._finalized_stats]
       )
       # ...then one row per container size.
       for key in keys:
         writer.writerow(
             [key] + [stats.data[key] for stats in self._finalized_stats]
         )

   def _common_keys(self) -> list:
     """Returns the sorted container sizes shared by all finalized containers.

     In theory, all processed containers should have the same keys, but this is
     checked explicitly (rather than with `assert`, which is stripped under
     `python -O`) so that a mismatch is always reported.
     """
     keys = None
     for stats in self._finalized_stats:
       current = sorted(stats.data.keys())
       if keys is None:
         keys = current
       elif keys != current:
         raise ValueError(
             f'{self._name}: container {stats.name} recorded different '
             'sizes than the preceding containers'
         )
     return keys if keys is not None else []

   def _finalize_current_container_stats_if_needed(self):
     """Flushes the in-progress container, if any, into the finalized list."""
     if self._current_container_stats:
       self._current_container_stats.flush_current_iteration_if_needed()
       self._finalized_stats.append(self._current_container_stats)
       self._current_container_stats = None


 def main(argv: Sequence[str]) -> None:
   """Reads benchmark output from stdin and writes one CSV per test case.

   Args:
     argv: the full command line, including the program name at index 0.
   """
   parser = argparse.ArgumentParser(
       description='Processes raw output from containers_memory_usage into CSVs.'
   )
   parser.add_argument(
       '-o', help='directory to write CSV files to', required=True
   )
   # Honor the argv passed by the caller instead of implicitly re-reading
   # sys.argv; argv[0] is the program name, which argparse does not expect.
   args = parser.parse_args(argv[1:])

   # It would be nicer to use a ContextManager, but that complicates splitting up
   # the input and iterating through it. This is "good enough".
   processor: Optional[TestCaseProcessor] = None

   for line in sys.stdin:
     line = line.strip()
     # Any line containing '->' (e.g. int -> std::string) starts a new test
     # case; the previous one, if any, is written out first.
     if '->' in line:
       if processor:
         processor.did_finish_container_stats(args.o)
       processor = TestCaseProcessor(line)
       continue

     match = _HEADER_RE.match(line)
     if match:
       processor.did_begin_container_stats(match.group('name'))
       continue

     match = _ITER_RE.match(line)
     if match:
       processor.current_container_stats.did_iterate(int(match.group('iter')))
       continue

     match = _ALLOC_RE.match(line)
     if match:
       processor.current_container_stats.did_alloc(
           match.group('alloc_addr'), int(match.group('size'))
       )
       continue

     match = _FREED_RE.match(line)
     if match:
       processor.current_container_stats.did_free(match.group('freed_addr'))
       continue

   # Input ended: write out the last test case that was being processed.
   if processor:
     processor.did_finish_container_stats(args.o)


 # Run the analysis only when executed as a script, passing the full command
 # line through to main().
 if __name__ == '__main__':
   main(sys.argv)
	#!/usr/bin/env python3
	# Copyright 2023 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	#
	# Processes the raw output from containers_memory_benchmark into CSV files.
	# Each CSV file contains the results for all tested container types for a
	# given key and value type.
	#
	# Usage:
	# $ out/release/containers_memory_benchmark &> output.txt
	# $ python3 analyze_containers_memory_benchmark.py < output.txt -o bench-results

	import argparse
	from collections.abc import Sequence
	import csv
	import os.path
	import re
	import sys
	from typing import Optional


	# Line formats recognized in the benchmark output. main() applies each pattern
	# with match(), i.e. anchored at the start of the whitespace-stripped line.
	# Container section header, e.g. "===== base::flat_map =====".
	_HEADER_RE = re.compile(r'===== (?P<name>.+) =====')
	# Iteration marker; `iter` is forwarded to did_iterate() as an int.
	_ITER_RE = re.compile(r'iteration (?P<iter>\d+)')
	# Allocation event: the address (kept as an opaque string) and its byte size.
	_ALLOC_RE = re.compile(r'alloc address (?P<alloc_addr>.+) size (?P<size>\d+)')
	# Free event: only the address is reported, not the size.
	_FREED_RE = re.compile(r'freed address (?P<freed_addr>.+)')


	class ContainerStatsProcessor:
	  """Tracks the bytes allocated on behalf of one container type.

	  Allocation and free events update a running byte total. Whenever a new
	  iteration begins (or the container is finalized), the running total is
	  recorded against the element count of the iteration that just ended.
	  """

	  def __init__(self, name: str):
	    # e.g. base::flat_map
	    self._name = name
	    # current number of elements in the container; None until the first
	    # iteration marker has been seen.
	    self._n: Optional[int] = None
	    # map of address to size for currently active allocations. Needed because
	    # the free handler only records an address, and not a size.
	    self._addr_to_size: dict[str, int] = {}
	    # running count of the number of bytes needed at the current iteration
	    self._running_size = 0
	    # map of container size to number of bytes used to store a container of
	    # that size. Keys are expected to be contiguous from 0 to the total
	    # iteration count.
	    self._data: dict[int, int] = {}

	  @property
	  def name(self) -> str:
	    """The container type name this processor was created with."""
	    return self._name

	  @property
	  def data(self) -> dict:
	    """Map of container size to the bytes recorded for that size."""
	    return self._data

	  def did_alloc(self, addr: str, size: int) -> None:
	    """Records a new live allocation of `size` bytes at `addr`."""
	    self._addr_to_size[addr] = size
	    self._running_size += size

	  def did_free(self, addr: str) -> None:
	    """Records that the allocation at `addr` was released.

	    Raises:
	      KeyError: if `addr` is not a currently tracked allocation.
	    """
	    size = self._addr_to_size.pop(addr)
	    self._running_size -= size

	  def did_iterate(self, n: int) -> None:
	    """Closes out the previous iteration (if any) and starts iteration `n`."""
	    self.flush_current_iteration_if_needed()
	    self._n = n

	  def flush_current_iteration_if_needed(self) -> None:
	    """Stores the running byte total under the current iteration's size.

	    Does nothing when no iteration marker has been seen yet, so a container
	    without any iterations never gains a bogus `None` entry in `data`.
	    """
	    if self._n is None:
	      return
	    self._data[self._n] = self._running_size


	class TestCaseProcessor:
	  """Accumulates container stats for one test case and writes them as a CSV.

	  A test case corresponds to one key/value type pair (e.g. int -> std::string)
	  and holds the stats of every container type processed for that pair.
	  """

	  def __init__(self, name: str):
	    # e.g. int -> std::string
	    self._name = name
	    # containers for which all allocation data has been processed and
	    # finalized.
	    self._finalized_stats: list[ContainerStatsProcessor] = []
	    # the current container being processed.
	    self._current_container_stats: Optional[ContainerStatsProcessor] = None

	  @property
	  def current_container_stats(self):
	    """The container currently receiving events, or None if there is none."""
	    return self._current_container_stats

	  def did_begin_container_stats(self, container_type: str):
	    """Finalizes any in-progress container and starts tracking a new one."""
	    self._finalize_current_container_stats_if_needed()
	    self._current_container_stats = ContainerStatsProcessor(container_type)

	  def did_finish_container_stats(self, output_dir: str):
	    """Writes `<output_dir>/<test case name>.csv` for all processed containers.

	    The first column is the container size; each remaining column holds the
	    bytes recorded for one container type, in the order they were processed.

	    Raises:
	      ValueError: if the containers did not all record the same set of sizes.
	    """
	    self._finalize_current_container_stats_if_needed()
	    # Validate before opening the file so that inconsistent input does not
	    # leave a partially written CSV behind.
	    keys = self._common_keys()
	    with open(
	        os.path.join(output_dir, f'{self._name}.csv'), 'w', newline=''
	    ) as f:
	      writer = csv.writer(f)
	      # First the column headers...
	      writer.writerow(
	          ['size'] + [stats.name for stats in self._finalized_stats]
	      )
	      # ...then one row per container size.
	      for key in keys:
	        writer.writerow(
	            [key] + [stats.data[key] for stats in self._finalized_stats]
	        )

	  def _common_keys(self) -> list:
	    """Returns the sorted container sizes shared by all finalized containers.

	    In theory, all processed containers should have the same keys, but this is
	    checked explicitly (rather than with `assert`, which is stripped under
	    `python -O`) so that a mismatch is always reported.
	    """
	    keys = None
	    for stats in self._finalized_stats:
	      current = sorted(stats.data.keys())
	      if keys is None:
	        keys = current
	      elif keys != current:
	        raise ValueError(
	            f'{self._name}: container {stats.name} recorded different '
	            'sizes than the preceding containers'
	        )
	    return keys if keys is not None else []

	  def _finalize_current_container_stats_if_needed(self):
	    """Flushes the in-progress container, if any, into the finalized list."""
	    if self._current_container_stats:
	      self._current_container_stats.flush_current_iteration_if_needed()
	      self._finalized_stats.append(self._current_container_stats)
	      self._current_container_stats = None


	def main(argv: Sequence[str]) -> None:
	  """Reads benchmark output from stdin and writes one CSV per test case.

	  Args:
	    argv: the full command line, including the program name at index 0.
	  """
	  parser = argparse.ArgumentParser(
	      description='Processes raw output from containers_memory_usage into CSVs.'
	  )
	  parser.add_argument(
	      '-o', help='directory to write CSV files to', required=True
	  )
	  # Honor the argv passed by the caller instead of implicitly re-reading
	  # sys.argv; argv[0] is the program name, which argparse does not expect.
	  args = parser.parse_args(argv[1:])

	  # It would be nicer to use a ContextManager, but that complicates splitting
	  # up the input and iterating through it. This is "good enough".
	  processor: Optional[TestCaseProcessor] = None

	  for line in sys.stdin:
	    line = line.strip()
	    # Any line containing '->' (e.g. int -> std::string) starts a new test
	    # case; the previous one, if any, is written out first.
	    if '->' in line:
	      if processor:
	        processor.did_finish_container_stats(args.o)
	      processor = TestCaseProcessor(line)
	      continue

	    match = _HEADER_RE.match(line)
	    if match:
	      processor.did_begin_container_stats(match.group('name'))
	      continue

	    match = _ITER_RE.match(line)
	    if match:
	      processor.current_container_stats.did_iterate(int(match.group('iter')))
	      continue

	    match = _ALLOC_RE.match(line)
	    if match:
	      processor.current_container_stats.did_alloc(
	          match.group('alloc_addr'), int(match.group('size'))
	      )
	      continue

	    match = _FREED_RE.match(line)
	    if match:
	      processor.current_container_stats.did_free(match.group('freed_addr'))
	      continue

	  # Input ended: write out the last test case that was being processed.
	  if processor:
	    processor.did_finish_container_stats(args.o)


	# Run the analysis only when executed as a script, passing the full command
	# line through to main().
	if __name__ == '__main__':
	main(sys.argv)