
Orderfile: Simple call-graph-based orderfile.

This CL adds simple function call graph clustering. The call graph is
inferred from the function ordering offset list, and the orderfile is
produced by clustering based on this inferred call graph.

In local testing, this change recovers the speedometer performance
lost in the bug. Startup is not affected, and memory shows an
improvement similar to that of the original system_health orderfile.

Bug: 894827
Change-Id: Iedab652d0f25c9c9ea481c2a75214cac41dee368
Reviewed-on: https://chromium-review.googlesource.com/c/1350876
Commit-Queue: Matthew Cary <mattcary@chromium.org>
Reviewed-by: Benoit L <lizeb@chromium.org>
Reviewed-by: Egor Pasko <pasko@chromium.org>
Cr-Commit-Position: refs/heads/master@{#612644}
diff --git a/tools/cygprofile/cluster.py b/tools/cygprofile/cluster.py
new file mode 100644
index 0000000..b20372ae
--- /dev/null
+++ b/tools/cygprofile/cluster.py
@@ -0,0 +1,275 @@
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Clustering for function call-graph.
+
+See the Clustering class for a detailed description.
+"""
+
+import collections
+import itertools
+import logging
+
+
+Neighbor = collections.namedtuple('Neighbor', ('src', 'dst', 'dist'))
+
+
+class Clustering(object):
+  """Cluster symbols.
+
+  We are given a list of each function's first call, ordered by
+  time. There are multiple such lists: different benchmarks are run
+  multiple times, and each run produces a list from startup plus a
+  second list, beginning 5 seconds after startup, that runs until the
+  benchmark memory dump.
+
+  We have evidence (see below) that this simple ordering of code from a
+  single profiling run (a load of a website) improves performance,
+  presumably by improving code locality. To reconstruct this ordering
+  using profiling information from multiple files, we cluster. Doing
+  this clustering over multiple runs on the speedometer benchmark
+  recovered speedometer performance compared with the legacy benchmark.
+
+  For each offset list, we record the distances between each symbol and
+  its neighborhood of the following k symbols (k=19, chosen
+  arbitrarily). For example, if we have an offset list of symbols
+  'abcdef', we add the neighbors (a->b, 1), (a->c, 2), (b->c, 1), (b->e,
+  3), etc. Then we average distances of a given neighbor pair over all
+seen symbol lists. If we see an inversion (for example, (b->a, 3)), we
+use this as a distance of -3. For each file in which a given pair does
+not appear, either because one of the symbols is absent or because
+they are separated by 20 or more symbols, we use a large distance D
+(D=1000). The distances are then averaged over all files. If the
+average is
+  negative, the neighbor pair is inverted and the distance flipped. The
+  idea is that if two symbols appear near each other in all profiling
+  runs, there is high confidence that they are usually called
+together. If they don't appear near each other in some runs, there is
+less confidence that they should be colocated. Symbol distances are
+recorded only in the forward direction, both to avoid double-counting
+confusion and to give a clear ordering when combining clusters.
+
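+  As a small worked example, take a neighborhood of the 2 following
+  symbols and D=1000, with the five offset lists 'abcd', 'acbe',
+  'bacf', 'badf' and 'baef'. The pair (a->c) appears at distances 2, 1
+  and 1 in the first three lists and is missing from the last two, so
+  its average distance is (2 + 1 + 1 + 2*1000) / 5 = 400.8.
+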
+  Neighbors are sorted, and starting with the shortest distance, symbols
+  are coalesced into clusters. If the neighbor pair is (a->b), the
+  clusters containing a and b are combined in that order. If a and b are
+  already in the same cluster, nothing happens. After processing all
+  neighbors there is usually only one cluster; if there are multiple
+  clusters they are combined in order from largest to smallest (although
+  that choice may not matter).
+
+  Cluster merging may optionally be halted once a cluster grows beyond
+  the size of an Android page. As of November 2018 this slightly
+  reduces performance and should not be used (a 1.7% decline in
+  speedometer2 and a 450K native library memory regression).
+  """
+  NEIGHBOR_DISTANCE = 20  # Exclusive bound; the 19 following symbols.
+  FAR_DISTANCE = 1000
+  MAX_CLUSTER_SIZE = 4096  # 4k pages on android.
+
+  class _Cluster(object):
+    def __init__(self, syms, size):
+      assert len(set(syms)) == len(syms), 'Duplicated symbols in cluster'
+      self._syms = syms
+      self._size = size
+
+    @property
+    def syms(self):
+      return self._syms
+
+    @property
+    def binary_size(self):
+      return self._size
+
+  @classmethod
+  def ClusteredSymbolLists(cls, sym_lists, size_map):
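+    """Clusters the given symbol lists and returns an ordered symbol list.
+
+    Args:
+      sym_lists ([[str]]) Symbol lists, as described in the class docstring.
+      size_map ({str: int} or None) Map from symbol name to binary size;
+        see ClusterToList.
+
+    Returns:
+      [str] Ordered list of clustered symbols.
+    """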
+    c = cls()
+    c.AddSymbolLists(sym_lists)
+    return c.ClusterToList(size_map)
+
+  def __init__(self):
+    self._num_lists = None
+    self._neighbors = None
+    self._cluster_map = {}
+    self._symbol_size = lambda _: 0  # Maps a symbol to a size.
+
+  def _MakeCluster(self, syms):
+    c = self._Cluster(syms, sum(self._symbol_size(s) for s in syms))
+    for s in syms:
+      self._cluster_map[s] = c
+    return c
+
+  def ClusterOf(self, s):
+    if isinstance(s, self._Cluster):
+      assert self._cluster_map[s.syms[0]] == s
+      return s
+    if s in self._cluster_map:
+      return self._cluster_map[s]
+    return self._MakeCluster([s])
+
+  def Combine(self, a, b):
+    """Combine clusters.
+
+    Args:
+      a, b: Clusters or str. The canonical cluster (ClusterOf) will be
+        used to do the combining.
+
+    Returns:
+      A merged cluster from a and b, or None if a and b are in the same cluster.
+    """
+    canonical_a = self.ClusterOf(a)
+    canonical_b = self.ClusterOf(b)
+    if canonical_a == canonical_b:
+      return None
+    return self._MakeCluster(canonical_a._syms + canonical_b._syms)
+
+  def AddSymbolLists(self, sym_lists):
+    self._num_lists = len(sym_lists)
+    self._neighbors = self._CoalesceNeighbors(
+        self._ConstructNeighbors(sym_lists))
+
+  def _ConstructNeighbors(self, sym_lists):
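+    # For example, with a NEIGHBOR_DISTANCE of 3, the symbol list 'abcd'
+    # yields the neighbors (a->b, 1), (a->c, 2), (b->c, 1), (b->d, 2)
+    # and (c->d, 1).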
+    neighbors = []
+    for sym_list in sym_lists:
+      for i, s in enumerate(sym_list):
+        for j in xrange(i + 1, min(i + self.NEIGHBOR_DISTANCE, len(sym_list))):
+          if s == sym_list[j]:
+            # Free functions that are static inline seem to be the only
+            # source of these duplicates.
+            continue
+          neighbors.append(Neighbor(s, sym_list[j], j - i))
+    logging.info('Constructed %s symbol neighbors', len(neighbors))
+    return neighbors
+
+  def _CoalesceNeighbors(self, neighbors):
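+    # Gather the distances seen for each ordered pair, then merge the
+    # (s, t) and (t, s) observations of each unordered pair, negating
+    # the inverted ones. For example, observations (b->c, 1), (c->b, 1)
+    # and (b->c, 2) give distances [1, -1, 2] for the pair (b, c).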
+    pairs = collections.defaultdict(list)
+    for n in neighbors:
+      pairs[(n.src, n.dst)].append(n.dist)
+    coalesced = []
+    logging.info('Will coalesce over %s neighbor pairs', len(pairs))
+    count = 0
+    for (s, t) in pairs:
+      assert s != t, '{} != {}'.format(s, t)
+      if (t, s) in pairs and t < s:
+        # Only process each unordered pair once.
+        continue
+      count += 1
+      if not (count % 1e6):
+        logging.info('tick')
+      distances = []
+      if (s, t) in pairs:
+        distances.extend(pairs[(s, t)])
+      if (t, s) in pairs:
+        distances.extend(-d for d in pairs[(t, s)])
+      if distances:
+        num_missing = self._num_lists - len(distances)
+        avg_distance = (float(sum(distances)) +
+                        self.FAR_DISTANCE * num_missing) / self._num_lists
+        if avg_distance > 0:
+          coalesced.append(Neighbor(s, t, avg_distance))
+        else:
+          coalesced.append(Neighbor(t, s, avg_distance))
+    return coalesced
+
+  def ClusterToList(self, size_map=None):
+    """Merge the clusters with the smallest distances.
+
+    Args:
+      size_map ({symbol: size} or None): Map symbol names to their size. Cluster
+        growth will be stopped at MAX_CLUSTER_SIZE. If None, sizes are taken to
+        be zero and cluster growth is not stopped.
+
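+    For example, with a neighborhood of 2 following symbols
+    (NEIGHBOR_DISTANCE = 3), the five lists from the worked example in
+    the class docstring cluster to list('bacfed').
+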
+    Returns:
+      An ordered list of symbols from AddSymbolLists, appropriately clustered.
+    """
+    if size_map:
+      self._symbol_size = lambda s: size_map[s]
+    if not self._num_lists or not self._neighbors:
+      # Some sort of trivial set of symbol lists, such as all being
+      # length 1. Return an empty ordering.
+      return []
+    logging.info('Sorting %s neighbors', len(self._neighbors))
+    self._neighbors.sort(key=lambda n: (-n.dist, n.src, n.dst))
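+    # Sorted by descending distance so that pop() below yields the
+    # closest remaining neighbor pair first.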
+    logging.info('Clustering...')
+    count = 0
+    while self._neighbors:
+      count += 1
+      if not (count % 1e6):
+        logging.info('tock')
+      neighbor = self._neighbors.pop()
+      src = self.ClusterOf(neighbor.src)
+      dst = self.ClusterOf(neighbor.dst)
+      if (src == dst or
+          src.binary_size + dst.binary_size > self.MAX_CLUSTER_SIZE):
+        continue
+      self.Combine(src, dst)
+    if size_map:
+      clusters_by_size = sorted(list(set(self._cluster_map.values())),
+                                key=lambda c: -c.binary_size)
+    else:
+      clusters_by_size = sorted(list(set(self._cluster_map.values())),
+                                key=lambda c: -len(c.syms))
+    logging.info('Produced %s clusters', len(clusters_by_size))
+    logging.info('Top sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size)
+                                   for c in clusters_by_size[:4]])
+    logging.info('Bottom sizes: %s', ['{}/{}'.format(len(c.syms), c.binary_size)
+                                      for c in clusters_by_size[-4:]])
+    ordered_syms = []
+    for c in clusters_by_size:
+      ordered_syms.extend(c.syms)
+    assert len(ordered_syms) == len(set(ordered_syms)), 'Duplicated symbols!'
+    return ordered_syms
+
+
+def ClusterOffsets(profiles, processor, limit_cluster_size=False):
+  """Cluster profile offsets.
+
+  Args:
+    profiles (ProfileManager) Manager of the profile dump files.
+    processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
+    limit_cluster_size (bool) If True, limit cluster growth using symbol
+      sizes (see Clustering.MAX_CLUSTER_SIZE).
+
+  Returns:
+    A list of clustered symbol names.
+  """
+  raw_offsets = profiles.GetProcessOffsetLists()
+  process_symbols = collections.defaultdict(list)
+  seen_symbols = set()
+  for p in raw_offsets:
+    for offsets in raw_offsets[p]:
+      symbol_names = processor.GetOrderedSymbols(
+          processor.GetReachedOffsetsFromDump(offsets))
+      process_symbols[p].append(symbol_names)
+      seen_symbols |= set(symbol_names)
+  if limit_cluster_size:
+    name_map = processor.NameToSymbolMap()
+    size_map = {name: name_map[name].size for name in seen_symbols}
+  else:
+    size_map = None
+
+  # Process names from the profile dumps that are treated specially.
+  _RENDERER = 'renderer'
+  _BROWSER = 'browser'
+
+  assert _RENDERER in process_symbols
+  assert _BROWSER in process_symbols
+
+  renderer_clustering = Clustering.ClusteredSymbolLists(
+      process_symbols[_RENDERER], size_map)
+  browser_clustering = Clustering.ClusteredSymbolLists(
+      process_symbols[_BROWSER], size_map)
+  other_lists = []
+  for process, syms in process_symbols.items():
+    if process not in (_RENDERER, _BROWSER):
+      other_lists.extend(syms)
+  if other_lists:
+    other_clustering = Clustering.ClusteredSymbolLists(other_lists, size_map)
+  else:
+    other_clustering = []
+
+  # Start with the renderer cluster to favor rendering performance.
+  final_ordering = [s for s in renderer_clustering]
+  seen = set(final_ordering)
+  final_ordering.extend(s for s in browser_clustering if s not in seen)
+  seen |= set(browser_clustering)
+  final_ordering.extend(s for s in other_clustering if s not in seen)
+
+  return final_ordering
diff --git a/tools/cygprofile/cluster_unittest.py b/tools/cygprofile/cluster_unittest.py
new file mode 100755
index 0000000..249811c
--- /dev/null
+++ b/tools/cygprofile/cluster_unittest.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env vpython
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for cluster.py."""
+
+import unittest
+
+import cluster
+import process_profiles
+from test_utils import (ProfileFile,
+                        SimpleTestSymbol,
+                        TestProfileManager,
+                        TestSymbolOffsetProcessor)
+
+
+class ClusteringTestCase(unittest.TestCase):
+  def testClusterOf(self):
+    clstr = cluster.Clustering()
+    c = clstr.ClusterOf('a')
+    self.assertEqual(['a'], c.syms)
+    c = clstr._MakeCluster(['a', 'b', 'c'])
+    self.assertEqual(c, clstr.ClusterOf('a'))
+    self.assertEqual(c, clstr.ClusterOf('b'))
+    self.assertEqual(c, clstr.ClusterOf('c'))
+
+  def testClusterCombine(self):
+    clstr = cluster.Clustering()
+    x = clstr._MakeCluster(['a', 'b'])
+    self.assertEqual(x, clstr.ClusterOf('a'))
+    self.assertEqual(x, clstr.ClusterOf('b'))
+
+    y = clstr._MakeCluster(['c'])
+    self.assertEqual(y, clstr.ClusterOf('c'))
+
+    z = clstr.Combine(y, x)
+    self.assertEqual(['c', 'a', 'b'], z.syms)
+    self.assertEqual(z, clstr.ClusterOf('a'))
+    self.assertEqual(z, clstr.ClusterOf('b'))
+    self.assertEqual(z, clstr.ClusterOf('c'))
+
+  def testClusteringDistances(self):
+    c = cluster.Clustering()
+    c.NEIGHBOR_DISTANCE = 3
+    c.AddSymbolLists([list('abcd'), list('acbe'), list('bacf'),
+                      list('badf'), list('baef')])
+    distances = {}
+    for n in c._neighbors:
+      self.assertFalse((n.src, n.dst) in distances)
+      distances[(n.src, n.dst)] = n.dist
+    self.assertEqual(13, len(distances))
+    self.assertEqual((2 + 1 + 1 + 2000) / 5., distances[('a', 'c')])
+    self.assertEqual((1 + 4000) / 5., distances[('a', 'd')])
+    self.assertEqual((1 + 4000) / 5., distances[('a', 'e')])
+    self.assertEqual((2 + 2 + 2 + 2000) / 5., distances[('a', 'f')])
+    self.assertEqual(0, distances[('b', 'a')])
+    self.assertEqual((1 + -1 + 2 + 2000) / 5., distances[('b', 'c')])
+    self.assertTrue(('b', 'd') in distances)
+    self.assertTrue(('b', 'e') in distances)
+    self.assertTrue(('c', 'd') in distances)
+    self.assertTrue(('c', 'e') in distances)
+    self.assertTrue(('c', 'f') in distances)
+    self.assertTrue(('d', 'f') in distances)
+    self.assertTrue(('e', 'f') in distances)
+
+  def testClusterToList(self):
+    c = cluster.Clustering()
+    c.NEIGHBOR_DISTANCE = 3
+    c.AddSymbolLists([list('abcd'), list('acbe'), list('bacf'),
+                      list('badf'), list('baef')])
+    self.assertEqual(list('bacfed'), c.ClusterToList())
+
+  def testClusterOneList(self):
+    c = cluster.Clustering()
+    c.NEIGHBOR_DISTANCE = 3
+    c.AddSymbolLists([list('fedcba')])
+    self.assertEqual(list('fedcba'), c.ClusterToList())
+
+  def testClusterShortList(self):
+    c = cluster.Clustering()
+    c.NEIGHBOR_DISTANCE = 3
+    c.AddSymbolLists([list('ab')])
+    self.assertEqual(list('ab'), c.ClusterToList())
+
+  def testClusterReallyShortList(self):
+    c = cluster.Clustering()
+    c.NEIGHBOR_DISTANCE = 3
+    c.AddSymbolLists([list('a')])
+    self.assertEqual([], c.ClusterToList())
+
+  def testSizedClusterToList(self):
+    c = cluster.Clustering()
+    c.NEIGHBOR_DISTANCE = 3
+    c.MAX_CLUSTER_SIZE = 1  # Will suppress all clusters.
+    size_map = {'a': 3,
+                'b': 4,
+                'c': 5,
+                'd': 6,
+                'e': 7,
+                'f': 8}
+    c.AddSymbolLists([list('abcd'), list('acbe'), list('bacf'),
+                      list('badf'), list('baef')])
+    self.assertEqual(list('fedcba'), c.ClusterToList(size_map))
+
+  def testClusterOffsets(self):
+    processor = TestSymbolOffsetProcessor([
+        SimpleTestSymbol('linker_script_start_of_text', 0, 0),
+        SimpleTestSymbol('1', 1000, 999),
+        SimpleTestSymbol('2', 2000, 999),
+        SimpleTestSymbol('3', 3000, 999),
+        SimpleTestSymbol('4', 4000, 16),
+        SimpleTestSymbol('5', 5000, 16),
+        SimpleTestSymbol('6', 6000, 999),
+        SimpleTestSymbol('7', 7000, 16),
+        SimpleTestSymbol('8', 8000, 999),
+        SimpleTestSymbol('9', 9000, 16),
+    ])
+    mgr = TestProfileManager({
+        ProfileFile(40, 0, ''): [1000, 2000, 3000],
+        ProfileFile(50, 1, ''): [3000, 4000, 5000],
+        ProfileFile(51, 0, 'renderer'): [2000, 3000, 6000],
+        ProfileFile(51, 1, 'gpu-process'): [6000, 7000],
+        ProfileFile(70, 0, ''): [1000, 2000, 6000, 8000, 9000],
+        ProfileFile(70, 1, ''): [9000, 5000, 3000]})
+    syms = cluster.ClusterOffsets(mgr, processor, False)
+    self.assertListEqual(list('236148957'), syms)
+
+    syms = cluster.ClusterOffsets(mgr, processor, True)
+    self.assertListEqual(list('236489517'), syms)
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/tools/cygprofile/orderfile_generator_backend.py b/tools/cygprofile/orderfile_generator_backend.py
index 12d7091..54d5a34d 100755
--- a/tools/cygprofile/orderfile_generator_backend.py
+++ b/tools/cygprofile/orderfile_generator_backend.py
@@ -27,10 +27,10 @@
 import tempfile
 import time
 
+import cluster
 import cyglog_to_orderfile
 import cygprofile_utils
 import patch_orderfile
-import phased_orderfile
 import process_profiles
 import profile_android_startup
 import symbol_extractor
@@ -527,25 +527,11 @@
     profiles = process_profiles.ProfileManager(files)
     processor = process_profiles.SymbolOffsetProcessor(
         self._compiler.lib_chrome_so)
-    phaser = phased_orderfile.PhasedAnalyzer(profiles, processor)
-    if self._options.offsets_for_memory:
-      profile_offsets = phaser.GetOffsetsForMemoryFootprint()
-    else:
-      profile_offsets = phaser.GetOffsetsForStartup()
-    self._output_data['orderfile_size'] = {
-        'startup_kib': processor.OffsetsPrimarySize(
-            profile_offsets.startup) / 1024,
-        'common_kib': processor.OffsetsPrimarySize(
-            profile_offsets.common) / 1024,
-        'interaction_kib': processor.OffsetsPrimarySize(
-            profile_offsets.interaction) / 1024}
-
-    offsets_list = (profile_offsets.startup +
-                    profile_offsets.common +
-                    profile_offsets.interaction)
-    ordered_symbols = processor.GetOrderedSymbols(offsets_list)
+    ordered_symbols = cluster.ClusterOffsets(profiles, processor)
     if not ordered_symbols:
       raise Exception('Failed to get ordered symbols')
+    self._output_data['offsets_kib'] = processor.SymbolsSize(
+        ordered_symbols) / 1024
     with open(self._GetUnpatchedOrderfileFilename(), 'w') as orderfile:
       orderfile.write('\n'.join(ordered_symbols))
 
@@ -654,7 +640,7 @@
     Args:
       filename: (str) Orderfile to upload.
     """
-    # First compute hashes so that we can download them later if we need to
+    # First compute hashes so that we can download them later if we need to.
     self._step_recorder.BeginStep('Compute hash for ' + filename)
     self._RecordHash(filename)
     if self._options.buildbot:
@@ -830,10 +816,6 @@
   parser.add_argument('--monochrome', action='store_true',
                       help=('Compile and instrument monochrome (for post-N '
                             'devices).'))
-  parser.add_argument('--offsets-for-memory', action='store_true',
-                      help=('Favor memory savings in the orderfile. Used '
-                            'with --system-health-orderfile.'),
-                      default=False)
 
   parser.add_argument('--manual-symbol-offsets', default=None, type=str,
                       help=('File of list of ordered symbol offsets generated '
diff --git a/tools/cygprofile/phased_orderfile.py b/tools/cygprofile/phased_orderfile.py
deleted file mode 100755
index 73b20b9..0000000
--- a/tools/cygprofile/phased_orderfile.py
+++ /dev/null
@@ -1,326 +0,0 @@
-#!/usr/bin/env vpython
-# Copyright 2018 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Utilities for creating a phased orderfile.
-
-The profile dump format is described in process_profiles.py. These tools assume
-profiling has been done with two phases.
-
-The first phase, labeled 0 in the filename, is called "startup" and the second,
-labeled 1, is called "interaction". These two phases are used to create an
-orderfile with three parts: the code touched only in startup, the code
-touched only during interaction, and code common to the two phases. We refer to
-these parts as the orderfile phases.
-
-Example invocation, with PROFILE_DIR the location of the profile data pulled
-from a device and LIBTYPE either monochrome or chrome as appropriate.
-./tools/cygprofile/phased_orderfile.py \
-    --profile-directory=PROFILE_DIR \
-    --instrumented-build-dir=out-android/Orderfile/ \
-    --library-name=libLIBTYPE.so --offset-output-base=PROFILE_DIR/offset
-"""
-
-import argparse
-import collections
-import glob
-import itertools
-import logging
-import os.path
-
-import process_profiles
-
-
-# Files matched when using this script to analyze directly (see main()).
-PROFILE_GLOB = 'profile-hitmap-*.txt_*'
-
-
-OrderfilePhaseOffsets = collections.namedtuple(
-    'OrderfilePhaseOffsets', ('startup', 'common', 'interaction'))
-
-
-class PhasedAnalyzer(object):
-  """A class which collects analysis around phased orderfiles.
-
-  It maintains common data such as symbol table information to make analysis
-  more convenient.
-  """
-  # The process name of the browser as used in the profile dumps.
-  BROWSER = 'browser'
-
-  def __init__(self, profiles, processor):
-    """Intialize.
-
-    Args:
-      profiles (ProfileManager) Manager of the profile dump files.
-      processor (SymbolOffsetProcessor) Symbol table processor for the dumps.
-    """
-    self._profiles = profiles
-    self._processor = processor
-
-    # These members cache various computed values.
-    self._phase_offsets = None
-    self._annotated_offsets = None
-    self._process_list = None
-
-  def GetOffsetsForMemoryFootprint(self):
-    """Get offsets organized to minimize the memory footprint.
-
-    The startup, common and interaction offsets are computed for each
-    process. Any symbols used by one process in startup or interaction that are
-    used in a different phase by another process are moved to the common
-    section. This should minimize the memory footprint by keeping startup- or
-    interaction-only pages clean, at the possibly expense of startup time, as
-    more of the common section will need to be loaded. To mitigate that effect,
-    symbols moved from startup are placed at the beginning of the common
-    section, and those moved from interaction are placed at the end.
-
-    Browser startup symbols are placed at the beginning of the startup section
-    in the hope of working out with native library prefetching to minimize
-    startup time.
-
-    Returns:
-      OrdrerfilePhaseOffsets as described above.
-    """
-    startup = []
-    common_head = []
-    common = []
-    common_tail = []
-    interaction = []
-
-    process_offsets = {p: self._GetCombinedProcessOffsets(p)
-                       for p in self._GetProcessList()}
-    assert self.BROWSER in process_offsets.keys()
-
-    any_startup = set()
-    any_interaction = set()
-    any_common = set()
-    for offsets in process_offsets.itervalues():
-      any_startup |= set(offsets.startup)
-      any_interaction |= set(offsets.interaction)
-      any_common |= set(offsets.common)
-
-    already_added = set()
-    # This helper function splits |offsets|, adding to |alternate| all offsets
-    # that are in |interfering| or are already known to be common, and otherwise
-    # adding to |target|.
-    def add_process_offsets(offsets, interfering, target, alternate):
-      for o in offsets:
-        if o in already_added:
-          continue
-        if o in interfering or o in any_common:
-          alternate.append(o)
-        else:
-          target.append(o)
-        already_added.add(o)
-
-    # This helper updates |common| with new members of |offsets|.
-    def add_common_offsets(offsets):
-      for o in offsets:
-        if o not in already_added:
-          common.append(o)
-          already_added.add(o)
-
-    add_process_offsets(process_offsets[self.BROWSER].startup,
-                        any_interaction, startup, common_head)
-    add_process_offsets(process_offsets[self.BROWSER].interaction,
-                        any_startup, interaction, common_tail)
-    add_common_offsets(process_offsets[self.BROWSER].common)
-
-    for p in process_offsets:
-      if p == self.BROWSER:
-        continue
-      add_process_offsets(process_offsets[p].startup,
-                          any_interaction, startup, common_head)
-      add_process_offsets(process_offsets[p].interaction,
-                          any_startup, interaction, common_tail)
-      add_common_offsets(process_offsets[p].common)
-
-    return OrderfilePhaseOffsets(
-        startup=startup,
-        common=(common_head + common + common_tail),
-        interaction=interaction)
-
-  def GetOffsetsForStartup(self):
-    """Get offsets organized to minimize startup time.
-
-    The startup, common and interaction offsets are computed for each
-    process. Any symbol used by one process in interaction that appears in a
-    different phase in another process is moved to common, but any symbol that
-    appears in startup for *any* process stays in startup.
-
-    This should maximize startup performance at the expense of increasing the
-    memory footprint, as some startup symbols will not be able to page out.
-
-    The startup symbols in the browser process appear first in the hope of
-    working out with native library prefetching to minimize startup time.
-    """
-    startup = []
-    common = []
-    interaction = []
-    already_added = set()
-
-    process_offsets = {p: self._GetCombinedProcessOffsets(p)
-                       for p in self._GetProcessList()}
-    startup.extend(process_offsets[self.BROWSER].startup)
-    already_added |= set(process_offsets[self.BROWSER].startup)
-    common.extend(process_offsets[self.BROWSER].common)
-    already_added |= set(process_offsets[self.BROWSER].common)
-    interaction.extend(process_offsets[self.BROWSER].interaction)
-    already_added |= set(process_offsets[self.BROWSER].interaction)
-
-    for process, offsets in process_offsets.iteritems():
-      if process == self.BROWSER:
-        continue
-      startup.extend(o for o in offsets.startup
-                     if o not in already_added)
-      already_added |= set(offsets.startup)
-      common.extend(o for o in offsets.common
-                     if o not in already_added)
-      already_added |= set(offsets.common)
-      interaction.extend(o for o in offsets.interaction
-                     if o not in already_added)
-      already_added |= set(offsets.interaction)
-
-    return OrderfilePhaseOffsets(
-        startup=startup, common=common, interaction=interaction)
-
-  def _GetCombinedProcessOffsets(self, process):
-    """Combine offsets across runs for a particular process.
-
-    Args:
-      process (str) The process to combine.
-
-    Returns:
-      OrderfilePhaseOffsets, the startup, common and interaction offsets for the
-      process in question. The offsets are sorted arbitrarily.
-    """
-    (startup, common, interaction) = ([], [], [])
-    assert self._profiles.GetPhases() == set([0,1]), (
-        'Unexpected phases {}'.format(self._profiles.GetPhases()))
-    for o in self._GetAnnotatedOffsets():
-      startup_count = o.Count(0, process)
-      interaction_count = o.Count(1, process)
-      if not startup_count and not interaction_count:
-        continue
-      if startup_count and interaction_count:
-        common.append(o.Offset())
-      elif startup_count:
-        startup.append(o.Offset())
-      else:
-        interaction.append(o.Offset())
-    return OrderfilePhaseOffsets(
-        startup=startup, common=common, interaction=interaction)
-
-  def _GetAnnotatedOffsets(self):
-    if self._annotated_offsets is None:
-      self._annotated_offsets = self._profiles.GetAnnotatedOffsets()
-      self._processor.TranslateAnnotatedSymbolOffsets(self._annotated_offsets)
-      # A warning for missing offsets has already been emitted in
-      # TranslateAnnotatedSymbolOffsets.
-      self._annotated_offsets = filter(
-          lambda offset: offset.Offset() is not None,
-          self._annotated_offsets)
-    return self._annotated_offsets
-
-  def _GetProcessList(self):
-    if self._process_list is None:
-      self._process_list = set()
-      for o in self._GetAnnotatedOffsets():
-        self._process_list.update(o.Processes())
-    return self._process_list
-
-  def _GetOrderfilePhaseOffsets(self):
-    """Compute the phase offsets for each run.
-
-    Returns:
-      [OrderfilePhaseOffsets] Each run corresponds to an OrderfilePhaseOffsets,
-          which groups the symbol offsets discovered in the runs.
-    """
-    if self._phase_offsets is not None:
-      return self._phase_offsets
-
-    assert self._profiles.GetPhases() == set([0, 1]), (
-        'Unexpected phases {}'.format(self._profiles.GetPhases()))
-    self._phase_offsets = []
-    for first, second in zip(self._profiles.GetRunGroupOffsets(phase=0),
-                             self._profiles.GetRunGroupOffsets(phase=1)):
-      all_first_offsets = self._processor.GetReachedOffsetsFromDump(first)
-      all_second_offsets = self._processor.GetReachedOffsetsFromDump(second)
-      first_offsets_set = set(all_first_offsets)
-      second_offsets_set = set(all_second_offsets)
-      common_offsets_set = first_offsets_set & second_offsets_set
-      first_offsets_set -= common_offsets_set
-      second_offsets_set -= common_offsets_set
-
-      startup = [x for x in all_first_offsets
-                 if x in first_offsets_set]
-
-      interaction = [x for x in all_second_offsets
-                     if x in second_offsets_set]
-
-      common_seen = set()
-      common = []
-      for x in itertools.chain(all_first_offsets, all_second_offsets):
-        if x in common_offsets_set and x not in common_seen:
-          common_seen.add(x)
-          common.append(x)
-
-      self._phase_offsets.append(OrderfilePhaseOffsets(
-          startup=startup,
-          interaction=interaction,
-          common=common))
-
-    return self._phase_offsets
-
-
-def _CreateArgumentParser():
-  parser = argparse.ArgumentParser(
-      description='Compute statistics on phased orderfiles')
-  parser.add_argument('--profile-directory', type=str, required=True,
-                      help=('Directory containing profile runs. Files '
-                            'matching {} are used.'.format(PROFILE_GLOB)))
-  parser.add_argument('--instrumented-build-dir', type=str,
-                      help='Path to the instrumented build (eg, out/Orderfile)',
-                      required=True)
-  parser.add_argument('--library-name', default='libchrome.so',
-                      help=('Chrome shared library name (usually libchrome.so '
-                            'or libmonochrome.so'))
-  parser.add_argument('--offset-output-base', default=None, type=str,
-                      help=('If present, a base name to output offsets to. '
-                            'No offsets are output if this is missing. The '
-                            'base name is suffixed with _for_memory and '
-                            '_for_startup, corresponding to the two sets of '
-                            'offsets produced.'))
-  return parser
-
-
-def main():
-  logging.basicConfig(level=logging.INFO)
-  parser = _CreateArgumentParser()
-  args = parser.parse_args()
-  profiles = process_profiles.ProfileManager(itertools.chain.from_iterable(
-      glob.glob(os.path.join(d, PROFILE_GLOB))
-      for d in args.profile_directory.split(',')))
-  processor = process_profiles.SymbolOffsetProcessor(os.path.join(
-      args.instrumented_build_dir, 'lib.unstripped', args.library_name))
-  phaser = PhasedAnalyzer(profiles, processor)
-  for name, offsets in (
-      ('_for_memory', phaser.GetOffsetsForMemoryFootprint()),
-      ('_for_startup', phaser.GetOffsetsForStartup())):
-    logging.info('%s Offset sizes (KiB):\n'
-                 '%s startup\n%s common\n%s interaction',
-                 name, processor.OffsetsPrimarySize(offsets.startup) / 1024,
-                 processor.OffsetsPrimarySize(offsets.common) / 1024,
-                 processor.OffsetsPrimarySize(offsets.interaction) / 1024)
-    if args.offset_output_base is not None:
-      with file(args.offset_output_base + name, 'w') as output:
-        output.write('\n'.join(
-            str(i) for i in (offsets.startup + offsets.common +
-                             offsets.interaction)))
-        output.write('\n')
-
-
-if __name__ == '__main__':
-  main()
diff --git a/tools/cygprofile/phased_orderfile_unittest.py b/tools/cygprofile/phased_orderfile_unittest.py
deleted file mode 100755
index 100121c9..0000000
--- a/tools/cygprofile/phased_orderfile_unittest.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/env vpython
-# Copyright 2018 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Tests for phased_orderfile.py."""
-
-import collections
-import unittest
-
-import phased_orderfile
-import process_profiles
-
-from test_utils import (ProfileFile,
-                        SimpleTestSymbol,
-                        TestSymbolOffsetProcessor,
-                        TestProfileManager)
-
-
-class Mod10Processor(process_profiles.SymbolOffsetProcessor):
-  """A restricted mock for a SymbolOffsetProcessor.
-
-  This only implements {Translate,Get}ReacheOffsetsFromDump, and works by
-  mapping a dump offset to offset - (offset % 10). If the dump offset is
-  negative, it is marked as not found.
-  """
-  def __init__(self):
-    super(Mod10Processor, self).__init__(None)
-
-  def _TranslateReachedOffsetsFromDump(self, items, get, update):
-    for i in items:
-      x = get(i)
-      if x >= 0:
-        update(i, x - (x % 10))
-      else:
-        update(i, None)
-
-
-class IdentityProcessor(process_profiles.SymbolOffsetProcessor):
-  """A restricted mock for a SymbolOffsetProcessor.
-
-  This only implements {Translate,Get}ReachedOffsetsFromDump, and maps the dump
-  offset to itself. If the dump offset is negative, it is marked as not found.
-  """
-  def __init__(self):
-    super(IdentityProcessor, self).__init__(None)
-
-  def _TranslateReachedOffsetsFromDump(self, items, get, update):
-    for i in items:
-      x = get(i)
-      if x >= 0:
-        update(i, x)
-      else:
-        update(i, None)
-
-
-class PhasedOrderfileTestCase(unittest.TestCase):
-
-  def setUp(self):
-    self._file_counter = 0
-
-  def testGetOrderfilePhaseOffsets(self):
-    mgr = TestProfileManager({
-        ProfileFile(0, 0): [12, 21, -1, 33],
-        ProfileFile(0, 1): [31, 49, 52],
-        ProfileFile(100, 0): [113, 128],
-        ProfileFile(200, 1): [132, 146],
-        ProfileFile(300, 0): [19, 20, 32],
-        ProfileFile(300, 1): [24, 39]})
-    phaser = phased_orderfile.PhasedAnalyzer(mgr, Mod10Processor())
-    opo = lambda s, c, i: phased_orderfile.OrderfilePhaseOffsets(
-        startup=s, common=c, interaction=i)
-    self.assertListEqual([opo([10, 20], [30], [40, 50]),
-                          opo([110, 120], [], []),
-                          opo([], [], [130, 140]),
-                          opo([10], [20, 30], [])],
-                         phaser._GetOrderfilePhaseOffsets())
-
-  def testGetCombinedProcessOffsets(self):
-    mgr = TestProfileManager({
-        ProfileFile(40, 0, ''): [1, 2, 3],
-        ProfileFile(50, 1, ''): [3, 4, 5],
-        ProfileFile(51, 0, 'renderer'): [2, 3, 6],
-        ProfileFile(51, 1, 'gpu-process'): [6, 7],
-        ProfileFile(70, 0, ''): [2, 8, 9],
-        ProfileFile(70, 1, ''): [9]})
-    phaser = phased_orderfile.PhasedAnalyzer(mgr, IdentityProcessor())
-    offsets = phaser._GetCombinedProcessOffsets('browser')
-    self.assertListEqual([1, 2, 8], sorted(offsets.startup))
-    self.assertListEqual([4, 5], sorted(offsets.interaction))
-    self.assertListEqual([3, 9], sorted(offsets.common))
-
-    offsets = phaser._GetCombinedProcessOffsets('gpu-process')
-    self.assertListEqual([], sorted(offsets.startup))
-    self.assertListEqual([6, 7], sorted(offsets.interaction))
-    self.assertListEqual([], sorted(offsets.common))
-
-    self.assertListEqual(['browser', 'gpu-process', 'renderer'],
-                         sorted(phaser._GetProcessList()))
-
-  def testGetOffsetVariations(self):
-    mgr = TestProfileManager({
-        ProfileFile(40, 0, ''): [1, 2, 3],
-        ProfileFile(50, 1, ''): [3, 4, -10, 5],
-        ProfileFile(51, 0, 'renderer'): [2, 3, 6],
-        ProfileFile(51, 1, 'gpu-process'): [6, 7],
-        ProfileFile(70, 0, ''): [2, 6, 8, 9],
-        ProfileFile(70, 1, ''): [9]})
-    phaser = phased_orderfile.PhasedAnalyzer(mgr, IdentityProcessor())
-    offsets = phaser.GetOffsetsForMemoryFootprint()
-    self.assertListEqual([1, 2, 8], offsets.startup)
-    self.assertListEqual([6, 3, 9], offsets.common)
-    self.assertListEqual([4, 5, 7], offsets.interaction)
-
-    offsets = phaser.GetOffsetsForStartup()
-    self.assertListEqual([1, 2, 6, 8], offsets.startup)
-    self.assertListEqual([3, 9], offsets.common)
-    self.assertListEqual([4, 5, 7], offsets.interaction)
-
-
-if __name__ == "__main__":
-  unittest.main()
diff --git a/tools/cygprofile/process_profiles.py b/tools/cygprofile/process_profiles.py
index fff34483..1da9861 100755
--- a/tools/cygprofile/process_profiles.py
+++ b/tools/cygprofile/process_profiles.py
@@ -6,6 +6,7 @@
 """Lists all the reached symbols from an instrumentation dump."""
 
 import argparse
+import collections
 import logging
 import operator
 import os
@@ -152,16 +153,17 @@
       logging.warning('%d offsets do not have matching symbol', not_found)
     return symbols
 
-  def OffsetsPrimarySize(self, offsets):
-    """Computes the total primary size of a set of offsets.
+  def SymbolsSize(self, symbols):
+    """Computes the total size of a set of symbol names.
 
     Args:
-      offsets (int iterable) a set of offsets.
+      symbols (str iterable) a set of symbol names.
 
-    Returns
-      int The sum of the primary size of the offsets.
+    Returns:
+      int The sum of the sizes of the named symbols.
     """
-    return sum(self.OffsetToPrimaryMap()[x].size for x in offsets)
+    name_map = self.NameToSymbolMap()
+    return sum(name_map[sym].size for sym in symbols)
 
   def GetReachedOffsetsFromDump(self, dump):
     """Find the symbol offsets from a list of binary offsets.
@@ -231,7 +233,6 @@
       update: (lambda item, int) As described above.
     """
     dump_offset_to_symbol_info = self._GetDumpOffsetToSymbolInfo()
-    logging.info('Offset to Symbol size = %d', len(dump_offset_to_symbol_info))
     for i in items:
       dump_offset = get(i)
       idx = dump_offset / 2
@@ -421,6 +422,13 @@
               phase, process)
     return offset_map.values()
 
+  def GetProcessOffsetLists(self):
+    """Returns all symbol offsets lists, grouped by process."""
+    offsets_by_process = collections.defaultdict(list)
+    for f in self._filenames:
+      offsets_by_process[self._ProcessName(f)].append(self._ReadOffsets(f))
+    return offsets_by_process
+
   def GetRunGroupOffsets(self, phase=None):
     """Merges files from each run group and returns offset list for each.
 
@@ -451,7 +459,7 @@
   @classmethod
   def _ProcessName(cls, filename):
     # The filename starts with 'profile-hitmap-' and ends with
-    # '-PID-TIMESTAMP.text_X'. Anything in between is the process name. The
+    # '-PID-TIMESTAMP.txt_X'. Anything in between is the process name. The
-    # browser has an empty process name, which is insterted here.
+    # browser has an empty process name, which is inserted here.
     process_name_parts = os.path.basename(filename).split('-')[2:-2]
     if not process_name_parts:
diff --git a/tools/cygprofile/process_profiles_unittest.py b/tools/cygprofile/process_profiles_unittest.py
index 5c4641d..d65c388 100755
--- a/tools/cygprofile/process_profiles_unittest.py
+++ b/tools/cygprofile/process_profiles_unittest.py
@@ -107,13 +107,13 @@
     self.assertListEqual(symbols[1:3],
                          processor.MatchSymbolNames(['Y', 'X']))
 
-  def testOffsetsPrimarySize(self):
+  def testSymbolsSize(self):
     symbols = [SimpleTestSymbol('W', 10, 1),
                SimpleTestSymbol('X', 20, 2),
                SimpleTestSymbol('Y', 30, 4),
                SimpleTestSymbol('Z', 40, 8)]
     processor = TestSymbolOffsetProcessor(symbols)
-    self.assertEqual(13, processor.OffsetsPrimarySize([10, 30, 40]))
+    self.assertEqual(13, processor.SymbolsSize(['W', 'Y', 'Z']))
 
   def testMedian(self):
     self.assertEquals(None, process_profiles._Median([]))
diff --git a/tools/cygprofile/profile_android_startup.py b/tools/cygprofile/profile_android_startup.py
index 32a1151..fbe4faa 100755
--- a/tools/cygprofile/profile_android_startup.py
+++ b/tools/cygprofile/profile_android_startup.py
@@ -226,6 +226,7 @@
     Args:
       files: ([str]) List of pregenerated files.
     """
+    logging.info('Using pregenerated profiles')
     self._pregenerated_profiles = files
 
   def RunCygprofileTests(self):
@@ -300,9 +301,11 @@
       NoProfileDataError: No data was found on the device.
     """
     if self._pregenerated_profiles:
-      logging.info('Using pregenerated profiles instead of running profile')
+      logging.info('Using pregenerated profiles instead of running '
+                   'system health profile')
       logging.info('Profile files: %s', '\n'.join(self._pregenerated_profiles))
       return self._pregenerated_profiles
+    logging.info('Running system health profile')
     self._SetUpDeviceFolders()
     self._RunCommand(['tools/perf/run_benchmark',
                       '--device={}'.format(self._device.serial),