Added time and memory metrics and xz compression tests


diff --git a/courgette/analyze_mem_test b/courgette/analyze_mem_test
new file mode 100755
index 0000000..9470fa2
--- /dev/null
+++ b/courgette/analyze_mem_test
@@ -0,0 +1,96 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Produce memory metrics for run_apply_test
+error() {
+  echo "error: ${@}" >&2
+compute_percentiles() {
+  if [ -z "${1}" ]; then
+    return;
+  fi
+  local pctls=".5 .9 1"
+  local lines=$(cat ${1} | wc -l)
+  for p in $pctls; do
+    local count="$(echo "${lines} * $p" | bc -lq | cut -d. -f1)"
+    # bc prints e.g. ".5" when lines * p < 1, leaving count empty; use 1.
+    local bytes=$(cat ${1} \
+      | cut -d' ' -f2 \
+      | sort -n \
+      | head -n${count:-1} \
+      | tail -n1)
+    echo -n "$((bytes / 1000000))MB "
+  done
+main() {
+  if [ $# -lt 1 ]; then
+    cat <<EOF
+USAGE: $(basename ${0}) dir
+Produce memory metrics for run_apply_test.  This shows the percentiles
+of the max heap size across all files.
+    exit 1
+  fi
+  local dir="${1}"
+  if [ ! -d "${dir}" ]; then
+    error "\"${dir}\" not found"
+    exit 1
+  fi
+  local metrics_dir="${dir}/metrics"
+  local metrics="${dir}/mem_per_file.txt"
+  if [ ! -f "${metrics}" ]; then
+    local metrics_tmp="${metrics}.tmp"
+    echo "computing usage percentiles for courgette.  this may take a while..."
+    find "${metrics_dir}" \
+      | grep "\.apply_mem$" \
+      | while read i; do
+      local apply_mem="${i}"
+      local unbz2_mem="${apply_mem%.apply_mem}.unbz2_mem"
+      local unxz_mem="${apply_mem%.apply_mem}.unxz_mem"
+      echo -n "$apply_mem "
+      cat "${apply_mem}" "${unbz2_mem}" "${unxz_mem}" \
+        | grep "mem_heap_B" \
+        | cut -d= -f2 \
+        | sort -nr \
+        | head -n1
+    done | sort -k2 -n > "${metrics_tmp}"
+    mv "${metrics_tmp}" "${metrics}"
+  fi
+  echo "$(compute_percentiles ${metrics})max heap per file for Courgette" \
+    "(50th 90th 100th)"
+  local metrics_bsdiff="${dir}/mem_per_file_bsdiff.txt"
+  if [ ! -f "${metrics_bsdiff}" ]; then
+    local metrics_bsdiff_tmp="${metrics_bsdiff}.tmp"
+    echo "computing usage percentiles for bsdiff.  this may take a while..."
+    find "${metrics_dir}" \
+      | grep "\.bsdiff_mem$" \
+      | while read i; do
+      local bsdiff_mem="${i}"
+      echo -n "$bsdiff_mem "
+      cat $bsdiff_mem \
+        | grep "mem_heap_B" \
+        | cut -d= -f2 \
+        | sort -nr \
+        | head -n1
+    done | sort -k2 -n > "${metrics_bsdiff_tmp}"
+    mv "${metrics_bsdiff_tmp}" "${metrics_bsdiff}"
+  fi
+  echo "$(compute_percentiles ${metrics_bsdiff})max heap per file for bsdiff" \
+    "(50th 90th 100th)"
+main "${@}"
diff --git a/courgette/analyze_stress_test b/courgette/analyze_stress_test
index 6aff656e..338eb6e5f 100755
--- a/courgette/analyze_stress_test
+++ b/courgette/analyze_stress_test
@@ -22,6 +22,24 @@
+# Given a token, search for and compute the percentiles from logfile.
+compute_percentiles() {
+  if [ ! -z "${1}" ]; then
+    local pctls=".5 .9 1"
+    local lines=$(count_result ${1})
+    for p in $pctls; do
+      local count="$(echo "${lines} * $p" | bc -lq | cut -d. -f1)"
+      # bc prints e.g. ".5" when lines * p < 1, leaving count empty; use 1.
+      echo -n $(cat ${log} \
+        | grep ${1} \
+        | cut -d' ' -f2 \
+        | sort -n \
+        | head -n${count:-1} \
+        | tail -n1)
+      echo -n "s "
+    done
+  fi
 main() {
   if [ $# -lt 1 ]; then
     cat <<EOF
@@ -46,9 +64,12 @@
 $(count_result "FAIL_DISASSEMBLE") failed to disassemble/assemble
 $(count_result "PASS_BSDIFF") succesful bsdiff patches
 $(count_result "FAIL_BSDIFF") failed bsdiff patches
-$(count_result "BEST_COURGETTE") patch(es) where courgette is smaller
-$(count_result "BEST_BSDIFF") patch(es) where bsdiff is smaller
-$(count_result "BEST_TIE") patch(es) where both are the same size
+$(count_result "BEST_COURGETTE") patch(es) where courgette is smaller (bz2)
+$(count_result "BEST_BSDIFF") patch(es) where bsdiff is smaller (bz2)
+$(count_result "BEST_TIE") patch(es) where both are the same size (bz2)
+$(count_result "XZBEST_COURGETTE") patch(es) where courgette (xz) is smaller
+$(count_result "XZBEST_BSDIFF") patch(es) where bsdiff is smaller (xz)
+$(count_result "XZBEST_TIE") patch(es) where both are the same size (xz)
   # Log file has the format "^SIZE courgette=... bsdiff=..."
@@ -56,7 +77,13 @@
     | grep "^SIZE " \
     | cut -d' ' -f2 \
     | awk -F= 'BEGIN{sum=0} {sum += $2} END{print sum}')"
-  echo "${courgette_total} bytes for a courgette payload"
+  echo "${courgette_total} bytes for a courgette payload (bz2)"
+  local courgette_total_xz="$(cat "${log}" \
+    | grep "^SIZE " \
+    | cut -d' ' -f4 \
+    | awk -F= 'BEGIN{sum=0} {sum += $2} END{print sum}')"
+  echo "${courgette_total_xz} bytes for a courgette payload (xz)"
   local bsdiff_total="$(cat "${log}" \
     | grep "^SIZE " \
@@ -67,20 +94,43 @@
   local best_total="$(cat "${log}" \
     | grep "^BEST_" \
     | awk 'BEGIN{sum=0} {sum += $2} END{print sum}')"
-    echo "${best_total} bytes for a best-choice payload"
+    echo "${best_total} bytes for a best-choice payload (bz2)"
+  local best_total_xz="$(cat "${log}" \
+    | grep "^XZBEST_" \
+    | awk 'BEGIN{sum=0} {sum += $2} END{print sum}')"
+    echo "${best_total_xz} bytes for a best-choice payload (xz)"
   local pct="$(echo "100*${best_total}/${bsdiff_total}" \
     | bc -lq \
     | awk '{printf "%.2f\n", $0}')"
-    echo "${pct}% of a bsdiff-only payload"
+    echo "${pct}% of a bsdiff-only payload (bz2)"
+  local pct="$(echo "100*${best_total_xz}/${bsdiff_total}" \
+    | bc -lq \
+    | awk '{printf "%.2f\n", $0}')"
+    echo "${pct}% of a bsdiff-only payload (xz)"
   local savings="$((bsdiff_total - best_total))"
-  echo "${savings} bytes saved by courgette"
+  echo "${savings} bytes saved by courgette (bz2)"
+  local savings_xz="$((bsdiff_total - best_total_xz))"
+  echo "${savings_xz} bytes saved by courgette (xz)"
   local pct_savings="$(echo "100*${savings}/${bsdiff_total}" \
     | bc -lq \
     | awk '{printf "%.2f\n", $0}')"
-  echo "${pct_savings}% savings"
+  echo "${pct_savings}% savings (bz2)"
+  local pct_savings="$(echo "100*${savings_xz}/${bsdiff_total}" \
+    | bc -lq \
+    | awk '{printf "%.2f\n", $0}')"
+  echo "${pct_savings}% savings (xz)"
+  echo "$(compute_percentiles "TIME_GEN")to generate a patch (50th 90th 100th)"
+  echo "$(compute_percentiles "TIME_APPLY")to apply a patch (50th 90th 100th)"
+  echo "$(compute_percentiles "TIME_BSDIFF")for bsdiff (50th 90th 100th)"
+  echo "$(compute_percentiles "TIME_BSPATCH")for bspatch (50th 90th 100th)"
 main "${@}"
diff --git a/courgette/run_mem_test b/courgette/run_mem_test
new file mode 100755
index 0000000..07a8e5bf
--- /dev/null
+++ b/courgette/run_mem_test
@@ -0,0 +1,69 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Collect memory usage on the patches from run_stress_test
+error() {
+  echo "error: ${@}" >&2
+main() {
+  if [ $# -lt 1 ]; then
+    cat <<EOF
+USAGE: $(basename ${0}) dir
+Collect memory usage on the patches from run_stress_test
+    exit 1
+  fi
+  local dir="${1}"
+  if [ ! -d "${dir}" ]; then
+    error "\"${dir}\" not found"
+    exit 1
+  fi
+  local patches_dir="${dir}/patches"
+  find "${patches_dir}" \
+    | grep "\.patch$" \
+    | while read i; do
+    local patch="${i}"
+    local subdir_filename="${patch:$((${#patches_dir} + 1))}"
+    local out_base="${dir}/metrics/${subdir_filename}"
+    mkdir -p "$(dirname ${out_base})"
+    local original="${subdir_filename%.patch}"
+    local applied="${out_base}.applied"
+    local apply_mem="${out_base}.apply_mem"
+    valgrind --tool=massif --massif-out-file="${apply_mem}" courgette -apply \
+      "${original}" "${patch}" "${applied}" &
+    local bz2_patch="${i}.bz2"
+    local unbz2="${out_base}.unbz2"
+    local unbz2_mem="${out_base}.unbz2_mem"
+    valgrind --tool=massif --massif-out-file="${unbz2_mem}" bunzip2 -c \
+      "${bz2_patch}" > "${unbz2}" &
+    local xz_patch="${i}.xz"
+    local unxz="${out_base}.unxz"
+    local unxz_mem="${out_base}.unxz_mem"
+    valgrind --tool=massif --massif-out-file="${unxz_mem}" unxz -c \
+      "${xz_patch}" > "${unxz}" &
+    local bsdiff_patch="${patch%.patch}.bsdiff_patch"
+    local applied_bsdiff="${out_base}.applied_bsdiff"
+    local bsdiff_mem="${out_base}.bsdiff_mem"
+    valgrind --tool=massif --massif-out-file="${bsdiff_mem}" bspatch \
+      "${original}" "${applied_bsdiff}" "${bsdiff_patch}" &
+    wait
+  done
+main "${@}"
diff --git a/courgette/run_stress_test b/courgette/run_stress_test
index 1110e59..442ad3d 100755
--- a/courgette/run_stress_test
+++ b/courgette/run_stress_test
@@ -11,6 +11,7 @@
 if [ $# -lt 2 ]; then
   cat <<EOF
@@ -90,8 +91,12 @@
     mkdir -p "$(dirname "${patch}")"
     mkdir -p "$(dirname "${apply}")"
-    courgette -gen "${file1}" "${file2}" "${patch}"
-    courgette -apply "${file1}" "${patch}" "${apply}"
+    echo "courgette -gen"
+    ${time} -f "TIME_GEN %e ${file1}" courgette -gen "${file1}" "${file2}" \
+      "${patch}"
+    echo "courgette -apply"
+    ${time} -f "TIME_APPLY %e ${file1}" courgette -apply "${file1}" "${patch}" \
+      "${apply}"
     cmp -s "${file2}" "${apply}"
     if [ "${?}" -ne 0 ]; then
       echo "FAIL_COURGETTE ${file1}"
@@ -99,17 +104,26 @@
       echo "PASS_COURGETTE ${file1}"
       local bsdiff_patch="${patches_dir}/${file1}.bsdiff_patch"
       local bsdiff_apply="${applied_dir}/${file2}.bsdiff_applied"
-      bsdiff "${file1}" "${file2}" "${bsdiff_patch}"
-      bspatch "${file1}" "${bsdiff_apply}" "${bsdiff_patch}"
+      echo "RUN bsdiff"
+      ${time} -f "TIME_BSDIFF %e ${file1}" bsdiff "${file1}" "${file2}" \
+        "${bsdiff_patch}"
+      echo "RUN bspatch"
+      ${time} -f "TIME_BSPATCH %e ${file1}" bspatch "${file1}" \
+        "${bsdiff_apply}" "${bsdiff_patch}"
       cmp -s "${file2}" "${bsdiff_apply}"
       if [ "${?}" -ne 0 ]; then
         echo "FAIL_BSDIFF ${file1}"
         echo "PASS_BSDIFF ${file1}"
-        bzip2 -k -9 "${patch}"
-        local patch_size="$(du -b "${patch}.bz2" | cut -f1)"
+        local bz2_patch="${patch}.bz2"
+        local xz_patch="${patch}.xz"
+        bzip2 -9 -c "${patch}" > "${bz2_patch}"
+        xz -9 -c "${patch}" > "${xz_patch}"
+        local patch_size="$(du -b "${bz2_patch}" | cut -f1)"
         local bsdiff_patch_size="$(du -b "${bsdiff_patch}" | cut -f1)"
-        echo "SIZE courgette=${patch_size} bsdiff=${bsdiff_patch_size} ${file1}"
+        local xz_patch_size="$(du -b "${xz_patch}" | cut -f1)"
+        echo "SIZE courgette=${patch_size} bsdiff=${bsdiff_patch_size}" \
+          "courgette_xz=${xz_patch_size} ${file1}"
         if [ "${patch_size}" -eq "${bsdiff_patch_size}" ]; then
           echo "BEST_TIE ${patch_size} ${file1}"
         elif [ "${patch_size}" -lt "${bsdiff_patch_size}" ]; then
@@ -117,6 +131,13 @@
         elif [ "${patch_size}" -gt "${bsdiff_patch_size}" ]; then
           echo "BEST_BSDIFF ${bsdiff_patch_size} ${file1}"
+        if [ "${xz_patch_size}" -eq "${bsdiff_patch_size}" ]; then
+          echo "XZBEST_TIE ${xz_patch_size} ${file1}"
+        elif [ "${xz_patch_size}" -lt "${bsdiff_patch_size}" ]; then
+          echo "XZBEST_COURGETTE ${xz_patch_size} ${file1}"
+        elif [ "${xz_patch_size}" -gt "${bsdiff_patch_size}" ]; then
+          echo "XZBEST_BSDIFF ${bsdiff_patch_size} ${file1}"
+        fi
@@ -158,7 +179,10 @@
 $(count_result "PASS_BSDIFF") succesful bsdiff patches
 $(count_result "FAIL_BSDIFF") failed bsdiff patches
-$(count_result "BEST_COURGETTE") patch(es) where courgette is smaller
-$(count_result "BEST_BSDIFF") patch(es) where bsdiff is smaller
-$(count_result "BEST_TIE") patch(es) where both are the same size
+$(count_result "BEST_COURGETTE") patch(es) where courgette (bz2) is smaller
+$(count_result "BEST_BSDIFF") patch(es) where bsdiff is smaller (bz2)
+$(count_result "BEST_TIE") patch(es) where both are the same size (bz2)
+$(count_result "XZBEST_COURGETTE") patch(es) where courgette (xz) is smaller
+$(count_result "XZBEST_BSDIFF") patch(es) where bsdiff is smaller (xz)
+$(count_result "XZBEST_TIE") patch(es) where both are the same size (xz)