From 4b46546af0d230a56b4f66d0fcc2ede7b89c09b5 Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Mon, 4 May 2015 20:56:28 +1000
Subject: [PATCH] Make `BitVec::process` faster (branch free).

This makes the `bit::vec::bench::bench_bit_vec_big_union` benchmark go
from `774 ns/iter (+/- 190)` to `602 ns/iter (+/- 5)`.

(There's room for more work here too: if one can guarantee 128-bit
alignment for the vector, the compiler actually optimises `union`,
`intersection` etc. to SIMD instructions, which end up being ~5x faster
than the original version, and 4x faster than the optimised version in
this patch.)
---
 src/libcollections/bit.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/libcollections/bit.rs b/src/libcollections/bit.rs
index d9151298a35e3..ba3e144e6287e 100644
--- a/src/libcollections/bit.rs
+++ b/src/libcollections/bit.rs
@@ -210,15 +210,13 @@ impl BitVec {
         assert_eq!(self.len(), other.len());
         // This could theoretically be a `debug_assert!`.
         assert_eq!(self.storage.len(), other.storage.len());
-        let mut changed = false;
+        let mut changed_bits = 0;
         for (a, b) in self.blocks_mut().zip(other.blocks()) {
             let w = op(*a, b);
-            if *a != w {
-                changed = true;
-                *a = w;
-            }
+            changed_bits |= *a ^ w;
+            *a = w;
         }
-        changed
+        changed_bits != 0
     }
 
     /// Iterator over mutable refs to  the underlying blocks of data.