diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..673b332 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,4 @@ +# googletest requires C++14 or above +build --cxxopt='-std=c++17' +# Enable Bzlmod for every Bazel command +common --enable_bzlmod diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..fc2ad10 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,135 @@ +# Copyright 2021 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: ci +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build-and-test: + name: >- + CI + ${{ matrix.os }} + ${{ matrix.cpu_level }} + ${{ matrix.compiler }} + ${{ matrix.optimized && 'release' || 'debug' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + compiler: [clang, gcc, msvc] + os: [ubuntu-latest, macos-latest, windows-latest] + cpu_level: [baseline, avx, avx2] + optimized: [true, false] + exclude: + # MSVC only works on Windows. + - os: ubuntu-latest + compiler: msvc + - os: macos-latest + compiler: msvc + # GitHub servers seem to run on pre-Haswell CPUs. Attempting to use AVX2 + # results in crashes. + - os: macos-latest + cpu_level: avx2 + # Not testing with GCC on macOS. + - os: macos-latest + compiler: gcc + # Only testing with MSVC on Windows. 
+ - os: windows-latest + compiler: clang + - os: windows-latest + compiler: gcc + include: + - compiler: clang + CC: clang + CXX: clang++ + - compiler: gcc + CC: gcc + CXX: g++ + - compiler: msvc + CC: + CXX: + + env: + CMAKE_BUILD_DIR: ${{ github.workspace }}/build + CMAKE_BUILD_TYPE: ${{ matrix.optimized && 'RelWithDebInfo' || 'Debug' }} + CC: ${{ matrix.CC }} + CXX: ${{ matrix.CXX }} + SNAPPY_REQUIRE_AVX: ${{ matrix.cpu_level == 'baseline' && '0' || '1' }} + SNAPPY_REQUIRE_AVX2: ${{ matrix.cpu_level == 'avx2' && '1' || '0' }} + SNAPPY_FUZZING_BUILD: >- + ${{ (startsWith(matrix.os, 'ubuntu') && matrix.compiler == 'clang' && + !matrix.optimized) && '1' || '0' }} + BINARY_SUFFIX: ${{ startsWith(matrix.os, 'windows') && '.exe' || '' }} + BINARY_PATH: >- + ${{ format( + startsWith(matrix.os, 'windows') && '{0}\build\{1}\' || '{0}/build/', + github.workspace, + matrix.optimized && 'RelWithDebInfo' || 'Debug') }} + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + - name: Generate build config + run: >- + cmake -S "${{ github.workspace }}" -B "${{ env.CMAKE_BUILD_DIR }}" + -DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }} + -DCMAKE_INSTALL_PREFIX=${{ runner.temp }}/install_test/ + -DSNAPPY_FUZZING_BUILD=${{ env.SNAPPY_FUZZING_BUILD }} + -DSNAPPY_REQUIRE_AVX=${{ env.SNAPPY_REQUIRE_AVX }} + -DSNAPPY_REQUIRE_AVX2=${{ env.SNAPPY_REQUIRE_AVX2 }} + + - name: Build + run: >- + cmake --build "${{ env.CMAKE_BUILD_DIR }}" + --config "${{ env.CMAKE_BUILD_TYPE }}" + + - name: Run C++ API Tests + run: ${{ env.BINARY_PATH }}snappy_unittest${{ env.BINARY_SUFFIX }} + + - name: Run Compression Fuzzer + if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }} + run: >- + ${{ env.BINARY_PATH }}snappy_compress_fuzzer${{ env.BINARY_SUFFIX }} + -runs=1000 -close_fd_mask=3 + + - name: Run Decompression Fuzzer + if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }} + run: >- + ${{ env.BINARY_PATH }}snappy_uncompress_fuzzer${{ env.BINARY_SUFFIX }} + -runs=1000 -close_fd_mask=3 + + - name: Run Benchmarks + run: ${{ env.BINARY_PATH }}snappy_benchmark${{ env.BINARY_SUFFIX }} + + - name: Test CMake installation + run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --target install diff --git a/.github/workflows/riscv64-qemu-test.yaml b/.github/workflows/riscv64-qemu-test.yaml new file mode 100644 index 0000000..3b97480 --- /dev/null +++ b/.github/workflows/riscv64-qemu-test.yaml @@ -0,0 +1,43 @@ +name: riscv64-qemu-test + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + env: + RISCV_CROSSCOMPILE: "ON" + riscv_gnu_toolchain_download_path: https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2025.07.03/riscv64-glibc-ubuntu-24.04-gcc-nightly-2025.07.03-nightly.tar.xz + RISCV_PATH: /opt/riscv + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y --no-install-recommends \ + qemu-user qemu-user-static \ + build-essential \ + cmake \ + git + sudo mkdir -p $RISCV_PATH + wget ${riscv_gnu_toolchain_download_path} -O riscv-toolchain.tar.xz + sudo tar -xvf riscv-toolchain.tar.xz -C $RISCV_PATH --strip-components=1 + sudo sed -i "s|libdir='/mnt/riscv/riscv64-unknown-linux-gnu/lib'|libdir='$RISCV_PATH/riscv64-unknown-linux-gnu/lib'|g" $RISCV_PATH/riscv64-unknown-linux-gnu/lib/libatomic.la + + - name: Build and Run Unit Tests + run: | + export PATH=$RISCV_PATH/bin:$PATH + export LD_LIBRARY_PATH="/opt/riscv/lib:$LD_LIBRARY_PATH" + export QEMU_LD_PREFIX=$RISCV_PATH/sysroot + mkdir build && cd build + cmake 
-DCMAKE_BUILD_TYPE=Release ../ + make -j$(nproc) + make test + + - name: Run Benchmark + run: ./build/snappy_benchmark + working-directory: ./ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c8cf0e --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Editors. +*.sw* +.vscode +.DS_Store + +# Build directory. +build/ +/bazel-* +MODULE.bazel.lock +out/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..06c3fd3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "third_party/benchmark"] + path = third_party/benchmark + url = https://github.com/google/benchmark.git +[submodule "third_party/googletest"] + path = third_party/googletest + url = https://github.com/google/googletest.git diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 88c28fd..0000000 --- a/.travis.yml +++ /dev/null @@ -1,53 +0,0 @@ -# Build matrix / environment variables are explained on: -# http://about.travis-ci.org/docs/user/build-configuration/ -# This file can be validated on: http://lint.travis-ci.org/ - -sudo: false -dist: trusty -language: cpp - -compiler: - - gcc - - clang -os: - - linux - - osx - -env: - - BUILD_TYPE=Debug - - BUILD_TYPE=RelWithDebInfo - -matrix: - allow_failures: - - compiler: clang - env: BUILD_TYPE=RelWithDebInfo - -addons: - apt: - # List of whitelisted in travis packages for ubuntu-precise can be found here: - # https://github.com/travis-ci/apt-package-whitelist/blob/master/ubuntu-precise - # List of whitelisted in travis apt-sources: - # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-trusty-4.0 - packages: - - cmake - - gcc-6 - - g++-6 - - clang-4.0 - -install: -# Travis doesn't have a nice way to install homebrew packages yet. -# https://github.com/travis-ci/travis-ci/issues/5377 -- if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update; fi -- if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew install gcc@6; fi -# /usr/bin/gcc is stuck to old versions by on both Linux and OSX. -- if [ "$CXX" = "g++" ]; then export CXX="g++-6" CC="gcc-6"; fi -- echo ${CC} -- echo ${CXX} -- ${CXX} --version - -script: -- mkdir -p build && cd build && cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE && - CTEST_OUTPUT_ON_FAILURE=1 make all test \ No newline at end of file diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 0000000..e6622ff --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,211 @@ +# Copyright 2023 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +SNAPPY_VERSION = (1, 2, 2) + +config_setting( + name = "windows", + constraint_values = ["@platforms//os:windows"], +) + +cc_library( + name = "config", + hdrs = ["config.h"], + defines = ["HAVE_CONFIG_H"], +) + +cc_library( + name = "snappy-stubs-public", + hdrs = [":snappy-stubs-public.h"], +) + +cc_library( + name = "snappy-stubs-internal", + srcs = ["snappy-stubs-internal.cc"], + hdrs = ["snappy-stubs-internal.h"], + deps = [ + ":config", + ":snappy-stubs-public", + ], +) + +cc_library( + name = "snappy", + srcs = [ + "snappy.cc", + "snappy-internal.h", + "snappy-sinksource.cc", + ], + hdrs = [ + "snappy.h", + "snappy-sinksource.h", + ], + copts = select({ + ":windows": [], + "//conditions:default": [ + "-Wno-sign-compare", + ], + }), + deps = [ + ":config", + ":snappy-stubs-internal", + ":snappy-stubs-public", + ], +) + +cc_library( + name = "snappy-c", + srcs = ["snappy-c.cc"], + hdrs = ["snappy-c.h"], + deps = [":snappy"], +) + +filegroup( + name = "testdata", + srcs = glob(["testdata/*"]), +) + +cc_library( + name = "snappy-test", + testonly = True, + srcs = [ + "snappy-test.cc", + "snappy_test_data.cc", + ], + hdrs = [ + "snappy-test.h", + "snappy_test_data.h", + ], + deps = [":snappy-stubs-internal"], +) + +cc_test( + name = "snappy_benchmark", + srcs = ["snappy_benchmark.cc"], + data = [":testdata"], + deps = [ + ":snappy", + ":snappy-test", + "@com_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "snappy_unittest", + srcs = [ + "snappy_unittest.cc", + ], + data = [":testdata"], + deps = [ + ":snappy", + ":snappy-test", + "@com_google_googletest//:gtest_main", + ], +) + +# Generate a config.h similar to what cmake would produce. 
+genrule(
+    name = "config_h",
+    outs = ["config.h"],
+    cmd = """cat <<EOF >$@
+#define HAVE_STDDEF_H 1
+#define HAVE_STDINT_H 1
+
+#ifdef __has_builtin
+#  if !defined(HAVE_BUILTIN_EXPECT) && __has_builtin(__builtin_expect)
+#    define HAVE_BUILTIN_EXPECT 1
+#  endif
+#  if !defined(HAVE_BUILTIN_CTZ) && __has_builtin(__builtin_ctzll)
+#    define HAVE_BUILTIN_CTZ 1
+#  endif
+#  if !defined(HAVE_BUILTIN_PREFETCH) && __has_builtin(__builtin_prefetch)
+#    define HAVE_BUILTIN_PREFETCH 1
+#  endif
+#elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#  ifndef HAVE_BUILTIN_EXPECT
+#    define HAVE_BUILTIN_EXPECT 1
+#  endif
+#  ifndef HAVE_BUILTIN_CTZ
+#    define HAVE_BUILTIN_CTZ 1
+#  endif
+#  ifndef HAVE_BUILTIN_PREFETCH
+#    define HAVE_BUILTIN_PREFETCH 1
+#  endif
+#endif
+
+#if defined(_WIN32) && !defined(HAVE_WINDOWS_H)
+#define HAVE_WINDOWS_H 1
+#endif
+
+#ifdef __has_include
+#  if !defined(HAVE_BYTESWAP_H) && __has_include(<byteswap.h>)
+#    define HAVE_BYTESWAP_H 1
+#  endif
+#  if !defined(HAVE_UNISTD_H) && __has_include(<unistd.h>)
+#    define HAVE_UNISTD_H 1
+#  endif
+#  if !defined(HAVE_SYS_ENDIAN_H) && __has_include(<sys/endian.h>)
+#    define HAVE_SYS_ENDIAN_H 1
+#  endif
+#  if !defined(HAVE_SYS_MMAN_H) && __has_include(<sys/mman.h>)
+#    define HAVE_SYS_MMAN_H 1
+#  endif
+#  if !defined(HAVE_SYS_UIO_H) && __has_include(<sys/uio.h>)
+#    define HAVE_SYS_UIO_H 1
+#  endif
+#  if !defined(HAVE_SYS_TIME_H) && __has_include(<sys/time.h>)
+#    define HAVE_SYS_TIME_H 1
+#  endif
+#endif
+
+#ifndef SNAPPY_IS_BIG_ENDIAN
+#  ifdef __s390x__
+#    define SNAPPY_IS_BIG_ENDIAN 1
+#  elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#    define SNAPPY_IS_BIG_ENDIAN 1
+#  endif
+#endif
+EOF
+""",
+)
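As a sketch of how the HAVE_ macros written into this generated config.h are consumed, the fragment below gates __builtin_expect, the builtin probed above, behind HAVE_BUILTIN_EXPECT. The LIKELY wrapper name is illustrative only; snappy's real wrappers live in snappy-stubs-internal.h.

    // Illustrative only: gating a compiler builtin behind the
    // HAVE_BUILTIN_EXPECT macro emitted into config.h above.
    // LIKELY is a hypothetical name, not snappy's actual macro.
    #include <cstdio>

    #ifdef HAVE_BUILTIN_EXPECT
    #define LIKELY(x) (__builtin_expect(!!(x), 1))
    #else
    #define LIKELY(x) (x)
    #endif

    int main() {
      int remaining = 42;
      if (LIKELY(remaining > 0)) std::printf("fast path\n");
      return 0;
    }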
+ cmd = ("""sed -e 's/$${HAVE_SYS_UIO_H_01}/!_WIN32/g' \ + -e 's/$${PROJECT_VERSION_MAJOR}/%d/g' \ + -e 's/$${PROJECT_VERSION_MINOR}/%d/g' \ + -e 's/$${PROJECT_VERSION_PATCH}/%d/g' \ + $< >$@""" % SNAPPY_VERSION), +) diff --git a/CMakeLists.txt b/CMakeLists.txt index de52666..490f5b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,156 +1,467 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 3.1) -PROJECT(Snappy VERSION 1.1.6 LANGUAGES C CXX) - -SET(CMAKE_INCLUDE_CURRENT_DIR ON) - -INCLUDE(CheckIncludeFiles) -INCLUDE(CheckLibraryExists) -INCLUDE(CheckCXXSourceCompiles) -INCLUDE(TestBigEndian) -INCLUDE(CMakePackageConfigHelpers) - -TEST_BIG_ENDIAN(WORDS_BIG_ENDIAN) -IF (WORDS_BIG_ENDIAN) - MESSAGE(STATUS "Building on big endian system") - ADD_DEFINITIONS(-DWORDS_BIGENDIAN=1) -ENDIF (WORDS_BIG_ENDIAN) - -CHECK_INCLUDE_FILES("byteswap.h" HAVE_BYTESWAP_H) -CHECK_INCLUDE_FILES("dlfcn.h" HAVE_DLFCN_H) -CHECK_INCLUDE_FILES("inttypes.h" HAVE_INTTYPES_H) -CHECK_INCLUDE_FILES("memory.h" HAVE_MEMORY_H) -CHECK_INCLUDE_FILES("stddef.h" HAVE_STDDEF_H) -CHECK_INCLUDE_FILES("stdint.h" HAVE_STDINT_H) -CHECK_INCLUDE_FILES("stdlib.h" HAVE_STDLIB_H) -CHECK_INCLUDE_FILES("strings.h" HAVE_STRINGS_H) -CHECK_INCLUDE_FILES("string.h" HAVE_STRING_H) -CHECK_INCLUDE_FILES("sys/byteswap.h" HAVE_SYS_BYTESWAP_H) -CHECK_INCLUDE_FILES("sys/endian.h" HAVE_SYS_ENDIAN_H) -CHECK_INCLUDE_FILES("sys/mman.h" HAVE_SYS_MMAN_H) -CHECK_INCLUDE_FILES("sys/resource.h" HAVE_SYS_RESOURCE_H) -CHECK_INCLUDE_FILES("sys/stat.h" HAVE_SYS_STAT_H) -CHECK_INCLUDE_FILES("sys/time.h" HAVE_SYS_TIME_H) -CHECK_INCLUDE_FILES("sys/types.h" HAVE_SYS_TYPES_H) -CHECK_INCLUDE_FILES("sys/uio.h" HAVE_SYS_UIO_H) -CHECK_INCLUDE_FILES("unistd.h" HAVE_UNISTD_H) -CHECK_INCLUDE_FILES("windows.h" HAVE_WINDOWS_H) - -IF (NOT HAVE_SYS_UIO_H) - SET(HAVE_SYS_UIO_H 0) -ENDIF (NOT HAVE_SYS_UIO_H) - -IF (NOT HAVE_STDINT_H) - SET(HAVE_STDINT_H 0) -ENDIF (NOT HAVE_STDINT_H) - -IF (NOT HAVE_STDDEF_H) - SET(HAVE_STDDEF_H 0) -ENDIF (NOT HAVE_STDDEF_H) - -CHECK_LIBRARY_EXISTS(z zlibVersion "" HAVE_LIBZ) -CHECK_LIBRARY_EXISTS(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) - -CHECK_CXX_SOURCE_COMPILES("int main(void) { return __builtin_expect(0, 1); }" - HAVE_BUILTIN_EXPECT) - -CHECK_CXX_SOURCE_COMPILES("int main(void) { return __builtin_ctzll(0); }" - HAVE_BUILTIN_CTZ) - -FIND_PACKAGE(GTest QUIET) -IF(GTEST_FOUND) - SET(HAVE_GTEST 1) -ENDIF() - -FIND_PACKAGE(Gflags QUIET) -IF(GFLAGS_FOUND) - SET(HAVE_GFLAGS 1) -ENDIF() - -CONFIGURE_FILE(${Snappy_SOURCE_DIR}/cmake/config.h.in config.h) - -# Configure snappy-stubs-public.h.in -SET(ac_cv_have_stdint_h ${HAVE_STDINT_H}) -SET(ac_cv_have_stddef_h ${HAVE_STDDEF_H}) -SET(ac_cv_have_sys_uio_h ${HAVE_SYS_UIO_H}) -CONFIGURE_FILE(${Snappy_SOURCE_DIR}/snappy-stubs-public.h.in - snappy-stubs-public.h) - -IF (WIN32) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS) -ENDIF (WIN32) - -# Define the main library. 
-ADD_LIBRARY(snappy SHARED - snappy-c.cc - snappy-c.h - snappy-sinksource.cc - snappy-sinksource.h - snappy-stubs-internal.cc - snappy-stubs-public.h - snappy.cc - snappy.h) - -TARGET_COMPILE_DEFINITIONS(snappy PRIVATE -DHAVE_CONFIG_H) - -SET_TARGET_PROPERTIES(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) - -INSTALL(FILES snappy.h - snappy-c.h - snappy-sinksource.h - ${Snappy_BINARY_DIR}/snappy-stubs-public.h - DESTINATION include) - -INSTALL(TARGETS snappy - EXPORT SnappyTargets - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) -INSTALL(EXPORT SnappyTargets NAMESPACE Snappy:: DESTINATION lib/cmake/Snappy) - -SET_TARGET_PROPERTIES(snappy PROPERTIES VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR}) - -SET(INCLUDE_INSTALL_DIR include) -SET(LIBRARY_INSTALL_DIR lib) -SET(BINARY_INSTALL_DIR bin) - -CONFIGURE_PACKAGE_CONFIG_FILE(cmake/SnappyConfig.cmake.in - ${Snappy_BINARY_DIR}/SnappyConfig.cmake - INSTALL_DESTINATION lib/Snappy/cmake - PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR BINARY_INSTALL_DIR - ) - -WRITE_BASIC_PACKAGE_VERSION_FILE(${Snappy_BINARY_DIR}/SnappyConfigVersion.cmake - COMPATIBILITY SameMajorVersion) -INSTALL(FILES ${Snappy_BINARY_DIR}/SnappyConfig.cmake - ${Snappy_BINARY_DIR}/SnappyConfigVersion.cmake - DESTINATION lib/cmake) - -ENABLE_TESTING() - -IF (HAVE_LIBZ) - LIST(APPEND COMPRESSION_LIBS z) -ENDIF (HAVE_LIBZ) - -IF (HAVE_LIBLZO2) - LIST(APPEND COMPRESSION_LIBS lzo2) -ENDIF (HAVE_LIBLZO2) - -IF (HAVE_LIBLZF) - LIST(APPEND COMPRESSION_LIBS lzf) -ENDIF (HAVE_LIBLZF) - -IF (HAVE_LIBQUICKLZ) - LIST(APPEND COMPRESSION_LIBS quicklz) -ENDIF (HAVE_LIBQUICKLZ) - -ADD_EXECUTABLE(snappy-unittest snappy_unittest.cc snappy-test.cc) -TARGET_COMPILE_DEFINITIONS(snappy-unittest PRIVATE -DHAVE_CONFIG_H) -TARGET_LINK_LIBRARIES(snappy-unittest snappy ${COMPRESSION_LIBS} - ${GFLAGS_LIBRARIES}) -TARGET_INCLUDE_DIRECTORIES(snappy-unittest BEFORE PRIVATE ${Snappy_SOURCE_DIR} - ${GTEST_INCLUDE_DIRS} ${GFLAGS_INCLUDE_DIRS}) - -ADD_TEST(NAME snappy-unittest - WORKING_DIRECTORY ${Snappy_SOURCE_DIR} - COMMAND ${Snappy_BINARY_DIR}/snappy-unittest) +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cmake_minimum_required(VERSION 3.10) +project(Snappy VERSION 1.2.2 LANGUAGES C CXX) + +# C++ standard can be overridden when this is used as a sub-project. +if(NOT CMAKE_CXX_STANDARD) + # This project requires C++11. + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) +endif(NOT CMAKE_CXX_STANDARD) + +# https://github.com/izenecloud/cmake/blob/master/SetCompilerWarningAll.cmake +if(MSVC) + # Use the highest warning level for Visual Studio. + set(CMAKE_CXX_WARNING_LEVEL 4) + if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + else(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + endif(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + + # Disable C++ exceptions. + string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-") + add_definitions(-D_HAS_EXCEPTIONS=0) + + # Disable RTTI. + string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") +else(MSVC) + # Use -Wall for clang and gcc. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") + + # Use -Wextra for clang and gcc. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") + + # Use -Werror for clang only. + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") + endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + + # Disable sign comparison warnings. Matches upcoming Bazel setup. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wno-sign-compare") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wno-sign-compare") + + # Disable C++ exceptions. + string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") + + # Disable RTTI. + string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") +endif(MSVC) + +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) + +option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) + +option(SNAPPY_BUILD_BENCHMARKS "Build Snappy's benchmarks" ON) + +option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF) + +option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) + +option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." 
OFF)
+
+option(SNAPPY_INSTALL "Install Snappy's header and library" ON)
+
+include(TestBigEndian)
+test_big_endian(SNAPPY_IS_BIG_ENDIAN)
+
+include(CheckIncludeFile)
+check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
+check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H)
+check_include_file("sys/time.h" HAVE_SYS_TIME_H)
+check_include_file("sys/uio.h" HAVE_SYS_UIO_H)
+check_include_file("unistd.h" HAVE_UNISTD_H)
+check_include_file("windows.h" HAVE_WINDOWS_H)
+
+include(CheckLibraryExists)
+check_library_exists(z zlibVersion "" HAVE_LIBZ)
+check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
+check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4)
+
+include(CheckCXXCompilerFlag)
+CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
+CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2)
+CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
+CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2)
+if(SNAPPY_REQUIRE_AVX2)
+  if(HAVE_VISUAL_STUDIO_ARCH_AVX2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
+  endif(HAVE_VISUAL_STUDIO_ARCH_AVX2)
+  if(HAVE_CLANG_MAVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+  endif(HAVE_CLANG_MAVX)
+  if(HAVE_CLANG_MBMI2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2")
+  endif(HAVE_CLANG_MBMI2)
+elseif (SNAPPY_REQUIRE_AVX)
+  if(HAVE_VISUAL_STUDIO_ARCH_AVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
+  endif(HAVE_VISUAL_STUDIO_ARCH_AVX)
+  if(HAVE_CLANG_MAVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+  endif(HAVE_CLANG_MAVX)
+endif(SNAPPY_REQUIRE_AVX2)
+
+# Used by googletest.
+check_cxx_compiler_flag(-Wno-missing-field-initializers
+                        SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+check_cxx_compiler_flag(-Wno-implicit-int-float-conversion
+                        SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION)
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles("
+int main() {
+  return __builtin_expect(0, 1);
+}" HAVE_BUILTIN_EXPECT)
+
+check_cxx_source_compiles("
+int main() {
+  return __builtin_ctzll(0);
+}" HAVE_BUILTIN_CTZ)
+
+check_cxx_source_compiles("
+int main() {
+  __builtin_prefetch(0, 0, 3);
+  return 0;
+}" HAVE_BUILTIN_PREFETCH)
+
+check_cxx_source_compiles("
+__attribute__((always_inline)) int zero() { return 0; }
+
+int main() {
+  return zero();
+}" HAVE_ATTRIBUTE_ALWAYS_INLINE)
+
+check_cxx_source_compiles("
+#include <tmmintrin.h>
+
+int main() {
+  const __m128i *src = 0;
+  __m128i dest;
+  const __m128i shuffle_mask = _mm_load_si128(src);
+  const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask);
+  _mm_storeu_si128(&dest, pattern);
+  return 0;
+}" SNAPPY_HAVE_SSSE3)
+
+check_cxx_source_compiles("
+#include <nmmintrin.h>
+int main() {
+  return _mm_crc32_u32(0, 1);
+}" SNAPPY_HAVE_X86_CRC32)
+
+check_cxx_source_compiles("
+#include <arm_acle.h>
+#include <arm_neon.h>
+int main() {
+  return __crc32cw(0, 1);
+}" SNAPPY_HAVE_NEON_CRC32)
+
+check_cxx_source_compiles("
+#include <immintrin.h>
+int main() {
+  return _bzhi_u32(0, 1);
+}" SNAPPY_HAVE_BMI2)
+
+check_cxx_source_compiles("
+#include <arm_neon.h>
+#include <stdint.h>
+int main() {
+  uint8_t val = 3, dup[8];
+  uint8x16_t v1 = vld1q_dup_u8(&val);
+  uint8x16_t v2 = vqtbl1q_u8(v1, v1);
+  vst1q_u8(dup, v1);
+  vst1q_u8(dup, v2);
+  return 0;
+}" SNAPPY_HAVE_NEON)
+
+# Check RVV 1.0, whose intrinsics use the __riscv_ prefix.
+check_cxx_source_compiles("
+  #include <riscv_vector.h>
+  #include <stdint.h>
+  #include <stddef.h>
+  int main() {
+    uint8_t val = 3, dup[8];
+    size_t vl = __riscv_vsetvl_e8m1(8);
+    vuint8m1_t v = __riscv_vmv_v_x_u8m1(val, vl);
+    return 0;
+  }" SNAPPY_RVV_1)
+
+# Check RVV 0.7.1, whose intrinsics lack the __riscv_ prefix.
+check_cxx_source_compiles("
+  #include <riscv_vector.h>
+  #include <stdint.h>
+  #include <stddef.h>
+  int main() {
+    uint8_t val = 3, dup[8];
+    size_t vl = vsetvl_e8m1(8);
+    vuint8m1_t v = vmv_v_x_u8m1(val, vl);
+    return 0;
+  }" SNAPPY_RVV_0_7)
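The two RVV probes above compile the same kernel under both intrinsic naming schemes: RVV 1.0 prefixes every intrinsic with __riscv_, while RVV 0.7.1 does not. A minimal sketch of dispatching on the resulting macros; VectorLength is a hypothetical helper, and the intrinsics are exactly those exercised by the checks:

    // Hypothetical dispatch on the SNAPPY_RVV_1 / SNAPPY_RVV_0_7 macros
    // defined by the two compile checks above.
    #include <stddef.h>
    #include <stdint.h>
    #if defined(SNAPPY_RVV_1) || defined(SNAPPY_RVV_0_7)
    #include <riscv_vector.h>
    #endif

    size_t VectorLength() {
    #if defined(SNAPPY_RVV_1)
      return __riscv_vsetvl_e8m1(8);  // RVV 1.0 spelling: __riscv_ prefix.
    #elif defined(SNAPPY_RVV_0_7)
      return vsetvl_e8m1(8);          // RVV 0.7.1 spelling: no prefix.
    #else
      return 1;                       // Scalar fallback on non-RISC-V builds.
    #endif
    }

    int main() { return VectorLength() == 0; }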
+
+include(CheckSymbolExists)
+check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)
+check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF)
+
+configure_file(
+  "cmake/config.h.in"
+  "${PROJECT_BINARY_DIR}/config.h"
+)
+
+# We don't want to define HAVE_ macros in public headers. Instead, we use
+# CMake's variable substitution with 0/1 variables, which will be seen by the
+# preprocessor as constants.
+set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H})
+if(NOT HAVE_SYS_UIO_H_01)
+  set(HAVE_SYS_UIO_H_01 0)
+endif(NOT HAVE_SYS_UIO_H_01)
+
+if (SNAPPY_FUZZING_BUILD)
+  if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    message(WARNING "Fuzzing builds are only supported with Clang")
+  endif (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address")
+
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer-no-link")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link")
+endif (SNAPPY_FUZZING_BUILD)
+
+configure_file(
+  "snappy-stubs-public.h.in"
+  "${PROJECT_BINARY_DIR}/snappy-stubs-public.h")
+
+add_library(snappy "")
+target_sources(snappy
+  PRIVATE
+    "snappy-internal.h"
+    "snappy-stubs-internal.h"
+    "snappy-c.cc"
+    "snappy-sinksource.cc"
+    "snappy-stubs-internal.cc"
+    "snappy.cc"
+    "${PROJECT_BINARY_DIR}/config.h"
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-c.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy-c.h>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-sinksource.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy-sinksource.h>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy.h>
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/snappy-stubs-public.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy-stubs-public.h>
+)
+target_include_directories(snappy
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+)
+set_target_properties(snappy
+  PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
+
+target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H)
+if(BUILD_SHARED_LIBS)
+  set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+endif(BUILD_SHARED_LIBS)
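The snappy target assembled above is the library consumers link against; its public C++ API is declared in snappy.h. A minimal round-trip through that API for reference; the sample data is arbitrary:

    // Minimal round-trip through the public C++ API declared in snappy.h.
    #include <cassert>
    #include <string>

    #include "snappy.h"

    int main() {
      const std::string input(1000, 'a');  // Highly compressible sample data.

      std::string compressed;
      snappy::Compress(input.data(), input.size(), &compressed);

      std::string uncompressed;
      bool ok = snappy::Uncompress(compressed.data(), compressed.size(),
                                   &uncompressed);
      assert(ok && uncompressed == input);
      return 0;
    }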
+
+if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
+  add_library(snappy_test_support "")
+  target_sources(snappy_test_support
+    PRIVATE
+      "snappy-test.cc"
+      "snappy-test.h"
+      "snappy_test_data.cc"
+      "snappy_test_data.h"
+      "${PROJECT_BINARY_DIR}/config.h"
+  )
+
+  # Test files include snappy-test.h, so HAVE_CONFIG_H must be defined.
+  target_compile_definitions(snappy_test_support PUBLIC -DHAVE_CONFIG_H)
+  if(BUILD_SHARED_LIBS)
+    set_target_properties(snappy_test_support PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+  endif(BUILD_SHARED_LIBS)
+
+  target_link_libraries(snappy_test_support snappy)
+
+  if(HAVE_LIBZ)
+    target_link_libraries(snappy_test_support z)
+  endif(HAVE_LIBZ)
+  if(HAVE_LIBLZO2)
+    target_link_libraries(snappy_test_support lzo2)
+  endif(HAVE_LIBLZO2)
+  if(HAVE_LIBLZ4)
+    target_link_libraries(snappy_test_support lz4)
+  endif(HAVE_LIBLZ4)
+
+  target_include_directories(snappy_test_support
+    BEFORE PUBLIC
+      "${PROJECT_SOURCE_DIR}"
+  )
+endif(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
+
+if(SNAPPY_BUILD_TESTS)
+  enable_testing()
+
+  # Prevent overriding the parent project's compiler/linker settings on Windows.
+  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+  set(install_gtest OFF)
+  set(install_gmock OFF)
+  set(build_gmock ON)
+
+  # This project is tested using GoogleTest.
+  add_subdirectory("third_party/googletest")
+
+  # GoogleTest triggers a missing field initializers warning.
+  if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+    set_property(TARGET gtest
+      APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
+    set_property(TARGET gmock
+      APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
+  endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+
+  if(SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION)
+    set_property(TARGET gtest
+      APPEND PROPERTY COMPILE_OPTIONS -Wno-implicit-int-float-conversion)
+  endif(SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION)
+
+  add_executable(snappy_unittest "")
+  target_sources(snappy_unittest
+    PRIVATE
+      "snappy_unittest.cc"
+  )
+  target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest)
+
+  add_test(
+    NAME snappy_unittest
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest")
+
+  add_executable(snappy_test_tool "")
+  target_sources(snappy_test_tool
+    PRIVATE
+      "snappy_test_tool.cc"
+  )
+  target_link_libraries(snappy_test_tool snappy_test_support)
+endif(SNAPPY_BUILD_TESTS)
+
+if(SNAPPY_BUILD_BENCHMARKS)
+  add_executable(snappy_benchmark "")
+  target_sources(snappy_benchmark
+    PRIVATE
+      "snappy_benchmark.cc"
+  )
+  target_link_libraries(snappy_benchmark snappy_test_support benchmark_main)
+
+  # This project uses Google benchmark for benchmarking.
+  set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
+  set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE)
+  add_subdirectory("third_party/benchmark")
+endif(SNAPPY_BUILD_BENCHMARKS)
+
+if(SNAPPY_FUZZING_BUILD)
+  add_executable(snappy_compress_fuzzer "")
+  target_sources(snappy_compress_fuzzer
+    PRIVATE "snappy_compress_fuzzer.cc"
+  )
+  target_link_libraries(snappy_compress_fuzzer snappy)
+  set_target_properties(snappy_compress_fuzzer
+    PROPERTIES LINK_FLAGS "-fsanitize=fuzzer"
+  )
+
+  add_executable(snappy_uncompress_fuzzer "")
+  target_sources(snappy_uncompress_fuzzer
+    PRIVATE "snappy_uncompress_fuzzer.cc"
+  )
+  target_link_libraries(snappy_uncompress_fuzzer snappy)
+  set_target_properties(snappy_uncompress_fuzzer
+    PROPERTIES LINK_FLAGS "-fsanitize=fuzzer"
+  )
+endif(SNAPPY_FUZZING_BUILD)
+
+# Must be included before CMAKE_INSTALL_INCLUDEDIR is used.
+include(GNUInstallDirs)
+
+if(SNAPPY_INSTALL)
+  install(TARGETS snappy
+    EXPORT SnappyTargets
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  )
+  install(
+    FILES
+      "snappy-c.h"
+      "snappy-sinksource.h"
+      "snappy.h"
+      "${PROJECT_BINARY_DIR}/snappy-stubs-public.h"
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+  )
+
+  include(CMakePackageConfigHelpers)
+  configure_package_config_file(
+    "cmake/${PROJECT_NAME}Config.cmake.in"
+    "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
+    INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+  write_basic_package_version_file(
+    "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
+    COMPATIBILITY SameMajorVersion
+  )
+  install(
+    EXPORT SnappyTargets
+    NAMESPACE Snappy::
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+  install(
+    FILES
+      "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
+      "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+endif(SNAPPY_INSTALL)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..66a60d5
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,31 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+See [the README](README.md#contributing-to-the-snappy-project) for areas
+where we are likely to accept external contributions.
+
+## Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google/conduct/).
diff --git a/ChangeLog b/ChangeLog
deleted file mode 100644
index 1478db5..0000000
--- a/ChangeLog
+++ /dev/null
@@ -1,2468 +0,0 @@
-commit eb66d8176b3d1f560ee012e1b488cb1540c45f88
-Author: Steinar H. Gunderson
-Date:   Mon Jun 22 16:10:47 2015 +0200
-
-    Initialized members of SnappyArrayWriter and SnappyDecompressionValidator.
-    These members were almost surely initialized before use by other member
-    functions, but Coverity was warning about this. Eliminating these warnings
-    minimizes clutter in that report and the likelihood of overlooking a real bug.
-
-    A=cmumford
-    R=jeff
-
-commit b2312c4c25883ab03b5110f1b006dce95f419a4f
-Author: Steinar H. Gunderson
-Date:   Mon Jun 22 16:03:28 2015 +0200
-
-    Add support for Uncompress(source, sink). Various changes to allow
-    Uncompress(source, sink) to get the same performance as the different
-    variants of Uncompress to Cord/DataBuffer/String/FlatBuffer.
-
-    Changes to efficiently support Uncompress(source, sink)
-    --------
-
-    a) For strings - we add support to StringByteSink to do GetAppendBuffer so we
-       can write to it without copying.
-    b) For flat array buffers, we do GetAppendBuffer and see if we can get a full buffer.
-
-    With the above changes we get performance with ByteSource/ByteSink
-    that is very close to directly using flat arrays and strings.
-
-    We add various benchmark cases to demonstrate that.
-
-    Orthogonal change
-    ------------------
-
-    Add support for TryFastAppend() for SnappyScatteredWriter.
- - Benchmark results are below - - CPU: Intel Core2 dL1:32KB dL2:4096KB - Benchmark Time(ns) CPU(ns) Iterations - ----------------------------------------------------- - BM_UFlat/0 109065 108996 6410 896.0MB/s html - BM_UFlat/1 1012175 1012343 691 661.4MB/s urls - BM_UFlat/2 26775 26771 26149 4.4GB/s jpg - BM_UFlat/3 48947 48940 14363 1.8GB/s pdf - BM_UFlat/4 441029 440835 1589 886.1MB/s html4 - BM_UFlat/5 39861 39880 17823 588.3MB/s cp - BM_UFlat/6 18315 18300 38126 581.1MB/s c - BM_UFlat/7 5254 5254 100000 675.4MB/s lsp - BM_UFlat/8 1568060 1567376 447 626.6MB/s xls - BM_UFlat/9 337512 337734 2073 429.5MB/s txt1 - BM_UFlat/10 287269 287054 2434 415.9MB/s txt2 - BM_UFlat/11 890098 890219 787 457.2MB/s txt3 - BM_UFlat/12 1186593 1186863 590 387.2MB/s txt4 - BM_UFlat/13 573927 573318 1000 853.7MB/s bin - BM_UFlat/14 64250 64294 10000 567.2MB/s sum - BM_UFlat/15 7301 7300 96153 552.2MB/s man - BM_UFlat/16 109617 109636 6375 1031.5MB/s pb - BM_UFlat/17 364438 364497 1921 482.3MB/s gaviota - BM_UFlatSink/0 108518 108465 6450 900.4MB/s html - BM_UFlatSink/1 991952 991997 705 675.0MB/s urls - BM_UFlatSink/2 26815 26798 26065 4.4GB/s jpg - BM_UFlatSink/3 49127 49122 14255 1.8GB/s pdf - BM_UFlatSink/4 436674 436731 1604 894.4MB/s html4 - BM_UFlatSink/5 39738 39733 17345 590.5MB/s cp - BM_UFlatSink/6 18413 18416 37962 577.4MB/s c - BM_UFlatSink/7 5677 5676 100000 625.2MB/s lsp - BM_UFlatSink/8 1552175 1551026 451 633.2MB/s xls - BM_UFlatSink/9 338526 338489 2065 428.5MB/s txt1 - BM_UFlatSink/10 289387 289307 2420 412.6MB/s txt2 - BM_UFlatSink/11 893803 893706 783 455.4MB/s txt3 - BM_UFlatSink/12 1195919 1195459 586 384.4MB/s txt4 - BM_UFlatSink/13 559637 559779 1000 874.3MB/s bin - BM_UFlatSink/14 65073 65094 10000 560.2MB/s sum - BM_UFlatSink/15 7618 7614 92823 529.5MB/s man - BM_UFlatSink/16 110085 110121 6352 1027.0MB/s pb - BM_UFlatSink/17 369196 368915 1896 476.5MB/s gaviota - BM_UValidate/0 46954 46957 14899 2.0GB/s html - BM_UValidate/1 500621 500868 1000 1.3GB/s urls - BM_UValidate/2 283 283 2481447 417.2GB/s jpg - BM_UValidate/3 16230 16228 43137 5.4GB/s pdf - BM_UValidate/4 189129 189193 3701 2.0GB/s html4 - - A=uday - R=sanjay - -commit b2ad96006741d40935db2f73194a3e489b467338 -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:48:29 2015 +0200 - - Changes to eliminate compiler warnings on MSVC - - This code was not compiling under Visual Studio 2013 with warnings being treated - as errors. Specifically: - - 1. Changed int -> size_t to eliminate signed/unsigned mismatch warning. - 2. Added some missing return values to functions. - 3. Inserting character instead of integer literals into strings to avoid type - conversions. - - A=cmumford - R=jeff - -commit e7a897e187e90b33f87bd9e64872cf561de9ebca -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:45:11 2015 +0200 - - Fixed unit tests to compile under MSVC. - - 1. Including config.h in test. - 2. Including windows.h before zippy-test.h. - 3. Removed definition of WIN32_LEAN_AND_MEAN. This caused problems in - build environments that define WIN32_LEAN_AND_MEAN as our - definition didn't check for prior existence. This constant is old - and no longer needed anyhow. - 4. Disable MSVC warning 4722 since ~LogMessageCrash() never returns. - - A=cmumford - R=jeff - -commit 86eb8b152bdb065ad11bf331a9f7d65b72616acf -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:41:30 2015 +0200 - - Change a few branch annotations that profiling found to be wrong. - Overall performance is neutral or slightly positive. 
- - Westmere (64-bit, opt): - - Benchmark Base (ns) New (ns) Improvement - -------------------------------------------------------------------------------------- - BM_UFlat/0 73798 71464 1.3GB/s html +3.3% - BM_UFlat/1 715223 704318 953.5MB/s urls +1.5% - BM_UFlat/2 8137 8871 13.0GB/s jpg -8.3% - BM_UFlat/3 200 204 935.5MB/s jpg_200 -2.0% - BM_UFlat/4 21627 21281 4.5GB/s pdf +1.6% - BM_UFlat/5 302806 290350 1.3GB/s html4 +4.3% - BM_UFlat/6 218920 219017 664.1MB/s txt1 -0.0% - BM_UFlat/7 190437 191212 626.1MB/s txt2 -0.4% - BM_UFlat/8 584192 580484 703.4MB/s txt3 +0.6% - BM_UFlat/9 776537 779055 591.6MB/s txt4 -0.3% - BM_UFlat/10 76056 72606 1.5GB/s pb +4.8% - BM_UFlat/11 235962 239043 737.4MB/s gaviota -1.3% - BM_UFlat/12 28049 28000 840.1MB/s cp +0.2% - BM_UFlat/13 12225 12021 886.9MB/s c +1.7% - BM_UFlat/14 3362 3544 1004.0MB/s lsp -5.1% - BM_UFlat/15 937015 939206 1048.9MB/s xls -0.2% - BM_UFlat/16 236 233 823.1MB/s xls_200 +1.3% - BM_UFlat/17 373170 361947 1.3GB/s bin +3.1% - BM_UFlat/18 264 264 725.5MB/s bin_200 +0.0% - BM_UFlat/19 42834 43577 839.2MB/s sum -1.7% - BM_UFlat/20 4770 4736 853.6MB/s man +0.7% - BM_UValidate/0 39671 39944 2.4GB/s html -0.7% - BM_UValidate/1 443391 443391 1.5GB/s urls +0.0% - BM_UValidate/2 163 163 703.3GB/s jpg +0.0% - BM_UValidate/3 113 112 1.7GB/s jpg_200 +0.9% - BM_UValidate/4 7555 7608 12.6GB/s pdf -0.7% - BM_ZFlat/0 157616 157568 621.5MB/s html (22.31 %) +0.0% - BM_ZFlat/1 1997290 2014486 333.4MB/s urls (47.77 %) -0.9% - BM_ZFlat/2 23035 22237 5.2GB/s jpg (99.95 %) +3.6% - BM_ZFlat/3 539 540 354.5MB/s jpg_200 (73.00 %) -0.2% - BM_ZFlat/4 80709 81369 1.2GB/s pdf (81.85 %) -0.8% - BM_ZFlat/5 639059 639220 613.0MB/s html4 (22.51 %) -0.0% - BM_ZFlat/6 577203 583370 249.3MB/s txt1 (57.87 %) -1.1% - BM_ZFlat/7 510887 516094 232.0MB/s txt2 (61.93 %) -1.0% - BM_ZFlat/8 1535843 1556973 262.2MB/s txt3 (54.92 %) -1.4% - BM_ZFlat/9 2070068 2102380 219.3MB/s txt4 (66.22 %) -1.5% - BM_ZFlat/10 152396 152148 745.5MB/s pb (19.64 %) +0.2% - BM_ZFlat/11 447367 445859 395.4MB/s gaviota (37.72 %) +0.3% - BM_ZFlat/12 76375 76797 306.3MB/s cp (48.12 %) -0.5% - BM_ZFlat/13 31518 31987 333.3MB/s c (42.40 %) -1.5% - BM_ZFlat/14 10598 10827 328.6MB/s lsp (48.37 %) -2.1% - BM_ZFlat/15 1782243 1802728 546.5MB/s xls (41.23 %) -1.1% - BM_ZFlat/16 526 539 355.0MB/s xls_200 (78.00 %) -2.4% - BM_ZFlat/17 598141 597311 822.1MB/s bin (18.11 %) +0.1% - BM_ZFlat/18 121 120 1.6GB/s bin_200 (7.50 %) +0.8% - BM_ZFlat/19 109981 112173 326.0MB/s sum (48.96 %) -2.0% - BM_ZFlat/20 14355 14575 277.4MB/s man (59.36 %) -1.5% - Sum of all benchmarks 33882722 33879325 +0.0% - - Sandy Bridge (64-bit, opt): - - Benchmark Base (ns) New (ns) Improvement - -------------------------------------------------------------------------------------- - BM_UFlat/0 43764 41600 2.3GB/s html +5.2% - BM_UFlat/1 517990 507058 1.3GB/s urls +2.2% - BM_UFlat/2 6625 5529 20.8GB/s jpg +19.8% - BM_UFlat/3 154 155 1.2GB/s jpg_200 -0.6% - BM_UFlat/4 12795 11747 8.1GB/s pdf +8.9% - BM_UFlat/5 200335 193413 2.0GB/s html4 +3.6% - BM_UFlat/6 156574 156426 929.2MB/s txt1 +0.1% - BM_UFlat/7 137574 137464 870.4MB/s txt2 +0.1% - BM_UFlat/8 422551 421603 967.4MB/s txt3 +0.2% - BM_UFlat/9 577749 578985 795.6MB/s txt4 -0.2% - BM_UFlat/10 42329 39362 2.8GB/s pb +7.5% - BM_UFlat/11 170615 169751 1037.9MB/s gaviota +0.5% - BM_UFlat/12 12800 12719 1.8GB/s cp +0.6% - BM_UFlat/13 6585 6579 1.6GB/s c +0.1% - BM_UFlat/14 2066 2044 1.7GB/s lsp +1.1% - BM_UFlat/15 750861 746911 1.3GB/s xls +0.5% - BM_UFlat/16 188 192 996.0MB/s xls_200 -2.1% - 
BM_UFlat/17 271622 264333 1.8GB/s bin +2.8% - BM_UFlat/18 208 207 923.6MB/s bin_200 +0.5% - BM_UFlat/19 24667 24845 1.4GB/s sum -0.7% - BM_UFlat/20 2663 2662 1.5GB/s man +0.0% - BM_ZFlat/0 115173 115624 846.5MB/s html (22.31 %) -0.4% - BM_ZFlat/1 1530331 1537769 436.5MB/s urls (47.77 %) -0.5% - BM_ZFlat/2 17503 17013 6.8GB/s jpg (99.95 %) +2.9% - BM_ZFlat/3 385 385 496.3MB/s jpg_200 (73.00 %) +0.0% - BM_ZFlat/4 61753 61540 1.6GB/s pdf (81.85 %) +0.3% - BM_ZFlat/5 484806 483356 810.1MB/s html4 (22.51 %) +0.3% - BM_ZFlat/6 464143 467609 310.9MB/s txt1 (57.87 %) -0.7% - BM_ZFlat/7 410315 413319 289.5MB/s txt2 (61.93 %) -0.7% - BM_ZFlat/8 1244082 1249381 326.5MB/s txt3 (54.92 %) -0.4% - BM_ZFlat/9 1696914 1709685 269.4MB/s txt4 (66.22 %) -0.7% - BM_ZFlat/10 104148 103372 1096.7MB/s pb (19.64 %) +0.8% - BM_ZFlat/11 363522 359722 489.8MB/s gaviota (37.72 %) +1.1% - BM_ZFlat/12 47021 50095 469.3MB/s cp (48.12 %) -6.1% - BM_ZFlat/13 16888 16985 627.4MB/s c (42.40 %) -0.6% - BM_ZFlat/14 5496 5469 650.3MB/s lsp (48.37 %) +0.5% - BM_ZFlat/15 1460713 1448760 679.5MB/s xls (41.23 %) +0.8% - BM_ZFlat/16 387 393 486.8MB/s xls_200 (78.00 %) -1.5% - BM_ZFlat/17 457654 451462 1086.6MB/s bin (18.11 %) +1.4% - BM_ZFlat/18 97 87 2.1GB/s bin_200 (7.50 %) +11.5% - BM_ZFlat/19 77904 80924 451.7MB/s sum (48.96 %) -3.7% - BM_ZFlat/20 7648 7663 527.1MB/s man (59.36 %) -0.2% - Sum of all benchmarks 25493635 25482069 +0.0% - - A=dehao - R=sesse - -commit 11ccdfb868387e56d845766d89ddab9d489c4128 -Author: Steinar H. Gunderson -Date: Mon Jun 22 16:07:58 2015 +0200 - - Sync with various Google-internal changes. - - Should not mean much for the open-source version. - -commit 22acaf438ed93ab21a2ff1919d173206798b996e -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:39:08 2015 +0200 - - Change some internal path names. - - This is mostly to sync up with some changes from Google's internal - repositories; it does not affect the open-source distribution in itself. - -commit 1ff9be9b8fafc8528ca9e055646f5932aa5db9c4 -Author: snappy.mirrorbot@gmail.com -Date: Fri Feb 28 11:18:07 2014 +0000 - - Release Snappy 1.1.2. - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@84 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 19690d78e83f8963f497585031efa3d9ca66b807 -Author: snappy.mirrorbot@gmail.com -Date: Wed Feb 19 10:31:49 2014 +0000 - - Fix public issue 82: Stop distributing benchmark data files that have - unclear or unsuitable licensing. - - In general, we replace the files we can with liberally licensed data, - and remove all the others (in particular all the parts of the Canterbury - corpus that are not clearly in the public domain). The replacements - do not always have the exact same characteristics as the original ones, - but they are more than good enough to be useful for benchmarking. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@83 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f82bff66afe0de4c9ae22f8c4ef84e3c2233e799 -Author: snappy.mirrorbot@gmail.com -Date: Fri Oct 25 13:31:27 2013 +0000 - - Add support for padding in the Snappy framed format. - - This is specifically motivated by DICOM's demands that embedded data - must be of an even number of bytes, but could in principle be used for - any sort of padding/alignment needed. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@82 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit eeead8dc38ea359f027fb6e89f345448e8e9d723 -Author: snappy.mirrorbot@gmail.com -Date: Tue Oct 15 15:21:31 2013 +0000 - - Release Snappy 1.1.1. 
- - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@81 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 6bc39e24c76adbbff26ae629fafbf7dfc795f554 -Author: snappy.mirrorbot@gmail.com -Date: Tue Aug 13 12:55:00 2013 +0000 - - Add autoconf tests for size_t and ssize_t. Sort-of resolves public issue 79; - it would solve the problem if MSVC typically used autoconf. However, it gives - a natural place (config.h) to put the typedef even for MSVC. - - R=jsbell - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@80 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 7c3c01df77e191ad1f8377448961fe88db2802e9 -Author: snappy.mirrorbot@gmail.com -Date: Mon Jul 29 11:06:44 2013 +0000 - - When we compare the number of bytes produced with the offset for a - backreference, make the signedness of the bytes produced clear, - by sticking it into a size_t. This avoids a signed/unsigned compare - warning from MSVC (public issue 71), and also is slightly clearer. - - Since the line is now so long the explanatory comment about the -1u - trick has to go somewhere else anyway, I used the opportunity to - explain it in slightly more detail. - - This is a purely stylistic change; the emitted assembler from GCC - is identical. - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@79 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2f0aaf8631d8fb2475ca1a6687c181efb14ed286 -Author: snappy.mirrorbot@gmail.com -Date: Sun Jun 30 19:24:03 2013 +0000 - - In the fast path for decompressing literals, instead of checking - whether there's 16 bytes free and then checking right afterwards - (when having subtracted the literal size) that there are now - 5 bytes free, just check once for 21 bytes. This skips a compare - and a branch; although it is easily predictable, it is still - a few cycles on a fast path that we would like to get rid of. - - Benchmarking this yields very confusing results. On open-source - GCC 4.8.1 on Haswell, we get exactly the expected results; the - benchmarks where we hit the fast path for literals (in particular - the two HTML benchmarks and the protobuf benchmark) give very nice - speedups, and the others are not really affected. - - However, benchmarks with Google's GCC branch on other hardware - is much less clear. It seems that we have a weak loss in some cases - (and the win for the “typical” win cases are not nearly as clear), - but that it depends on microarchitecture and plain luck in how we run - the benchmark. Looking at the generated assembler, it seems that - the removal of the if causes other large-scale changes in how the - function is laid out, which makes it likely that this is just bad luck. - - Thus, we should keep this change, even though its exact current impact is - unclear; it's a sensible change per se, and dropping it on the basis of - microoptimization for a given compiler (or even branch of a compiler) - would seem like a bad strategy in the long run. 
- - Microbenchmark results (all in 64-bit, opt mode): - - Nehalem, Google GCC: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------ - BM_UFlat/0 76747 75591 1.3GB/s html +1.5% - BM_UFlat/1 765756 757040 886.3MB/s urls +1.2% - BM_UFlat/2 10867 10893 10.9GB/s jpg -0.2% - BM_UFlat/3 124 131 1.4GB/s jpg_200 -5.3% - BM_UFlat/4 31663 31596 2.8GB/s pdf +0.2% - BM_UFlat/5 314162 308176 1.2GB/s html4 +1.9% - BM_UFlat/6 29668 29746 790.6MB/s cp -0.3% - BM_UFlat/7 12958 13386 796.4MB/s c -3.2% - BM_UFlat/8 3596 3682 966.0MB/s lsp -2.3% - BM_UFlat/9 1019193 1033493 953.3MB/s xls -1.4% - BM_UFlat/10 239 247 775.3MB/s xls_200 -3.2% - BM_UFlat/11 236411 240271 606.9MB/s txt1 -1.6% - BM_UFlat/12 206639 209768 571.2MB/s txt2 -1.5% - BM_UFlat/13 627803 635722 641.4MB/s txt3 -1.2% - BM_UFlat/14 845932 857816 538.2MB/s txt4 -1.4% - BM_UFlat/15 402107 391670 1.2GB/s bin +2.7% - BM_UFlat/16 283 279 683.6MB/s bin_200 +1.4% - BM_UFlat/17 46070 46815 781.5MB/s sum -1.6% - BM_UFlat/18 5053 5163 782.0MB/s man -2.1% - BM_UFlat/19 79721 76581 1.4GB/s pb +4.1% - BM_UFlat/20 251158 252330 697.5MB/s gaviota -0.5% - Sum of all benchmarks 4966150 4980396 -0.3% - - - Sandy Bridge, Google GCC: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------ - BM_UFlat/0 42850 42182 2.3GB/s html +1.6% - BM_UFlat/1 525660 515816 1.3GB/s urls +1.9% - BM_UFlat/2 7173 7283 16.3GB/s jpg -1.5% - BM_UFlat/3 92 91 2.1GB/s jpg_200 +1.1% - BM_UFlat/4 15147 14872 5.9GB/s pdf +1.8% - BM_UFlat/5 199936 192116 2.0GB/s html4 +4.1% - BM_UFlat/6 12796 12443 1.8GB/s cp +2.8% - BM_UFlat/7 6588 6400 1.6GB/s c +2.9% - BM_UFlat/8 2010 1951 1.8GB/s lsp +3.0% - BM_UFlat/9 761124 763049 1.3GB/s xls -0.3% - BM_UFlat/10 186 189 1016.1MB/s xls_200 -1.6% - BM_UFlat/11 159354 158460 918.6MB/s txt1 +0.6% - BM_UFlat/12 139732 139950 856.1MB/s txt2 -0.2% - BM_UFlat/13 429917 425027 961.7MB/s txt3 +1.2% - BM_UFlat/14 585255 587324 785.8MB/s txt4 -0.4% - BM_UFlat/15 276186 266173 1.8GB/s bin +3.8% - BM_UFlat/16 205 207 925.5MB/s bin_200 -1.0% - BM_UFlat/17 24925 24935 1.4GB/s sum -0.0% - BM_UFlat/18 2632 2576 1.5GB/s man +2.2% - BM_UFlat/19 40546 39108 2.8GB/s pb +3.7% - BM_UFlat/20 175803 168209 1048.9MB/s gaviota +4.5% - Sum of all benchmarks 3408117 3368361 +1.2% - - - Haswell, upstream GCC 4.8.1: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------ - BM_UFlat/0 46308 40641 2.3GB/s html +13.9% - BM_UFlat/1 513385 514706 1.3GB/s urls -0.3% - BM_UFlat/2 6197 6151 19.2GB/s jpg +0.7% - BM_UFlat/3 61 61 3.0GB/s jpg_200 +0.0% - BM_UFlat/4 13551 13429 6.5GB/s pdf +0.9% - BM_UFlat/5 198317 190243 2.0GB/s html4 +4.2% - BM_UFlat/6 14768 12560 1.8GB/s cp +17.6% - BM_UFlat/7 6453 6447 1.6GB/s c +0.1% - BM_UFlat/8 1991 1980 1.8GB/s lsp +0.6% - BM_UFlat/9 766947 770424 1.2GB/s xls -0.5% - BM_UFlat/10 170 169 1.1GB/s xls_200 +0.6% - BM_UFlat/11 164350 163554 888.7MB/s txt1 +0.5% - BM_UFlat/12 145444 143830 832.1MB/s txt2 +1.1% - BM_UFlat/13 437849 438413 929.2MB/s txt3 -0.1% - BM_UFlat/14 603587 605309 759.8MB/s txt4 -0.3% - BM_UFlat/15 249799 248067 1.9GB/s bin +0.7% - BM_UFlat/16 191 188 1011.4MB/s bin_200 +1.6% - BM_UFlat/17 26064 24778 1.4GB/s sum +5.2% - BM_UFlat/18 2620 2601 1.5GB/s man +0.7% - BM_UFlat/19 44551 37373 3.0GB/s pb +19.2% - BM_UFlat/20 165408 164584 1.0GB/s gaviota +0.5% - Sum of all benchmarks 3408011 3385508 +0.7% - - - git-svn-id: 
https://snappy.googlecode.com/svn/trunk@78 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 062bf544a61107db730b6d08cb0b159c4dd9b24c -Author: snappy.mirrorbot@gmail.com -Date: Fri Jun 14 21:42:26 2013 +0000 - - Make the two IncrementalCopy* functions take in an ssize_t instead of a len, - in order to avoid having to do 32-to-64-bit signed conversions on a hot path - during decompression. (Also fixes some MSVC warnings, mentioned in public - issue 75, but more of those remain.) They cannot be size_t because we expect - them to go negative and test for that. - - This saves a few movzwl instructions, yielding ~2% speedup in decompression. - - - Sandy Bridge: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 48009 41283 2.3GB/s html +16.3% - BM_UFlat/1 531274 513419 1.3GB/s urls +3.5% - BM_UFlat/2 7378 7062 16.8GB/s jpg +4.5% - BM_UFlat/3 92 92 2.0GB/s jpg_200 +0.0% - BM_UFlat/4 15057 14974 5.9GB/s pdf +0.6% - BM_UFlat/5 204323 193140 2.0GB/s html4 +5.8% - BM_UFlat/6 13282 12611 1.8GB/s cp +5.3% - BM_UFlat/7 6511 6504 1.6GB/s c +0.1% - BM_UFlat/8 2014 2030 1.7GB/s lsp -0.8% - BM_UFlat/9 775909 768336 1.3GB/s xls +1.0% - BM_UFlat/10 182 184 1043.2MB/s xls_200 -1.1% - BM_UFlat/11 167352 161630 901.2MB/s txt1 +3.5% - BM_UFlat/12 147393 142246 842.8MB/s txt2 +3.6% - BM_UFlat/13 449960 432853 944.4MB/s txt3 +4.0% - BM_UFlat/14 620497 594845 775.9MB/s txt4 +4.3% - BM_UFlat/15 265610 267356 1.8GB/s bin -0.7% - BM_UFlat/16 206 205 932.7MB/s bin_200 +0.5% - BM_UFlat/17 25561 24730 1.4GB/s sum +3.4% - BM_UFlat/18 2620 2644 1.5GB/s man -0.9% - BM_UFlat/19 45766 38589 2.9GB/s pb +18.6% - BM_UFlat/20 171107 169832 1039.5MB/s gaviota +0.8% - Sum of all benchmarks 3500103 3394565 +3.1% - - - Westmere: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 72624 71526 1.3GB/s html +1.5% - BM_UFlat/1 735821 722917 930.8MB/s urls +1.8% - BM_UFlat/2 10450 10172 11.7GB/s jpg +2.7% - BM_UFlat/3 117 117 1.6GB/s jpg_200 +0.0% - BM_UFlat/4 29817 29648 3.0GB/s pdf +0.6% - BM_UFlat/5 297126 293073 1.3GB/s html4 +1.4% - BM_UFlat/6 28252 27994 842.0MB/s cp +0.9% - BM_UFlat/7 12672 12391 862.1MB/s c +2.3% - BM_UFlat/8 3507 3425 1040.9MB/s lsp +2.4% - BM_UFlat/9 1004268 969395 1018.0MB/s xls +3.6% - BM_UFlat/10 233 227 844.8MB/s xls_200 +2.6% - BM_UFlat/11 230054 224981 647.8MB/s txt1 +2.3% - BM_UFlat/12 201229 196447 610.5MB/s txt2 +2.4% - BM_UFlat/13 609547 596761 685.3MB/s txt3 +2.1% - BM_UFlat/14 824362 804821 573.8MB/s txt4 +2.4% - BM_UFlat/15 371095 374899 1.3GB/s bin -1.0% - BM_UFlat/16 267 267 717.8MB/s bin_200 +0.0% - BM_UFlat/17 44623 43828 835.9MB/s sum +1.8% - BM_UFlat/18 5077 4815 841.0MB/s man +5.4% - BM_UFlat/19 74964 73210 1.5GB/s pb +2.4% - BM_UFlat/20 237987 236745 746.0MB/s gaviota +0.5% - Sum of all benchmarks 4794092 4697659 +2.1% - - - Istanbul: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 98614 96376 1020.4MB/s html +2.3% - BM_UFlat/1 963740 953241 707.2MB/s urls +1.1% - BM_UFlat/2 25042 24769 4.8GB/s jpg +1.1% - BM_UFlat/3 180 180 1065.6MB/s jpg_200 +0.0% - BM_UFlat/4 45942 45403 1.9GB/s pdf +1.2% - BM_UFlat/5 400135 390226 1008.2MB/s html4 +2.5% - BM_UFlat/6 37768 37392 631.9MB/s cp +1.0% - BM_UFlat/7 18585 18200 588.2MB/s c +2.1% - BM_UFlat/8 5751 5690 627.7MB/s lsp +1.1% - 
BM_UFlat/9 1543154 1542209 641.4MB/s xls +0.1% - BM_UFlat/10 381 388 494.6MB/s xls_200 -1.8% - BM_UFlat/11 339715 331973 440.1MB/s txt1 +2.3% - BM_UFlat/12 294807 289418 415.4MB/s txt2 +1.9% - BM_UFlat/13 906160 884094 463.3MB/s txt3 +2.5% - BM_UFlat/14 1224221 1198435 386.1MB/s txt4 +2.2% - BM_UFlat/15 516277 502923 979.5MB/s bin +2.7% - BM_UFlat/16 405 402 477.2MB/s bin_200 +0.7% - BM_UFlat/17 61640 60621 605.6MB/s sum +1.7% - BM_UFlat/18 7326 7383 549.5MB/s man -0.8% - BM_UFlat/19 94720 92653 1.2GB/s pb +2.2% - BM_UFlat/20 360435 346687 510.6MB/s gaviota +4.0% - Sum of all benchmarks 6944998 6828663 +1.7% - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@77 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 328aafa1980824a9afdcd50edc30d9d5157e417f -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 13 16:19:52 2013 +0000 - - Add support for uncompressing to iovecs (scatter I/O). - Windows does not have struct iovec defined anywhere, - so we define our own version that's equal to what UNIX - typically has. - - The bulk of this patch was contributed by Mohit Aron. - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@76 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit cd92eb0852e2339187b693eef3595a07d2276c1d -Author: snappy.mirrorbot@gmail.com -Date: Wed Jun 12 19:51:15 2013 +0000 - - Some code reorganization needed for an internal change. - - R=fikes - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@75 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit a3e928d62bbd61b523b988c07b560253950cf73b -Author: snappy.mirrorbot@gmail.com -Date: Tue Apr 9 15:33:30 2013 +0000 - - Support truncated test data in the zippy benchmark. - - R=sesse - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@74 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit bde324c0169763688f35ee44630a26ad1f49eec3 -Author: snappy.mirrorbot@gmail.com -Date: Tue Feb 5 14:36:15 2013 +0000 - - Release Snappy 1.1.0. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@73 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 8168446c7eaaa0594e1f4ca923376dcf3a2846fa -Author: snappy.mirrorbot@gmail.com -Date: Tue Feb 5 14:30:05 2013 +0000 - - Make ./snappy_unittest pass without "srcdir" being defined. - - Previously, snappy_unittest would read from an absolute path /testdata/..; - convert it to use a relative path instead. - - Patch from Marc-Antoine Ruel. - - R=maruel - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@72 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 27a0cc394950ebdad2e8d67322f0862835b10bd9 -Author: snappy.mirrorbot@gmail.com -Date: Fri Jan 18 12:16:36 2013 +0000 - - Increase the Zippy block size from 32 kB to 64 kB, winning ~3% density - while being effectively performance neutral. - - The longer story about density is that we win 3-6% density on the benchmarks - where this has any effect at all; many of the benchmarks (cp, c, lsp, man) - are smaller than 32 kB and thus will see no effect. Binary data also seems - to win little or nothing; of course, the already-compressed data wins nothing. - The protobuf benchmark wins as much as ~18% depending on architecture, - but I wouldn't be too sure that this is representative of protobuf data in - general. - - As for performance, we lose a tiny amount since we get more tags (e.g., a long - literal might be broken up into literal-copy-literal), but we win it back with - less clearing of the hash table, and more opportunities to skip incompressible - data (e.g. in the jpg benchmark).
Decompression seems to get ever so slightly - slower, again due to more tags. The total net change is about as close to zero - as we can get, so the end effect seems to be simply more density and no - real performance change. - - The comment about not changing kBlockSize, scary as it is, is not really - relevant, since we're never going to have a block-level decompressor without - explicitly marked blocks. Replace it with something more appropriate. - - This affects the framing format, but it's okay to change it since it basically - has no users yet. - - - Density (note that cp, c, lsp and man are all smaller than 32 kB): - - Benchmark Description Base (%) New (%) Improvement - -------------------------------------------------------------- - ZFlat/0 html 22.57 22.31 +5.6% - ZFlat/1 urls 50.89 47.77 +6.5% - ZFlat/2 jpg 99.88 99.87 +0.0% - ZFlat/3 pdf 82.13 82.07 +0.1% - ZFlat/4 html4 23.55 22.51 +4.6% - ZFlat/5 cp 48.12 48.12 +0.0% - ZFlat/6 c 42.40 42.40 +0.0% - ZFlat/7 lsp 48.37 48.37 +0.0% - ZFlat/8 xls 41.34 41.23 +0.3% - ZFlat/9 txt1 59.81 57.87 +3.4% - ZFlat/10 txt2 64.07 61.93 +3.5% - ZFlat/11 txt3 57.11 54.92 +4.0% - ZFlat/12 txt4 68.35 66.22 +3.2% - ZFlat/13 bin 18.21 18.11 +0.6% - ZFlat/14 sum 51.88 48.96 +6.0% - ZFlat/15 man 59.36 59.36 +0.0% - ZFlat/16 pb 23.15 19.64 +17.9% - ZFlat/17 gaviota 38.27 37.72 +1.5% - Geometric mean 45.51 44.15 +3.1% - - - Microbenchmarks (64-bit, opt): - - Westmere 2.8 GHz: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 75342 75027 1.3GB/s html +0.4% - BM_UFlat/1 723767 744269 899.6MB/s urls -2.8% - BM_UFlat/2 10072 10072 11.7GB/s jpg +0.0% - BM_UFlat/3 30747 30388 2.9GB/s pdf +1.2% - BM_UFlat/4 307353 306063 1.2GB/s html4 +0.4% - BM_UFlat/5 28593 28743 816.3MB/s cp -0.5% - BM_UFlat/6 12958 12998 818.1MB/s c -0.3% - BM_UFlat/7 3700 3792 935.8MB/s lsp -2.4% - BM_UFlat/8 999685 999905 982.1MB/s xls -0.0% - BM_UFlat/9 232954 230079 630.4MB/s txt1 +1.2% - BM_UFlat/10 200785 201468 592.6MB/s txt2 -0.3% - BM_UFlat/11 617267 610968 666.1MB/s txt3 +1.0% - BM_UFlat/12 821595 822475 558.7MB/s txt4 -0.1% - BM_UFlat/13 377097 377632 1.3GB/s bin -0.1% - BM_UFlat/14 45476 45260 805.8MB/s sum +0.5% - BM_UFlat/15 4985 5003 805.7MB/s man -0.4% - BM_UFlat/16 80813 77494 1.4GB/s pb +4.3% - BM_UFlat/17 251792 241553 727.7MB/s gaviota +4.2% - BM_UValidate/0 40343 40354 2.4GB/s html -0.0% - BM_UValidate/1 426890 451574 1.4GB/s urls -5.5% - BM_UValidate/2 187 179 661.9GB/s jpg +4.5% - BM_UValidate/3 13783 13827 6.4GB/s pdf -0.3% - BM_UValidate/4 162393 163335 2.3GB/s html4 -0.6% - BM_UDataBuffer/0 93756 93302 1046.7MB/s html +0.5% - BM_UDataBuffer/1 886714 916292 730.7MB/s urls -3.2% - BM_UDataBuffer/2 15861 16401 7.2GB/s jpg -3.3% - BM_UDataBuffer/3 38934 39224 2.2GB/s pdf -0.7% - BM_UDataBuffer/4 381008 379428 1029.5MB/s html4 +0.4% - BM_UCord/0 92528 91098 1072.0MB/s html +1.6% - BM_UCord/1 858421 885287 756.3MB/s urls -3.0% - BM_UCord/2 13140 13464 8.8GB/s jpg -2.4% - BM_UCord/3 39012 37773 2.3GB/s pdf +3.3% - BM_UCord/4 376869 371267 1052.1MB/s html4 +1.5% - BM_UCordString/0 75810 75303 1.3GB/s html +0.7% - BM_UCordString/1 735290 753841 888.2MB/s urls -2.5% - BM_UCordString/2 11945 13113 9.0GB/s jpg -8.9% - BM_UCordString/3 33901 32562 2.7GB/s pdf +4.1% - BM_UCordString/4 310985 309390 1.2GB/s html4 +0.5% - BM_UCordValidate/0 40952 40450 2.4GB/s html +1.2% - BM_UCordValidate/1 433842 456531 1.4GB/s urls -5.0% - BM_UCordValidate/2 1179 1173 100.8GB/s jpg 
+0.5% - BM_UCordValidate/3 14481 14392 6.1GB/s pdf +0.6% - BM_UCordValidate/4 164364 164151 2.3GB/s html4 +0.1% - BM_ZFlat/0 160610 156601 623.6MB/s html (22.31 %) +2.6% - BM_ZFlat/1 1995238 1993582 335.9MB/s urls (47.77 %) +0.1% - BM_ZFlat/2 30133 24983 4.7GB/s jpg (99.87 %) +20.6% - BM_ZFlat/3 74453 73128 1.2GB/s pdf (82.07 %) +1.8% - BM_ZFlat/4 647674 633729 616.4MB/s html4 (22.51 %) +2.2% - BM_ZFlat/5 76259 76090 308.4MB/s cp (48.12 %) +0.2% - BM_ZFlat/6 31106 31084 342.1MB/s c (42.40 %) +0.1% - BM_ZFlat/7 10507 10443 339.8MB/s lsp (48.37 %) +0.6% - BM_ZFlat/8 1811047 1793325 547.6MB/s xls (41.23 %) +1.0% - BM_ZFlat/9 597903 581793 249.3MB/s txt1 (57.87 %) +2.8% - BM_ZFlat/10 525320 514522 232.0MB/s txt2 (61.93 %) +2.1% - BM_ZFlat/11 1596591 1551636 262.3MB/s txt3 (54.92 %) +2.9% - BM_ZFlat/12 2134523 2094033 219.5MB/s txt4 (66.22 %) +1.9% - BM_ZFlat/13 593024 587869 832.6MB/s bin (18.11 %) +0.9% - BM_ZFlat/14 114746 110666 329.5MB/s sum (48.96 %) +3.7% - BM_ZFlat/15 14376 14485 278.3MB/s man (59.36 %) -0.8% - BM_ZFlat/16 167908 150070 753.6MB/s pb (19.64 %) +11.9% - BM_ZFlat/17 460228 442253 397.5MB/s gaviota (37.72 %) +4.1% - BM_ZCord/0 164896 160241 609.4MB/s html +2.9% - BM_ZCord/1 2070239 2043492 327.7MB/s urls +1.3% - BM_ZCord/2 54402 47002 2.5GB/s jpg +15.7% - BM_ZCord/3 85871 83832 1073.1MB/s pdf +2.4% - BM_ZCord/4 664078 648825 602.0MB/s html4 +2.4% - BM_ZDataBuffer/0 174874 172549 566.0MB/s html +1.3% - BM_ZDataBuffer/1 2134410 2139173 313.0MB/s urls -0.2% - BM_ZDataBuffer/2 71911 69551 1.7GB/s jpg +3.4% - BM_ZDataBuffer/3 98236 99727 902.1MB/s pdf -1.5% - BM_ZDataBuffer/4 710776 699104 558.8MB/s html4 +1.7% - Sum of all benchmarks 27358908 27200688 +0.6% - - - Sandy Bridge 2.6 GHz: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 49356 49018 1.9GB/s html +0.7% - BM_UFlat/1 516764 531955 1.2GB/s urls -2.9% - BM_UFlat/2 6982 7304 16.2GB/s jpg -4.4% - BM_UFlat/3 15285 15598 5.6GB/s pdf -2.0% - BM_UFlat/4 206557 206669 1.8GB/s html4 -0.1% - BM_UFlat/5 13681 13567 1.7GB/s cp +0.8% - BM_UFlat/6 6571 6592 1.6GB/s c -0.3% - BM_UFlat/7 2008 1994 1.7GB/s lsp +0.7% - BM_UFlat/8 775700 773286 1.2GB/s xls +0.3% - BM_UFlat/9 165578 164480 881.8MB/s txt1 +0.7% - BM_UFlat/10 143707 144139 828.2MB/s txt2 -0.3% - BM_UFlat/11 443026 436281 932.8MB/s txt3 +1.5% - BM_UFlat/12 603129 595856 771.2MB/s txt4 +1.2% - BM_UFlat/13 271682 270450 1.8GB/s bin +0.5% - BM_UFlat/14 26200 25666 1.4GB/s sum +2.1% - BM_UFlat/15 2620 2608 1.5GB/s man +0.5% - BM_UFlat/16 48908 47756 2.3GB/s pb +2.4% - BM_UFlat/17 174638 170346 1031.9MB/s gaviota +2.5% - BM_UValidate/0 31922 31898 3.0GB/s html +0.1% - BM_UValidate/1 341265 363554 1.8GB/s urls -6.1% - BM_UValidate/2 160 151 782.8GB/s jpg +6.0% - BM_UValidate/3 10402 10380 8.5GB/s pdf +0.2% - BM_UValidate/4 129490 130587 2.9GB/s html4 -0.8% - BM_UDataBuffer/0 59383 58736 1.6GB/s html +1.1% - BM_UDataBuffer/1 619222 637786 1049.8MB/s urls -2.9% - BM_UDataBuffer/2 10775 11941 9.9GB/s jpg -9.8% - BM_UDataBuffer/3 18002 17930 4.9GB/s pdf +0.4% - BM_UDataBuffer/4 259182 259306 1.5GB/s html4 -0.0% - BM_UCord/0 59379 57814 1.6GB/s html +2.7% - BM_UCord/1 598456 615162 1088.4MB/s urls -2.7% - BM_UCord/2 8519 8628 13.7GB/s jpg -1.3% - BM_UCord/3 18123 17537 5.0GB/s pdf +3.3% - BM_UCord/4 252375 252331 1.5GB/s html4 +0.0% - BM_UCordString/0 49494 49790 1.9GB/s html -0.6% - BM_UCordString/1 524659 541803 1.2GB/s urls -3.2% - BM_UCordString/2 8206 8354 14.2GB/s jpg -1.8% 
- BM_UCordString/3 17235 16537 5.3GB/s pdf +4.2% - BM_UCordString/4 210188 211072 1.8GB/s html4 -0.4% - BM_UCordValidate/0 31956 31587 3.0GB/s html +1.2% - BM_UCordValidate/1 340828 362141 1.8GB/s urls -5.9% - BM_UCordValidate/2 783 744 158.9GB/s jpg +5.2% - BM_UCordValidate/3 10543 10462 8.4GB/s pdf +0.8% - BM_UCordValidate/4 130150 129789 2.9GB/s html4 +0.3% - BM_ZFlat/0 113873 111200 878.2MB/s html (22.31 %) +2.4% - BM_ZFlat/1 1473023 1489858 449.4MB/s urls (47.77 %) -1.1% - BM_ZFlat/2 23569 19486 6.1GB/s jpg (99.87 %) +21.0% - BM_ZFlat/3 49178 48046 1.8GB/s pdf (82.07 %) +2.4% - BM_ZFlat/4 475063 469394 832.2MB/s html4 (22.51 %) +1.2% - BM_ZFlat/5 46910 46816 501.2MB/s cp (48.12 %) +0.2% - BM_ZFlat/6 16883 16916 628.6MB/s c (42.40 %) -0.2% - BM_ZFlat/7 5381 5447 651.5MB/s lsp (48.37 %) -1.2% - BM_ZFlat/8 1466870 1473861 666.3MB/s xls (41.23 %) -0.5% - BM_ZFlat/9 468006 464101 312.5MB/s txt1 (57.87 %) +0.8% - BM_ZFlat/10 408157 408957 291.9MB/s txt2 (61.93 %) -0.2% - BM_ZFlat/11 1253348 1232910 330.1MB/s txt3 (54.92 %) +1.7% - BM_ZFlat/12 1702373 1702977 269.8MB/s txt4 (66.22 %) -0.0% - BM_ZFlat/13 439792 438557 1116.0MB/s bin (18.11 %) +0.3% - BM_ZFlat/14 80766 78851 462.5MB/s sum (48.96 %) +2.4% - BM_ZFlat/15 7420 7542 534.5MB/s man (59.36 %) -1.6% - BM_ZFlat/16 112043 100126 1.1GB/s pb (19.64 %) +11.9% - BM_ZFlat/17 368877 357703 491.4MB/s gaviota (37.72 %) +3.1% - BM_ZCord/0 116402 113564 859.9MB/s html +2.5% - BM_ZCord/1 1507156 1519911 440.5MB/s urls -0.8% - BM_ZCord/2 39860 33686 3.5GB/s jpg +18.3% - BM_ZCord/3 56211 54694 1.6GB/s pdf +2.8% - BM_ZCord/4 485594 479212 815.1MB/s html4 +1.3% - BM_ZDataBuffer/0 123185 121572 803.3MB/s html +1.3% - BM_ZDataBuffer/1 1569111 1589380 421.3MB/s urls -1.3% - BM_ZDataBuffer/2 53143 49556 2.4GB/s jpg +7.2% - BM_ZDataBuffer/3 65725 66826 1.3GB/s pdf -1.6% - BM_ZDataBuffer/4 517871 514750 758.9MB/s html4 +0.6% - Sum of all benchmarks 20258879 20315484 -0.3% - - - AMD Instanbul 2.4 GHz: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 97120 96585 1011.1MB/s html +0.6% - BM_UFlat/1 917473 948016 706.3MB/s urls -3.2% - BM_UFlat/2 21496 23938 4.9GB/s jpg -10.2% - BM_UFlat/3 44751 45639 1.9GB/s pdf -1.9% - BM_UFlat/4 391950 391413 998.0MB/s html4 +0.1% - BM_UFlat/5 37366 37201 630.7MB/s cp +0.4% - BM_UFlat/6 18350 18318 580.5MB/s c +0.2% - BM_UFlat/7 5672 5661 626.9MB/s lsp +0.2% - BM_UFlat/8 1533390 1529441 642.1MB/s xls +0.3% - BM_UFlat/9 335477 336553 431.0MB/s txt1 -0.3% - BM_UFlat/10 285140 292080 408.7MB/s txt2 -2.4% - BM_UFlat/11 888507 894758 454.9MB/s txt3 -0.7% - BM_UFlat/12 1187643 1210928 379.5MB/s txt4 -1.9% - BM_UFlat/13 493717 507447 964.5MB/s bin -2.7% - BM_UFlat/14 61740 60870 599.1MB/s sum +1.4% - BM_UFlat/15 7211 7187 560.9MB/s man +0.3% - BM_UFlat/16 97435 93100 1.2GB/s pb +4.7% - BM_UFlat/17 362662 356395 493.2MB/s gaviota +1.8% - BM_UValidate/0 47475 47118 2.0GB/s html +0.8% - BM_UValidate/1 501304 529741 1.2GB/s urls -5.4% - BM_UValidate/2 276 243 486.2GB/s jpg +13.6% - BM_UValidate/3 16361 16261 5.4GB/s pdf +0.6% - BM_UValidate/4 190741 190353 2.0GB/s html4 +0.2% - BM_UDataBuffer/0 111080 109771 889.6MB/s html +1.2% - BM_UDataBuffer/1 1051035 1085999 616.5MB/s urls -3.2% - BM_UDataBuffer/2 25801 25463 4.6GB/s jpg +1.3% - BM_UDataBuffer/3 50493 49946 1.8GB/s pdf +1.1% - BM_UDataBuffer/4 447258 444138 879.5MB/s html4 +0.7% - BM_UCord/0 109350 107909 905.0MB/s html +1.3% - BM_UCord/1 1023396 1054964 634.7MB/s urls -3.0% 
- BM_UCord/2 25292 24371 4.9GB/s jpg +3.8% - BM_UCord/3 48955 49736 1.8GB/s pdf -1.6% - BM_UCord/4 440452 437331 893.2MB/s html4 +0.7% - BM_UCordString/0 98511 98031 996.2MB/s html +0.5% - BM_UCordString/1 933230 963495 694.9MB/s urls -3.1% - BM_UCordString/2 23311 24076 4.9GB/s jpg -3.2% - BM_UCordString/3 45568 46196 1.9GB/s pdf -1.4% - BM_UCordString/4 397791 396934 984.1MB/s html4 +0.2% - BM_UCordValidate/0 47537 46921 2.0GB/s html +1.3% - BM_UCordValidate/1 505071 532716 1.2GB/s urls -5.2% - BM_UCordValidate/2 1663 1621 72.9GB/s jpg +2.6% - BM_UCordValidate/3 16890 16926 5.2GB/s pdf -0.2% - BM_UCordValidate/4 192365 191984 2.0GB/s html4 +0.2% - BM_ZFlat/0 184708 179103 545.3MB/s html (22.31 %) +3.1% - BM_ZFlat/1 2293864 2302950 290.7MB/s urls (47.77 %) -0.4% - BM_ZFlat/2 52852 47618 2.5GB/s jpg (99.87 %) +11.0% - BM_ZFlat/3 100766 96179 935.3MB/s pdf (82.07 %) +4.8% - BM_ZFlat/4 741220 727977 536.6MB/s html4 (22.51 %) +1.8% - BM_ZFlat/5 85402 85418 274.7MB/s cp (48.12 %) -0.0% - BM_ZFlat/6 36558 36494 291.4MB/s c (42.40 %) +0.2% - BM_ZFlat/7 12706 12507 283.7MB/s lsp (48.37 %) +1.6% - BM_ZFlat/8 2336823 2335688 420.5MB/s xls (41.23 %) +0.0% - BM_ZFlat/9 701804 681153 212.9MB/s txt1 (57.87 %) +3.0% - BM_ZFlat/10 606700 597194 199.9MB/s txt2 (61.93 %) +1.6% - BM_ZFlat/11 1852283 1803238 225.7MB/s txt3 (54.92 %) +2.7% - BM_ZFlat/12 2475527 2443354 188.1MB/s txt4 (66.22 %) +1.3% - BM_ZFlat/13 694497 696654 702.6MB/s bin (18.11 %) -0.3% - BM_ZFlat/14 136929 129855 280.8MB/s sum (48.96 %) +5.4% - BM_ZFlat/15 17172 17124 235.4MB/s man (59.36 %) +0.3% - BM_ZFlat/16 190364 171763 658.4MB/s pb (19.64 %) +10.8% - BM_ZFlat/17 567285 555190 316.6MB/s gaviota (37.72 %) +2.2% - BM_ZCord/0 193490 187031 522.1MB/s html +3.5% - BM_ZCord/1 2427537 2415315 277.2MB/s urls +0.5% - BM_ZCord/2 85378 81412 1.5GB/s jpg +4.9% - BM_ZCord/3 121898 119419 753.3MB/s pdf +2.1% - BM_ZCord/4 779564 762961 512.0MB/s html4 +2.2% - BM_ZDataBuffer/0 213820 207272 471.1MB/s html +3.2% - BM_ZDataBuffer/1 2589010 2586495 258.9MB/s urls +0.1% - BM_ZDataBuffer/2 121871 118885 1018.4MB/s jpg +2.5% - BM_ZDataBuffer/3 145382 145986 616.2MB/s pdf -0.4% - BM_ZDataBuffer/4 868117 852754 458.1MB/s html4 +1.8% - Sum of all benchmarks 33771833 33744763 +0.1% - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@71 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 81f34784b7b812dcda956ee489dfdc74ec2da990 -Author: snappy.mirrorbot@gmail.com -Date: Sun Jan 6 19:21:26 2013 +0000 - - Adjust the Snappy open-source distribution for the changes in Google's - internal file API. - - R=sanjay - - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@70 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 698af469b47fe809905e2ed173ad84241de5800f -Author: snappy.mirrorbot@gmail.com -Date: Fri Jan 4 11:54:20 2013 +0000 - - Change a few ORs to additions where they don't matter. This helps the compiler - use the LEA instruction more efficiently, since e.g. a + (b << 2) can be encoded - as one instruction. Even more importantly, it can constant-fold the - COPY_* enums together with the shifted negative constants, which also saves - some instructions. (We don't need it for LITERAL, since it happens to be 0.) - - I am unsure why the compiler couldn't do this itself, but the theory is that - it cannot prove that len-1 and len-4 cannot underflow/wrap, and thus can't - do the optimization safely. - - The gains are small but measurable; 0.5-1.0% over the BM_Z* benchmarks - (measured on Westmere, Sandy Bridge and Istanbul). 
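To make the LEA observation concrete, here is a minimal sketch of a copy-tag emitter in the spirit of this change (a hypothetical function, not Snappy's actual emitter; only the ADD-instead-of-OR point is the real one):

    #include <cstddef>

    // Tag types from the Snappy format; LITERAL is 0, so it needs no trick.
    enum { LITERAL = 0, COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2 };

    // The low two bits of ((len - 4) << 2) are always zero, so OR and ADD
    // produce the same tag byte here -- but written as additions, the
    // compiler can fold COPY_1_BYTE_OFFSET into the shifted constants and
    // encode the whole expression with a single LEA.
    static inline char* EmitCopy1ByteOffset(char* op, size_t offset, size_t len) {
      *op++ = static_cast<char>(COPY_1_BYTE_OFFSET + ((len - 4) << 2) +
                                ((offset >> 8) << 5));
      *op++ = static_cast<char>(offset & 0xff);  // low 8 bits of the offset
      return op;
    }

The compiler cannot make this transformation on its own precisely because, as noted above, it cannot prove that len - 4 never wraps.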
- - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@69 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 55209f9b92efd97e0a61be28ed94210de04c3bfc -Author: snappy.mirrorbot@gmail.com -Date: Mon Oct 8 11:37:16 2012 +0000 - - Stop giving -Werror to automake, due to an incompatibility between current - versions of libtool and automake on non-GNU platforms (e.g. Mac OS X). - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@68 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b86e81c8b3426a62d8ab3a7674c2506e9e678740 -Author: snappy.mirrorbot@gmail.com -Date: Fri Aug 17 13:54:47 2012 +0000 - - Fix public issue 66: Document GetUncompressedLength better, in particular that - it leaves the source in a state that's not appropriate for RawUncompress. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@67 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2e225ba821b420ae28e1d427075d5589c1e892d9 -Author: snappy.mirrorbot@gmail.com -Date: Tue Jul 31 11:44:44 2012 +0000 - - Fix public issue 64: Check for <sys/uio.h> at configure time, - since MSVC seemingly does not have it. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@66 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e89f20ab46ee11050760c6d57f05c2a3825a911c -Author: snappy.mirrorbot@gmail.com -Date: Wed Jul 4 09:34:48 2012 +0000 - - Handle the case where gettimeofday() goes backwards or returns the same value - twice; it could cause division by zero in the unit test framework. - (We already had one fix for this in place, but it was incomplete.) - - This could in theory happen on any system, since there are few guarantees - about gettimeofday(), but seems to only happen in practice on GNU/Hurd, where - gettimeofday() is cached and only updated every so often. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@65 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3ec60ac9878de5d0317ad38fc545080a4bfaa74f -Author: snappy.mirrorbot@gmail.com -Date: Wed Jul 4 09:28:33 2012 +0000 - - Mark ARMv4 as not supporting unaligned accesses (not just ARMv5 and ARMv6); - apparently Debian still targets these by default, giving us segfaults on - armel. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@64 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit be80d6f74f9d82220e952a54f3f129aae1f13f95 -Author: snappy.mirrorbot@gmail.com -Date: Tue May 22 09:46:05 2012 +0000 - - Fix public bug #62: Remove an extraneous comma at the end of an enum list, - causing compile errors when embedded in Mozilla on OpenBSD. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@63 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 8b95464146dddab1c7068f879162db9a885cdafe -Author: snappy.mirrorbot@gmail.com -Date: Tue May 22 09:32:50 2012 +0000 - - Snappy library no longer depends on iostream. - - Achieved by moving logging macro definitions to a test-only - header file, and by changing non-test code to use assert, - fprintf, and abort instead of LOG/CHECK macros. - - R=sesse - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@62 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit fc723b212d6972af7051261754770b3f70a7dc03 -Author: snappy.mirrorbot@gmail.com -Date: Fri Feb 24 15:46:37 2012 +0000 - - Release Snappy 1.0.5.
- - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@61 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit dc63e0ad9693e13390ba31b00d92ecccaf7605c3 -Author: snappy.mirrorbot@gmail.com -Date: Thu Feb 23 17:00:36 2012 +0000 - - For 32-bit platforms, do not try to accelerate multiple neighboring - 32-bit loads with a 64-bit load during compression (it's not a win). - - The main target for this optimization is ARM, but 32-bit x86 gets - a small gain, too, although there is noise in the microbenchmarks. - It's a no-op for 64-bit x86. It does not affect decompression. - - Microbenchmark results on a Cortex-A9 1GHz, using g++ 4.6.2 (from - Ubuntu/Linaro), -O2 -DNDEBUG -Wa,-march=armv7a -mtune=cortex-a9 - -mthumb-interwork, minimum 1000 iterations: - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_ZFlat/0 1158277 1160000 1000 84.2MB/s html (23.57 %) [ +4.3%] - BM_ZFlat/1 14861782 14860000 1000 45.1MB/s urls (50.89 %) [ +1.1%] - BM_ZFlat/2 393595 390000 1000 310.5MB/s jpg (99.88 %) [ +0.0%] - BM_ZFlat/3 650583 650000 1000 138.4MB/s pdf (82.13 %) [ +3.1%] - BM_ZFlat/4 4661480 4660000 1000 83.8MB/s html4 (23.55 %) [ +4.3%] - BM_ZFlat/5 491973 490000 1000 47.9MB/s cp (48.12 %) [ +2.0%] - BM_ZFlat/6 193575 192678 1038 55.2MB/s c (42.40 %) [ +9.0%] - BM_ZFlat/7 62343 62754 3187 56.5MB/s lsp (48.37 %) [ +2.6%] - BM_ZFlat/8 17708468 17710000 1000 55.5MB/s xls (41.34 %) [ -0.3%] - BM_ZFlat/9 3755345 3760000 1000 38.6MB/s txt1 (59.81 %) [ +8.2%] - BM_ZFlat/10 3324217 3320000 1000 36.0MB/s txt2 (64.07 %) [ +4.2%] - BM_ZFlat/11 10139932 10140000 1000 40.1MB/s txt3 (57.11 %) [ +6.4%] - BM_ZFlat/12 13532109 13530000 1000 34.0MB/s txt4 (68.35 %) [ +5.0%] - BM_ZFlat/13 4690847 4690000 1000 104.4MB/s bin (18.21 %) [ +4.1%] - BM_ZFlat/14 830682 830000 1000 43.9MB/s sum (51.88 %) [ +1.2%] - BM_ZFlat/15 84784 85011 2235 47.4MB/s man (59.36 %) [ +1.1%] - BM_ZFlat/16 1293254 1290000 1000 87.7MB/s pb (23.15 %) [ +2.3%] - BM_ZFlat/17 2775155 2780000 1000 63.2MB/s gaviota (38.27 %) [+12.2%] - - Core i7 in 32-bit mode (only one run and 100 iterations, though, so noisy): - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_ZFlat/0 227582 223464 3043 437.0MB/s html (23.57 %) [ +7.4%] - BM_ZFlat/1 2982430 2918455 233 229.4MB/s urls (50.89 %) [ +2.9%] - BM_ZFlat/2 46967 46658 15217 2.5GB/s jpg (99.88 %) [ +0.0%] - BM_ZFlat/3 115298 114864 5833 783.2MB/s pdf (82.13 %) [ +1.5%] - BM_ZFlat/4 913440 899743 778 434.2MB/s html4 (23.55 %) [ +0.3%] - BM_ZFlat/5 110302 108571 7000 216.1MB/s cp (48.12 %) [ +0.0%] - BM_ZFlat/6 44409 43372 15909 245.2MB/s c (42.40 %) [ +0.8%] - BM_ZFlat/7 15713 15643 46667 226.9MB/s lsp (48.37 %) [ +2.7%] - BM_ZFlat/8 2625539 2602230 269 377.4MB/s xls (41.34 %) [ +1.4%] - BM_ZFlat/9 808884 811429 875 178.8MB/s txt1 (59.81 %) [ -3.9%] - BM_ZFlat/10 709532 700000 1000 170.5MB/s txt2 (64.07 %) [ +0.0%] - BM_ZFlat/11 2177682 2162162 333 188.2MB/s txt3 (57.11 %) [ -1.4%] - BM_ZFlat/12 2849640 2840000 250 161.8MB/s txt4 (68.35 %) [ -1.4%] - BM_ZFlat/13 849760 835476 778 585.8MB/s bin (18.21 %) [ +1.2%] - BM_ZFlat/14 165940 164571 4375 221.6MB/s sum (51.88 %) [ +1.4%] - BM_ZFlat/15 20939 20571 35000 196.0MB/s man (59.36 %) [ +2.1%] - BM_ZFlat/16 239209 236544 2917 478.1MB/s pb (23.15 %) [ +4.2%] - BM_ZFlat/17 616206 610000 1000 288.2MB/s gaviota (38.27 %) [ -1.6%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@60 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 
f8829ea39d51432ba4e6a26ddaec57acea779f4c -Author: snappy.mirrorbot@gmail.com -Date: Tue Feb 21 17:02:17 2012 +0000 - - Enable the use of unaligned loads and stores for ARM-based architectures - where they are available (ARMv7 and higher). This gives a significant - speed boost on ARM, both for compression and decompression. - It should not affect x86 at all. - - There are more changes possible to speed up ARM, but it might not be - that easy to do without hurting x86 or making the code uglier. - Also, we de not try to use NEON yet. - - Microbenchmark results on a Cortex-A9 1GHz, using g++ 4.6.2 (from Ubuntu/Linaro), - -O2 -DNDEBUG -Wa,-march=armv7a -mtune=cortex-a9 -mthumb-interwork: - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 524806 529100 378 184.6MB/s html [+33.6%] - BM_UFlat/1 5139790 5200000 100 128.8MB/s urls [+28.8%] - BM_UFlat/2 86540 84166 1901 1.4GB/s jpg [ +0.6%] - BM_UFlat/3 215351 210176 904 428.0MB/s pdf [+29.8%] - BM_UFlat/4 2144490 2100000 100 186.0MB/s html4 [+33.3%] - BM_UFlat/5 194482 190000 1000 123.5MB/s cp [+36.2%] - BM_UFlat/6 91843 90175 2107 117.9MB/s c [+38.6%] - BM_UFlat/7 28535 28426 6684 124.8MB/s lsp [+34.7%] - BM_UFlat/8 9206600 9200000 100 106.7MB/s xls [+42.4%] - BM_UFlat/9 1865273 1886792 106 76.9MB/s txt1 [+32.5%] - BM_UFlat/10 1576809 1587301 126 75.2MB/s txt2 [+32.3%] - BM_UFlat/11 4968450 4900000 100 83.1MB/s txt3 [+32.7%] - BM_UFlat/12 6673970 6700000 100 68.6MB/s txt4 [+32.8%] - BM_UFlat/13 2391470 2400000 100 203.9MB/s bin [+29.2%] - BM_UFlat/14 334601 344827 522 105.8MB/s sum [+30.6%] - BM_UFlat/15 37404 38080 5252 105.9MB/s man [+33.8%] - BM_UFlat/16 535470 540540 370 209.2MB/s pb [+31.2%] - BM_UFlat/17 1875245 1886792 106 93.2MB/s gaviota [+37.8%] - BM_UValidate/0 178425 179533 1114 543.9MB/s html [ +2.7%] - BM_UValidate/1 2100450 2000000 100 334.8MB/s urls [ +5.0%] - BM_UValidate/2 1039 1044 172413 113.3GB/s jpg [ +3.4%] - BM_UValidate/3 59423 59470 3363 1.5GB/s pdf [ +7.8%] - BM_UValidate/4 760716 766283 261 509.8MB/s html4 [ +6.5%] - BM_ZFlat/0 1204632 1204819 166 81.1MB/s html (23.57 %) [+32.8%] - BM_ZFlat/1 15656190 15600000 100 42.9MB/s urls (50.89 %) [+27.6%] - BM_ZFlat/2 403336 410677 487 294.8MB/s jpg (99.88 %) [+16.5%] - BM_ZFlat/3 664073 671140 298 134.0MB/s pdf (82.13 %) [+28.4%] - BM_ZFlat/4 4961940 4900000 100 79.7MB/s html4 (23.55 %) [+30.6%] - BM_ZFlat/5 500664 501253 399 46.8MB/s cp (48.12 %) [+33.4%] - BM_ZFlat/6 217276 215982 926 49.2MB/s c (42.40 %) [+25.0%] - BM_ZFlat/7 64122 65487 3054 54.2MB/s lsp (48.37 %) [+36.1%] - BM_ZFlat/8 18045730 18000000 100 54.6MB/s xls (41.34 %) [+34.4%] - BM_ZFlat/9 4051530 4000000 100 36.3MB/s txt1 (59.81 %) [+25.0%] - BM_ZFlat/10 3451800 3500000 100 34.1MB/s txt2 (64.07 %) [+25.7%] - BM_ZFlat/11 11052340 11100000 100 36.7MB/s txt3 (57.11 %) [+24.3%] - BM_ZFlat/12 14538690 14600000 100 31.5MB/s txt4 (68.35 %) [+24.7%] - BM_ZFlat/13 5041850 5000000 100 97.9MB/s bin (18.21 %) [+32.0%] - BM_ZFlat/14 908840 909090 220 40.1MB/s sum (51.88 %) [+22.2%] - BM_ZFlat/15 86921 86206 1972 46.8MB/s man (59.36 %) [+42.2%] - BM_ZFlat/16 1312315 1315789 152 86.0MB/s pb (23.15 %) [+34.5%] - BM_ZFlat/17 3173120 3200000 100 54.9MB/s gaviota (38.27%) [+28.1%] - - - The move from 64-bit to 32-bit operations for the copies also affected 32-bit x86; - positive on the decompression side, and slightly negative on the compression side - (unless that is noise; I only ran once): - - Benchmark Time(ns) CPU(ns) Iterations - 
----------------------------------------------------- - BM_UFlat/0 86279 86140 7778 1.1GB/s html [ +7.5%] - BM_UFlat/1 839265 822622 778 813.9MB/s urls [ +9.4%] - BM_UFlat/2 9180 9143 87500 12.9GB/s jpg [ +1.2%] - BM_UFlat/3 35080 35000 20000 2.5GB/s pdf [+10.1%] - BM_UFlat/4 350318 345000 2000 1.1GB/s html4 [ +7.0%] - BM_UFlat/5 33808 33472 21212 701.0MB/s cp [ +9.0%] - BM_UFlat/6 15201 15214 46667 698.9MB/s c [+14.9%] - BM_UFlat/7 4652 4651 159091 762.9MB/s lsp [ +7.5%] - BM_UFlat/8 1285551 1282528 538 765.7MB/s xls [+10.7%] - BM_UFlat/9 282510 281690 2414 514.9MB/s txt1 [+13.6%] - BM_UFlat/10 243494 239286 2800 498.9MB/s txt2 [+14.4%] - BM_UFlat/11 743625 740000 1000 550.0MB/s txt3 [+14.3%] - BM_UFlat/12 999441 989717 778 464.3MB/s txt4 [+16.1%] - BM_UFlat/13 412402 410076 1707 1.2GB/s bin [ +7.3%] - BM_UFlat/14 54876 54000 10000 675.3MB/s sum [+13.0%] - BM_UFlat/15 6146 6100 100000 660.8MB/s man [+14.8%] - BM_UFlat/16 90496 90286 8750 1.2GB/s pb [ +4.0%] - BM_UFlat/17 292650 292000 2500 602.0MB/s gaviota [+18.1%] - BM_UValidate/0 49620 49699 14286 1.9GB/s html [ +0.0%] - BM_UValidate/1 501371 500000 1000 1.3GB/s urls [ +0.0%] - BM_UValidate/2 232 227 3043478 521.5GB/s jpg [ +1.3%] - BM_UValidate/3 17250 17143 43750 5.1GB/s pdf [ -1.3%] - BM_UValidate/4 198643 200000 3500 1.9GB/s html4 [ -0.9%] - BM_ZFlat/0 227128 229415 3182 425.7MB/s html (23.57 %) [ -1.4%] - BM_ZFlat/1 2970089 2960000 250 226.2MB/s urls (50.89 %) [ -1.9%] - BM_ZFlat/2 45683 44999 15556 2.6GB/s jpg (99.88 %) [ +2.2%] - BM_ZFlat/3 114661 113136 6364 795.1MB/s pdf (82.13 %) [ -1.5%] - BM_ZFlat/4 919702 914286 875 427.2MB/s html4 (23.55%) [ -1.3%] - BM_ZFlat/5 108189 108422 6364 216.4MB/s cp (48.12 %) [ -1.2%] - BM_ZFlat/6 44525 44000 15909 241.7MB/s c (42.40 %) [ -2.9%] - BM_ZFlat/7 15973 15857 46667 223.8MB/s lsp (48.37 %) [ +0.0%] - BM_ZFlat/8 2677888 2639405 269 372.1MB/s xls (41.34 %) [ -1.4%] - BM_ZFlat/9 800715 780000 1000 186.0MB/s txt1 (59.81 %) [ -0.4%] - BM_ZFlat/10 700089 700000 1000 170.5MB/s txt2 (64.07 %) [ -2.9%] - BM_ZFlat/11 2159356 2138365 318 190.3MB/s txt3 (57.11 %) [ -0.3%] - BM_ZFlat/12 2796143 2779923 259 165.3MB/s txt4 (68.35 %) [ -1.4%] - BM_ZFlat/13 856458 835476 778 585.8MB/s bin (18.21 %) [ -0.1%] - BM_ZFlat/14 166908 166857 4375 218.6MB/s sum (51.88 %) [ -1.4%] - BM_ZFlat/15 21181 20857 35000 193.3MB/s man (59.36 %) [ -0.8%] - BM_ZFlat/16 244009 239973 2917 471.3MB/s pb (23.15 %) [ -1.4%] - BM_ZFlat/17 596362 590000 1000 297.9MB/s gaviota (38.27%) [ +0.0%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@59 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f2e184f638bdc7905f26c24faaf10fc0f5d33403 -Author: snappy.mirrorbot@gmail.com -Date: Sat Feb 11 22:11:22 2012 +0000 - - Lower the size allocated in the "corrupted input" unit test from 256 MB - to 2 MB. This fixes issues with running the unit test on platforms with - little RAM (e.g. some ARM boards). - - Also, reactivate the 2 MB test for 64-bit platforms; there's no good - reason why it shouldn't be. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@58 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e750dc0f054ba74b0ce76dd2013e6728cc7a41c5 -Author: snappy.mirrorbot@gmail.com -Date: Sun Jan 8 17:55:48 2012 +0000 - - Minor refactoring to accomodate changes in Google's internal code tree. 
- - - git-svn-id: https://snappy.googlecode.com/svn/trunk@57 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d9068ee301bdf893a4d8cb7c6518eacc44c4c1f2 -Author: snappy.mirrorbot@gmail.com -Date: Wed Jan 4 13:10:46 2012 +0000 - - Fix public issue 57: Fix most warnings with -Wall, mostly signed/unsigned - warnings. There are still some in the unit test, but the main .cc file should - be clean. We haven't enabled -Wall for the default build, since the unit test - is still not clean. - - This also fixes a real bug in the open-source implementation of - ReadFileToStringOrDie(); it would not detect errors correctly. - - I had to go through some pains to avoid performance loss as the types - were changed; I think there might still be some with 32-bit if and only if LFS - is enabled (ie., size_t is 64-bit), but for regular 32-bit and 64-bit I can't - see any losses, and I've diffed the generated GCC assembler between the old and - new code without seeing any significant changes. If anything, it's ever so - slightly faster. - - This may or may not enable compression of very large blocks (>2^32 bytes) - when size_t is 64-bit, but I haven't checked, and it is still not a supported - case. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@56 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 0755c815197dacc77d8971ae917c86d7aa96bf8e -Author: snappy.mirrorbot@gmail.com -Date: Wed Jan 4 10:46:39 2012 +0000 - - Add a framing format description. We do not have any implementation of this at - the current point, but there seems to be enough of a general interest in the - topic (cf. public bug #34). - - R=csilvers,sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@55 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d7eb2dc4133794b62cba691f9be40d1549bc32e2 -Author: snappy.mirrorbot@gmail.com -Date: Mon Dec 5 21:27:26 2011 +0000 - - Speed up decompression by moving the refill check to the end of the loop. - - This seems to work because in most of the branches, the compiler can evaluate - “ip_limit_ - ip” in a more efficient way than reloading ip_limit_ from memory - (either by already having the entire expression in a register, or reconstructing - it from “avail”, or something else). Memory loads, even from L1, are seemingly - costly in the big picture at the current decompression speeds.
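The loop shape being described can be sketched with a self-contained toy decoder (illustrative only; the real DecompressAllTags is considerably more involved):

    #include <cstddef>
    #include <cstdio>

    struct Source {
      const char* data;
      size_t len;
    };

    // Refills the [ip, ip_limit) window; a one-shot source for the example.
    static size_t Refill(Source* src, const char** ip, const char** ip_limit) {
      *ip = src->data;
      *ip_limit = src->data + src->len;
      size_t n = src->len;
      src->len = 0;
      return n;
    }

    int main() {
      Source src = {"example input", 13};
      const char *ip = nullptr, *ip_limit = nullptr;
      if (Refill(&src, &ip, &ip_limit) == 0) return 0;
      unsigned checksum = 0;
      for (;;) {
        checksum += static_cast<unsigned char>(*ip++);  // "decode one tag"
        // The refill check sits at the END of the loop body: on the hot
        // path, ip_limit - ip is typically still live in a register here,
        // so the common case branches straight back to the top.
        if (ip == ip_limit && Refill(&src, &ip, &ip_limit) == 0) break;
      }
      std::printf("checksum=%u\n", checksum);
      return 0;
    }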
- - Microbenchmarks (64-bit, opt mode): - - Westmere (Intel Core i7): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 74492 74491 187894 1.3GB/s html [ +5.9%] - BM_UFlat/1 712268 712263 19644 940.0MB/s urls [ +3.8%] - BM_UFlat/2 10591 10590 1000000 11.2GB/s jpg [ -6.8%] - BM_UFlat/3 29643 29643 469915 3.0GB/s pdf [ +7.9%] - BM_UFlat/4 304669 304667 45930 1.3GB/s html4 [ +4.8%] - BM_UFlat/5 28508 28507 490077 823.1MB/s cp [ +4.0%] - BM_UFlat/6 12415 12415 1000000 856.5MB/s c [ +8.6%] - BM_UFlat/7 3415 3415 4084723 1039.0MB/s lsp [+18.0%] - BM_UFlat/8 979569 979563 14261 1002.5MB/s xls [ +5.8%] - BM_UFlat/9 230150 230148 60934 630.2MB/s txt1 [ +5.2%] - BM_UFlat/10 197167 197166 71135 605.5MB/s txt2 [ +4.7%] - BM_UFlat/11 607394 607390 23041 670.1MB/s txt3 [ +5.6%] - BM_UFlat/12 808502 808496 17316 568.4MB/s txt4 [ +5.0%] - BM_UFlat/13 372791 372788 37564 1.3GB/s bin [ +3.3%] - BM_UFlat/14 44541 44541 313969 818.8MB/s sum [ +5.7%] - BM_UFlat/15 4833 4833 2898697 834.1MB/s man [ +4.8%] - BM_UFlat/16 79855 79855 175356 1.4GB/s pb [ +4.8%] - BM_UFlat/17 245845 245843 56838 715.0MB/s gaviota [ +5.8%] - - Clovertown (Intel Core 2): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 107911 107890 100000 905.1MB/s html [ +2.2%] - BM_UFlat/1 1011237 1011041 10000 662.3MB/s urls [ +2.5%] - BM_UFlat/2 26775 26770 523089 4.4GB/s jpg [ +0.0%] - BM_UFlat/3 48103 48095 290618 1.8GB/s pdf [ +3.4%] - BM_UFlat/4 437724 437644 31937 892.6MB/s html4 [ +2.1%] - BM_UFlat/5 39607 39600 358284 592.5MB/s cp [ +2.4%] - BM_UFlat/6 18227 18224 768191 583.5MB/s c [ +2.7%] - BM_UFlat/7 5171 5170 2709437 686.4MB/s lsp [ +3.9%] - BM_UFlat/8 1560291 1559989 8970 629.5MB/s xls [ +3.6%] - BM_UFlat/9 335401 335343 41731 432.5MB/s txt1 [ +3.0%] - BM_UFlat/10 287014 286963 48758 416.0MB/s txt2 [ +2.8%] - BM_UFlat/11 888522 888356 15752 458.1MB/s txt3 [ +2.9%] - BM_UFlat/12 1186600 1186378 10000 387.3MB/s txt4 [ +3.1%] - BM_UFlat/13 572295 572188 24468 855.4MB/s bin [ +2.1%] - BM_UFlat/14 64060 64049 218401 569.4MB/s sum [ +4.1%] - BM_UFlat/15 7264 7263 1916168 555.0MB/s man [ +1.4%] - BM_UFlat/16 108853 108836 100000 1039.1MB/s pb [ +1.7%] - BM_UFlat/17 364289 364223 38419 482.6MB/s gaviota [ +4.9%] - - Barcelona (AMD Opteron): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 103900 103871 100000 940.2MB/s html [ +8.3%] - BM_UFlat/1 1000435 1000107 10000 669.5MB/s urls [ +6.6%] - BM_UFlat/2 24659 24652 567362 4.8GB/s jpg [ +0.1%] - BM_UFlat/3 48206 48193 291121 1.8GB/s pdf [ +5.0%] - BM_UFlat/4 421980 421850 33174 926.0MB/s html4 [ +7.3%] - BM_UFlat/5 40368 40357 346994 581.4MB/s cp [ +8.7%] - BM_UFlat/6 19836 19830 708695 536.2MB/s c [ +8.0%] - BM_UFlat/7 6100 6098 2292774 581.9MB/s lsp [ +9.0%] - BM_UFlat/8 1693093 1692514 8261 580.2MB/s xls [ +8.0%] - BM_UFlat/9 365991 365886 38225 396.4MB/s txt1 [ +7.1%] - BM_UFlat/10 311330 311238 44950 383.6MB/s txt2 [ +7.6%] - BM_UFlat/11 975037 974737 14376 417.5MB/s txt3 [ +6.9%] - BM_UFlat/12 1303558 1303175 10000 352.6MB/s txt4 [ +7.3%] - BM_UFlat/13 517448 517290 27144 946.2MB/s bin [ +5.5%] - BM_UFlat/14 66537 66518 210352 548.3MB/s sum [ +7.5%] - BM_UFlat/15 7976 7974 1760383 505.6MB/s man [ +5.6%] - BM_UFlat/16 103121 103092 100000 1097.0MB/s pb [ +8.7%] - BM_UFlat/17 391431 391314 35733 449.2MB/s gaviota [ +6.5%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@54 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - 
-commit 5ed51ce15fc4ff8d2f7235704eb6b0c3f762fb88 -Author: snappy.mirrorbot@gmail.com -Date: Wed Nov 23 11:14:17 2011 +0000 - - Speed up decompression by making the fast path for literals faster. - - We do the fast-path step as soon as possible; in fact, as soon as we know the - literal length. Since we usually hit the fast path, we can then skip the checks - for long literals and available input space (beyond what the fast path check - already does). - - Note that this changes the decompression Writer API; however, it does not - change the ABI, since writers are always templatized and as such never - cross compilation units. The new API is slightly more general, in that it - doesn't hard-code the value 16. Note that we also take care to check - for len <= 16 first, since the other two checks almost always succeed - (so we don't want to waste time checking for them until we have to). - - The improvements are most marked on Nehalem, but are generally positive - on other platforms as well. All microbenchmarks are 64-bit, opt. - - Clovertown (Core 2): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 110226 110224 100000 886.0MB/s html [ +1.5%] - BM_UFlat/1 1036523 1036508 10000 646.0MB/s urls [ -0.8%] - BM_UFlat/2 26775 26775 522570 4.4GB/s jpg [ +0.0%] - BM_UFlat/3 49738 49737 280974 1.8GB/s pdf [ +0.3%] - BM_UFlat/4 446790 446792 31334 874.3MB/s html4 [ +0.8%] - BM_UFlat/5 40561 40562 350424 578.5MB/s cp [ +1.3%] - BM_UFlat/6 18722 18722 746903 568.0MB/s c [ +1.4%] - BM_UFlat/7 5373 5373 2608632 660.5MB/s lsp [ +8.3%] - BM_UFlat/8 1615716 1615718 8670 607.8MB/s xls [ +2.0%] - BM_UFlat/9 345278 345281 40481 420.1MB/s txt1 [ +1.4%] - BM_UFlat/10 294855 294855 47452 404.9MB/s txt2 [ +1.6%] - BM_UFlat/11 914263 914263 15316 445.2MB/s txt3 [ +1.1%] - BM_UFlat/12 1222694 1222691 10000 375.8MB/s txt4 [ +1.4%] - BM_UFlat/13 584495 584489 23954 837.4MB/s bin [ -0.6%] - BM_UFlat/14 66662 66662 210123 547.1MB/s sum [ +1.2%] - BM_UFlat/15 7368 7368 1881856 547.1MB/s man [ +4.0%] - BM_UFlat/16 110727 110726 100000 1021.4MB/s pb [ +2.3%] - BM_UFlat/17 382138 382141 36616 460.0MB/s gaviota [ -0.7%] - - Westmere (Core i7): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 78861 78853 177703 1.2GB/s html [ +2.1%] - BM_UFlat/1 739560 739491 18912 905.4MB/s urls [ +3.4%] - BM_UFlat/2 9867 9866 1419014 12.0GB/s jpg [ +3.4%] - BM_UFlat/3 31989 31986 438385 2.7GB/s pdf [ +0.2%] - BM_UFlat/4 319406 319380 43771 1.2GB/s html4 [ +1.9%] - BM_UFlat/5 29639 29636 472862 791.7MB/s cp [ +5.2%] - BM_UFlat/6 13478 13477 1000000 789.0MB/s c [ +2.3%] - BM_UFlat/7 4030 4029 3475364 880.7MB/s lsp [ +8.7%] - BM_UFlat/8 1036585 1036492 10000 947.5MB/s xls [ +6.9%] - BM_UFlat/9 242127 242105 57838 599.1MB/s txt1 [ +3.0%] - BM_UFlat/10 206499 206480 67595 578.2MB/s txt2 [ +3.4%] - BM_UFlat/11 641635 641570 21811 634.4MB/s txt3 [ +2.4%] - BM_UFlat/12 848847 848769 16443 541.4MB/s txt4 [ +3.1%] - BM_UFlat/13 384968 384938 36366 1.2GB/s bin [ +0.3%] - BM_UFlat/14 47106 47101 297770 774.3MB/s sum [ +4.4%] - BM_UFlat/15 5063 5063 2772202 796.2MB/s man [ +7.7%] - BM_UFlat/16 83663 83656 167697 1.3GB/s pb [ +1.8%] - BM_UFlat/17 260224 260198 53823 675.6MB/s gaviota [ -0.5%] - - Barcelona (Opteron): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 112490 112457 100000 868.4MB/s html [ -0.4%] - BM_UFlat/1 1066719 1066339 10000 627.9MB/s urls [ +1.0%] - BM_UFlat/2 24679 24672 563802 
4.8GB/s jpg [ +0.7%] - BM_UFlat/3 50603 50589 277285 1.7GB/s pdf [ +2.6%] - BM_UFlat/4 452982 452849 30900 862.6MB/s html4 [ -0.2%] - BM_UFlat/5 43860 43848 319554 535.1MB/s cp [ +1.2%] - BM_UFlat/6 21419 21413 653573 496.6MB/s c [ +1.0%] - BM_UFlat/7 6646 6645 2105405 534.1MB/s lsp [ +0.3%] - BM_UFlat/8 1828487 1827886 7658 537.3MB/s xls [ +2.6%] - BM_UFlat/9 391824 391714 35708 370.3MB/s txt1 [ +2.2%] - BM_UFlat/10 334913 334816 41885 356.6MB/s txt2 [ +1.7%] - BM_UFlat/11 1042062 1041674 10000 390.7MB/s txt3 [ +1.1%] - BM_UFlat/12 1398902 1398456 10000 328.6MB/s txt4 [ +1.7%] - BM_UFlat/13 545706 545530 25669 897.2MB/s bin [ -0.4%] - BM_UFlat/14 71512 71505 196035 510.0MB/s sum [ +1.4%] - BM_UFlat/15 8422 8421 1665036 478.7MB/s man [ +2.6%] - BM_UFlat/16 112053 112048 100000 1009.3MB/s pb [ -0.4%] - BM_UFlat/17 416723 416713 33612 421.8MB/s gaviota [ -2.0%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@53 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 0c1b9c3904430f5b399bd057d76de4bc36b7a123 -Author: snappy.mirrorbot@gmail.com -Date: Tue Nov 8 14:46:39 2011 +0000 - - Fix public issue #53: Update the README to the API we actually open-sourced - with. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@52 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b61134bc0a6a904b41522b4e5c9e80874c730cef -Author: snappy.mirrorbot@gmail.com -Date: Wed Oct 5 12:27:12 2011 +0000 - - In the format description, use a clearer example to emphasize that varints are - stored in little-endian. Patch from Christian von Roques. - - R=csilvers - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@51 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 21a2e4f55758e759302cd84ad0f3580affcba7d9 -Author: snappy.mirrorbot@gmail.com -Date: Thu Sep 15 19:34:06 2011 +0000 - - Release Snappy 1.0.4. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@50 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e2e303286813c759c5b1cdb46dad63c494f0a061 -Author: snappy.mirrorbot@gmail.com -Date: Thu Sep 15 09:50:05 2011 +0000 - - Fix public issue #50: Include generic byteswap macros. - Also include Solaris 10 and FreeBSD versions. - - R=csilvers - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@49 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 593002da3c051f4721312869f816b41485bad3b7 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 18:57:27 2011 +0000 - - Partially fix public issue 50: Remove an extra comma from the end of some - enum declarations, as it seems the Sun compiler does not like it. - - Based on patch by Travis Vitek. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@48 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f1063a5dc43891eed37f0586bfea57b84dddd756 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 18:44:16 2011 +0000 - - Use the right #ifdef test for sys/mman.h. - - Based on patch by Travis Vitek. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@47 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 41c827a2fa9ce048202d941187f211180feadde4 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 01:22:09 2011 +0000 - - Fix public issue #47: Small comment cleanups in the unit test. - - Originally based on a patch by Patrick Pelletier. 
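The little-endian varint storage that the format-description fix above clarifies can be shown with a short, self-contained encoder (a sketch, not the library's implementation):

    #include <cstdint>
    #include <cstdio>

    // Snappy's length preamble: a base-128 varint, least-significant seven
    // bits first, with the high bit of each byte meaning "more bytes follow".
    static int EncodeVarint32(uint32_t value, unsigned char* out) {
      int n = 0;
      while (value >= 0x80) {
        out[n++] = static_cast<unsigned char>(value & 0x7f) | 0x80;
        value >>= 7;
      }
      out[n++] = static_cast<unsigned char>(value);
      return n;
    }

    int main() {
      unsigned char buf[5];  // a 32-bit varint needs at most five bytes
      int n = EncodeVarint32(64 * 1024, buf);  // e.g. a 64 kB uncompressed length
      for (int i = 0; i < n; ++i) std::printf("%02x ", buf[i]);
      std::printf("\n");  // prints "80 80 04": low groups come first
      return 0;
    }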
- - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@46 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 59aeffa6049b5c2a3a467e7602c1f93630b870e7 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 01:14:43 2011 +0000 - - Fix public issue #46: Format description said "3-byte offset" - instead of "4-byte offset" for the longest copies. - - Also fix an inconsistency in the heading for section 2.2.3. - Both patches by Patrick Pelletier. - - R=csilvers - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@45 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 57e7cd72559cb022ef32856f2252a4c4585e562e -Author: snappy.mirrorbot@gmail.com -Date: Tue Jun 28 11:40:25 2011 +0000 - - Fix public issue #44: Make the definition and declaration of CompressFragment - identical, even regarding cv-qualifiers. - - This is required to work around a bug in the Solaris Studio C++ compiler - (it does not properly disregard cv-qualifiers when doing name mangling). - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@44 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 13c4a449a8ea22139c9aa441e8024eebc9dbdf6e -Author: snappy.mirrorbot@gmail.com -Date: Sat Jun 4 10:19:05 2011 +0000 - - Correct an inaccuracy in the Snappy format description. - (I stumbled into this when changing the way we decompress literals.) - - R=csilvers - - Revision created by MOE tool push_codebase. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@43 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f5406737403119e1483a71d2084d17728663a114 -Author: snappy.mirrorbot@gmail.com -Date: Fri Jun 3 20:53:06 2011 +0000 - - Speed up decompression by removing a fast-path attempt. - - Whenever we try to enter a copy fast-path, there is a certain cost in checking - that all the preconditions are in place, but it's normally offset by the fact - that we can usually take the cheaper path. However, in a certain path we've - already established that "avail < literal_length", which usually means that - either the available space is small, or the literal is big. Both will disqualify - us from taking the fast path, and thus we take the hit from the precondition - checking without gaining much from having a fast path. Thus, simply don't try - the fast path in this situation -- we're already on a slow path anyway - (one where we need to refill more data from the reader). - - I'm a bit surprised at how much this gained; it could be that this path is - more common than I thought, or that the simpler structure somehow makes the - compiler happier. I haven't looked at the assembler, but it's a win across - the board on both Core 2, Core i7 and Opteron, at least for the cases we - typically care about. The gains seem to be the largest on Core i7, though. 
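As a sketch of that control flow (hypothetical writer logic, not Snappy's actual code):

    #include <cstddef>
    #include <cstring>

    // When avail < literal_length we are disqualified from the fast path
    // anyway, so skip the remaining precondition checks and go straight to
    // the slow path rather than paying for tests that cannot succeed.
    static bool AppendLiteral(char* op, char* op_limit,
                              const char* literal, size_t literal_length) {
      size_t avail = static_cast<size_t>(op_limit - op);
      if (avail < literal_length) {
        return false;  // stand-in for the slow path (refill and retry)
      }
      std::memcpy(op, literal, literal_length);  // fast path
      return true;
    }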
- Results from my Core i7 workstation: - - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 73337 73091 190996 1.3GB/s html [ +1.7%] - BM_UFlat/1 696379 693501 20173 965.5MB/s urls [ +2.7%] - BM_UFlat/2 9765 9734 1472135 12.1GB/s jpg [ +0.7%] - BM_UFlat/3 29720 29621 472973 3.0GB/s pdf [ +1.8%] - BM_UFlat/4 294636 293834 47782 1.3GB/s html4 [ +2.3%] - BM_UFlat/5 28399 28320 494700 828.5MB/s cp [ +3.5%] - BM_UFlat/6 12795 12760 1000000 833.3MB/s c [ +1.2%] - BM_UFlat/7 3984 3973 3526448 893.2MB/s lsp [ +5.7%] - BM_UFlat/8 991996 989322 14141 992.6MB/s xls [ +3.3%] - BM_UFlat/9 228620 227835 61404 636.6MB/s txt1 [ +4.0%] - BM_UFlat/10 197114 196494 72165 607.5MB/s txt2 [ +3.5%] - BM_UFlat/11 605240 603437 23217 674.4MB/s txt3 [ +3.7%] - BM_UFlat/12 804157 802016 17456 573.0MB/s txt4 [ +3.9%] - BM_UFlat/13 347860 346998 40346 1.4GB/s bin [ +1.2%] - BM_UFlat/14 44684 44559 315315 818.4MB/s sum [ +2.3%] - BM_UFlat/15 5120 5106 2739726 789.4MB/s man [ +3.3%] - BM_UFlat/16 76591 76355 183486 1.4GB/s pb [ +2.8%] - BM_UFlat/17 238564 237828 58824 739.1MB/s gaviota [ +1.6%] - BM_UValidate/0 42194 42060 333333 2.3GB/s html [ -0.1%] - BM_UValidate/1 433182 432005 32407 1.5GB/s urls [ -0.1%] - BM_UValidate/2 197 196 71428571 603.3GB/s jpg [ +0.5%] - BM_UValidate/3 14494 14462 972222 6.1GB/s pdf [ +0.5%] - BM_UValidate/4 168444 167836 83832 2.3GB/s html4 [ +0.1%] - - R=jeff - - Revision created by MOE tool push_codebase. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@42 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 197f3ee9f9397e98c9abf07f9da875fbcb725dba -Author: snappy.mirrorbot@gmail.com -Date: Fri Jun 3 20:47:14 2011 +0000 - - Speed up decompression by not needing a lookup table for literal items. - - Looking up into and decoding the values from char_table has long shown up as a - hotspot in the decompressor. While it turns out that it's hard to make a more - efficient decoder for the copy ops, the literals are simple enough that we can - decode them without needing a table lookup. (This means that 1/4 of the table - is now unused, although that in itself doesn't buy us anything.) - - The gains are small, but definitely present; some tests win as much as 10%, - but 1-4% is more typical. These results are from Core i7, in 64-bit mode; - Core 2 and Opteron show similar results. (I've run with more iterations - than usual to make sure the smaller gains don't drown entirely in noise.)
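A sketch of the table-free literal decode (illustrative; the real decoder also handles the long-length encodings and bounds-checks the input):

    #include <cstdint>

    // In the Snappy format, a tag byte with 00 in its low two bits is a
    // literal, and the upper six bits hold (length - 1) for lengths 1..60;
    // the values 60..63 there signal 1..4 extra length bytes instead.
    static inline bool DecodeShortLiteralTag(uint8_t tag,
                                             uint32_t* literal_length) {
      if ((tag & 3) != 0) return false;  // a copy op; those keep char_table
      uint32_t len = (tag >> 2) + 1;
      if (len > 60) return false;        // long literal: extra bytes follow
      *literal_length = len;
      return true;
    }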
- - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 74665 74428 182055 1.3GB/s html [ +3.1%] - BM_UFlat/1 714106 711997 19663 940.4MB/s urls [ +4.4%] - BM_UFlat/2 9820 9789 1427115 12.1GB/s jpg [ -1.2%] - BM_UFlat/3 30461 30380 465116 2.9GB/s pdf [ +0.8%] - BM_UFlat/4 301445 300568 46512 1.3GB/s html4 [ +2.2%] - BM_UFlat/5 29338 29263 479452 801.8MB/s cp [ +1.6%] - BM_UFlat/6 13004 12970 1000000 819.9MB/s c [ +2.1%] - BM_UFlat/7 4180 4168 3349282 851.4MB/s lsp [ +1.3%] - BM_UFlat/8 1026149 1024000 10000 959.0MB/s xls [+10.7%] - BM_UFlat/9 237441 236830 59072 612.4MB/s txt1 [ +0.3%] - BM_UFlat/10 203966 203298 69307 587.2MB/s txt2 [ +0.8%] - BM_UFlat/11 627230 625000 22400 651.2MB/s txt3 [ +0.7%] - BM_UFlat/12 836188 833979 16787 551.0MB/s txt4 [ +1.3%] - BM_UFlat/13 351904 350750 39886 1.4GB/s bin [ +3.8%] - BM_UFlat/14 45685 45562 308370 800.4MB/s sum [ +5.9%] - BM_UFlat/15 5286 5270 2656546 764.9MB/s man [ +1.5%] - BM_UFlat/16 78774 78544 178117 1.4GB/s pb [ +4.3%] - BM_UFlat/17 242270 241345 58091 728.3MB/s gaviota [ +1.2%] - BM_UValidate/0 42149 42000 333333 2.3GB/s html [ -3.0%] - BM_UValidate/1 432741 431303 32483 1.5GB/s urls [ +7.8%] - BM_UValidate/2 198 197 71428571 600.7GB/s jpg [+16.8%] - BM_UValidate/3 14560 14521 965517 6.1GB/s pdf [ -4.1%] - BM_UValidate/4 169065 168671 83832 2.3GB/s html4 [ -2.9%] - - R=jeff - - Revision created by MOE tool push_codebase. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@41 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 8efa2639e885ac467e7b11c662975c5844019fb9 -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 2 22:57:41 2011 +0000 - - Release Snappy 1.0.3. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@40 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2e12124bd87f39296709decc65195fa5bfced538 -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 2 18:06:54 2011 +0000 - - Remove an unneeded goto in the decompressor; it turns out that the - state of ip_ after decompression (or attempted decompression) is - completely irrelevant, so we don't need the trailer. - - Performance is, as expected, mostly flat -- there's a curious ~3-5% - loss in the "lsp" test, but that test case is so short it is hard to say - anything definitive about why (most likely, it's some sort of - unrelated effect). - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@39 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit c266bbf32103f8ed4a83e2272ed3d8828d5b8b34 -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 2 17:59:40 2011 +0000 - - Speed up decompression by caching ip_. - - It is seemingly hard for the compiler to understand that ip_, the current input - pointer into the compressed data stream, cannot alias anything else, and - thus using it directly will incur memory traffic as it cannot be kept in a - register. The code already knew about this and cached it into a local - variable, but since Step() only decoded one tag, it had to move ip_ back into - place between every tag. This seems to have cost us a significant amount of - performance, so we change Step() into a function that decodes as much as it can - before it saves ip_ back and returns. (Note that Step() was already inlined, - so it is not the manual inlining that buys the performance here.)
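The before/after shape, as a sketch (hypothetical names, not the real class):

    // Instead of writing the member ip_ back after every tag, decode a
    // whole batch of tags against a local pointer and store ip_ once.
    struct Decompressor {
      const char* ip_;        // current position in the compressed stream
      const char* ip_limit_;  // end of the buffered input

      void DecodeAvailableTags() {
        const char* ip = ip_;  // keep the hot pointer in a register
        while (ip < ip_limit_) {
          ip += 1;  // ... decode one whole tag at *ip ...
        }
        ip_ = ip;  // a single write-back instead of one per tag
      }
    };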
- - There is a tiny difference in the behavior here; if an invalid literal is - encountered (ie., the writer refuses the Append() operation), ip_ will now - point to the byte past the tag byte, instead of where the literal was - originally thought to end. However, we don't use ip_ for anything after - DecompressAllTags() has returned, so this should not change external behavior - in any way. - - Microbenchmark results for Core i7, 64-bit (Opteron results are similar): - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 79134 79110 8835 1.2GB/s html [ +6.2%] - BM_UFlat/1 786126 786096 891 851.8MB/s urls [+10.0%] - BM_UFlat/2 9948 9948 69125 11.9GB/s jpg [ -1.3%] - BM_UFlat/3 31999 31998 21898 2.7GB/s pdf [ +6.5%] - BM_UFlat/4 318909 318829 2204 1.2GB/s html4 [ +6.5%] - BM_UFlat/5 31384 31390 22363 747.5MB/s cp [ +9.2%] - BM_UFlat/6 14037 14034 49858 757.7MB/s c [+10.6%] - BM_UFlat/7 4612 4612 151395 769.5MB/s lsp [ +9.5%] - BM_UFlat/8 1203174 1203007 582 816.3MB/s xls [+19.3%] - BM_UFlat/9 253869 253955 2757 571.1MB/s txt1 [+11.4%] - BM_UFlat/10 219292 219290 3194 544.4MB/s txt2 [+12.1%] - BM_UFlat/11 672135 672131 1000 605.5MB/s txt3 [+11.2%] - BM_UFlat/12 902512 902492 776 509.2MB/s txt4 [+12.5%] - BM_UFlat/13 372110 371998 1881 1.3GB/s bin [ +5.8%] - BM_UFlat/14 50407 50407 10000 723.5MB/s sum [+13.5%] - BM_UFlat/15 5699 5701 100000 707.2MB/s man [+12.4%] - BM_UFlat/16 83448 83424 8383 1.3GB/s pb [ +5.7%] - BM_UFlat/17 256958 256963 2723 684.1MB/s gaviota [ +7.9%] - BM_UValidate/0 42795 42796 16351 2.2GB/s html [+25.8%] - BM_UValidate/1 490672 490622 1427 1.3GB/s urls [+22.7%] - BM_UValidate/2 237 237 2950297 499.0GB/s jpg [+24.9%] - BM_UValidate/3 14610 14611 47901 6.0GB/s pdf [+26.8%] - BM_UValidate/4 171973 171990 4071 2.2GB/s html4 [+25.7%] - - - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@38 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d0ee043bc50c62c5b5ff3da044f0b5567257407d -Author: snappy.mirrorbot@gmail.com -Date: Tue May 17 08:48:25 2011 +0000 - - Fix the numbering of the headlines in the Snappy format description. - - R=csilvers - DELTA=4 (0 added, 0 deleted, 4 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1906 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@37 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 6c7053871fbdb459c9c14287a138d7f82d6d84a1 -Author: snappy.mirrorbot@gmail.com -Date: Mon May 16 08:59:18 2011 +0000 - - Fix public issue #32: Add compressed format documentation for Snappy. - This text is new, but an earlier version from Zeev Tarantov was used - as reference. - - R=csilvers - DELTA=112 (111 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1867 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@36 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit a1f9f9973d127992f341d442969c86fd9a0847c9 -Author: snappy.mirrorbot@gmail.com -Date: Mon May 9 21:29:02 2011 +0000 - - Fix public issue #39: Pick out the median runs based on CPU time, - not real time. Also, use nth_element instead of sort, since we - only need one element. - - R=csilvers - DELTA=5 (3 added, 0 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. 
- MOE_MIGRATION=1799 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@35 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f7b105683c074cdf233740089e245e43f63e7e55 -Author: snappy.mirrorbot@gmail.com -Date: Mon May 9 21:28:45 2011 +0000 - - Fix public issue #38: Make the microbenchmark framework properly - handle cases where gettimeofday() can stand still and return the same - result twice (as sometimes on GNU/Hurd) or go backwards - (as when the user adjusts the clock). We avoid a division-by-zero, - and put a lower bound on the number of iterations -- the same - amount as we use to calibrate. - - We should probably use CLOCK_MONOTONIC for platforms that support - it, to be robust against clock adjustments; we already use Windows' - monotonic timers. However, that's for a later changelist. - - R=csilvers - DELTA=7 (5 added, 0 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1798 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@34 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d8d481427a05b88cdb0810c29bf400153595c423 -Author: snappy.mirrorbot@gmail.com -Date: Tue May 3 23:22:52 2011 +0000 - - Fix public issue #37: Only link snappy_unittest against -lz and other autodetected - libraries, not libsnappy.so (which doesn't need any such dependency). - - R=csilvers - DELTA=20 (14 added, 0 deleted, 6 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1710 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@33 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit bcecf195c0aeb2c98144d3d54b4d8d228774f50d -Author: snappy.mirrorbot@gmail.com -Date: Tue May 3 23:22:33 2011 +0000 - - Release Snappy 1.0.2, to get the license change and various other fixes into - a release. - - R=csilvers - DELTA=239 (236 added, 0 deleted, 3 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1709 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@32 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 84d9f642025cda672dda0d94a8008f094500aaa6 -Author: snappy.mirrorbot@gmail.com -Date: Tue Apr 26 12:34:55 2011 +0000 - - Fix public issue #30: Stop using gettimeofday() altogether on Win32, - as MSVC doesn't include it. Replace with QueryPerformanceCounter(), - which is monotonic and probably reasonably high-resolution. - (Some machines have traditionally had bugs in QPC, but they should - be relatively rare these days, and there's really not a much better - alternative that I know of.) - - R=csilvers - DELTA=74 (55 added, 19 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1556 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@31 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3d8e71df8d30f980d71d4c784ebfc5ff62d5b0cb -Author: snappy.mirrorbot@gmail.com -Date: Tue Apr 26 12:34:37 2011 +0000 - - Fix public issue #31: Don't reset PATH in autogen.sh; instead, do the trickery - we need for our own build system internally. - - R=csilvers - DELTA=16 (13 added, 1 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1555 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@30 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 73987351de54c88e2fc3f5dcdeceb47708df3585 -Author: snappy.mirrorbot@gmail.com -Date: Fri Apr 15 22:55:56 2011 +0000 - - When including <windows.h>, define WIN32_LEAN_AND_MEAN first, - so we won't pull in macro definitions of things like min() and max(), - which can conflict with <algorithm>.
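The guard typically ends up looking like this in a portability header (a sketch; the NOMINMAX define is a common companion and an addition here, not part of the original change):

    #if defined(_WIN32)
    #ifndef WIN32_LEAN_AND_MEAN
    #define WIN32_LEAN_AND_MEAN  // trim rarely-used APIs from <windows.h>
    #endif
    #ifndef NOMINMAX
    #define NOMINMAX             // keep <windows.h> from defining min()/max()
    #endif
    #include <windows.h>
    #endif
    #include <algorithm>  // std::min/std::max now resolve without collisions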
- - R=csilvers - DELTA=1 (1 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1485 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@29 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit fb7e0eade471a20b009720a84fea0af1552791d5 -Author: snappy.mirrorbot@gmail.com -Date: Mon Apr 11 09:07:01 2011 +0000 - - Fix public issue #29: Write CPU timing code for Windows, based on GetProcessTimes() - instead of getrusage(). - - I thought I'd already committed this patch, so that the 1.0.1 release already - would have a Windows-compatible snappy_unittest, but I'd seemingly deleted it - instead, so this is a reconstruction. - - R=csilvers - DELTA=43 (39 added, 3 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1295 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@28 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit c67fa0c755a329000da5546fff79089d62ac2f82 -Author: snappy.mirrorbot@gmail.com -Date: Fri Apr 8 09:51:53 2011 +0000 - - Include C bindings of Snappy, contributed by Martin Gieseking. - - I've made a few changes since Martin's version; mostly style nits, but also - a semantic change -- most functions that return bool in the C++ version now - return an enum, to better match typical C (and zlib) semantics. - - I've kept the copyright notice, since Martin is obviously the author here; - he has signed the contributor license agreement, though, so this should not - hinder Google's use in the future. - - We'll need to update the libtool version number to match the added interface, - but as per http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html - I'm going to wait until public release. - - R=csilvers - DELTA=238 (233 added, 0 deleted, 5 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1294 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@27 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 56be85cb9ae06f2e92180ae2575bdd10c012ab73 -Author: snappy.mirrorbot@gmail.com -Date: Thu Apr 7 16:36:43 2011 +0000 - - Replace geo.protodata with a newer version. - - The data compresses/decompresses slightly faster than the old data, and has - similar density. - - R=lookingbill - DELTA=1 (0 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1288 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@26 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3dd93f3ec74df54a37f68bffabb058ac757bbe72 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 30 20:27:53 2011 +0000 - - Fix public issue #27: Add HAVE_CONFIG_H tests around the config.h - inclusion in snappy-stubs-internal.h, which eases compiling outside the - automake/autoconf framework. - - R=csilvers - DELTA=5 (4 added, 1 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1152 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@25 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f67bcaa61006da8b325a7ed9909a782590971815 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 30 20:27:39 2011 +0000 - - Fix public issue #26: Take memory allocation and reallocation entirely out of the - Measure() loop. This gives all algorithms a small speed boost, except Snappy which - already didn't do reallocation (so the measurements were slightly biased in its - favor). - - R=csilvers - DELTA=92 (69 added, 9 deleted, 14 changed) - - - Revision created by MOE tool push_codebase.
- MOE_MIGRATION=1151 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@24 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit cc333c1c5cc4eabceceb9848ff3cac6c604ecbc6 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 30 20:25:09 2011 +0000 - - Renamed "namespace zippy" to "namespace snappy" to reduce - the differences from the opensource code. Will make it easier - in the future to mix-and-match third-party code that uses - snappy with google code. - - Currently, csearch shows that the only external user of - "namespace zippy" is some bigtable code that accesses - a TEST variable, which is temporarily kept in the zippy - namespace. - - R=sesse - DELTA=123 (18 added, 3 deleted, 102 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1150 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@23 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f19fb07e6dc79d6857e37df572dba25ff30fc8f3 -Author: snappy.mirrorbot@gmail.com -Date: Mon Mar 28 22:17:04 2011 +0000 - - Put back the final few lines of what was truncated during the - license header change. - - R=csilvers - DELTA=5 (4 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1094 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@22 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 7e8ca8f8315fc2ecb4eea19db695039ab2ca43a0 -Author: snappy.mirrorbot@gmail.com -Date: Sat Mar 26 02:34:34 2011 +0000 - - Change on 2011-03-25 19:18:00-07:00 by sesse - - Replace the Apache 2.0 license header by the BSD-type license header; - somehow a lot of the files were missed in the last round. - - R=dannyb,csilvers - DELTA=147 (74 added, 2 deleted, 71 changed) - - Change on 2011-03-25 19:25:07-07:00 by sesse - - Unbreak the build; the relicensing removed a bit too much (only comments - were intended, but I also accidentially removed some of the top lines of - the actual source). - - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1072 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@21 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b4bbc1041b35d844ec26fbae25f2864995361fd8 -Author: snappy.mirrorbot@gmail.com -Date: Fri Mar 25 16:14:41 2011 +0000 - - Change Snappy from the Apache 2.0 to a BSD-type license. - - R=dannyb - DELTA=328 (80 added, 184 deleted, 64 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1061 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@20 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit c47640c510eb11cf8913edfa34f667bceb3a4401 -Author: snappy.mirrorbot@gmail.com -Date: Fri Mar 25 00:39:01 2011 +0000 - - Release Snappy 1.0.1, to soup up all the various small changes - that have been made since release. - - R=csilvers - DELTA=266 (260 added, 0 deleted, 6 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1057 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@19 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b1dc1f643eaff897a5ce135f525799b99687b118 -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:15:54 2011 +0000 - - Fix a microbenchmark crash on mingw32; seemingly %lld is not universally - supported on Windows, and %I64d is recommended instead. - - R=csilvers - DELTA=6 (5 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. 
- MOE_MIGRATION=1034 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@18 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 98004ca9afc62a3279dfe9d9a359083f61db437f -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:15:27 2011 +0000 - - Fix public issue #19: Fix unit test when Google Test is installed but the - gflags package isn't (Google Test is not properly initialized). - - Patch by Martin Gieseking. - - R=csilvers - DELTA=2 (1 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1033 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@17 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 444a6c5f72d6f8d8f7213a5bcc08b26606eb9934 -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:13:57 2011 +0000 - - Make the unit test work on systems without mmap(). This is required for, - among others, Windows support. For Windows in specific, we could have used - CreateFileMapping/MapViewOfFile, but this should at least get us a bit closer - to compiling, and is of course also relevant for embedded systems with no MMU. - - (Part 2/2) - - R=csilvers - DELTA=15 (12 added, 3 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1032 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@16 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2e182e9bb840737f9cd8817e859dc17a82f2c16b -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:12:27 2011 +0000 - - Make the unit test work on systems without mmap(). This is required for, - among others, Windows support. For Windows in specific, we could have used - CreateFileMapping/MapViewOfFile, but this should at least get us a bit closer - to compiling, and is of course also relevant for embedded systems with no MMU. - - (Part 1/2) - - R=csilvers - DELTA=9 (8 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1031 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@15 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 48662cbb7f81533977334629790d346220084527 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 23:17:36 2011 +0000 - - Fix public issue #12: Don't keep autogenerated auto* files in Subversion; - it causes problems with others sending patches etc.. - - We can't get this 100% hermetic anyhow, due to files like lt~obsolete.m4, - so we can just as well go cleanly in the other direction. - - R=csilvers - DELTA=21038 (0 added, 21036 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1012 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@14 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 9e4717a586149c9538b353400312bab5ab5458c4 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 17:50:49 2011 +0000 - - Fix public issue tracker bug #3: Call AC_SUBST([LIBTOOL_DEPS]), or the rule - to rebuild libtool in Makefile.am won't work. - - R=csilvers - DELTA=1 (1 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=997 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@13 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 519c822a34a91a0c0eb32d98e9686ee7d9cd6651 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:16:39 2011 +0000 - - Fix public issue #10: Don't add GTEST_CPPFLAGS to snappy_unittest_CXXFLAGS; - it's not needed (CPPFLAGS are always included when compiling). - - R=csilvers - DELTA=1 (0 added, 1 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. 
- MOE_MIGRATION=994 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@12 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit ea6b936378583cba730c33c8a53776edc1782208 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:16:18 2011 +0000 - - Fix public issue #9: Add -Wall -Werror to automake flags. - (This concerns automake itself, not the C++ compiler.) - - R=csilvers - DELTA=4 (3 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=993 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@11 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e3ca06af253094b1c3a8eae508cd97accf077535 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:13:37 2011 +0000 - - Fix a typo in the Snappy README file. - - R=csilvers - DELTA=1 (0 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=992 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@10 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 39d27bea23873abaa663e884261386b17b058f20 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:13:13 2011 +0000 - - Fix public issue #6: Add a --with-gflags for disabling gflags autodetection - and using a manually given setting (use/don't use) instead. - - R=csilvers - DELTA=16 (13 added, 0 deleted, 3 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=991 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@9 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 60add43d99c1c31aeecd895cb555ad6f6520608e -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:12:44 2011 +0000 - - Fix public issue #5: Replace the EXTRA_LIBSNAPPY_LDFLAGS setup with something - slightly more standard, that also doesn't leak libtool command-line into - configure.ac. - - R=csilvers - DELTA=7 (0 added, 4 deleted, 3 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=990 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@8 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit a8dd1700879ad646106742aa0e9c3a48dc07b01d -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:12:22 2011 +0000 - - Fix public issue #4: Properly quote all macro arguments in configure.ac. - - R=csilvers - DELTA=16 (0 added, 0 deleted, 16 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=989 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@7 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 79752dd7033658e28dc894de55012bdf2c9afca3 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:11:54 2011 +0000 - - Fix public issue #7: Don't use internal variables named ac_*, as those belong - to autoconf's namespace. - - R=csilvers - DELTA=6 (0 added, 0 deleted, 6 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=988 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@6 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 46e39fb20c297129494b969ac4ea64fcd04b4fa0 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:11:09 2011 +0000 - - Add missing licensing headers to a few files. (Part 2/2.) - - R=csilvers - DELTA=12 (12 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=987 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@5 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3e764216fc8edaafca480443b90e55c14eaae2c2 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:10:39 2011 +0000 - - Add mising licensing headers to a few files. (Part 1/2.) 
- - R=csilvers - DELTA=24 (24 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=986 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@4 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 9a59f183c8ffec62dcdabd3499d0d515e44e4ef0 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:10:04 2011 +0000 - - Use the correct license file for the Apache 2.0 license; - spotted by Florian Weimer. - - R=csilvers - DELTA=202 (174 added, 0 deleted, 28 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=985 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@3 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 28a64402392c791905d6e1384ea1b48a5cb0b281 -Author: snappy.mirrorbot@gmail.com -Date: Fri Mar 18 17:14:15 2011 +0000 - - Revision created by MOE tool push_codebase. - MOE_MIGRATION= - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@2 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 7c3c6077b72b4ae2237267a20f640b55e9a90569 -Author: sesse@google.com -Date: Fri Mar 18 17:13:52 2011 +0000 - - Create trunk directory. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@1 03e5f5b5-db94-4691-08a0-1a8bf15f6143 diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..4f80d95 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,23 @@ +module( + name = "snappy", + version = "1.2.2", + compatibility_level = 1, +) + +bazel_dep( + name = "googletest", + version = "1.14.0.bcr.1", + dev_dependency = True, + repo_name = "com_google_googletest", +) +bazel_dep( + name = "google_benchmark", + version = "1.9.0", + dev_dependency = True, + repo_name = "com_google_benchmark", +) + +bazel_dep( + name = "platforms", + version = "0.0.9", +) diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 0746a16..0000000 --- a/Makefile.am +++ /dev/null @@ -1,31 +0,0 @@ -ACLOCAL_AMFLAGS = -I m4 - -# Library. -lib_LTLIBRARIES = libsnappy.la -libsnappy_la_SOURCES = snappy.cc snappy-sinksource.cc snappy-stubs-internal.cc snappy-c.cc -libsnappy_la_LDFLAGS = -version-info $(SNAPPY_LTVERSION) - -include_HEADERS = snappy.h snappy-sinksource.h snappy-stubs-public.h snappy-c.h -noinst_HEADERS = snappy-internal.h snappy-stubs-internal.h snappy-test.h - -# Unit tests and benchmarks. -snappy_unittest_CPPFLAGS = $(gflags_CFLAGS) $(GTEST_CPPFLAGS) -snappy_unittest_SOURCES = snappy_unittest.cc snappy-test.cc -snappy_unittest_LDFLAGS = $(GTEST_LDFLAGS) -snappy_unittest_LDADD = libsnappy.la $(UNITTEST_LIBS) $(gflags_LIBS) $(GTEST_LIBS) -TESTS = snappy_unittest -noinst_PROGRAMS = $(TESTS) - -EXTRA_DIST = autogen.sh testdata/alice29.txt testdata/asyoulik.txt testdata/baddata1.snappy testdata/baddata2.snappy testdata/baddata3.snappy testdata/geo.protodata testdata/fireworks.jpeg testdata/html testdata/html_x_4 testdata/kppkn.gtb testdata/lcet10.txt testdata/paper-100k.pdf testdata/plrabn12.txt testdata/urls.10K -dist_doc_DATA = ChangeLog COPYING INSTALL NEWS README format_description.txt framing_format.txt - -pkgconfigdir = $(libdir)/pkgconfig -nodist_pkgconfig_DATA = snappy.pc - -libtool: $(LIBTOOL_DEPS) - $(SHELL) ./config.status --recheck - -# Needed by autoconf because we use README.md instead of README. -# See http://stackoverflow.com/q/15013672/ -README: README.md - cat $< > $@.tmp diff --git a/NEWS b/NEWS index 8aeafd7..ef935ba 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,46 @@ +Snappy v1.2.2, Mar 26th 2025: + + * We added a new compression level in v1.2.1 which compresses a bit + denser but slower. 
Decompression speed should be even faster with it. + + * We fixed a very old issue of data corruption when compressed size + exceeds 4GB. This can happen when you compress close to 4GB + of incompressible data, for example random data. + + * Raised the minimum required CMake version to 3.10; older versions + are no longer supported. + + * Various other small fixes and performance improvements (especially + for clang). + +Snappy v1.1.10, Mar 8th 2023: + + * Performance improvements. + + * Compilation fixes for various environments. + +Snappy v1.1.9, May 4th 2021: + + * Performance improvements. + + * Google Test and Google Benchmark are now bundled in third_party/. + +Snappy v1.1.8, January 15th 2020: + + * Small performance improvements. + + * Removed snappy::string alias for std::string. + + * Improved CMake configuration. + +Snappy v1.1.7, August 24th 2017: + + * Improved CMake build support for 64-bit Linux distributions. + + * MSVC builds now use MSVC-specific intrinsics that map to clzll. + + * ARM64 (AArch64) builds use the code paths optimized for 64-bit processors. + Snappy v1.1.6, July 12th 2017: This is a re-release of v1.1.5 with proper SONAME / SOVERSION values. diff --git a/README.md b/README.md index b9db833..9b4a494 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ Snappy, a fast compressor/decompressor. +[![Build Status](https://github.com/google/snappy/actions/workflows/build.yml/badge.svg)](https://github.com/google/snappy/actions/workflows/build.yml) Introduction ============ @@ -51,7 +52,7 @@ In particular: - Snappy uses 64-bit operations in several places to process more data at once than would otherwise be possible. - - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap. + - Snappy assumes unaligned 32 and 64-bit loads and stores are cheap. On some platforms, these must be emulated with single-byte loads and stores, which is much slower. - Snappy assumes little-endian throughout, and needs to byte-swap data in @@ -65,32 +66,38 @@ are of course most welcome; see "Contact", below. Building ======== -CMake is supported and autotools will soon be deprecated. -You need CMake 3.4 or above to build: - - mkdir build - cd build && cmake ../ && make +You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt) +or later to build: +```bash +git submodule update --init +mkdir build +cd build && cmake ../ && make +``` Usage ===== Note that Snappy, both the implementation and the main interface, is written in C++. However, several third-party bindings to other languages -are available; see the home page at http://google.github.io/snappy/ -for more information. Also, if you want to use Snappy from C code, you can -use the included C bindings in snappy-c.h. +are available; see the [home page](docs/README.md) for more information. +Also, if you want to use Snappy from C code, you can use the included C +bindings in snappy-c.h. To use Snappy from your own C++ program, include the file "snappy.h" from your calling file, and link against the compiled library. There are many ways to call Snappy, but the simplest possible is - snappy::Compress(input.data(), input.size(), &output); +```c++ +snappy::Compress(input.data(), input.size(), &output); +``` and similarly - snappy::Uncompress(input.data(), input.size(), &output); +```c++ +snappy::Uncompress(input.data(), input.size(), &output); +``` where "input" and "output" are both instances of std::string.
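Taken together, the two snippets above form a complete round trip. The following is a minimal, illustrative sketch only (it is not part of the patch); it assumes the library has been built and is linked with `-lsnappy`:

```c++
#include <cassert>
#include <string>

#include "snappy.h"

int main() {
  const std::string input = "Hello, hello, hello, hello, world!";
  std::string compressed, restored;

  // Compress() resizes the output string itself and returns the
  // compressed length.
  snappy::Compress(input.data(), input.size(), &compressed);

  // Uncompress() returns false on corrupt input instead of aborting.
  if (!snappy::Uncompress(compressed.data(), compressed.size(), &restored)) {
    return 1;
  }
  assert(restored == input);
  return 0;
}
```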
@@ -102,48 +109,57 @@ information. Tests and benchmarks ==================== -When you compile Snappy, snappy_unittest is compiled in addition to the -library itself. You do not need it to use the compressor from your own library, -but it contains several useful components for Snappy development. - -First of all, it contains unit tests, verifying correctness on your machine in -various scenarios. If you want to change or optimize Snappy, please run the -tests to verify you have not broken anything. Note that if you have the -Google Test library installed, unit test behavior (especially failures) will be -significantly more user-friendly. You can find Google Test at - - http://github.com/google/googletest - -You probably also want the gflags library for handling of command-line flags; -you can find it at - - http://gflags.github.io/gflags/ - -In addition to the unit tests, snappy contains microbenchmarks used to -tune compression and decompression performance. These are automatically run -before the unit tests, but you can disable them using the flag ---run_microbenchmarks=false if you have gflags installed (otherwise you will -need to edit the source). - -Finally, snappy can benchmark Snappy against a few other compression libraries -(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time. -To benchmark using a given file, give the compression algorithm you want to test -Snappy against (e.g. --zlib) and then a list of one or more file names on the -command line. The testdata/ directory contains the files used by the -microbenchmark, which should provide a reasonably balanced starting point for -benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they -are used to verify correctness in the presence of corrupted data in the unit -test.) - +When you compile Snappy, the following binaries are compiled in addition to the +library itself. You do not need them to use the compressor from your own +library, but they are useful for Snappy development. + +* `snappy_benchmark` contains microbenchmarks used to tune compression and + decompression performance. +* `snappy_unittest` contains unit tests, verifying correctness on your machine + in various scenarios. +* `snappy_test_tool` can benchmark Snappy against a few other compression + libraries (zlib, LZO, LZF, and QuickLZ), if they were detected at configure + time. To benchmark using a given file, give the compression algorithm you want + to test Snappy against (e.g. --zlib) and then a list of one or more file names + on the command line. + +If you want to change or optimize Snappy, please run the tests and benchmarks to +verify you have not broken anything. + +The testdata/ directory contains the files used by the microbenchmarks, which +should provide a reasonably balanced starting point for benchmarking. (Note that +baddata[1-3].snappy are not intended as benchmarks; they are used to verify +correctness in the presence of corrupted data in the unit test.) + +Contributing to the Snappy Project +================================== + +In addition to the aims listed at the top of the [README](README.md), Snappy +explicitly supports the following: + +1. C++11 +2. Clang (gcc and MSVC are best-effort). +3. Low-level optimizations (e.g. assembly or equivalent intrinsics) for: + - [x86](https://en.wikipedia.org/wiki/X86) + - [x86-64](https://en.wikipedia.org/wiki/X86-64) + - ARMv7 (32-bit) + - ARMv8 (AArch64) +4. Only the Snappy compression scheme, as described in + [format_description.txt](format_description.txt). +5.
CMake for building + +Changes adding features or dependencies outside of the core area of focus listed +above might not be accepted. If in doubt, post a message to the +[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression). + +We are unlikely to accept contributions to the build configuration files, such +as `CMakeLists.txt`. We are focused on maintaining a build configuration that +allows us to test that the project works in a few supported configurations +inside Google. We are not currently interested in supporting other requirements, +such as different operating systems, compilers, or build systems. Contact ======= -Snappy is distributed through GitHub. For the latest version, a bug tracker, -and other information, see - - http://google.github.io/snappy/ - -or the repository at - - https://github.com/google/snappy +Snappy is distributed through GitHub. For the latest version and other +information, see https://github.com/google/snappy. diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..7e60888 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,27 @@ +# Copyright 2023 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod new file mode 100644 index 0000000..e69de29 diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 886150f..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Build matrix / environment variables are explained on: -# https://www.appveyor.com/docs/appveyor-yml/ -# This file can be validated on: https://ci.appveyor.com/tools/validate-yaml - -version: "{build}" - -environment: - matrix: - # AppVeyor currently has no custom job name feature.
- # http://help.appveyor.com/discussions/questions/1623-can-i-provide-a-friendly-name-for-jobs - - JOB: Visual Studio 2017 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - CMAKE_GENERATOR: Visual Studio 15 2017 - - JOB: Visual Studio 2015 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 - CMAKE_GENERATOR: Visual Studio 14 2015 - -platform: - - x86 - - x64 - -configuration: - - RelWithDebInfo - - Debug - -build: - verbosity: minimal - -build_script: - - git submodule update --init --recursive - - mkdir out - - cd out - - cmake .. -G "%CMAKE_GENERATOR%" - -DCMAKE_CONFIGURATION_TYPES="Debug;RelWithDebInfo" - - cmake --build . --config %CONFIGURATION% - -test_script: - - ctest -C %CONFIGURATION% --output-on-failure diff --git a/autogen.sh b/autogen.sh deleted file mode 100755 index 9cb502e..0000000 --- a/autogen.sh +++ /dev/null @@ -1,12 +0,0 @@ -#! /bin/sh -e -rm -rf autom4te.cache -aclocal -I m4 -autoheader -if glibtoolize --version >/dev/null 2>/dev/null; then - LIBTOOLIZE=${LIBTOOLIZE:-glibtoolize} -else - LIBTOOLIZE=${LIBTOOLIZE:-libtoolize} -fi -$LIBTOOLIZE --copy -automake --add-missing --copy -autoconf diff --git a/cmake/SnappyConfig.cmake.in b/cmake/SnappyConfig.cmake.in index 5e604fe..9e7d134 100644 --- a/cmake/SnappyConfig.cmake.in +++ b/cmake/SnappyConfig.cmake.in @@ -1,9 +1,33 @@ -set(SNAPPY_VERSION @SNAPPY_MAJOR@.@SNAPPY_MINOR@.@SNAPPY_PATCHLEVEL@) +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
@PACKAGE_INIT@ -set_and_check(SNAPPY_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@") -set_and_check(SNAPPY_LIBRARY_DIR "@PACKAGE_LIBRARY_INSTALL_DIR@") -set_and_check(SNAPPY_BINARY_DIR "@PACKAGE_BINARY_INSTALL_DIR@") +include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake") -check_required_components(SNAPPY) \ No newline at end of file +check_required_components(Snappy) \ No newline at end of file diff --git a/cmake/config.h.in b/cmake/config.h.in index c06e3ad..de80c5f 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -1,90 +1,75 @@ -#ifndef SNAPPY_CONFIG_H -#define SNAPPY_CONFIG_H 1 +#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ +#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ + +/* Define to 1 if the compiler supports __attribute__((always_inline)). */ +#cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE /* Define to 1 if the compiler supports __builtin_ctz and friends. */ -#cmakedefine HAVE_BUILTIN_CTZ ${HAVE_BUILTIN_CTZ} +#cmakedefine01 HAVE_BUILTIN_CTZ /* Define to 1 if the compiler supports __builtin_expect. */ -#cmakedefine HAVE_BUILTIN_EXPECT ${HAVE_BUILTIN_EXPECT} - -/* Define to 1 if you have the <byteswap.h> header file. */ -#cmakedefine HAVE_BYTESWAP_H ${HAVE_BYTESWAP_H} - -/* Define to 1 if you have the <dlfcn.h> header file. */ -#cmakedefine HAVE_DLFCN_H ${HAVE_DLFCN_H} +#cmakedefine01 HAVE_BUILTIN_EXPECT -/* Use the gflags package for command-line parsing. */ -#cmakedefine HAVE_GFLAGS ${HAVE_GFLAGS} +/* Define to 1 if the compiler supports __builtin_prefetch. */ +#cmakedefine01 HAVE_BUILTIN_PREFETCH -/* Defined when Google Test is available. */ -#cmakedefine HAVE_GTEST ${HAVE_GTEST} +/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */ +#cmakedefine01 HAVE_FUNC_MMAP -/* Define to 1 if you have the <inttypes.h> header file. */ -#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H} +/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */ +#cmakedefine01 HAVE_FUNC_SYSCONF /* Define to 1 if you have the `lzo2' library (-llzo2). */ -#cmakedefine HAVE_LIBLZO2 ${HAVE_LIBLZO2} +#cmakedefine01 HAVE_LIBLZO2 /* Define to 1 if you have the `z' library (-lz). */ -#cmakedefine HAVE_LIBZ ${HAVE_LIBZ} - -/* Define to 1 if you have the <sys/uio.h> header file. */ -#cmakedefine HAVE_SYS_UIO_H ${HAVE_SYS_UIO_H} - -/* Define to 1 if you have the <memory.h> header file. */ -#cmakedefine HAVE_MEMORY_H ${HAVE_MEMORY_H} - -/* Define to 1 if you have the <stddef.h> header file. */ -#cmakedefine HAVE_STDDEF_H ${HAVE_STDDEF_H} - -/* Define to 1 if you have the <stdint.h> header file. */ -#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H} - -/* Define to 1 if you have the <stdlib.h> header file. */ -#cmakedefine HAVE_STDLIB_H ${HAVE_STDLIB_H} - -/* Define to 1 if you have the <strings.h> header file. */ -#cmakedefine HAVE_STRINGS_H ${HAVE_STRINGS_H} +#cmakedefine01 HAVE_LIBZ -/* Define to 1 if you have the <string.h> header file. */ -#cmakedefine HAVE_STRING_H ${HAVE_STRING_H} - -/* Define to 1 if you have the <sys/byteswap.h> header file. */ -#cmakedefine HAVE_SYS_BYTESWAP_H ${HAVE_SYS_BYTESWAP_H} - -/* Define to 1 if you have the <sys/endian.h> header file. */ -#cmakedefine HAVE_SYS_ENDIAN_H ${HAVE_SYS_ENDIAN_H} +/* Define to 1 if you have the `lz4' library (-llz4). */ +#cmakedefine01 HAVE_LIBLZ4 /* Define to 1 if you have the <sys/mman.h> header file. */ -#cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H} +#cmakedefine01 HAVE_SYS_MMAN_H /* Define to 1 if you have the <sys/resource.h> header file. */ -#cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H} - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#cmakedefine HAVE_SYS_STAT_H ${HAVE_SYS_STAT_H} +#cmakedefine01 HAVE_SYS_RESOURCE_H /* Define to 1 if you have the <sys/time.h> header file.
*/ -#cmakedefine HAVE_SYS_TIME_H ${HAVE_SYS_TIME_H} +#cmakedefine01 HAVE_SYS_TIME_H -/* Define to 1 if you have the <sys/types.h> header file. */ -#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES} +/* Define to 1 if you have the <sys/uio.h> header file. */ +#cmakedefine01 HAVE_SYS_UIO_H /* Define to 1 if you have the <unistd.h> header file. */ -#cmakedefine HAVE_UNISTD_H ${HAVE_UNISTD_H} +#cmakedefine01 HAVE_UNISTD_H /* Define to 1 if you have the <windows.h> header file. */ -#cmakedefine HAVE_WINDOWS_H ${HAVE_WINDOWS_H} +#cmakedefine01 HAVE_WINDOWS_H + +/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_SSSE3 + +/* Define to 1 if you target processors with SSE4.2 and have <nmmintrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_X86_CRC32 + +/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_BMI2 + +/* Define to 1 if you target processors with NEON and have <arm_neon.h>. */ +#cmakedefine01 SNAPPY_HAVE_NEON + +/* Define to 1 if you target processors with RVV1.0 and have <riscv_vector.h>. */ +#cmakedefine01 SNAPPY_RVV_1 -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 +/* Define to 1 if you target processors with RVV0.7 and have <riscv_vector.h>. */ +#cmakedefine01 SNAPPY_RVV_0_7 -/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most - significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ -#cmakedefine WORDS_BIGENDIAN +/* Define to 1 if you have <arm_neon.h> and <arm_acle.h> and want to optimize + compression speed by using __crc32cw from <arm_acle.h>. */ +#cmakedefine01 SNAPPY_HAVE_NEON_CRC32 -#if defined(_MSC_VER) && (_MSC_VER <= 1900) -typedef __int64 ssize_t; -#endif +/* Define to 1 if your processor stores words with the most significant byte + first (like Motorola and SPARC, unlike Intel and VAX). */ +#cmakedefine01 SNAPPY_IS_BIG_ENDIAN -#endif +#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ diff --git a/configure.ac b/configure.ac deleted file mode 100644 index 595826e..0000000 --- a/configure.ac +++ /dev/null @@ -1,131 +0,0 @@ -m4_define([snappy_major], [1]) -m4_define([snappy_minor], [1]) -m4_define([snappy_patchlevel], [6]) - -# Libtool shared library interface versions (current:revision:age) -# Update this value for every release! (A:B:C will map to foo.so.(A-C).C.B) -# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html -m4_define([snappy_ltversion], [2:6:1]) - -AC_INIT([snappy], [snappy_major.snappy_minor.snappy_patchlevel]) -AC_CONFIG_MACRO_DIR([m4]) -AC_CONFIG_AUX_DIR([.]) - -# These are flags passed to automake (though they look like gcc flags!) -AM_INIT_AUTOMAKE([-Wall]) - -LT_INIT -AC_SUBST([LIBTOOL_DEPS]) -AC_PROG_CXX -AC_LANG([C++]) -AC_C_BIGENDIAN -AC_TYPE_SIZE_T -AC_TYPE_SSIZE_T -AC_CHECK_HEADERS([stdint.h stddef.h sys/mman.h sys/resource.h sys/uio.h windows.h byteswap.h sys/byteswap.h sys/endian.h sys/time.h]) - -# Don't use AC_FUNC_MMAP, as it checks for mappings of already-mapped memory, -# which we don't need (and does not exist on Windows).
-AC_CHECK_FUNC([mmap]) - -GTEST_LIB_CHECK([], [true], [true # Ignore; we can live without it.]) - -AC_ARG_WITH([gflags], - [AS_HELP_STRING( - [--with-gflags], - [use Google Flags package to enhance the unit test @<:@default=check@:>@])], - [], - [with_gflags=check]) - -if test "x$with_gflags" != "xno"; then - PKG_CHECK_MODULES( - [gflags], - [libgflags], - [AC_DEFINE([HAVE_GFLAGS], [1], [Use the gflags package for command-line parsing.])], - [if test "x$with_gflags" != "xcheck"; then - AC_MSG_FAILURE([--with-gflags was given, but test for gflags failed]) - fi]) -fi - -# See if we have __builtin_expect. -# TODO: Use AC_CACHE. -AC_MSG_CHECKING([if the compiler supports __builtin_expect]) - -AC_TRY_COMPILE(, [ - return __builtin_expect(1, 1) ? 1 : 0 -], [ - snappy_have_builtin_expect=yes - AC_MSG_RESULT([yes]) -], [ - snappy_have_builtin_expect=no - AC_MSG_RESULT([no]) -]) -if test x$snappy_have_builtin_expect = xyes ; then - AC_DEFINE([HAVE_BUILTIN_EXPECT], [1], [Define to 1 if the compiler supports __builtin_expect.]) -fi - -# See if we have working count-trailing-zeros intrinsics. -# TODO: Use AC_CACHE. -AC_MSG_CHECKING([if the compiler supports __builtin_ctzll]) - -AC_TRY_COMPILE(, [ - return (__builtin_ctzll(0x100000000LL) == 32) ? 1 : 0 -], [ - snappy_have_builtin_ctz=yes - AC_MSG_RESULT([yes]) -], [ - snappy_have_builtin_ctz=no - AC_MSG_RESULT([no]) -]) -if test x$snappy_have_builtin_ctz = xyes ; then - AC_DEFINE([HAVE_BUILTIN_CTZ], [1], [Define to 1 if the compiler supports __builtin_ctz and friends.]) -fi - -# Other compression libraries; the unit test can use these for comparison -# if they are available. If they are not found, just ignore. -UNITTEST_LIBS="" -AC_DEFUN([CHECK_EXT_COMPRESSION_LIB], [ - AH_CHECK_LIB([$1]) - AC_CHECK_LIB( - [$1], - [$2], - [ - AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_LIB$1)) - UNITTEST_LIBS="-l$1 $UNITTEST_LIBS" - ], - [true] - ) -]) -CHECK_EXT_COMPRESSION_LIB([z], [zlibVersion]) -CHECK_EXT_COMPRESSION_LIB([lzo2], [lzo1x_1_15_compress]) -AC_SUBST([UNITTEST_LIBS]) - -# These are used by snappy-stubs-public.h.in. -if test "$ac_cv_header_stdint_h" = "yes"; then - AC_SUBST([ac_cv_have_stdint_h], [1]) -else - AC_SUBST([ac_cv_have_stdint_h], [0]) -fi -if test "$ac_cv_header_stddef_h" = "yes"; then - AC_SUBST([ac_cv_have_stddef_h], [1]) -else - AC_SUBST([ac_cv_have_stddef_h], [0]) -fi -if test "$ac_cv_header_sys_uio_h" = "yes"; then - AC_SUBST([ac_cv_have_sys_uio_h], [1]) -else - AC_SUBST([ac_cv_have_sys_uio_h], [0]) -fi - -# Export the version to snappy-stubs-public.h. -SNAPPY_MAJOR="snappy_major" -SNAPPY_MINOR="snappy_minor" -SNAPPY_PATCHLEVEL="snappy_patchlevel" - -AC_SUBST([SNAPPY_MAJOR]) -AC_SUBST([SNAPPY_MINOR]) -AC_SUBST([SNAPPY_PATCHLEVEL]) -AC_SUBST([SNAPPY_LTVERSION], snappy_ltversion) - -AC_CONFIG_HEADERS([config.h]) -AC_CONFIG_FILES([Makefile snappy-stubs-public.h snappy.pc]) -AC_OUTPUT diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..d5e0e63 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,72 @@ +Snappy is a compression/decompression library. It does not aim for maximum +compression, or compatibility with any other compression library; instead, it +aims for very high speeds and reasonable compression. For instance, compared +to the fastest mode of zlib, Snappy is an order of magnitude faster for most +inputs, but the resulting compressed files are anywhere from 20% to 100% +bigger. 
On a single core of a Core i7 processor in 64-bit mode, Snappy +compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec +or more. + +Snappy is widely used inside Google, in everything from BigTable and MapReduce +to our internal RPC systems. (Snappy has previously been referred to as "Zippy" +in some presentations and the likes.) + +For more information, please see the [README](../README.md). Benchmarks against +a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are +included in the source code distribution. The source code also contains a +[formal format specification](../format_description.txt), as well +as a specification for a [framing format](../framing_format.txt) useful for +higher-level framing and encapsulation of Snappy data, e.g. for transporting +Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy +distribution currently has no code implementing the latter, but some of the +ports do (see below). + +Snappy is written in C++, but C bindings are included, and several bindings to +other languages are maintained by third parties: + +* C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper), + [Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper), + [Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native + reimplementation) +* [C port](http://github.com/andikleen/snappy-c) +* [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries, + NuGet packages and command-line tool) +* Common Lisp: [Library bindings](http://flambard.github.com/thnappy/), + [native reimplementation](https://github.com/brown/snappy) +* Erlang: [esnappy](https://github.com/thekvs/esnappy), + [snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif) +* [Go](https://github.com/golang/snappy/) +* [Haskell](http://hackage.haskell.org/package/snappy) +* [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko) +* [iOS packaging](https://github.com/ideawu/snappy-ios) +* Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the + framing format), [native reimplementation](http://code.google.com/p/jsnappy/), + [other native reimplementation](https://github.com/dain/snappy) (including + the framing format) +* [Lua](https://github.com/forhappy/lua-snappy) +* [Node.js](https://github.com/kesla/node-snappy) (including the [framing + format](https://github.com/kesla/node-snappy-stream)) +* [Perl](http://search.cpan.org/dist/Compress-Snappy/) +* [PHP](https://github.com/kjdev/php-ext-snappy) +* [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line + tool for the framing format) +* [R](https://github.com/lulyon/R-snappy) +* [Ruby](https://github.com/miyucy/snappy) +* [Rust](https://github.com/BurntSushi/rust-snappy) +* [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format) + +Snappy is used or is available as an alternative in software such as + +* [MongoDB](https://www.mongodb.com/) +* [Cassandra](http://cassandra.apache.org/) +* [Couchbase](http://www.couchbase.com/) +* [Hadoop](http://hadoop.apache.org/) +* [LessFS](http://www.lessfs.com/wordpress/) +* [LevelDB](https://github.com/google/leveldb) (which is in turn used by + [Google Chrome](http://chrome.google.com/)) +* [Lucene](http://lucene.apache.org/) +* [VoltDB](http://voltdb.com/) + +If you know of more, do not hesitate to let us know. The easiest way to get in +touch is via the +[Snappy discussion mailing list](http://groups.google.com/group/snappy-compression). 
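For the included C bindings mentioned above, snappy-c.h declares `snappy_compress`, `snappy_uncompress`, `snappy_max_compressed_length`, and `snappy_uncompressed_length`. A minimal sketch of the usual calling pattern (illustration only; the caller owns all buffer sizing and error handling):

```c++
#include <cstring>
#include <vector>

#include "snappy-c.h"

int main() {
  const char input[] = "example payload, example payload, example payload";
  size_t input_length = sizeof(input);

  // Ask for the worst-case output size first; snappy_compress then shrinks
  // compressed_length to the number of bytes actually written.
  size_t compressed_length = snappy_max_compressed_length(input_length);
  std::vector<char> compressed(compressed_length);
  if (snappy_compress(input, input_length, compressed.data(),
                      &compressed_length) != SNAPPY_OK) {
    return 1;
  }

  // The uncompressed size is stored in the stream, so it can be recovered
  // before allocating the output buffer.
  size_t uncompressed_length = 0;
  if (snappy_uncompressed_length(compressed.data(), compressed_length,
                                 &uncompressed_length) != SNAPPY_OK) {
    return 1;
  }
  std::vector<char> output(uncompressed_length);
  if (snappy_uncompress(compressed.data(), compressed_length, output.data(),
                        &uncompressed_length) != SNAPPY_OK) {
    return 1;
  }
  return std::memcmp(output.data(), input, input_length) == 0 ? 0 : 1;
}
```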
diff --git a/m4/gtest.m4 b/m4/gtest.m4 deleted file mode 100644 index 98e61f9..0000000 --- a/m4/gtest.m4 +++ /dev/null @@ -1,74 +0,0 @@ -dnl GTEST_LIB_CHECK([minimum version [, -dnl action if found [,action if not found]]]) -dnl -dnl Check for the presence of the Google Test library, optionally at a minimum -dnl version, and indicate a viable version with the HAVE_GTEST flag. It defines -dnl standard variables for substitution including GTEST_CPPFLAGS, -dnl GTEST_CXXFLAGS, GTEST_LDFLAGS, and GTEST_LIBS. It also defines -dnl GTEST_VERSION as the version of Google Test found. Finally, it provides -dnl optional custom action slots in the event GTEST is found or not. -AC_DEFUN([GTEST_LIB_CHECK], -[ -dnl Provide a flag to enable or disable Google Test usage. -AC_ARG_ENABLE([gtest], - [AS_HELP_STRING([--enable-gtest], - [Enable tests using the Google C++ Testing Framework. - (Default is enabled.)])], - [], - [enable_gtest=]) -AC_ARG_VAR([GTEST_CONFIG], - [The exact path of Google Test's 'gtest-config' script.]) -AC_ARG_VAR([GTEST_CPPFLAGS], - [C-like preprocessor flags for Google Test.]) -AC_ARG_VAR([GTEST_CXXFLAGS], - [C++ compile flags for Google Test.]) -AC_ARG_VAR([GTEST_LDFLAGS], - [Linker path and option flags for Google Test.]) -AC_ARG_VAR([GTEST_LIBS], - [Library linking flags for Google Test.]) -AC_ARG_VAR([GTEST_VERSION], - [The version of Google Test available.]) -HAVE_GTEST="no" -AS_IF([test "x${enable_gtest}" != "xno"], - [AC_MSG_CHECKING([for 'gtest-config']) - AS_IF([test "x${enable_gtest}" = "xyes"], - [AS_IF([test -x "${enable_gtest}/scripts/gtest-config"], - [GTEST_CONFIG="${enable_gtest}/scripts/gtest-config"], - [GTEST_CONFIG="${enable_gtest}/bin/gtest-config"]) - AS_IF([test -x "${GTEST_CONFIG}"], [], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR([dnl -Unable to locate either a built or installed Google Test. 
-The specific location '${enable_gtest}' was provided for a built or installed -Google Test, but no 'gtest-config' script could be found at this location.]) - ])], - [AC_PATH_PROG([GTEST_CONFIG], [gtest-config])]) - AS_IF([test -x "${GTEST_CONFIG}"], - [AC_MSG_RESULT([${GTEST_CONFIG}]) - m4_ifval([$1], - [_gtest_min_version="--min-version=$1" - AC_MSG_CHECKING([for Google Test at least version >= $1])], - [_gtest_min_version="--min-version=0" - AC_MSG_CHECKING([for Google Test])]) - AS_IF([${GTEST_CONFIG} ${_gtest_min_version}], - [AC_MSG_RESULT([yes]) - HAVE_GTEST='yes'], - [AC_MSG_RESULT([no])])], - [AC_MSG_RESULT([no])]) - AS_IF([test "x${HAVE_GTEST}" = "xyes"], - [GTEST_CPPFLAGS=`${GTEST_CONFIG} --cppflags` - GTEST_CXXFLAGS=`${GTEST_CONFIG} --cxxflags` - GTEST_LDFLAGS=`${GTEST_CONFIG} --ldflags` - GTEST_LIBS=`${GTEST_CONFIG} --libs` - GTEST_VERSION=`${GTEST_CONFIG} --version` - AC_DEFINE([HAVE_GTEST],[1],[Defined when Google Test is available.])], - [AS_IF([test "x${enable_gtest}" = "xyes"], - [AC_MSG_ERROR([dnl -Google Test was enabled, but no viable version could be found.]) - ])])]) -AC_SUBST([HAVE_GTEST]) -AM_CONDITIONAL([HAVE_GTEST],[test "x$HAVE_GTEST" = "xyes"]) -AS_IF([test "x$HAVE_GTEST" = "xyes"], - [m4_ifval([$2], [$2])], - [m4_ifval([$3], [$3])]) -]) diff --git a/snappy-internal.h b/snappy-internal.h index 0cccba1..00b2db5 100644 --- a/snappy-internal.h +++ b/snappy-internal.h @@ -31,26 +31,131 @@ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ +#include <utility> + #include "snappy-stubs-internal.h" +#if SNAPPY_HAVE_SSSE3 +// Please do not replace with <x86intrin.h> or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include <emmintrin.h> +#include <tmmintrin.h> +#endif + +#if SNAPPY_HAVE_NEON +#include <arm_neon.h> +#endif + +#if SNAPPY_RVV_1 || SNAPPY_RVV_0_7 +#define SNAPPY_HAVE_RVV 1 +#include <riscv_vector.h> +#else +#define SNAPPY_HAVE_RVV 0 +#endif + +#if SNAPPY_RVV_1 +#define VSETVL_E8M2 __riscv_vsetvl_e8m2 +#define VLE8_V_U8M2 __riscv_vle8_v_u8m2 +#define VSE8_V_U8M2 __riscv_vse8_v_u8m2 +#elif SNAPPY_RVV_0_7 +#define VSETVL_E8M2 vsetvl_e8m2 +#define VLE8_V_U8M2 vle8_v_u8m2 +#define VSE8_V_U8M2 vse8_v_u8m2 +#endif + +#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON +#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1 +#else +#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0 +#endif + namespace snappy { namespace internal { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE +#if SNAPPY_HAVE_SSSE3 +using V128 = __m128i; +#elif SNAPPY_HAVE_NEON +using V128 = uint8x16_t; +#endif + +// Load 128 bits of integer data. `src` must be 16-byte aligned. +inline V128 V128_Load(const V128* src); + +// Load 128 bits of integer data. `src` does not need to be aligned. +inline V128 V128_LoadU(const V128* src); + +// Store 128 bits of integer data. `dst` does not need to be aligned. +inline void V128_StoreU(V128* dst, V128 val); + +// Shuffle packed 8-bit integers using a shuffle mask. +// Each packed integer in the shuffle mask must be in [0,16). +inline V128 V128_Shuffle(V128 input, V128 shuffle_mask); + +// Constructs V128 with 16 chars |c|.
+inline V128 V128_DupChar(char c); + +#if SNAPPY_HAVE_SSSE3 +inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); } + +inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); } + +inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); } + +inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) { + return _mm_shuffle_epi8(input, shuffle_mask); +} + +inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); } + +#elif SNAPPY_HAVE_NEON +inline V128 V128_Load(const V128* src) { + return vld1q_u8(reinterpret_cast<const uint8_t*>(src)); +} + +inline V128 V128_LoadU(const V128* src) { + return vld1q_u8(reinterpret_cast<const uint8_t*>(src)); +} + +inline void V128_StoreU(V128* dst, V128 val) { + vst1q_u8(reinterpret_cast<uint8_t*>(dst), val); +} + +inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) { + assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15); + return vqtbl1q_u8(input, shuffle_mask); +} + +inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); } + + +#endif +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + +// Working memory performs a single allocation to hold all scratch space +// required for compression. class WorkingMemory { public: - WorkingMemory() : large_table_(NULL) { } - ~WorkingMemory() { delete[] large_table_; } + explicit WorkingMemory(size_t input_size); + ~WorkingMemory(); // Allocates and clears a hash table using memory in "*this", // stores the number of buckets in "*table_size" and returns a pointer to // the base of the hash table. - uint16* GetHashTable(size_t input_size, int* table_size); + uint16_t* GetHashTable(size_t fragment_size, int* table_size) const; + char* GetScratchInput() const { return input_; } + char* GetScratchOutput() const { return output_; } private: - uint16 small_table_[1<<10]; // 2KB - uint16* large_table_; // Allocated only when needed + char* mem_; // the allocated memory, never nullptr + size_t size_; // the size of the allocated memory, never 0 + uint16_t* table_; // the pointer to the hashtable + char* input_; // the pointer to the input scratch buffer + char* output_; // the pointer to the output scratch buffer - DISALLOW_COPY_AND_ASSIGN(WorkingMemory); + // No copying + WorkingMemory(const WorkingMemory&); + void operator=(const WorkingMemory&); }; // Flat array compression that does not emit the "uncompressed length" @@ -67,7 +172,7 @@ class WorkingMemory { char* CompressFragment(const char* input, size_t input_length, char* op, - uint16* table, + uint16_t* table, const int table_size); // Find the largest n such that @@ -80,11 +185,20 @@ char* CompressFragment(const char* input, // Does not read *(s1 + (s2_limit - s2)) or beyond. // Requires that s2_limit >= s2. // +// In addition populate *data with the next 5 bytes from the end of the match. +// This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is +// that on some arch's this can be done faster in this routine than subsequent +// loading from s2 + n. +// // Separate implementation for 64-bit, little-endian cpus. -#if defined(ARCH_K8) || (defined(ARCH_PPC) && !defined(WORDS_BIGENDIAN)) +// RISC-V little-endian CPUs can also use this faster routine.
+#if !SNAPPY_IS_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \ + defined(ARCH_ARM) || defined(__riscv)) static inline std::pair<size_t, bool> FindMatchLength(const char* s1, const char* s2, - const char* s2_limit, + const char* s2_limit, + uint64_t* data) { assert(s2_limit >= s2); size_t matched = 0; @@ -93,39 +207,118 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1, // uncommon code paths that determine, without extra effort, whether the match // length is less than 8. In short, we are hoping to avoid a conditional // branch, and perhaps get better code layout from the C++ compiler. - if (PREDICT_TRUE(s2 <= s2_limit - 8)) { - uint64 a1 = UNALIGNED_LOAD64(s1); - uint64 a2 = UNALIGNED_LOAD64(s2); - if (a1 != a2) { - return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, - true); + if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) { + uint64_t a1 = UNALIGNED_LOAD64(s1); + uint64_t a2 = UNALIGNED_LOAD64(s2); + if (SNAPPY_PREDICT_TRUE(a1 != a2)) { + // This code is critical for performance. The reason is that it determines + // how much to advance `ip` (s2). This obviously depends on both the loads + // from the `candidate` (s1) and `ip`. Furthermore the next `candidate` + // depends on the advanced `ip` calculated here through a load, hash and + // new candidate hash lookup (a lot of cycles). This makes s1 (i.e. + // `candidate`) the variable that limits throughput. This is the reason we + // go through hoops to have this function update `data` for the next iter. + // The straightforward code would use *data, given by + // + // *data = UNALIGNED_LOAD64(s2 + matched_bytes) (Latency of 5 cycles), + // + // as input for the hash table lookup to find next candidate. However + // this forces the load on the data dependency chain of s1, because + // matched_bytes directly depends on s1. However matched_bytes is 0..7, so + // we can also calculate *data by + // + // *data = AlignRight(UNALIGNED_LOAD64(s2), UNALIGNED_LOAD64(s2 + 8), + // matched_bytes); + // + // The loads do not depend on s1 anymore and are thus off the bottleneck. + // The straightforward implementation on x86_64 would be to use + // + // shrd rax, rdx, cl (cl being matched_bytes * 8) + // + // unfortunately shrd with a variable shift has a 4 cycle latency. So this + // only wins 1 cycle. The BMI2 shrx instruction is a 1 cycle variable + // shift instruction but can only shift 64 bits. If we focus on just + // obtaining the least significant 4 bytes, we can obtain this by + // + // *data = ConditionalMove(matched_bytes < 4, UNALIGNED_LOAD64(s2), + // UNALIGNED_LOAD64(s2 + 4) >> ((matched_bytes & 3) * 8)); + // + // Written like above this is not a big win, the conditional move would be + // a cmp followed by a cmov (2 cycles) followed by a shift (1 cycle). + // However matched_bytes < 4 is equal to + // static_cast<uint32_t>(xorval) != 0. Written that way, the conditional + // move (2 cycles) can execute in parallel with FindLSBSetNonZero64 + // (tzcnt), which takes 3 cycles. + uint64_t xorval = a1 ^ a2; + int shift = Bits::FindLSBSetNonZero64(xorval); + size_t matched_bytes = shift >> 3; + uint64_t a3 = UNALIGNED_LOAD64(s2 + 4); +#ifndef __x86_64__ + a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2; +#else + // Ideally this would just be + // + // a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2; + // + // However clang correctly infers that the above statement participates in + // a critical data dependency chain and thus, unfortunately, refuses to + // use a conditional move (it's tuned to cut data dependencies).
In this + // case there is a longer parallel chain anyway AND this will be fairly + // unpredictable. + asm("testl %k2, %k2\n\t" + "cmovzq %1, %0\n\t" + : "+r"(a2) + : "r"(a3), "r"(xorval) + : "cc"); +#endif + *data = a2 >> (shift & (3 * 8)); + return std::pair<size_t, bool>(matched_bytes, true); } else { matched = 8; s2 += 8; } } + SNAPPY_PREFETCH(s1 + 64); + SNAPPY_PREFETCH(s2 + 64); // Find out how long the match is. We loop over the data 64 bits at a // time until we find a 64-bit block that doesn't match; then we find // the first non-matching bit and use that to calculate the total // length of the match. - while (PREDICT_TRUE(s2 <= s2_limit - 8)) { - if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { + while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) { + uint64_t a1 = UNALIGNED_LOAD64(s1 + matched); + uint64_t a2 = UNALIGNED_LOAD64(s2); + if (a1 == a2) { s2 += 8; matched += 8; } else { - uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero64(x); - matched += matching_bits >> 3; + uint64_t xorval = a1 ^ a2; + int shift = Bits::FindLSBSetNonZero64(xorval); + size_t matched_bytes = shift >> 3; + uint64_t a3 = UNALIGNED_LOAD64(s2 + 4); +#ifndef __x86_64__ + a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2; +#else + asm("testl %k2, %k2\n\t" + "cmovzq %1, %0\n\t" + : "+r"(a2) + : "r"(a3), "r"(xorval) + : "cc"); +#endif + *data = a2 >> (shift & (3 * 8)); + matched += matched_bytes; assert(matched >= 8); return std::pair<size_t, bool>(matched, false); } } - while (PREDICT_TRUE(s2 < s2_limit)) { + while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { if (s1[matched] == *s2) { ++s2; ++matched; } else { + if (s2 <= s2_limit - 8) { + *data = UNALIGNED_LOAD64(s2); + } return std::pair<size_t, bool>(matched, matched < 8); } } @@ -134,7 +327,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1, #else static inline std::pair<size_t, bool> FindMatchLength(const char* s1, const char* s2, - const char* s2_limit) { + const char* s2_limit, + uint64_t* data) { // Implementation based on the x86-64 version, above. assert(s2_limit >= s2); int matched = 0; @@ -145,19 +339,46 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1, matched += 4; } if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { - uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); int matching_bits = Bits::FindLSBSetNonZero(x); matched += matching_bits >> 3; + s2 += matching_bits >> 3; } else { while ((s2 < s2_limit) && (s1[matched] == *s2)) { ++s2; ++matched; } } + if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2); return std::pair<size_t, bool>(matched, matched < 8); } #endif +static inline size_t FindMatchLengthPlain(const char* s1, const char* s2, + const char* s2_limit) { + // Implementation based on the x86-64 version, above. + assert(s2_limit >= s2); + int matched = 0; + + while (s2 <= s2_limit - 8 && + UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { + s2 += 8; + matched += 8; + } + if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) { + uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + s2 += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } + return matched; +} + // Lookup tables for decompression code. Give --snappy_dump_decompression_table // to the unit test to recompute char_table.
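The comment block above hinges on one core trick: XOR the two 64-bit loads, find the lowest set bit of the result, and shift right by three to convert bits to bytes. A self-contained little-endian sketch of just that step, with the GCC/Clang `__builtin_ctzll` intrinsic standing in for `Bits::FindLSBSetNonZero64` (illustration only, not the library's exact code):

```c++
#include <cassert>
#include <cstdint>
#include <cstring>

// Returns how many leading bytes of s1 and s2 match, for inputs known to
// differ somewhere within the first 8 bytes.
inline size_t MatchedBytes(const char* s1, const char* s2) {
  uint64_t a1, a2;
  std::memcpy(&a1, s1, sizeof(a1));  // memcpy keeps the loads unaligned-safe
  std::memcpy(&a2, s2, sizeof(a2));
  uint64_t xorval = a1 ^ a2;
  assert(xorval != 0);  // a full 8-byte match is handled by the caller's loop
  // On little-endian hardware the lowest-addressed byte sits in the least
  // significant bits, so every 8 trailing zero bits equal one matching byte.
  return static_cast<size_t>(__builtin_ctzll(xorval)) >> 3;
}
```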
@@ -180,7 +401,8 @@ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual o // because of efficiency reasons: // (1) Extracting a byte is faster than a bit-field // (2) It properly aligns copy offset so we do not need a <<8 -static const uint16 char_table[256] = { +static constexpr uint16_t char_table[256] = { + // clang-format off 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, @@ -212,7 +434,8 @@ static const uint16 char_table[256] = { 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, - 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040, + // clang-format on }; } // end namespace internal diff --git a/snappy-sinksource.cc b/snappy-sinksource.cc index 369a132..8214964 100644 --- a/snappy-sinksource.cc +++ b/snappy-sinksource.cc @@ -26,23 +26,31 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include +#include +#include #include "snappy-sinksource.h" namespace snappy { -Source::~Source() { } +Source::~Source() = default; -Sink::~Sink() { } +Sink::~Sink() = default; char* Sink::GetAppendBuffer(size_t length, char* scratch) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)length; + return scratch; } char* Sink::GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)min_size; + (void)desired_size_hint; + *allocated_size = scratch_size; return scratch; } @@ -55,7 +63,7 @@ void Sink::AppendAndTakeOwnership( (*deleter)(deleter_arg, bytes, n); } -ByteArraySource::~ByteArraySource() { } +ByteArraySource::~ByteArraySource() = default; size_t ByteArraySource::Available() const { return left_; } @@ -74,22 +82,26 @@ UncheckedByteArraySink::~UncheckedByteArraySink() { } void UncheckedByteArraySink::Append(const char* data, size_t n) { // Do no copying if the caller filled in the result of GetAppendBuffer() if (data != dest_) { - memcpy(dest_, data, n); + std::memcpy(dest_, data, n); } dest_ += n; } char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)len; + (void)scratch; + return dest_; } void UncheckedByteArraySink::AppendAndTakeOwnership( - char* data, size_t n, + char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), void *deleter_arg) { - if (data != dest_) { - memcpy(dest_, data, n); - (*deleter)(deleter_arg, data, n); + if (bytes != dest_) { + std::memcpy(dest_, bytes, n); + (*deleter)(deleter_arg, bytes, n); } dest_ += n; } @@ -97,6 +109,11 @@ void UncheckedByteArraySink::AppendAndTakeOwnership( char* UncheckedByteArraySink::GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. 
+ (void)min_size; + (void)scratch; + (void)scratch_size; + *allocated_size = desired_size_hint; return dest_; } diff --git a/snappy-sinksource.h b/snappy-sinksource.h index 8afcdaa..3c74e1b 100644 --- a/snappy-sinksource.h +++ b/snappy-sinksource.h @@ -146,10 +146,10 @@ class Source { class ByteArraySource : public Source { public: ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } - virtual ~ByteArraySource(); - virtual size_t Available() const; - virtual const char* Peek(size_t* len); - virtual void Skip(size_t n); + ~ByteArraySource() override; + size_t Available() const override; + const char* Peek(size_t* len) override; + void Skip(size_t n) override; private: const char* ptr_; size_t left_; @@ -159,15 +159,15 @@ class ByteArraySource : public Source { class UncheckedByteArraySink : public Sink { public: explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } - virtual ~UncheckedByteArraySink(); - virtual void Append(const char* data, size_t n); - virtual char* GetAppendBuffer(size_t len, char* scratch); - virtual char* GetAppendBufferVariable( + ~UncheckedByteArraySink() override; + void Append(const char* data, size_t n) override; + char* GetAppendBuffer(size_t len, char* scratch) override; + char* GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - virtual void AppendAndTakeOwnership( + size_t scratch_size, size_t* allocated_size) override; + void AppendAndTakeOwnership( char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); + void *deleter_arg) override; // Return the current output pointer so that a caller can see how // many bytes were produced. diff --git a/snappy-stubs-internal.cc b/snappy-stubs-internal.cc index 6ed3343..0bc8c2d 100644 --- a/snappy-stubs-internal.cc +++ b/snappy-stubs-internal.cc @@ -33,7 +33,7 @@ namespace snappy { -void Varint::Append32(string* s, uint32 value) { +void Varint::Append32(std::string* s, uint32_t value) { char buf[Varint::kMax32]; const char* p = Varint::Encode32(buf, value); s->append(buf, p - buf); diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h index 6979e1a..526c38b 100644 --- a/snappy-stubs-internal.h +++ b/snappy-stubs-internal.h @@ -31,31 +31,49 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H #include "config.h" #endif -#include +#include -#include -#include -#include +#include +#include +#include +#include +#include -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include #endif -#include "snappy-stubs-public.h" +#if HAVE_UNISTD_H +#include +#endif + +#if defined(_MSC_VER) +#include +#endif // defined(_MSC_VER) -#if defined(__x86_64__) +#ifndef __has_feature +#define __has_feature(x) 0 +#endif -// Enable 64-bit optimized versions of some routines. -#define ARCH_K8 1 +#if __has_feature(memory_sanitizer) +#include +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ + __msan_unpoison((address), (size)) +#else +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ +#endif // __has_feature(memory_sanitizer) -#elif defined(__ppc64__) +#include "snappy-stubs-public.h" +// Used to enable 64-bit optimized versions of some routines. +#if defined(__PPC64__) || defined(__powerpc64__) #define ARCH_PPC 1 - +#elif defined(__aarch64__) || defined(_M_ARM64) +#define ARCH_ARM 1 #endif // Needed by OS X, among others. 
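To make the MemorySanitizer annotation above concrete, here is a small usage sketch; the helper name LoadWithSlop is hypothetical, and the pattern assumes the caller only consumes the first initialized_len bytes of the result:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical helper: load 8 bytes even though only the first
// initialized_len (<= 8) bytes of src are defined. The annotation marks the
// trailing slop as initialized, so MSan does not report when the masked-off
// high bytes flow into later computations.
static uint64_t LoadWithSlop(const char* src, size_t initialized_len) {
  SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(src + initialized_len,
                                        8 - initialized_len);
  uint64_t v;
  std::memcpy(&v, src, sizeof(v));
  return v;
}

This mirrors how LoadPattern() in snappy.cc, further below, annotates the bytes past pattern_size before an over-wide vector load whose extra lanes are masked out by the shuffle.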
@@ -69,222 +87,83 @@ #ifdef ARRAYSIZE #undef ARRAYSIZE #endif -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) +#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))} // Static prediction hints. -#ifdef HAVE_BUILTIN_EXPECT -#define PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#if HAVE_BUILTIN_EXPECT +#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) #else -#define PREDICT_FALSE(x) x -#define PREDICT_TRUE(x) x -#endif - -// This is only used for recomputing the tag byte table used during -// decompression; for simplicity we just remove it from the open-source -// version (anyone who wants to regenerate it can just do the call -// themselves within main()). -#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value -#define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name - -namespace snappy { - -static const uint32 kuint32max = static_cast(0xFFFFFFFF); -static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); - -// Potentially unaligned loads and stores. - -// x86 and PowerPC can simply do these loads and stores native. - -#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) +#define SNAPPY_PREDICT_FALSE(x) x +#define SNAPPY_PREDICT_TRUE(x) x +#endif // HAVE_BUILTIN_EXPECT -#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) - -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) +// Inlining hints. +#if HAVE_ATTRIBUTE_ALWAYS_INLINE +#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE +#endif // HAVE_ATTRIBUTE_ALWAYS_INLINE -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). There's no simple #define that says just -// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 -// sub-architectures. -// -// This is a mess, but there's not much we can do about it. -// -// To further complicate matters, only LDR instructions (single reads) are -// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we -// explicitly tell the compiler that these accesses can be unaligned, it can and -// will combine accesses. On armcc, the way to signal this is done by accessing -// through the type (uint32 __packed *), but GCC has no such attribute -// (it ignores __attribute__((packed)) on individual variables). However, -// we can tell it that a _struct_ is unaligned, which has the same effect, -// so we do that. 
- -#elif defined(__arm__) && \ - !defined(__ARM_ARCH_4__) && \ - !defined(__ARM_ARCH_4T__) && \ - !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && \ - !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && \ - !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && \ - !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && \ - !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) - -#if __GNUC__ -#define ATTRIBUTE_PACKED __attribute__((__packed__)) +#if HAVE_BUILTIN_PREFETCH +#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3) #else -#define ATTRIBUTE_PACKED +#define SNAPPY_PREFETCH(ptr) (void)(ptr) #endif -namespace base { -namespace internal { - -struct Unaligned16Struct { - uint16 value; - uint8 dummy; // To make the size non-power-of-two. -} ATTRIBUTE_PACKED; - -struct Unaligned32Struct { - uint32 value; - uint8 dummy; // To make the size non-power-of-two. -} ATTRIBUTE_PACKED; - -} // namespace internal -} // namespace base - -#define UNALIGNED_LOAD16(_p) \ - ((reinterpret_cast(_p))->value) -#define UNALIGNED_LOAD32(_p) \ - ((reinterpret_cast(_p))->value) - -#define UNALIGNED_STORE16(_p, _val) \ - ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \ - (_val)) -#define UNALIGNED_STORE32(_p, _val) \ - ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \ - (_val)) - -// TODO(user): NEON supports unaligned 64-bit loads and stores. -// See if that would be more efficient on platforms supporting it, -// at least for copies. - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} - -#else +// Stubbed version of ABSL_FLAG. +// +// In the open source version, flags can only be changed at compile time. +#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \ + flag_type FLAGS_ ## flag_name = default_value -// These functions are provided for architectures that don't support -// unaligned loads and stores. +namespace snappy { -inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; -} +// Stubbed version of absl::GetFlag(). +template +inline T GetFlag(T flag) { return flag; } -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; -} +static const uint32_t kuint32max = std::numeric_limits::max(); +static const int64_t kint64max = std::numeric_limits::max(); -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} +// Potentially unaligned loads and stores. -inline void UNALIGNED_STORE16(void *p, uint16 v) { - memcpy(p, &v, sizeof v); +inline uint16_t UNALIGNED_LOAD16(const void *p) { + // Compiles to a single movzx/ldrh on clang/gcc/msvc. + uint16_t v; + std::memcpy(&v, p, sizeof(v)); + return v; } -inline void UNALIGNED_STORE32(void *p, uint32 v) { - memcpy(p, &v, sizeof v); +inline uint32_t UNALIGNED_LOAD32(const void *p) { + // Compiles to a single mov/ldr on clang/gcc/msvc. + uint32_t v; + std::memcpy(&v, p, sizeof(v)); + return v; } -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); +inline uint64_t UNALIGNED_LOAD64(const void *p) { + // Compiles to a single mov/ldr on clang/gcc/msvc. + uint64_t v; + std::memcpy(&v, p, sizeof(v)); + return v; } -#endif - -// The following guarantees declaration of the byte swap functions. 
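The memcpy-based helpers above are the portable idiom for unaligned access: a fixed-size std::memcpy is defined for any address and, at -O1 and up, the major compilers collapse it to the same single load or store a pointer cast would emit. A minimal before/after comparison (illustrative only):

#include <cstdint>
#include <cstring>

uint32_t LoadU32(const void* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));  // defined for any alignment of p
  return v;
}

// The cast-and-dereference alternative is undefined behavior when p is not
// suitably aligned (and can also break under strict aliasing), even though
// it often appears to work:
//   uint32_t v = *reinterpret_cast<const uint32_t*>(p);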
-#ifdef WORDS_BIGENDIAN - -#ifdef HAVE_SYS_BYTEORDER_H -#include -#endif - -#ifdef HAVE_SYS_ENDIAN_H -#include -#endif - -#ifdef _MSC_VER -#include -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(HAVE_BYTESWAP_H) -#include - -#elif defined(bswap32) -// FreeBSD defines bswap{16,32,64} in (already #included). -#define bswap_16(x) bswap16(x) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) - -#elif defined(BSWAP_64) -// Solaris 10 defines BSWAP_{16,32,64} in (already #included). -#define bswap_16(x) BSWAP_16(x) -#define bswap_32(x) BSWAP_32(x) -#define bswap_64(x) BSWAP_64(x) - -#else - -inline uint16 bswap_16(uint16 x) { - return (x << 8) | (x >> 8); +inline void UNALIGNED_STORE16(void *p, uint16_t v) { + // Compiles to a single mov/strh on clang/gcc/msvc. + std::memcpy(p, &v, sizeof(v)); } -inline uint32 bswap_32(uint32 x) { - x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); - return (x >> 16) | (x << 16); +inline void UNALIGNED_STORE32(void *p, uint32_t v) { + // Compiles to a single mov/str on clang/gcc/msvc. + std::memcpy(p, &v, sizeof(v)); } -inline uint64 bswap_64(uint64 x) { - x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); - x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); - return (x >> 32) | (x << 32); +inline void UNALIGNED_STORE64(void *p, uint64_t v) { + // Compiles to a single mov/str on clang/gcc/msvc. + std::memcpy(p, &v, sizeof(v)); } -#endif - -#endif // WORDS_BIGENDIAN - // Convert to little-endian storage, opposite of network format. // Convert x from host to little endian: x = LittleEndian.FromHost(x); // convert x from little endian to host: x = LittleEndian.ToHost(x); @@ -296,87 +175,194 @@ inline uint64 bswap_64(uint64 x) { // x = LittleEndian.Load16(p); class LittleEndian { public: - // Conversion functions. -#ifdef WORDS_BIGENDIAN - - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } - - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } - - static bool IsLittleEndian() { return false; } - -#else // !defined(WORDS_BIGENDIAN) - - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } - - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } + // Functions to do unaligned loads and stores in little-endian order. + static inline uint16_t Load16(const void *ptr) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + const uint8_t* const buffer = reinterpret_cast(ptr); + return (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8); +#else + // memcpy() turns into a single instruction early in the optimization + // pipeline (relatively to a series of byte accesses). So, using memcpy + // instead of byte accesses may lead to better decisions in more stages of + // the optimization pipeline. + uint16_t value; + std::memcpy(&value, ptr, 2); + return value; +#endif + } - static bool IsLittleEndian() { return true; } + static inline uint32_t Load32(const void *ptr) { + // Compiles to a single mov/str on recent clang and gcc. 
+#if SNAPPY_IS_BIG_ENDIAN + const uint8_t* const buffer = reinterpret_cast(ptr); + return (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8) | + (static_cast(buffer[2]) << 16) | + (static_cast(buffer[3]) << 24); +#else + // See Load16() for the rationale of using memcpy(). + uint32_t value; + std::memcpy(&value, ptr, 4); + return value; +#endif + } -#endif // !defined(WORDS_BIGENDIAN) + static inline uint64_t Load64(const void *ptr) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + const uint8_t* const buffer = reinterpret_cast(ptr); + return (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8) | + (static_cast(buffer[2]) << 16) | + (static_cast(buffer[3]) << 24) | + (static_cast(buffer[4]) << 32) | + (static_cast(buffer[5]) << 40) | + (static_cast(buffer[6]) << 48) | + (static_cast(buffer[7]) << 56); +#else + // See Load16() for the rationale of using memcpy(). + uint64_t value; + std::memcpy(&value, ptr, 8); + return value; +#endif + } - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); + static inline void Store16(void *dst, uint16_t value) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + uint8_t* const buffer = reinterpret_cast(dst); + buffer[0] = static_cast(value); + buffer[1] = static_cast(value >> 8); +#else + // See Load16() for the rationale of using memcpy(). + std::memcpy(dst, &value, 2); +#endif } - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); + static void Store32(void *dst, uint32_t value) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + uint8_t* const buffer = reinterpret_cast(dst); + buffer[0] = static_cast(value); + buffer[1] = static_cast(value >> 8); + buffer[2] = static_cast(value >> 16); + buffer[3] = static_cast(value >> 24); +#else + // See Load16() for the rationale of using memcpy(). + std::memcpy(dst, &value, 4); +#endif } - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); + static void Store64(void* dst, uint64_t value) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + uint8_t* const buffer = reinterpret_cast(dst); + buffer[0] = static_cast(value); + buffer[1] = static_cast(value >> 8); + buffer[2] = static_cast(value >> 16); + buffer[3] = static_cast(value >> 24); + buffer[4] = static_cast(value >> 32); + buffer[5] = static_cast(value >> 40); + buffer[6] = static_cast(value >> 48); + buffer[7] = static_cast(value >> 56); +#else + // See Load16() for the rationale of using memcpy(). + std::memcpy(dst, &value, 8); +#endif } - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); + static inline constexpr bool IsLittleEndian() { +#if SNAPPY_IS_BIG_ENDIAN + return false; +#else + return true; +#endif // SNAPPY_IS_BIG_ENDIAN } }; // Some bit-manipulation functions. class Bits { public: + // Return floor(log2(n)) for positive integer n. + static int Log2FloorNonZero(uint32_t n); + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); + static int Log2Floor(uint32_t n); // Return the first set least / most significant bit, 0-indexed. Returns an // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except // that it's 0-indexed. 
- static int FindLSBSetNonZero(uint32 n); - static int FindLSBSetNonZero64(uint64 n); + static int FindLSBSetNonZero(uint32_t n); + + static int FindLSBSetNonZero64(uint64_t n); private: - DISALLOW_COPY_AND_ASSIGN(Bits); + // No copying + Bits(const Bits&); + void operator=(const Bits&); }; -#ifdef HAVE_BUILTIN_CTZ +#if HAVE_BUILTIN_CTZ + +inline int Bits::Log2FloorNonZero(uint32_t n) { + assert(n != 0); + // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof + // represents subtraction in base 2 and observes that there's no carry. + // + // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). + // Using "31 ^" here instead of "31 -" allows the optimizer to strip the + // function body down to _bit_scan_reverse(x). + return 31 ^ __builtin_clz(n); +} -inline int Bits::Log2Floor(uint32 n) { - return n == 0 ? -1 : 31 ^ __builtin_clz(n); +inline int Bits::Log2Floor(uint32_t n) { + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); } -inline int Bits::FindLSBSetNonZero(uint32 n) { +inline int Bits::FindLSBSetNonZero(uint32_t n) { + assert(n != 0); return __builtin_ctz(n); } -inline int Bits::FindLSBSetNonZero64(uint64 n) { - return __builtin_ctzll(n); +#elif defined(_MSC_VER) + +inline int Bits::Log2FloorNonZero(uint32_t n) { + assert(n != 0); + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + _BitScanReverse(&where, n); + return static_cast(where); +} + +inline int Bits::Log2Floor(uint32_t n) { + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + if (_BitScanReverse(&where, n)) + return static_cast(where); + return -1; +} + +inline int Bits::FindLSBSetNonZero(uint32_t n) { + assert(n != 0); + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + if (_BitScanForward(&where, n)) + return static_cast(where); + return 32; } #else // Portable versions. -inline int Bits::Log2Floor(uint32 n) { - if (n == 0) - return -1; +inline int Bits::Log2FloorNonZero(uint32_t n) { + assert(n != 0); + int log = 0; - uint32 value = n; + uint32_t value = n; for (int i = 4; i >= 0; --i) { int shift = (1 << i); - uint32 x = value >> shift; + uint32_t x = value >> shift; if (x != 0) { value = x; log += shift; @@ -386,10 +372,16 @@ inline int Bits::Log2Floor(uint32 n) { return log; } -inline int Bits::FindLSBSetNonZero(uint32 n) { +inline int Bits::Log2Floor(uint32_t n) { + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); +} + +inline int Bits::FindLSBSetNonZero(uint32_t n) { + assert(n != 0); + int rc = 31; for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; + const uint32_t x = n << shift; if (x != 0) { n = x; rc -= shift; @@ -399,23 +391,48 @@ inline int Bits::FindLSBSetNonZero(uint32 n) { return rc; } +#endif // End portable versions. + +#if HAVE_BUILTIN_CTZ + +inline int Bits::FindLSBSetNonZero64(uint64_t n) { + assert(n != 0); + return __builtin_ctzll(n); +} + +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) +// _BitScanForward64() is only available on x64 and ARM64. + +inline int Bits::FindLSBSetNonZero64(uint64_t n) { + assert(n != 0); + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + if (_BitScanForward64(&where, n)) + return static_cast(where); + return 64; +} + +#else // Portable version. + // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). 
-inline int Bits::FindLSBSetNonZero64(uint64 n) { - const uint32 bottombits = static_cast(n); +inline int Bits::FindLSBSetNonZero64(uint64_t n) { + assert(n != 0); + + const uint32_t bottombits = static_cast(n); if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); + // Bottom bits are zero, so scan the top bits. + return 32 + FindLSBSetNonZero(static_cast(n >> 32)); } else { return FindLSBSetNonZero(bottombits); } } -#endif // End portable versions. +#endif // HAVE_BUILTIN_CTZ // Variable-length integer encoding. class Varint { public: - // Maximum lengths of varint encoding of uint32. + // Maximum lengths of varint encoding of uint32_t. static const int kMax32 = 5; // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. @@ -424,23 +441,23 @@ class Varint { // past the last byte of the varint32. Else returns NULL. On success, // "result <= limit". static const char* Parse32WithLimit(const char* ptr, const char* limit, - uint32* OUTPUT); + uint32_t* OUTPUT); // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". // EFFECTS Encodes "v" into "ptr" and returns a pointer to the // byte just past the last encoded byte. - static char* Encode32(char* ptr, uint32 v); + static char* Encode32(char* ptr, uint32_t v); // EFFECTS Appends the varint representation of "value" to "*s". - static void Append32(string* s, uint32 value); + static void Append32(std::string* s, uint32_t value); }; inline const char* Varint::Parse32WithLimit(const char* p, const char* l, - uint32* OUTPUT) { + uint32_t* OUTPUT) { const unsigned char* ptr = reinterpret_cast(p); const unsigned char* limit = reinterpret_cast(l); - uint32 b, result; + uint32_t b, result; if (ptr >= limit) return NULL; b = *(ptr++); result = b & 127; if (b < 128) goto done; if (ptr >= limit) return NULL; @@ -457,30 +474,30 @@ inline const char* Varint::Parse32WithLimit(const char* p, return reinterpret_cast(ptr); } -inline char* Varint::Encode32(char* sptr, uint32 v) { +inline char* Varint::Encode32(char* sptr, uint32_t v) { // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(sptr); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; + uint8_t* ptr = reinterpret_cast(sptr); + static const uint8_t B = 128; + if (v < (1 << 7)) { + *(ptr++) = static_cast(v); + } else if (v < (1 << 14)) { + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast(v >> 7); + } else if (v < (1 << 21)) { + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast((v >> 7) | B); + *(ptr++) = static_cast(v >> 14); + } else if (v < (1 << 28)) { + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast((v >> 7) | B); + *(ptr++) = static_cast((v >> 14) | B); + *(ptr++) = static_cast(v >> 21); } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast((v>>7) | B); + *(ptr++) = static_cast((v>>14) | B); + *(ptr++) = static_cast((v>>21) | B); + *(ptr++) = static_cast(v >> 28); } return reinterpret_cast(ptr); } @@ -489,7 +506,7 @@ inline char* Varint::Encode32(char* sptr, uint32 v) { // replace this function with one that resizes the string 
without // filling the new space with zeros (if applicable) -- // it will be non-portable but faster. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { +inline void STLStringResizeUninitialized(std::string* s, size_t new_size) { s->resize(new_size); } @@ -505,7 +522,7 @@ inline void STLStringResizeUninitialized(string* s, size_t new_size) { // (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) // proposes this as the method. It will officially be part of the standard // for C++0x. This should already work on all current implementations. -inline char* string_as_array(string* str) { +inline char* string_as_array(std::string* str) { return str->empty() ? NULL : &*str->begin(); } diff --git a/snappy-stubs-public.h.in b/snappy-stubs-public.h.in index 96989ac..02947fa 100644 --- a/snappy-stubs-public.h.in +++ b/snappy-stubs-public.h.in @@ -1,5 +1,4 @@ // Copyright 2011 Google Inc. All Rights Reserved. -// Author: sesse@google.com (Steinar H. Gunderson) // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -36,64 +35,28 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#if @ac_cv_have_stdint_h@ -#include -#endif +#include -#if @ac_cv_have_stddef_h@ -#include -#endif - -#if @ac_cv_have_sys_uio_h@ +#if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H #include -#endif +#endif // HAVE_SYS_UIO_H -#define SNAPPY_MAJOR @SNAPPY_MAJOR@ -#define SNAPPY_MINOR @SNAPPY_MINOR@ -#define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@ +#define SNAPPY_MAJOR ${PROJECT_VERSION_MAJOR} +#define SNAPPY_MINOR ${PROJECT_VERSION_MINOR} +#define SNAPPY_PATCHLEVEL ${PROJECT_VERSION_PATCH} #define SNAPPY_VERSION \ ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) -#include - namespace snappy { -#if @ac_cv_have_stdint_h@ -typedef int8_t int8; -typedef uint8_t uint8; -typedef int16_t int16; -typedef uint16_t uint16; -typedef int32_t int32; -typedef uint32_t uint32; -typedef int64_t int64; -typedef uint64_t uint64; -#else -typedef signed char int8; -typedef unsigned char uint8; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -#endif - -typedef std::string string; - -#ifndef DISALLOW_COPY_AND_ASSIGN -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) -#endif - -#if !@ac_cv_have_sys_uio_h@ +#if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H // Windows does not have an iovec type, yet the concept is universally useful. // It is simple to define it ourselves, so we put it inside our own namespace. struct iovec { - void* iov_base; - size_t iov_len; + void* iov_base; + size_t iov_len; }; -#endif +#endif // !HAVE_SYS_UIO_H } // namespace snappy diff --git a/snappy-test.cc b/snappy-test.cc index 01d5541..aae6072 100644 --- a/snappy-test.cc +++ b/snappy-test.cc @@ -28,239 +28,130 @@ // // Various stubs for the unit tests for the open-source version of Snappy. -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_WINDOWS_H -// Needed to be able to use std::max without workarounds in the source code. 
-// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts -#define NOMINMAX -#include -#endif - #include "snappy-test.h" #include +#include +#include +#include +#include +#include -DEFINE_bool(run_microbenchmarks, true, - "Run microbenchmarks before doing anything else."); +namespace file { -namespace snappy { +OptionsStub::OptionsStub() = default; +OptionsStub::~OptionsStub() = default; -string ReadTestDataFile(const string& base, size_t size_limit) { - string contents; - const char* srcdir = getenv("srcdir"); // This is set by Automake. - string prefix; - if (srcdir) { - prefix = string(srcdir) + "/"; - } - file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() - ).CheckSuccess(); - if (size_limit > 0) { - contents = contents.substr(0, size_limit); - } - return contents; -} - -string ReadTestDataFile(const string& base) { - return ReadTestDataFile(base, 0); +const OptionsStub &Defaults() { + static OptionsStub defaults; + return defaults; } -string StringPrintf(const char* format, ...) { - char buf[4096]; - va_list ap; - va_start(ap, format); - vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - return buf; -} +StatusStub::StatusStub() = default; +StatusStub::StatusStub(const StatusStub &) = default; +StatusStub &StatusStub::operator=(const StatusStub &) = default; +StatusStub::~StatusStub() = default; -bool benchmark_running = false; -int64 benchmark_real_time_us = 0; -int64 benchmark_cpu_time_us = 0; -string *benchmark_label = NULL; -int64 benchmark_bytes_processed = 0; +bool StatusStub::ok() { return true; } -void ResetBenchmarkTiming() { - benchmark_real_time_us = 0; - benchmark_cpu_time_us = 0; -} +StatusStub GetContents(const std::string &filename, std::string *output, + const OptionsStub & /* options */) { + std::FILE *fp = std::fopen(filename.c_str(), "rb"); + if (fp == nullptr) { + std::perror(filename.c_str()); + std::exit(1); + } -#ifdef WIN32 -LARGE_INTEGER benchmark_start_real; -FILETIME benchmark_start_cpu; -#else // WIN32 -struct timeval benchmark_start_real; -struct rusage benchmark_start_cpu; -#endif // WIN32 - -void StartBenchmarkTiming() { -#ifdef WIN32 - QueryPerformanceCounter(&benchmark_start_real); - FILETIME dummy; - CHECK(GetProcessTimes( - GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_start_cpu)); -#else - gettimeofday(&benchmark_start_real, NULL); - if (getrusage(RUSAGE_SELF, &benchmark_start_cpu) == -1) { - perror("getrusage(RUSAGE_SELF)"); - exit(1); + output->clear(); + while (!std::feof(fp)) { + char buffer[4096]; + size_t bytes_read = std::fread(buffer, 1, sizeof(buffer), fp); + if (bytes_read == 0 && std::ferror(fp)) { + std::perror("fread"); + std::exit(1); + } + output->append(buffer, bytes_read); } -#endif - benchmark_running = true; + + std::fclose(fp); + return StatusStub(); } -void StopBenchmarkTiming() { - if (!benchmark_running) { - return; +StatusStub SetContents(const std::string &file_name, const std::string &content, + const OptionsStub & /* options */) { + std::FILE *fp = std::fopen(file_name.c_str(), "wb"); + if (fp == nullptr) { + std::perror(file_name.c_str()); + std::exit(1); } -#ifdef WIN32 - LARGE_INTEGER benchmark_stop_real; - LARGE_INTEGER benchmark_frequency; - QueryPerformanceCounter(&benchmark_stop_real); - QueryPerformanceFrequency(&benchmark_frequency); - - double elapsed_real = static_cast( - benchmark_stop_real.QuadPart - benchmark_start_real.QuadPart) / - benchmark_frequency.QuadPart; - benchmark_real_time_us += elapsed_real * 1e6 + 0.5; 
- - FILETIME benchmark_stop_cpu, dummy; - CHECK(GetProcessTimes( - GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_stop_cpu)); - - ULARGE_INTEGER start_ulargeint; - start_ulargeint.LowPart = benchmark_start_cpu.dwLowDateTime; - start_ulargeint.HighPart = benchmark_start_cpu.dwHighDateTime; - - ULARGE_INTEGER stop_ulargeint; - stop_ulargeint.LowPart = benchmark_stop_cpu.dwLowDateTime; - stop_ulargeint.HighPart = benchmark_stop_cpu.dwHighDateTime; - - benchmark_cpu_time_us += - (stop_ulargeint.QuadPart - start_ulargeint.QuadPart + 5) / 10; -#else // WIN32 - struct timeval benchmark_stop_real; - gettimeofday(&benchmark_stop_real, NULL); - benchmark_real_time_us += - 1000000 * (benchmark_stop_real.tv_sec - benchmark_start_real.tv_sec); - benchmark_real_time_us += - (benchmark_stop_real.tv_usec - benchmark_start_real.tv_usec); - - struct rusage benchmark_stop_cpu; - if (getrusage(RUSAGE_SELF, &benchmark_stop_cpu) == -1) { - perror("getrusage(RUSAGE_SELF)"); - exit(1); + size_t bytes_written = std::fwrite(content.data(), 1, content.size(), fp); + if (bytes_written != content.size()) { + std::perror("fwrite"); + std::exit(1); } - benchmark_cpu_time_us += 1000000 * (benchmark_stop_cpu.ru_utime.tv_sec - - benchmark_start_cpu.ru_utime.tv_sec); - benchmark_cpu_time_us += (benchmark_stop_cpu.ru_utime.tv_usec - - benchmark_start_cpu.ru_utime.tv_usec); -#endif // WIN32 - benchmark_running = false; + std::fclose(fp); + return StatusStub(); } -void SetBenchmarkLabel(const string& str) { - if (benchmark_label) { - delete benchmark_label; +} // namespace file + +namespace snappy { + +std::string ReadTestDataFile(const std::string& base, size_t size_limit) { + std::string contents; + const char* srcdir = getenv("srcdir"); // This is set by Automake. + std::string prefix; + if (srcdir) { + prefix = std::string(srcdir) + "/"; } - benchmark_label = new string(str); + file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() + ).ok(); + if (size_limit > 0) { + contents = contents.substr(0, size_limit); + } + return contents; } -void SetBenchmarkBytesProcessed(int64 bytes) { - benchmark_bytes_processed = bytes; +std::string StrFormat(const char* format, ...) { + char buffer[4096]; + std::va_list ap; + va_start(ap, format); + std::vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + return buffer; } -struct BenchmarkRun { - int64 real_time_us; - int64 cpu_time_us; -}; +LogMessage::~LogMessage() { std::cerr << std::endl; } -struct BenchmarkCompareCPUTime { - bool operator() (const BenchmarkRun& a, const BenchmarkRun& b) const { - return a.cpu_time_us < b.cpu_time_us; - } -}; - -void Benchmark::Run() { - for (int test_case_num = start_; test_case_num <= stop_; ++test_case_num) { - // Run a few iterations first to find out approximately how fast - // the benchmark is. - const int kCalibrateIterations = 100; - ResetBenchmarkTiming(); - StartBenchmarkTiming(); - (*function_)(kCalibrateIterations, test_case_num); - StopBenchmarkTiming(); - - // Let each test case run for about 200ms, but at least as many - // as we used to calibrate. - // Run five times and pick the median. 
- const int kNumRuns = 5; - const int kMedianPos = kNumRuns / 2; - int num_iterations = 0; - if (benchmark_real_time_us > 0) { - num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us; - } - num_iterations = std::max(num_iterations, kCalibrateIterations); - BenchmarkRun benchmark_runs[kNumRuns]; +LogMessage &LogMessage::operator<<(const std::string &message) { + std::cerr << message; + return *this; +} - for (int run = 0; run < kNumRuns; ++run) { - ResetBenchmarkTiming(); - StartBenchmarkTiming(); - (*function_)(num_iterations, test_case_num); - StopBenchmarkTiming(); +LogMessage &LogMessage::operator<<(int number) { + std::cerr << number; + return *this; +} - benchmark_runs[run].real_time_us = benchmark_real_time_us; - benchmark_runs[run].cpu_time_us = benchmark_cpu_time_us; - } +#ifdef _MSC_VER +// ~LogMessageCrash calls std::abort() and therefore never exits. This is by +// design, so temporarily disable warning C4722. +#pragma warning(push) +#pragma warning(disable : 4722) +#endif - string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num); - string human_readable_speed; - - std::nth_element(benchmark_runs, - benchmark_runs + kMedianPos, - benchmark_runs + kNumRuns, - BenchmarkCompareCPUTime()); - int64 real_time_us = benchmark_runs[kMedianPos].real_time_us; - int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us; - if (cpu_time_us <= 0) { - human_readable_speed = "?"; - } else { - int64 bytes_per_second = - benchmark_bytes_processed * 1000000 / cpu_time_us; - if (bytes_per_second < 1024) { - human_readable_speed = StringPrintf("%dB/s", bytes_per_second); - } else if (bytes_per_second < 1024 * 1024) { - human_readable_speed = StringPrintf( - "%.1fkB/s", bytes_per_second / 1024.0f); - } else if (bytes_per_second < 1024 * 1024 * 1024) { - human_readable_speed = StringPrintf( - "%.1fMB/s", bytes_per_second / (1024.0f * 1024.0f)); - } else { - human_readable_speed = StringPrintf( - "%.1fGB/s", bytes_per_second / (1024.0f * 1024.0f * 1024.0f)); - } - } +LogMessageCrash::~LogMessageCrash() { + std::cerr << std::endl; + std::abort(); +} - fprintf(stderr, -#ifdef WIN32 - "%-18s %10I64d %10I64d %10d %s %s\n", -#else - "%-18s %10lld %10lld %10d %s %s\n", +#ifdef _MSC_VER +#pragma warning(pop) #endif - heading.c_str(), - static_cast(real_time_us * 1000 / num_iterations), - static_cast(cpu_time_us * 1000 / num_iterations), - num_iterations, - human_readable_speed.c_str(), - benchmark_label->c_str()); - } -} -#ifdef HAVE_LIBZ +#if HAVE_LIBZ ZLib::ZLib() : comp_init_(false), diff --git a/snappy-test.h b/snappy-test.h index cebb4ee..65f3725 100644 --- a/snappy-test.h +++ b/snappy-test.h @@ -31,241 +31,110 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ -#include -#include +#if HAVE_CONFIG_H +#include "config.h" +#endif #include "snappy-stubs-internal.h" -#include -#include - -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include #endif -#ifdef HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H #include #endif -#ifdef HAVE_SYS_TIME_H +#if HAVE_SYS_TIME_H #include #endif -#ifdef HAVE_WINDOWS_H +#if HAVE_WINDOWS_H +// Needed to be able to use std::max without workarounds in the source code. 
+// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts +#define NOMINMAX #include #endif -#include - -#ifdef HAVE_GTEST - -#include -#undef TYPED_TEST -#define TYPED_TEST TEST -#define INIT_GTEST(argc, argv) ::testing::InitGoogleTest(argc, *argv) - -#else - -// Stubs for if the user doesn't have Google Test installed. - -#define TEST(test_case, test_subcase) \ - void Test_ ## test_case ## _ ## test_subcase() -#define INIT_GTEST(argc, argv) - -#define TYPED_TEST TEST -#define EXPECT_EQ CHECK_EQ -#define EXPECT_NE CHECK_NE -#define EXPECT_FALSE(cond) CHECK(!(cond)) - -#endif - -#ifdef HAVE_GFLAGS - -#include - -// This is tricky; both gflags and Google Test want to look at the command line -// arguments. Google Test seems to be the most happy with unknown arguments, -// though, so we call it first and hope for the best. -#define InitGoogle(argv0, argc, argv, remove_flags) \ - INIT_GTEST(argc, argv); \ - google::ParseCommandLineFlags(argc, argv, remove_flags); - -#else - -// If we don't have the gflags package installed, these can only be -// changed at compile time. -#define DEFINE_int32(flag_name, default_value, description) \ - static int FLAGS_ ## flag_name = default_value; +#define InitGoogle(argv0, argc, argv, remove_flags) ((void)(0)) -#define InitGoogle(argv0, argc, argv, remove_flags) \ - INIT_GTEST(argc, argv) - -#endif - -#ifdef HAVE_LIBZ +#if HAVE_LIBZ #include "zlib.h" #endif -#ifdef HAVE_LIBLZO2 +#if HAVE_LIBLZO2 #include "lzo/lzo1x.h" #endif -#ifdef HAVE_LIBLZF -extern "C" { -#include "lzf.h" -} +#if HAVE_LIBLZ4 +#include "lz4.h" #endif -#ifdef HAVE_LIBQUICKLZ -#include "quicklz.h" -#endif +namespace file { + +// Stubs the class file::Options. +// +// This class should not be instantiated explicitly. It should only be used by +// passing file::Defaults() to file::GetContents() / file::SetContents(). +class OptionsStub { + public: + OptionsStub(); + OptionsStub(const OptionsStub &) = delete; + OptionsStub &operator=(const OptionsStub &) = delete; + ~OptionsStub(); +}; -namespace { +const OptionsStub &Defaults(); -namespace file { - int Defaults() { return 0; } - - class DummyStatus { - public: - void CheckSuccess() { } - }; - - DummyStatus GetContents( - const std::string& filename, std::string* data, int unused) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (fp == NULL) { - perror(filename.c_str()); - exit(1); - } - - data->clear(); - while (!feof(fp)) { - char buf[4096]; - size_t ret = fread(buf, 1, 4096, fp); - if (ret == 0 && ferror(fp)) { - perror("fread"); - exit(1); - } - data->append(std::string(buf, ret)); - } - - fclose(fp); - - return DummyStatus(); - } +// Stubs the class absl::Status. +// +// This class should not be instantiated explicitly. It should only be used by +// passing the result of file::GetContents() / file::SetContents() to +// CHECK_OK(). 
+class StatusStub { + public: + StatusStub(); + StatusStub(const StatusStub &); + StatusStub &operator=(const StatusStub &); + ~StatusStub(); - inline DummyStatus SetContents( - const std::string& filename, const std::string& str, int unused) { - FILE* fp = fopen(filename.c_str(), "wb"); - if (fp == NULL) { - perror(filename.c_str()); - exit(1); - } + bool ok(); +}; - int ret = fwrite(str.data(), str.size(), 1, fp); - if (ret != 1) { - perror("fwrite"); - exit(1); - } +StatusStub GetContents(const std::string &file_name, std::string *output, + const OptionsStub & /* options */); - fclose(fp); +StatusStub SetContents(const std::string &file_name, const std::string &content, + const OptionsStub & /* options */); - return DummyStatus(); - } } // namespace file -} // namespace - namespace snappy { #define FLAGS_test_random_seed 301 -typedef string TypeParam; -void Test_CorruptedTest_VerifyCorrupted(); -void Test_Snappy_SimpleTests(); -void Test_Snappy_MaxBlowup(); -void Test_Snappy_RandomData(); -void Test_Snappy_FourByteOffset(); -void Test_SnappyCorruption_TruncatedVarint(); -void Test_SnappyCorruption_UnterminatedVarint(); -void Test_SnappyCorruption_OverflowingVarint(); -void Test_Snappy_ReadPastEndOfBuffer(); -void Test_Snappy_FindMatchLength(); -void Test_Snappy_FindMatchLengthRandom(); +std::string ReadTestDataFile(const std::string& base, size_t size_limit); -string ReadTestDataFile(const string& base, size_t size_limit); - -string ReadTestDataFile(const string& base); - -// A sprintf() variant that returns a std::string. +// A std::sprintf() variant that returns a std::string. // Not safe for general use due to truncation issues. -string StringPrintf(const char* format, ...); - -// A simple, non-cryptographically-secure random generator. -class ACMRandom { - public: - explicit ACMRandom(uint32 seed) : seed_(seed) {} - - int32 Next(); - - int32 Uniform(int32 n) { - return Next() % n; - } - uint8 Rand8() { - return static_cast((Next() >> 1) & 0x000000ff); - } - bool OneIn(int X) { return Uniform(X) == 0; } - - // Skewed: pick "base" uniformly from range [0,max_log] and then - // return "base" random bits. The effect is to pick a number in the - // range [0,2^max_log-1] with bias towards smaller numbers. - int32 Skewed(int max_log); - - private: - static const uint32 M = 2147483647L; // 2^31-1 - uint32 seed_; -}; - -inline int32 ACMRandom::Next() { - static const uint64 A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 - // We are computing - // seed_ = (seed_ * A) % M, where M = 2^31-1 - // - // seed_ must not be zero or M, or else all subsequent computed values - // will be zero or M respectively. For all other values, seed_ will end - // up cycling through every number in [1,M-1] - uint64 product = seed_ * A; - - // Compute (product % M) using the fact that ((x << 31) % M) == x. - seed_ = (product >> 31) + (product & M); - // The first reduction may overflow by 1 bit, so we may need to repeat. - // mod == M is not possible; using > allows the faster sign-bit-based test. - if (seed_ > M) { - seed_ -= M; - } - return seed_; -} - -inline int32 ACMRandom::Skewed(int max_log) { - const int32 base = (Next() - 1) % (max_log+1); - return (Next() - 1) & ((1u << base)-1); -} +std::string StrFormat(const char* format, ...); // A wall-time clock. This stub is not super-accurate, nor resistant to the // system time changing. 
class CycleTimer { public: - CycleTimer() : real_time_us_(0) {} + inline CycleTimer() : real_time_us_(0) {} + inline ~CycleTimer() = default; - void Start() { + inline void Start() { #ifdef WIN32 QueryPerformanceCounter(&start_); #else - gettimeofday(&start_, NULL); + ::gettimeofday(&start_, nullptr); #endif } - void Stop() { + inline void Stop() { #ifdef WIN32 LARGE_INTEGER stop; LARGE_INTEGER frequency; @@ -276,65 +145,78 @@ class CycleTimer { frequency.QuadPart; real_time_us_ += elapsed * 1e6 + 0.5; #else - struct timeval stop; - gettimeofday(&stop, NULL); + struct ::timeval stop; + ::gettimeofday(&stop, nullptr); real_time_us_ += 1000000 * (stop.tv_sec - start_.tv_sec); real_time_us_ += (stop.tv_usec - start_.tv_usec); #endif } - double Get() { - return real_time_us_ * 1e-6; - } + inline double Get() { return real_time_us_ * 1e-6; } private: - int64 real_time_us_; + int64_t real_time_us_; #ifdef WIN32 LARGE_INTEGER start_; #else - struct timeval start_; + struct ::timeval start_; #endif }; -// Minimalistic microbenchmark framework. +// Logging. + +class LogMessage { + public: + inline LogMessage() = default; + ~LogMessage(); -typedef void (*BenchmarkFunction)(int, int); + LogMessage &operator<<(const std::string &message); + LogMessage &operator<<(int number); +}; -class Benchmark { +class LogMessageCrash : public LogMessage { public: - Benchmark(const string& name, BenchmarkFunction function) : - name_(name), function_(function) {} + inline LogMessageCrash() = default; + ~LogMessageCrash(); +}; - Benchmark* DenseRange(int start, int stop) { - start_ = start; - stop_ = stop; - return this; - } +// This class is used to explicitly ignore values in the conditional +// logging macros. This avoids compiler warnings like "value computed +// is not used" and "statement has no effect". - void Run(); +class LogMessageVoidify { + public: + inline LogMessageVoidify() = default; + inline ~LogMessageVoidify() = default; - private: - const string name_; - const BenchmarkFunction function_; - int start_, stop_; + // This has to be an operator with a precedence lower than << but + // higher than ?: + inline void operator&(const LogMessage &) {} }; -#define BENCHMARK(benchmark_name) \ - Benchmark* Benchmark_ ## benchmark_name = \ - (new Benchmark(#benchmark_name, benchmark_name)) -extern Benchmark* Benchmark_BM_UFlat; -extern Benchmark* Benchmark_BM_UIOVec; -extern Benchmark* Benchmark_BM_UValidate; -extern Benchmark* Benchmark_BM_ZFlat; +// Asserts, both versions activated in debug mode only, +// and ones that are always active. -void ResetBenchmarkTiming(); -void StartBenchmarkTiming(); -void StopBenchmarkTiming(); -void SetBenchmarkLabel(const string& str); -void SetBenchmarkBytesProcessed(int64 bytes); +#define CRASH_UNLESS(condition) \ + SNAPPY_PREDICT_TRUE(condition) \ + ? (void)0 \ + : snappy::LogMessageVoidify() & snappy::LogMessageCrash() -#ifdef HAVE_LIBZ +#define LOG(level) LogMessage() +#define VLOG(level) \ + true ? (void)0 : snappy::LogMessageVoidify() & snappy::LogMessage() + +#define CHECK(cond) CRASH_UNLESS(cond) +#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) +#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) +#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) +#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) +#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) +#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) +#define CHECK_OK(cond) (cond).ok() + +#if HAVE_LIBZ // Object-oriented wrapper around zlib. 
class ZLib { @@ -457,127 +339,4 @@ class ZLib { } // namespace snappy -DECLARE_bool(run_microbenchmarks); - -static inline void RunSpecifiedBenchmarks() { - if (!FLAGS_run_microbenchmarks) { - return; - } - - fprintf(stderr, "Running microbenchmarks.\n"); -#ifndef NDEBUG - fprintf(stderr, "WARNING: Compiled with assertions enabled, will be slow.\n"); -#endif -#ifndef __OPTIMIZE__ - fprintf(stderr, "WARNING: Compiled without optimization, will be slow.\n"); -#endif - fprintf(stderr, "Benchmark Time(ns) CPU(ns) Iterations\n"); - fprintf(stderr, "---------------------------------------------------\n"); - - snappy::Benchmark_BM_UFlat->Run(); - snappy::Benchmark_BM_UIOVec->Run(); - snappy::Benchmark_BM_UValidate->Run(); - snappy::Benchmark_BM_ZFlat->Run(); - - fprintf(stderr, "\n"); -} - -#ifndef HAVE_GTEST - -static inline int RUN_ALL_TESTS() { - fprintf(stderr, "Running correctness tests.\n"); - snappy::Test_CorruptedTest_VerifyCorrupted(); - snappy::Test_Snappy_SimpleTests(); - snappy::Test_Snappy_MaxBlowup(); - snappy::Test_Snappy_RandomData(); - snappy::Test_Snappy_FourByteOffset(); - snappy::Test_SnappyCorruption_TruncatedVarint(); - snappy::Test_SnappyCorruption_UnterminatedVarint(); - snappy::Test_SnappyCorruption_OverflowingVarint(); - snappy::Test_Snappy_ReadPastEndOfBuffer(); - snappy::Test_Snappy_FindMatchLength(); - snappy::Test_Snappy_FindMatchLengthRandom(); - fprintf(stderr, "All tests passed.\n"); - - return 0; -} - -#endif // HAVE_GTEST - -// For main(). -namespace snappy { - -// Logging. - -#define LOG(level) LogMessage() -#define VLOG(level) true ? (void)0 : \ - snappy::LogMessageVoidify() & snappy::LogMessage() - -class LogMessage { - public: - LogMessage() { } - ~LogMessage() { - std::cerr << std::endl; - } - - LogMessage& operator<<(const std::string& msg) { - std::cerr << msg; - return *this; - } - LogMessage& operator<<(int x) { - std::cerr << x; - return *this; - } -}; - -// Asserts, both versions activated in debug mode only, -// and ones that are always active. - -#define CRASH_UNLESS(condition) \ - PREDICT_TRUE(condition) ? (void)0 : \ - snappy::LogMessageVoidify() & snappy::LogMessageCrash() - -#ifdef _MSC_VER -// ~LogMessageCrash calls abort() and therefore never exits. This is by design -// so temporarily disable warning C4722. -#pragma warning(push) -#pragma warning(disable:4722) -#endif - -class LogMessageCrash : public LogMessage { - public: - LogMessageCrash() { } - ~LogMessageCrash() { - std::cerr << std::endl; - abort(); - } -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -// This class is used to explicitly ignore values in the conditional -// logging macros. This avoids compiler warnings like "value computed -// is not used" and "statement has no effect". 
- -class LogMessageVoidify { - public: - LogMessageVoidify() { } - // This has to be an operator with a precedence lower than << but - // higher than ?: - void operator&(const LogMessage&) { } -}; - -#define CHECK(cond) CRASH_UNLESS(cond) -#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) -#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) -#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) -#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) -#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) -#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) -#define CHECK_OK(cond) (cond).CheckSuccess() - -} // namespace snappy - #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ diff --git a/snappy.cc b/snappy.cc index 1ba247b..d6d709a 100644 --- a/snappy.cc +++ b/snappy.cc @@ -26,51 +26,175 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "snappy.h" #include "snappy-internal.h" #include "snappy-sinksource.h" - -#ifndef SNAPPY_HAVE_SSE2 -#if defined(__SSE2__) || defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -#define SNAPPY_HAVE_SSE2 1 +#include "snappy.h" +#if !defined(SNAPPY_HAVE_BMI2) +// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 +// specifically, but it does define __AVX2__ when AVX2 support is available. +// Fortunately, AVX2 was introduced in Haswell, just like BMI2. +// +// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, +// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which +// case issuing BMI2 instructions results in a compiler error. +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +#define SNAPPY_HAVE_BMI2 1 #else -#define SNAPPY_HAVE_SSE2 0 +#define SNAPPY_HAVE_BMI2 0 #endif +#endif // !defined(SNAPPY_HAVE_BMI2) + +#if !defined(SNAPPY_HAVE_X86_CRC32) +#if defined(__SSE4_2__) +#define SNAPPY_HAVE_X86_CRC32 1 +#else +#define SNAPPY_HAVE_X86_CRC32 0 #endif +#endif // !defined(SNAPPY_HAVE_X86_CRC32) -#if SNAPPY_HAVE_SSE2 -#include +#if !defined(SNAPPY_HAVE_NEON_CRC32) +#if SNAPPY_HAVE_NEON && defined(__ARM_FEATURE_CRC32) +#define SNAPPY_HAVE_NEON_CRC32 1 +#else +#define SNAPPY_HAVE_NEON_CRC32 0 +#endif +#endif // !defined(SNAPPY_HAVE_NEON_CRC32) + +#if SNAPPY_HAVE_BMI2 || SNAPPY_HAVE_X86_CRC32 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include +#elif SNAPPY_HAVE_NEON_CRC32 +#include #endif -#include #include +#include +#include +#include +#include +#include +#include +#include #include +#include #include - namespace snappy { +namespace { + +// The amount of slop bytes writers are using for unconditional copies. +constexpr int kSlopBytes = 64; + +using internal::char_table; using internal::COPY_1_BYTE_OFFSET; using internal::COPY_2_BYTE_OFFSET; -using internal::LITERAL; -using internal::char_table; +using internal::COPY_4_BYTE_OFFSET; using internal::kMaximumTagLength; +using internal::LITERAL; +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE +using internal::V128; +using internal::V128_Load; +using internal::V128_LoadU; +using internal::V128_Shuffle; +using internal::V128_StoreU; +using internal::V128_DupChar; +#endif + +// We translate the information encoded in a tag through a lookup table to a +// format that requires fewer instructions to decode. Effectively we store +// the length minus the tag part of the offset. The lowest significant byte +// thus stores the length. 
While total length - offset is given by +// entry - ExtractOffset(type). The nice thing is that the subtraction +// immediately sets the flags for the necessary check that offset >= length. +// This folds the cmp with sub. We engineer the long literals and copy-4 to +// always fail this check, so their presence doesn't affect the fast path. +// To prevent literals from triggering the guard against offset < length (offset +// does not apply to literals) the table is giving them a spurious offset of +// 256. +inline constexpr int16_t MakeEntry(int16_t len, int16_t offset) { + return len - (offset << 8); +} + +inline constexpr int16_t LengthMinusOffset(int data, int type) { + return type == 3 ? 0xFF // copy-4 (or type == 3) + : type == 2 ? MakeEntry(data + 1, 0) // copy-2 + : type == 1 ? MakeEntry((data & 7) + 4, data >> 3) // copy-1 + : data < 60 ? MakeEntry(data + 1, 1) // note spurious offset. + : 0xFF; // long literal +} + +inline constexpr int16_t LengthMinusOffset(uint8_t tag) { + return LengthMinusOffset(tag >> 2, tag & 3); +} + +template +struct index_sequence {}; + +template +struct make_index_sequence : make_index_sequence {}; + +template +struct make_index_sequence<0, Is...> : index_sequence {}; + +template +constexpr std::array MakeTable(index_sequence) { + return std::array{LengthMinusOffset(seq)...}; +} + +alignas(64) const std::array kLengthMinusOffset = + MakeTable(make_index_sequence<256>{}); + +// Given a table of uint16_t whose size is mask / 2 + 1, return a pointer to the +// relevant entry, if any, for the given bytes. Any hash function will do, +// but a good hash function reduces the number of collisions and thus yields +// better compression for compressible input. +// +// REQUIRES: mask is 2 * (table_size - 1), and table_size is a power of two. +inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) { + // Our choice is quicker-and-dirtier than the typical hash function; + // empirically, that seems beneficial. The upper bits of kMagic * bytes are a + // higher-quality hash than the lower bits, so when using kMagic * bytes we + // also shift right to get a higher-quality end result. There's no similar + // issue with a CRC because all of the output bits of a CRC are equally good + // "hashes." So, a CPU instruction for CRC, if available, tends to be a good + // choice. +#if SNAPPY_HAVE_NEON_CRC32 + // We use mask as the second arg to the CRC function, as it's about to + // be used anyway; it'd be equally correct to use 0 or some constant. + // Mathematically, _mm_crc32_u32 (or similar) is a function of the + // xor of its arguments. + const uint32_t hash = __crc32cw(bytes, mask); +#elif SNAPPY_HAVE_X86_CRC32 + const uint32_t hash = _mm_crc32_u32(bytes, mask); +#else + constexpr uint32_t kMagic = 0x1e35a7bd; + const uint32_t hash = (kMagic * bytes) >> (31 - kMaxHashTableBits); +#endif + return reinterpret_cast(reinterpret_cast(table) + + (hash & mask)); +} -// Any hash function will produce a valid compressed bitstream, but a good -// hash function reduces the number of collisions and thus yields better -// compression for compressible input, and more speed for incompressible -// input. Of course, it doesn't hurt if the hash function is reasonably fast -// either, as it gets called a lot. 
-static inline uint32 HashBytes(uint32 bytes, int shift) { - uint32 kMul = 0x1e35a7bd; - return (bytes * kMul) >> shift; +inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes, + uint32_t mask) { + constexpr uint32_t kMagic = 2654435761U; + const uint32_t hash = (kMagic * bytes) >> (32 - kMaxHashTableBits); + return reinterpret_cast(reinterpret_cast(table) + + (hash & mask)); } -static inline uint32 Hash(const char* p, int shift) { - return HashBytes(UNALIGNED_LOAD32(p), shift); + +inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes, + uint32_t mask) { + constexpr uint64_t kMagic = 58295818150454627ULL; + const uint32_t hash = (kMagic * bytes) >> (64 - kMaxHashTableBits); + return reinterpret_cast(reinterpret_cast(table) + + (hash & mask)); } -size_t MaxCompressedLength(size_t source_len) { +} // namespace + +size_t MaxCompressedLength(size_t source_bytes) { // Compressed data can be defined as: // compressed := item* literal* // item := literal* copy @@ -91,28 +215,34 @@ size_t MaxCompressedLength(size_t source_len) { // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. // // This last factor dominates the blowup, so the final estimate is: - return 32 + source_len + source_len/6; + return 32 + source_bytes + source_bytes / 6; } namespace { void UnalignedCopy64(const void* src, void* dst) { char tmp[8]; - memcpy(tmp, src, 8); - memcpy(dst, tmp, 8); + std::memcpy(tmp, src, 8); + std::memcpy(dst, tmp, 8); } void UnalignedCopy128(const void* src, void* dst) { - // TODO(alkis): Remove this when we upgrade to a recent compiler that emits - // SSE2 moves for memcpy(dst, src, 16). -#if SNAPPY_HAVE_SSE2 - __m128i x = _mm_loadu_si128(static_cast(src)); - _mm_storeu_si128(static_cast<__m128i*>(dst), x); -#else + // std::memcpy() gets vectorized when the appropriate compiler options are + // used. For example, x86 compilers targeting SSE2+ will optimize to an SSE2 + // load and store. char tmp[16]; - memcpy(tmp, src, 16); - memcpy(dst, tmp, 16); -#endif + std::memcpy(tmp, src, 16); + std::memcpy(dst, tmp, 16); +} + +template +inline void ConditionalUnalignedCopy128(const char* src, char* dst) { + if (use_16bytes_chunk) { + UnalignedCopy128(src, dst); + } else { + UnalignedCopy64(src, dst); + UnalignedCopy64(src + 8, dst + 8); + } } // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used @@ -124,30 +254,194 @@ void UnalignedCopy128(const void* src, void* dst) { // After IncrementalCopySlow(src, op, op_limit), the result will have eleven // copies of "ab" // ababababababababababab -// Note that this does not match the semantics of either memcpy() or memmove(). +// Note that this does not match the semantics of either std::memcpy() or +// std::memmove(). inline char* IncrementalCopySlow(const char* src, char* op, char* const op_limit) { + // TODO: Remove pragma when LLVM is aware this + // function is only called in cold regions and when cold regions don't get + // vectorized or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif while (op < op_limit) { *op++ = *src++; } return op_limit; } -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + +// Computes the bytes for shuffle control mask (please read comments on +// 'pattern_generation_masks' as well) for the given index_offset and +// pattern_size. For example, when the 'offset' is 6, it will generate a +// repeating pattern of size 6. 
So, the first 16 byte indexes will correspond to +// the pattern-bytes {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3} and the +// next 16 byte indexes will correspond to the pattern-bytes {4, 5, 0, 1, 2, 3, +// 4, 5, 0, 1, 2, 3, 4, 5, 0, 1}. These byte index sequences are generated by +// calling MakePatternMaskBytes(0, 6, index_sequence<16>()) and +// MakePatternMaskBytes(16, 6, index_sequence<16>()) respectively. + + +template +inline constexpr std::array MakePatternMaskBytes( + int index_offset, int pattern_size, index_sequence) { + return {static_cast((index_offset + indexes) % pattern_size)...}; +} + +// Computes the shuffle control mask bytes array for given pattern-sizes and +// returns an array. +template +inline constexpr std::array, + sizeof...(pattern_sizes_minus_one)> +MakePatternMaskBytesTable(int index_offset, + index_sequence) { + return { + MakePatternMaskBytes(index_offset, pattern_sizes_minus_one + 1, + make_index_sequence())...}; +} +// This is an array of shuffle control masks that can be used as the source +// operand for PSHUFB to permute the contents of the destination XMM register +// into a repeating byte pattern. +alignas(16) constexpr std::array, + 16> pattern_generation_masks = + MakePatternMaskBytesTable( + /*index_offset=*/0, + /*pattern_sizes_minus_one=*/make_index_sequence<16>()); + +// Similar to 'pattern_generation_masks', this table is used to "rotate" the +// pattern so that we can copy the *next 16 bytes* consistent with the pattern. +// Basically, pattern_reshuffle_masks is a continuation of +// pattern_generation_masks. It follows that, pattern_reshuffle_masks is same as +// pattern_generation_masks for offsets 1, 2, 4, 8 and 16. +alignas(16) constexpr std::array, + 16> pattern_reshuffle_masks = + MakePatternMaskBytesTable( + /*index_offset=*/16, + /*pattern_sizes_minus_one=*/make_index_sequence<16>()); + +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +static inline V128 LoadPattern(const char* src, const size_t pattern_size) { + V128 generation_mask = V128_Load(reinterpret_cast( + pattern_generation_masks[pattern_size - 1].data())); + // Uninitialized bytes are masked out by the shuffle mask. + // TODO: remove annotation and macro defs once MSan is fixed. + SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(src + pattern_size, 16 - pattern_size); + return V128_Shuffle(V128_LoadU(reinterpret_cast(src)), + generation_mask); +} +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +static inline std::pair +LoadPatternAndReshuffleMask(const char* src, const size_t pattern_size) { + V128 pattern = LoadPattern(src, pattern_size); + + // This mask will generate the next 16 bytes in-place. Doing so enables us to + // write data by at most 4 V128_StoreU. + // + // For example, suppose pattern is: abcdefabcdefabcd + // Shuffling with this mask will generate: efabcdefabcdefab + // Shuffling again will generate: cdefabcdefabcdef + V128 reshuffle_mask = V128_Load(reinterpret_cast( + pattern_reshuffle_masks[pattern_size - 1].data())); + return {pattern, reshuffle_mask}; +} +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + +// Fallback for when we need to copy while extending the pattern, for example +// copying 10 bytes from 3 positions back abc -> abcabcabcabca. +// +// REQUIRES: [dst - offset, dst + 64) is a valid address range. 
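A scalar model of what these masks implement can be easier to read than the shuffles: extending a pattern is just an overlapping byte-at-a-time copy, which the masks let a single PSHUFB reproduce 16 bytes at a time. The sketch below (ExtendPattern is an illustrative name, not part of the patch) shows the same effect:

#include <cstdio>
#include <cstring>

// Make the 64 bytes at dst repeat the `offset` bytes that precede dst,
// e.g. "abc" with offset 3 becomes "abcabcabc...".
void ExtendPattern(char* dst, int offset) {
  // (dst - offset)[i] always reads a byte that was already present or already
  // written, which is what makes the overlapping copy act as a repeater.
  for (int i = 0; i < 64; ++i) dst[i] = (dst - offset)[i];
}

int main() {
  char buf[3 + 64];
  std::memcpy(buf, "abc", 3);
  ExtendPattern(buf + 3, 3);
  std::printf("%.16s\n", buf);  // abcabcabcabcabca
}

The vector path produces the same 64 bytes with four 16-byte stores: shuffling the loaded pattern once yields the first 16 bytes, and the reshuffle mask rotates that register in place to yield each subsequent 16.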
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE +static inline bool Copy64BytesWithPatternExtension(char* dst, size_t offset) { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + if (SNAPPY_PREDICT_TRUE(offset <= 16)) { + switch (offset) { + case 0: + return false; + case 1: { + // TODO: Ideally we should memset, move back once the + // codegen issues are fixed. + V128 pattern = V128_DupChar(dst[-1]); + for (int i = 0; i < 4; i++) { + V128_StoreU(reinterpret_cast(dst + 16 * i), pattern); + } + return true; + } + case 2: + case 4: + case 8: + case 16: { + V128 pattern = LoadPattern(dst - offset, offset); + for (int i = 0; i < 4; i++) { + V128_StoreU(reinterpret_cast(dst + 16 * i), pattern); + } + return true; + } + default: { + auto pattern_and_reshuffle_mask = + LoadPatternAndReshuffleMask(dst - offset, offset); + V128 pattern = pattern_and_reshuffle_mask.first; + V128 reshuffle_mask = pattern_and_reshuffle_mask.second; + for (int i = 0; i < 4; i++) { + V128_StoreU(reinterpret_cast(dst + 16 * i), pattern); + pattern = V128_Shuffle(pattern, reshuffle_mask); + } + return true; + } + } + } +#else + if (SNAPPY_PREDICT_TRUE(offset < 16)) { + if (SNAPPY_PREDICT_FALSE(offset == 0)) return false; + // Extend the pattern to the first 16 bytes. + // The simpler formulation of `dst[i - offset]` induces undefined behavior. + for (int i = 0; i < 16; i++) dst[i] = (dst - offset)[i]; + // Find a multiple of pattern >= 16. + static std::array pattern_sizes = []() { + std::array res; + for (int i = 1; i < 16; i++) res[i] = (16 / i + 1) * i; + return res; + }(); + offset = pattern_sizes[offset]; + for (int i = 1; i < 4; i++) { + std::memcpy(dst + i * 16, dst + i * 16 - offset, 16); + } + return true; + } +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + + // Very rare. + for (int i = 0; i < 4; i++) { + std::memcpy(dst + i * 16, dst + i * 16 - offset, 16); + } + return true; +} + +// Copy [src, src+(op_limit-op)) to [op, op_limit) but faster than // IncrementalCopySlow. buf_limit is the address past the end of the writable // region of the buffer. inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, char* const buf_limit) { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + constexpr int big_pattern_size_lower_bound = 16; +#else + constexpr int big_pattern_size_lower_bound = 8; +#endif + // Terminology: // // slop = buf_limit - op // pat = op - src - // len = limit - op + // len = op_limit - op assert(src < op); + assert(op < op_limit); assert(op_limit <= buf_limit); - // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that - // to optimize this function but we have to also handle these cases in case - // the input does not satisfy these conditions. + // NOTE: The copy tags use 3 or 6 bits to store the copy length, so len <= 64. + assert(op_limit - op <= 64); + // NOTE: In practice the compressor always emits len >= 4, so it is ok to + // assume that to optimize this function, but this is not guaranteed by the + // compression format, so we have to also handle len < 4 in case the input + // does not satisfy these conditions. size_t pattern_size = op - src; // The cases are split into different branches to allow the branch predictor, @@ -171,47 +465,139 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // input. In general if we always predict len <= 16 it would be an ok // prediction. // - // In order to be fast we want a pattern >= 8 bytes and an unrolled loop - // copying 2x 8 bytes at a time. - - // Handle the uncommon case where pattern is less than 8 bytes. 
- if (PREDICT_FALSE(pattern_size < 8)) { - // Expand pattern to at least 8 bytes. The worse case scenario in terms of - // buffer usage is when the pattern is size 3. ^ is the original position - // of op. x are irrelevant bytes copied by the last UnalignedCopy64. + // In order to be fast we want a pattern >= 16 bytes (or 8 bytes in non-SSE) + // and an unrolled loop copying 1x 16 bytes (or 2x 8 bytes in non-SSE) at a + // time. + + // Handle the uncommon case where pattern is less than 16 (or 8 in non-SSE) + // bytes. + if (pattern_size < big_pattern_size_lower_bound) { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB + // to permute the register's contents in-place into a repeating sequence of + // the first "pattern_size" bytes. + // For example, suppose: + // src == "abc" + // op == op + 3 + // After V128_Shuffle(), "pattern" will have five copies of "abc" + // followed by one byte of slop: abcabcabcabcabca. // - // abc - // abcabcxxxxx - // abcabcabcabcxxxxx - // ^ - // The last x is 14 bytes after ^. - if (PREDICT_TRUE(op <= buf_limit - 14)) { + // The non-SSE fallback implementation suffers from store-forwarding stalls + // because its loads and stores partly overlap. By expanding the pattern + // in-place, we avoid the penalty. + + // Typically, the op_limit is the gating factor so try to simplify the loop + // based on that. + if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 15)) { + auto pattern_and_reshuffle_mask = + LoadPatternAndReshuffleMask(src, pattern_size); + V128 pattern = pattern_and_reshuffle_mask.first; + V128 reshuffle_mask = pattern_and_reshuffle_mask.second; + // There is at least one, and at most four 16-byte blocks. Writing four + // conditionals instead of a loop allows FDO to layout the code with + // respect to the actual probabilities of each length. + // TODO: Replace with loop with trip count hint. + V128_StoreU(reinterpret_cast(op), pattern); + + if (op + 16 < op_limit) { + pattern = V128_Shuffle(pattern, reshuffle_mask); + V128_StoreU(reinterpret_cast(op + 16), pattern); + } + if (op + 32 < op_limit) { + pattern = V128_Shuffle(pattern, reshuffle_mask); + V128_StoreU(reinterpret_cast(op + 32), pattern); + } + if (op + 48 < op_limit) { + pattern = V128_Shuffle(pattern, reshuffle_mask); + V128_StoreU(reinterpret_cast(op + 48), pattern); + } + return op_limit; + } + char* const op_end = buf_limit - 15; + if (SNAPPY_PREDICT_TRUE(op < op_end)) { + auto pattern_and_reshuffle_mask = + LoadPatternAndReshuffleMask(src, pattern_size); + V128 pattern = pattern_and_reshuffle_mask.first; + V128 reshuffle_mask = pattern_and_reshuffle_mask.second; + // This code path is relatively cold however so we save code size + // by avoiding unrolling and vectorizing. + // + // TODO: Remove pragma when when cold regions don't get + // vectorized or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + do { + V128_StoreU(reinterpret_cast(op), pattern); + pattern = V128_Shuffle(pattern, reshuffle_mask); + op += 16; + } while (SNAPPY_PREDICT_TRUE(op < op_end)); + } + return IncrementalCopySlow(op - pattern_size, op, op_limit); +#else // !SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + // If plenty of buffer space remains, expand the pattern to at least 8 + // bytes. The way the following loop is written, we need 8 bytes of buffer + // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 + // bytes if pattern_size is 2. 
Precisely encoding that is probably not + // worthwhile; instead, invoke the slow path if we cannot write 11 bytes + // (because 11 are required in the worst case). + if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { while (pattern_size < 8) { UnalignedCopy64(src, op); op += pattern_size; pattern_size *= 2; } - if (PREDICT_TRUE(op >= op_limit)) return op_limit; + if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; } else { return IncrementalCopySlow(src, op, op_limit); } +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE } - assert(pattern_size >= 8); + assert(pattern_size >= big_pattern_size_lower_bound); + constexpr bool use_16bytes_chunk = big_pattern_size_lower_bound == 16; - // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single - // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe - // because expanding the pattern to at least 8 bytes guarantees that - // op - src >= 8. - while (op <= buf_limit - 16) { - UnalignedCopy64(src, op); - UnalignedCopy64(src + 8, op + 8); - src += 16; - op += 16; - if (PREDICT_TRUE(op >= op_limit)) return op_limit; + // Copy 1x 16 bytes (or 2x 8 bytes in non-SSE) at a time. Because op - src can + // be < 16 in non-SSE, a single UnalignedCopy128 might overwrite data in op. + // UnalignedCopy64 is safe because expanding the pattern to at least 8 bytes + // guarantees that op - src >= 8. + // + // Typically, the op_limit is the gating factor so try to simplify the loop + // based on that. + if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 15)) { + // There is at least one, and at most four 16-byte blocks. Writing four + // conditionals instead of a loop allows FDO to layout the code with respect + // to the actual probabilities of each length. + // TODO: Replace with loop with trip count hint. + ConditionalUnalignedCopy128(src, op); + if (op + 16 < op_limit) { + ConditionalUnalignedCopy128(src + 16, op + 16); + } + if (op + 32 < op_limit) { + ConditionalUnalignedCopy128(src + 32, op + 32); + } + if (op + 48 < op_limit) { + ConditionalUnalignedCopy128(src + 48, op + 48); + } + return op_limit; + } + + // Fall back to doing as much as we can with the available slop in the + // buffer. This code path is relatively cold however so we save code size by + // avoiding unrolling and vectorizing. + // + // TODO: Remove pragma when when cold regions don't get vectorized + // or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + for (char* op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { + ConditionalUnalignedCopy128(src, op); } + if (op >= op_limit) return op_limit; + // We only take this branch if we didn't have enough slop and we can do a // single 8 byte copy. - if (PREDICT_FALSE(op <= buf_limit - 8)) { + if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { UnalignedCopy64(src, op); src += 8; op += 8; @@ -221,12 +607,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, } // namespace -static inline char* EmitLiteral(char* op, - const char* literal, - int len, - bool allow_fast_path) { +template +static inline char* EmitLiteral(char* op, const char* literal, int len) { // The vast majority of copies are below 16 bytes, for which a - // call to memcpy is overkill. This fast path can sometimes + // call to std::memcpy() is overkill. This fast path can sometimes // copy up to 15 bytes too much, but that is okay in the // main loop, since we have a bit to go on for both sides: // @@ -235,7 +619,7 @@ static inline char* EmitLiteral(char* op, // if not, allow_fast_path = false. 
// - The output will always have 32 spare bytes (see // MaxCompressedLength). - assert(len > 0); // Zero-length literals are disallowed + assert(len > 0); // Zero-length literals are disallowed int n = len - 1; if (allow_fast_path && len <= 16) { // Fits in tag byte @@ -249,74 +633,95 @@ static inline char* EmitLiteral(char* op, // Fits in tag byte *op++ = LITERAL | (n << 2); } else { - // Encode in upcoming bytes - char* base = op; - int count = 0; - op++; - while (n > 0) { - *op++ = n & 0xff; - n >>= 8; - count++; - } + int count = (Bits::Log2Floor(n) >> 3) + 1; assert(count >= 1); assert(count <= 4); - *base = LITERAL | ((59+count) << 2); + *op++ = LITERAL | ((59 + count) << 2); + // Encode in upcoming bytes. + // Write 4 bytes, though we may care about only 1 of them. The output buffer + // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds + // here and there is a std::memcpy() of size 'len' below. + LittleEndian::Store32(op, n); + op += count; + } + // When allow_fast_path is true, we can overwrite up to 16 bytes. + if (allow_fast_path) { + char* destination = op; + const char* source = literal; + const char* end = destination + len; + do { + std::memcpy(destination, source, 16); + destination += 16; + source += 16; + } while (destination < end); + } else { + std::memcpy(op, literal, len); } - memcpy(op, literal, len); return op + len; } -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template +static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { assert(len <= 64); assert(len >= 4); assert(offset < 65536); assert(len_less_than_12 == (len < 12)); - if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) { - // offset fits in 11 bits. The 3 highest go in the top of the first byte, - // and the rest go in the second byte. - *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); - *op++ = offset & 0xff; + if (len_less_than_12) { + uint32_t u = (len << 2) + (offset << 8); + uint32_t copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0); + uint32_t copy2 = COPY_2_BYTE_OFFSET - (1 << 2); + // It turns out that offset < 2048 is a difficult to predict branch. + // `perf record` shows this is the highest percentage of branch misses in + // benchmarks. This code produces branch free code, the data dependency + // chain that bottlenecks the throughput is so long that a few extra + // instructions are completely free (IPC << 6 because of data deps). + u += offset < 2048 ? copy1 : copy2; + LittleEndian::Store32(op, u); + op += offset < 2048 ? 2 : 3; } else { // Write 4 bytes, though we only care about 3 of them. The output buffer // is required to have some slack, so the extra byte won't overrun it. - uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); + uint32_t u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); LittleEndian::Store32(op, u); op += 3; } return op; } -static inline char* EmitCopy(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template +static inline char* EmitCopy(char* op, size_t offset, size_t len) { assert(len_less_than_12 == (len < 12)); if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len, true); + return EmitCopyAtMost64(op, offset, len); } else { // A special case for len <= 64 might help, but so far measurements suggest // it's in the noise. // Emit 64 byte copies but make sure to keep at least four bytes reserved. 
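To make the reservation concrete before the loop below: a copy shorter than 4 bytes cannot be encoded, so the splitting logic never lets the final fragment drop under 4. A sketch of the arithmetic (SplitCopy is illustrative only):

#include <cstdio>

// Split a match of len bytes (len >= 4) the way EmitCopy does: 64-byte copies
// while at least 68 remain, one 60-byte copy if 65..67 remain, then the rest.
void SplitCopy(int len) {
  while (len >= 68) {  // emitting 64 here still leaves >= 4 bytes
    std::printf("copy 64\n");
    len -= 64;
  }
  if (len > 64) {  // 65..67 left: emit 60 so the remainder is 5..7, not 1..3
    std::printf("copy 60\n");
    len -= 60;
  }
  std::printf("copy %d\n", len);  // always 4..64, encodable as a single tag
}

int main() { SplitCopy(131); }  // prints: copy 64, copy 60, copy 7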
- while (PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64, false); + while (SNAPPY_PREDICT_FALSE(len >= 68)) { + op = EmitCopyAtMost64(op, offset, 64); len -= 64; } // One or two copies will now finish the job. if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60, false); + op = EmitCopyAtMost64(op, offset, 60); len -= 60; } // Emit remainder. - op = EmitCopyAtMost64(op, offset, len, len < 12); + if (len < 12) { + op = EmitCopyAtMost64(op, offset, len); + } else { + op = EmitCopyAtMost64(op, offset, len); + } return op; } } bool GetUncompressedLength(const char* start, size_t n, size_t* result) { - uint32 v = 0; + uint32_t v = 0; const char* limit = start + n; if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { *result = v; @@ -326,76 +731,47 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) { } } -namespace internal { -uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) { - // Use smaller hash table when input.size() is smaller, since we - // fill the table, incurring O(hash table size) overhead for - // compression, and if the input is short, we won't need that - // many hash table entries anyway. - assert(kMaxHashTableSize >= 256); - size_t htsize = 256; - while (htsize < kMaxHashTableSize && htsize < input_size) { - htsize <<= 1; - } - - uint16* table; - if (htsize <= ARRAYSIZE(small_table_)) { - table = small_table_; - } else { - if (large_table_ == NULL) { - large_table_ = new uint16[kMaxHashTableSize]; - } - table = large_table_; +namespace { +uint32_t CalculateTableSize(uint32_t input_size) { + static_assert( + kMaxHashTableSize >= kMinHashTableSize, + "kMaxHashTableSize should be greater or equal to kMinHashTableSize."); + if (input_size > kMaxHashTableSize) { + return kMaxHashTableSize; } - - *table_size = htsize; - memset(table, 0, htsize * sizeof(*table)); - return table; -} -} // end namespace internal - -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will -// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have -// empirically found that overlapping loads such as -// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) -// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. -// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. - -#ifdef ARCH_K8 - -typedef uint64 EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); + if (input_size < kMinHashTableSize) { + return kMinHashTableSize; + } + // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. + // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). + return 2u << Bits::Log2Floor(input_size - 1); } +} // namespace -static inline uint32 GetUint32AtOffset(uint64 v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return v >> (LittleEndian::IsLittleEndian() ? 
8 * offset : 32 - 8 * offset); +namespace internal { +WorkingMemory::WorkingMemory(size_t input_size) { + const size_t max_fragment_size = std::min(input_size, kBlockSize); + const size_t table_size = CalculateTableSize(max_fragment_size); + size_ = table_size * sizeof(*table_) + max_fragment_size + + MaxCompressedLength(max_fragment_size); + mem_ = std::allocator().allocate(size_); + table_ = reinterpret_cast(mem_); + input_ = mem_ + table_size * sizeof(*table_); + output_ = input_ + max_fragment_size; } -#else - -typedef const char* EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; +WorkingMemory::~WorkingMemory() { + std::allocator().deallocate(mem_, size_); } -static inline uint32 GetUint32AtOffset(const char* v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); +uint16_t* WorkingMemory::GetHashTable(size_t fragment_size, + int* table_size) const { + const size_t htsize = CalculateTableSize(fragment_size); + memset(table_, 0, htsize * sizeof(*table_)); + *table_size = htsize; + return table_; } - -#endif +} // end namespace internal // Flat array compression that does not emit the "uncompressed length" // prefix. Compresses "input" string to the "*op" buffer. @@ -409,29 +785,25 @@ static inline uint32 GetUint32AtOffset(const char* v, int offset) { // Returns an "end" pointer into "op" buffer. // "end - op" is the compressed size of "input". namespace internal { -char* CompressFragment(const char* input, - size_t input_size, - char* op, - uint16* table, - const int table_size) { +char* CompressFragment(const char* input, size_t input_size, char* op, + uint16_t* table, const int table_size) { // "ip" is the input pointer, and "op" is the output pointer. const char* ip = input; assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two - const int shift = 32 - Bits::Log2Floor(table_size); - assert(static_cast(kuint32max >> shift) == table_size - 1); + assert((table_size & (table_size - 1)) == 0); // table must be power of two + const uint32_t mask = 2 * (table_size - 1); const char* ip_end = input + input_size; const char* base_ip = ip; - // Bytes in [next_emit, ip) will be emitted as literal bytes. Or - // [next_emit, ip_end) after the main loop. - const char* next_emit = ip; const size_t kInputMarginBytes = 15; - if (PREDICT_TRUE(input_size >= kInputMarginBytes)) { + if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { const char* ip_limit = input + input_size - kInputMarginBytes; - for (uint32 next_hash = Hash(++ip, shift); ; ) { - assert(next_emit < ip); + for (uint32_t preload = LittleEndian::Load32(ip + 1);;) { + // Bytes in [next_emit, ip) will be emitted as literal bytes. Or + // [next_emit, ip_end) after the main loop. + const char* next_emit = ip++; + uint64_t data = LittleEndian::Load64(ip); // The body of this loop calls EmitLiteral once and then EmitCopy one or // more times. (The exception is that when we're close to exhausting // the input we goto emit_remainder.) @@ -457,34 +829,66 @@ char* CompressFragment(const char* input, // The "skip" variable keeps track of how many bytes there are since the // last match; dividing it by 32 (ie. right-shifting by five) gives the // number of bytes to move ahead for each iteration. 
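Concretely, the schedule this produces can be tabulated with a few lines mirroring the skip arithmetic in the loop below (an illustrative sketch, not part of the patch):

#include <cstdio>

int main() {
  unsigned skip = 32, bytes_ahead = 0;
  for (int probe = 1; probe <= 64; ++probe) {
    unsigned step = skip >> 5;  // bytes_between_hash_lookups
    skip += step;
    bytes_ahead += step;
    if (probe % 16 == 0)
      std::printf("after %2d misses: %3u bytes ahead, step %u\n", probe,
                  bytes_ahead, step);
  }
}

The first 32 misses advance one byte each, the next 16 advance two bytes, and so on, so incompressible regions are abandoned quickly while compressible input is still sampled densely.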
- uint32 skip = 32; + uint32_t skip = 32; - const char* next_ip = ip; const char* candidate; - do { - ip = next_ip; - uint32 hash = next_hash; - assert(hash == Hash(ip, shift)); - uint32 bytes_between_hash_lookups = skip >> 5; + if (ip_limit - ip >= 16) { + auto delta = ip - base_ip; + for (int j = 0; j < 4; ++j) { + for (int k = 0; k < 4; ++k) { + int i = 4 * j + k; + // These for-loops are meant to be unrolled. So we can freely + // special case the first iteration to use the value already + // loaded in preload. + uint32_t dword = i == 0 ? preload : static_cast(data); + assert(dword == LittleEndian::Load32(ip + i)); + uint16_t* table_entry = TableEntry(table, dword, mask); + candidate = base_ip + *table_entry; + assert(candidate >= base_ip); + assert(candidate < ip + i); + *table_entry = delta + i; + if (SNAPPY_PREDICT_FALSE(LittleEndian::Load32(candidate) == dword)) { + *op = LITERAL | (i << 2); + UnalignedCopy128(next_emit, op + 1); + ip += i; + op = op + i + 2; + goto emit_match; + } + data >>= 8; + } + data = LittleEndian::Load64(ip + 4 * j + 4); + } + ip += 16; + skip += 16; + } + while (true) { + assert(static_cast(data) == LittleEndian::Load32(ip)); + uint16_t* table_entry = TableEntry(table, data, mask); + uint32_t bytes_between_hash_lookups = skip >> 5; skip += bytes_between_hash_lookups; - next_ip = ip + bytes_between_hash_lookups; - if (PREDICT_FALSE(next_ip > ip_limit)) { + const char* next_ip = ip + bytes_between_hash_lookups; + if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { + ip = next_emit; goto emit_remainder; } - next_hash = Hash(next_ip, shift); - candidate = base_ip + table[hash]; + candidate = base_ip + *table_entry; assert(candidate >= base_ip); assert(candidate < ip); - table[hash] = ip - base_ip; - } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) != - UNALIGNED_LOAD32(candidate))); + *table_entry = ip - base_ip; + if (SNAPPY_PREDICT_FALSE(static_cast(data) == + LittleEndian::Load32(candidate))) { + break; + } + data = LittleEndian::Load32(next_ip); + ip = next_ip; + } // Step 2: A 4-byte match has been found. We'll later see if more // than 4 bytes match. But, prior to the match, input // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit, true); + op = EmitLiteral(op, next_emit, ip - next_emit); // Step 3: Call EmitCopy, and then see if another EmitCopy could // be our next move. Repeat until we find no match for the @@ -494,54 +898,239 @@ char* CompressFragment(const char* input, // though we don't yet know how big the literal will be. We handle that // by proceeding to the next iteration of the main loop. We also can exit // this loop via goto if we get close to exhausting the input. - EightBytesReference input_bytes; - uint32 candidate_bytes = 0; - + emit_match: do { // We have a 4-byte match at ip, and no need to emit any // "literal bytes" prior to ip. 
          const char* base = ip;
          std::pair<size_t, bool> p =
-              FindMatchLength(candidate + 4, ip + 4, ip_end);
+              FindMatchLength(candidate + 4, ip + 4, ip_end, &data);
          size_t matched = 4 + p.first;
          ip += matched;
          size_t offset = base - candidate;
          assert(0 == memcmp(base, candidate, matched));
-          op = EmitCopy(op, offset, matched, p.second);
-          next_emit = ip;
-          if (PREDICT_FALSE(ip >= ip_limit)) {
+          if (p.second) {
+            op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+          } else {
+            op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+          }
+          if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
            goto emit_remainder;
          }
+          // Expect 5 bytes to match
+          assert((data & 0xFFFFFFFFFF) ==
+                 (LittleEndian::Load64(ip) & 0xFFFFFFFFFF));
          // We are now looking for a 4-byte match again. We read
-          // table[Hash(ip, shift)] for that. To improve compression,
-          // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
-          input_bytes = GetEightBytesAt(ip - 1);
-          uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
-          table[prev_hash] = ip - base_ip - 1;
-          uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
-          candidate = base_ip + table[cur_hash];
-          candidate_bytes = UNALIGNED_LOAD32(candidate);
-          table[cur_hash] = ip - base_ip;
-        } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
-
-        next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
-        ++ip;
+          // table[Hash(ip, mask)] for that. To improve compression,
+          // we also update table[Hash(ip - 1, mask)] and table[Hash(ip, mask)].
+          *TableEntry(table, LittleEndian::Load32(ip - 1), mask) =
+              ip - base_ip - 1;
+          uint16_t* table_entry = TableEntry(table, data, mask);
+          candidate = base_ip + *table_entry;
+          *table_entry = ip - base_ip;
+          // Measurements on the benchmarks have shown the following
+          // probabilities for the loop to exit (i.e. the avg. number of
+          // iterations is the reciprocal).
+          // BM_Flat/6  txt1    p = 0.3-0.4
+          // BM_Flat/7  txt2    p = 0.35
+          // BM_Flat/8  txt3    p = 0.3-0.4
+          // BM_Flat/9  txt3    p = 0.34-0.4
+          // BM_Flat/10 pb      p = 0.4
+          // BM_Flat/11 gaviota p = 0.1
+          // BM_Flat/12 cp      p = 0.5
+          // BM_Flat/13 c       p = 0.3
+        } while (static_cast<uint32_t>(data) == LittleEndian::Load32(candidate));
+        // Because the least significant 5 bytes matched, we can reuse data
+        // for the next iteration.
+        preload = data >> 8;
      }
    }

- emit_remainder:
+emit_remainder:
  // Emit the remaining bytes as a literal
-  if (next_emit < ip_end) {
-    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
+  if (ip < ip_end) {
+    op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+  }
+
+  return op;
+}
+
+char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
+                                 uint16_t* table, const int table_size,
+                                 uint16_t* table2, const int table_size2) {
+  (void)table_size2;
+  assert(table_size == table_size2);
+  // "ip" is the input pointer, and "op" is the output pointer.
+ const char* ip = input; + assert(input_size <= kBlockSize); + assert((table_size & (table_size - 1)) == 0); // table must be power of two + const uint32_t mask = 2 * (table_size - 1); + const char* ip_end = input + input_size; + const char* base_ip = ip; + + const size_t kInputMarginBytes = 15; + if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { + const char* ip_limit = input + input_size - kInputMarginBytes; + + for (;;) { + const char* next_emit = ip++; + uint64_t data = LittleEndian::Load64(ip); + uint32_t skip = 512; + + const char* candidate; + uint32_t candidate_length; + while (true) { + assert(static_cast(data) == LittleEndian::Load32(ip)); + uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask); + uint32_t bytes_between_hash_lookups = skip >> 9; + skip++; + const char* next_ip = ip + bytes_between_hash_lookups; + if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { + ip = next_emit; + goto emit_remainder; + } + candidate = base_ip + *table_entry2; + assert(candidate >= base_ip); + assert(candidate < ip); + + *table_entry2 = ip - base_ip; + if (SNAPPY_PREDICT_FALSE(static_cast(data) == + LittleEndian::Load32(candidate))) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + break; + } + + uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask); + candidate = base_ip + *table_entry; + assert(candidate >= base_ip); + assert(candidate < ip); + + *table_entry = ip - base_ip; + if (SNAPPY_PREDICT_FALSE(static_cast(data) == + LittleEndian::Load32(candidate))) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + table_entry2 = + TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask); + auto candidate2 = base_ip + *table_entry2; + size_t candidate_length2 = + FindMatchLengthPlain(candidate2, ip + 1, ip_end); + if (candidate_length2 > candidate_length) { + *table_entry2 = ip - base_ip; + candidate = candidate2; + candidate_length = candidate_length2; + ++ip; + } + break; + } + data = LittleEndian::Load64(next_ip); + ip = next_ip; + } + // Backtrack to the point it matches fully. + while (ip > next_emit && candidate > base_ip && + *(ip - 1) == *(candidate - 1)) { + --ip; + --candidate; + ++candidate_length; + } + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) = + ip - base_ip + 1; + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) = + ip - base_ip + 2; + *TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) = + ip - base_ip + 1; + // Step 2: A 4-byte or 8-byte match has been found. + // We'll later see if more than 4 bytes match. But, prior to the match, + // input bytes [next_emit, ip) are unmatched. Emit them as + // "literal bytes." + assert(next_emit + 16 <= ip_end); + if (ip - next_emit > 0) { + op = EmitLiteral(op, next_emit, + ip - next_emit); + } + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + do { + // We have a 4-byte match at ip, and no need to emit any + // "literal bytes" prior to ip. 
+ const char* base = ip; + ip += candidate_length; + size_t offset = base - candidate; + if (candidate_length < 12) { + op = + EmitCopy(op, offset, candidate_length); + } else { + op = EmitCopy(op, offset, + candidate_length); + } + if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { + goto emit_remainder; + } + // We are now looking for a 4-byte match again. We read + // table[Hash(ip, mask)] for that. To improve compression, + // we also update several previous table entries. + if (ip - base_ip > 7) { + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) = + ip - base_ip - 7; + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) = + ip - base_ip - 4; + } + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) = + ip - base_ip - 3; + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) = + ip - base_ip - 2; + *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) = + ip - base_ip - 2; + *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) = + ip - base_ip - 1; + + uint16_t* table_entry = + TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask); + candidate = base_ip + *table_entry; + *table_entry = ip - base_ip; + if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + continue; + } + table_entry = + TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask); + candidate = base_ip + *table_entry; + *table_entry = ip - base_ip; + if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + continue; + } + break; + } while (true); + } + } + +emit_remainder: + // Emit the remaining bytes as a literal + if (ip < ip_end) { + op = EmitLiteral(op, ip, ip_end - ip); } return op; } } // end namespace internal -// Called back at avery compression call to trace parameters and sizes. -static inline void Report(const char *algorithm, size_t compressed_size, - size_t uncompressed_size) {} +static inline void Report(int token, const char *algorithm, size_t +compressed_size, size_t uncompressed_size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)token; + (void)algorithm; + (void)compressed_size; + (void)uncompressed_size; +} // Signature of output types needed by decompression code. // The decompression code is templatized on a type that obeys this @@ -553,12 +1142,28 @@ static inline void Report(const char *algorithm, size_t compressed_size, // // Called before decompression // void SetExpectedLength(size_t length); // +// // For performance a writer may choose to donate the cursor variable to the +// // decompression function. The decompression will inject it in all its +// // function calls to the writer. Keeping the important output cursor as a +// // function local stack variable allows the compiler to keep it in +// // register, which greatly aids performance by avoiding loads and stores of +// // this variable in the fast path loop iterations. +// T GetOutputPtr() const; +// +// // At end of decompression the loop donates the ownership of the cursor +// // variable back to the writer by calling this function. 
+// void SetOutputPtr(T op); +// // // Called after decompression // bool CheckLength() const; // // // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); +// // Each function get a pointer to the op (output pointer), that the writer +// // can use and update. Note it's important that these functions get fully +// // inlined so that no actual address of the local variable needs to be +// // taken. +// bool Append(const char* ip, size_t length, T* op); +// bool AppendFromSelf(uint32_t offset, size_t length, T* op); // // // The rules for how TryFastAppend differs from Append are somewhat // // convoluted: @@ -580,27 +1185,341 @@ static inline void Report(const char *algorithm, size_t compressed_size, // // as it is unlikely that one would implement a fast path accepting // // this much data. // // -// bool TryFastAppend(const char* ip, size_t available, size_t length); +// bool TryFastAppend(const char* ip, size_t available, size_t length, T* op); // }; -namespace internal { +static inline uint32_t ExtractLowBytes(const uint32_t& v, int n) { + assert(n >= 0); + assert(n <= 4); +#if SNAPPY_HAVE_BMI2 + return _bzhi_u32(v, 8 * n); +#else + // This needs to be wider than uint32_t otherwise `mask << 32` will be + // undefined. + uint64_t mask = 0xffffffff; + return v & ~(mask << (8 * n)); +#endif +} + +static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) { + assert(shift < 32); + static const uint8_t masks[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; + return (value & masks[shift]) != 0; +} + +inline bool Copy64BytesWithPatternExtension(ptrdiff_t dst, size_t offset) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)dst; + return offset != 0; +} + +// Copies between size bytes and 64 bytes from src to dest. size cannot exceed +// 64. More than size bytes, but never exceeding 64, might be copied if doing +// so gives better performance. [src, src + size) must not overlap with +// [dst, dst + size), but [src, src + 64) may overlap with [dst, dst + 64). +void MemCopy64(char* dst, const void* src, size_t size) { + // Always copy this many bytes. If that's below size then copy the full 64. + constexpr int kShortMemCopy = 32; + (void)kShortMemCopy; + assert(size <= 64); + assert(std::less_equal()(static_cast(src) + size, + dst) || + std::less_equal()(dst + size, src)); + + // We know that src and dst are at least size bytes apart. However, because we + // might copy more than size bytes the copy still might overlap past size. + // E.g. if src and dst appear consecutively in memory (src + size >= dst). + // TODO: Investigate wider copies on other platforms. +#if defined(__x86_64__) && defined(__AVX__) + assert(kShortMemCopy <= 32); + __m256i data = _mm256_lddqu_si256(static_cast(src)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), data); + // Profiling shows that nearly all copies are short. + if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) { + data = _mm256_lddqu_si256(static_cast(src) + 1); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst) + 1, data); + } + // RVV acceleration available on RISC-V when compiled with -march=rv64gcv +#elif defined(__riscv) && SNAPPY_HAVE_RVV + // Cast pointers to the type we will operate on. 
+ unsigned char* dst_ptr = reinterpret_cast(dst); + const unsigned char* src_ptr = reinterpret_cast(src); + size_t remaining_bytes = size; + // Loop as long as there are bytes remaining to be copied. + while (remaining_bytes > 0) { + // Set vector configuration: e8 (8-bit elements), m2 (LMUL=2). + // Use e8m2 configuration to maximize throughput. + size_t vl = VSETVL_E8M2(remaining_bytes); + // Load data from the current source pointer. + vuint8m2_t vec = VLE8_V_U8M2(src_ptr, vl); + // Store data to the current destination pointer. + VSE8_V_U8M2(dst_ptr, vec, vl); + // Update pointers and the remaining count. + src_ptr += vl; + dst_ptr += vl; + remaining_bytes -= vl; + } + +#else + std::memmove(dst, src, kShortMemCopy); + // Profiling shows that nearly all copies are short. + if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) { + std::memmove(dst + kShortMemCopy, + static_cast(src) + kShortMemCopy, + 64 - kShortMemCopy); + } +#endif +} + +void MemCopy64(ptrdiff_t dst, const void* src, size_t size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)dst; + (void)src; + (void)size; +} + +void ClearDeferred(const void** deferred_src, size_t* deferred_length, + uint8_t* safe_source) { + *deferred_src = safe_source; + *deferred_length = 0; +} + +void DeferMemCopy(const void** deferred_src, size_t* deferred_length, + const void* src, size_t length) { + *deferred_src = src; + *deferred_length = length; +} -// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits -static const uint32 wordmask[] = { - 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +inline size_t AdvanceToNextTagARMOptimized(const uint8_t** ip_p, size_t* tag) { + const uint8_t*& ip = *ip_p; + // This section is crucial for the throughput of the decompression loop. + // The latency of an iteration is fundamentally constrained by the + // following data chain on ip. + // ip -> c = Load(ip) -> delta1 = (c & 3) -> ip += delta1 or delta2 + // delta2 = ((c >> 2) + 1) ip++ + // This is different from X86 optimizations because ARM has conditional add + // instruction (csinc) and it removes several register moves. + const size_t tag_type = *tag & 3; + const bool is_literal = (tag_type == 0); + if (is_literal) { + size_t next_literal_tag = (*tag >> 2) + 1; + *tag = ip[next_literal_tag]; + ip += next_literal_tag + 1; + } else { + *tag = ip[tag_type]; + ip += tag_type + 1; + } + return tag_type; +} + +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +inline size_t AdvanceToNextTagX86Optimized(const uint8_t** ip_p, size_t* tag) { + const uint8_t*& ip = *ip_p; + // This section is crucial for the throughput of the decompression loop. + // The latency of an iteration is fundamentally constrained by the + // following data chain on ip. + // ip -> c = Load(ip) -> ip1 = ip + 1 + (c & 3) -> ip = ip1 or ip2 + // ip2 = ip + 2 + (c >> 2) + // This amounts to 8 cycles. + // 5 (load) + 1 (c & 3) + 1 (lea ip1, [ip + (c & 3) + 1]) + 1 (cmov) + size_t literal_len = *tag >> 2; + size_t tag_type = *tag; + bool is_literal; +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // TODO clang misses the fact that the (c & 3) already correctly + // sets the zero flag. + asm("and $3, %k[tag_type]\n\t" + : [tag_type] "+r"(tag_type), "=@ccz"(is_literal) + :: "cc"); +#else + tag_type &= 3; + is_literal = (tag_type == 0); +#endif + // TODO + // This is code is subtle. Loading the values first and then cmov has less + // latency then cmov ip and then load. 
However clang would move the loads + // in an optimization phase, volatile prevents this transformation. + // Note that we have enough slop bytes (64) that the loads are always valid. + size_t tag_literal = + static_cast(ip)[1 + literal_len]; + size_t tag_copy = static_cast(ip)[tag_type]; + *tag = is_literal ? tag_literal : tag_copy; + const uint8_t* ip_copy = ip + 1 + tag_type; + const uint8_t* ip_literal = ip + 2 + literal_len; + ip = is_literal ? ip_literal : ip_copy; +#if defined(__GNUC__) && defined(__x86_64__) + // TODO Clang is "optimizing" zero-extension (a totally free + // operation) this means that after the cmov of tag, it emits another movzb + // tag, byte(tag). It really matters as it's on the core chain. This dummy + // asm, persuades clang to do the zero-extension at the load (it's automatic) + // removing the expensive movzb. + asm("" ::"r"(tag_copy)); +#endif + return tag_type; +} + +// Extract the offset for copy-1 and copy-2 returns 0 for literals or copy-4. +inline uint32_t ExtractOffset(uint32_t val, size_t tag_type) { + // For x86 non-static storage works better. For ARM static storage is better. + // TODO: Once the array is recognized as a register, improve the + // readability for x86. +#if defined(__x86_64__) + constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull; + uint16_t result; + memcpy(&result, + reinterpret_cast(&kExtractMasksCombined) + 2 * tag_type, + sizeof(result)); + return val & result; +#elif defined(__aarch64__) + constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull; + return val & static_cast( + (kExtractMasksCombined >> (tag_type * 16)) & 0xFFFF); +#else + static constexpr uint32_t kExtractMasks[4] = {0, 0xFF, 0xFFFF, 0}; + return val & kExtractMasks[tag_type]; +#endif }; -} // end namespace internal +// Core decompression loop, when there is enough data available. +// Decompresses the input buffer [ip, ip_limit) into the output buffer +// [op, op_limit_min_slop). Returning when either we are too close to the end +// of the input buffer, or we exceed op_limit_min_slop or when a exceptional +// tag is encountered (literal of length > 60) or a copy-4. +// Returns {ip, op} at the points it stopped decoding. +// TODO This function probably does not need to be inlined, as it +// should decode large chunks at a time. This allows runtime dispatch to +// implementations based on CPU capability (BMI2 / perhaps 32 / 64 byte memcpy). +template +std::pair DecompressBranchless( + const uint8_t* ip, const uint8_t* ip_limit, ptrdiff_t op, T op_base, + ptrdiff_t op_limit_min_slop) { + // If deferred_src is invalid point it here. + uint8_t safe_source[64]; + const void* deferred_src; + size_t deferred_length; + ClearDeferred(&deferred_src, &deferred_length, safe_source); + + // We unroll the inner loop twice so we need twice the spare room. + op_limit_min_slop -= kSlopBytes; + if (2 * (kSlopBytes + 1) < ip_limit - ip && op < op_limit_min_slop) { + const uint8_t* const ip_limit_min_slop = ip_limit - 2 * kSlopBytes - 1; + ip++; + // ip points just past the tag and we are touching at maximum kSlopBytes + // in an iteration. + size_t tag = ip[-1]; +#if defined(__clang__) && defined(__aarch64__) + // Workaround for https://bugs.llvm.org/show_bug.cgi?id=51317 + // when loading 1 byte, clang for aarch64 doesn't realize that it(ldrb) + // comes with free zero-extension, so clang generates another + // 'and xn, xm, 0xff' before it use that as the offset. 
This 'and' is + // redundant and can be removed by adding this dummy asm, which gives + // clang a hint that we're doing the zero-extension at the load. + asm("" ::"r"(tag)); +#endif + do { + // The throughput is limited by instructions, unrolling the inner loop + // twice reduces the amount of instructions checking limits and also + // leads to reduced mov's. + + SNAPPY_PREFETCH(ip + 128); + for (int i = 0; i < 2; i++) { + const uint8_t* old_ip = ip; + assert(tag == ip[-1]); + // For literals tag_type = 0, hence we will always obtain 0 from + // ExtractLowBytes. For literals offset will thus be kLiteralOffset. + ptrdiff_t len_minus_offset = kLengthMinusOffset[tag]; + uint32_t next; +#if defined(__aarch64__) + size_t tag_type = AdvanceToNextTagARMOptimized(&ip, &tag); + // We never need more than 16 bits. Doing a Load16 allows the compiler + // to elide the masking operation in ExtractOffset. + next = LittleEndian::Load16(old_ip); +#else + size_t tag_type = AdvanceToNextTagX86Optimized(&ip, &tag); + next = LittleEndian::Load32(old_ip); +#endif + size_t len = len_minus_offset & 0xFF; + ptrdiff_t extracted = ExtractOffset(next, tag_type); + ptrdiff_t len_min_offset = len_minus_offset - extracted; + if (SNAPPY_PREDICT_FALSE(len_minus_offset > extracted)) { + if (SNAPPY_PREDICT_FALSE(len & 0x80)) { + // Exceptional case (long literal or copy 4). + // Actually doing the copy here is negatively impacting the main + // loop due to compiler incorrectly allocating a register for + // this fallback. Hence we just break. + break_loop: + ip = old_ip; + goto exit; + } + // Only copy-1 or copy-2 tags can get here. + assert(tag_type == 1 || tag_type == 2); + std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len; + // Guard against copies before the buffer start. + // Execute any deferred MemCopy since we write to dst here. + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + ClearDeferred(&deferred_src, &deferred_length, safe_source); + if (SNAPPY_PREDICT_FALSE(delta < 0 || + !Copy64BytesWithPatternExtension( + op_base + op, len - len_min_offset))) { + goto break_loop; + } + // We aren't deferring this copy so add length right away. + op += len; + continue; + } + std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len; + if (SNAPPY_PREDICT_FALSE(delta < 0)) { + // Due to the spurious offset in literals have this will trigger + // at the start of a block when op is still smaller than 256. + if (tag_type != 0) goto break_loop; + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + DeferMemCopy(&deferred_src, &deferred_length, old_ip, len); + continue; + } + + // For copies we need to copy from op_base + delta, for literals + // we need to copy from ip instead of from the stream. + const void* from = + tag_type ? reinterpret_cast(op_base + delta) : old_ip; + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + DeferMemCopy(&deferred_src, &deferred_length, from, len); + } + } while (ip < ip_limit_min_slop && + static_cast(op + deferred_length) < op_limit_min_slop); + exit: + ip--; + assert(ip <= ip_limit); + } + // If we deferred a copy then we can perform. If we are up to date then we + // might not have enough slop bytes and could run past the end. 
+ if (deferred_length) { + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + ClearDeferred(&deferred_src, &deferred_length, safe_source); + } + return {ip, op}; +} // Helper class for decompression class SnappyDecompressor { private: - Source* reader_; // Underlying source of bytes to decompress - const char* ip_; // Points to next buffered byte - const char* ip_limit_; // Points just past buffered bytes - uint32 peeked_; // Bytes peeked from reader (need to skip) - bool eof_; // Hit end of input without an error? - char scratch_[kMaximumTagLength]; // See RefillTag(). + Source* reader_; // Underlying source of bytes to decompress + const char* ip_; // Points to next buffered byte + const char* ip_limit_; // Points just past buffered bytes + // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from + // buffer. + const char* ip_limit_min_maxtaglen_; + uint64_t peeked_; // Bytes peeked from reader (need to skip) + bool eof_; // Hit end of input without an error? + char scratch_[kMaximumTagLength]; // See RefillTag(). // Ensure that all of the tag metadata for the next tag is available // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even @@ -609,14 +1528,14 @@ class SnappyDecompressor { // Returns true on success, false on error or end of input. bool RefillTag(); + void ResetLimit(const char* ip) { + ip_limit_min_maxtaglen_ = + ip_limit_ - std::min(ip_limit_ - ip, kMaximumTagLength - 1); + } + public: explicit SnappyDecompressor(Source* reader) - : reader_(reader), - ip_(NULL), - ip_limit_(NULL), - peeked_(0), - eof_(false) { - } + : reader_(reader), ip_(NULL), ip_limit_(NULL), peeked_(0), eof_(false) {} ~SnappyDecompressor() { // Advance past any bytes we peeked at from the reader @@ -624,18 +1543,16 @@ class SnappyDecompressor { } // Returns true iff we have hit the end of the input without an error. - bool eof() const { - return eof_; - } + bool eof() const { return eof_; } // Read the uncompressed length stored at the start of the compressed data. - // On succcess, stores the length in *result and returns true. + // On success, stores the length in *result and returns true. // On failure, returns false. - bool ReadUncompressedLength(uint32* result) { - assert(ip_ == NULL); // Must not have read anything yet + bool ReadUncompressedLength(uint32_t* result) { + assert(ip_ == NULL); // Must not have read anything yet // Length is encoded in 1..5 bytes *result = 0; - uint32 shift = 0; + uint32_t shift = 0; while (true) { if (shift >= 32) return false; size_t n; @@ -643,8 +1560,8 @@ class SnappyDecompressor { if (n == 0) return false; const unsigned char c = *(reinterpret_cast(ip)); reader_->Skip(1); - uint32 val = c & 0x7f; - if (((val << shift) >> shift) != val) return false; + uint32_t val = c & 0x7f; + if (LeftShiftOverflows(static_cast(val), shift)) return false; *result |= val << shift; if (c < 128) { break; @@ -657,36 +1574,47 @@ class SnappyDecompressor { // Process the next item found in the input. // Returns true if successful, false on error or end of input. template - void DecompressAllTags(Writer* writer) { +#if defined(__GNUC__) && defined(__x86_64__) + __attribute__((aligned(32))) +#endif + void + DecompressAllTags(Writer* writer) { const char* ip = ip_; - // For position-independent executables, accessing global arrays can be - // slow. Move wordmask array onto the stack to mitigate this. 
-    uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
-    // Do not use memcpy to copy internal::wordmask to
-    // wordmask. LLVM converts stack arrays to global arrays if it detects
-    // const stack arrays and this hurts the performance of position
-    // independent code. This change is temporary and can be reverted when
-    // https://reviews.llvm.org/D30759 is approved.
-    wordmask[0] = internal::wordmask[0];
-    wordmask[1] = internal::wordmask[1];
-    wordmask[2] = internal::wordmask[2];
-    wordmask[3] = internal::wordmask[3];
-    wordmask[4] = internal::wordmask[4];
-
+    ResetLimit(ip);
+    auto op = writer->GetOutputPtr();
    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
    // scope to optimize the expression based on the local
    // context, which overall increases speed.
-  #define MAYBE_REFILL() \
-    if (ip_limit_ - ip < kMaximumTagLength) { \
-      ip_ = ip; \
-      if (!RefillTag()) return; \
-      ip = ip_; \
-    }
-
+#define MAYBE_REFILL()                                       \
+  if (SNAPPY_PREDICT_FALSE(ip >= ip_limit_min_maxtaglen_)) { \
+    ip_ = ip;                                                \
+    if (SNAPPY_PREDICT_FALSE(!RefillTag())) goto exit;       \
+    ip = ip_;                                                \
+    ResetLimit(ip);                                          \
+  }                                                          \
+  preload = static_cast<uint8_t>(*ip)
+
+    // At the start of the for loop below the least significant byte of preload
+    // contains the tag.
+    uint32_t preload;
    MAYBE_REFILL();
-    for ( ;; ) {
-      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
+    for (;;) {
+      {
+        ptrdiff_t op_limit_min_slop;
+        auto op_base = writer->GetBase(&op_limit_min_slop);
+        if (op_base) {
+          auto res =
+              DecompressBranchless(reinterpret_cast<const uint8_t*>(ip),
+                                   reinterpret_cast<const uint8_t*>(ip_limit_),
+                                   op - op_base, op_base, op_limit_min_slop);
+          ip = reinterpret_cast<const char*>(res.first);
+          op = op_base + res.second;
+          MAYBE_REFILL();
+        }
+      }
+      const uint8_t c = static_cast<uint8_t>(preload);
+      ip++;

      // Ratio of iterations that have LITERAL vs non-LITERAL for different
      // inputs.
@@ -700,67 +1628,102 @@ class SnappyDecompressor {
      //  txt[1-4]  25%  75%
      //  pb        24%  76%
      //  bin       24%  76%
-      if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
+      if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
        size_t literal_length = (c >> 2) + 1u;
-        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
+        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length, &op)) {
          assert(literal_length < 61);
          ip += literal_length;
-          // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
+          // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
          // will not return true unless there's already at least five spare
          // bytes in addition to the literal.
+          preload = static_cast<uint8_t>(*ip);
          continue;
        }
-        if (PREDICT_FALSE(literal_length >= 61)) {
+        if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
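          // Worked example: a tag byte c == 0xF0 encodes (c >> 2) + 1 == 61,
          // i.e. a long literal whose length is stored in 61 - 60 == 1
          // trailing byte; a trailing byte of 0x2A then yields a literal
          // length of 0x2A + 1 == 43 bytes, as computed below.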
          const size_t literal_length_length = literal_length - 60;
          literal_length =
-              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
+              ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+              1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
-          if (!writer->Append(ip, avail)) return;
+          if (!writer->Append(ip, avail, &op)) goto exit;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
-          if (avail == 0) return;  // Premature end of input
+          if (avail == 0) goto exit;
          ip_limit_ = ip + avail;
+          ResetLimit(ip);
        }
-        if (!writer->Append(ip, literal_length)) {
-          return;
-        }
+        if (!writer->Append(ip, literal_length, &op)) goto exit;
        ip += literal_length;
        MAYBE_REFILL();
      } else {
-        const size_t entry = char_table[c];
-        const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
-        const size_t length = entry & 0xff;
-        ip += entry >> 11;
-
-        // copy_offset/256 is encoded in bits 8..10. By just fetching
-        // those bits, we get copy_offset (since the bit-field starts at
-        // bit 8).
-        const size_t copy_offset = entry & 0x700;
-        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
-          return;
+        if (SNAPPY_PREDICT_FALSE((c & 3) == COPY_4_BYTE_OFFSET)) {
+          const size_t copy_offset = LittleEndian::Load32(ip);
+          const size_t length = (c >> 2) + 1;
+          ip += 4;
+
+          if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+        } else {
+          const ptrdiff_t entry = kLengthMinusOffset[c];
+          preload = LittleEndian::Load32(ip);
+          const uint32_t trailer = ExtractLowBytes(preload, c & 3);
+          const uint32_t length = entry & 0xff;
+          assert(length > 0);
+
+          // copy_offset/256 is encoded in bits 8..10. By just fetching
+          // those bits, we get copy_offset (since the bit-field starts at
+          // bit 8).
+          const uint32_t copy_offset = trailer - entry + length;
+          if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+
+          ip += (c & 3);
+          // By using the result of the previous load we reduce the critical
+          // dependency chain of ip to 4 cycles.
+          preload >>= (c & 3) * 8;
+          if (ip < ip_limit_min_maxtaglen_) continue;
+        }
        MAYBE_REFILL();
      }
    }
-  #undef MAYBE_REFILL
+#undef MAYBE_REFILL
+  exit:
+    writer->SetOutputPtr(op);
  }
};

+constexpr uint32_t CalculateNeeded(uint8_t tag) {
+  return ((tag & 3) == 0 && tag >= (60 * 4))
+             ? (tag >> 2) - 58
+             : (0x05030201 >> ((tag * 8) & 31)) & 0xFF;
+}
+
+#if __cplusplus >= 201402L
+constexpr bool VerifyCalculateNeeded() {
+  // Verify all 256 possible tag bytes, not just tag 0.
+  for (int i = 0; i < 256; i++) {
+    if (CalculateNeeded(i) != static_cast<uint32_t>(char_table[i] >> 11) + 1)
+      return false;
+  }
+  return true;
+}
+
+// Make sure CalculateNeeded is correct by verifying it against the established
+// table encoding the number of added bytes needed.
+static_assert(VerifyCalculateNeeded(), "");
+#endif  // c++14

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);  // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
@@ -772,26 +1735,31 @@ bool SnappyDecompressor::RefillTag() {
  // Read the tag character
  assert(ip < ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
-  const uint32 entry = char_table[c];
-  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
+  // At this point make sure that the data for the next tag is consecutive.
+  // For copy 1 this means the next 2 bytes (tag and 1 byte offset)
+  // For copy 2 the next 3 bytes (tag and 2 byte offset)
+  // For copy 4 the next 5 bytes (tag and 4 byte offset)
+  // For all small literals we only need 1 byte, but for literals 60...63 the
+  // length is encoded in 1...4 extra bytes.
+  const uint32_t needed = CalculateNeeded(c);
  assert(needed <= sizeof(scratch_));

  // Read more bytes from reader if needed
-  uint32 nbuf = ip_limit_ - ip;
+  uint64_t nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents. We store the needed bytes in "scratch_". They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
-    memmove(scratch_, ip, nbuf);
+    std::memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
-      uint32 to_add = std::min(needed - nbuf, length);
-      memcpy(scratch_ + nbuf, src, to_add);
+      uint64_t to_add = std::min<uint64_t>(needed - nbuf, length);
+      std::memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
@@ -801,7 +1769,7 @@ bool SnappyDecompressor::RefillTag() {
  } else if (nbuf < kMaximumTagLength) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
-    memmove(scratch_, ip, nbuf);
+    std::memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
@@ -817,7 +1785,7 @@
template <typename Writer>
static bool InternalUncompress(Source* r, Writer* writer) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
-  uint32 uncompressed_len = 0;
+  uint32_t uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;

  return InternalUncompressAllTags(&decompressor, writer, r->Available(),
@@ -826,10 +1794,10 @@ static bool InternalUncompress(Source* r, Writer* writer) {

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
-                                      Writer* writer,
-                                      uint32 compressed_len,
-                                      uint32 uncompressed_len) {
-  Report("snappy_uncompress", compressed_len, uncompressed_len);
+                                      Writer* writer, uint32_t compressed_len,
+                                      uint32_t uncompressed_len) {
+  int token = 0;
+  Report(token, "snappy_uncompress", compressed_len, uncompressed_len);

  writer->SetExpectedLength(uncompressed_len);

@@ -839,23 +1807,28 @@ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
  return (decompressor->eof() && writer->CheckLength());
}

-bool GetUncompressedLength(Source* source, uint32* result) {
+bool GetUncompressedLength(Source* source, uint32_t* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
+  return Compress(reader, writer, CompressionOptions{});
+}
+
+size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
+  assert(options.level == 1 || options.level == 2);
+  int token = 0;
  size_t written = 0;
  size_t N = reader->Available();
+  assert(N <= 0xFFFFFFFFu);
  const size_t uncompressed_size = N;
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
-  writer->Append(ulength, p-ulength);
+  writer->Append(ulength, p - ulength);
  written += (p - ulength);

-  internal::WorkingMemory wmem;
-  char* scratch = NULL;
-  char* scratch_output = NULL;
+  internal::WorkingMemory wmem(N);

  while (N > 0) {
    // Get next block to compress (without
copying if possible) @@ -871,20 +1844,14 @@ size_t Compress(Source* reader, Sink* writer) { pending_advance = num_to_read; fragment_size = num_to_read; } else { - // Read into scratch buffer - if (scratch == NULL) { - // If this is the last iteration, we want to allocate N bytes - // of space, otherwise the max possible kBlockSize space. - // num_to_read contains exactly the correct value - scratch = new char[num_to_read]; - } - memcpy(scratch, fragment, bytes_read); + char* scratch = wmem.GetScratchInput(); + std::memcpy(scratch, fragment, bytes_read); reader->Skip(bytes_read); while (bytes_read < num_to_read) { fragment = reader->Peek(&fragment_size); size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); + std::memcpy(scratch + bytes_read, fragment, n); bytes_read += n; reader->Skip(n); } @@ -896,23 +1863,26 @@ size_t Compress(Source* reader, Sink* writer) { // Get encoding table for compression int table_size; - uint16* table = wmem.GetHashTable(num_to_read, &table_size); + uint16_t* table = wmem.GetHashTable(num_to_read, &table_size); // Compress input_fragment and append to dest - const int max_output = MaxCompressedLength(num_to_read); + int max_output = MaxCompressedLength(num_to_read); + // Since we encode kBlockSize regions followed by a region + // which is <= kBlockSize in length, a previously allocated + // scratch_output[] region is big enough for this iteration. // Need a scratch buffer for the output, in case the byte sink doesn't // have room for us directly. - if (scratch_output == NULL) { - scratch_output = new char[max_output]; - } else { - // Since we encode kBlockSize regions followed by a region - // which is <= kBlockSize in length, a previously allocated - // scratch_output[] region is big enough for this iteration. + char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); + char* end = nullptr; + if (options.level == 1) { + end = internal::CompressFragment(fragment, fragment_size, dest, table, + table_size); + } else if (options.level == 2) { + end = internal::CompressFragmentDoubleHash( + fragment, fragment_size, dest, table, table_size >> 1, + table + (table_size >> 1), table_size >> 1); } - char* dest = writer->GetAppendBuffer(max_output, scratch_output); - char* end = internal::CompressFragment(fragment, fragment_size, - dest, table, table_size); writer->Append(dest, end - dest); written += (end - dest); @@ -920,11 +1890,7 @@ size_t Compress(Source* reader, Sink* writer) { reader->Skip(pending_advance); } - Report("snappy_compress", written, uncompressed_size); - - delete[] scratch; - delete[] scratch_output; - + Report(token, "snappy_compress", written, uncompressed_size); return written; } @@ -932,19 +1898,88 @@ size_t Compress(Source* reader, Sink* writer) { // IOVec interfaces // ----------------------------------------------------------------------- +// A `Source` implementation that yields the contents of an `iovec` array. Note +// that `total_size` is the total number of bytes to be read from the elements +// of `iov` (_not_ the total number of elements in `iov`). +class SnappyIOVecReader : public Source { + public: + SnappyIOVecReader(const struct iovec* iov, size_t total_size) + : curr_iov_(iov), + curr_pos_(total_size > 0 ? reinterpret_cast(iov->iov_base) + : nullptr), + curr_size_remaining_(total_size > 0 ? iov->iov_len : 0), + total_size_remaining_(total_size) { + // Skip empty leading `iovec`s. 
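+    // (An iovec array may legally contain zero-length entries anywhere, and
+    // Peek() must never report an empty fragment while data remains, so such
+    // entries are skipped eagerly here and in Advance().)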
+ if (total_size > 0 && curr_size_remaining_ == 0) Advance(); + } + + ~SnappyIOVecReader() override = default; + + size_t Available() const override { return total_size_remaining_; } + + const char* Peek(size_t* len) override { + *len = curr_size_remaining_; + return curr_pos_; + } + + void Skip(size_t n) override { + while (n >= curr_size_remaining_ && n > 0) { + n -= curr_size_remaining_; + Advance(); + } + curr_size_remaining_ -= n; + total_size_remaining_ -= n; + curr_pos_ += n; + } + + private: + // Advances to the next nonempty `iovec` and updates related variables. + void Advance() { + do { + assert(total_size_remaining_ >= curr_size_remaining_); + total_size_remaining_ -= curr_size_remaining_; + if (total_size_remaining_ == 0) { + curr_pos_ = nullptr; + curr_size_remaining_ = 0; + return; + } + ++curr_iov_; + curr_pos_ = reinterpret_cast(curr_iov_->iov_base); + curr_size_remaining_ = curr_iov_->iov_len; + } while (curr_size_remaining_ == 0); + } + + // The `iovec` currently being read. + const struct iovec* curr_iov_; + // The location in `curr_iov_` currently being read. + const char* curr_pos_; + // The amount of unread data in `curr_iov_`. + size_t curr_size_remaining_; + // The amount of unread data in the entire input array. + size_t total_size_remaining_; +}; + // A type that writes to an iovec. // Note that this is not a "ByteSink", but a type that matches the // Writer template argument to SnappyDecompressor::DecompressAllTags(). class SnappyIOVecWriter { private: + // output_iov_end_ is set to iov + count and used to determine when + // the end of the iovs is reached. + const struct iovec* output_iov_end_; + +#if !defined(NDEBUG) const struct iovec* output_iov_; - const size_t output_iov_count_; +#endif // !defined(NDEBUG) + + // Current iov that is being written into. + const struct iovec* curr_iov_; - // We are currently writing into output_iov_[curr_iov_index_]. - size_t curr_iov_index_; + // Pointer to current iov's write location. + char* curr_iov_output_; - // Bytes written to output_iov_[curr_iov_index_] so far. - size_t curr_iov_written_; + // Remaining bytes to write into curr_iov_output. + size_t curr_iov_remaining_; // Total bytes decompressed into output_iov_ so far. size_t total_written_; @@ -952,53 +1987,61 @@ class SnappyIOVecWriter { // Maximum number of bytes that will be decompressed into output_iov_. size_t output_limit_; - inline char* GetIOVecPointer(size_t index, size_t offset) { - return reinterpret_cast(output_iov_[index].iov_base) + - offset; + static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { + return reinterpret_cast(iov->iov_base) + offset; } public: // Does not take ownership of iov. iov must be valid during the // entire lifetime of the SnappyIOVecWriter. inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count) - : output_iov_(iov), - output_iov_count_(iov_count), - curr_iov_index_(0), - curr_iov_written_(0), + : output_iov_end_(iov + iov_count), +#if !defined(NDEBUG) + output_iov_(iov), +#endif // !defined(NDEBUG) + curr_iov_(iov), + curr_iov_output_(iov_count ? reinterpret_cast(iov->iov_base) + : nullptr), + curr_iov_remaining_(iov_count ? 
iov->iov_len : 0), total_written_(0), output_limit_(-1) { } - inline void SetExpectedLength(size_t len) { - output_limit_ = len; - } + inline void SetExpectedLength(size_t len) { output_limit_ = len; } - inline bool CheckLength() const { - return total_written_ == output_limit_; - } + inline bool CheckLength() const { return total_written_ == output_limit_; } - inline bool Append(const char* ip, size_t len) { + inline bool Append(const char* ip, size_t len, char**) { if (total_written_ + len > output_limit_) { return false; } + return AppendNoCheck(ip, len); + } + + char* GetOutputPtr() { return nullptr; } + char* GetBase(ptrdiff_t*) { return nullptr; } + void SetOutputPtr(char* op) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)op; + } + + inline bool AppendNoCheck(const char* ip, size_t len) { while (len > 0) { - assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); - if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) { + if (curr_iov_remaining_ == 0) { // This iovec is full. Go to the next one. - if (curr_iov_index_ + 1 >= output_iov_count_) { + if (curr_iov_ + 1 >= output_iov_end_) { return false; } - curr_iov_written_ = 0; - ++curr_iov_index_; + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; } - const size_t to_write = std::min( - len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_); - memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_), - ip, - to_write); - curr_iov_written_ += to_write; + const size_t to_write = std::min(len, curr_iov_remaining_); + std::memcpy(curr_iov_output_, ip, to_write); + curr_iov_output_ += to_write; + curr_iov_remaining_ -= to_write; total_written_ += to_write; ip += to_write; len -= to_write; @@ -1007,14 +2050,15 @@ class SnappyIOVecWriter { return true; } - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { + inline bool TryFastAppend(const char* ip, size_t available, size_t len, + char**) { const size_t space_left = output_limit_ - total_written_; if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 && - output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) { + curr_iov_remaining_ >= 16) { // Fast path, used for the majority (about 95%) of invocations. - char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_); - UnalignedCopy128(ip, ptr); - curr_iov_written_ += len; + UnalignedCopy128(ip, curr_iov_output_); + curr_iov_output_ += len; + curr_iov_remaining_ -= len; total_written_ += len; return true; } @@ -1022,8 +2066,10 @@ class SnappyIOVecWriter { return false; } - inline bool AppendFromSelf(size_t offset, size_t len) { - if (offset > total_written_ || offset == 0) { + inline bool AppendFromSelf(size_t offset, size_t len, char**) { + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. + if (offset - 1u >= total_written_) { return false; } const size_t space_left = output_limit_ - total_written_; @@ -1032,8 +2078,8 @@ class SnappyIOVecWriter { } // Locate the iovec from which we need to start the copy. 
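+    // Example: iovecs of sizes {8, 4} with 10 bytes written so far sit at
+    // (curr_iov_ == &iov[1], 2 bytes used). For offset == 5 the walk below
+    // borrows 3 bytes from iov[0], landing at (iov[0], offset 8 - 3 == 5).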
- size_t from_iov_index = curr_iov_index_; - size_t from_iov_offset = curr_iov_written_; + const iovec* from_iov = curr_iov_; + size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; while (offset > 0) { if (from_iov_offset >= offset) { from_iov_offset -= offset; @@ -1041,47 +2087,48 @@ class SnappyIOVecWriter { } offset -= from_iov_offset; - assert(from_iov_index > 0); - --from_iov_index; - from_iov_offset = output_iov_[from_iov_index].iov_len; + --from_iov; +#if !defined(NDEBUG) + assert(from_iov >= output_iov_); +#endif // !defined(NDEBUG) + from_iov_offset = from_iov->iov_len; } // Copy bytes starting from the iovec pointed to by from_iov_index to // the current iovec. while (len > 0) { - assert(from_iov_index <= curr_iov_index_); - if (from_iov_index != curr_iov_index_) { - const size_t to_copy = std::min( - output_iov_[from_iov_index].iov_len - from_iov_offset, - len); - Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy); + assert(from_iov <= curr_iov_); + if (from_iov != curr_iov_) { + const size_t to_copy = + std::min(from_iov->iov_len - from_iov_offset, len); + AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); len -= to_copy; if (len > 0) { - ++from_iov_index; + ++from_iov; from_iov_offset = 0; } } else { - assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); - size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len - - curr_iov_written_, - len); + size_t to_copy = curr_iov_remaining_; if (to_copy == 0) { // This iovec is full. Go to the next one. - if (curr_iov_index_ + 1 >= output_iov_count_) { + if (curr_iov_ + 1 >= output_iov_end_) { return false; } - ++curr_iov_index_; - curr_iov_written_ = 0; + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; continue; } if (to_copy > len) { to_copy = len; } - IncrementalCopySlow( - GetIOVecPointer(from_iov_index, from_iov_offset), - GetIOVecPointer(curr_iov_index_, curr_iov_written_), - GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy); - curr_iov_written_ += to_copy; + assert(to_copy > 0); + + IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), + curr_iov_output_, curr_iov_output_ + to_copy, + curr_iov_output_ + curr_iov_remaining_); + curr_iov_output_ += to_copy; + curr_iov_remaining_ -= to_copy; from_iov_offset += to_copy; total_written_ += to_copy; len -= to_copy; @@ -1118,59 +2165,74 @@ class SnappyArrayWriter { char* base_; char* op_; char* op_limit_; + // If op < op_limit_min_slop_ then it's safe to unconditionally write + // kSlopBytes starting at op. + char* op_limit_min_slop_; public: inline explicit SnappyArrayWriter(char* dst) : base_(dst), op_(dst), - op_limit_(dst) { - } + op_limit_(dst), + op_limit_min_slop_(dst) {} // Safe default see invariant. inline void SetExpectedLength(size_t len) { op_limit_ = op_ + len; + // Prevent pointer from being past the buffer. 
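+    // Because len is at least kSlopBytes - 1 in the interesting case, the
+    // assignment below leaves op_limit_min_slop_ == op_limit_ - (kSlopBytes -
+    // 1), so any op < op_limit_min_slop_ satisfies op + kSlopBytes <=
+    // op_limit_.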
+    op_limit_min_slop_ = op_limit_ - std::min<size_t>(kSlopBytes - 1, len);
  }

-  inline bool CheckLength() const {
-    return op_ == op_limit_;
+  inline bool CheckLength() const { return op_ == op_limit_; }
+
+  char* GetOutputPtr() { return op_; }
+  char* GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop = op_limit_min_slop_ - base_;
+    return base_;
  }
+  void SetOutputPtr(char* op) { op_ = op; }

-  inline bool Append(const char* ip, size_t len) {
-    char* op = op_;
+  inline bool Append(const char* ip, size_t len, char** op_p) {
+    char* op = *op_p;
    const size_t space_left = op_limit_ - op;
-    if (space_left < len) {
-      return false;
-    }
-    memcpy(op, ip, len);
-    op_ = op + len;
+    if (space_left < len) return false;
+    std::memcpy(op, ip, len);
+    *op_p = op + len;
    return true;
  }

-  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
-    char* op = op_;
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len,
+                            char** op_p) {
+    char* op = *op_p;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy128(ip, op);
-      op_ = op + len;
+      *op_p = op + len;
      return true;
    } else {
      return false;
    }
  }

-  inline bool AppendFromSelf(size_t offset, size_t len) {
-    char* const op_end = op_ + len;
+  SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+  inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) {
+    assert(len > 0);
+    char* const op = *op_p;
+    assert(op >= base_);
+    char* const op_end = op + len;
    // Check if we try to append from before the start of the buffer.
-    // Normally this would just be a check for "produced < offset",
-    // but "produced <= offset - 1u" is equivalent for every case
-    // except the one where offset==0, where the right side will wrap around
-    // to a very big number. This is convenient, as offset==0 is another
-    // invalid case that we also want to catch, so that we do not go
-    // into an infinite loop.
-    if (Produced() <= offset - 1u || op_end > op_limit_) return false;
-    op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);
+    if (SNAPPY_PREDICT_FALSE(static_cast<size_t>(op - base_) < offset))
+      return false;
+    if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) ||
+                             op >= op_limit_min_slop_ || offset < len)) {
+      if (op_end > op_limit_ || offset == 0) return false;
+      *op_p = IncrementalCopy(op - offset, op, op_end, op_limit_);
+      return true;
+    }
+    std::memmove(op, op - offset, kSlopBytes);
+    *op_p = op_end;
    return true;
  }
  inline size_t Produced() const {
@@ -1180,8 +2242,9 @@ class SnappyArrayWriter {
  inline void Flush() {}
};

-bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
-  ByteArraySource reader(compressed, n);
+bool RawUncompress(const char* compressed, size_t compressed_length,
+                   char* uncompressed) {
+  ByteArraySource reader(compressed, compressed_length);
  return RawUncompress(&reader, uncompressed);
}

@@ -1190,9 +2253,10 @@ bool RawUncompress(Source* compressed, char* uncompressed) {
  return InternalUncompress(compressed, &output);
}

-bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
+bool Uncompress(const char* compressed, size_t compressed_length,
+                std::string* uncompressed) {
  size_t ulength;
-  if (!GetUncompressedLength(compressed, n, &ulength)) {
+  if (!GetUncompressedLength(compressed, compressed_length, &ulength)) {
    return false;
  }
  // On 32-bit builds: max_size() < kuint32max.
Check for that instead
@@ -1201,7 +2265,8 @@
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
-  return RawUncompress(compressed, n, string_as_array(uncompressed));
+  return RawUncompress(compressed, compressed_length,
+                       string_as_array(uncompressed));
}

// A Writer that drops everything on the floor and just does validation
@@ -1211,32 +2276,44 @@ class SnappyDecompressionValidator {
  size_t produced_;

 public:
-  inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
-  inline void SetExpectedLength(size_t len) {
-    expected_ = len;
-  }
-  inline bool CheckLength() const {
-    return expected_ == produced_;
+  inline SnappyDecompressionValidator() : expected_(0), produced_(0) {}
+  inline void SetExpectedLength(size_t len) { expected_ = len; }
+  size_t GetOutputPtr() { return produced_; }
+  size_t GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop =
+        std::numeric_limits<ptrdiff_t>::max() - kSlopBytes + 1;
+    return 1;
  }
-  inline bool Append(const char* ip, size_t len) {
-    produced_ += len;
-    return produced_ <= expected_;
+  void SetOutputPtr(size_t op) { produced_ = op; }
+  inline bool CheckLength() const { return expected_ == produced_; }
+  inline bool Append(const char* ip, size_t len, size_t* produced) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)ip;
+
+    *produced += len;
+    return *produced <= expected_;
  }
-  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+  inline bool TryFastAppend(const char* ip, size_t available, size_t length,
+                            size_t* produced) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)ip;
+    (void)available;
+    (void)length;
+    (void)produced;
+
    return false;
  }
-  inline bool AppendFromSelf(size_t offset, size_t len) {
+  inline bool AppendFromSelf(size_t offset, size_t len, size_t* produced) {
    // See SnappyArrayWriter::AppendFromSelf for an explanation of
    // the "offset - 1u" trick.
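+    // Concretely, with unsigned arithmetic offset == 0 wraps to SIZE_MAX, so
+    // one compare rejects both offset == 0 and offset > produced:
+    //   offset == 0, produced == 3: 3 <= SIZE_MAX    -> reject
+    //   offset == 5, produced == 3: 3 <= 4           -> reject
+    //   offset == 2, produced == 3: 3 <= 1 is false  -> accept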
- if (produced_ <= offset - 1u) return false; - produced_ += len; - return produced_ <= expected_; + if (*produced <= offset - 1u) return false; + *produced += len; + return *produced <= expected_; } inline void Flush() {} }; -bool IsValidCompressedBuffer(const char* compressed, size_t n) { - ByteArraySource reader(compressed, n); +bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length) { + ByteArraySource reader(compressed, compressed_length); SnappyDecompressionValidator writer; return InternalUncompress(&reader, &writer); } @@ -1246,26 +2323,77 @@ bool IsValidCompressed(Source* compressed) { return InternalUncompress(compressed, &writer); } -void RawCompress(const char* input, - size_t input_length, - char* compressed, +void RawCompress(const char* input, size_t input_length, char* compressed, size_t* compressed_length) { + RawCompress(input, input_length, compressed, compressed_length, + CompressionOptions{}); +} + +void RawCompress(const char* input, size_t input_length, char* compressed, + size_t* compressed_length, CompressionOptions options) { ByteArraySource reader(input, input_length); UncheckedByteArraySink writer(compressed); - Compress(&reader, &writer); + Compress(&reader, &writer, options); // Compute how many bytes were added *compressed_length = (writer.CurrentDestination() - compressed); } -size_t Compress(const char* input, size_t input_length, string* compressed) { +void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length) { + RawCompressFromIOVec(iov, uncompressed_length, compressed, compressed_length, + CompressionOptions{}); +} + +void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length, + CompressionOptions options) { + SnappyIOVecReader reader(iov, uncompressed_length); + UncheckedByteArraySink writer(compressed); + Compress(&reader, &writer, options); + + // Compute how many bytes were added. + *compressed_length = writer.CurrentDestination() - compressed; +} + +size_t Compress(const char* input, size_t input_length, + std::string* compressed) { + return Compress(input, input_length, compressed, CompressionOptions{}); +} + +size_t Compress(const char* input, size_t input_length, std::string* compressed, + CompressionOptions options) { // Pre-grow the buffer to the max length of the compressed output STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); size_t compressed_length; RawCompress(input, input_length, string_as_array(compressed), - &compressed_length); - compressed->resize(compressed_length); + &compressed_length, options); + compressed->erase(compressed_length); + return compressed_length; +} + +size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed) { + return CompressFromIOVec(iov, iov_cnt, compressed, CompressionOptions{}); +} + +size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed, CompressionOptions options) { + // Compute the number of bytes to be compressed. + size_t uncompressed_length = 0; + for (size_t i = 0; i < iov_cnt; ++i) { + uncompressed_length += iov[i].iov_len; + } + + // Pre-grow the buffer to the max length of the compressed output. 
+ STLStringResizeUninitialized(compressed, MaxCompressedLength( + uncompressed_length)); + + size_t compressed_length; + RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed), + &compressed_length, options); + compressed->erase(compressed_length); return compressed_length; } @@ -1290,13 +2418,14 @@ class SnappyScatteredWriter { size_t full_size_; // Pointer into current output block - char* op_base_; // Base of output block - char* op_ptr_; // Pointer to next unfilled byte in block - char* op_limit_; // Pointer just past block + char* op_base_; // Base of output block + char* op_ptr_; // Pointer to next unfilled byte in block + char* op_limit_; // Pointer just past block + // If op < op_limit_min_slop_ then it's safe to unconditionally write + // kSlopBytes starting at op. + char* op_limit_min_slop_; - inline size_t Size() const { - return full_size_ + (op_ptr_ - op_base_); - } + inline size_t Size() const { return full_size_ + (op_ptr_ - op_base_); } bool SlowAppend(const char* ip, size_t len); bool SlowAppendFromSelf(size_t offset, size_t len); @@ -1307,59 +2436,79 @@ class SnappyScatteredWriter { full_size_(0), op_base_(NULL), op_ptr_(NULL), - op_limit_(NULL) { + op_limit_(NULL), + op_limit_min_slop_(NULL) {} + char* GetOutputPtr() { return op_ptr_; } + char* GetBase(ptrdiff_t* op_limit_min_slop) { + *op_limit_min_slop = op_limit_min_slop_ - op_base_; + return op_base_; } + void SetOutputPtr(char* op) { op_ptr_ = op; } inline void SetExpectedLength(size_t len) { assert(blocks_.empty()); expected_ = len; } - inline bool CheckLength() const { - return Size() == expected_; - } + inline bool CheckLength() const { return Size() == expected_; } // Return the number of bytes actually uncompressed so far - inline size_t Produced() const { - return Size(); - } + inline size_t Produced() const { return Size(); } - inline bool Append(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; + inline bool Append(const char* ip, size_t len, char** op_p) { + char* op = *op_p; + size_t avail = op_limit_ - op; if (len <= avail) { // Fast path - memcpy(op_ptr_, ip, len); - op_ptr_ += len; + std::memcpy(op, ip, len); + *op_p = op + len; return true; } else { - return SlowAppend(ip, len); + op_ptr_ = op; + bool res = SlowAppend(ip, len); + *op_p = op_ptr_; + return res; } } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - char* op = op_ptr_; + inline bool TryFastAppend(const char* ip, size_t available, size_t length, + char** op_p) { + char* op = *op_p; const int space_left = op_limit_ - op; if (length <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { // Fast path, used for the majority (about 95%) of invocations. UnalignedCopy128(ip, op); - op_ptr_ = op + length; + *op_p = op + length; return true; } else { return false; } } - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ptr_ + len; - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) { - // Fast path: src and dst in current block. - op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_); + inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) { + char* op = *op_p; + assert(op >= op_base_); + // Check if we try to append from before the start of the buffer. 
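+    // Note that offset < len is the overlapping case: the copy source and
+    // destination overlap, so the bytes must be produced by pattern
+    // extension (e.g. offset == 1 replicates one byte len times); the slow
+    // paths below handle it via IncrementalCopy or SlowAppendFromSelf.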
+ if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) || + static_cast(op - op_base_) < offset || + op >= op_limit_min_slop_ || offset < len)) { + if (offset == 0) return false; + if (SNAPPY_PREDICT_FALSE(static_cast(op - op_base_) < offset || + op + len > op_limit_)) { + op_ptr_ = op; + bool res = SlowAppendFromSelf(offset, len); + *op_p = op_ptr_; + return res; + } + *op_p = IncrementalCopy(op - offset, op, op + len, op_limit_); return true; } - return SlowAppendFromSelf(offset, len); + // Fast path + char* const op_end = op + len; + std::memmove(op, op - offset, kSlopBytes); + *op_p = op_end; + return true; } // Called at the end of the decompress. We ask the allocator @@ -1367,12 +2516,12 @@ class SnappyScatteredWriter { inline void Flush() { allocator_.Flush(Produced()); } }; -template +template bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { size_t avail = op_limit_ - op_ptr_; while (len > avail) { // Completely fill this block - memcpy(op_ptr_, ip, avail); + std::memcpy(op_ptr_, ip, avail); op_ptr_ += avail; assert(op_limit_ - op_ptr_ == 0); full_size_ += (op_ptr_ - op_base_); @@ -1380,25 +2529,25 @@ bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { ip += avail; // Bounds check - if (full_size_ + len > expected_) { - return false; - } + if (full_size_ + len > expected_) return false; // Make new block size_t bsize = std::min(kBlockSize, expected_ - full_size_); op_base_ = allocator_.Allocate(bsize); op_ptr_ = op_base_; op_limit_ = op_base_ + bsize; + op_limit_min_slop_ = op_limit_ - std::min(kSlopBytes - 1, bsize); + blocks_.push_back(op_base_); avail = bsize; } - memcpy(op_ptr_, ip, len); + std::memcpy(op_ptr_, ip, len); op_ptr_ += len; return true; } -template +template bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, size_t len) { // Overflow check @@ -1413,19 +2562,26 @@ bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, // nice if we do not rely on that, since we can get better compression if we // allow cross-block copies and thus might want to change the compressor in // the future. + // TODO Replace this with a properly optimized path. This is not + // triggered right now. But this is so super slow, that it would regress + // performance unacceptably if triggered. size_t src = cur - offset; + char* op = op_ptr_; while (len-- > 0) { - char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)]; - Append(&c, 1); + char c = blocks_[src >> kBlockLog][src & (kBlockSize - 1)]; + if (!Append(&c, 1, &op)) { + op_ptr_ = op; + return false; + } src++; } + op_ptr_ = op; return true; } class SnappySinkAllocator { public: - explicit SnappySinkAllocator(Sink* dest): dest_(dest) {} - ~SnappySinkAllocator() {} + explicit SnappySinkAllocator(Sink* dest) : dest_(dest) {} char* Allocate(int size) { Datablock block(new char[size], size); @@ -1440,10 +2596,9 @@ class SnappySinkAllocator { // to the blocks. 
void Flush(size_t size) {
    size_t size_written = 0;
-    size_t block_size;
-    for (int i = 0; i < blocks_.size(); ++i) {
-      block_size = std::min(blocks_[i].size, size - size_written);
-      dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
+    for (Datablock& block : blocks_) {
+      size_t block_size = std::min<size_t>(block.size, size - size_written);
+      dest_->AppendAndTakeOwnership(block.data, block_size,
                                    &SnappySinkAllocator::Deleter, NULL);
      size_written += block_size;
    }
@@ -1458,6 +2613,10 @@ class SnappySinkAllocator {
  };

  static void Deleter(void* arg, const char* bytes, size_t size) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)arg;
+    (void)size;
+
    delete[] bytes;
  }

@@ -1477,15 +2636,15 @@ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
bool Uncompress(Source* compressed, Sink* uncompressed) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(compressed);
-  uint32 uncompressed_len = 0;
+  uint32_t uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
    return false;
  }

  char c;
  size_t allocated_size;
-  char* buf = uncompressed->GetAppendBufferVariable(
-      1, uncompressed_len, &c, 1, &allocated_size);
+  char* buf = uncompressed->GetAppendBufferVariable(1, uncompressed_len, &c, 1,
+                                                    &allocated_size);

  const size_t compressed_len = compressed->Available();
  // If we can get a flat buffer, then use it, otherwise do block by block
@@ -1504,4 +2663,4 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
  }
}

-}  // end namespace snappy
+}  // namespace snappy
diff --git a/snappy.h b/snappy.h
index 4568db8..2f1b802 100644
--- a/snappy.h
+++ b/snappy.h
@@ -40,6 +40,8 @@
 #define THIRD_PARTY_SNAPPY_SNAPPY_H__

 #include <stddef.h>
+#include <stdint.h>
+
 #include <string>

 #include "snappy-stubs-public.h"
@@ -48,13 +50,38 @@ namespace snappy {
  class Source;
  class Sink;

+  struct CompressionOptions {
+    // Compression level.
+    // Level 1 is the fastest.
+    // Level 2 is a little slower but provides better compression. Level 2 is
+    // **EXPERIMENTAL** for the time being. It might happen that we decide to
+    // fall back to level 1 in the future.
+    // Levels 3+ are currently not supported. We plan to support levels up to
+    // 9 in the future.
+    // For comparison with other compression algorithms: level 1 is roughly
+    // equivalent to LZ4's fast mode (level 1); level 2 is roughly equivalent
+    // to LZ4's level 2 mode and compresses to somewhere between zstd:-3 and
+    // zstd:-2, but generally with faster decompression than snappy level 1
+    // and zstd:-3.
+    int level = DefaultCompressionLevel();
+
+    constexpr CompressionOptions() = default;
+    constexpr CompressionOptions(int compression_level)
+        : level(compression_level) {}
+
+    static constexpr int MinCompressionLevel() { return 1; }
+    static constexpr int MaxCompressionLevel() { return 2; }
+    static constexpr int DefaultCompressionLevel() { return 1; }
+  };
+
  // ------------------------------------------------------------------------
  // Generic compression/decompression routines.
  // ------------------------------------------------------------------------

-  // Compress the bytes read from "*source" and append to "*sink". Return the
+  // Compress the bytes read from "*reader" and append to "*writer". Return the
  // number of bytes written.
+  // The first overload is kept to preserve the ABI.
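+  // Illustrative use of the options overload (a sketch; error handling
+  // omitted):
+  //
+  //   snappy::ByteArraySource source(input, input_size);
+  //   snappy::UncheckedByteArraySink sink(output_buffer);
+  //   size_t written = snappy::Compress(
+  //       &source, &sink, snappy::CompressionOptions{/*level=*/2});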
+ size_t Compress(Source* reader, Sink* writer); + size_t Compress(Source* reader, Sink* writer, + CompressionOptions options); // Find the uncompressed length of the given stream, as given by the header. // Note that the true length could deviate from this; the stream could e.g. @@ -63,26 +90,41 @@ namespace snappy { // Also note that this leaves "*source" in a state that is unsuitable for // further operations, such as RawUncompress(). You will need to rewind // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); + bool GetUncompressedLength(Source* source, uint32_t* result); // ------------------------------------------------------------------------ // Higher-level string based routines (should be sufficient for most users) // ------------------------------------------------------------------------ - // Sets "*output" to the compressed version of "input[0,input_length-1]". - // Original contents of *output are lost. + // Sets "*compressed" to the compressed version of "input[0..input_length-1]". + // Original contents of *compressed are lost. // - // REQUIRES: "input[]" is not an alias of "*output". - size_t Compress(const char* input, size_t input_length, string* output); + // REQUIRES: "input[]" is not an alias of "*compressed". + // First version is to preserve ABI. + size_t Compress(const char* input, size_t input_length, + std::string* compressed); + size_t Compress(const char* input, size_t input_length, + std::string* compressed, CompressionOptions options); - // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". + // Same as `Compress` above but taking an `iovec` array as input. Note that + // this function preprocesses the inputs to compute the sum of + // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use + // `RawCompressFromIOVec` below. + // First version is to preserve ABI. + size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed); + size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed, + CompressionOptions options); + + // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed". // Original contents of "*uncompressed" are lost. // // REQUIRES: "compressed[]" is not an alias of "*uncompressed". // // returns false if the message is corrupted and could not be decompressed bool Uncompress(const char* compressed, size_t compressed_length, - string* uncompressed); + std::string* uncompressed); // Decompresses "compressed" to "*uncompressed". // @@ -116,10 +158,19 @@ namespace snappy { // RawCompress(input, input_length, output, &output_length); // ... Process(output, output_length) ... // delete [] output; - void RawCompress(const char* input, - size_t input_length, - char* compressed, + void RawCompress(const char* input, size_t input_length, char* compressed, size_t* compressed_length); + void RawCompress(const char* input, size_t input_length, char* compressed, + size_t* compressed_length, CompressionOptions options); + + // Same as `RawCompress` above but taking an `iovec` array as input. Note that + // `uncompressed_length` is the total number of bytes to be read from the + // elements of `iov` (_not_ the number of elements in `iov`). 
+ void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length); + void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length, + CompressionOptions options); // Given data in "compressed[0..compressed_length-1]" generated by // calling the Snappy::Compress routine, this routine @@ -193,11 +244,14 @@ namespace snappy { // Note that there might be older data around that is compressed with larger // block sizes, so the decompression code should not rely on the // non-existence of long backreferences. - static const int kBlockLog = 16; - static const size_t kBlockSize = 1 << kBlockLog; + static constexpr int kBlockLog = 16; + static constexpr size_t kBlockSize = 1 << kBlockLog; + + static constexpr int kMinHashTableBits = 8; + static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits; - static const int kMaxHashTableBits = 14; - static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; + static constexpr int kMaxHashTableBits = 15; + static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits; } // end namespace snappy #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ diff --git a/snappy.pc.in b/snappy.pc.in deleted file mode 100644 index 982d240..0000000 --- a/snappy.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: @PACKAGE@ -Description: A fast compression/decompression library -Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -l@PACKAGE@ -Cflags: -I${includedir} diff --git a/snappy_benchmark.cc b/snappy_benchmark.cc new file mode 100644 index 0000000..d6e35d3 --- /dev/null +++ b/snappy_benchmark.cc @@ -0,0 +1,398 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "snappy-internal.h" +#include "snappy-sinksource.h" +#include "snappy-test.h" +#include "snappy.h" +#include "snappy_test_data.h" + +namespace snappy { + +namespace { + +void FilesAndLevels(benchmark::internal::Benchmark* benchmark) { + for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) { + for (int level = snappy::CompressionOptions::MinCompressionLevel(); + level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) { + benchmark->ArgPair(i, level); + } + } +} + +void BM_UFlat(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress( + contents.data(), contents.size(), &zcontents, + snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); + char* dst = new char[contents.size()]; + + for (auto s : state) { + CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst)); + benchmark::DoNotOptimize(dst); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); + + delete[] dst; +} +BENCHMARK(BM_UFlat)->Apply(FilesAndLevels); + +struct SourceFiles { + SourceFiles() { + for (int i = 0; i < kFiles; i++) { + std::string contents = ReadTestDataFile(kTestDataFiles[i].filename, + kTestDataFiles[i].size_limit); + max_size = std::max(max_size, contents.size()); + sizes[i] = contents.size(); + snappy::Compress(contents.data(), contents.size(), &zcontents[i]); + } + } + static constexpr int kFiles = ARRAYSIZE(kTestDataFiles); + std::string zcontents[kFiles]; + size_t sizes[kFiles]; + size_t max_size = 0; +}; + +void BM_UFlatMedley(benchmark::State& state) { + static const SourceFiles* const source = new SourceFiles(); + + std::vector dst(source->max_size); + + for (auto s : state) { + for (int i = 0; i < SourceFiles::kFiles; i++) { + CHECK(snappy::RawUncompress(source->zcontents[i].data(), + source->zcontents[i].size(), dst.data())); + benchmark::DoNotOptimize(dst); + } + } + + int64_t source_sizes = 0; + for (int i = 0; i < SourceFiles::kFiles; i++) { + source_sizes += static_cast(source->sizes[i]); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + source_sizes); +} +BENCHMARK(BM_UFlatMedley); + +void BM_UValidate(benchmark::State& state) { + // Pick file to process based on state.range(0). 
+ int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress( + contents.data(), contents.size(), &zcontents, + snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); + + for (auto s : state) { + CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size())); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); +} +BENCHMARK(BM_UValidate)->Apply(FilesAndLevels); + +void BM_UValidateMedley(benchmark::State& state) { + static const SourceFiles* const source = new SourceFiles(); + + for (auto s : state) { + for (int i = 0; i < SourceFiles::kFiles; i++) { + CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(), + source->zcontents[i].size())); + } + } + + int64_t source_sizes = 0; + for (int i = 0; i < SourceFiles::kFiles; i++) { + source_sizes += static_cast(source->sizes[i]); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + source_sizes); +} +BENCHMARK(BM_UValidateMedley); + +void BM_UIOVecSource(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + int level = state.range(1); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + // Create `iovec`s of the `contents`. + const int kNumEntries = 10; + struct iovec iov[kNumEntries]; + size_t used_so_far = 0; + for (int i = 0; i < kNumEntries; ++i) { + iov[i].iov_base = const_cast(contents.data()) + used_so_far; + if (used_so_far == contents.size()) { + iov[i].iov_len = 0; + continue; + } + if (i == kNumEntries - 1) { + iov[i].iov_len = contents.size() - used_so_far; + } else { + iov[i].iov_len = contents.size() / kNumEntries; + } + used_so_far += iov[i].iov_len; + } + + char* dst = new char[snappy::MaxCompressedLength(contents.size())]; + size_t zsize = 0; + for (auto s : state) { + snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize, + snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(iov); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + const double compression_ratio = + static_cast(zsize) / std::max(1, contents.size()); + state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, + 100.0 * compression_ratio)); + VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", + kTestDataFiles[file_index].label, contents.size(), + zsize); + + delete[] dst; +} +BENCHMARK(BM_UIOVecSource)->Apply(FilesAndLevels); + +void BM_UIOVecSink(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress(contents.data(), contents.size(), &zcontents); + + // Uncompress into an iovec containing ten entries. 
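+  // Each of the first nine entries covers contents.size() / 10 bytes; the
+  // last entry picks up the remainder so the ten slices exactly tile the
+  // buffer.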
+ const int kNumEntries = 10; + struct iovec iov[kNumEntries]; + char* dst = new char[contents.size()]; + size_t used_so_far = 0; + for (int i = 0; i < kNumEntries; ++i) { + iov[i].iov_base = dst + used_so_far; + if (used_so_far == contents.size()) { + iov[i].iov_len = 0; + continue; + } + + if (i == kNumEntries - 1) { + iov[i].iov_len = contents.size() - used_so_far; + } else { + iov[i].iov_len = contents.size() / kNumEntries; + } + used_so_far += iov[i].iov_len; + } + + for (auto s : state) { + CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov, + kNumEntries)); + benchmark::DoNotOptimize(iov); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); + + delete[] dst; +} +BENCHMARK(BM_UIOVecSink)->DenseRange(0, 4); + +void BM_UFlatSink(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress( + contents.data(), contents.size(), &zcontents, + snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); + char* dst = new char[contents.size()]; + + for (auto s : state) { + snappy::ByteArraySource source(zcontents.data(), zcontents.size()); + snappy::UncheckedByteArraySink sink(dst); + CHECK(snappy::Uncompress(&source, &sink)); + benchmark::DoNotOptimize(sink); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); + + std::string s(dst, contents.size()); + CHECK_EQ(contents, s); + + delete[] dst; +} + +BENCHMARK(BM_UFlatSink)->Apply(FilesAndLevels); + +void BM_ZFlat(benchmark::State& state) { + // Pick file to process based on state.range(0). 
+ int file_index = state.range(0); + int level = state.range(1); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + char* dst = new char[snappy::MaxCompressedLength(contents.size())]; + + size_t zsize = 0; + for (auto s : state) { + snappy::RawCompress(contents.data(), contents.size(), dst, &zsize, + snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(dst); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + const double compression_ratio = + static_cast(zsize) / std::max(1, contents.size()); + state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, + 100.0 * compression_ratio)); + VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", + kTestDataFiles[file_index].label, contents.size(), + zsize); + delete[] dst; +} + +BENCHMARK(BM_ZFlat)->Apply(FilesAndLevels); + +void BM_ZFlatAll(benchmark::State& state) { + const int num_files = ARRAYSIZE(kTestDataFiles); + int level = state.range(0); + + std::vector contents(num_files); + std::vector dst(num_files); + + int64_t total_contents_size = 0; + for (int i = 0; i < num_files; ++i) { + contents[i] = ReadTestDataFile(kTestDataFiles[i].filename, + kTestDataFiles[i].size_limit); + dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())]; + total_contents_size += contents[i].size(); + } + + size_t zsize = 0; + for (auto s : state) { + for (int i = 0; i < num_files; ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize, snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(dst); + } + } + + state.SetBytesProcessed(static_cast(state.iterations()) * + total_contents_size); + + for (char* dst_item : dst) { + delete[] dst_item; + } + state.SetLabel(StrFormat("%d kTestDataFiles", num_files)); +} +BENCHMARK(BM_ZFlatAll)->DenseRange(1, 2); + +void BM_ZFlatIncreasingTableSize(benchmark::State& state) { + CHECK_GT(ARRAYSIZE(kTestDataFiles), 0); + int level = state.range(0); + const std::string base_content = ReadTestDataFile( + kTestDataFiles[0].filename, kTestDataFiles[0].size_limit); + + std::vector contents; + std::vector dst; + int64_t total_contents_size = 0; + for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits; + ++table_bits) { + std::string content = base_content; + content.resize(1 << table_bits); + dst.push_back(new char[snappy::MaxCompressedLength(content.size())]); + total_contents_size += content.size(); + contents.push_back(std::move(content)); + } + + size_t zsize = 0; + for (auto s : state) { + for (size_t i = 0; i < contents.size(); ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize, snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(dst); + } + } + + state.SetBytesProcessed(static_cast(state.iterations()) * + total_contents_size); + + for (char* dst_item : dst) { + delete[] dst_item; + } + state.SetLabel(StrFormat("%d tables", contents.size())); +} +BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(1, 2); + +} // namespace + +} // namespace snappy diff --git a/snappy_compress_fuzzer.cc b/snappy_compress_fuzzer.cc new file mode 100644 index 0000000..93254a2 --- /dev/null +++ b/snappy_compress_fuzzer.cc @@ -0,0 +1,64 @@ +// Copyright 2019 Google Inc. All Rights Reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// libFuzzer harness for fuzzing snappy compression code.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cassert>
+#include <string>
+
+#include "snappy.h"
+
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  std::string input(reinterpret_cast<const char*>(data), size);
+  for (int level = snappy::CompressionOptions::MinCompressionLevel();
+       level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) {
+    std::string compressed;
+    size_t compressed_size =
+        snappy::Compress(input.data(), input.size(), &compressed,
+                         snappy::CompressionOptions{/*level=*/level});
+
+    (void)compressed_size;  // Variable only used in debug builds.
+    assert(compressed_size == compressed.size());
+    assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
+    assert(
+        snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+
+    std::string uncompressed_after_compress;
+    bool uncompress_succeeded = snappy::Uncompress(
+        compressed.data(), compressed.size(), &uncompressed_after_compress);
+
+    (void)uncompress_succeeded;  // Variable only used in debug builds.
+    assert(uncompress_succeeded);
+    assert(input == uncompressed_after_compress);
+  }
+  return 0;
+}
diff --git a/snappy_test_data.cc b/snappy_test_data.cc
new file mode 100644
index 0000000..8b54153
--- /dev/null
+++ b/snappy_test_data.cc
@@ -0,0 +1,57 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Support code for reading test data.
+
+#include "snappy_test_data.h"
+
+#include <cstddef>
+#include <cstdlib>
+#include <string>
+
+#include "snappy-test.h"
+
+namespace snappy {
+
+std::string ReadTestDataFile(const char* base, size_t size_limit) {
+  std::string srcdir;
+  const char* srcdir_env = std::getenv("srcdir");  // This is set by Automake.
+  if (srcdir_env) {
+    srcdir = std::string(srcdir_env) + "/";
+  }
+
+  std::string contents;
+  CHECK_OK(file::GetContents(srcdir + "testdata/" + base, &contents,
+                             file::Defaults()));
+  if (size_limit > 0) {
+    contents = contents.substr(0, size_limit);
+  }
+  return contents;
+}
+
+} // namespace snappy
diff --git a/snappy_test_data.h b/snappy_test_data.h
new file mode 100644
index 0000000..b01f74b
--- /dev/null
+++ b/snappy_test_data.h
@@ -0,0 +1,68 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// List of test case files.
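+// A size_limit of 0 means the whole file is used; ReadTestDataFile() only
+// truncates its result when size_limit > 0.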
+
+#ifndef THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
+#define THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
+
+#include <cstddef>
+#include <string>
+
+namespace snappy {
+
+std::string ReadTestDataFile(const char* base, size_t size_limit);
+
+// TODO: Replace anonymous namespace with inline variable when we can
+// rely on C++17.
+namespace {
+
+constexpr struct {
+  const char* label;
+  const char* filename;
+  size_t size_limit;
+} kTestDataFiles[] = {
+    { "html", "html", 0 },
+    { "urls", "urls.10K", 0 },
+    { "jpg", "fireworks.jpeg", 0 },
+    { "jpg_200", "fireworks.jpeg", 200 },
+    { "pdf", "paper-100k.pdf", 0 },
+    { "html4", "html_x_4", 0 },
+    { "txt1", "alice29.txt", 0 },
+    { "txt2", "asyoulik.txt", 0 },
+    { "txt3", "lcet10.txt", 0 },
+    { "txt4", "plrabn12.txt", 0 },
+    { "pb", "geo.protodata", 0 },
+    { "gaviota", "kppkn.gtb", 0 },
+};
+
+} // namespace
+
+} // namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
diff --git a/snappy_test_tool.cc b/snappy_test_tool.cc
new file mode 100644
index 0000000..a7c779b
--- /dev/null
+++ b/snappy_test_tool.cc
@@ -0,0 +1,471 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
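+//
+// Command-line tool that benchmarks snappy against zlib, LZO, and LZ4 on
+// each file given as an argument. A hypothetical invocation (flag names are
+// defined below):
+//
+//   snappy_test_tool --zlib=false --lz4=false testdata/html
+//
+// With --write_compressed or --write_uncompressed, the tool instead writes
+// .comp / .uncomp files next to each input.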
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "snappy-test.h"
+
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+#include "snappy.h"
+#include "snappy_test_data.h"
+
+SNAPPY_FLAG(int32_t, start_len, -1,
+            "Starting prefix size for testing (-1: just full file contents)");
+SNAPPY_FLAG(int32_t, end_len, -1,
+            "Ending prefix size for testing (-1: just full file contents)");
+SNAPPY_FLAG(int32_t, bytes, 10485760,
+            "How many bytes to compress/uncompress per file for timing");
+
+SNAPPY_FLAG(bool, zlib, true,
+            "Run zlib compression (http://www.zlib.net)");
+SNAPPY_FLAG(bool, lzo, true,
+            "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
+SNAPPY_FLAG(bool, lz4, true,
+            "Run LZ4 compression (https://github.com/lz4/lz4)");
+SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
+
+SNAPPY_FLAG(bool, write_compressed, false,
+            "Write compressed versions of each file to <file>.comp");
+SNAPPY_FLAG(bool, write_uncompressed, false,
+            "Write uncompressed versions of each file to <file>.uncomp");
+
+namespace snappy {
+
+namespace {
+
+#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+// To test against code that reads beyond its input, this class copies a
+// string to a newly allocated group of pages, the last of which
+// is made unreadable via mprotect. Note that we need to allocate the
+// memory with mmap(), as POSIX allows mprotect() only on memory allocated
+// with mmap(), and some malloc/posix_memalign implementations expect to
+// be able to read previously allocated memory while doing heap allocations.
+class DataEndingAtUnreadablePage {
+ public:
+  explicit DataEndingAtUnreadablePage(const std::string& s) {
+    const size_t page_size = sysconf(_SC_PAGESIZE);
+    const size_t size = s.size();
+    // Round up space for string to a multiple of page_size.
+    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
+    alloc_size_ = space_for_string + page_size;
+    mem_ = mmap(NULL, alloc_size_,
+                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    CHECK_NE(MAP_FAILED, mem_);
+    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
+    char* dst = protected_page_ - size;
+    std::memcpy(dst, s.data(), size);
+    data_ = dst;
+    size_ = size;
+    // Make guard page unreadable.
+    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
+  }
+
+  ~DataEndingAtUnreadablePage() {
+    const size_t page_size = sysconf(_SC_PAGESIZE);
+    // Undo the mprotect.
+    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
+    CHECK_EQ(0, munmap(mem_, alloc_size_));
+  }
+
+  const char* data() const { return data_; }
+  size_t size() const { return size_; }
+
+ private:
+  size_t alloc_size_;
+  void* mem_;
+  char* protected_page_;
+  const char* data_;
+  size_t size_;
+};
+
+#else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+// Fallback for systems without mmap.
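+// (std::string exposes the same data()/size() interface but provides no
+// guard page, so reads past the end go undetected on these platforms.)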
+using DataEndingAtUnreadablePage = std::string; + +#endif + +enum CompressorType { ZLIB, LZO, LZ4, SNAPPY }; + +const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"}; + +size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) { + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: + return ZLib::MinCompressbufSize(input_size); +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: + return input_size + input_size/64 + 16 + 3; +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: + return LZ4_compressBound(input_size); +#endif // LZ4_VERSION_NUMBER + + case SNAPPY: + return snappy::MaxCompressedLength(input_size); + + default: + LOG(FATAL) << "Unknown compression type number " << comp; + return 0; + } +} + +// Returns true if we successfully compressed, false otherwise. +// +// If compressed_is_preallocated is set, do not resize the compressed buffer. +// This is typically what you want for a benchmark, in order to not spend +// time in the memory allocator. If you do set this flag, however, +// "compressed" must be preinitialized to at least MinCompressbufSize(comp) +// number of bytes, and may contain junk bytes at the end after return. +bool Compress(const char* input, size_t input_size, CompressorType comp, + std::string* compressed, bool compressed_is_preallocated) { + if (!compressed_is_preallocated) { + compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); + } + + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: { + ZLib zlib; + uLongf destlen = compressed->size(); + int ret = zlib.Compress( + reinterpret_cast(string_as_array(compressed)), + &destlen, + reinterpret_cast(input), + input_size); + CHECK_EQ(Z_OK, ret); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + return true; + } +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: { + unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; + lzo_uint destlen; + int ret = lzo1x_1_15_compress( + reinterpret_cast(input), + input_size, + reinterpret_cast(string_as_array(compressed)), + &destlen, + mem); + CHECK_EQ(LZO_E_OK, ret); + delete[] mem; + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: { + int destlen = compressed->size(); + destlen = LZ4_compress_default(input, string_as_array(compressed), + input_size, destlen); + CHECK_NE(destlen, 0); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } +#endif // LZ4_VERSION_NUMBER + + case SNAPPY: { + size_t destlen; + snappy::RawCompress(input, input_size, + string_as_array(compressed), + &destlen); + CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } + + default: { + return false; // the asked-for library wasn't compiled in + } + } + return true; +} + +bool Uncompress(const std::string& compressed, CompressorType comp, int size, + std::string* output) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. 
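+  // The (void) cast below silences unused-parameter warnings in
+  // configurations where no backend that checks `size` is compiled in.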
+ (void)size; + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: { + output->resize(size); + ZLib zlib; + uLongf destlen = output->size(); + int ret = zlib.Uncompress( + reinterpret_cast(string_as_array(output)), + &destlen, + reinterpret_cast(compressed.data()), + compressed.size()); + CHECK_EQ(Z_OK, ret); + CHECK_EQ(static_cast(size), destlen); + break; + } +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: { + output->resize(size); + lzo_uint destlen; + int ret = lzo1x_decompress( + reinterpret_cast(compressed.data()), + compressed.size(), + reinterpret_cast(string_as_array(output)), + &destlen, + NULL); + CHECK_EQ(LZO_E_OK, ret); + CHECK_EQ(static_cast(size), destlen); + break; + } +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: { + output->resize(size); + int destlen = output->size(); + destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), + compressed.size(), destlen); + CHECK_NE(destlen, 0); + CHECK_EQ(size, destlen); + break; + } +#endif // LZ4_VERSION_NUMBER + case SNAPPY: { + snappy::RawUncompress(compressed.data(), compressed.size(), + string_as_array(output)); + break; + } + + default: { + return false; // the asked-for library wasn't compiled in + } + } + return true; +} + +void Measure(const char* data, size_t length, CompressorType comp, int repeats, + int block_size) { + // Run tests a few time and pick median running times + static const int kRuns = 5; + double ctime[kRuns]; + double utime[kRuns]; + int compressed_size = 0; + + { + // Chop the input into blocks + int num_blocks = (length + block_size - 1) / block_size; + std::vector input(num_blocks); + std::vector input_length(num_blocks); + std::vector compressed(num_blocks); + std::vector output(num_blocks); + for (int b = 0; b < num_blocks; ++b) { + int input_start = b * block_size; + int input_limit = std::min((b+1)*block_size, length); + input[b] = data+input_start; + input_length[b] = input_limit-input_start; + } + + // Pre-grow the output buffers so we don't measure string append time. + for (std::string& compressed_block : compressed) { + compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); + } + + // First, try one trial compression to make sure the code is compiled in + if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { + LOG(WARNING) << "Skipping " << names[comp] << ": " + << "library not compiled in"; + return; + } + + for (int run = 0; run < kRuns; ++run) { + CycleTimer ctimer, utimer; + + // Pre-grow the output buffers so we don't measure string append time. + for (std::string& compressed_block : compressed) { + compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); + } + + ctimer.Start(); + for (int b = 0; b < num_blocks; ++b) { + for (int i = 0; i < repeats; ++i) + Compress(input[b], input_length[b], comp, &compressed[b], true); + } + ctimer.Stop(); + + // Compress once more, with resizing, so we don't leave junk + // at the end that will confuse the decompressor. 
+ for (int b = 0; b < num_blocks; ++b) { + Compress(input[b], input_length[b], comp, &compressed[b], false); + } + + for (int b = 0; b < num_blocks; ++b) { + output[b].resize(input_length[b]); + } + + utimer.Start(); + for (int i = 0; i < repeats; ++i) { + for (int b = 0; b < num_blocks; ++b) + Uncompress(compressed[b], comp, input_length[b], &output[b]); + } + utimer.Stop(); + + ctime[run] = ctimer.Get(); + utime[run] = utimer.Get(); + } + + compressed_size = 0; + for (const std::string& compressed_item : compressed) { + compressed_size += compressed_item.size(); + } + } + + std::sort(ctime, ctime + kRuns); + std::sort(utime, utime + kRuns); + const int med = kRuns/2; + + float comp_rate = (length / ctime[med]) * repeats / 1048576.0; + float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; + std::string x = names[comp]; + x += ":"; + std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate) + : std::string("?"); + std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " + "comp %5.1f MB/s uncomp %5s MB/s\n", + x.c_str(), + block_size/(1<<20), + static_cast(length), static_cast(compressed_size), + (compressed_size * 100.0) / std::max(1, length), + comp_rate, + urate.c_str()); +} + +void CompressFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + + std::string compressed; + Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); + + CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, + file::Defaults())); +} + +void UncompressFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + + size_t uncompLength; + CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(), + &uncompLength)); + + std::string uncompressed; + uncompressed.resize(uncompLength); + CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); + + CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, + file::Defaults())); +} + +void MeasureFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + std::printf("%-40s :\n", fname); + + int start_len = (snappy::GetFlag(FLAGS_start_len) < 0) + ? 
fullinput.size() + : snappy::GetFlag(FLAGS_start_len); + int end_len = fullinput.size(); + if (snappy::GetFlag(FLAGS_end_len) >= 0) { + end_len = std::min(fullinput.size(), snappy::GetFlag(FLAGS_end_len)); + } + for (int len = start_len; len <= end_len; ++len) { + const char* const input = fullinput.data(); + int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1); + if (snappy::GetFlag(FLAGS_zlib)) + Measure(input, len, ZLIB, repeats, 1024 << 10); + if (snappy::GetFlag(FLAGS_lzo)) + Measure(input, len, LZO, repeats, 1024 << 10); + if (snappy::GetFlag(FLAGS_lz4)) + Measure(input, len, LZ4, repeats, 1024 << 10); + if (snappy::GetFlag(FLAGS_snappy)) + Measure(input, len, SNAPPY, repeats, 4096 << 10); + + // For block-size based measurements + if (0 && snappy::GetFlag(FLAGS_snappy)) { + Measure(input, len, SNAPPY, repeats, 8<<10); + Measure(input, len, SNAPPY, repeats, 16<<10); + Measure(input, len, SNAPPY, repeats, 32<<10); + Measure(input, len, SNAPPY, repeats, 64<<10); + Measure(input, len, SNAPPY, repeats, 256<<10); + Measure(input, len, SNAPPY, repeats, 1024<<10); + } + } +} + +} // namespace + +} // namespace snappy + +int main(int argc, char** argv) { + InitGoogle(argv[0], &argc, &argv, true); + + for (int arg = 1; arg < argc; ++arg) { + if (snappy::GetFlag(FLAGS_write_compressed)) { + snappy::CompressFile(argv[arg]); + } else if (snappy::GetFlag(FLAGS_write_uncompressed)) { + snappy::UncompressFile(argv[arg]); + } else { + snappy::MeasureFile(argv[arg]); + } + } + return 0; +} diff --git a/snappy_uncompress_fuzzer.cc b/snappy_uncompress_fuzzer.cc new file mode 100644 index 0000000..385bfb5 --- /dev/null +++ b/snappy_uncompress_fuzzer.cc @@ -0,0 +1,58 @@ +// Copyright 2019 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// libFuzzer harness for fuzzing snappy's decompression code. + +#include +#include + +#include +#include + +#include "snappy.h" + +// Entry point for LibFuzzer. 
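+// (The target is typically built with Clang's -fsanitize=fuzzer; libFuzzer
+// then calls this function repeatedly with mutated inputs.)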
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + std::string input(reinterpret_cast(data), size); + + // Avoid self-crafted decompression bombs. + size_t uncompressed_size; + constexpr size_t kMaxUncompressedSize = 1 << 20; + bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength( + input.data(), input.size(), &uncompressed_size); + if (!get_uncompressed_length_succeeded || + (uncompressed_size > kMaxUncompressedSize)) { + return 0; + } + + std::string uncompressed; + // The return value of snappy::Uncompress() is ignored because decompression + // will fail on invalid inputs. + snappy::Uncompress(input.data(), input.size(), &uncompressed); + return 0; +} diff --git a/snappy_unittest.cc b/snappy_unittest.cc index 19062e4..923a0ec 100644 --- a/snappy_unittest.cc +++ b/snappy_unittest.cc @@ -26,44 +26,32 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include -#include - - #include +#include +#include +#include +#include #include #include #include -#include "snappy.h" -#include "snappy-internal.h" #include "snappy-test.h" + +#include "gtest/gtest.h" + +#include "snappy-internal.h" #include "snappy-sinksource.h" +#include "snappy.h" +#include "snappy_test_data.h" -DEFINE_int32(start_len, -1, - "Starting prefix size for testing (-1: just full file contents)"); -DEFINE_int32(end_len, -1, - "Starting prefix size for testing (-1: just full file contents)"); -DEFINE_int32(bytes, 10485760, - "How many bytes to compress/uncompress per file for timing"); - -DEFINE_bool(zlib, false, - "Run zlib compression (http://www.zlib.net)"); -DEFINE_bool(lzo, false, - "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); -DEFINE_bool(snappy, true, "Run snappy compression"); - -DEFINE_bool(write_compressed, false, - "Write compressed versions of each file to .comp"); -DEFINE_bool(write_uncompressed, false, - "Write uncompressed versions of each file to .uncomp"); - -DEFINE_bool(snappy_dump_decompression_table, false, +SNAPPY_FLAG(bool, snappy_dump_decompression_table, false, "If true, we print the decompression table during tests."); namespace snappy { -#ifdef HAVE_FUNC_MMAP +namespace { + +#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // To test against code that reads beyond its input, this class copies a // string to a newly allocated group of pages, the last of which @@ -73,8 +61,8 @@ namespace snappy { // be able to read previously allocated memory while doing heap allocations. class DataEndingAtUnreadablePage { public: - explicit DataEndingAtUnreadablePage(const string& s) { - const size_t page_size = getpagesize(); + explicit DataEndingAtUnreadablePage(const std::string& s) { + const size_t page_size = sysconf(_SC_PAGESIZE); const size_t size = s.size(); // Round up space for string to a multiple of page_size. size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); @@ -84,7 +72,7 @@ class DataEndingAtUnreadablePage { CHECK_NE(MAP_FAILED, mem_); protected_page_ = reinterpret_cast(mem_) + space_for_string; char* dst = protected_page_ - size; - memcpy(dst, s.data(), size); + std::memcpy(dst, s.data(), size); data_ = dst; size_ = size; // Make guard page unreadable. @@ -92,8 +80,9 @@ class DataEndingAtUnreadablePage { } ~DataEndingAtUnreadablePage() { + const size_t page_size = sysconf(_SC_PAGESIZE); // Undo the mprotect. 
- CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE)); + CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); CHECK_EQ(0, munmap(mem_, alloc_size_)); } @@ -108,258 +97,15 @@ class DataEndingAtUnreadablePage { size_t size_; }; -#else // HAVE_FUNC_MMAP +#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF // Fallback for systems without mmap. -typedef string DataEndingAtUnreadablePage; +using DataEndingAtUnreadablePage = std::string; #endif -enum CompressorType { - ZLIB, LZO, SNAPPY -}; - -const char* names[] = { - "ZLIB", "LZO", "SNAPPY" -}; - -static size_t MinimumRequiredOutputSpace(size_t input_size, - CompressorType comp) { - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: - return ZLib::MinCompressbufSize(input_size); -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: - return input_size + input_size/64 + 16 + 3; -#endif // LZO_VERSION - - case SNAPPY: - return snappy::MaxCompressedLength(input_size); - - default: - LOG(FATAL) << "Unknown compression type number " << comp; - return 0; - } -} - -// Returns true if we successfully compressed, false otherwise. -// -// If compressed_is_preallocated is set, do not resize the compressed buffer. -// This is typically what you want for a benchmark, in order to not spend -// time in the memory allocator. If you do set this flag, however, -// "compressed" must be preinitialized to at least MinCompressbufSize(comp) -// number of bytes, and may contain junk bytes at the end after return. -static bool Compress(const char* input, size_t input_size, CompressorType comp, - string* compressed, bool compressed_is_preallocated) { - if (!compressed_is_preallocated) { - compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); - } - - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: { - ZLib zlib; - uLongf destlen = compressed->size(); - int ret = zlib.Compress( - reinterpret_cast(string_as_array(compressed)), - &destlen, - reinterpret_cast(input), - input_size); - CHECK_EQ(Z_OK, ret); - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - return true; - } -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: { - unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; - lzo_uint destlen; - int ret = lzo1x_1_15_compress( - reinterpret_cast(input), - input_size, - reinterpret_cast(string_as_array(compressed)), - &destlen, - mem); - CHECK_EQ(LZO_E_OK, ret); - delete[] mem; - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - break; - } -#endif // LZO_VERSION - - case SNAPPY: { - size_t destlen; - snappy::RawCompress(input, input_size, - string_as_array(compressed), - &destlen); - CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - break; - } - - default: { - return false; // the asked-for library wasn't compiled in - } - } - return true; -} - -static bool Uncompress(const string& compressed, CompressorType comp, - int size, string* output) { - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: { - output->resize(size); - ZLib zlib; - uLongf destlen = output->size(); - int ret = zlib.Uncompress( - reinterpret_cast(string_as_array(output)), - &destlen, - reinterpret_cast(compressed.data()), - compressed.size()); - CHECK_EQ(Z_OK, ret); - CHECK_EQ(static_cast(size), destlen); - break; - } -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: { - output->resize(size); - lzo_uint destlen; - int ret = lzo1x_decompress( - reinterpret_cast(compressed.data()), - 
compressed.size(), - reinterpret_cast(string_as_array(output)), - &destlen, - NULL); - CHECK_EQ(LZO_E_OK, ret); - CHECK_EQ(static_cast(size), destlen); - break; - } -#endif // LZO_VERSION - - case SNAPPY: { - snappy::RawUncompress(compressed.data(), compressed.size(), - string_as_array(output)); - break; - } - - default: { - return false; // the asked-for library wasn't compiled in - } - } - return true; -} - -static void Measure(const char* data, - size_t length, - CompressorType comp, - int repeats, - int block_size) { - // Run tests a few time and pick median running times - static const int kRuns = 5; - double ctime[kRuns]; - double utime[kRuns]; - int compressed_size = 0; - - { - // Chop the input into blocks - int num_blocks = (length + block_size - 1) / block_size; - std::vector input(num_blocks); - std::vector input_length(num_blocks); - std::vector compressed(num_blocks); - std::vector output(num_blocks); - for (int b = 0; b < num_blocks; b++) { - int input_start = b * block_size; - int input_limit = std::min((b+1)*block_size, length); - input[b] = data+input_start; - input_length[b] = input_limit-input_start; - - // Pre-grow the output buffer so we don't measure string append time. - compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp)); - } - - // First, try one trial compression to make sure the code is compiled in - if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { - LOG(WARNING) << "Skipping " << names[comp] << ": " - << "library not compiled in"; - return; - } - - for (int run = 0; run < kRuns; run++) { - CycleTimer ctimer, utimer; - - for (int b = 0; b < num_blocks; b++) { - // Pre-grow the output buffer so we don't measure string append time. - compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp)); - } - - ctimer.Start(); - for (int b = 0; b < num_blocks; b++) - for (int i = 0; i < repeats; i++) - Compress(input[b], input_length[b], comp, &compressed[b], true); - ctimer.Stop(); - - // Compress once more, with resizing, so we don't leave junk - // at the end that will confuse the decompressor. - for (int b = 0; b < num_blocks; b++) { - Compress(input[b], input_length[b], comp, &compressed[b], false); - } - - for (int b = 0; b < num_blocks; b++) { - output[b].resize(input_length[b]); - } - - utimer.Start(); - for (int i = 0; i < repeats; i++) - for (int b = 0; b < num_blocks; b++) - Uncompress(compressed[b], comp, input_length[b], &output[b]); - utimer.Stop(); - - ctime[run] = ctimer.Get(); - utime[run] = utimer.Get(); - } - - compressed_size = 0; - for (size_t i = 0; i < compressed.size(); i++) { - compressed_size += compressed[i].size(); - } - } - - std::sort(ctime, ctime + kRuns); - std::sort(utime, utime + kRuns); - const int med = kRuns/2; - - float comp_rate = (length / ctime[med]) * repeats / 1048576.0; - float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; - string x = names[comp]; - x += ":"; - string urate = (uncomp_rate >= 0) - ? 
StringPrintf("%.1f", uncomp_rate) - : string("?"); - printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " - "comp %5.1f MB/s uncomp %5s MB/s\n", - x.c_str(), - block_size/(1<<20), - static_cast(length), static_cast(compressed_size), - (compressed_size * 100.0) / std::max(1, length), - comp_rate, - urate.c_str()); -} - -static int VerifyString(const string& input) { - string compressed; +int VerifyString(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -367,15 +113,15 @@ static int VerifyString(const string& input) { snappy::MaxCompressedLength(input.size())); CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - string uncompressed; + std::string uncompressed; DataEndingAtUnreadablePage c(compressed); CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); CHECK_EQ(uncompressed, input); return uncompressed.size(); } -static void VerifyStringSink(const string& input) { - string compressed; +void VerifyStringSink(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -383,7 +129,7 @@ static void VerifyStringSink(const string& input) { snappy::MaxCompressedLength(input.size())); CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - string uncompressed; + std::string uncompressed; uncompressed.resize(input.size()); snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed)); DataEndingAtUnreadablePage c(compressed); @@ -392,41 +138,67 @@ static void VerifyStringSink(const string& input) { CHECK_EQ(uncompressed, input); } -static void VerifyIOVec(const string& input) { - string compressed; - DataEndingAtUnreadablePage i(input); - const size_t written = snappy::Compress(i.data(), i.size(), &compressed); - CHECK_EQ(written, compressed.size()); - CHECK_LE(compressed.size(), - snappy::MaxCompressedLength(input.size())); - CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - - // Try uncompressing into an iovec containing a random number of entries - // ranging from 1 to 10. - char* buf = new char[input.size()]; - ACMRandom rnd(input.size()); - size_t num = rnd.Next() % 10 + 1; +struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) { + std::minstd_rand0 rng(input.size()); + std::uniform_int_distribution uniform_1_to_10(1, 10); + num = uniform_1_to_10(rng); if (input.size() < num) { num = input.size(); } struct iovec* iov = new iovec[num]; - int used_so_far = 0; + size_t used_so_far = 0; + std::bernoulli_distribution one_in_five(1.0 / 5); for (size_t i = 0; i < num; ++i) { + assert(used_so_far < input.size()); iov[i].iov_base = buf + used_so_far; if (i == num - 1) { iov[i].iov_len = input.size() - used_so_far; } else { // Randomly choose to insert a 0 byte entry. 
- if (rnd.OneIn(5)) { + if (one_in_five(rng)) { iov[i].iov_len = 0; } else { - iov[i].iov_len = rnd.Uniform(input.size()); + std::uniform_int_distribution uniform_not_used_so_far( + 0, input.size() - used_so_far - 1); + iov[i].iov_len = uniform_not_used_so_far(rng); } } used_so_far += iov[i].iov_len; } - CHECK(snappy::RawUncompressToIOVec( - compressed.data(), compressed.size(), iov, num)); + return iov; +} + +int VerifyIOVecSource(const std::string& input) { + std::string compressed; + std::string copy = input; + char* buf = const_cast(copy.data()); + size_t num = 0; + struct iovec* iov = GetIOVec(input, buf, num); + const size_t written = snappy::CompressFromIOVec(iov, num, &compressed); + CHECK_EQ(written, compressed.size()); + CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size())); + CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); + + std::string uncompressed; + DataEndingAtUnreadablePage c(compressed); + CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); + CHECK_EQ(uncompressed, input); + delete[] iov; + return uncompressed.size(); +} + +void VerifyIOVecSink(const std::string& input) { + std::string compressed; + DataEndingAtUnreadablePage i(input); + const size_t written = snappy::Compress(i.data(), i.size(), &compressed); + CHECK_EQ(written, compressed.size()); + CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size())); + CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); + char* buf = new char[input.size()]; + size_t num = 0; + struct iovec* iov = GetIOVec(input, buf, num); + CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov, + num)); CHECK(!memcmp(buf, input.data(), input.size())); delete[] iov; delete[] buf; @@ -434,22 +206,22 @@ static void VerifyIOVec(const string& input) { // Test that data compressed by a compressor that does not // obey block sizes is uncompressed properly. -static void VerifyNonBlockedCompression(const string& input) { +void VerifyNonBlockedCompression(const std::string& input) { if (input.length() > snappy::kBlockSize) { // We cannot test larger blocks than the maximum block size, obviously. 
return; } - string prefix; + std::string prefix; Varint::Append32(&prefix, input.size()); // Setup compression table - snappy::internal::WorkingMemory wmem; + snappy::internal::WorkingMemory wmem(input.size()); int table_size; - uint16* table = wmem.GetHashTable(input.size(), &table_size); + uint16_t* table = wmem.GetHashTable(input.size(), &table_size); // Compress entire input in one shot - string compressed; + std::string compressed; compressed += prefix; compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size())); char* dest = string_as_array(&compressed) + prefix.size(); @@ -457,13 +229,13 @@ static void VerifyNonBlockedCompression(const string& input) { dest, table, table_size); compressed.resize(end - compressed.data()); - // Uncompress into string - string uncomp_str; + // Uncompress into std::string + std::string uncomp_str; CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str)); CHECK_EQ(uncomp_str, input); // Uncompress using source/sink - string uncomp_str2; + std::string uncomp_str2; uncomp_str2.resize(input.size()); snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2)); snappy::ByteArraySource source(compressed.data(), compressed.size()); @@ -475,62 +247,64 @@ static void VerifyNonBlockedCompression(const string& input) { static const int kNumBlocks = 10; struct iovec vec[kNumBlocks]; const int block_size = 1 + input.size() / kNumBlocks; - string iovec_data(block_size * kNumBlocks, 'x'); - for (int i = 0; i < kNumBlocks; i++) { + std::string iovec_data(block_size * kNumBlocks, 'x'); + for (int i = 0; i < kNumBlocks; ++i) { vec[i].iov_base = string_as_array(&iovec_data) + i * block_size; vec[i].iov_len = block_size; } CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), vec, kNumBlocks)); - CHECK_EQ(string(iovec_data.data(), input.size()), input); + CHECK_EQ(std::string(iovec_data.data(), input.size()), input); } } // Expand the input so that it is at least K times as big as block size -static string Expand(const string& input) { +std::string Expand(const std::string& input) { static const int K = 3; - string data = input; + std::string data = input; while (data.size() < K * snappy::kBlockSize) { data += input; } return data; } -static int Verify(const string& input) { +int Verify(const std::string& input) { VLOG(1) << "Verifying input of size " << input.size(); // Compress using string based routines const int result = VerifyString(input); + // Compress using `iovec`-based routines. + CHECK_EQ(VerifyIOVecSource(input), result); + // Verify using sink based routines VerifyStringSink(input); VerifyNonBlockedCompression(input); - VerifyIOVec(input); + VerifyIOVecSink(input); if (!input.empty()) { - const string expanded = Expand(input); + const std::string expanded = Expand(input); VerifyNonBlockedCompression(expanded); - VerifyIOVec(input); + VerifyIOVecSink(input); } return result; } - -static bool IsValidCompressedBuffer(const string& c) { +bool IsValidCompressedBuffer(const std::string& c) { return snappy::IsValidCompressedBuffer(c.data(), c.size()); } -static bool Uncompress(const string& c, string* u) { +bool Uncompress(const std::string& c, std::string* u) { return snappy::Uncompress(c.data(), c.size(), u); } // This test checks to ensure that snappy doesn't coredump if it gets // corrupted data. 
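// The expected contract, sketched from the caller's side (hypothetical
// snippet; both functions are part of snappy's public API):
//
//   std::string out;
//   if (snappy::IsValidCompressedBuffer(c.data(), c.size()) &&
//       snappy::Uncompress(c.data(), c.size(), &out)) {
//     // use `out`
//   }  // otherwise reject the input -- but never crash or over-allocate.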
TEST(CorruptedTest, VerifyCorrupted) { - string source = "making sure we don't crash with corrupted input"; + std::string source = "making sure we don't crash with corrupted input"; VLOG(1) << source; - string dest; - string uncmp; + std::string dest; + std::string uncmp; snappy::Compress(source.data(), source.size(), &dest); // Mess around with the data. It's hard to simulate all possible @@ -545,8 +319,8 @@ TEST(CorruptedTest, VerifyCorrupted) { // This is testing for a security bug - a buffer that decompresses to 100k // but we lie in the snappy header and only reserve 0 bytes of memory :) source.resize(100000); - for (size_t i = 0; i < source.length(); ++i) { - source[i] = 'A'; + for (char& source_char : source) { + source_char = 'A'; } snappy::Compress(source.data(), source.size(), &dest); dest[0] = dest[1] = dest[2] = dest[3] = 0; @@ -577,14 +351,14 @@ TEST(CorruptedTest, VerifyCorrupted) { // try reading stuff in from a bad file. for (int i = 1; i <= 3; ++i) { - string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(), - 0); - string uncmp; + std::string data = + ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0); + std::string uncmp; // check that we don't return a crazy length size_t ulen; CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen) || (ulen < (1<<20))); - uint32 ulen2; + uint32_t ulen2; snappy::ByteArraySource source(data.data(), data.size()); CHECK(!snappy::GetUncompressedLength(&source, &ulen2) || (ulen2 < (1<<20))); @@ -597,7 +371,7 @@ TEST(CorruptedTest, VerifyCorrupted) { // These mirror the compression code in snappy.cc, but are copied // here so that we can bypass some limitations in the how snappy.cc // invokes these routines. -static void AppendLiteral(string* dst, const string& literal) { +void AppendLiteral(std::string* dst, const std::string& literal) { if (literal.empty()) return; int n = literal.size() - 1; if (n < 60) { @@ -612,12 +386,12 @@ static void AppendLiteral(string* dst, const string& literal) { n >>= 8; } dst->push_back(0 | ((59+count) << 2)); - *dst += string(number, count); + *dst += std::string(number, count); } *dst += literal; } -static void AppendCopy(string* dst, int offset, int length) { +void AppendCopy(std::string* dst, int offset, int length) { while (length > 0) { // Figure out how much to copy in one shot int to_copy; @@ -654,51 +428,114 @@ TEST(Snappy, SimpleTests) { Verify("ab"); Verify("abc"); - Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); - Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); + Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); +} + +// Regression test for cr/345340892. +TEST(Snappy, AppendSelfPatternExtensionEdgeCases) { + Verify("abcabcabcabcabcabcab"); + Verify("abcabcabcabcabcabcab0123456789ABCDEF"); + + Verify("abcabcabcabcabcabcabcabcabcabcabcabc"); + Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF"); +} + +// Regression test for cr/345340892. 
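+// (Exhaustively covers copies whose offset is smaller than their length,
+// i.e. copies that extend a repeating pattern over itself.)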
+TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) { + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) { + for (int length = 1; length <= 64; ++length) { + for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) { + const int size = pattern_size + length + extra_bytes_after_pattern; + std::string input; + input.resize(size); + for (int i = 0; i < pattern_size; ++i) { + input[i] = 'a' + i; + } + for (int i = 0; i < length; ++i) { + input[pattern_size + i] = input[i]; + } + for (int i = 0; i < extra_bytes_after_pattern; ++i) { + input[pattern_size + length + i] = + static_cast(uniform_byte(rng)); + } + Verify(input); + } + } + } } // Verify max blowup (lots of four-byte copies) TEST(Snappy, MaxBlowup) { - string input; - for (int i = 0; i < 20000; i++) { - ACMRandom rnd(i); - uint32 bytes = static_cast(rnd.Next()); - input.append(reinterpret_cast(&bytes), sizeof(bytes)); - } - for (int i = 19999; i >= 0; i--) { - ACMRandom rnd(i); - uint32 bytes = static_cast(rnd.Next()); - input.append(reinterpret_cast(&bytes), sizeof(bytes)); + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + std::string input; + for (int i = 0; i < 80000; ++i) + input.push_back(static_cast(uniform_byte(rng))); + + for (int i = 0; i < 80000; i += 4) { + std::string four_bytes(input.end() - i - 4, input.end() - i); + input.append(four_bytes); } Verify(input); } -TEST(Snappy, RandomData) { - ACMRandom rnd(FLAGS_test_random_seed); +// Issue #201, when output is more than 4GB, we had a data corruption bug. +// We cannot run this test always because of CI constraints. +TEST(Snappy, DISABLED_MoreThan4GB) { + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + std::string input; + input.resize((1ull << 32) - 1); + for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i) + input[i] = static_cast(uniform_byte(rng)); + Verify(input); +} - const int num_ops = 20000; - for (int i = 0; i < num_ops; i++) { +TEST(Snappy, RandomData) { + std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); + std::uniform_int_distribution uniform_0_to_3(0, 3); + std::uniform_int_distribution uniform_0_to_8(0, 8); + std::uniform_int_distribution uniform_byte(0, 255); + std::uniform_int_distribution uniform_4k(0, 4095); + std::uniform_int_distribution uniform_64k(0, 65535); + std::bernoulli_distribution one_in_ten(1.0 / 10); + + constexpr int num_ops = 20000; + for (int i = 0; i < num_ops; ++i) { if ((i % 1000) == 0) { VLOG(0) << "Random op " << i << " of " << num_ops; } - string x; - size_t len = rnd.Uniform(4096); + std::string x; + size_t len = uniform_4k(rng); if (i < 100) { - len = 65536 + rnd.Uniform(65536); + len = 65536 + uniform_64k(rng); } while (x.size() < len) { int run_len = 1; - if (rnd.OneIn(10)) { - run_len = rnd.Skewed(8); + if (one_in_ten(rng)) { + int skewed_bits = uniform_0_to_8(rng); + // int is guaranteed to hold at least 16 bits, this uses at most 8 bits. + std::uniform_int_distribution skewed_low(0, + (1 << skewed_bits) - 1); + run_len = skewed_low(rng); + } + char c = static_cast(uniform_byte(rng)); + if (i >= 100) { + int skewed_bits = uniform_0_to_3(rng); + // int is guaranteed to hold at least 16 bits, this uses at most 3 bits. + std::uniform_int_distribution skewed_low(0, + (1 << skewed_bits) - 1); + c = static_cast(skewed_low(rng)); } - char c = (i < 100) ? 
rnd.Uniform(256) : rnd.Skewed(3); while (run_len-- > 0 && x.size() < len) { - x += c; + x.push_back(c); } } @@ -712,20 +549,20 @@ TEST(Snappy, FourByteOffset) { // copy manually. // The two fragments that make up the input string. - string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; - string fragment2 = "some other string"; + std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; + std::string fragment2 = "some other string"; // How many times each fragment is emitted. const int n1 = 2; const int n2 = 100000 / fragment2.size(); - const int length = n1 * fragment1.size() + n2 * fragment2.size(); + const size_t length = n1 * fragment1.size() + n2 * fragment2.size(); - string compressed; + std::string compressed; Varint::Append32(&compressed, length); AppendLiteral(&compressed, fragment1); - string src = fragment1; - for (int i = 0; i < n2; i++) { + std::string src = fragment1; + for (int i = 0; i < n2; ++i) { AppendLiteral(&compressed, fragment2); src += fragment2; } @@ -733,14 +570,34 @@ TEST(Snappy, FourByteOffset) { src += fragment1; CHECK_EQ(length, src.size()); - string uncompressed; + std::string uncompressed; CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed)); CHECK_EQ(uncompressed, src); } -TEST(Snappy, IOVecEdgeCases) { +TEST(Snappy, IOVecSourceEdgeCases) { + // Validate that empty leading, trailing, and in-between iovecs are handled: + // [] [] ['a'] [] ['b'] []. + std::string data = "ab"; + char* buf = const_cast(data.data()); + size_t used_so_far = 0; + static const int kLengths[] = {0, 0, 1, 0, 1, 0}; + struct iovec iov[ARRAYSIZE(kLengths)]; + for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { + iov[i].iov_base = buf + used_so_far; + iov[i].iov_len = kLengths[i]; + used_so_far += kLengths[i]; + } + std::string compressed; + snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed); + std::string uncompressed; + snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed); + CHECK_EQ(data, uncompressed); +} + +TEST(Snappy, IOVecSinkEdgeCases) { // Test some tricky edge cases in the iovec output that are not necessarily // exercised by random tests. @@ -755,7 +612,7 @@ TEST(Snappy, IOVecEdgeCases) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 22); // A literal whose output crosses three blocks. 
@@ -816,7 +673,7 @@ TEST(Snappy, IOVecLiteralOverflow) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 8); AppendLiteral(&compressed, "12345678"); @@ -838,7 +695,7 @@ TEST(Snappy, IOVecCopyOverflow) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 8); AppendLiteral(&compressed, "123"); @@ -852,21 +709,20 @@ TEST(Snappy, IOVecCopyOverflow) { } } -static bool CheckUncompressedLength(const string& compressed, - size_t* ulength) { +bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) { const bool result1 = snappy::GetUncompressedLength(compressed.data(), compressed.size(), ulength); snappy::ByteArraySource source(compressed.data(), compressed.size()); - uint32 length; + uint32_t length; const bool result2 = snappy::GetUncompressedLength(&source, &length); CHECK_EQ(result1, result2); return result1; } TEST(SnappyCorruption, TruncatedVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\xf0'); CHECK(!CheckUncompressedLength(compressed, &ulength)); @@ -876,7 +732,7 @@ TEST(SnappyCorruption, TruncatedVarint) { } TEST(SnappyCorruption, UnterminatedVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\x80'); compressed.push_back('\x80'); @@ -891,7 +747,7 @@ TEST(SnappyCorruption, UnterminatedVarint) { } TEST(SnappyCorruption, OverflowingVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\xfb'); compressed.push_back('\xff'); @@ -908,14 +764,14 @@ TEST(Snappy, ReadPastEndOfBuffer) { // Check that we do not read past end of input // Make a compressed string that ends with a single-byte literal - string compressed; + std::string compressed; Varint::Append32(&compressed, 1); AppendLiteral(&compressed, "x"); - string uncompressed; + std::string uncompressed; DataEndingAtUnreadablePage c(compressed); CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); - CHECK_EQ(uncompressed, string("x")); + CHECK_EQ(uncompressed, std::string("x")); } // Check for an infinite loop caused by a copy with offset==0 @@ -934,17 +790,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) { EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4)); } -namespace { - int TestFindMatchLength(const char* s1, const char *s2, unsigned length) { + uint64_t data; std::pair p = - snappy::internal::FindMatchLength(s1, s2, s2 + length); + snappy::internal::FindMatchLength(s1, s2, s2 + length, &data); CHECK_EQ(p.first < 8, p.second); return p.first; } -} // namespace - TEST(Snappy, FindMatchLength) { // Exercise all different code paths through the function. // 64-bit version: @@ -1036,35 +889,37 @@ TEST(Snappy, FindMatchLength) { } TEST(Snappy, FindMatchLengthRandom) { - const int kNumTrials = 10000; - const int kTypicalLength = 10; - ACMRandom rnd(FLAGS_test_random_seed); - - for (int i = 0; i < kNumTrials; i++) { - string s, t; - char a = rnd.Rand8(); - char b = rnd.Rand8(); - while (!rnd.OneIn(kTypicalLength)) { - s.push_back(rnd.OneIn(2) ? a : b); - t.push_back(rnd.OneIn(2) ? 
a : b); + constexpr int kNumTrials = 10000; + constexpr int kTypicalLength = 10; + std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); + std::uniform_int_distribution uniform_byte(0, 255); + std::bernoulli_distribution one_in_two(1.0 / 2); + std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength); + + for (int i = 0; i < kNumTrials; ++i) { + std::string s, t; + char a = static_cast(uniform_byte(rng)); + char b = static_cast(uniform_byte(rng)); + while (!one_in_typical_length(rng)) { + s.push_back(one_in_two(rng) ? a : b); + t.push_back(one_in_two(rng) ? a : b); } DataEndingAtUnreadablePage u(s); DataEndingAtUnreadablePage v(t); - int matched = TestFindMatchLength(u.data(), v.data(), t.size()); + size_t matched = TestFindMatchLength(u.data(), v.data(), t.size()); if (matched == t.size()) { EXPECT_EQ(s, t); } else { EXPECT_NE(s[matched], t[matched]); - for (int j = 0; j < matched; j++) { + for (size_t j = 0; j < matched; ++j) { EXPECT_EQ(s[j], t[j]); } } } } -static uint16 MakeEntry(unsigned int extra, - unsigned int len, - unsigned int copy_offset) { +uint16_t MakeEntry(unsigned int extra, unsigned int len, + unsigned int copy_offset) { // Check that all of the fields fit within the allocated space assert(extra == (extra & 0x7)); // At most 3 bits assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits @@ -1081,329 +936,88 @@ TEST(Snappy, VerifyCharTable) { using snappy::internal::COPY_4_BYTE_OFFSET; using snappy::internal::char_table; - uint16 dst[256]; + uint16_t dst[256]; // Place invalid entries in all places to detect missing initialization int assigned = 0; - for (int i = 0; i < 256; i++) { + for (int i = 0; i < 256; ++i) { dst[i] = 0xffff; } // Small LITERAL entries. We store (len-1) in the top 6 bits. - for (unsigned int len = 1; len <= 60; len++) { - dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0); + for (uint8_t len = 1; len <= 60; ++len) { + dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0); assigned++; } // Large LITERAL entries. We use 60..63 in the high 6 bits to // encode the number of bytes of length info that follow the opcode. - for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) { + for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) { // We set the length field in the lookup table to 1 because extra // bytes encode len-1. - dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0); + dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0); assigned++; } // COPY_1_BYTE_OFFSET. // // The tag byte in the compressed data stores len-4 in 3 bits, and - // offset/256 in 5 bits. offset%256 is stored in the next byte. + // offset/256 in 3 bits. offset%256 is stored in the next byte. // // This format is used for length in range [4..11] and offset in // range [0..2047] - for (unsigned int len = 4; len < 12; len++) { - for (unsigned int offset = 0; offset < 2048; offset += 256) { - dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] = - MakeEntry(1, len, offset>>8); + for (uint8_t len = 4; len < 12; ++len) { + for (uint16_t offset = 0; offset < 2048; offset += 256) { + uint8_t offset_high = static_cast(offset >> 8); + dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] = + MakeEntry(1, len, offset_high); assigned++; } } // COPY_2_BYTE_OFFSET. // Tag contains len-1 in top 6 bits, and offset in next two bytes. 
-static uint16 MakeEntry(unsigned int extra,
-                        unsigned int len,
-                        unsigned int copy_offset) {
+uint16_t MakeEntry(unsigned int extra, unsigned int len,
+                   unsigned int copy_offset) {
   // Check that all of the fields fit within the allocated space
   assert(extra       == (extra & 0x7));          // At most 3 bits
   assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
@@ -1081,329 +936,88 @@ TEST(Snappy, VerifyCharTable) {
   using snappy::internal::COPY_4_BYTE_OFFSET;
   using snappy::internal::char_table;
 
-  uint16 dst[256];
+  uint16_t dst[256];
 
   // Place invalid entries in all places to detect missing initialization
   int assigned = 0;
-  for (int i = 0; i < 256; i++) {
+  for (int i = 0; i < 256; ++i) {
     dst[i] = 0xffff;
   }
 
   // Small LITERAL entries.  We store (len-1) in the top 6 bits.
-  for (unsigned int len = 1; len <= 60; len++) {
-    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
+  for (uint8_t len = 1; len <= 60; ++len) {
+    dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
     assigned++;
   }
 
   // Large LITERAL entries.  We use 60..63 in the high 6 bits to
   // encode the number of bytes of length info that follow the opcode.
-  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
+  for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
     // We set the length field in the lookup table to 1 because extra
     // bytes encode len-1.
-    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
+    dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
     assigned++;
   }
 
   // COPY_1_BYTE_OFFSET.
   //
   // The tag byte in the compressed data stores len-4 in 3 bits, and
-  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
+  // offset/256 in 3 bits.  offset%256 is stored in the next byte.
   //
   // This format is used for length in range [4..11] and offset in
   // range [0..2047]
-  for (unsigned int len = 4; len < 12; len++) {
-    for (unsigned int offset = 0; offset < 2048; offset += 256) {
-      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
-          MakeEntry(1, len, offset>>8);
+  for (uint8_t len = 4; len < 12; ++len) {
+    for (uint16_t offset = 0; offset < 2048; offset += 256) {
+      uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
+      dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
+          MakeEntry(1, len, offset_high);
       assigned++;
     }
   }
 
   // COPY_2_BYTE_OFFSET.
   // Tag contains len-1 in top 6 bits, and offset in next two bytes.
-  for (unsigned int len = 1; len <= 64; len++) {
-    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
+  for (uint8_t len = 1; len <= 64; ++len) {
+    dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
     assigned++;
   }
 
   // COPY_4_BYTE_OFFSET.
   // Tag contents len-1 in top 6 bits, and offset in next four bytes.
-  for (unsigned int len = 1; len <= 64; len++) {
-    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
+  for (uint8_t len = 1; len <= 64; ++len) {
+    dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
     assigned++;
   }
 
   // Check that each entry was initialized exactly once.
   EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
-  for (int i = 0; i < 256; i++) {
+  for (int i = 0; i < 256; ++i) {
     EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
   }
 
-  if (FLAGS_snappy_dump_decompression_table) {
-    printf("static const uint16 char_table[256] = {\n  ");
-    for (int i = 0; i < 256; i++) {
-      printf("0x%04x%s",
-             dst[i],
-             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
+  if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
+    std::printf("static const uint16_t char_table[256] = {\n  ");
+    for (int i = 0; i < 256; ++i) {
+      std::printf("0x%04x%s",
+                  dst[i],
+                  ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n  " : ", ")));
     }
-    printf("};\n");
+    std::printf("};\n");
   }
 
   // Check that computed table matched recorded table.
-  for (int i = 0; i < 256; i++) {
+  for (int i = 0; i < 256; ++i) {
     EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
   }
 }
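To make that tag-byte layout concrete, here is a hypothetical decoder for a single COPY_1_BYTE_OFFSET element, matching the corrected comment above (the low two bits select the element type; this helper is illustrative and not part of snappy):

    #include <cassert>
    #include <cstdint>

    // Unpacks a COPY_1_BYTE_OFFSET element: type in bits [0,2), len-4 in
    // bits [2,5), offset/256 in bits [5,8), offset%256 in the next byte.
    void DecodeCopy1(uint8_t tag, uint8_t next_byte,
                     unsigned* len, unsigned* offset) {
      assert((tag & 0x03) == 0x01);  // COPY_1_BYTE_OFFSET type code
      *len = ((tag >> 2) & 0x07) + 4;                          // lengths 4..11
      *offset = (static_cast<unsigned>(tag >> 5) << 8) | next_byte;  // 0..2047
    }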
-static void CompressFile(const char* fname) {
-  string fullinput;
-  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
-
-  string compressed;
-  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
-
-  CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
-                             file::Defaults()));
-}
-
-static void UncompressFile(const char* fname) {
-  string fullinput;
-  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
-
-  size_t uncompLength;
-  CHECK(CheckUncompressedLength(fullinput, &uncompLength));
-
-  string uncompressed;
-  uncompressed.resize(uncompLength);
-  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
-
-  CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
-                             file::Defaults()));
-}
-
-static void MeasureFile(const char* fname) {
-  string fullinput;
-  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
-  printf("%-40s :\n", fname);
-
-  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
-  int end_len = fullinput.size();
-  if (FLAGS_end_len >= 0) {
-    end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
-  }
-  for (int len = start_len; len <= end_len; len++) {
-    const char* const input = fullinput.data();
-    int repeats = (FLAGS_bytes + len) / (len + 1);
-    if (FLAGS_zlib)     Measure(input, len, ZLIB, repeats, 1024<<10);
-    if (FLAGS_lzo)      Measure(input, len, LZO, repeats, 1024<<10);
-    if (FLAGS_snappy)   Measure(input, len, SNAPPY, repeats, 4096<<10);
-
-    // For block-size based measurements
-    if (0 && FLAGS_snappy) {
-      Measure(input, len, SNAPPY, repeats, 8<<10);
-      Measure(input, len, SNAPPY, repeats, 16<<10);
-      Measure(input, len, SNAPPY, repeats, 32<<10);
-      Measure(input, len, SNAPPY, repeats, 64<<10);
-      Measure(input, len, SNAPPY, repeats, 256<<10);
-      Measure(input, len, SNAPPY, repeats, 1024<<10);
-    }
-  }
-}
-
-static struct {
-  const char* label;
-  const char* filename;
-  size_t size_limit;
-} files[] = {
-  { "html", "html", 0 },
-  { "urls", "urls.10K", 0 },
-  { "jpg", "fireworks.jpeg", 0 },
-  { "jpg_200", "fireworks.jpeg", 200 },
-  { "pdf", "paper-100k.pdf", 0 },
-  { "html4", "html_x_4", 0 },
-  { "txt1", "alice29.txt", 0 },
-  { "txt2", "asyoulik.txt", 0 },
-  { "txt3", "lcet10.txt", 0 },
-  { "txt4", "plrabn12.txt", 0 },
-  { "pb", "geo.protodata", 0 },
-  { "gaviota", "kppkn.gtb", 0 },
-};
-
-static void BM_UFlat(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-  char* dst = new char[contents.size()];
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
-  }
-  StopBenchmarkTiming();
-
-  delete[] dst;
-}
-BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
-
-static void BM_UValidate(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
-  }
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_UValidate)->DenseRange(0, 4);
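The removed BM_UIOVec below timed snappy::RawUncompressToIOVec, which scatters the uncompressed output across caller-supplied buffers rather than one flat array. A minimal usage sketch, with illustrative buffer names, assuming a POSIX struct iovec:

    #include <sys/uio.h>
    #include <string>
    #include "snappy.h"

    // Decompresses into two caller-owned buffers; the iovec entries must
    // together provide room for the full uncompressed length, or this fails.
    bool UncompressSplit(const std::string& zdata,
                         char* front, size_t front_len,
                         char* back, size_t back_len) {
      struct iovec iov[2];
      iov[0].iov_base = front;
      iov[0].iov_len = front_len;
      iov[1].iov_base = back;
      iov[1].iov_len = back_len;
      return snappy::RawUncompressToIOVec(zdata.data(), zdata.size(), iov, 2);
    }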
-
-static void BM_UIOVec(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-
-  // Uncompress into an iovec containing ten entries.
-  const int kNumEntries = 10;
-  struct iovec iov[kNumEntries];
-  char *dst = new char[contents.size()];
-  int used_so_far = 0;
-  for (int i = 0; i < kNumEntries; ++i) {
-    iov[i].iov_base = dst + used_so_far;
-    if (used_so_far == contents.size()) {
-      iov[i].iov_len = 0;
-      continue;
-    }
-
-    if (i == kNumEntries - 1) {
-      iov[i].iov_len = contents.size() - used_so_far;
-    } else {
-      iov[i].iov_len = contents.size() / kNumEntries;
-    }
-    used_so_far += iov[i].iov_len;
-  }
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
-                                       kNumEntries));
-  }
-  StopBenchmarkTiming();
-
-  delete[] dst;
-}
-BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
-
-static void BM_UFlatSink(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-  char* dst = new char[contents.size()];
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    snappy::ByteArraySource source(zcontents.data(), zcontents.size());
-    snappy::UncheckedByteArraySink sink(dst);
-    CHECK(snappy::Uncompress(&source, &sink));
+TEST(Snappy, TestBenchmarkFiles) {
+  for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
+    Verify(ReadTestDataFile(kTestDataFiles[i].filename,
+                            kTestDataFiles[i].size_limit));
   }
-  StopBenchmarkTiming();
-
-  string s(dst, contents.size());
-  CHECK_EQ(contents, s);
-
-  delete[] dst;
 }
-BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
-
-static void BM_ZFlat(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  char* dst = new char[snappy::MaxCompressedLength(contents.size())];
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  StartBenchmarkTiming();
-
-  size_t zsize = 0;
-  while (iters-- > 0) {
-    snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
-  }
-  StopBenchmarkTiming();
-  const double compression_ratio =
-      static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
-  SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
-                                 files[arg].label, 100.0 * compression_ratio));
-  VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
-                          files[arg].label, contents.size(), zsize);
-  delete[] dst;
-}
-BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
+}  // namespace
 
 }  // namespace snappy
-
-int main(int argc, char** argv) {
-  InitGoogle(argv[0], &argc, &argv, true);
-  RunSpecifiedBenchmarks();
-
-  if (argc >= 2) {
-    for (int arg = 1; arg < argc; arg++) {
-      if (FLAGS_write_compressed) {
-        snappy::CompressFile(argv[arg]);
-      } else if (FLAGS_write_uncompressed) {
-        snappy::UncompressFile(argv[arg]);
-      } else {
-        snappy::MeasureFile(argv[arg]);
-      }
-    }
-    return 0;
-  }
-
-  return RUN_ALL_TESTS();
-}
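With the intrusive BM_* harness gone from this file, benchmarking presumably moves to the Google Benchmark submodule added below (the CI workflow in this change runs a separate snappy_benchmark binary). A hypothetical port of BM_UFlat in that style, with a synthetic input standing in for ReadTestDataFile, might look like:

    #include <cstdint>
    #include <string>
    #include <vector>

    #include "benchmark/benchmark.h"
    #include "snappy.h"

    // Illustrative sketch only; not the actual contents of snappy_benchmark.cc.
    void BM_UFlat(benchmark::State& state) {
      std::string contents(1 << 20, 'a');  // synthetic, compressible input
      std::string zcontents;
      snappy::Compress(contents.data(), contents.size(), &zcontents);
      std::vector<char> dst(contents.size());
      for (auto _ : state) {
        snappy::RawUncompress(zcontents.data(), zcontents.size(), dst.data());
      }
      state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
                              static_cast<int64_t>(contents.size()));
    }
    BENCHMARK(BM_UFlat);
    BENCHMARK_MAIN();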
diff --git a/third_party/benchmark b/third_party/benchmark
new file mode 160000
index 0000000..b20cea6
--- /dev/null
+++ b/third_party/benchmark
@@ -0,0 +1 @@
+Subproject commit b20cea674170b2ba45da0dfaf03953cdea473d0d
diff --git a/third_party/googletest b/third_party/googletest
new file mode 160000
index 0000000..b796f7d
--- /dev/null
+++ b/third_party/googletest
@@ -0,0 +1 @@
+Subproject commit b796f7d44681514f58a683a3a71ff17c94edb0c1