diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000..673b332 --- /dev/null +++ b/.bazelrc @@ -0,0 +1,4 @@ +# googletest requires C++14 or above +build --cxxopt='-std=c++17' +# Enable Bzlmod for every Bazel command +common --enable_bzlmod diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..fc2ad10 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,135 @@ +# Copyright 2021 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: ci +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build-and-test: + name: >- + CI + ${{ matrix.os }} + ${{ matrix.cpu_level }} + ${{ matrix.compiler }} + ${{ matrix.optimized && 'release' || 'debug' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + compiler: [clang, gcc, msvc] + os: [ubuntu-latest, macos-latest, windows-latest] + cpu_level: [baseline, avx, avx2] + optimized: [true, false] + exclude: + # MSVC only works on Windows. + - os: ubuntu-latest + compiler: msvc + - os: macos-latest + compiler: msvc + # GitHub servers seem to run on pre-Haswell CPUs. Attempting to use AVX2 + # results in crashes. + - os: macos-latest + cpu_level: avx2 + # Not testing with GCC on macOS. + - os: macos-latest + compiler: gcc + # Only testing with MSVC on Windows. 
+ - os: windows-latest + compiler: clang + - os: windows-latest + compiler: gcc + include: + - compiler: clang + CC: clang + CXX: clang++ + - compiler: gcc + CC: gcc + CXX: g++ + - compiler: msvc + CC: + CXX: + + env: + CMAKE_BUILD_DIR: ${{ github.workspace }}/build + CMAKE_BUILD_TYPE: ${{ matrix.optimized && 'RelWithDebInfo' || 'Debug' }} + CC: ${{ matrix.CC }} + CXX: ${{ matrix.CXX }} + SNAPPY_REQUIRE_AVX: ${{ matrix.cpu_level == 'baseline' && '0' || '1' }} + SNAPPY_REQUIRE_AVX2: ${{ matrix.cpu_level == 'avx2' && '1' || '0' }} + SNAPPY_FUZZING_BUILD: >- + ${{ (startsWith(matrix.os, 'ubuntu') && matrix.compiler == 'clang' && + !matrix.optimized) && '1' || '0' }} + BINARY_SUFFIX: ${{ startsWith(matrix.os, 'windows') && '.exe' || '' }} + BINARY_PATH: >- + ${{ format( + startsWith(matrix.os, 'windows') && '{0}\build\{1}\' || '{0}/build/', + github.workspace, + matrix.optimized && 'RelWithDebInfo' || 'Debug') }} + + steps: + - uses: actions/checkout@v2 + with: + submodules: true + + - name: Generate build config + run: >- + cmake -S "${{ github.workspace }}" -B "${{ env.CMAKE_BUILD_DIR }}" + -DCMAKE_BUILD_TYPE=${{ env.CMAKE_BUILD_TYPE }} + -DCMAKE_INSTALL_PREFIX=${{ runner.temp }}/install_test/ + -DSNAPPY_FUZZING_BUILD=${{ env.SNAPPY_FUZZING_BUILD }} + -DSNAPPY_REQUIRE_AVX=${{ env.SNAPPY_REQUIRE_AVX }} + -DSNAPPY_REQUIRE_AVX2=${{ env.SNAPPY_REQUIRE_AVX2 }} + + - name: Build + run: >- + cmake --build "${{ env.CMAKE_BUILD_DIR }}" + --config "${{ env.CMAKE_BUILD_TYPE }}" + + - name: Run C++ API Tests + run: ${{ env.BINARY_PATH }}snappy_unittest${{ env.BINARY_SUFFIX }} + + - name: Run Compression Fuzzer + if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }} + run: >- + ${{ env.BINARY_PATH }}snappy_compress_fuzzer${{ env.BINARY_SUFFIX }} + -runs=1000 -close_fd_mask=3 + + - name: Run Decompression Fuzzer + if: ${{ env.SNAPPY_FUZZING_BUILD == '1' }} + run: >- + ${{ env.BINARY_PATH }}snappy_uncompress_fuzzer${{ env.BINARY_SUFFIX }} + -runs=1000 -close_fd_mask=3 + + - name: Run Benchmarks + run: ${{ env.BINARY_PATH }}snappy_benchmark${{ env.BINARY_SUFFIX }} + + - name: Test CMake installation + run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --target install diff --git a/.github/workflows/riscv64-qemu-test.yaml b/.github/workflows/riscv64-qemu-test.yaml new file mode 100644 index 0000000..3b97480 --- /dev/null +++ b/.github/workflows/riscv64-qemu-test.yaml @@ -0,0 +1,43 @@ +name: riscv64-qemu-test + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + env: + RISCV_CROSSCOMPILE: "ON" + riscv_gnu_toolchain_download_path: https://github.com/riscv-collab/riscv-gnu-toolchain/releases/download/2025.07.03/riscv64-glibc-ubuntu-24.04-gcc-nightly-2025.07.03-nightly.tar.xz + RISCV_PATH: /opt/riscv + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y --no-install-recommends \ + qemu-user qemu-user-static \ + build-essential \ + cmake \ + git + sudo mkdir -p $RISCV_PATH + wget ${riscv_gnu_toolchain_download_path} -O riscv-toolchain.tar.xz + sudo tar -xvf riscv-toolchain.tar.xz -C $RISCV_PATH --strip-components=1 + sudo sed -i "s|libdir='/mnt/riscv/riscv64-unknown-linux-gnu/lib'|libdir='$RISCV_PATH/riscv64-unknown-linux-gnu/lib'|g" $RISCV_PATH/riscv64-unknown-linux-gnu/lib/libatomic.la + + - name: Build and Run Unit Tests + run: | + export PATH=$RISCV_PATH/bin:$PATH + export LD_LIBRARY_PATH="/opt/riscv/lib:$LD_LIBRARY_PATH" + export QEMU_LD_PREFIX=$RISCV_PATH/sysroot + mkdir build && cd build + cmake 
-DCMAKE_BUILD_TYPE=Release ../ + make -j$(nproc) + make test + + - name: Run Benchmark + run: ./build/snappy_benchmark + working-directory: ./ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c8cf0e --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Editors. +*.sw* +.vscode +.DS_Store + +# Build directory. +build/ +/bazel-* +MODULE.bazel.lock +out/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..06c3fd3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "third_party/benchmark"] + path = third_party/benchmark + url = https://github.com/google/benchmark.git +[submodule "third_party/googletest"] + path = third_party/googletest + url = https://github.com/google/googletest.git diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 88c28fd..0000000 --- a/.travis.yml +++ /dev/null @@ -1,53 +0,0 @@ -# Build matrix / environment variables are explained on: -# http://about.travis-ci.org/docs/user/build-configuration/ -# This file can be validated on: http://lint.travis-ci.org/ - -sudo: false -dist: trusty -language: cpp - -compiler: - - gcc - - clang -os: - - linux - - osx - -env: - - BUILD_TYPE=Debug - - BUILD_TYPE=RelWithDebInfo - -matrix: - allow_failures: - - compiler: clang - env: BUILD_TYPE=RelWithDebInfo - -addons: - apt: - # List of whitelisted in travis packages for ubuntu-precise can be found here: - # https://github.com/travis-ci/apt-package-whitelist/blob/master/ubuntu-precise - # List of whitelisted in travis apt-sources: - # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json - sources: - - ubuntu-toolchain-r-test - - llvm-toolchain-trusty-4.0 - packages: - - cmake - - gcc-6 - - g++-6 - - clang-4.0 - -install: -# Travis doesn't have a nice way to install homebrew packages yet. -# https://github.com/travis-ci/travis-ci/issues/5377 -- if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update; fi -- if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew install gcc@6; fi -# /usr/bin/gcc is stuck to old versions by on both Linux and OSX. -- if [ "$CXX" = "g++" ]; then export CXX="g++-6" CC="gcc-6"; fi -- echo ${CC} -- echo ${CXX} -- ${CXX} --version - -script: -- mkdir -p build && cd build && cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE && - CTEST_OUTPUT_ON_FAILURE=1 make all test \ No newline at end of file diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 0000000..e6622ff --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,211 @@ +# Copyright 2023 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +SNAPPY_VERSION = (1, 2, 2) + +config_setting( + name = "windows", + constraint_values = ["@platforms//os:windows"], +) + +cc_library( + name = "config", + hdrs = ["config.h"], + defines = ["HAVE_CONFIG_H"], +) + +cc_library( + name = "snappy-stubs-public", + hdrs = [":snappy-stubs-public.h"], +) + +cc_library( + name = "snappy-stubs-internal", + srcs = ["snappy-stubs-internal.cc"], + hdrs = ["snappy-stubs-internal.h"], + deps = [ + ":config", + ":snappy-stubs-public", + ], +) + +cc_library( + name = "snappy", + srcs = [ + "snappy.cc", + "snappy-internal.h", + "snappy-sinksource.cc", + ], + hdrs = [ + "snappy.h", + "snappy-sinksource.h", + ], + copts = select({ + ":windows": [], + "//conditions:default": [ + "-Wno-sign-compare", + ], + }), + deps = [ + ":config", + ":snappy-stubs-internal", + ":snappy-stubs-public", + ], +) + +cc_library( + name = "snappy-c", + srcs = ["snappy-c.cc"], + hdrs = ["snappy-c.h"], + deps = [":snappy"], +) + +filegroup( + name = "testdata", + srcs = glob(["testdata/*"]), +) + +cc_library( + name = "snappy-test", + testonly = True, + srcs = [ + "snappy-test.cc", + "snappy_test_data.cc", + ], + hdrs = [ + "snappy-test.h", + "snappy_test_data.h", + ], + deps = [":snappy-stubs-internal"], +) + +cc_test( + name = "snappy_benchmark", + srcs = ["snappy_benchmark.cc"], + data = [":testdata"], + deps = [ + ":snappy", + ":snappy-test", + "@com_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "snappy_unittest", + srcs = [ + "snappy_unittest.cc", + ], + data = [":testdata"], + deps = [ + ":snappy", + ":snappy-test", + "@com_google_googletest//:gtest_main", + ], +) + +# Generate a config.h similar to what cmake would produce. 
+genrule(
+    name = "config_h",
+    outs = ["config.h"],
+    cmd = """cat <<EOF >$@
+#define HAVE_STDDEF_H 1
+#define HAVE_STDINT_H 1
+
+#ifdef __has_builtin
+#  if !defined(HAVE_BUILTIN_EXPECT) && __has_builtin(__builtin_expect)
+#    define HAVE_BUILTIN_EXPECT 1
+#  endif
+#  if !defined(HAVE_BUILTIN_CTZ) && __has_builtin(__builtin_ctzll)
+#    define HAVE_BUILTIN_CTZ 1
+#  endif
+#  if !defined(HAVE_BUILTIN_PREFETCH) && __has_builtin(__builtin_prefetch)
+#    define HAVE_BUILTIN_PREFETCH 1
+#  endif
+#elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#  ifndef HAVE_BUILTIN_EXPECT
+#    define HAVE_BUILTIN_EXPECT 1
+#  endif
+#  ifndef HAVE_BUILTIN_CTZ
+#    define HAVE_BUILTIN_CTZ 1
+#  endif
+#  ifndef HAVE_BUILTIN_PREFETCH
+#    define HAVE_BUILTIN_PREFETCH 1
+#  endif
+#endif
+
+#if defined(_WIN32) && !defined(HAVE_WINDOWS_H)
+#define HAVE_WINDOWS_H 1
+#endif
+
+#ifdef __has_include
+#  if !defined(HAVE_BYTESWAP_H) && __has_include(<byteswap.h>)
+#    define HAVE_BYTESWAP_H 1
+#  endif
+#  if !defined(HAVE_UNISTD_H) && __has_include(<unistd.h>)
+#    define HAVE_UNISTD_H 1
+#  endif
+#  if !defined(HAVE_SYS_ENDIAN_H) && __has_include(<sys/endian.h>)
+#    define HAVE_SYS_ENDIAN_H 1
+#  endif
+#  if !defined(HAVE_SYS_MMAN_H) && __has_include(<sys/mman.h>)
+#    define HAVE_SYS_MMAN_H 1
+#  endif
+#  if !defined(HAVE_SYS_UIO_H) && __has_include(<sys/uio.h>)
+#    define HAVE_SYS_UIO_H 1
+#  endif
+#  if !defined(HAVE_SYS_TIME_H) && __has_include(<sys/time.h>)
+#    define HAVE_SYS_TIME_H 1
+#  endif
+#endif
+
+#ifndef SNAPPY_IS_BIG_ENDIAN
+#  ifdef __s390x__
+#    define SNAPPY_IS_BIG_ENDIAN 1
+#  elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#    define SNAPPY_IS_BIG_ENDIAN 1
+#  endif
+#endif
+EOF
+""",
+)
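As a sketch of how the HAVE_ macros written into this generated config.h are consumed, the fragment below gates __builtin_expect, the builtin probed above, behind HAVE_BUILTIN_EXPECT. The LIKELY wrapper name is illustrative only; snappy's real wrappers live in snappy-stubs-internal.h.

    // Illustrative only: gating a compiler builtin behind the
    // HAVE_BUILTIN_EXPECT macro emitted into config.h above.
    // LIKELY is a hypothetical name, not snappy's actual macro.
    #include <cstdio>

    #ifdef HAVE_BUILTIN_EXPECT
    #define LIKELY(x) (__builtin_expect(!!(x), 1))
    #else
    #define LIKELY(x) (x)
    #endif

    int main() {
      int remaining = 42;
      if (LIKELY(remaining > 0)) std::printf("fast path\n");
      return 0;
    }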
+ cmd = ("""sed -e 's/$${HAVE_SYS_UIO_H_01}/!_WIN32/g' \ + -e 's/$${PROJECT_VERSION_MAJOR}/%d/g' \ + -e 's/$${PROJECT_VERSION_MINOR}/%d/g' \ + -e 's/$${PROJECT_VERSION_PATCH}/%d/g' \ + $< >$@""" % SNAPPY_VERSION), +) diff --git a/CMakeLists.txt b/CMakeLists.txt index de52666..490f5b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,156 +1,467 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 3.1) -PROJECT(Snappy VERSION 1.1.6 LANGUAGES C CXX) - -SET(CMAKE_INCLUDE_CURRENT_DIR ON) - -INCLUDE(CheckIncludeFiles) -INCLUDE(CheckLibraryExists) -INCLUDE(CheckCXXSourceCompiles) -INCLUDE(TestBigEndian) -INCLUDE(CMakePackageConfigHelpers) - -TEST_BIG_ENDIAN(WORDS_BIG_ENDIAN) -IF (WORDS_BIG_ENDIAN) - MESSAGE(STATUS "Building on big endian system") - ADD_DEFINITIONS(-DWORDS_BIGENDIAN=1) -ENDIF (WORDS_BIG_ENDIAN) - -CHECK_INCLUDE_FILES("byteswap.h" HAVE_BYTESWAP_H) -CHECK_INCLUDE_FILES("dlfcn.h" HAVE_DLFCN_H) -CHECK_INCLUDE_FILES("inttypes.h" HAVE_INTTYPES_H) -CHECK_INCLUDE_FILES("memory.h" HAVE_MEMORY_H) -CHECK_INCLUDE_FILES("stddef.h" HAVE_STDDEF_H) -CHECK_INCLUDE_FILES("stdint.h" HAVE_STDINT_H) -CHECK_INCLUDE_FILES("stdlib.h" HAVE_STDLIB_H) -CHECK_INCLUDE_FILES("strings.h" HAVE_STRINGS_H) -CHECK_INCLUDE_FILES("string.h" HAVE_STRING_H) -CHECK_INCLUDE_FILES("sys/byteswap.h" HAVE_SYS_BYTESWAP_H) -CHECK_INCLUDE_FILES("sys/endian.h" HAVE_SYS_ENDIAN_H) -CHECK_INCLUDE_FILES("sys/mman.h" HAVE_SYS_MMAN_H) -CHECK_INCLUDE_FILES("sys/resource.h" HAVE_SYS_RESOURCE_H) -CHECK_INCLUDE_FILES("sys/stat.h" HAVE_SYS_STAT_H) -CHECK_INCLUDE_FILES("sys/time.h" HAVE_SYS_TIME_H) -CHECK_INCLUDE_FILES("sys/types.h" HAVE_SYS_TYPES_H) -CHECK_INCLUDE_FILES("sys/uio.h" HAVE_SYS_UIO_H) -CHECK_INCLUDE_FILES("unistd.h" HAVE_UNISTD_H) -CHECK_INCLUDE_FILES("windows.h" HAVE_WINDOWS_H) - -IF (NOT HAVE_SYS_UIO_H) - SET(HAVE_SYS_UIO_H 0) -ENDIF (NOT HAVE_SYS_UIO_H) - -IF (NOT HAVE_STDINT_H) - SET(HAVE_STDINT_H 0) -ENDIF (NOT HAVE_STDINT_H) - -IF (NOT HAVE_STDDEF_H) - SET(HAVE_STDDEF_H 0) -ENDIF (NOT HAVE_STDDEF_H) - -CHECK_LIBRARY_EXISTS(z zlibVersion "" HAVE_LIBZ) -CHECK_LIBRARY_EXISTS(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2) - -CHECK_CXX_SOURCE_COMPILES("int main(void) { return __builtin_expect(0, 1); }" - HAVE_BUILTIN_EXPECT) - -CHECK_CXX_SOURCE_COMPILES("int main(void) { return __builtin_ctzll(0); }" - HAVE_BUILTIN_CTZ) - -FIND_PACKAGE(GTest QUIET) -IF(GTEST_FOUND) - SET(HAVE_GTEST 1) -ENDIF() - -FIND_PACKAGE(Gflags QUIET) -IF(GFLAGS_FOUND) - SET(HAVE_GFLAGS 1) -ENDIF() - -CONFIGURE_FILE(${Snappy_SOURCE_DIR}/cmake/config.h.in config.h) - -# Configure snappy-stubs-public.h.in -SET(ac_cv_have_stdint_h ${HAVE_STDINT_H}) -SET(ac_cv_have_stddef_h ${HAVE_STDDEF_H}) -SET(ac_cv_have_sys_uio_h ${HAVE_SYS_UIO_H}) -CONFIGURE_FILE(${Snappy_SOURCE_DIR}/snappy-stubs-public.h.in - snappy-stubs-public.h) - -IF (WIN32) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS) -ENDIF (WIN32) - -# Define the main library. 
-ADD_LIBRARY(snappy SHARED - snappy-c.cc - snappy-c.h - snappy-sinksource.cc - snappy-sinksource.h - snappy-stubs-internal.cc - snappy-stubs-public.h - snappy.cc - snappy.h) - -TARGET_COMPILE_DEFINITIONS(snappy PRIVATE -DHAVE_CONFIG_H) - -SET_TARGET_PROPERTIES(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) - -INSTALL(FILES snappy.h - snappy-c.h - snappy-sinksource.h - ${Snappy_BINARY_DIR}/snappy-stubs-public.h - DESTINATION include) - -INSTALL(TARGETS snappy - EXPORT SnappyTargets - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) -INSTALL(EXPORT SnappyTargets NAMESPACE Snappy:: DESTINATION lib/cmake/Snappy) - -SET_TARGET_PROPERTIES(snappy PROPERTIES VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR}) - -SET(INCLUDE_INSTALL_DIR include) -SET(LIBRARY_INSTALL_DIR lib) -SET(BINARY_INSTALL_DIR bin) - -CONFIGURE_PACKAGE_CONFIG_FILE(cmake/SnappyConfig.cmake.in - ${Snappy_BINARY_DIR}/SnappyConfig.cmake - INSTALL_DESTINATION lib/Snappy/cmake - PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR BINARY_INSTALL_DIR - ) - -WRITE_BASIC_PACKAGE_VERSION_FILE(${Snappy_BINARY_DIR}/SnappyConfigVersion.cmake - COMPATIBILITY SameMajorVersion) -INSTALL(FILES ${Snappy_BINARY_DIR}/SnappyConfig.cmake - ${Snappy_BINARY_DIR}/SnappyConfigVersion.cmake - DESTINATION lib/cmake) - -ENABLE_TESTING() - -IF (HAVE_LIBZ) - LIST(APPEND COMPRESSION_LIBS z) -ENDIF (HAVE_LIBZ) - -IF (HAVE_LIBLZO2) - LIST(APPEND COMPRESSION_LIBS lzo2) -ENDIF (HAVE_LIBLZO2) - -IF (HAVE_LIBLZF) - LIST(APPEND COMPRESSION_LIBS lzf) -ENDIF (HAVE_LIBLZF) - -IF (HAVE_LIBQUICKLZ) - LIST(APPEND COMPRESSION_LIBS quicklz) -ENDIF (HAVE_LIBQUICKLZ) - -ADD_EXECUTABLE(snappy-unittest snappy_unittest.cc snappy-test.cc) -TARGET_COMPILE_DEFINITIONS(snappy-unittest PRIVATE -DHAVE_CONFIG_H) -TARGET_LINK_LIBRARIES(snappy-unittest snappy ${COMPRESSION_LIBS} - ${GFLAGS_LIBRARIES}) -TARGET_INCLUDE_DIRECTORIES(snappy-unittest BEFORE PRIVATE ${Snappy_SOURCE_DIR} - ${GTEST_INCLUDE_DIRS} ${GFLAGS_INCLUDE_DIRS}) - -ADD_TEST(NAME snappy-unittest - WORKING_DIRECTORY ${Snappy_SOURCE_DIR} - COMMAND ${Snappy_BINARY_DIR}/snappy-unittest) +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cmake_minimum_required(VERSION 3.10) +project(Snappy VERSION 1.2.2 LANGUAGES C CXX) + +# C++ standard can be overridden when this is used as a sub-project. +if(NOT CMAKE_CXX_STANDARD) + # This project requires C++11. + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) +endif(NOT CMAKE_CXX_STANDARD) + +# https://github.com/izenecloud/cmake/blob/master/SetCompilerWarningAll.cmake +if(MSVC) + # Use the highest warning level for Visual Studio. + set(CMAKE_CXX_WARNING_LEVEL 4) + if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + else(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") + endif(CMAKE_CXX_FLAGS MATCHES "/W[0-4]") + + # Disable C++ exceptions. + string(REGEX REPLACE "/EH[a-z]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHs-c-") + add_definitions(-D_HAS_EXCEPTIONS=0) + + # Disable RTTI. + string(REGEX REPLACE "/GR" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") +else(MSVC) + # Use -Wall for clang and gcc. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wall") + + # Use -Wextra for clang and gcc. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wextra") + + # Use -Werror for clang only. + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Werror") + endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + + # Disable sign comparison warnings. Matches upcoming Bazel setup. + if(NOT CMAKE_CXX_FLAGS MATCHES "-Wno-sign-compare") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") + endif(NOT CMAKE_CXX_FLAGS MATCHES "-Wno-sign-compare") + + # Disable C++ exceptions. + string(REGEX REPLACE "-fexceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") + + # Disable RTTI. + string(REGEX REPLACE "-frtti" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") +endif(MSVC) + +# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make +# it prominent in the GUI. +option(BUILD_SHARED_LIBS "Build shared libraries(DLLs)." OFF) + +option(SNAPPY_BUILD_TESTS "Build Snappy's own tests." ON) + +option(SNAPPY_BUILD_BENCHMARKS "Build Snappy's benchmarks" ON) + +option(SNAPPY_FUZZING_BUILD "Build Snappy for fuzzing." OFF) + +option(SNAPPY_REQUIRE_AVX "Target processors with AVX support." OFF) + +option(SNAPPY_REQUIRE_AVX2 "Target processors with AVX2 support." 
OFF)
+
+option(SNAPPY_INSTALL "Install Snappy's header and library" ON)
+
+include(TestBigEndian)
+test_big_endian(SNAPPY_IS_BIG_ENDIAN)
+
+include(CheckIncludeFile)
+check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
+check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H)
+check_include_file("sys/time.h" HAVE_SYS_TIME_H)
+check_include_file("sys/uio.h" HAVE_SYS_UIO_H)
+check_include_file("unistd.h" HAVE_UNISTD_H)
+check_include_file("windows.h" HAVE_WINDOWS_H)
+
+include(CheckLibraryExists)
+check_library_exists(z zlibVersion "" HAVE_LIBZ)
+check_library_exists(lzo2 lzo1x_1_15_compress "" HAVE_LIBLZO2)
+check_library_exists(lz4 LZ4_compress_default "" HAVE_LIBLZ4)
+
+include(CheckCXXCompilerFlag)
+CHECK_CXX_COMPILER_FLAG("/arch:AVX" HAVE_VISUAL_STUDIO_ARCH_AVX)
+CHECK_CXX_COMPILER_FLAG("/arch:AVX2" HAVE_VISUAL_STUDIO_ARCH_AVX2)
+CHECK_CXX_COMPILER_FLAG("-mavx" HAVE_CLANG_MAVX)
+CHECK_CXX_COMPILER_FLAG("-mbmi2" HAVE_CLANG_MBMI2)
+if(SNAPPY_REQUIRE_AVX2)
+  if(HAVE_VISUAL_STUDIO_ARCH_AVX2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
+  endif(HAVE_VISUAL_STUDIO_ARCH_AVX2)
+  if(HAVE_CLANG_MAVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+  endif(HAVE_CLANG_MAVX)
+  if(HAVE_CLANG_MBMI2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mbmi2")
+  endif(HAVE_CLANG_MBMI2)
+elseif (SNAPPY_REQUIRE_AVX)
+  if(HAVE_VISUAL_STUDIO_ARCH_AVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
+  endif(HAVE_VISUAL_STUDIO_ARCH_AVX)
+  if(HAVE_CLANG_MAVX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+  endif(HAVE_CLANG_MAVX)
+endif(SNAPPY_REQUIRE_AVX2)
+
+# Used by googletest.
+check_cxx_compiler_flag(-Wno-missing-field-initializers
+                        SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+check_cxx_compiler_flag(-Wno-implicit-int-float-conversion
+                        SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION)
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles("
+int main() {
+  return __builtin_expect(0, 1);
+}" HAVE_BUILTIN_EXPECT)
+
+check_cxx_source_compiles("
+int main() {
+  return __builtin_ctzll(0);
+}" HAVE_BUILTIN_CTZ)
+
+check_cxx_source_compiles("
+int main() {
+  __builtin_prefetch(0, 0, 3);
+  return 0;
+}" HAVE_BUILTIN_PREFETCH)
+
+check_cxx_source_compiles("
+__attribute__((always_inline)) int zero() { return 0; }
+
+int main() {
+  return zero();
+}" HAVE_ATTRIBUTE_ALWAYS_INLINE)
+
+check_cxx_source_compiles("
+#include <tmmintrin.h>
+
+int main() {
+  const __m128i *src = 0;
+  __m128i dest;
+  const __m128i shuffle_mask = _mm_load_si128(src);
+  const __m128i pattern = _mm_shuffle_epi8(_mm_loadl_epi64(src), shuffle_mask);
+  _mm_storeu_si128(&dest, pattern);
+  return 0;
+}" SNAPPY_HAVE_SSSE3)
+
+check_cxx_source_compiles("
+#include <nmmintrin.h>
+int main() {
+  return _mm_crc32_u32(0, 1);
+}" SNAPPY_HAVE_X86_CRC32)
+
+check_cxx_source_compiles("
+#include <arm_acle.h>
+#include <arm_neon.h>
+int main() {
+  return __crc32cw(0, 1);
+}" SNAPPY_HAVE_NEON_CRC32)
+
+check_cxx_source_compiles("
+#include <immintrin.h>
+int main() {
+  return _bzhi_u32(0, 1);
+}" SNAPPY_HAVE_BMI2)
+
+check_cxx_source_compiles("
+#include <arm_neon.h>
+#include <stdint.h>
+int main() {
+  uint8_t val = 3, dup[8];
+  uint8x16_t v1 = vld1q_dup_u8(&val);
+  uint8x16_t v2 = vqtbl1q_u8(v1, v1);
+  vst1q_u8(dup, v1);
+  vst1q_u8(dup, v2);
+  return 0;
+}" SNAPPY_HAVE_NEON)
+
+# Check RVV 1.0, whose intrinsics use the __riscv_ prefix.
+check_cxx_source_compiles("
+  #include <riscv_vector.h>
+  #include <stdint.h>
+  #include <stddef.h>
+  int main() {
+    uint8_t val = 3, dup[8];
+    size_t vl = __riscv_vsetvl_e8m1(8);
+    vuint8m1_t v = __riscv_vmv_v_x_u8m1(val, vl);
+    return 0;
+  }" SNAPPY_RVV_1)
+
+# Check RVV 0.7.1, whose intrinsics lack the __riscv_ prefix.
+check_cxx_source_compiles("
+  #include <riscv_vector.h>
+  #include <stdint.h>
+  #include <stddef.h>
+  int main() {
+    uint8_t val = 3, dup[8];
+    size_t vl = vsetvl_e8m1(8);
+    vuint8m1_t v = vmv_v_x_u8m1(val, vl);
+    return 0;
+  }" SNAPPY_RVV_0_7)
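The two RVV probes above compile the same kernel under both intrinsic naming schemes: RVV 1.0 prefixes every intrinsic with __riscv_, while RVV 0.7.1 does not. A minimal sketch of dispatching on the resulting macros; VectorLength is a hypothetical helper, and the intrinsics are exactly those exercised by the checks:

    // Hypothetical dispatch on the SNAPPY_RVV_1 / SNAPPY_RVV_0_7 macros
    // defined by the two compile checks above.
    #include <stddef.h>
    #include <stdint.h>
    #if defined(SNAPPY_RVV_1) || defined(SNAPPY_RVV_0_7)
    #include <riscv_vector.h>
    #endif

    size_t VectorLength() {
    #if defined(SNAPPY_RVV_1)
      return __riscv_vsetvl_e8m1(8);  // RVV 1.0 spelling: __riscv_ prefix.
    #elif defined(SNAPPY_RVV_0_7)
      return vsetvl_e8m1(8);          // RVV 0.7.1 spelling: no prefix.
    #else
      return 1;                       // Scalar fallback on non-RISC-V builds.
    #endif
    }

    int main() { return VectorLength() == 0; }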
+
+include(CheckSymbolExists)
+check_symbol_exists("mmap" "sys/mman.h" HAVE_FUNC_MMAP)
+check_symbol_exists("sysconf" "unistd.h" HAVE_FUNC_SYSCONF)
+
+configure_file(
+  "cmake/config.h.in"
+  "${PROJECT_BINARY_DIR}/config.h"
+)
+
+# We don't want to define HAVE_ macros in public headers. Instead, we use
+# CMake's variable substitution with 0/1 variables, which will be seen by the
+# preprocessor as constants.
+set(HAVE_SYS_UIO_H_01 ${HAVE_SYS_UIO_H})
+if(NOT HAVE_SYS_UIO_H_01)
+  set(HAVE_SYS_UIO_H_01 0)
+endif(NOT HAVE_SYS_UIO_H_01)
+
+if (SNAPPY_FUZZING_BUILD)
+  if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    message(WARNING "Fuzzing builds are only supported with Clang")
+  endif (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=address")
+
+  if(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer-no-link")
+  endif(NOT CMAKE_CXX_FLAGS MATCHES "-fsanitize=fuzzer-no-link")
+endif (SNAPPY_FUZZING_BUILD)
+
+configure_file(
+  "snappy-stubs-public.h.in"
+  "${PROJECT_BINARY_DIR}/snappy-stubs-public.h")
+
+add_library(snappy "")
+target_sources(snappy
+  PRIVATE
+    "snappy-internal.h"
+    "snappy-stubs-internal.h"
+    "snappy-c.cc"
+    "snappy-sinksource.cc"
+    "snappy-stubs-internal.cc"
+    "snappy.cc"
+    "${PROJECT_BINARY_DIR}/config.h"
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-c.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy-c.h>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy-sinksource.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy-sinksource.h>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/snappy.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy.h>
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/snappy-stubs-public.h>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/snappy-stubs-public.h>
+)
+target_include_directories(snappy
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+)
+set_target_properties(snappy
+  PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
+
+target_compile_definitions(snappy PRIVATE -DHAVE_CONFIG_H)
+if(BUILD_SHARED_LIBS)
+  set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+endif(BUILD_SHARED_LIBS)
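The snappy target assembled above is the library consumers link against; its public C++ API is declared in snappy.h. A minimal round-trip through that API for reference; the sample data is arbitrary:

    // Minimal round-trip through the public C++ API declared in snappy.h.
    #include <cassert>
    #include <string>

    #include "snappy.h"

    int main() {
      const std::string input(1000, 'a');  // Highly compressible sample data.

      std::string compressed;
      snappy::Compress(input.data(), input.size(), &compressed);

      std::string uncompressed;
      bool ok = snappy::Uncompress(compressed.data(), compressed.size(),
                                   &uncompressed);
      assert(ok && uncompressed == input);
      return 0;
    }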
+
+if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
+  add_library(snappy_test_support "")
+  target_sources(snappy_test_support
+    PRIVATE
+      "snappy-test.cc"
+      "snappy-test.h"
+      "snappy_test_data.cc"
+      "snappy_test_data.h"
+      "${PROJECT_BINARY_DIR}/config.h"
+  )
+
+  # Test files include snappy-test.h, so HAVE_CONFIG_H must be defined.
+  target_compile_definitions(snappy_test_support PUBLIC -DHAVE_CONFIG_H)
+  if(BUILD_SHARED_LIBS)
+    set_target_properties(snappy_test_support PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+  endif(BUILD_SHARED_LIBS)
+
+  target_link_libraries(snappy_test_support snappy)
+
+  if(HAVE_LIBZ)
+    target_link_libraries(snappy_test_support z)
+  endif(HAVE_LIBZ)
+  if(HAVE_LIBLZO2)
+    target_link_libraries(snappy_test_support lzo2)
+  endif(HAVE_LIBLZO2)
+  if(HAVE_LIBLZ4)
+    target_link_libraries(snappy_test_support lz4)
+  endif(HAVE_LIBLZ4)
+
+  target_include_directories(snappy_test_support
+    BEFORE PUBLIC
+      "${PROJECT_SOURCE_DIR}"
+  )
+endif(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
+
+if(SNAPPY_BUILD_TESTS)
+  enable_testing()
+
+  # Prevent overriding the parent project's compiler/linker settings on Windows.
+  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+  set(install_gtest OFF)
+  set(install_gmock OFF)
+  set(build_gmock ON)
+
+  # This project is tested using GoogleTest.
+  add_subdirectory("third_party/googletest")
+
+  # GoogleTest triggers a missing field initializers warning.
+  if(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+    set_property(TARGET gtest
+      APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
+    set_property(TARGET gmock
+      APPEND PROPERTY COMPILE_OPTIONS -Wno-missing-field-initializers)
+  endif(SNAPPY_HAVE_NO_MISSING_FIELD_INITIALIZERS)
+
+  if(SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION)
+    set_property(TARGET gtest
+      APPEND PROPERTY COMPILE_OPTIONS -Wno-implicit-int-float-conversion)
+  endif(SNAPPY_HAVE_NO_IMPLICIT_INT_FLOAT_CONVERSION)
+
+  add_executable(snappy_unittest "")
+  target_sources(snappy_unittest
+    PRIVATE
+      "snappy_unittest.cc"
+  )
+  target_link_libraries(snappy_unittest snappy_test_support gmock_main gtest)
+
+  add_test(
+    NAME snappy_unittest
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    COMMAND "${PROJECT_BINARY_DIR}/snappy_unittest")
+
+  add_executable(snappy_test_tool "")
+  target_sources(snappy_test_tool
+    PRIVATE
+      "snappy_test_tool.cc"
+  )
+  target_link_libraries(snappy_test_tool snappy_test_support)
+endif(SNAPPY_BUILD_TESTS)
+
+if(SNAPPY_BUILD_BENCHMARKS)
+  add_executable(snappy_benchmark "")
+  target_sources(snappy_benchmark
+    PRIVATE
+      "snappy_benchmark.cc"
+  )
+  target_link_libraries(snappy_benchmark snappy_test_support benchmark_main)
+
+  # This project uses Google benchmark for benchmarking.
+  set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
+  set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "" FORCE)
+  add_subdirectory("third_party/benchmark")
+endif(SNAPPY_BUILD_BENCHMARKS)
+
+if(SNAPPY_FUZZING_BUILD)
+  add_executable(snappy_compress_fuzzer "")
+  target_sources(snappy_compress_fuzzer
+    PRIVATE "snappy_compress_fuzzer.cc"
+  )
+  target_link_libraries(snappy_compress_fuzzer snappy)
+  set_target_properties(snappy_compress_fuzzer
+    PROPERTIES LINK_FLAGS "-fsanitize=fuzzer"
+  )
+
+  add_executable(snappy_uncompress_fuzzer "")
+  target_sources(snappy_uncompress_fuzzer
+    PRIVATE "snappy_uncompress_fuzzer.cc"
+  )
+  target_link_libraries(snappy_uncompress_fuzzer snappy)
+  set_target_properties(snappy_uncompress_fuzzer
+    PROPERTIES LINK_FLAGS "-fsanitize=fuzzer"
+  )
+endif(SNAPPY_FUZZING_BUILD)
+
+# Must be included before CMAKE_INSTALL_INCLUDEDIR is used.
+include(GNUInstallDirs)
+
+if(SNAPPY_INSTALL)
+  install(TARGETS snappy
+    EXPORT SnappyTargets
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  )
+  install(
+    FILES
+      "snappy-c.h"
+      "snappy-sinksource.h"
+      "snappy.h"
+      "${PROJECT_BINARY_DIR}/snappy-stubs-public.h"
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
+  )
+
+  include(CMakePackageConfigHelpers)
+  configure_package_config_file(
+    "cmake/${PROJECT_NAME}Config.cmake.in"
+    "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
+    INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+  write_basic_package_version_file(
+    "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
+    COMPATIBILITY SameMajorVersion
+  )
+  install(
+    EXPORT SnappyTargets
+    NAMESPACE Snappy::
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+  install(
+    FILES
+      "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake"
+      "${PROJECT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
+  )
+endif(SNAPPY_INSTALL)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..66a60d5
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,31 @@
+# How to Contribute
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+See [the README](README.md#contributing-to-the-snappy-project) for areas
+where we are likely to accept external contributions.
+
+## Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google/conduct/).
diff --git a/ChangeLog b/ChangeLog
deleted file mode 100644
index 1478db5..0000000
--- a/ChangeLog
+++ /dev/null
@@ -1,2468 +0,0 @@
-commit eb66d8176b3d1f560ee012e1b488cb1540c45f88
-Author: Steinar H. Gunderson
-Date:   Mon Jun 22 16:10:47 2015 +0200
-
-    Initialized members of SnappyArrayWriter and SnappyDecompressionValidator.
-    These members were almost surely initialized before use by other member
-    functions, but Coverity was warning about this. Eliminating these warnings
-    minimizes clutter in that report and the likelihood of overlooking a real bug.
-
-    A=cmumford
-    R=jeff
-
-commit b2312c4c25883ab03b5110f1b006dce95f419a4f
-Author: Steinar H. Gunderson
-Date:   Mon Jun 22 16:03:28 2015 +0200
-
-    Add support for Uncompress(source, sink). Various changes to allow
-    Uncompress(source, sink) to get the same performance as the different
-    variants of Uncompress to Cord/DataBuffer/String/FlatBuffer.
-
-    Changes to efficiently support Uncompress(source, sink)
-    --------
-
-    a) For strings - we add support to StringByteSink to do GetAppendBuffer so we
-       can write to it without copying.
-    b) For flat array buffers, we do GetAppendBuffer and see if we can get a full buffer.
-
-    With the above changes we get performance with ByteSource/ByteSink
-    that is very close to directly using flat arrays and strings.
-
-    We add various benchmark cases to demonstrate that.
-
-    Orthogonal change
-    ------------------
-
-    Add support for TryFastAppend() for SnappyScatteredWriter.
- - Benchmark results are below - - CPU: Intel Core2 dL1:32KB dL2:4096KB - Benchmark Time(ns) CPU(ns) Iterations - ----------------------------------------------------- - BM_UFlat/0 109065 108996 6410 896.0MB/s html - BM_UFlat/1 1012175 1012343 691 661.4MB/s urls - BM_UFlat/2 26775 26771 26149 4.4GB/s jpg - BM_UFlat/3 48947 48940 14363 1.8GB/s pdf - BM_UFlat/4 441029 440835 1589 886.1MB/s html4 - BM_UFlat/5 39861 39880 17823 588.3MB/s cp - BM_UFlat/6 18315 18300 38126 581.1MB/s c - BM_UFlat/7 5254 5254 100000 675.4MB/s lsp - BM_UFlat/8 1568060 1567376 447 626.6MB/s xls - BM_UFlat/9 337512 337734 2073 429.5MB/s txt1 - BM_UFlat/10 287269 287054 2434 415.9MB/s txt2 - BM_UFlat/11 890098 890219 787 457.2MB/s txt3 - BM_UFlat/12 1186593 1186863 590 387.2MB/s txt4 - BM_UFlat/13 573927 573318 1000 853.7MB/s bin - BM_UFlat/14 64250 64294 10000 567.2MB/s sum - BM_UFlat/15 7301 7300 96153 552.2MB/s man - BM_UFlat/16 109617 109636 6375 1031.5MB/s pb - BM_UFlat/17 364438 364497 1921 482.3MB/s gaviota - BM_UFlatSink/0 108518 108465 6450 900.4MB/s html - BM_UFlatSink/1 991952 991997 705 675.0MB/s urls - BM_UFlatSink/2 26815 26798 26065 4.4GB/s jpg - BM_UFlatSink/3 49127 49122 14255 1.8GB/s pdf - BM_UFlatSink/4 436674 436731 1604 894.4MB/s html4 - BM_UFlatSink/5 39738 39733 17345 590.5MB/s cp - BM_UFlatSink/6 18413 18416 37962 577.4MB/s c - BM_UFlatSink/7 5677 5676 100000 625.2MB/s lsp - BM_UFlatSink/8 1552175 1551026 451 633.2MB/s xls - BM_UFlatSink/9 338526 338489 2065 428.5MB/s txt1 - BM_UFlatSink/10 289387 289307 2420 412.6MB/s txt2 - BM_UFlatSink/11 893803 893706 783 455.4MB/s txt3 - BM_UFlatSink/12 1195919 1195459 586 384.4MB/s txt4 - BM_UFlatSink/13 559637 559779 1000 874.3MB/s bin - BM_UFlatSink/14 65073 65094 10000 560.2MB/s sum - BM_UFlatSink/15 7618 7614 92823 529.5MB/s man - BM_UFlatSink/16 110085 110121 6352 1027.0MB/s pb - BM_UFlatSink/17 369196 368915 1896 476.5MB/s gaviota - BM_UValidate/0 46954 46957 14899 2.0GB/s html - BM_UValidate/1 500621 500868 1000 1.3GB/s urls - BM_UValidate/2 283 283 2481447 417.2GB/s jpg - BM_UValidate/3 16230 16228 43137 5.4GB/s pdf - BM_UValidate/4 189129 189193 3701 2.0GB/s html4 - - A=uday - R=sanjay - -commit b2ad96006741d40935db2f73194a3e489b467338 -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:48:29 2015 +0200 - - Changes to eliminate compiler warnings on MSVC - - This code was not compiling under Visual Studio 2013 with warnings being treated - as errors. Specifically: - - 1. Changed int -> size_t to eliminate signed/unsigned mismatch warning. - 2. Added some missing return values to functions. - 3. Inserting character instead of integer literals into strings to avoid type - conversions. - - A=cmumford - R=jeff - -commit e7a897e187e90b33f87bd9e64872cf561de9ebca -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:45:11 2015 +0200 - - Fixed unit tests to compile under MSVC. - - 1. Including config.h in test. - 2. Including windows.h before zippy-test.h. - 3. Removed definition of WIN32_LEAN_AND_MEAN. This caused problems in - build environments that define WIN32_LEAN_AND_MEAN as our - definition didn't check for prior existence. This constant is old - and no longer needed anyhow. - 4. Disable MSVC warning 4722 since ~LogMessageCrash() never returns. - - A=cmumford - R=jeff - -commit 86eb8b152bdb065ad11bf331a9f7d65b72616acf -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:41:30 2015 +0200 - - Change a few branch annotations that profiling found to be wrong. - Overall performance is neutral or slightly positive. 
- - Westmere (64-bit, opt): - - Benchmark Base (ns) New (ns) Improvement - -------------------------------------------------------------------------------------- - BM_UFlat/0 73798 71464 1.3GB/s html +3.3% - BM_UFlat/1 715223 704318 953.5MB/s urls +1.5% - BM_UFlat/2 8137 8871 13.0GB/s jpg -8.3% - BM_UFlat/3 200 204 935.5MB/s jpg_200 -2.0% - BM_UFlat/4 21627 21281 4.5GB/s pdf +1.6% - BM_UFlat/5 302806 290350 1.3GB/s html4 +4.3% - BM_UFlat/6 218920 219017 664.1MB/s txt1 -0.0% - BM_UFlat/7 190437 191212 626.1MB/s txt2 -0.4% - BM_UFlat/8 584192 580484 703.4MB/s txt3 +0.6% - BM_UFlat/9 776537 779055 591.6MB/s txt4 -0.3% - BM_UFlat/10 76056 72606 1.5GB/s pb +4.8% - BM_UFlat/11 235962 239043 737.4MB/s gaviota -1.3% - BM_UFlat/12 28049 28000 840.1MB/s cp +0.2% - BM_UFlat/13 12225 12021 886.9MB/s c +1.7% - BM_UFlat/14 3362 3544 1004.0MB/s lsp -5.1% - BM_UFlat/15 937015 939206 1048.9MB/s xls -0.2% - BM_UFlat/16 236 233 823.1MB/s xls_200 +1.3% - BM_UFlat/17 373170 361947 1.3GB/s bin +3.1% - BM_UFlat/18 264 264 725.5MB/s bin_200 +0.0% - BM_UFlat/19 42834 43577 839.2MB/s sum -1.7% - BM_UFlat/20 4770 4736 853.6MB/s man +0.7% - BM_UValidate/0 39671 39944 2.4GB/s html -0.7% - BM_UValidate/1 443391 443391 1.5GB/s urls +0.0% - BM_UValidate/2 163 163 703.3GB/s jpg +0.0% - BM_UValidate/3 113 112 1.7GB/s jpg_200 +0.9% - BM_UValidate/4 7555 7608 12.6GB/s pdf -0.7% - BM_ZFlat/0 157616 157568 621.5MB/s html (22.31 %) +0.0% - BM_ZFlat/1 1997290 2014486 333.4MB/s urls (47.77 %) -0.9% - BM_ZFlat/2 23035 22237 5.2GB/s jpg (99.95 %) +3.6% - BM_ZFlat/3 539 540 354.5MB/s jpg_200 (73.00 %) -0.2% - BM_ZFlat/4 80709 81369 1.2GB/s pdf (81.85 %) -0.8% - BM_ZFlat/5 639059 639220 613.0MB/s html4 (22.51 %) -0.0% - BM_ZFlat/6 577203 583370 249.3MB/s txt1 (57.87 %) -1.1% - BM_ZFlat/7 510887 516094 232.0MB/s txt2 (61.93 %) -1.0% - BM_ZFlat/8 1535843 1556973 262.2MB/s txt3 (54.92 %) -1.4% - BM_ZFlat/9 2070068 2102380 219.3MB/s txt4 (66.22 %) -1.5% - BM_ZFlat/10 152396 152148 745.5MB/s pb (19.64 %) +0.2% - BM_ZFlat/11 447367 445859 395.4MB/s gaviota (37.72 %) +0.3% - BM_ZFlat/12 76375 76797 306.3MB/s cp (48.12 %) -0.5% - BM_ZFlat/13 31518 31987 333.3MB/s c (42.40 %) -1.5% - BM_ZFlat/14 10598 10827 328.6MB/s lsp (48.37 %) -2.1% - BM_ZFlat/15 1782243 1802728 546.5MB/s xls (41.23 %) -1.1% - BM_ZFlat/16 526 539 355.0MB/s xls_200 (78.00 %) -2.4% - BM_ZFlat/17 598141 597311 822.1MB/s bin (18.11 %) +0.1% - BM_ZFlat/18 121 120 1.6GB/s bin_200 (7.50 %) +0.8% - BM_ZFlat/19 109981 112173 326.0MB/s sum (48.96 %) -2.0% - BM_ZFlat/20 14355 14575 277.4MB/s man (59.36 %) -1.5% - Sum of all benchmarks 33882722 33879325 +0.0% - - Sandy Bridge (64-bit, opt): - - Benchmark Base (ns) New (ns) Improvement - -------------------------------------------------------------------------------------- - BM_UFlat/0 43764 41600 2.3GB/s html +5.2% - BM_UFlat/1 517990 507058 1.3GB/s urls +2.2% - BM_UFlat/2 6625 5529 20.8GB/s jpg +19.8% - BM_UFlat/3 154 155 1.2GB/s jpg_200 -0.6% - BM_UFlat/4 12795 11747 8.1GB/s pdf +8.9% - BM_UFlat/5 200335 193413 2.0GB/s html4 +3.6% - BM_UFlat/6 156574 156426 929.2MB/s txt1 +0.1% - BM_UFlat/7 137574 137464 870.4MB/s txt2 +0.1% - BM_UFlat/8 422551 421603 967.4MB/s txt3 +0.2% - BM_UFlat/9 577749 578985 795.6MB/s txt4 -0.2% - BM_UFlat/10 42329 39362 2.8GB/s pb +7.5% - BM_UFlat/11 170615 169751 1037.9MB/s gaviota +0.5% - BM_UFlat/12 12800 12719 1.8GB/s cp +0.6% - BM_UFlat/13 6585 6579 1.6GB/s c +0.1% - BM_UFlat/14 2066 2044 1.7GB/s lsp +1.1% - BM_UFlat/15 750861 746911 1.3GB/s xls +0.5% - BM_UFlat/16 188 192 996.0MB/s xls_200 -2.1% - 
BM_UFlat/17 271622 264333 1.8GB/s bin +2.8% - BM_UFlat/18 208 207 923.6MB/s bin_200 +0.5% - BM_UFlat/19 24667 24845 1.4GB/s sum -0.7% - BM_UFlat/20 2663 2662 1.5GB/s man +0.0% - BM_ZFlat/0 115173 115624 846.5MB/s html (22.31 %) -0.4% - BM_ZFlat/1 1530331 1537769 436.5MB/s urls (47.77 %) -0.5% - BM_ZFlat/2 17503 17013 6.8GB/s jpg (99.95 %) +2.9% - BM_ZFlat/3 385 385 496.3MB/s jpg_200 (73.00 %) +0.0% - BM_ZFlat/4 61753 61540 1.6GB/s pdf (81.85 %) +0.3% - BM_ZFlat/5 484806 483356 810.1MB/s html4 (22.51 %) +0.3% - BM_ZFlat/6 464143 467609 310.9MB/s txt1 (57.87 %) -0.7% - BM_ZFlat/7 410315 413319 289.5MB/s txt2 (61.93 %) -0.7% - BM_ZFlat/8 1244082 1249381 326.5MB/s txt3 (54.92 %) -0.4% - BM_ZFlat/9 1696914 1709685 269.4MB/s txt4 (66.22 %) -0.7% - BM_ZFlat/10 104148 103372 1096.7MB/s pb (19.64 %) +0.8% - BM_ZFlat/11 363522 359722 489.8MB/s gaviota (37.72 %) +1.1% - BM_ZFlat/12 47021 50095 469.3MB/s cp (48.12 %) -6.1% - BM_ZFlat/13 16888 16985 627.4MB/s c (42.40 %) -0.6% - BM_ZFlat/14 5496 5469 650.3MB/s lsp (48.37 %) +0.5% - BM_ZFlat/15 1460713 1448760 679.5MB/s xls (41.23 %) +0.8% - BM_ZFlat/16 387 393 486.8MB/s xls_200 (78.00 %) -1.5% - BM_ZFlat/17 457654 451462 1086.6MB/s bin (18.11 %) +1.4% - BM_ZFlat/18 97 87 2.1GB/s bin_200 (7.50 %) +11.5% - BM_ZFlat/19 77904 80924 451.7MB/s sum (48.96 %) -3.7% - BM_ZFlat/20 7648 7663 527.1MB/s man (59.36 %) -0.2% - Sum of all benchmarks 25493635 25482069 +0.0% - - A=dehao - R=sesse - -commit 11ccdfb868387e56d845766d89ddab9d489c4128 -Author: Steinar H. Gunderson -Date: Mon Jun 22 16:07:58 2015 +0200 - - Sync with various Google-internal changes. - - Should not mean much for the open-source version. - -commit 22acaf438ed93ab21a2ff1919d173206798b996e -Author: Steinar H. Gunderson -Date: Mon Jun 22 15:39:08 2015 +0200 - - Change some internal path names. - - This is mostly to sync up with some changes from Google's internal - repositories; it does not affect the open-source distribution in itself. - -commit 1ff9be9b8fafc8528ca9e055646f5932aa5db9c4 -Author: snappy.mirrorbot@gmail.com -Date: Fri Feb 28 11:18:07 2014 +0000 - - Release Snappy 1.1.2. - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@84 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 19690d78e83f8963f497585031efa3d9ca66b807 -Author: snappy.mirrorbot@gmail.com -Date: Wed Feb 19 10:31:49 2014 +0000 - - Fix public issue 82: Stop distributing benchmark data files that have - unclear or unsuitable licensing. - - In general, we replace the files we can with liberally licensed data, - and remove all the others (in particular all the parts of the Canterbury - corpus that are not clearly in the public domain). The replacements - do not always have the exact same characteristics as the original ones, - but they are more than good enough to be useful for benchmarking. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@83 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f82bff66afe0de4c9ae22f8c4ef84e3c2233e799 -Author: snappy.mirrorbot@gmail.com -Date: Fri Oct 25 13:31:27 2013 +0000 - - Add support for padding in the Snappy framed format. - - This is specifically motivated by DICOM's demands that embedded data - must be of an even number of bytes, but could in principle be used for - any sort of padding/alignment needed. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@82 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit eeead8dc38ea359f027fb6e89f345448e8e9d723 -Author: snappy.mirrorbot@gmail.com -Date: Tue Oct 15 15:21:31 2013 +0000 - - Release Snappy 1.1.1. 
- - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@81 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 6bc39e24c76adbbff26ae629fafbf7dfc795f554 -Author: snappy.mirrorbot@gmail.com -Date: Tue Aug 13 12:55:00 2013 +0000 - - Add autoconf tests for size_t and ssize_t. Sort-of resolves public issue 79; - it would solve the problem if MSVC typically used autoconf. However, it gives - a natural place (config.h) to put the typedef even for MSVC. - - R=jsbell - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@80 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 7c3c01df77e191ad1f8377448961fe88db2802e9 -Author: snappy.mirrorbot@gmail.com -Date: Mon Jul 29 11:06:44 2013 +0000 - - When we compare the number of bytes produced with the offset for a - backreference, make the signedness of the bytes produced clear, - by sticking it into a size_t. This avoids a signed/unsigned compare - warning from MSVC (public issue 71), and also is slightly clearer. - - Since the line is now so long the explanatory comment about the -1u - trick has to go somewhere else anyway, I used the opportunity to - explain it in slightly more detail. - - This is a purely stylistic change; the emitted assembler from GCC - is identical. - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@79 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2f0aaf8631d8fb2475ca1a6687c181efb14ed286 -Author: snappy.mirrorbot@gmail.com -Date: Sun Jun 30 19:24:03 2013 +0000 - - In the fast path for decompressing literals, instead of checking - whether there's 16 bytes free and then checking right afterwards - (when having subtracted the literal size) that there are now - 5 bytes free, just check once for 21 bytes. This skips a compare - and a branch; although it is easily predictable, it is still - a few cycles on a fast path that we would like to get rid of. - - Benchmarking this yields very confusing results. On open-source - GCC 4.8.1 on Haswell, we get exactly the expected results; the - benchmarks where we hit the fast path for literals (in particular - the two HTML benchmarks and the protobuf benchmark) give very nice - speedups, and the others are not really affected. - - However, benchmarks with Google's GCC branch on other hardware - is much less clear. It seems that we have a weak loss in some cases - (and the win for the “typical” win cases are not nearly as clear), - but that it depends on microarchitecture and plain luck in how we run - the benchmark. Looking at the generated assembler, it seems that - the removal of the if causes other large-scale changes in how the - function is laid out, which makes it likely that this is just bad luck. - - Thus, we should keep this change, even though its exact current impact is - unclear; it's a sensible change per se, and dropping it on the basis of - microoptimization for a given compiler (or even branch of a compiler) - would seem like a bad strategy in the long run. 
- - Microbenchmark results (all in 64-bit, opt mode): - - Nehalem, Google GCC: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------ - BM_UFlat/0 76747 75591 1.3GB/s html +1.5% - BM_UFlat/1 765756 757040 886.3MB/s urls +1.2% - BM_UFlat/2 10867 10893 10.9GB/s jpg -0.2% - BM_UFlat/3 124 131 1.4GB/s jpg_200 -5.3% - BM_UFlat/4 31663 31596 2.8GB/s pdf +0.2% - BM_UFlat/5 314162 308176 1.2GB/s html4 +1.9% - BM_UFlat/6 29668 29746 790.6MB/s cp -0.3% - BM_UFlat/7 12958 13386 796.4MB/s c -3.2% - BM_UFlat/8 3596 3682 966.0MB/s lsp -2.3% - BM_UFlat/9 1019193 1033493 953.3MB/s xls -1.4% - BM_UFlat/10 239 247 775.3MB/s xls_200 -3.2% - BM_UFlat/11 236411 240271 606.9MB/s txt1 -1.6% - BM_UFlat/12 206639 209768 571.2MB/s txt2 -1.5% - BM_UFlat/13 627803 635722 641.4MB/s txt3 -1.2% - BM_UFlat/14 845932 857816 538.2MB/s txt4 -1.4% - BM_UFlat/15 402107 391670 1.2GB/s bin +2.7% - BM_UFlat/16 283 279 683.6MB/s bin_200 +1.4% - BM_UFlat/17 46070 46815 781.5MB/s sum -1.6% - BM_UFlat/18 5053 5163 782.0MB/s man -2.1% - BM_UFlat/19 79721 76581 1.4GB/s pb +4.1% - BM_UFlat/20 251158 252330 697.5MB/s gaviota -0.5% - Sum of all benchmarks 4966150 4980396 -0.3% - - - Sandy Bridge, Google GCC: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------ - BM_UFlat/0 42850 42182 2.3GB/s html +1.6% - BM_UFlat/1 525660 515816 1.3GB/s urls +1.9% - BM_UFlat/2 7173 7283 16.3GB/s jpg -1.5% - BM_UFlat/3 92 91 2.1GB/s jpg_200 +1.1% - BM_UFlat/4 15147 14872 5.9GB/s pdf +1.8% - BM_UFlat/5 199936 192116 2.0GB/s html4 +4.1% - BM_UFlat/6 12796 12443 1.8GB/s cp +2.8% - BM_UFlat/7 6588 6400 1.6GB/s c +2.9% - BM_UFlat/8 2010 1951 1.8GB/s lsp +3.0% - BM_UFlat/9 761124 763049 1.3GB/s xls -0.3% - BM_UFlat/10 186 189 1016.1MB/s xls_200 -1.6% - BM_UFlat/11 159354 158460 918.6MB/s txt1 +0.6% - BM_UFlat/12 139732 139950 856.1MB/s txt2 -0.2% - BM_UFlat/13 429917 425027 961.7MB/s txt3 +1.2% - BM_UFlat/14 585255 587324 785.8MB/s txt4 -0.4% - BM_UFlat/15 276186 266173 1.8GB/s bin +3.8% - BM_UFlat/16 205 207 925.5MB/s bin_200 -1.0% - BM_UFlat/17 24925 24935 1.4GB/s sum -0.0% - BM_UFlat/18 2632 2576 1.5GB/s man +2.2% - BM_UFlat/19 40546 39108 2.8GB/s pb +3.7% - BM_UFlat/20 175803 168209 1048.9MB/s gaviota +4.5% - Sum of all benchmarks 3408117 3368361 +1.2% - - - Haswell, upstream GCC 4.8.1: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------ - BM_UFlat/0 46308 40641 2.3GB/s html +13.9% - BM_UFlat/1 513385 514706 1.3GB/s urls -0.3% - BM_UFlat/2 6197 6151 19.2GB/s jpg +0.7% - BM_UFlat/3 61 61 3.0GB/s jpg_200 +0.0% - BM_UFlat/4 13551 13429 6.5GB/s pdf +0.9% - BM_UFlat/5 198317 190243 2.0GB/s html4 +4.2% - BM_UFlat/6 14768 12560 1.8GB/s cp +17.6% - BM_UFlat/7 6453 6447 1.6GB/s c +0.1% - BM_UFlat/8 1991 1980 1.8GB/s lsp +0.6% - BM_UFlat/9 766947 770424 1.2GB/s xls -0.5% - BM_UFlat/10 170 169 1.1GB/s xls_200 +0.6% - BM_UFlat/11 164350 163554 888.7MB/s txt1 +0.5% - BM_UFlat/12 145444 143830 832.1MB/s txt2 +1.1% - BM_UFlat/13 437849 438413 929.2MB/s txt3 -0.1% - BM_UFlat/14 603587 605309 759.8MB/s txt4 -0.3% - BM_UFlat/15 249799 248067 1.9GB/s bin +0.7% - BM_UFlat/16 191 188 1011.4MB/s bin_200 +1.6% - BM_UFlat/17 26064 24778 1.4GB/s sum +5.2% - BM_UFlat/18 2620 2601 1.5GB/s man +0.7% - BM_UFlat/19 44551 37373 3.0GB/s pb +19.2% - BM_UFlat/20 165408 164584 1.0GB/s gaviota +0.5% - Sum of all benchmarks 3408011 3385508 +0.7% - - - git-svn-id: 
https://snappy.googlecode.com/svn/trunk@78 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 062bf544a61107db730b6d08cb0b159c4dd9b24c -Author: snappy.mirrorbot@gmail.com -Date: Fri Jun 14 21:42:26 2013 +0000 - - Make the two IncrementalCopy* functions take in an ssize_t instead of a len, - in order to avoid having to do 32-to-64-bit signed conversions on a hot path - during decompression. (Also fixes some MSVC warnings, mentioned in public - issue 75, but more of those remain.) They cannot be size_t because we expect - them to go negative and test for that. - - This saves a few movzwl instructions, yielding ~2% speedup in decompression. - - - Sandy Bridge: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 48009 41283 2.3GB/s html +16.3% - BM_UFlat/1 531274 513419 1.3GB/s urls +3.5% - BM_UFlat/2 7378 7062 16.8GB/s jpg +4.5% - BM_UFlat/3 92 92 2.0GB/s jpg_200 +0.0% - BM_UFlat/4 15057 14974 5.9GB/s pdf +0.6% - BM_UFlat/5 204323 193140 2.0GB/s html4 +5.8% - BM_UFlat/6 13282 12611 1.8GB/s cp +5.3% - BM_UFlat/7 6511 6504 1.6GB/s c +0.1% - BM_UFlat/8 2014 2030 1.7GB/s lsp -0.8% - BM_UFlat/9 775909 768336 1.3GB/s xls +1.0% - BM_UFlat/10 182 184 1043.2MB/s xls_200 -1.1% - BM_UFlat/11 167352 161630 901.2MB/s txt1 +3.5% - BM_UFlat/12 147393 142246 842.8MB/s txt2 +3.6% - BM_UFlat/13 449960 432853 944.4MB/s txt3 +4.0% - BM_UFlat/14 620497 594845 775.9MB/s txt4 +4.3% - BM_UFlat/15 265610 267356 1.8GB/s bin -0.7% - BM_UFlat/16 206 205 932.7MB/s bin_200 +0.5% - BM_UFlat/17 25561 24730 1.4GB/s sum +3.4% - BM_UFlat/18 2620 2644 1.5GB/s man -0.9% - BM_UFlat/19 45766 38589 2.9GB/s pb +18.6% - BM_UFlat/20 171107 169832 1039.5MB/s gaviota +0.8% - Sum of all benchmarks 3500103 3394565 +3.1% - - - Westmere: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 72624 71526 1.3GB/s html +1.5% - BM_UFlat/1 735821 722917 930.8MB/s urls +1.8% - BM_UFlat/2 10450 10172 11.7GB/s jpg +2.7% - BM_UFlat/3 117 117 1.6GB/s jpg_200 +0.0% - BM_UFlat/4 29817 29648 3.0GB/s pdf +0.6% - BM_UFlat/5 297126 293073 1.3GB/s html4 +1.4% - BM_UFlat/6 28252 27994 842.0MB/s cp +0.9% - BM_UFlat/7 12672 12391 862.1MB/s c +2.3% - BM_UFlat/8 3507 3425 1040.9MB/s lsp +2.4% - BM_UFlat/9 1004268 969395 1018.0MB/s xls +3.6% - BM_UFlat/10 233 227 844.8MB/s xls_200 +2.6% - BM_UFlat/11 230054 224981 647.8MB/s txt1 +2.3% - BM_UFlat/12 201229 196447 610.5MB/s txt2 +2.4% - BM_UFlat/13 609547 596761 685.3MB/s txt3 +2.1% - BM_UFlat/14 824362 804821 573.8MB/s txt4 +2.4% - BM_UFlat/15 371095 374899 1.3GB/s bin -1.0% - BM_UFlat/16 267 267 717.8MB/s bin_200 +0.0% - BM_UFlat/17 44623 43828 835.9MB/s sum +1.8% - BM_UFlat/18 5077 4815 841.0MB/s man +5.4% - BM_UFlat/19 74964 73210 1.5GB/s pb +2.4% - BM_UFlat/20 237987 236745 746.0MB/s gaviota +0.5% - Sum of all benchmarks 4794092 4697659 +2.1% - - - Istanbul: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 98614 96376 1020.4MB/s html +2.3% - BM_UFlat/1 963740 953241 707.2MB/s urls +1.1% - BM_UFlat/2 25042 24769 4.8GB/s jpg +1.1% - BM_UFlat/3 180 180 1065.6MB/s jpg_200 +0.0% - BM_UFlat/4 45942 45403 1.9GB/s pdf +1.2% - BM_UFlat/5 400135 390226 1008.2MB/s html4 +2.5% - BM_UFlat/6 37768 37392 631.9MB/s cp +1.0% - BM_UFlat/7 18585 18200 588.2MB/s c +2.1% - BM_UFlat/8 5751 5690 627.7MB/s lsp +1.1% - 
BM_UFlat/9 1543154 1542209 641.4MB/s xls +0.1% - BM_UFlat/10 381 388 494.6MB/s xls_200 -1.8% - BM_UFlat/11 339715 331973 440.1MB/s txt1 +2.3% - BM_UFlat/12 294807 289418 415.4MB/s txt2 +1.9% - BM_UFlat/13 906160 884094 463.3MB/s txt3 +2.5% - BM_UFlat/14 1224221 1198435 386.1MB/s txt4 +2.2% - BM_UFlat/15 516277 502923 979.5MB/s bin +2.7% - BM_UFlat/16 405 402 477.2MB/s bin_200 +0.7% - BM_UFlat/17 61640 60621 605.6MB/s sum +1.7% - BM_UFlat/18 7326 7383 549.5MB/s man -0.8% - BM_UFlat/19 94720 92653 1.2GB/s pb +2.2% - BM_UFlat/20 360435 346687 510.6MB/s gaviota +4.0% - Sum of all benchmarks 6944998 6828663 +1.7% - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@77 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 328aafa1980824a9afdcd50edc30d9d5157e417f -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 13 16:19:52 2013 +0000 - - Add support for uncompressing to iovecs (scatter I/O). - Windows does not have struct iovec defined anywhere, - so we define our own version that's equal to what UNIX - typically has. - - The bulk of this patch was contributed by Mohit Aron. - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@76 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit cd92eb0852e2339187b693eef3595a07d2276c1d -Author: snappy.mirrorbot@gmail.com -Date: Wed Jun 12 19:51:15 2013 +0000 - - Some code reorganization needed for an internal change. - - R=fikes - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@75 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit a3e928d62bbd61b523b988c07b560253950cf73b -Author: snappy.mirrorbot@gmail.com -Date: Tue Apr 9 15:33:30 2013 +0000 - - Support truncated test data in the zippy benchmark. - - R=sesse - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@74 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit bde324c0169763688f35ee44630a26ad1f49eec3 -Author: snappy.mirrorbot@gmail.com -Date: Tue Feb 5 14:36:15 2013 +0000 - - Release Snappy 1.1.0. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@73 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 8168446c7eaaa0594e1f4ca923376dcf3a2846fa -Author: snappy.mirrorbot@gmail.com -Date: Tue Feb 5 14:30:05 2013 +0000 - - Make ./snappy_unittest pass without "srcdir" being defined. - - Previously, snappy_unittest would read from an absolute path /testdata/..; - convert it to use a relative path instead. - - Patch from Marc-Antoine Ruel. - - R=maruel - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@72 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 27a0cc394950ebdad2e8d67322f0862835b10bd9 -Author: snappy.mirrorbot@gmail.com -Date: Fri Jan 18 12:16:36 2013 +0000 - - Increase the Zippy block size from 32 kB to 64 kB, winning ~3% density - while being effectively performance neutral. - - The longer story about density is that we win 3-6% density on the benchmarks - where this has any effect at all; many of the benchmarks (cp, c, lsp, man) - are smaller than 32 kB and thus will see no effect. Binary data also seems - to win little or nothing; of course, the already-compressed data wins nothing. - The protobuf benchmark wins as much as ~18% depending on architecture, - but I wouldn't be too sure that this is representative of protobuf data in - general. - - As for performance, we lose a tiny amount since we get more tags (e.g., a long - literal might be broken up into literal-copy-literal), but we win it back with - less clearing of the hash table, and more opportunities to skip incompressible - data (e.g. in the jpg benchmark).
Decompression seems to get ever so slightly - slower, again due to more tags. The total net change is about as close to zero - as we can get, so the end effect seems to be simply more density and no - real performance change. - - The comment about not changing kBlockSize, scary as it is, is not really - relevant, since we're never going to have a block-level decompressor without - explicitly marked blocks. Replace it with something more appropriate. - - This affects the framing format, but it's okay to change it since it basically - has no users yet. - - - Density (note that cp, c, lsp and man are all smaller than 32 kB): - - Benchmark Description Base (%) New (%) Improvement - -------------------------------------------------------------- - ZFlat/0 html 22.57 22.31 +5.6% - ZFlat/1 urls 50.89 47.77 +6.5% - ZFlat/2 jpg 99.88 99.87 +0.0% - ZFlat/3 pdf 82.13 82.07 +0.1% - ZFlat/4 html4 23.55 22.51 +4.6% - ZFlat/5 cp 48.12 48.12 +0.0% - ZFlat/6 c 42.40 42.40 +0.0% - ZFlat/7 lsp 48.37 48.37 +0.0% - ZFlat/8 xls 41.34 41.23 +0.3% - ZFlat/9 txt1 59.81 57.87 +3.4% - ZFlat/10 txt2 64.07 61.93 +3.5% - ZFlat/11 txt3 57.11 54.92 +4.0% - ZFlat/12 txt4 68.35 66.22 +3.2% - ZFlat/13 bin 18.21 18.11 +0.6% - ZFlat/14 sum 51.88 48.96 +6.0% - ZFlat/15 man 59.36 59.36 +0.0% - ZFlat/16 pb 23.15 19.64 +17.9% - ZFlat/17 gaviota 38.27 37.72 +1.5% - Geometric mean 45.51 44.15 +3.1% - - - Microbenchmarks (64-bit, opt): - - Westmere 2.8 GHz: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 75342 75027 1.3GB/s html +0.4% - BM_UFlat/1 723767 744269 899.6MB/s urls -2.8% - BM_UFlat/2 10072 10072 11.7GB/s jpg +0.0% - BM_UFlat/3 30747 30388 2.9GB/s pdf +1.2% - BM_UFlat/4 307353 306063 1.2GB/s html4 +0.4% - BM_UFlat/5 28593 28743 816.3MB/s cp -0.5% - BM_UFlat/6 12958 12998 818.1MB/s c -0.3% - BM_UFlat/7 3700 3792 935.8MB/s lsp -2.4% - BM_UFlat/8 999685 999905 982.1MB/s xls -0.0% - BM_UFlat/9 232954 230079 630.4MB/s txt1 +1.2% - BM_UFlat/10 200785 201468 592.6MB/s txt2 -0.3% - BM_UFlat/11 617267 610968 666.1MB/s txt3 +1.0% - BM_UFlat/12 821595 822475 558.7MB/s txt4 -0.1% - BM_UFlat/13 377097 377632 1.3GB/s bin -0.1% - BM_UFlat/14 45476 45260 805.8MB/s sum +0.5% - BM_UFlat/15 4985 5003 805.7MB/s man -0.4% - BM_UFlat/16 80813 77494 1.4GB/s pb +4.3% - BM_UFlat/17 251792 241553 727.7MB/s gaviota +4.2% - BM_UValidate/0 40343 40354 2.4GB/s html -0.0% - BM_UValidate/1 426890 451574 1.4GB/s urls -5.5% - BM_UValidate/2 187 179 661.9GB/s jpg +4.5% - BM_UValidate/3 13783 13827 6.4GB/s pdf -0.3% - BM_UValidate/4 162393 163335 2.3GB/s html4 -0.6% - BM_UDataBuffer/0 93756 93302 1046.7MB/s html +0.5% - BM_UDataBuffer/1 886714 916292 730.7MB/s urls -3.2% - BM_UDataBuffer/2 15861 16401 7.2GB/s jpg -3.3% - BM_UDataBuffer/3 38934 39224 2.2GB/s pdf -0.7% - BM_UDataBuffer/4 381008 379428 1029.5MB/s html4 +0.4% - BM_UCord/0 92528 91098 1072.0MB/s html +1.6% - BM_UCord/1 858421 885287 756.3MB/s urls -3.0% - BM_UCord/2 13140 13464 8.8GB/s jpg -2.4% - BM_UCord/3 39012 37773 2.3GB/s pdf +3.3% - BM_UCord/4 376869 371267 1052.1MB/s html4 +1.5% - BM_UCordString/0 75810 75303 1.3GB/s html +0.7% - BM_UCordString/1 735290 753841 888.2MB/s urls -2.5% - BM_UCordString/2 11945 13113 9.0GB/s jpg -8.9% - BM_UCordString/3 33901 32562 2.7GB/s pdf +4.1% - BM_UCordString/4 310985 309390 1.2GB/s html4 +0.5% - BM_UCordValidate/0 40952 40450 2.4GB/s html +1.2% - BM_UCordValidate/1 433842 456531 1.4GB/s urls -5.0% - BM_UCordValidate/2 1179 1173 100.8GB/s jpg 
+0.5% - BM_UCordValidate/3 14481 14392 6.1GB/s pdf +0.6% - BM_UCordValidate/4 164364 164151 2.3GB/s html4 +0.1% - BM_ZFlat/0 160610 156601 623.6MB/s html (22.31 %) +2.6% - BM_ZFlat/1 1995238 1993582 335.9MB/s urls (47.77 %) +0.1% - BM_ZFlat/2 30133 24983 4.7GB/s jpg (99.87 %) +20.6% - BM_ZFlat/3 74453 73128 1.2GB/s pdf (82.07 %) +1.8% - BM_ZFlat/4 647674 633729 616.4MB/s html4 (22.51 %) +2.2% - BM_ZFlat/5 76259 76090 308.4MB/s cp (48.12 %) +0.2% - BM_ZFlat/6 31106 31084 342.1MB/s c (42.40 %) +0.1% - BM_ZFlat/7 10507 10443 339.8MB/s lsp (48.37 %) +0.6% - BM_ZFlat/8 1811047 1793325 547.6MB/s xls (41.23 %) +1.0% - BM_ZFlat/9 597903 581793 249.3MB/s txt1 (57.87 %) +2.8% - BM_ZFlat/10 525320 514522 232.0MB/s txt2 (61.93 %) +2.1% - BM_ZFlat/11 1596591 1551636 262.3MB/s txt3 (54.92 %) +2.9% - BM_ZFlat/12 2134523 2094033 219.5MB/s txt4 (66.22 %) +1.9% - BM_ZFlat/13 593024 587869 832.6MB/s bin (18.11 %) +0.9% - BM_ZFlat/14 114746 110666 329.5MB/s sum (48.96 %) +3.7% - BM_ZFlat/15 14376 14485 278.3MB/s man (59.36 %) -0.8% - BM_ZFlat/16 167908 150070 753.6MB/s pb (19.64 %) +11.9% - BM_ZFlat/17 460228 442253 397.5MB/s gaviota (37.72 %) +4.1% - BM_ZCord/0 164896 160241 609.4MB/s html +2.9% - BM_ZCord/1 2070239 2043492 327.7MB/s urls +1.3% - BM_ZCord/2 54402 47002 2.5GB/s jpg +15.7% - BM_ZCord/3 85871 83832 1073.1MB/s pdf +2.4% - BM_ZCord/4 664078 648825 602.0MB/s html4 +2.4% - BM_ZDataBuffer/0 174874 172549 566.0MB/s html +1.3% - BM_ZDataBuffer/1 2134410 2139173 313.0MB/s urls -0.2% - BM_ZDataBuffer/2 71911 69551 1.7GB/s jpg +3.4% - BM_ZDataBuffer/3 98236 99727 902.1MB/s pdf -1.5% - BM_ZDataBuffer/4 710776 699104 558.8MB/s html4 +1.7% - Sum of all benchmarks 27358908 27200688 +0.6% - - - Sandy Bridge 2.6 GHz: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 49356 49018 1.9GB/s html +0.7% - BM_UFlat/1 516764 531955 1.2GB/s urls -2.9% - BM_UFlat/2 6982 7304 16.2GB/s jpg -4.4% - BM_UFlat/3 15285 15598 5.6GB/s pdf -2.0% - BM_UFlat/4 206557 206669 1.8GB/s html4 -0.1% - BM_UFlat/5 13681 13567 1.7GB/s cp +0.8% - BM_UFlat/6 6571 6592 1.6GB/s c -0.3% - BM_UFlat/7 2008 1994 1.7GB/s lsp +0.7% - BM_UFlat/8 775700 773286 1.2GB/s xls +0.3% - BM_UFlat/9 165578 164480 881.8MB/s txt1 +0.7% - BM_UFlat/10 143707 144139 828.2MB/s txt2 -0.3% - BM_UFlat/11 443026 436281 932.8MB/s txt3 +1.5% - BM_UFlat/12 603129 595856 771.2MB/s txt4 +1.2% - BM_UFlat/13 271682 270450 1.8GB/s bin +0.5% - BM_UFlat/14 26200 25666 1.4GB/s sum +2.1% - BM_UFlat/15 2620 2608 1.5GB/s man +0.5% - BM_UFlat/16 48908 47756 2.3GB/s pb +2.4% - BM_UFlat/17 174638 170346 1031.9MB/s gaviota +2.5% - BM_UValidate/0 31922 31898 3.0GB/s html +0.1% - BM_UValidate/1 341265 363554 1.8GB/s urls -6.1% - BM_UValidate/2 160 151 782.8GB/s jpg +6.0% - BM_UValidate/3 10402 10380 8.5GB/s pdf +0.2% - BM_UValidate/4 129490 130587 2.9GB/s html4 -0.8% - BM_UDataBuffer/0 59383 58736 1.6GB/s html +1.1% - BM_UDataBuffer/1 619222 637786 1049.8MB/s urls -2.9% - BM_UDataBuffer/2 10775 11941 9.9GB/s jpg -9.8% - BM_UDataBuffer/3 18002 17930 4.9GB/s pdf +0.4% - BM_UDataBuffer/4 259182 259306 1.5GB/s html4 -0.0% - BM_UCord/0 59379 57814 1.6GB/s html +2.7% - BM_UCord/1 598456 615162 1088.4MB/s urls -2.7% - BM_UCord/2 8519 8628 13.7GB/s jpg -1.3% - BM_UCord/3 18123 17537 5.0GB/s pdf +3.3% - BM_UCord/4 252375 252331 1.5GB/s html4 +0.0% - BM_UCordString/0 49494 49790 1.9GB/s html -0.6% - BM_UCordString/1 524659 541803 1.2GB/s urls -3.2% - BM_UCordString/2 8206 8354 14.2GB/s jpg -1.8% 
- BM_UCordString/3 17235 16537 5.3GB/s pdf +4.2% - BM_UCordString/4 210188 211072 1.8GB/s html4 -0.4% - BM_UCordValidate/0 31956 31587 3.0GB/s html +1.2% - BM_UCordValidate/1 340828 362141 1.8GB/s urls -5.9% - BM_UCordValidate/2 783 744 158.9GB/s jpg +5.2% - BM_UCordValidate/3 10543 10462 8.4GB/s pdf +0.8% - BM_UCordValidate/4 130150 129789 2.9GB/s html4 +0.3% - BM_ZFlat/0 113873 111200 878.2MB/s html (22.31 %) +2.4% - BM_ZFlat/1 1473023 1489858 449.4MB/s urls (47.77 %) -1.1% - BM_ZFlat/2 23569 19486 6.1GB/s jpg (99.87 %) +21.0% - BM_ZFlat/3 49178 48046 1.8GB/s pdf (82.07 %) +2.4% - BM_ZFlat/4 475063 469394 832.2MB/s html4 (22.51 %) +1.2% - BM_ZFlat/5 46910 46816 501.2MB/s cp (48.12 %) +0.2% - BM_ZFlat/6 16883 16916 628.6MB/s c (42.40 %) -0.2% - BM_ZFlat/7 5381 5447 651.5MB/s lsp (48.37 %) -1.2% - BM_ZFlat/8 1466870 1473861 666.3MB/s xls (41.23 %) -0.5% - BM_ZFlat/9 468006 464101 312.5MB/s txt1 (57.87 %) +0.8% - BM_ZFlat/10 408157 408957 291.9MB/s txt2 (61.93 %) -0.2% - BM_ZFlat/11 1253348 1232910 330.1MB/s txt3 (54.92 %) +1.7% - BM_ZFlat/12 1702373 1702977 269.8MB/s txt4 (66.22 %) -0.0% - BM_ZFlat/13 439792 438557 1116.0MB/s bin (18.11 %) +0.3% - BM_ZFlat/14 80766 78851 462.5MB/s sum (48.96 %) +2.4% - BM_ZFlat/15 7420 7542 534.5MB/s man (59.36 %) -1.6% - BM_ZFlat/16 112043 100126 1.1GB/s pb (19.64 %) +11.9% - BM_ZFlat/17 368877 357703 491.4MB/s gaviota (37.72 %) +3.1% - BM_ZCord/0 116402 113564 859.9MB/s html +2.5% - BM_ZCord/1 1507156 1519911 440.5MB/s urls -0.8% - BM_ZCord/2 39860 33686 3.5GB/s jpg +18.3% - BM_ZCord/3 56211 54694 1.6GB/s pdf +2.8% - BM_ZCord/4 485594 479212 815.1MB/s html4 +1.3% - BM_ZDataBuffer/0 123185 121572 803.3MB/s html +1.3% - BM_ZDataBuffer/1 1569111 1589380 421.3MB/s urls -1.3% - BM_ZDataBuffer/2 53143 49556 2.4GB/s jpg +7.2% - BM_ZDataBuffer/3 65725 66826 1.3GB/s pdf -1.6% - BM_ZDataBuffer/4 517871 514750 758.9MB/s html4 +0.6% - Sum of all benchmarks 20258879 20315484 -0.3% - - - AMD Instanbul 2.4 GHz: - - Benchmark Base (ns) New (ns) Improvement - ------------------------------------------------------------------------------------------------- - BM_UFlat/0 97120 96585 1011.1MB/s html +0.6% - BM_UFlat/1 917473 948016 706.3MB/s urls -3.2% - BM_UFlat/2 21496 23938 4.9GB/s jpg -10.2% - BM_UFlat/3 44751 45639 1.9GB/s pdf -1.9% - BM_UFlat/4 391950 391413 998.0MB/s html4 +0.1% - BM_UFlat/5 37366 37201 630.7MB/s cp +0.4% - BM_UFlat/6 18350 18318 580.5MB/s c +0.2% - BM_UFlat/7 5672 5661 626.9MB/s lsp +0.2% - BM_UFlat/8 1533390 1529441 642.1MB/s xls +0.3% - BM_UFlat/9 335477 336553 431.0MB/s txt1 -0.3% - BM_UFlat/10 285140 292080 408.7MB/s txt2 -2.4% - BM_UFlat/11 888507 894758 454.9MB/s txt3 -0.7% - BM_UFlat/12 1187643 1210928 379.5MB/s txt4 -1.9% - BM_UFlat/13 493717 507447 964.5MB/s bin -2.7% - BM_UFlat/14 61740 60870 599.1MB/s sum +1.4% - BM_UFlat/15 7211 7187 560.9MB/s man +0.3% - BM_UFlat/16 97435 93100 1.2GB/s pb +4.7% - BM_UFlat/17 362662 356395 493.2MB/s gaviota +1.8% - BM_UValidate/0 47475 47118 2.0GB/s html +0.8% - BM_UValidate/1 501304 529741 1.2GB/s urls -5.4% - BM_UValidate/2 276 243 486.2GB/s jpg +13.6% - BM_UValidate/3 16361 16261 5.4GB/s pdf +0.6% - BM_UValidate/4 190741 190353 2.0GB/s html4 +0.2% - BM_UDataBuffer/0 111080 109771 889.6MB/s html +1.2% - BM_UDataBuffer/1 1051035 1085999 616.5MB/s urls -3.2% - BM_UDataBuffer/2 25801 25463 4.6GB/s jpg +1.3% - BM_UDataBuffer/3 50493 49946 1.8GB/s pdf +1.1% - BM_UDataBuffer/4 447258 444138 879.5MB/s html4 +0.7% - BM_UCord/0 109350 107909 905.0MB/s html +1.3% - BM_UCord/1 1023396 1054964 634.7MB/s urls -3.0% 
- BM_UCord/2 25292 24371 4.9GB/s jpg +3.8% - BM_UCord/3 48955 49736 1.8GB/s pdf -1.6% - BM_UCord/4 440452 437331 893.2MB/s html4 +0.7% - BM_UCordString/0 98511 98031 996.2MB/s html +0.5% - BM_UCordString/1 933230 963495 694.9MB/s urls -3.1% - BM_UCordString/2 23311 24076 4.9GB/s jpg -3.2% - BM_UCordString/3 45568 46196 1.9GB/s pdf -1.4% - BM_UCordString/4 397791 396934 984.1MB/s html4 +0.2% - BM_UCordValidate/0 47537 46921 2.0GB/s html +1.3% - BM_UCordValidate/1 505071 532716 1.2GB/s urls -5.2% - BM_UCordValidate/2 1663 1621 72.9GB/s jpg +2.6% - BM_UCordValidate/3 16890 16926 5.2GB/s pdf -0.2% - BM_UCordValidate/4 192365 191984 2.0GB/s html4 +0.2% - BM_ZFlat/0 184708 179103 545.3MB/s html (22.31 %) +3.1% - BM_ZFlat/1 2293864 2302950 290.7MB/s urls (47.77 %) -0.4% - BM_ZFlat/2 52852 47618 2.5GB/s jpg (99.87 %) +11.0% - BM_ZFlat/3 100766 96179 935.3MB/s pdf (82.07 %) +4.8% - BM_ZFlat/4 741220 727977 536.6MB/s html4 (22.51 %) +1.8% - BM_ZFlat/5 85402 85418 274.7MB/s cp (48.12 %) -0.0% - BM_ZFlat/6 36558 36494 291.4MB/s c (42.40 %) +0.2% - BM_ZFlat/7 12706 12507 283.7MB/s lsp (48.37 %) +1.6% - BM_ZFlat/8 2336823 2335688 420.5MB/s xls (41.23 %) +0.0% - BM_ZFlat/9 701804 681153 212.9MB/s txt1 (57.87 %) +3.0% - BM_ZFlat/10 606700 597194 199.9MB/s txt2 (61.93 %) +1.6% - BM_ZFlat/11 1852283 1803238 225.7MB/s txt3 (54.92 %) +2.7% - BM_ZFlat/12 2475527 2443354 188.1MB/s txt4 (66.22 %) +1.3% - BM_ZFlat/13 694497 696654 702.6MB/s bin (18.11 %) -0.3% - BM_ZFlat/14 136929 129855 280.8MB/s sum (48.96 %) +5.4% - BM_ZFlat/15 17172 17124 235.4MB/s man (59.36 %) +0.3% - BM_ZFlat/16 190364 171763 658.4MB/s pb (19.64 %) +10.8% - BM_ZFlat/17 567285 555190 316.6MB/s gaviota (37.72 %) +2.2% - BM_ZCord/0 193490 187031 522.1MB/s html +3.5% - BM_ZCord/1 2427537 2415315 277.2MB/s urls +0.5% - BM_ZCord/2 85378 81412 1.5GB/s jpg +4.9% - BM_ZCord/3 121898 119419 753.3MB/s pdf +2.1% - BM_ZCord/4 779564 762961 512.0MB/s html4 +2.2% - BM_ZDataBuffer/0 213820 207272 471.1MB/s html +3.2% - BM_ZDataBuffer/1 2589010 2586495 258.9MB/s urls +0.1% - BM_ZDataBuffer/2 121871 118885 1018.4MB/s jpg +2.5% - BM_ZDataBuffer/3 145382 145986 616.2MB/s pdf -0.4% - BM_ZDataBuffer/4 868117 852754 458.1MB/s html4 +1.8% - Sum of all benchmarks 33771833 33744763 +0.1% - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@71 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 81f34784b7b812dcda956ee489dfdc74ec2da990 -Author: snappy.mirrorbot@gmail.com -Date: Sun Jan 6 19:21:26 2013 +0000 - - Adjust the Snappy open-source distribution for the changes in Google's - internal file API. - - R=sanjay - - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@70 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 698af469b47fe809905e2ed173ad84241de5800f -Author: snappy.mirrorbot@gmail.com -Date: Fri Jan 4 11:54:20 2013 +0000 - - Change a few ORs to additions where they don't matter. This helps the compiler - use the LEA instruction more efficiently, since e.g. a + (b << 2) can be encoded - as one instruction. Even more importantly, it can constant-fold the - COPY_* enums together with the shifted negative constants, which also saves - some instructions. (We don't need it for LITERAL, since it happens to be 0.) - - I am unsure why the compiler couldn't do this itself, but the theory is that - it cannot prove that len-1 and len-4 cannot underflow/wrap, and thus can't - do the optimization safely. - - The gains are small but measurable; 0.5-1.0% over the BM_Z* benchmarks - (measured on Westmere, Sandy Bridge and Istanbul). 
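To make the LEA observation concrete, here is a minimal sketch of a copy-tag emitter in the spirit of this change (a hypothetical function, not Snappy's actual emitter; only the ADD-instead-of-OR point is the real one):

    #include <cstddef>

    // Tag types from the Snappy format; LITERAL is 0, so it needs no trick.
    enum { LITERAL = 0, COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2 };

    // The low two bits of ((len - 4) << 2) are always zero, so OR and ADD
    // produce the same tag byte here -- but written as additions, the
    // compiler can fold COPY_1_BYTE_OFFSET into the shifted constants and
    // encode the whole expression with a single LEA.
    static inline char* EmitCopy1ByteOffset(char* op, size_t offset, size_t len) {
      *op++ = static_cast<char>(COPY_1_BYTE_OFFSET + ((len - 4) << 2) +
                                ((offset >> 8) << 5));
      *op++ = static_cast<char>(offset & 0xff);  // low 8 bits of the offset
      return op;
    }

The compiler cannot make this transformation on its own precisely because, as noted above, it cannot prove that len - 4 never wraps.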
- - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@69 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 55209f9b92efd97e0a61be28ed94210de04c3bfc -Author: snappy.mirrorbot@gmail.com -Date: Mon Oct 8 11:37:16 2012 +0000 - - Stop giving -Werror to automake, due to an incompatibility between current - versions of libtool and automake on non-GNU platforms (e.g. Mac OS X). - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@68 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b86e81c8b3426a62d8ab3a7674c2506e9e678740 -Author: snappy.mirrorbot@gmail.com -Date: Fri Aug 17 13:54:47 2012 +0000 - - Fix public issue 66: Document GetUncompressedLength better, in particular that - it leaves the source in a state that's not appropriate for RawUncompress. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@67 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2e225ba821b420ae28e1d427075d5589c1e892d9 -Author: snappy.mirrorbot@gmail.com -Date: Tue Jul 31 11:44:44 2012 +0000 - - Fix public issue 64: Check for <sys/uio.h> at configure time, - since MSVC seemingly does not have it. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@66 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e89f20ab46ee11050760c6d57f05c2a3825a911c -Author: snappy.mirrorbot@gmail.com -Date: Wed Jul 4 09:34:48 2012 +0000 - - Handle the case where gettimeofday() goes backwards or returns the same value - twice; it could cause division by zero in the unit test framework. - (We already had one fix for this in place, but it was incomplete.) - - This could in theory happen on any system, since there are few guarantees - about gettimeofday(), but seems to only happen in practice on GNU/Hurd, where - gettimeofday() is cached and only updated every so often. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@65 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3ec60ac9878de5d0317ad38fc545080a4bfaa74f -Author: snappy.mirrorbot@gmail.com -Date: Wed Jul 4 09:28:33 2012 +0000 - - Mark ARMv4 as not supporting unaligned accesses (not just ARMv5 and ARMv6); - apparently Debian still targets these by default, giving us segfaults on - armel. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@64 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit be80d6f74f9d82220e952a54f3f129aae1f13f95 -Author: snappy.mirrorbot@gmail.com -Date: Tue May 22 09:46:05 2012 +0000 - - Fix public bug #62: Remove an extraneous comma at the end of an enum list, - causing compile errors when embedded in Mozilla on OpenBSD. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@63 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 8b95464146dddab1c7068f879162db9a885cdafe -Author: snappy.mirrorbot@gmail.com -Date: Tue May 22 09:32:50 2012 +0000 - - Snappy library no longer depends on iostream. - - Achieved by moving logging macro definitions to a test-only - header file, and by changing non-test code to use assert, - fprintf, and abort instead of LOG/CHECK macros. - - R=sesse - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@62 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit fc723b212d6972af7051261754770b3f70a7dc03 -Author: snappy.mirrorbot@gmail.com -Date: Fri Feb 24 15:46:37 2012 +0000 - - Release Snappy 1.0.5.
- - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@61 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit dc63e0ad9693e13390ba31b00d92ecccaf7605c3 -Author: snappy.mirrorbot@gmail.com -Date: Thu Feb 23 17:00:36 2012 +0000 - - For 32-bit platforms, do not try to accelerate multiple neighboring - 32-bit loads with a 64-bit load during compression (it's not a win). - - The main target for this optimization is ARM, but 32-bit x86 gets - a small gain, too, although there is noise in the microbenchmarks. - It's a no-op for 64-bit x86. It does not affect decompression. - - Microbenchmark results on a Cortex-A9 1GHz, using g++ 4.6.2 (from - Ubuntu/Linaro), -O2 -DNDEBUG -Wa,-march=armv7a -mtune=cortex-a9 - -mthumb-interwork, minimum 1000 iterations: - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_ZFlat/0 1158277 1160000 1000 84.2MB/s html (23.57 %) [ +4.3%] - BM_ZFlat/1 14861782 14860000 1000 45.1MB/s urls (50.89 %) [ +1.1%] - BM_ZFlat/2 393595 390000 1000 310.5MB/s jpg (99.88 %) [ +0.0%] - BM_ZFlat/3 650583 650000 1000 138.4MB/s pdf (82.13 %) [ +3.1%] - BM_ZFlat/4 4661480 4660000 1000 83.8MB/s html4 (23.55 %) [ +4.3%] - BM_ZFlat/5 491973 490000 1000 47.9MB/s cp (48.12 %) [ +2.0%] - BM_ZFlat/6 193575 192678 1038 55.2MB/s c (42.40 %) [ +9.0%] - BM_ZFlat/7 62343 62754 3187 56.5MB/s lsp (48.37 %) [ +2.6%] - BM_ZFlat/8 17708468 17710000 1000 55.5MB/s xls (41.34 %) [ -0.3%] - BM_ZFlat/9 3755345 3760000 1000 38.6MB/s txt1 (59.81 %) [ +8.2%] - BM_ZFlat/10 3324217 3320000 1000 36.0MB/s txt2 (64.07 %) [ +4.2%] - BM_ZFlat/11 10139932 10140000 1000 40.1MB/s txt3 (57.11 %) [ +6.4%] - BM_ZFlat/12 13532109 13530000 1000 34.0MB/s txt4 (68.35 %) [ +5.0%] - BM_ZFlat/13 4690847 4690000 1000 104.4MB/s bin (18.21 %) [ +4.1%] - BM_ZFlat/14 830682 830000 1000 43.9MB/s sum (51.88 %) [ +1.2%] - BM_ZFlat/15 84784 85011 2235 47.4MB/s man (59.36 %) [ +1.1%] - BM_ZFlat/16 1293254 1290000 1000 87.7MB/s pb (23.15 %) [ +2.3%] - BM_ZFlat/17 2775155 2780000 1000 63.2MB/s gaviota (38.27 %) [+12.2%] - - Core i7 in 32-bit mode (only one run and 100 iterations, though, so noisy): - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_ZFlat/0 227582 223464 3043 437.0MB/s html (23.57 %) [ +7.4%] - BM_ZFlat/1 2982430 2918455 233 229.4MB/s urls (50.89 %) [ +2.9%] - BM_ZFlat/2 46967 46658 15217 2.5GB/s jpg (99.88 %) [ +0.0%] - BM_ZFlat/3 115298 114864 5833 783.2MB/s pdf (82.13 %) [ +1.5%] - BM_ZFlat/4 913440 899743 778 434.2MB/s html4 (23.55 %) [ +0.3%] - BM_ZFlat/5 110302 108571 7000 216.1MB/s cp (48.12 %) [ +0.0%] - BM_ZFlat/6 44409 43372 15909 245.2MB/s c (42.40 %) [ +0.8%] - BM_ZFlat/7 15713 15643 46667 226.9MB/s lsp (48.37 %) [ +2.7%] - BM_ZFlat/8 2625539 2602230 269 377.4MB/s xls (41.34 %) [ +1.4%] - BM_ZFlat/9 808884 811429 875 178.8MB/s txt1 (59.81 %) [ -3.9%] - BM_ZFlat/10 709532 700000 1000 170.5MB/s txt2 (64.07 %) [ +0.0%] - BM_ZFlat/11 2177682 2162162 333 188.2MB/s txt3 (57.11 %) [ -1.4%] - BM_ZFlat/12 2849640 2840000 250 161.8MB/s txt4 (68.35 %) [ -1.4%] - BM_ZFlat/13 849760 835476 778 585.8MB/s bin (18.21 %) [ +1.2%] - BM_ZFlat/14 165940 164571 4375 221.6MB/s sum (51.88 %) [ +1.4%] - BM_ZFlat/15 20939 20571 35000 196.0MB/s man (59.36 %) [ +2.1%] - BM_ZFlat/16 239209 236544 2917 478.1MB/s pb (23.15 %) [ +4.2%] - BM_ZFlat/17 616206 610000 1000 288.2MB/s gaviota (38.27 %) [ -1.6%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@60 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 
f8829ea39d51432ba4e6a26ddaec57acea779f4c -Author: snappy.mirrorbot@gmail.com -Date: Tue Feb 21 17:02:17 2012 +0000 - - Enable the use of unaligned loads and stores for ARM-based architectures - where they are available (ARMv7 and higher). This gives a significant - speed boost on ARM, both for compression and decompression. - It should not affect x86 at all. - - There are more changes possible to speed up ARM, but it might not be - that easy to do without hurting x86 or making the code uglier. - Also, we de not try to use NEON yet. - - Microbenchmark results on a Cortex-A9 1GHz, using g++ 4.6.2 (from Ubuntu/Linaro), - -O2 -DNDEBUG -Wa,-march=armv7a -mtune=cortex-a9 -mthumb-interwork: - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 524806 529100 378 184.6MB/s html [+33.6%] - BM_UFlat/1 5139790 5200000 100 128.8MB/s urls [+28.8%] - BM_UFlat/2 86540 84166 1901 1.4GB/s jpg [ +0.6%] - BM_UFlat/3 215351 210176 904 428.0MB/s pdf [+29.8%] - BM_UFlat/4 2144490 2100000 100 186.0MB/s html4 [+33.3%] - BM_UFlat/5 194482 190000 1000 123.5MB/s cp [+36.2%] - BM_UFlat/6 91843 90175 2107 117.9MB/s c [+38.6%] - BM_UFlat/7 28535 28426 6684 124.8MB/s lsp [+34.7%] - BM_UFlat/8 9206600 9200000 100 106.7MB/s xls [+42.4%] - BM_UFlat/9 1865273 1886792 106 76.9MB/s txt1 [+32.5%] - BM_UFlat/10 1576809 1587301 126 75.2MB/s txt2 [+32.3%] - BM_UFlat/11 4968450 4900000 100 83.1MB/s txt3 [+32.7%] - BM_UFlat/12 6673970 6700000 100 68.6MB/s txt4 [+32.8%] - BM_UFlat/13 2391470 2400000 100 203.9MB/s bin [+29.2%] - BM_UFlat/14 334601 344827 522 105.8MB/s sum [+30.6%] - BM_UFlat/15 37404 38080 5252 105.9MB/s man [+33.8%] - BM_UFlat/16 535470 540540 370 209.2MB/s pb [+31.2%] - BM_UFlat/17 1875245 1886792 106 93.2MB/s gaviota [+37.8%] - BM_UValidate/0 178425 179533 1114 543.9MB/s html [ +2.7%] - BM_UValidate/1 2100450 2000000 100 334.8MB/s urls [ +5.0%] - BM_UValidate/2 1039 1044 172413 113.3GB/s jpg [ +3.4%] - BM_UValidate/3 59423 59470 3363 1.5GB/s pdf [ +7.8%] - BM_UValidate/4 760716 766283 261 509.8MB/s html4 [ +6.5%] - BM_ZFlat/0 1204632 1204819 166 81.1MB/s html (23.57 %) [+32.8%] - BM_ZFlat/1 15656190 15600000 100 42.9MB/s urls (50.89 %) [+27.6%] - BM_ZFlat/2 403336 410677 487 294.8MB/s jpg (99.88 %) [+16.5%] - BM_ZFlat/3 664073 671140 298 134.0MB/s pdf (82.13 %) [+28.4%] - BM_ZFlat/4 4961940 4900000 100 79.7MB/s html4 (23.55 %) [+30.6%] - BM_ZFlat/5 500664 501253 399 46.8MB/s cp (48.12 %) [+33.4%] - BM_ZFlat/6 217276 215982 926 49.2MB/s c (42.40 %) [+25.0%] - BM_ZFlat/7 64122 65487 3054 54.2MB/s lsp (48.37 %) [+36.1%] - BM_ZFlat/8 18045730 18000000 100 54.6MB/s xls (41.34 %) [+34.4%] - BM_ZFlat/9 4051530 4000000 100 36.3MB/s txt1 (59.81 %) [+25.0%] - BM_ZFlat/10 3451800 3500000 100 34.1MB/s txt2 (64.07 %) [+25.7%] - BM_ZFlat/11 11052340 11100000 100 36.7MB/s txt3 (57.11 %) [+24.3%] - BM_ZFlat/12 14538690 14600000 100 31.5MB/s txt4 (68.35 %) [+24.7%] - BM_ZFlat/13 5041850 5000000 100 97.9MB/s bin (18.21 %) [+32.0%] - BM_ZFlat/14 908840 909090 220 40.1MB/s sum (51.88 %) [+22.2%] - BM_ZFlat/15 86921 86206 1972 46.8MB/s man (59.36 %) [+42.2%] - BM_ZFlat/16 1312315 1315789 152 86.0MB/s pb (23.15 %) [+34.5%] - BM_ZFlat/17 3173120 3200000 100 54.9MB/s gaviota (38.27%) [+28.1%] - - - The move from 64-bit to 32-bit operations for the copies also affected 32-bit x86; - positive on the decompression side, and slightly negative on the compression side - (unless that is noise; I only ran once): - - Benchmark Time(ns) CPU(ns) Iterations - 
----------------------------------------------------- - BM_UFlat/0 86279 86140 7778 1.1GB/s html [ +7.5%] - BM_UFlat/1 839265 822622 778 813.9MB/s urls [ +9.4%] - BM_UFlat/2 9180 9143 87500 12.9GB/s jpg [ +1.2%] - BM_UFlat/3 35080 35000 20000 2.5GB/s pdf [+10.1%] - BM_UFlat/4 350318 345000 2000 1.1GB/s html4 [ +7.0%] - BM_UFlat/5 33808 33472 21212 701.0MB/s cp [ +9.0%] - BM_UFlat/6 15201 15214 46667 698.9MB/s c [+14.9%] - BM_UFlat/7 4652 4651 159091 762.9MB/s lsp [ +7.5%] - BM_UFlat/8 1285551 1282528 538 765.7MB/s xls [+10.7%] - BM_UFlat/9 282510 281690 2414 514.9MB/s txt1 [+13.6%] - BM_UFlat/10 243494 239286 2800 498.9MB/s txt2 [+14.4%] - BM_UFlat/11 743625 740000 1000 550.0MB/s txt3 [+14.3%] - BM_UFlat/12 999441 989717 778 464.3MB/s txt4 [+16.1%] - BM_UFlat/13 412402 410076 1707 1.2GB/s bin [ +7.3%] - BM_UFlat/14 54876 54000 10000 675.3MB/s sum [+13.0%] - BM_UFlat/15 6146 6100 100000 660.8MB/s man [+14.8%] - BM_UFlat/16 90496 90286 8750 1.2GB/s pb [ +4.0%] - BM_UFlat/17 292650 292000 2500 602.0MB/s gaviota [+18.1%] - BM_UValidate/0 49620 49699 14286 1.9GB/s html [ +0.0%] - BM_UValidate/1 501371 500000 1000 1.3GB/s urls [ +0.0%] - BM_UValidate/2 232 227 3043478 521.5GB/s jpg [ +1.3%] - BM_UValidate/3 17250 17143 43750 5.1GB/s pdf [ -1.3%] - BM_UValidate/4 198643 200000 3500 1.9GB/s html4 [ -0.9%] - BM_ZFlat/0 227128 229415 3182 425.7MB/s html (23.57 %) [ -1.4%] - BM_ZFlat/1 2970089 2960000 250 226.2MB/s urls (50.89 %) [ -1.9%] - BM_ZFlat/2 45683 44999 15556 2.6GB/s jpg (99.88 %) [ +2.2%] - BM_ZFlat/3 114661 113136 6364 795.1MB/s pdf (82.13 %) [ -1.5%] - BM_ZFlat/4 919702 914286 875 427.2MB/s html4 (23.55%) [ -1.3%] - BM_ZFlat/5 108189 108422 6364 216.4MB/s cp (48.12 %) [ -1.2%] - BM_ZFlat/6 44525 44000 15909 241.7MB/s c (42.40 %) [ -2.9%] - BM_ZFlat/7 15973 15857 46667 223.8MB/s lsp (48.37 %) [ +0.0%] - BM_ZFlat/8 2677888 2639405 269 372.1MB/s xls (41.34 %) [ -1.4%] - BM_ZFlat/9 800715 780000 1000 186.0MB/s txt1 (59.81 %) [ -0.4%] - BM_ZFlat/10 700089 700000 1000 170.5MB/s txt2 (64.07 %) [ -2.9%] - BM_ZFlat/11 2159356 2138365 318 190.3MB/s txt3 (57.11 %) [ -0.3%] - BM_ZFlat/12 2796143 2779923 259 165.3MB/s txt4 (68.35 %) [ -1.4%] - BM_ZFlat/13 856458 835476 778 585.8MB/s bin (18.21 %) [ -0.1%] - BM_ZFlat/14 166908 166857 4375 218.6MB/s sum (51.88 %) [ -1.4%] - BM_ZFlat/15 21181 20857 35000 193.3MB/s man (59.36 %) [ -0.8%] - BM_ZFlat/16 244009 239973 2917 471.3MB/s pb (23.15 %) [ -1.4%] - BM_ZFlat/17 596362 590000 1000 297.9MB/s gaviota (38.27%) [ +0.0%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@59 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f2e184f638bdc7905f26c24faaf10fc0f5d33403 -Author: snappy.mirrorbot@gmail.com -Date: Sat Feb 11 22:11:22 2012 +0000 - - Lower the size allocated in the "corrupted input" unit test from 256 MB - to 2 MB. This fixes issues with running the unit test on platforms with - little RAM (e.g. some ARM boards). - - Also, reactivate the 2 MB test for 64-bit platforms; there's no good - reason why it shouldn't be. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@58 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e750dc0f054ba74b0ce76dd2013e6728cc7a41c5 -Author: snappy.mirrorbot@gmail.com -Date: Sun Jan 8 17:55:48 2012 +0000 - - Minor refactoring to accomodate changes in Google's internal code tree. 
- - - git-svn-id: https://snappy.googlecode.com/svn/trunk@57 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d9068ee301bdf893a4d8cb7c6518eacc44c4c1f2 -Author: snappy.mirrorbot@gmail.com -Date: Wed Jan 4 13:10:46 2012 +0000 - - Fix public issue 57: Fix most warnings with -Wall, mostly signed/unsigned - warnings. There are still some in the unit test, but the main .cc file should - be clean. We haven't enabled -Wall for the default build, since the unit test - is still not clean. - - This also fixes a real bug in the open-source implementation of - ReadFileToStringOrDie(); it would not detect errors correctly. - - I had to go through some pains to avoid performance loss as the types - were changed; I think there might still be some with 32-bit if and only if LFS - is enabled (ie., size_t is 64-bit), but for regular 32-bit and 64-bit I can't - see any losses, and I've diffed the generated GCC assembler between the old and - new code without seeing any significant changes. If anything, it's ever so - slightly faster. - - This may or may not enable compression of very large blocks (>2^32 bytes) - when size_t is 64-bit, but I haven't checked, and it is still not a supported - case. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@56 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 0755c815197dacc77d8971ae917c86d7aa96bf8e -Author: snappy.mirrorbot@gmail.com -Date: Wed Jan 4 10:46:39 2012 +0000 - - Add a framing format description. We do not have any implementation of this at - the current point, but there seems to be enough of a general interest in the - topic (cf. public bug #34). - - R=csilvers,sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@55 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d7eb2dc4133794b62cba691f9be40d1549bc32e2 -Author: snappy.mirrorbot@gmail.com -Date: Mon Dec 5 21:27:26 2011 +0000 - - Speed up decompression by moving the refill check to the end of the loop. - - This seems to work because in most of the branches, the compiler can evaluate - “ip_limit_ - ip” in a more efficient way than reloading ip_limit_ from memory - (either by already having the entire expression in a register, or reconstructing - it from “avail”, or something else). Memory loads, even from L1, are seemingly - costly in the big picture at the current decompression speeds.
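The loop shape being described can be sketched with a self-contained toy decoder (illustrative only; the real DecompressAllTags is considerably more involved):

    #include <cstddef>
    #include <cstdio>

    struct Source {
      const char* data;
      size_t len;
    };

    // Refills the [ip, ip_limit) window; a one-shot source for the example.
    static size_t Refill(Source* src, const char** ip, const char** ip_limit) {
      *ip = src->data;
      *ip_limit = src->data + src->len;
      size_t n = src->len;
      src->len = 0;
      return n;
    }

    int main() {
      Source src = {"example input", 13};
      const char *ip = nullptr, *ip_limit = nullptr;
      if (Refill(&src, &ip, &ip_limit) == 0) return 0;
      unsigned checksum = 0;
      for (;;) {
        checksum += static_cast<unsigned char>(*ip++);  // "decode one tag"
        // The refill check sits at the END of the loop body: on the hot
        // path, ip_limit - ip is typically still live in a register here,
        // so the common case branches straight back to the top.
        if (ip == ip_limit && Refill(&src, &ip, &ip_limit) == 0) break;
      }
      std::printf("checksum=%u\n", checksum);
      return 0;
    }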
- - Microbenchmarks (64-bit, opt mode): - - Westmere (Intel Core i7): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 74492 74491 187894 1.3GB/s html [ +5.9%] - BM_UFlat/1 712268 712263 19644 940.0MB/s urls [ +3.8%] - BM_UFlat/2 10591 10590 1000000 11.2GB/s jpg [ -6.8%] - BM_UFlat/3 29643 29643 469915 3.0GB/s pdf [ +7.9%] - BM_UFlat/4 304669 304667 45930 1.3GB/s html4 [ +4.8%] - BM_UFlat/5 28508 28507 490077 823.1MB/s cp [ +4.0%] - BM_UFlat/6 12415 12415 1000000 856.5MB/s c [ +8.6%] - BM_UFlat/7 3415 3415 4084723 1039.0MB/s lsp [+18.0%] - BM_UFlat/8 979569 979563 14261 1002.5MB/s xls [ +5.8%] - BM_UFlat/9 230150 230148 60934 630.2MB/s txt1 [ +5.2%] - BM_UFlat/10 197167 197166 71135 605.5MB/s txt2 [ +4.7%] - BM_UFlat/11 607394 607390 23041 670.1MB/s txt3 [ +5.6%] - BM_UFlat/12 808502 808496 17316 568.4MB/s txt4 [ +5.0%] - BM_UFlat/13 372791 372788 37564 1.3GB/s bin [ +3.3%] - BM_UFlat/14 44541 44541 313969 818.8MB/s sum [ +5.7%] - BM_UFlat/15 4833 4833 2898697 834.1MB/s man [ +4.8%] - BM_UFlat/16 79855 79855 175356 1.4GB/s pb [ +4.8%] - BM_UFlat/17 245845 245843 56838 715.0MB/s gaviota [ +5.8%] - - Clovertown (Intel Core 2): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 107911 107890 100000 905.1MB/s html [ +2.2%] - BM_UFlat/1 1011237 1011041 10000 662.3MB/s urls [ +2.5%] - BM_UFlat/2 26775 26770 523089 4.4GB/s jpg [ +0.0%] - BM_UFlat/3 48103 48095 290618 1.8GB/s pdf [ +3.4%] - BM_UFlat/4 437724 437644 31937 892.6MB/s html4 [ +2.1%] - BM_UFlat/5 39607 39600 358284 592.5MB/s cp [ +2.4%] - BM_UFlat/6 18227 18224 768191 583.5MB/s c [ +2.7%] - BM_UFlat/7 5171 5170 2709437 686.4MB/s lsp [ +3.9%] - BM_UFlat/8 1560291 1559989 8970 629.5MB/s xls [ +3.6%] - BM_UFlat/9 335401 335343 41731 432.5MB/s txt1 [ +3.0%] - BM_UFlat/10 287014 286963 48758 416.0MB/s txt2 [ +2.8%] - BM_UFlat/11 888522 888356 15752 458.1MB/s txt3 [ +2.9%] - BM_UFlat/12 1186600 1186378 10000 387.3MB/s txt4 [ +3.1%] - BM_UFlat/13 572295 572188 24468 855.4MB/s bin [ +2.1%] - BM_UFlat/14 64060 64049 218401 569.4MB/s sum [ +4.1%] - BM_UFlat/15 7264 7263 1916168 555.0MB/s man [ +1.4%] - BM_UFlat/16 108853 108836 100000 1039.1MB/s pb [ +1.7%] - BM_UFlat/17 364289 364223 38419 482.6MB/s gaviota [ +4.9%] - - Barcelona (AMD Opteron): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 103900 103871 100000 940.2MB/s html [ +8.3%] - BM_UFlat/1 1000435 1000107 10000 669.5MB/s urls [ +6.6%] - BM_UFlat/2 24659 24652 567362 4.8GB/s jpg [ +0.1%] - BM_UFlat/3 48206 48193 291121 1.8GB/s pdf [ +5.0%] - BM_UFlat/4 421980 421850 33174 926.0MB/s html4 [ +7.3%] - BM_UFlat/5 40368 40357 346994 581.4MB/s cp [ +8.7%] - BM_UFlat/6 19836 19830 708695 536.2MB/s c [ +8.0%] - BM_UFlat/7 6100 6098 2292774 581.9MB/s lsp [ +9.0%] - BM_UFlat/8 1693093 1692514 8261 580.2MB/s xls [ +8.0%] - BM_UFlat/9 365991 365886 38225 396.4MB/s txt1 [ +7.1%] - BM_UFlat/10 311330 311238 44950 383.6MB/s txt2 [ +7.6%] - BM_UFlat/11 975037 974737 14376 417.5MB/s txt3 [ +6.9%] - BM_UFlat/12 1303558 1303175 10000 352.6MB/s txt4 [ +7.3%] - BM_UFlat/13 517448 517290 27144 946.2MB/s bin [ +5.5%] - BM_UFlat/14 66537 66518 210352 548.3MB/s sum [ +7.5%] - BM_UFlat/15 7976 7974 1760383 505.6MB/s man [ +5.6%] - BM_UFlat/16 103121 103092 100000 1097.0MB/s pb [ +8.7%] - BM_UFlat/17 391431 391314 35733 449.2MB/s gaviota [ +6.5%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@54 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - 
-commit 5ed51ce15fc4ff8d2f7235704eb6b0c3f762fb88 -Author: snappy.mirrorbot@gmail.com -Date: Wed Nov 23 11:14:17 2011 +0000 - - Speed up decompression by making the fast path for literals faster. - - We do the fast-path step as soon as possible; in fact, as soon as we know the - literal length. Since we usually hit the fast path, we can then skip the checks - for long literals and available input space (beyond what the fast path check - already does). - - Note that this changes the decompression Writer API; however, it does not - change the ABI, since writers are always templatized and as such never - cross compilation units. The new API is slightly more general, in that it - doesn't hard-code the value 16. Note that we also take care to check - for len <= 16 first, since the other two checks almost always succeed - (so we don't want to waste time checking for them until we have to). - - The improvements are most marked on Nehalem, but are generally positive - on other platforms as well. All microbenchmarks are 64-bit, opt. - - Clovertown (Core 2): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 110226 110224 100000 886.0MB/s html [ +1.5%] - BM_UFlat/1 1036523 1036508 10000 646.0MB/s urls [ -0.8%] - BM_UFlat/2 26775 26775 522570 4.4GB/s jpg [ +0.0%] - BM_UFlat/3 49738 49737 280974 1.8GB/s pdf [ +0.3%] - BM_UFlat/4 446790 446792 31334 874.3MB/s html4 [ +0.8%] - BM_UFlat/5 40561 40562 350424 578.5MB/s cp [ +1.3%] - BM_UFlat/6 18722 18722 746903 568.0MB/s c [ +1.4%] - BM_UFlat/7 5373 5373 2608632 660.5MB/s lsp [ +8.3%] - BM_UFlat/8 1615716 1615718 8670 607.8MB/s xls [ +2.0%] - BM_UFlat/9 345278 345281 40481 420.1MB/s txt1 [ +1.4%] - BM_UFlat/10 294855 294855 47452 404.9MB/s txt2 [ +1.6%] - BM_UFlat/11 914263 914263 15316 445.2MB/s txt3 [ +1.1%] - BM_UFlat/12 1222694 1222691 10000 375.8MB/s txt4 [ +1.4%] - BM_UFlat/13 584495 584489 23954 837.4MB/s bin [ -0.6%] - BM_UFlat/14 66662 66662 210123 547.1MB/s sum [ +1.2%] - BM_UFlat/15 7368 7368 1881856 547.1MB/s man [ +4.0%] - BM_UFlat/16 110727 110726 100000 1021.4MB/s pb [ +2.3%] - BM_UFlat/17 382138 382141 36616 460.0MB/s gaviota [ -0.7%] - - Westmere (Core i7): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 78861 78853 177703 1.2GB/s html [ +2.1%] - BM_UFlat/1 739560 739491 18912 905.4MB/s urls [ +3.4%] - BM_UFlat/2 9867 9866 1419014 12.0GB/s jpg [ +3.4%] - BM_UFlat/3 31989 31986 438385 2.7GB/s pdf [ +0.2%] - BM_UFlat/4 319406 319380 43771 1.2GB/s html4 [ +1.9%] - BM_UFlat/5 29639 29636 472862 791.7MB/s cp [ +5.2%] - BM_UFlat/6 13478 13477 1000000 789.0MB/s c [ +2.3%] - BM_UFlat/7 4030 4029 3475364 880.7MB/s lsp [ +8.7%] - BM_UFlat/8 1036585 1036492 10000 947.5MB/s xls [ +6.9%] - BM_UFlat/9 242127 242105 57838 599.1MB/s txt1 [ +3.0%] - BM_UFlat/10 206499 206480 67595 578.2MB/s txt2 [ +3.4%] - BM_UFlat/11 641635 641570 21811 634.4MB/s txt3 [ +2.4%] - BM_UFlat/12 848847 848769 16443 541.4MB/s txt4 [ +3.1%] - BM_UFlat/13 384968 384938 36366 1.2GB/s bin [ +0.3%] - BM_UFlat/14 47106 47101 297770 774.3MB/s sum [ +4.4%] - BM_UFlat/15 5063 5063 2772202 796.2MB/s man [ +7.7%] - BM_UFlat/16 83663 83656 167697 1.3GB/s pb [ +1.8%] - BM_UFlat/17 260224 260198 53823 675.6MB/s gaviota [ -0.5%] - - Barcelona (Opteron): - - Benchmark Time(ns) CPU(ns) Iterations - -------------------------------------------- - BM_UFlat/0 112490 112457 100000 868.4MB/s html [ -0.4%] - BM_UFlat/1 1066719 1066339 10000 627.9MB/s urls [ +1.0%] - BM_UFlat/2 24679 24672 563802 
4.8GB/s jpg [ +0.7%] - BM_UFlat/3 50603 50589 277285 1.7GB/s pdf [ +2.6%] - BM_UFlat/4 452982 452849 30900 862.6MB/s html4 [ -0.2%] - BM_UFlat/5 43860 43848 319554 535.1MB/s cp [ +1.2%] - BM_UFlat/6 21419 21413 653573 496.6MB/s c [ +1.0%] - BM_UFlat/7 6646 6645 2105405 534.1MB/s lsp [ +0.3%] - BM_UFlat/8 1828487 1827886 7658 537.3MB/s xls [ +2.6%] - BM_UFlat/9 391824 391714 35708 370.3MB/s txt1 [ +2.2%] - BM_UFlat/10 334913 334816 41885 356.6MB/s txt2 [ +1.7%] - BM_UFlat/11 1042062 1041674 10000 390.7MB/s txt3 [ +1.1%] - BM_UFlat/12 1398902 1398456 10000 328.6MB/s txt4 [ +1.7%] - BM_UFlat/13 545706 545530 25669 897.2MB/s bin [ -0.4%] - BM_UFlat/14 71512 71505 196035 510.0MB/s sum [ +1.4%] - BM_UFlat/15 8422 8421 1665036 478.7MB/s man [ +2.6%] - BM_UFlat/16 112053 112048 100000 1009.3MB/s pb [ -0.4%] - BM_UFlat/17 416723 416713 33612 421.8MB/s gaviota [ -2.0%] - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@53 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 0c1b9c3904430f5b399bd057d76de4bc36b7a123 -Author: snappy.mirrorbot@gmail.com -Date: Tue Nov 8 14:46:39 2011 +0000 - - Fix public issue #53: Update the README to the API we actually open-sourced - with. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@52 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b61134bc0a6a904b41522b4e5c9e80874c730cef -Author: snappy.mirrorbot@gmail.com -Date: Wed Oct 5 12:27:12 2011 +0000 - - In the format description, use a clearer example to emphasize that varints are - stored in little-endian. Patch from Christian von Roques. - - R=csilvers - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@51 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 21a2e4f55758e759302cd84ad0f3580affcba7d9 -Author: snappy.mirrorbot@gmail.com -Date: Thu Sep 15 19:34:06 2011 +0000 - - Release Snappy 1.0.4. - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@50 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e2e303286813c759c5b1cdb46dad63c494f0a061 -Author: snappy.mirrorbot@gmail.com -Date: Thu Sep 15 09:50:05 2011 +0000 - - Fix public issue #50: Include generic byteswap macros. - Also include Solaris 10 and FreeBSD versions. - - R=csilvers - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@49 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 593002da3c051f4721312869f816b41485bad3b7 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 18:57:27 2011 +0000 - - Partially fix public issue 50: Remove an extra comma from the end of some - enum declarations, as it seems the Sun compiler does not like it. - - Based on patch by Travis Vitek. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@48 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f1063a5dc43891eed37f0586bfea57b84dddd756 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 18:44:16 2011 +0000 - - Use the right #ifdef test for sys/mman.h. - - Based on patch by Travis Vitek. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@47 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 41c827a2fa9ce048202d941187f211180feadde4 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 01:22:09 2011 +0000 - - Fix public issue #47: Small comment cleanups in the unit test. - - Originally based on a patch by Patrick Pelletier. 
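The little-endian varint storage that the format-description fix above clarifies can be shown with a short, self-contained encoder (a sketch, not the library's implementation):

    #include <cstdint>
    #include <cstdio>

    // Snappy's length preamble: a base-128 varint, least-significant seven
    // bits first, with the high bit of each byte meaning "more bytes follow".
    static int EncodeVarint32(uint32_t value, unsigned char* out) {
      int n = 0;
      while (value >= 0x80) {
        out[n++] = static_cast<unsigned char>(value & 0x7f) | 0x80;
        value >>= 7;
      }
      out[n++] = static_cast<unsigned char>(value);
      return n;
    }

    int main() {
      unsigned char buf[5];  // a 32-bit varint needs at most five bytes
      int n = EncodeVarint32(64 * 1024, buf);  // e.g. a 64 kB uncompressed length
      for (int i = 0; i < n; ++i) std::printf("%02x ", buf[i]);
      std::printf("\n");  // prints "80 80 04": low groups come first
      return 0;
    }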
- - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@46 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 59aeffa6049b5c2a3a467e7602c1f93630b870e7 -Author: snappy.mirrorbot@gmail.com -Date: Wed Aug 10 01:14:43 2011 +0000 - - Fix public issue #46: Format description said "3-byte offset" - instead of "4-byte offset" for the longest copies. - - Also fix an inconsistency in the heading for section 2.2.3. - Both patches by Patrick Pelletier. - - R=csilvers - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@45 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 57e7cd72559cb022ef32856f2252a4c4585e562e -Author: snappy.mirrorbot@gmail.com -Date: Tue Jun 28 11:40:25 2011 +0000 - - Fix public issue #44: Make the definition and declaration of CompressFragment - identical, even regarding cv-qualifiers. - - This is required to work around a bug in the Solaris Studio C++ compiler - (it does not properly disregard cv-qualifiers when doing name mangling). - - R=sanjay - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@44 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 13c4a449a8ea22139c9aa441e8024eebc9dbdf6e -Author: snappy.mirrorbot@gmail.com -Date: Sat Jun 4 10:19:05 2011 +0000 - - Correct an inaccuracy in the Snappy format description. - (I stumbled into this when changing the way we decompress literals.) - - R=csilvers - - Revision created by MOE tool push_codebase. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@43 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f5406737403119e1483a71d2084d17728663a114 -Author: snappy.mirrorbot@gmail.com -Date: Fri Jun 3 20:53:06 2011 +0000 - - Speed up decompression by removing a fast-path attempt. - - Whenever we try to enter a copy fast-path, there is a certain cost in checking - that all the preconditions are in place, but it's normally offset by the fact - that we can usually take the cheaper path. However, in a certain path we've - already established that "avail < literal_length", which usually means that - either the available space is small, or the literal is big. Both will disqualify - us from taking the fast path, and thus we take the hit from the precondition - checking without gaining much from having a fast path. Thus, simply don't try - the fast path in this situation -- we're already on a slow path anyway - (one where we need to refill more data from the reader). - - I'm a bit surprised at how much this gained; it could be that this path is - more common than I thought, or that the simpler structure somehow makes the - compiler happier. I haven't looked at the assembler, but it's a win across - the board on both Core 2, Core i7 and Opteron, at least for the cases we - typically care about. The gains seem to be the largest on Core i7, though. 
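As a sketch of that control flow (hypothetical writer logic, not Snappy's actual code):

    #include <cstddef>
    #include <cstring>

    // When avail < literal_length we are disqualified from the fast path
    // anyway, so skip the remaining precondition checks and go straight to
    // the slow path rather than paying for tests that cannot succeed.
    static bool AppendLiteral(char* op, char* op_limit,
                              const char* literal, size_t literal_length) {
      size_t avail = static_cast<size_t>(op_limit - op);
      if (avail < literal_length) {
        return false;  // stand-in for the slow path (refill and retry)
      }
      std::memcpy(op, literal, literal_length);  // fast path
      return true;
    }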
- Results from my Core i7 workstation: - - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 73337 73091 190996 1.3GB/s html [ +1.7%] - BM_UFlat/1 696379 693501 20173 965.5MB/s urls [ +2.7%] - BM_UFlat/2 9765 9734 1472135 12.1GB/s jpg [ +0.7%] - BM_UFlat/3 29720 29621 472973 3.0GB/s pdf [ +1.8%] - BM_UFlat/4 294636 293834 47782 1.3GB/s html4 [ +2.3%] - BM_UFlat/5 28399 28320 494700 828.5MB/s cp [ +3.5%] - BM_UFlat/6 12795 12760 1000000 833.3MB/s c [ +1.2%] - BM_UFlat/7 3984 3973 3526448 893.2MB/s lsp [ +5.7%] - BM_UFlat/8 991996 989322 14141 992.6MB/s xls [ +3.3%] - BM_UFlat/9 228620 227835 61404 636.6MB/s txt1 [ +4.0%] - BM_UFlat/10 197114 196494 72165 607.5MB/s txt2 [ +3.5%] - BM_UFlat/11 605240 603437 23217 674.4MB/s txt3 [ +3.7%] - BM_UFlat/12 804157 802016 17456 573.0MB/s txt4 [ +3.9%] - BM_UFlat/13 347860 346998 40346 1.4GB/s bin [ +1.2%] - BM_UFlat/14 44684 44559 315315 818.4MB/s sum [ +2.3%] - BM_UFlat/15 5120 5106 2739726 789.4MB/s man [ +3.3%] - BM_UFlat/16 76591 76355 183486 1.4GB/s pb [ +2.8%] - BM_UFlat/17 238564 237828 58824 739.1MB/s gaviota [ +1.6%] - BM_UValidate/0 42194 42060 333333 2.3GB/s html [ -0.1%] - BM_UValidate/1 433182 432005 32407 1.5GB/s urls [ -0.1%] - BM_UValidate/2 197 196 71428571 603.3GB/s jpg [ +0.5%] - BM_UValidate/3 14494 14462 972222 6.1GB/s pdf [ +0.5%] - BM_UValidate/4 168444 167836 83832 2.3GB/s html4 [ +0.1%] - - R=jeff - - Revision created by MOE tool push_codebase. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@42 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 197f3ee9f9397e98c9abf07f9da875fbcb725dba -Author: snappy.mirrorbot@gmail.com -Date: Fri Jun 3 20:47:14 2011 +0000 - - Speed up decompression by not needing a lookup table for literal items. - - Looking up into and decoding the values from char_table has long shown up as a - hotspot in the decompressor. While it turns out that it's hard to make a more - efficient decoder for the copy ops, the literals are simple enough that we can - decode them without needing a table lookup. (This means that 1/4 of the table - is now unused, although that in itself doesn't buy us anything.) - - The gains are small, but definitely present; some tests win as much as 10%, - but 1-4% is more typical. These results are from Core i7, in 64-bit mode; - Core 2 and Opteron show similar results. (I've run with more iterations - than usual to make sure the smaller gains don't drown entirely in noise.)
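A sketch of the table-free literal decode (illustrative; the real decoder also handles the long-length encodings and bounds-checks the input):

    #include <cstdint>

    // In the Snappy format, a tag byte with 00 in its low two bits is a
    // literal, and the upper six bits hold (length - 1) for lengths 1..60;
    // the values 60..63 there signal 1..4 extra length bytes instead.
    static inline bool DecodeShortLiteralTag(uint8_t tag,
                                             uint32_t* literal_length) {
      if ((tag & 3) != 0) return false;  // a copy op; those keep char_table
      uint32_t len = (tag >> 2) + 1;
      if (len > 60) return false;        // long literal: extra bytes follow
      *literal_length = len;
      return true;
    }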
- - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 74665 74428 182055 1.3GB/s html [ +3.1%] - BM_UFlat/1 714106 711997 19663 940.4MB/s urls [ +4.4%] - BM_UFlat/2 9820 9789 1427115 12.1GB/s jpg [ -1.2%] - BM_UFlat/3 30461 30380 465116 2.9GB/s pdf [ +0.8%] - BM_UFlat/4 301445 300568 46512 1.3GB/s html4 [ +2.2%] - BM_UFlat/5 29338 29263 479452 801.8MB/s cp [ +1.6%] - BM_UFlat/6 13004 12970 1000000 819.9MB/s c [ +2.1%] - BM_UFlat/7 4180 4168 3349282 851.4MB/s lsp [ +1.3%] - BM_UFlat/8 1026149 1024000 10000 959.0MB/s xls [+10.7%] - BM_UFlat/9 237441 236830 59072 612.4MB/s txt1 [ +0.3%] - BM_UFlat/10 203966 203298 69307 587.2MB/s txt2 [ +0.8%] - BM_UFlat/11 627230 625000 22400 651.2MB/s txt3 [ +0.7%] - BM_UFlat/12 836188 833979 16787 551.0MB/s txt4 [ +1.3%] - BM_UFlat/13 351904 350750 39886 1.4GB/s bin [ +3.8%] - BM_UFlat/14 45685 45562 308370 800.4MB/s sum [ +5.9%] - BM_UFlat/15 5286 5270 2656546 764.9MB/s man [ +1.5%] - BM_UFlat/16 78774 78544 178117 1.4GB/s pb [ +4.3%] - BM_UFlat/17 242270 241345 58091 728.3MB/s gaviota [ +1.2%] - BM_UValidate/0 42149 42000 333333 2.3GB/s html [ -3.0%] - BM_UValidate/1 432741 431303 32483 1.5GB/s urls [ +7.8%] - BM_UValidate/2 198 197 71428571 600.7GB/s jpg [+16.8%] - BM_UValidate/3 14560 14521 965517 6.1GB/s pdf [ -4.1%] - BM_UValidate/4 169065 168671 83832 2.3GB/s html4 [ -2.9%] - - R=jeff - - Revision created by MOE tool push_codebase. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@41 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 8efa2639e885ac467e7b11c662975c5844019fb9 -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 2 22:57:41 2011 +0000 - - Release Snappy 1.0.3. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@40 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2e12124bd87f39296709decc65195fa5bfced538 -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 2 18:06:54 2011 +0000 - - Remove an unneeded goto in the decompressor; it turns out that the - state of ip_ after decompression (or attempted decompression) is - completely irrelevant, so we don't need the trailer. - - Performance is, as expected, mostly flat -- there's a curious ~3-5% - loss in the "lsp" test, but that test case is so short it is hard to say - anything definitive about why (most likely, it's some sort of - unrelated effect). - - R=jeff - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@39 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit c266bbf32103f8ed4a83e2272ed3d8828d5b8b34 -Author: snappy.mirrorbot@gmail.com -Date: Thu Jun 2 17:59:40 2011 +0000 - - Speed up decompression by caching ip_. - - It is seemingly hard for the compiler to understand that ip_, the current input - pointer into the compressed data stream, cannot alias anything else, and - thus using it directly will incur memory traffic as it cannot be kept in a - register. The code already knew about this and cached it into a local - variable, but since Step() only decoded one tag, it had to move ip_ back into - place between every tag. This seems to have cost us a significant amount of - performance, so we change Step() into a function that decodes as much as it can - before it saves ip_ back and returns. (Note that Step() was already inlined, - so it is not the manual inlining that buys the performance here.)
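The before/after shape, as a sketch (hypothetical names, not the real class):

    // Instead of writing the member ip_ back after every tag, decode a
    // whole batch of tags against a local pointer and store ip_ once.
    struct Decompressor {
      const char* ip_;        // current position in the compressed stream
      const char* ip_limit_;  // end of the buffered input

      void DecodeAvailableTags() {
        const char* ip = ip_;  // keep the hot pointer in a register
        while (ip < ip_limit_) {
          ip += 1;  // ... decode one whole tag at *ip ...
        }
        ip_ = ip;  // a single write-back instead of one per tag
      }
    };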
- - There is a tiny difference in the behavior here; if an invalid literal is - encountered (ie., the writer refuses the Append() operation), ip_ will now - point to the byte past the tag byte, instead of where the literal was - originally thought to end. However, we don't use ip_ for anything after - DecompressAllTags() has returned, so this should not change external behavior - in any way. - - Microbenchmark results for Core i7, 64-bit (Opteron results are similar): - - Benchmark Time(ns) CPU(ns) Iterations - --------------------------------------------------- - BM_UFlat/0 79134 79110 8835 1.2GB/s html [ +6.2%] - BM_UFlat/1 786126 786096 891 851.8MB/s urls [+10.0%] - BM_UFlat/2 9948 9948 69125 11.9GB/s jpg [ -1.3%] - BM_UFlat/3 31999 31998 21898 2.7GB/s pdf [ +6.5%] - BM_UFlat/4 318909 318829 2204 1.2GB/s html4 [ +6.5%] - BM_UFlat/5 31384 31390 22363 747.5MB/s cp [ +9.2%] - BM_UFlat/6 14037 14034 49858 757.7MB/s c [+10.6%] - BM_UFlat/7 4612 4612 151395 769.5MB/s lsp [ +9.5%] - BM_UFlat/8 1203174 1203007 582 816.3MB/s xls [+19.3%] - BM_UFlat/9 253869 253955 2757 571.1MB/s txt1 [+11.4%] - BM_UFlat/10 219292 219290 3194 544.4MB/s txt2 [+12.1%] - BM_UFlat/11 672135 672131 1000 605.5MB/s txt3 [+11.2%] - BM_UFlat/12 902512 902492 776 509.2MB/s txt4 [+12.5%] - BM_UFlat/13 372110 371998 1881 1.3GB/s bin [ +5.8%] - BM_UFlat/14 50407 50407 10000 723.5MB/s sum [+13.5%] - BM_UFlat/15 5699 5701 100000 707.2MB/s man [+12.4%] - BM_UFlat/16 83448 83424 8383 1.3GB/s pb [ +5.7%] - BM_UFlat/17 256958 256963 2723 684.1MB/s gaviota [ +7.9%] - BM_UValidate/0 42795 42796 16351 2.2GB/s html [+25.8%] - BM_UValidate/1 490672 490622 1427 1.3GB/s urls [+22.7%] - BM_UValidate/2 237 237 2950297 499.0GB/s jpg [+24.9%] - BM_UValidate/3 14610 14611 47901 6.0GB/s pdf [+26.8%] - BM_UValidate/4 171973 171990 4071 2.2GB/s html4 [+25.7%] - - - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@38 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d0ee043bc50c62c5b5ff3da044f0b5567257407d -Author: snappy.mirrorbot@gmail.com -Date: Tue May 17 08:48:25 2011 +0000 - - Fix the numbering of the headlines in the Snappy format description. - - R=csilvers - DELTA=4 (0 added, 0 deleted, 4 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1906 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@37 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 6c7053871fbdb459c9c14287a138d7f82d6d84a1 -Author: snappy.mirrorbot@gmail.com -Date: Mon May 16 08:59:18 2011 +0000 - - Fix public issue #32: Add compressed format documentation for Snappy. - This text is new, but an earlier version from Zeev Tarantov was used - as reference. - - R=csilvers - DELTA=112 (111 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1867 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@36 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit a1f9f9973d127992f341d442969c86fd9a0847c9 -Author: snappy.mirrorbot@gmail.com -Date: Mon May 9 21:29:02 2011 +0000 - - Fix public issue #39: Pick out the median runs based on CPU time, - not real time. Also, use nth_element instead of sort, since we - only need one element. - - R=csilvers - DELTA=5 (3 added, 0 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. 
- MOE_MIGRATION=1799 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@35 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f7b105683c074cdf233740089e245e43f63e7e55 -Author: snappy.mirrorbot@gmail.com -Date: Mon May 9 21:28:45 2011 +0000 - - Fix public issue #38: Make the microbenchmark framework properly - handle cases where gettimeofday() can stand still and return the same - result twice (as sometimes on GNU/Hurd) or go backwards - (as when the user adjusts the clock). We avoid a division-by-zero, - and put a lower bound on the number of iterations -- the same - amount as we use to calibrate. - - We should probably use CLOCK_MONOTONIC for platforms that support - it, to be robust against clock adjustments; we already use Windows' - monotonic timers. However, that's for a later changelist. - - R=csilvers - DELTA=7 (5 added, 0 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1798 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@34 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit d8d481427a05b88cdb0810c29bf400153595c423 -Author: snappy.mirrorbot@gmail.com -Date: Tue May 3 23:22:52 2011 +0000 - - Fix public issue #37: Only link snappy_unittest against -lz and other autodetected - libraries, not libsnappy.so (which doesn't need any such dependency). - - R=csilvers - DELTA=20 (14 added, 0 deleted, 6 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1710 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@33 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit bcecf195c0aeb2c98144d3d54b4d8d228774f50d -Author: snappy.mirrorbot@gmail.com -Date: Tue May 3 23:22:33 2011 +0000 - - Release Snappy 1.0.2, to get the license change and various other fixes into - a release. - - R=csilvers - DELTA=239 (236 added, 0 deleted, 3 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1709 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@32 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 84d9f642025cda672dda0d94a8008f094500aaa6 -Author: snappy.mirrorbot@gmail.com -Date: Tue Apr 26 12:34:55 2011 +0000 - - Fix public issue #30: Stop using gettimeofday() altogether on Win32, - as MSVC doesn't include it. Replace with QueryPerformanceCounter(), - which is monotonic and probably reasonably high-resolution. - (Some machines have traditionally had bugs in QPC, but they should - be relatively rare these days, and there's really not a much better - alternative that I know of.) - - R=csilvers - DELTA=74 (55 added, 19 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1556 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@31 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3d8e71df8d30f980d71d4c784ebfc5ff62d5b0cb -Author: snappy.mirrorbot@gmail.com -Date: Tue Apr 26 12:34:37 2011 +0000 - - Fix public issue #31: Don't reset PATH in autogen.sh; instead, do the trickery - we need for our own build system internally. - - R=csilvers - DELTA=16 (13 added, 1 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1555 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@30 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 73987351de54c88e2fc3f5dcdeceb47708df3585 -Author: snappy.mirrorbot@gmail.com -Date: Fri Apr 15 22:55:56 2011 +0000 - - When including <windows.h>, define WIN32_LEAN_AND_MEAN first, - so we won't pull in macro definitions of things like min() and max(), - which can conflict with <algorithm>.
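The guard typically ends up looking like this in a portability header (a sketch; the NOMINMAX define is a common companion and an addition here, not part of the original change):

    #if defined(_WIN32)
    #ifndef WIN32_LEAN_AND_MEAN
    #define WIN32_LEAN_AND_MEAN  // trim rarely-used APIs from <windows.h>
    #endif
    #ifndef NOMINMAX
    #define NOMINMAX             // keep <windows.h> from defining min()/max()
    #endif
    #include <windows.h>
    #endif
    #include <algorithm>  // std::min/std::max now resolve without collisions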
- - R=csilvers - DELTA=1 (1 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1485 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@29 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit fb7e0eade471a20b009720a84fea0af1552791d5 -Author: snappy.mirrorbot@gmail.com -Date: Mon Apr 11 09:07:01 2011 +0000 - - Fix public issue #29: Write CPU timing code for Windows, based on GetProcessTimes() - instead of getrusage(). - - I thought I'd already committed this patch, so that the 1.0.1 release already - would have a Windows-compatible snappy_unittest, but I'd seemingly deleted it - instead, so this is a reconstruction. - - R=csilvers - DELTA=43 (39 added, 3 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1295 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@28 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit c67fa0c755a329000da5546fff79089d62ac2f82 -Author: snappy.mirrorbot@gmail.com -Date: Fri Apr 8 09:51:53 2011 +0000 - - Include C bindings of Snappy, contributed by Martin Gieseking. - - I've made a few changes since Martin's version; mostly style nits, but also - a semantic change -- most functions that return bool in the C++ version now - return an enum, to better match typical C (and zlib) semantics. - - I've kept the copyright notice, since Martin is obviously the author here; - he has signed the contributor license agreement, though, so this should not - hinder Google's use in the future. - - We'll need to update the libtool version number to match the added interface, - but as per http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html - I'm going to wait until public release. - - R=csilvers - DELTA=238 (233 added, 0 deleted, 5 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1294 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@27 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 56be85cb9ae06f2e92180ae2575bdd10c012ab73 -Author: snappy.mirrorbot@gmail.com -Date: Thu Apr 7 16:36:43 2011 +0000 - - Replace geo.protodata with a newer version. - - The data compresses/decompresses slightly faster than the old data, and has - similar density. - - R=lookingbill - DELTA=1 (0 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1288 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@26 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3dd93f3ec74df54a37f68bffabb058ac757bbe72 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 30 20:27:53 2011 +0000 - - Fix public issue #27: Add HAVE_CONFIG_H tests around the config.h - inclusion in snappy-stubs-internal.h, which eases compiling outside the - automake/autoconf framework. - - R=csilvers - DELTA=5 (4 added, 1 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1152 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@25 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f67bcaa61006da8b325a7ed9909a782590971815 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 30 20:27:39 2011 +0000 - - Fix public issue #26: Take memory allocation and reallocation entirely out of the - Measure() loop. This gives all algorithms a small speed boost, except Snappy which - already didn't do reallocation (so the measurements were slightly biased in its - favor). - - R=csilvers - DELTA=92 (69 added, 9 deleted, 14 changed) - - - Revision created by MOE tool push_codebase.
- MOE_MIGRATION=1151 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@24 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit cc333c1c5cc4eabceceb9848ff3cac6c604ecbc6 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 30 20:25:09 2011 +0000 - - Renamed "namespace zippy" to "namespace snappy" to reduce - the differences from the opensource code. Will make it easier - in the future to mix-and-match third-party code that uses - snappy with google code. - - Currently, csearch shows that the only external user of - "namespace zippy" is some bigtable code that accesses - a TEST variable, which is temporarily kept in the zippy - namespace. - - R=sesse - DELTA=123 (18 added, 3 deleted, 102 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1150 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@23 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit f19fb07e6dc79d6857e37df572dba25ff30fc8f3 -Author: snappy.mirrorbot@gmail.com -Date: Mon Mar 28 22:17:04 2011 +0000 - - Put back the final few lines of what was truncated during the - license header change. - - R=csilvers - DELTA=5 (4 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1094 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@22 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 7e8ca8f8315fc2ecb4eea19db695039ab2ca43a0 -Author: snappy.mirrorbot@gmail.com -Date: Sat Mar 26 02:34:34 2011 +0000 - - Change on 2011-03-25 19:18:00-07:00 by sesse - - Replace the Apache 2.0 license header by the BSD-type license header; - somehow a lot of the files were missed in the last round. - - R=dannyb,csilvers - DELTA=147 (74 added, 2 deleted, 71 changed) - - Change on 2011-03-25 19:25:07-07:00 by sesse - - Unbreak the build; the relicensing removed a bit too much (only comments - were intended, but I also accidentially removed some of the top lines of - the actual source). - - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1072 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@21 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b4bbc1041b35d844ec26fbae25f2864995361fd8 -Author: snappy.mirrorbot@gmail.com -Date: Fri Mar 25 16:14:41 2011 +0000 - - Change Snappy from the Apache 2.0 to a BSD-type license. - - R=dannyb - DELTA=328 (80 added, 184 deleted, 64 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1061 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@20 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit c47640c510eb11cf8913edfa34f667bceb3a4401 -Author: snappy.mirrorbot@gmail.com -Date: Fri Mar 25 00:39:01 2011 +0000 - - Release Snappy 1.0.1, to soup up all the various small changes - that have been made since release. - - R=csilvers - DELTA=266 (260 added, 0 deleted, 6 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1057 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@19 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit b1dc1f643eaff897a5ce135f525799b99687b118 -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:15:54 2011 +0000 - - Fix a microbenchmark crash on mingw32; seemingly %lld is not universally - supported on Windows, and %I64d is recommended instead. - - R=csilvers - DELTA=6 (5 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. 
- MOE_MIGRATION=1034 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@18 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 98004ca9afc62a3279dfe9d9a359083f61db437f -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:15:27 2011 +0000 - - Fix public issue #19: Fix unit test when Google Test is installed but the - gflags package isn't (Google Test is not properly initialized). - - Patch by Martin Gieseking. - - R=csilvers - DELTA=2 (1 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1033 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@17 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 444a6c5f72d6f8d8f7213a5bcc08b26606eb9934 -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:13:57 2011 +0000 - - Make the unit test work on systems without mmap(). This is required for, - among others, Windows support. For Windows in specific, we could have used - CreateFileMapping/MapViewOfFile, but this should at least get us a bit closer - to compiling, and is of course also relevant for embedded systems with no MMU. - - (Part 2/2) - - R=csilvers - DELTA=15 (12 added, 3 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1032 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@16 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 2e182e9bb840737f9cd8817e859dc17a82f2c16b -Author: snappy.mirrorbot@gmail.com -Date: Thu Mar 24 19:12:27 2011 +0000 - - Make the unit test work on systems without mmap(). This is required for, - among others, Windows support. For Windows in specific, we could have used - CreateFileMapping/MapViewOfFile, but this should at least get us a bit closer - to compiling, and is of course also relevant for embedded systems with no MMU. - - (Part 1/2) - - R=csilvers - DELTA=9 (8 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1031 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@15 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 48662cbb7f81533977334629790d346220084527 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 23:17:36 2011 +0000 - - Fix public issue #12: Don't keep autogenerated auto* files in Subversion; - it causes problems with others sending patches etc.. - - We can't get this 100% hermetic anyhow, due to files like lt~obsolete.m4, - so we can just as well go cleanly in the other direction. - - R=csilvers - DELTA=21038 (0 added, 21036 deleted, 2 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=1012 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@14 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 9e4717a586149c9538b353400312bab5ab5458c4 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 17:50:49 2011 +0000 - - Fix public issue tracker bug #3: Call AC_SUBST([LIBTOOL_DEPS]), or the rule - to rebuild libtool in Makefile.am won't work. - - R=csilvers - DELTA=1 (1 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=997 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@13 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 519c822a34a91a0c0eb32d98e9686ee7d9cd6651 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:16:39 2011 +0000 - - Fix public issue #10: Don't add GTEST_CPPFLAGS to snappy_unittest_CXXFLAGS; - it's not needed (CPPFLAGS are always included when compiling). - - R=csilvers - DELTA=1 (0 added, 1 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. 
- MOE_MIGRATION=994 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@12 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit ea6b936378583cba730c33c8a53776edc1782208 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:16:18 2011 +0000 - - Fix public issue #9: Add -Wall -Werror to automake flags. - (This concerns automake itself, not the C++ compiler.) - - R=csilvers - DELTA=4 (3 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=993 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@11 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit e3ca06af253094b1c3a8eae508cd97accf077535 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:13:37 2011 +0000 - - Fix a typo in the Snappy README file. - - R=csilvers - DELTA=1 (0 added, 0 deleted, 1 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=992 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@10 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 39d27bea23873abaa663e884261386b17b058f20 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:13:13 2011 +0000 - - Fix public issue #6: Add a --with-gflags for disabling gflags autodetection - and using a manually given setting (use/don't use) instead. - - R=csilvers - DELTA=16 (13 added, 0 deleted, 3 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=991 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@9 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 60add43d99c1c31aeecd895cb555ad6f6520608e -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:12:44 2011 +0000 - - Fix public issue #5: Replace the EXTRA_LIBSNAPPY_LDFLAGS setup with something - slightly more standard, that also doesn't leak libtool command-line into - configure.ac. - - R=csilvers - DELTA=7 (0 added, 4 deleted, 3 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=990 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@8 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit a8dd1700879ad646106742aa0e9c3a48dc07b01d -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:12:22 2011 +0000 - - Fix public issue #4: Properly quote all macro arguments in configure.ac. - - R=csilvers - DELTA=16 (0 added, 0 deleted, 16 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=989 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@7 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 79752dd7033658e28dc894de55012bdf2c9afca3 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:11:54 2011 +0000 - - Fix public issue #7: Don't use internal variables named ac_*, as those belong - to autoconf's namespace. - - R=csilvers - DELTA=6 (0 added, 0 deleted, 6 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=988 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@6 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 46e39fb20c297129494b969ac4ea64fcd04b4fa0 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:11:09 2011 +0000 - - Add missing licensing headers to a few files. (Part 2/2.) - - R=csilvers - DELTA=12 (12 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=987 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@5 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 3e764216fc8edaafca480443b90e55c14eaae2c2 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:10:39 2011 +0000 - - Add mising licensing headers to a few files. (Part 1/2.) 
- - R=csilvers - DELTA=24 (24 added, 0 deleted, 0 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=986 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@4 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 9a59f183c8ffec62dcdabd3499d0d515e44e4ef0 -Author: snappy.mirrorbot@gmail.com -Date: Wed Mar 23 11:10:04 2011 +0000 - - Use the correct license file for the Apache 2.0 license; - spotted by Florian Weimer. - - R=csilvers - DELTA=202 (174 added, 0 deleted, 28 changed) - - - Revision created by MOE tool push_codebase. - MOE_MIGRATION=985 - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@3 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 28a64402392c791905d6e1384ea1b48a5cb0b281 -Author: snappy.mirrorbot@gmail.com -Date: Fri Mar 18 17:14:15 2011 +0000 - - Revision created by MOE tool push_codebase. - MOE_MIGRATION= - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@2 03e5f5b5-db94-4691-08a0-1a8bf15f6143 - -commit 7c3c6077b72b4ae2237267a20f640b55e9a90569 -Author: sesse@google.com -Date: Fri Mar 18 17:13:52 2011 +0000 - - Create trunk directory. - - - git-svn-id: https://snappy.googlecode.com/svn/trunk@1 03e5f5b5-db94-4691-08a0-1a8bf15f6143 diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..4f80d95 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,23 @@ +module( + name = "snappy", + version = "1.2.2", + compatibility_level = 1, +) + +bazel_dep( + name = "googletest", + version = "1.14.0.bcr.1", + dev_dependency = True, + repo_name = "com_google_googletest", +) +bazel_dep( + name = "google_benchmark", + version = "1.9.0", + dev_dependency = True, + repo_name = "com_google_benchmark", +) + +bazel_dep( + name = "platforms", + version = "0.0.9", +) diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 0746a16..0000000 --- a/Makefile.am +++ /dev/null @@ -1,31 +0,0 @@ -ACLOCAL_AMFLAGS = -I m4 - -# Library. -lib_LTLIBRARIES = libsnappy.la -libsnappy_la_SOURCES = snappy.cc snappy-sinksource.cc snappy-stubs-internal.cc snappy-c.cc -libsnappy_la_LDFLAGS = -version-info $(SNAPPY_LTVERSION) - -include_HEADERS = snappy.h snappy-sinksource.h snappy-stubs-public.h snappy-c.h -noinst_HEADERS = snappy-internal.h snappy-stubs-internal.h snappy-test.h - -# Unit tests and benchmarks. -snappy_unittest_CPPFLAGS = $(gflags_CFLAGS) $(GTEST_CPPFLAGS) -snappy_unittest_SOURCES = snappy_unittest.cc snappy-test.cc -snappy_unittest_LDFLAGS = $(GTEST_LDFLAGS) -snappy_unittest_LDADD = libsnappy.la $(UNITTEST_LIBS) $(gflags_LIBS) $(GTEST_LIBS) -TESTS = snappy_unittest -noinst_PROGRAMS = $(TESTS) - -EXTRA_DIST = autogen.sh testdata/alice29.txt testdata/asyoulik.txt testdata/baddata1.snappy testdata/baddata2.snappy testdata/baddata3.snappy testdata/geo.protodata testdata/fireworks.jpeg testdata/html testdata/html_x_4 testdata/kppkn.gtb testdata/lcet10.txt testdata/paper-100k.pdf testdata/plrabn12.txt testdata/urls.10K -dist_doc_DATA = ChangeLog COPYING INSTALL NEWS README format_description.txt framing_format.txt - -pkgconfigdir = $(libdir)/pkgconfig -nodist_pkgconfig_DATA = snappy.pc - -libtool: $(LIBTOOL_DEPS) - $(SHELL) ./config.status --recheck - -# Needed by autoconf because we use README.md instead of README. -# See http://stackoverflow.com/q/15013672/ -README: README.md - cat $< > $@.tmp diff --git a/NEWS b/NEWS index 8aeafd7..ef935ba 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,46 @@ +Snappy v1.2.2, Mar 26th 2025: + + * We added a new compression level in v1.2.1 which compresses a bit + denser but slower. 
Decompression speed should be even faster with it. + + * We fixed a very old issue of data corruption when compressed size + exceeds 4GB. This can happen when you compress close to 4GB + of incompressible data, for example random data. + + * Raised the minimum required CMake version to 3.10; older versions + are no longer supported. + + * Various other small fixes and performance improvements (especially + for clang). + +Snappy v1.1.10, Mar 8th 2023: + + * Performance improvements. + + * Compilation fixes for various environments. + +Snappy v1.1.9, May 4th 2021: + + * Performance improvements. + + * Google Test and Google Benchmark are now bundled in third_party/. + +Snappy v1.1.8, January 15th 2020: + + * Small performance improvements. + + * Removed snappy::string alias for std::string. + + * Improved CMake configuration. + +Snappy v1.1.7, August 24th 2017: + + * Improved CMake build support for 64-bit Linux distributions. + + * MSVC builds now use MSVC-specific intrinsics that map to clzll. + + * ARM64 (AArch64) builds use the code paths optimized for 64-bit processors. + Snappy v1.1.6, July 12th 2017: This is a re-release of v1.1.5 with proper SONAME / SOVERSION values. diff --git a/README.md b/README.md index b9db833..9b4a494 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ Snappy, a fast compressor/decompressor. +[![Build Status](https://github.com/google/snappy/actions/workflows/build.yml/badge.svg)](https://github.com/google/snappy/actions/workflows/build.yml) Introduction ============ @@ -51,7 +52,7 @@ In particular: - Snappy uses 64-bit operations in several places to process more data at once than would otherwise be possible. - - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap. + - Snappy assumes unaligned 32 and 64-bit loads and stores are cheap. On some platforms, these must be emulated with single-byte loads and stores, which is much slower. - Snappy assumes little-endian throughout, and needs to byte-swap data in @@ -65,32 +66,38 @@ are of course most welcome; see "Contact", below. Building ======== -CMake is supported and autotools will soon be deprecated. -You need CMake 3.4 or above to build: - - mkdir build - cd build && cmake ../ && make +You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt) +or later to build: +```bash +git submodule update --init +mkdir build +cd build && cmake ../ && make +``` Usage ===== Note that Snappy, both the implementation and the main interface, is written in C++. However, several third-party bindings to other languages -are available; see the home page at http://google.github.io/snappy/ -for more information. Also, if you want to use Snappy from C code, you can -use the included C bindings in snappy-c.h. +are available; see the [home page](docs/README.md) for more information. +Also, if you want to use Snappy from C code, you can use the included C +bindings in snappy-c.h. To use Snappy from your own C++ program, include the file "snappy.h" from your calling file, and link against the compiled library. There are many ways to call Snappy, but the simplest possible is - snappy::Compress(input.data(), input.size(), &output); +```c++ +snappy::Compress(input.data(), input.size(), &output); +``` and similarly - snappy::Uncompress(input.data(), input.size(), &output); +```c++ +snappy::Uncompress(input.data(), input.size(), &output); +``` where "input" and "output" are both instances of std::string.
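Taken together, the two snippets above form a complete round trip. The following is a minimal, illustrative sketch only (it is not part of the patch); it assumes the library has been built and is linked with `-lsnappy`:

```c++
#include <cassert>
#include <string>

#include "snappy.h"

int main() {
  const std::string input = "Hello, hello, hello, hello, world!";
  std::string compressed, restored;

  // Compress() resizes the output string itself and returns the
  // compressed length.
  snappy::Compress(input.data(), input.size(), &compressed);

  // Uncompress() returns false on corrupt input instead of aborting.
  if (!snappy::Uncompress(compressed.data(), compressed.size(), &restored)) {
    return 1;
  }
  assert(restored == input);
  return 0;
}
```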
@@ -102,48 +109,57 @@ information. Tests and benchmarks ==================== -When you compile Snappy, snappy_unittest is compiled in addition to the -library itself. You do not need it to use the compressor from your own library, -but it contains several useful components for Snappy development. - -First of all, it contains unit tests, verifying correctness on your machine in -various scenarios. If you want to change or optimize Snappy, please run the -tests to verify you have not broken anything. Note that if you have the -Google Test library installed, unit test behavior (especially failures) will be -significantly more user-friendly. You can find Google Test at - - http://github.com/google/googletest - -You probably also want the gflags library for handling of command-line flags; -you can find it at - - http://gflags.github.io/gflags/ - -In addition to the unit tests, snappy contains microbenchmarks used to -tune compression and decompression performance. These are automatically run -before the unit tests, but you can disable them using the flag ---run_microbenchmarks=false if you have gflags installed (otherwise you will -need to edit the source). - -Finally, snappy can benchmark Snappy against a few other compression libraries -(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time. -To benchmark using a given file, give the compression algorithm you want to test -Snappy against (e.g. --zlib) and then a list of one or more file names on the -command line. The testdata/ directory contains the files used by the -microbenchmark, which should provide a reasonably balanced starting point for -benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they -are used to verify correctness in the presence of corrupted data in the unit -test.) - +When you compile Snappy, the following binaries are compiled in addition to the +library itself. You do not need them to use the compressor from your own +library, but they are useful for Snappy development. + +* `snappy_benchmark` contains microbenchmarks used to tune compression and + decompression performance. +* `snappy_unittest` contains unit tests, verifying correctness on your machine + in various scenarios. +* `snappy_test_tool` can benchmark Snappy against a few other compression + libraries (zlib, LZO, LZF, and QuickLZ), if they were detected at configure + time. To benchmark using a given file, give the compression algorithm you want + to test Snappy against (e.g. --zlib) and then a list of one or more file names + on the command line. + +If you want to change or optimize Snappy, please run the tests and benchmarks to +verify you have not broken anything. + +The testdata/ directory contains the files used by the microbenchmarks, which +should provide a reasonably balanced starting point for benchmarking. (Note that +baddata[1-3].snappy are not intended as benchmarks; they are used to verify +correctness in the presence of corrupted data in the unit test.) + +Contributing to the Snappy Project +================================== + +In addition to the aims listed at the top of the [README](README.md), Snappy +explicitly supports the following: + +1. C++11 +2. Clang (gcc and MSVC are best-effort). +3. Low-level optimizations (e.g. assembly or equivalent intrinsics) for: + - [x86](https://en.wikipedia.org/wiki/X86) + - [x86-64](https://en.wikipedia.org/wiki/X86-64) + - ARMv7 (32-bit) + - ARMv8 (AArch64) +4. Only the Snappy compression scheme, as described in + [format_description.txt](format_description.txt). +5.
CMake for building + +Changes adding features or dependencies outside of the core area of focus listed +above might not be accepted. If in doubt, post a message to the +[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression). + +We are unlikely to accept contributions to the build configuration files, such +as `CMakeLists.txt`. We are focused on maintaining a build configuration that +allows us to test that the project works in a few supported configurations +inside Google. We are not currently interested in supporting other requirements, +such as different operating systems, compilers, or build systems. Contact ======= -Snappy is distributed through GitHub. For the latest version, a bug tracker, -and other information, see - - http://google.github.io/snappy/ - -or the repository at - - https://github.com/google/snappy +Snappy is distributed through GitHub. For the latest version and other +information, see https://github.com/google/snappy. diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..7e60888 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,27 @@ +# Copyright 2023 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod new file mode 100644 index 0000000..e69de29 diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 886150f..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Build matrix / environment variables are explained on: -# https://www.appveyor.com/docs/appveyor-yml/ -# This file can be validated on: https://ci.appveyor.com/tools/validate-yaml - -version: "{build}" - -environment: - matrix: - # AppVeyor currently has no custom job name feature.
- # http://help.appveyor.com/discussions/questions/1623-can-i-provide-a-friendly-name-for-jobs - - JOB: Visual Studio 2017 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - CMAKE_GENERATOR: Visual Studio 15 2017 - - JOB: Visual Studio 2015 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 - CMAKE_GENERATOR: Visual Studio 14 2015 - -platform: - - x86 - - x64 - -configuration: - - RelWithDebInfo - - Debug - -build: - verbosity: minimal - -build_script: - - git submodule update --init --recursive - - mkdir out - - cd out - - cmake .. -G "%CMAKE_GENERATOR%" - -DCMAKE_CONFIGURATION_TYPES="Debug;RelWithDebInfo" - - cmake --build . --config %CONFIGURATION% - -test_script: - - ctest -C %CONFIGURATION% --output-on-failure diff --git a/autogen.sh b/autogen.sh deleted file mode 100755 index 9cb502e..0000000 --- a/autogen.sh +++ /dev/null @@ -1,12 +0,0 @@ -#! /bin/sh -e -rm -rf autom4te.cache -aclocal -I m4 -autoheader -if glibtoolize --version >/dev/null 2>/dev/null; then - LIBTOOLIZE=${LIBTOOLIZE:-glibtoolize} -else - LIBTOOLIZE=${LIBTOOLIZE:-libtoolize} -fi -$LIBTOOLIZE --copy -automake --add-missing --copy -autoconf diff --git a/cmake/SnappyConfig.cmake.in b/cmake/SnappyConfig.cmake.in index 5e604fe..9e7d134 100644 --- a/cmake/SnappyConfig.cmake.in +++ b/cmake/SnappyConfig.cmake.in @@ -1,9 +1,33 @@ -set(SNAPPY_VERSION @SNAPPY_MAJOR@.@SNAPPY_MINOR@.@SNAPPY_PATCHLEVEL@) +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
@PACKAGE_INIT@ -set_and_check(SNAPPY_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@") -set_and_check(SNAPPY_LIBRARY_DIR "@PACKAGE_LIBRARY_INSTALL_DIR@") -set_and_check(SNAPPY_BINARY_DIR "@PACKAGE_BINARY_INSTALL_DIR@") +include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake") -check_required_components(SNAPPY) \ No newline at end of file +check_required_components(Snappy) \ No newline at end of file diff --git a/cmake/config.h.in b/cmake/config.h.in index c06e3ad..de80c5f 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -1,90 +1,75 @@ -#ifndef SNAPPY_CONFIG_H -#define SNAPPY_CONFIG_H 1 +#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ +#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ + +/* Define to 1 if the compiler supports __attribute__((always_inline)). */ +#cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE /* Define to 1 if the compiler supports __builtin_ctz and friends. */ -#cmakedefine HAVE_BUILTIN_CTZ ${HAVE_BUILTIN_CTZ} +#cmakedefine01 HAVE_BUILTIN_CTZ /* Define to 1 if the compiler supports __builtin_expect. */ -#cmakedefine HAVE_BUILTIN_EXPECT ${HAVE_BUILTIN_EXPECT} - -/* Define to 1 if you have the <byteswap.h> header file. */ -#cmakedefine HAVE_BYTESWAP_H ${HAVE_BYTESWAP_H} - -/* Define to 1 if you have the <dlfcn.h> header file. */ -#cmakedefine HAVE_DLFCN_H ${HAVE_DLFCN_H} +#cmakedefine01 HAVE_BUILTIN_EXPECT -/* Use the gflags package for command-line parsing. */ -#cmakedefine HAVE_GFLAGS ${HAVE_GFLAGS} +/* Define to 1 if the compiler supports __builtin_prefetch. */ +#cmakedefine01 HAVE_BUILTIN_PREFETCH -/* Defined when Google Test is available. */ -#cmakedefine HAVE_GTEST ${HAVE_GTEST} +/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */ +#cmakedefine01 HAVE_FUNC_MMAP -/* Define to 1 if you have the <inttypes.h> header file. */ -#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H} +/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */ +#cmakedefine01 HAVE_FUNC_SYSCONF /* Define to 1 if you have the `lzo2' library (-llzo2). */ -#cmakedefine HAVE_LIBLZO2 ${HAVE_LIBLZO2} +#cmakedefine01 HAVE_LIBLZO2 /* Define to 1 if you have the `z' library (-lz). */ -#cmakedefine HAVE_LIBZ ${HAVE_LIBZ} - -/* Define to 1 if you have the <sys/uio.h> header file. */ -#cmakedefine HAVE_SYS_UIO_H ${HAVE_SYS_UIO_H} - -/* Define to 1 if you have the <memory.h> header file. */ -#cmakedefine HAVE_MEMORY_H ${HAVE_MEMORY_H} - -/* Define to 1 if you have the <stddef.h> header file. */ -#cmakedefine HAVE_STDDEF_H ${HAVE_STDDEF_H} - -/* Define to 1 if you have the <stdint.h> header file. */ -#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H} - -/* Define to 1 if you have the <stdlib.h> header file. */ -#cmakedefine HAVE_STDLIB_H ${HAVE_STDLIB_H} - -/* Define to 1 if you have the <strings.h> header file. */ -#cmakedefine HAVE_STRINGS_H ${HAVE_STRINGS_H} +#cmakedefine01 HAVE_LIBZ -/* Define to 1 if you have the <string.h> header file. */ -#cmakedefine HAVE_STRING_H ${HAVE_STRING_H} - -/* Define to 1 if you have the <sys/byteswap.h> header file. */ -#cmakedefine HAVE_SYS_BYTESWAP_H ${HAVE_SYS_BYTESWAP_H} - -/* Define to 1 if you have the <sys/endian.h> header file. */ -#cmakedefine HAVE_SYS_ENDIAN_H ${HAVE_SYS_ENDIAN_H} +/* Define to 1 if you have the `lz4' library (-llz4). */ +#cmakedefine01 HAVE_LIBLZ4 /* Define to 1 if you have the <sys/mman.h> header file. */ -#cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H} +#cmakedefine01 HAVE_SYS_MMAN_H /* Define to 1 if you have the <sys/resource.h> header file. */ -#cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H} - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#cmakedefine HAVE_SYS_STAT_H ${HAVE_SYS_STAT_H} +#cmakedefine01 HAVE_SYS_RESOURCE_H /* Define to 1 if you have the <sys/time.h> header file.
*/ -#cmakedefine HAVE_SYS_TIME_H ${HAVE_SYS_TIME_H} +#cmakedefine01 HAVE_SYS_TIME_H -/* Define to 1 if you have the <sys/types.h> header file. */ -#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES} +/* Define to 1 if you have the <sys/uio.h> header file. */ +#cmakedefine01 HAVE_SYS_UIO_H /* Define to 1 if you have the <unistd.h> header file. */ -#cmakedefine HAVE_UNISTD_H ${HAVE_UNISTD_H} +#cmakedefine01 HAVE_UNISTD_H /* Define to 1 if you have the <windows.h> header file. */ -#cmakedefine HAVE_WINDOWS_H ${HAVE_WINDOWS_H} +#cmakedefine01 HAVE_WINDOWS_H + +/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_SSSE3 + +/* Define to 1 if you target processors with SSE4.2 and have <nmmintrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_X86_CRC32 + +/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */ +#cmakedefine01 SNAPPY_HAVE_BMI2 + +/* Define to 1 if you target processors with NEON and have <arm_neon.h>. */ +#cmakedefine01 SNAPPY_HAVE_NEON + +/* Define to 1 if you target processors with RVV1.0 and have <riscv_vector.h>. */ +#cmakedefine01 SNAPPY_RVV_1 -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 +/* Define to 1 if you target processors with RVV0.7 and have <riscv_vector.h>. */ +#cmakedefine01 SNAPPY_RVV_0_7 -/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most - significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ -#cmakedefine WORDS_BIGENDIAN +/* Define to 1 if you have <arm_neon.h> and <arm_acle.h> and want to optimize + compression speed by using __crc32cw from <arm_acle.h>. */ +#cmakedefine01 SNAPPY_HAVE_NEON_CRC32 -#if defined(_MSC_VER) && (_MSC_VER <= 1900) -typedef __int64 ssize_t; -#endif +/* Define to 1 if your processor stores words with the most significant byte + first (like Motorola and SPARC, unlike Intel and VAX). */ +#cmakedefine01 SNAPPY_IS_BIG_ENDIAN -#endif +#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_ diff --git a/configure.ac b/configure.ac deleted file mode 100644 index 595826e..0000000 --- a/configure.ac +++ /dev/null @@ -1,131 +0,0 @@ -m4_define([snappy_major], [1]) -m4_define([snappy_minor], [1]) -m4_define([snappy_patchlevel], [6]) - -# Libtool shared library interface versions (current:revision:age) -# Update this value for every release! (A:B:C will map to foo.so.(A-C).C.B) -# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html -m4_define([snappy_ltversion], [2:6:1]) - -AC_INIT([snappy], [snappy_major.snappy_minor.snappy_patchlevel]) -AC_CONFIG_MACRO_DIR([m4]) -AC_CONFIG_AUX_DIR([.]) - -# These are flags passed to automake (though they look like gcc flags!) -AM_INIT_AUTOMAKE([-Wall]) - -LT_INIT -AC_SUBST([LIBTOOL_DEPS]) -AC_PROG_CXX -AC_LANG([C++]) -AC_C_BIGENDIAN -AC_TYPE_SIZE_T -AC_TYPE_SSIZE_T -AC_CHECK_HEADERS([stdint.h stddef.h sys/mman.h sys/resource.h sys/uio.h windows.h byteswap.h sys/byteswap.h sys/endian.h sys/time.h]) - -# Don't use AC_FUNC_MMAP, as it checks for mappings of already-mapped memory, -# which we don't need (and does not exist on Windows).
-AC_CHECK_FUNC([mmap]) - -GTEST_LIB_CHECK([], [true], [true # Ignore; we can live without it.]) - -AC_ARG_WITH([gflags], - [AS_HELP_STRING( - [--with-gflags], - [use Google Flags package to enhance the unit test @<:@default=check@:>@])], - [], - [with_gflags=check]) - -if test "x$with_gflags" != "xno"; then - PKG_CHECK_MODULES( - [gflags], - [libgflags], - [AC_DEFINE([HAVE_GFLAGS], [1], [Use the gflags package for command-line parsing.])], - [if test "x$with_gflags" != "xcheck"; then - AC_MSG_FAILURE([--with-gflags was given, but test for gflags failed]) - fi]) -fi - -# See if we have __builtin_expect. -# TODO: Use AC_CACHE. -AC_MSG_CHECKING([if the compiler supports __builtin_expect]) - -AC_TRY_COMPILE(, [ - return __builtin_expect(1, 1) ? 1 : 0 -], [ - snappy_have_builtin_expect=yes - AC_MSG_RESULT([yes]) -], [ - snappy_have_builtin_expect=no - AC_MSG_RESULT([no]) -]) -if test x$snappy_have_builtin_expect = xyes ; then - AC_DEFINE([HAVE_BUILTIN_EXPECT], [1], [Define to 1 if the compiler supports __builtin_expect.]) -fi - -# See if we have working count-trailing-zeros intrinsics. -# TODO: Use AC_CACHE. -AC_MSG_CHECKING([if the compiler supports __builtin_ctzll]) - -AC_TRY_COMPILE(, [ - return (__builtin_ctzll(0x100000000LL) == 32) ? 1 : 0 -], [ - snappy_have_builtin_ctz=yes - AC_MSG_RESULT([yes]) -], [ - snappy_have_builtin_ctz=no - AC_MSG_RESULT([no]) -]) -if test x$snappy_have_builtin_ctz = xyes ; then - AC_DEFINE([HAVE_BUILTIN_CTZ], [1], [Define to 1 if the compiler supports __builtin_ctz and friends.]) -fi - -# Other compression libraries; the unit test can use these for comparison -# if they are available. If they are not found, just ignore. -UNITTEST_LIBS="" -AC_DEFUN([CHECK_EXT_COMPRESSION_LIB], [ - AH_CHECK_LIB([$1]) - AC_CHECK_LIB( - [$1], - [$2], - [ - AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_LIB$1)) - UNITTEST_LIBS="-l$1 $UNITTEST_LIBS" - ], - [true] - ) -]) -CHECK_EXT_COMPRESSION_LIB([z], [zlibVersion]) -CHECK_EXT_COMPRESSION_LIB([lzo2], [lzo1x_1_15_compress]) -AC_SUBST([UNITTEST_LIBS]) - -# These are used by snappy-stubs-public.h.in. -if test "$ac_cv_header_stdint_h" = "yes"; then - AC_SUBST([ac_cv_have_stdint_h], [1]) -else - AC_SUBST([ac_cv_have_stdint_h], [0]) -fi -if test "$ac_cv_header_stddef_h" = "yes"; then - AC_SUBST([ac_cv_have_stddef_h], [1]) -else - AC_SUBST([ac_cv_have_stddef_h], [0]) -fi -if test "$ac_cv_header_sys_uio_h" = "yes"; then - AC_SUBST([ac_cv_have_sys_uio_h], [1]) -else - AC_SUBST([ac_cv_have_sys_uio_h], [0]) -fi - -# Export the version to snappy-stubs-public.h. -SNAPPY_MAJOR="snappy_major" -SNAPPY_MINOR="snappy_minor" -SNAPPY_PATCHLEVEL="snappy_patchlevel" - -AC_SUBST([SNAPPY_MAJOR]) -AC_SUBST([SNAPPY_MINOR]) -AC_SUBST([SNAPPY_PATCHLEVEL]) -AC_SUBST([SNAPPY_LTVERSION], snappy_ltversion) - -AC_CONFIG_HEADERS([config.h]) -AC_CONFIG_FILES([Makefile snappy-stubs-public.h snappy.pc]) -AC_OUTPUT diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..d5e0e63 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,72 @@ +Snappy is a compression/decompression library. It does not aim for maximum +compression, or compatibility with any other compression library; instead, it +aims for very high speeds and reasonable compression. For instance, compared +to the fastest mode of zlib, Snappy is an order of magnitude faster for most +inputs, but the resulting compressed files are anywhere from 20% to 100% +bigger. 
On a single core of a Core i7 processor in 64-bit mode, Snappy +compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec +or more. + +Snappy is widely used inside Google, in everything from BigTable and MapReduce +to our internal RPC systems. (Snappy has previously been referred to as "Zippy" +in some presentations and the likes.) + +For more information, please see the [README](../README.md). Benchmarks against +a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are +included in the source code distribution. The source code also contains a +[formal format specification](../format_description.txt), as well +as a specification for a [framing format](../framing_format.txt) useful for +higher-level framing and encapsulation of Snappy data, e.g. for transporting +Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy +distribution currently has no code implementing the latter, but some of the +ports do (see below). + +Snappy is written in C++, but C bindings are included, and several bindings to +other languages are maintained by third parties: + +* C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper), + [Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper), + [Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native + reimplementation) +* [C port](http://github.com/andikleen/snappy-c) +* [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries, + NuGet packages and command-line tool) +* Common Lisp: [Library bindings](http://flambard.github.com/thnappy/), + [native reimplementation](https://github.com/brown/snappy) +* Erlang: [esnappy](https://github.com/thekvs/esnappy), + [snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif) +* [Go](https://github.com/golang/snappy/) +* [Haskell](http://hackage.haskell.org/package/snappy) +* [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko) +* [iOS packaging](https://github.com/ideawu/snappy-ios) +* Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the + framing format), [native reimplementation](http://code.google.com/p/jsnappy/), + [other native reimplementation](https://github.com/dain/snappy) (including + the framing format) +* [Lua](https://github.com/forhappy/lua-snappy) +* [Node.js](https://github.com/kesla/node-snappy) (including the [framing + format](https://github.com/kesla/node-snappy-stream)) +* [Perl](http://search.cpan.org/dist/Compress-Snappy/) +* [PHP](https://github.com/kjdev/php-ext-snappy) +* [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line + tool for the framing format) +* [R](https://github.com/lulyon/R-snappy) +* [Ruby](https://github.com/miyucy/snappy) +* [Rust](https://github.com/BurntSushi/rust-snappy) +* [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format) + +Snappy is used or is available as an alternative in software such as + +* [MongoDB](https://www.mongodb.com/) +* [Cassandra](http://cassandra.apache.org/) +* [Couchbase](http://www.couchbase.com/) +* [Hadoop](http://hadoop.apache.org/) +* [LessFS](http://www.lessfs.com/wordpress/) +* [LevelDB](https://github.com/google/leveldb) (which is in turn used by + [Google Chrome](http://chrome.google.com/)) +* [Lucene](http://lucene.apache.org/) +* [VoltDB](http://voltdb.com/) + +If you know of more, do not hesitate to let us know. The easiest way to get in +touch is via the +[Snappy discussion mailing list](http://groups.google.com/group/snappy-compression). 
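For the included C bindings mentioned above, snappy-c.h declares `snappy_compress`, `snappy_uncompress`, `snappy_max_compressed_length`, and `snappy_uncompressed_length`. A minimal sketch of the usual calling pattern (illustration only; the caller owns all buffer sizing and error handling):

```c++
#include <cstring>
#include <vector>

#include "snappy-c.h"

int main() {
  const char input[] = "example payload, example payload, example payload";
  size_t input_length = sizeof(input);

  // Ask for the worst-case output size first; snappy_compress then shrinks
  // compressed_length to the number of bytes actually written.
  size_t compressed_length = snappy_max_compressed_length(input_length);
  std::vector<char> compressed(compressed_length);
  if (snappy_compress(input, input_length, compressed.data(),
                      &compressed_length) != SNAPPY_OK) {
    return 1;
  }

  // The uncompressed size is stored in the stream, so it can be recovered
  // before allocating the output buffer.
  size_t uncompressed_length = 0;
  if (snappy_uncompressed_length(compressed.data(), compressed_length,
                                 &uncompressed_length) != SNAPPY_OK) {
    return 1;
  }
  std::vector<char> output(uncompressed_length);
  if (snappy_uncompress(compressed.data(), compressed_length, output.data(),
                        &uncompressed_length) != SNAPPY_OK) {
    return 1;
  }
  return std::memcmp(output.data(), input, input_length) == 0 ? 0 : 1;
}
```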
diff --git a/m4/gtest.m4 b/m4/gtest.m4 deleted file mode 100644 index 98e61f9..0000000 --- a/m4/gtest.m4 +++ /dev/null @@ -1,74 +0,0 @@ -dnl GTEST_LIB_CHECK([minimum version [, -dnl action if found [,action if not found]]]) -dnl -dnl Check for the presence of the Google Test library, optionally at a minimum -dnl version, and indicate a viable version with the HAVE_GTEST flag. It defines -dnl standard variables for substitution including GTEST_CPPFLAGS, -dnl GTEST_CXXFLAGS, GTEST_LDFLAGS, and GTEST_LIBS. It also defines -dnl GTEST_VERSION as the version of Google Test found. Finally, it provides -dnl optional custom action slots in the event GTEST is found or not. -AC_DEFUN([GTEST_LIB_CHECK], -[ -dnl Provide a flag to enable or disable Google Test usage. -AC_ARG_ENABLE([gtest], - [AS_HELP_STRING([--enable-gtest], - [Enable tests using the Google C++ Testing Framework. - (Default is enabled.)])], - [], - [enable_gtest=]) -AC_ARG_VAR([GTEST_CONFIG], - [The exact path of Google Test's 'gtest-config' script.]) -AC_ARG_VAR([GTEST_CPPFLAGS], - [C-like preprocessor flags for Google Test.]) -AC_ARG_VAR([GTEST_CXXFLAGS], - [C++ compile flags for Google Test.]) -AC_ARG_VAR([GTEST_LDFLAGS], - [Linker path and option flags for Google Test.]) -AC_ARG_VAR([GTEST_LIBS], - [Library linking flags for Google Test.]) -AC_ARG_VAR([GTEST_VERSION], - [The version of Google Test available.]) -HAVE_GTEST="no" -AS_IF([test "x${enable_gtest}" != "xno"], - [AC_MSG_CHECKING([for 'gtest-config']) - AS_IF([test "x${enable_gtest}" = "xyes"], - [AS_IF([test -x "${enable_gtest}/scripts/gtest-config"], - [GTEST_CONFIG="${enable_gtest}/scripts/gtest-config"], - [GTEST_CONFIG="${enable_gtest}/bin/gtest-config"]) - AS_IF([test -x "${GTEST_CONFIG}"], [], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR([dnl -Unable to locate either a built or installed Google Test. 
-The specific location '${enable_gtest}' was provided for a built or installed -Google Test, but no 'gtest-config' script could be found at this location.]) - ])], - [AC_PATH_PROG([GTEST_CONFIG], [gtest-config])]) - AS_IF([test -x "${GTEST_CONFIG}"], - [AC_MSG_RESULT([${GTEST_CONFIG}]) - m4_ifval([$1], - [_gtest_min_version="--min-version=$1" - AC_MSG_CHECKING([for Google Test at least version >= $1])], - [_gtest_min_version="--min-version=0" - AC_MSG_CHECKING([for Google Test])]) - AS_IF([${GTEST_CONFIG} ${_gtest_min_version}], - [AC_MSG_RESULT([yes]) - HAVE_GTEST='yes'], - [AC_MSG_RESULT([no])])], - [AC_MSG_RESULT([no])]) - AS_IF([test "x${HAVE_GTEST}" = "xyes"], - [GTEST_CPPFLAGS=`${GTEST_CONFIG} --cppflags` - GTEST_CXXFLAGS=`${GTEST_CONFIG} --cxxflags` - GTEST_LDFLAGS=`${GTEST_CONFIG} --ldflags` - GTEST_LIBS=`${GTEST_CONFIG} --libs` - GTEST_VERSION=`${GTEST_CONFIG} --version` - AC_DEFINE([HAVE_GTEST],[1],[Defined when Google Test is available.])], - [AS_IF([test "x${enable_gtest}" = "xyes"], - [AC_MSG_ERROR([dnl -Google Test was enabled, but no viable version could be found.]) - ])])]) -AC_SUBST([HAVE_GTEST]) -AM_CONDITIONAL([HAVE_GTEST],[test "x$HAVE_GTEST" = "xyes"]) -AS_IF([test "x$HAVE_GTEST" = "xyes"], - [m4_ifval([$2], [$2])], - [m4_ifval([$3], [$3])]) -]) diff --git a/snappy-internal.h b/snappy-internal.h index 0cccba1..00b2db5 100644 --- a/snappy-internal.h +++ b/snappy-internal.h @@ -31,26 +31,131 @@ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ +#include <utility> + #include "snappy-stubs-internal.h" +#if SNAPPY_HAVE_SSSE3 +// Please do not replace with <x86intrin.h> or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include <emmintrin.h> +#include <tmmintrin.h> +#endif + +#if SNAPPY_HAVE_NEON +#include <arm_neon.h> +#endif + +#if SNAPPY_RVV_1 || SNAPPY_RVV_0_7 +#define SNAPPY_HAVE_RVV 1 +#include <riscv_vector.h> +#else +#define SNAPPY_HAVE_RVV 0 +#endif + +#if SNAPPY_RVV_1 +#define VSETVL_E8M2 __riscv_vsetvl_e8m2 +#define VLE8_V_U8M2 __riscv_vle8_v_u8m2 +#define VSE8_V_U8M2 __riscv_vse8_v_u8m2 +#elif SNAPPY_RVV_0_7 +#define VSETVL_E8M2 vsetvl_e8m2 +#define VLE8_V_U8M2 vle8_v_u8m2 +#define VSE8_V_U8M2 vse8_v_u8m2 +#endif + +#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON +#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1 +#else +#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0 +#endif + namespace snappy { namespace internal { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE +#if SNAPPY_HAVE_SSSE3 +using V128 = __m128i; +#elif SNAPPY_HAVE_NEON +using V128 = uint8x16_t; +#endif + +// Load 128 bits of integer data. `src` must be 16-byte aligned. +inline V128 V128_Load(const V128* src); + +// Load 128 bits of integer data. `src` does not need to be aligned. +inline V128 V128_LoadU(const V128* src); + +// Store 128 bits of integer data. `dst` does not need to be aligned. +inline void V128_StoreU(V128* dst, V128 val); + +// Shuffle packed 8-bit integers using a shuffle mask. +// Each packed integer in the shuffle mask must be in [0,16). +inline V128 V128_Shuffle(V128 input, V128 shuffle_mask); + +// Constructs V128 with 16 chars |c|.
+inline V128 V128_DupChar(char c); + +#if SNAPPY_HAVE_SSSE3 +inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); } + +inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); } + +inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); } + +inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) { + return _mm_shuffle_epi8(input, shuffle_mask); +} + +inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); } + +#elif SNAPPY_HAVE_NEON +inline V128 V128_Load(const V128* src) { + return vld1q_u8(reinterpret_cast<const uint8_t*>(src)); +} + +inline V128 V128_LoadU(const V128* src) { + return vld1q_u8(reinterpret_cast<const uint8_t*>(src)); +} + +inline void V128_StoreU(V128* dst, V128 val) { + vst1q_u8(reinterpret_cast<uint8_t*>(dst), val); +} + +inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) { + assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15); + return vqtbl1q_u8(input, shuffle_mask); +} + +inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); } + + +#endif +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + +// Working memory performs a single allocation to hold all scratch space +// required for compression. class WorkingMemory { public: - WorkingMemory() : large_table_(NULL) { } - ~WorkingMemory() { delete[] large_table_; } + explicit WorkingMemory(size_t input_size); + ~WorkingMemory(); // Allocates and clears a hash table using memory in "*this", // stores the number of buckets in "*table_size" and returns a pointer to // the base of the hash table. - uint16* GetHashTable(size_t input_size, int* table_size); + uint16_t* GetHashTable(size_t fragment_size, int* table_size) const; + char* GetScratchInput() const { return input_; } + char* GetScratchOutput() const { return output_; } private: - uint16 small_table_[1<<10]; // 2KB - uint16* large_table_; // Allocated only when needed + char* mem_; // the allocated memory, never nullptr + size_t size_; // the size of the allocated memory, never 0 + uint16_t* table_; // the pointer to the hashtable + char* input_; // the pointer to the input scratch buffer + char* output_; // the pointer to the output scratch buffer - DISALLOW_COPY_AND_ASSIGN(WorkingMemory); + // No copying + WorkingMemory(const WorkingMemory&); + void operator=(const WorkingMemory&); }; // Flat array compression that does not emit the "uncompressed length" @@ -67,7 +172,7 @@ class WorkingMemory { char* CompressFragment(const char* input, size_t input_length, char* op, - uint16* table, + uint16_t* table, const int table_size); // Find the largest n such that @@ -80,11 +185,20 @@ char* CompressFragment(const char* input, // Does not read *(s1 + (s2_limit - s2)) or beyond. // Requires that s2_limit >= s2. // +// In addition populate *data with the next 5 bytes from the end of the match. +// This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is +// that on some arch's this can be done faster in this routine than subsequent +// loading from s2 + n. +// // Separate implementation for 64-bit, little-endian cpus. -#if defined(ARCH_K8) || (defined(ARCH_PPC) && !defined(WORDS_BIGENDIAN)) +// RISC-V little-endian CPUs can also use this faster routine.
+#if !SNAPPY_IS_BIG_ENDIAN && \ + (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \ + defined(ARCH_ARM) || defined(__riscv)) static inline std::pair<size_t, bool> FindMatchLength(const char* s1, const char* s2, - const char* s2_limit, + const char* s2_limit, + uint64_t* data) { assert(s2_limit >= s2); size_t matched = 0; @@ -93,39 +207,118 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1, // uncommon code paths that determine, without extra effort, whether the match // length is less than 8. In short, we are hoping to avoid a conditional // branch, and perhaps get better code layout from the C++ compiler. - if (PREDICT_TRUE(s2 <= s2_limit - 8)) { - uint64 a1 = UNALIGNED_LOAD64(s1); - uint64 a2 = UNALIGNED_LOAD64(s2); - if (a1 != a2) { - return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, - true); + if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) { + uint64_t a1 = UNALIGNED_LOAD64(s1); + uint64_t a2 = UNALIGNED_LOAD64(s2); + if (SNAPPY_PREDICT_TRUE(a1 != a2)) { + // This code is critical for performance. The reason is that it determines + // how much to advance `ip` (s2). This obviously depends on both the loads + // from the `candidate` (s1) and `ip`. Furthermore the next `candidate` + // depends on the advanced `ip` calculated here through a load, hash and + // new candidate hash lookup (a lot of cycles). This makes s1 (i.e. + // `candidate`) the variable that limits throughput. This is the reason we + // go through hoops to have this function update `data` for the next iter. + // The straightforward code would use *data, given by + // + // *data = UNALIGNED_LOAD64(s2 + matched_bytes) (Latency of 5 cycles), + // + // as input for the hash table lookup to find next candidate. However + // this forces the load on the data dependency chain of s1, because + // matched_bytes directly depends on s1. However matched_bytes is 0..7, so + // we can also calculate *data by + // + // *data = AlignRight(UNALIGNED_LOAD64(s2), UNALIGNED_LOAD64(s2 + 8), + // matched_bytes); + // + // The loads do not depend on s1 anymore and are thus off the bottleneck. + // The straightforward implementation on x86_64 would be to use + // + // shrd rax, rdx, cl (cl being matched_bytes * 8) + // + // unfortunately shrd with a variable shift has a 4 cycle latency. So this + // only wins 1 cycle. The BMI2 shrx instruction is a 1 cycle variable + // shift instruction but can only shift 64 bits. If we focus on just + // obtaining the least significant 4 bytes, we can obtain this by + // + // *data = ConditionalMove(matched_bytes < 4, UNALIGNED_LOAD64(s2), + // UNALIGNED_LOAD64(s2 + 4) >> ((matched_bytes & 3) * 8)); + // + // Written like above this is not a big win, the conditional move would be + // a cmp followed by a cmov (2 cycles) followed by a shift (1 cycle). + // However matched_bytes < 4 is equal to + // static_cast<uint32_t>(xorval) != 0. Written that way, the conditional + // move (2 cycles) can execute in parallel with FindLSBSetNonZero64 + // (tzcnt), which takes 3 cycles. + uint64_t xorval = a1 ^ a2; + int shift = Bits::FindLSBSetNonZero64(xorval); + size_t matched_bytes = shift >> 3; + uint64_t a3 = UNALIGNED_LOAD64(s2 + 4); +#ifndef __x86_64__ + a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2; +#else + // Ideally this would just be + // + // a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2; + // + // However clang correctly infers that the above statement participates in + // a critical data dependency chain and thus, unfortunately, refuses to + // use a conditional move (it's tuned to cut data dependencies).
In this + // case there is a longer parallel chain anyway AND this will be fairly + // unpredictable. + asm("testl %k2, %k2\n\t" + "cmovzq %1, %0\n\t" + : "+r"(a2) + : "r"(a3), "r"(xorval) + : "cc"); +#endif + *data = a2 >> (shift & (3 * 8)); + return std::pair<size_t, bool>(matched_bytes, true); } else { matched = 8; s2 += 8; } } + SNAPPY_PREFETCH(s1 + 64); + SNAPPY_PREFETCH(s2 + 64); // Find out how long the match is. We loop over the data 64 bits at a // time until we find a 64-bit block that doesn't match; then we find // the first non-matching bit and use that to calculate the total // length of the match. - while (PREDICT_TRUE(s2 <= s2_limit - 8)) { - if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { + while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) { + uint64_t a1 = UNALIGNED_LOAD64(s1 + matched); + uint64_t a2 = UNALIGNED_LOAD64(s2); + if (a1 == a2) { s2 += 8; matched += 8; } else { - uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero64(x); - matched += matching_bits >> 3; + uint64_t xorval = a1 ^ a2; + int shift = Bits::FindLSBSetNonZero64(xorval); + size_t matched_bytes = shift >> 3; + uint64_t a3 = UNALIGNED_LOAD64(s2 + 4); +#ifndef __x86_64__ + a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2; +#else + asm("testl %k2, %k2\n\t" + "cmovzq %1, %0\n\t" + : "+r"(a2) + : "r"(a3), "r"(xorval) + : "cc"); +#endif + *data = a2 >> (shift & (3 * 8)); + matched += matched_bytes; assert(matched >= 8); return std::pair<size_t, bool>(matched, false); } } - while (PREDICT_TRUE(s2 < s2_limit)) { + while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { if (s1[matched] == *s2) { ++s2; ++matched; } else { + if (s2 <= s2_limit - 8) { + *data = UNALIGNED_LOAD64(s2); + } return std::pair<size_t, bool>(matched, matched < 8); } } @@ -134,7 +327,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1, #else static inline std::pair<size_t, bool> FindMatchLength(const char* s1, const char* s2, - const char* s2_limit) { + const char* s2_limit, + uint64_t* data) { // Implementation based on the x86-64 version, above. assert(s2_limit >= s2); int matched = 0; @@ -145,19 +339,46 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1, matched += 4; } if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { - uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); int matching_bits = Bits::FindLSBSetNonZero(x); matched += matching_bits >> 3; + s2 += matching_bits >> 3; } else { while ((s2 < s2_limit) && (s1[matched] == *s2)) { ++s2; ++matched; } } + if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2); return std::pair<size_t, bool>(matched, matched < 8); } #endif +static inline size_t FindMatchLengthPlain(const char* s1, const char* s2, + const char* s2_limit) { + // Implementation based on the x86-64 version, above. + assert(s2_limit >= s2); + int matched = 0; + + while (s2 <= s2_limit - 8 && + UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { + s2 += 8; + matched += 8; + } + if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) { + uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + s2 += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } + return matched; +} + // Lookup tables for decompression code. Give --snappy_dump_decompression_table // to the unit test to recompute char_table.
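The comment block above hinges on one core trick: XOR the two 64-bit loads, find the lowest set bit of the result, and shift right by three to convert bits to bytes. A self-contained little-endian sketch of just that step, with the GCC/Clang `__builtin_ctzll` intrinsic standing in for `Bits::FindLSBSetNonZero64` (illustration only, not the library's exact code):

```c++
#include <cassert>
#include <cstdint>
#include <cstring>

// Returns how many leading bytes of s1 and s2 match, for inputs known to
// differ somewhere within the first 8 bytes.
inline size_t MatchedBytes(const char* s1, const char* s2) {
  uint64_t a1, a2;
  std::memcpy(&a1, s1, sizeof(a1));  // memcpy keeps the loads unaligned-safe
  std::memcpy(&a2, s2, sizeof(a2));
  uint64_t xorval = a1 ^ a2;
  assert(xorval != 0);  // a full 8-byte match is handled by the caller's loop
  // On little-endian hardware the lowest-addressed byte sits in the least
  // significant bits, so every 8 trailing zero bits equal one matching byte.
  return static_cast<size_t>(__builtin_ctzll(xorval)) >> 3;
}
```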
@@ -180,7 +401,8 @@ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual o // because of efficiency reasons: // (1) Extracting a byte is faster than a bit-field // (2) It properly aligns copy offset so we do not need a <<8 -static const uint16 char_table[256] = { +static constexpr uint16_t char_table[256] = { + // clang-format off 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, @@ -212,7 +434,8 @@ static const uint16 char_table[256] = { 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, - 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040, + // clang-format on }; } // end namespace internal diff --git a/snappy-sinksource.cc b/snappy-sinksource.cc index 369a132..8214964 100644 --- a/snappy-sinksource.cc +++ b/snappy-sinksource.cc @@ -26,23 +26,31 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include +#include +#include #include "snappy-sinksource.h" namespace snappy { -Source::~Source() { } +Source::~Source() = default; -Sink::~Sink() { } +Sink::~Sink() = default; char* Sink::GetAppendBuffer(size_t length, char* scratch) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)length; + return scratch; } char* Sink::GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)min_size; + (void)desired_size_hint; + *allocated_size = scratch_size; return scratch; } @@ -55,7 +63,7 @@ void Sink::AppendAndTakeOwnership( (*deleter)(deleter_arg, bytes, n); } -ByteArraySource::~ByteArraySource() { } +ByteArraySource::~ByteArraySource() = default; size_t ByteArraySource::Available() const { return left_; } @@ -74,22 +82,26 @@ UncheckedByteArraySink::~UncheckedByteArraySink() { } void UncheckedByteArraySink::Append(const char* data, size_t n) { // Do no copying if the caller filled in the result of GetAppendBuffer() if (data != dest_) { - memcpy(dest_, data, n); + std::memcpy(dest_, data, n); } dest_ += n; } char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)len; + (void)scratch; + return dest_; } void UncheckedByteArraySink::AppendAndTakeOwnership( - char* data, size_t n, + char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), void *deleter_arg) { - if (data != dest_) { - memcpy(dest_, data, n); - (*deleter)(deleter_arg, data, n); + if (bytes != dest_) { + std::memcpy(dest_, bytes, n); + (*deleter)(deleter_arg, bytes, n); } dest_ += n; } @@ -97,6 +109,11 @@ void UncheckedByteArraySink::AppendAndTakeOwnership( char* UncheckedByteArraySink::GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, size_t scratch_size, size_t* allocated_size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. 
+ (void)min_size; + (void)scratch; + (void)scratch_size; + *allocated_size = desired_size_hint; return dest_; } diff --git a/snappy-sinksource.h b/snappy-sinksource.h index 8afcdaa..3c74e1b 100644 --- a/snappy-sinksource.h +++ b/snappy-sinksource.h @@ -146,10 +146,10 @@ class Source { class ByteArraySource : public Source { public: ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } - virtual ~ByteArraySource(); - virtual size_t Available() const; - virtual const char* Peek(size_t* len); - virtual void Skip(size_t n); + ~ByteArraySource() override; + size_t Available() const override; + const char* Peek(size_t* len) override; + void Skip(size_t n) override; private: const char* ptr_; size_t left_; @@ -159,15 +159,15 @@ class ByteArraySource : public Source { class UncheckedByteArraySink : public Sink { public: explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } - virtual ~UncheckedByteArraySink(); - virtual void Append(const char* data, size_t n); - virtual char* GetAppendBuffer(size_t len, char* scratch); - virtual char* GetAppendBufferVariable( + ~UncheckedByteArraySink() override; + void Append(const char* data, size_t n) override; + char* GetAppendBuffer(size_t len, char* scratch) override; + char* GetAppendBufferVariable( size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - virtual void AppendAndTakeOwnership( + size_t scratch_size, size_t* allocated_size) override; + void AppendAndTakeOwnership( char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); + void *deleter_arg) override; // Return the current output pointer so that a caller can see how // many bytes were produced. diff --git a/snappy-stubs-internal.cc b/snappy-stubs-internal.cc index 6ed3343..0bc8c2d 100644 --- a/snappy-stubs-internal.cc +++ b/snappy-stubs-internal.cc @@ -33,7 +33,7 @@ namespace snappy { -void Varint::Append32(string* s, uint32 value) { +void Varint::Append32(std::string* s, uint32_t value) { char buf[Varint::kMax32]; const char* p = Varint::Encode32(buf, value); s->append(buf, p - buf); diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h index 6979e1a..526c38b 100644 --- a/snappy-stubs-internal.h +++ b/snappy-stubs-internal.h @@ -31,31 +31,49 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H #include "config.h" #endif -#include +#include -#include -#include -#include +#include +#include +#include +#include +#include -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include #endif -#include "snappy-stubs-public.h" +#if HAVE_UNISTD_H +#include +#endif + +#if defined(_MSC_VER) +#include +#endif // defined(_MSC_VER) -#if defined(__x86_64__) +#ifndef __has_feature +#define __has_feature(x) 0 +#endif -// Enable 64-bit optimized versions of some routines. -#define ARCH_K8 1 +#if __has_feature(memory_sanitizer) +#include +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ + __msan_unpoison((address), (size)) +#else +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ +#endif // __has_feature(memory_sanitizer) -#elif defined(__ppc64__) +#include "snappy-stubs-public.h" +// Used to enable 64-bit optimized versions of some routines. +#if defined(__PPC64__) || defined(__powerpc64__) #define ARCH_PPC 1 - +#elif defined(__aarch64__) || defined(_M_ARM64) +#define ARCH_ARM 1 #endif // Needed by OS X, among others. 
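To make the MemorySanitizer annotation above concrete, here is a small usage sketch; the helper name LoadWithSlop is hypothetical, and the pattern assumes the caller only consumes the first initialized_len bytes of the result:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical helper: load 8 bytes even though only the first
// initialized_len (<= 8) bytes of src are defined. The annotation marks the
// trailing slop as initialized, so MSan does not report when the masked-off
// high bytes flow into later computations.
static uint64_t LoadWithSlop(const char* src, size_t initialized_len) {
  SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(src + initialized_len,
                                        8 - initialized_len);
  uint64_t v;
  std::memcpy(&v, src, sizeof(v));
  return v;
}

This mirrors how LoadPattern() in snappy.cc, further below, annotates the bytes past pattern_size before an over-wide vector load whose extra lanes are masked out by the shuffle.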
@@ -69,222 +87,83 @@ #ifdef ARRAYSIZE #undef ARRAYSIZE #endif -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) +#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))} // Static prediction hints. -#ifdef HAVE_BUILTIN_EXPECT -#define PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#if HAVE_BUILTIN_EXPECT +#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) #else -#define PREDICT_FALSE(x) x -#define PREDICT_TRUE(x) x -#endif - -// This is only used for recomputing the tag byte table used during -// decompression; for simplicity we just remove it from the open-source -// version (anyone who wants to regenerate it can just do the call -// themselves within main()). -#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value -#define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name - -namespace snappy { - -static const uint32 kuint32max = static_cast(0xFFFFFFFF); -static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); - -// Potentially unaligned loads and stores. - -// x86 and PowerPC can simply do these loads and stores native. - -#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) +#define SNAPPY_PREDICT_FALSE(x) x +#define SNAPPY_PREDICT_TRUE(x) x +#endif // HAVE_BUILTIN_EXPECT -#define UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) -#define UNALIGNED_LOAD64(_p) (*reinterpret_cast(_p)) - -#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast(_p) = (_val)) -#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast(_p) = (_val)) +// Inlining hints. +#if HAVE_ATTRIBUTE_ALWAYS_INLINE +#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE +#endif // HAVE_ATTRIBUTE_ALWAYS_INLINE -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). There's no simple #define that says just -// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6 -// sub-architectures. -// -// This is a mess, but there's not much we can do about it. -// -// To further complicate matters, only LDR instructions (single reads) are -// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we -// explicitly tell the compiler that these accesses can be unaligned, it can and -// will combine accesses. On armcc, the way to signal this is done by accessing -// through the type (uint32 __packed *), but GCC has no such attribute -// (it ignores __attribute__((packed)) on individual variables). However, -// we can tell it that a _struct_ is unaligned, which has the same effect, -// so we do that. 
- -#elif defined(__arm__) && \ - !defined(__ARM_ARCH_4__) && \ - !defined(__ARM_ARCH_4T__) && \ - !defined(__ARM_ARCH_5__) && \ - !defined(__ARM_ARCH_5T__) && \ - !defined(__ARM_ARCH_5TE__) && \ - !defined(__ARM_ARCH_5TEJ__) && \ - !defined(__ARM_ARCH_6__) && \ - !defined(__ARM_ARCH_6J__) && \ - !defined(__ARM_ARCH_6K__) && \ - !defined(__ARM_ARCH_6Z__) && \ - !defined(__ARM_ARCH_6ZK__) && \ - !defined(__ARM_ARCH_6T2__) - -#if __GNUC__ -#define ATTRIBUTE_PACKED __attribute__((__packed__)) +#if HAVE_BUILTIN_PREFETCH +#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3) #else -#define ATTRIBUTE_PACKED +#define SNAPPY_PREFETCH(ptr) (void)(ptr) #endif -namespace base { -namespace internal { - -struct Unaligned16Struct { - uint16 value; - uint8 dummy; // To make the size non-power-of-two. -} ATTRIBUTE_PACKED; - -struct Unaligned32Struct { - uint32 value; - uint8 dummy; // To make the size non-power-of-two. -} ATTRIBUTE_PACKED; - -} // namespace internal -} // namespace base - -#define UNALIGNED_LOAD16(_p) \ - ((reinterpret_cast(_p))->value) -#define UNALIGNED_LOAD32(_p) \ - ((reinterpret_cast(_p))->value) - -#define UNALIGNED_STORE16(_p, _val) \ - ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \ - (_val)) -#define UNALIGNED_STORE32(_p, _val) \ - ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \ - (_val)) - -// TODO(user): NEON supports unaligned 64-bit loads and stores. -// See if that would be more efficient on platforms supporting it, -// at least for copies. - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} - -#else +// Stubbed version of ABSL_FLAG. +// +// In the open source version, flags can only be changed at compile time. +#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \ + flag_type FLAGS_ ## flag_name = default_value -// These functions are provided for architectures that don't support -// unaligned loads and stores. +namespace snappy { -inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; -} +// Stubbed version of absl::GetFlag(). +template +inline T GetFlag(T flag) { return flag; } -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; -} +static const uint32_t kuint32max = std::numeric_limits::max(); +static const int64_t kint64max = std::numeric_limits::max(); -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} +// Potentially unaligned loads and stores. -inline void UNALIGNED_STORE16(void *p, uint16 v) { - memcpy(p, &v, sizeof v); +inline uint16_t UNALIGNED_LOAD16(const void *p) { + // Compiles to a single movzx/ldrh on clang/gcc/msvc. + uint16_t v; + std::memcpy(&v, p, sizeof(v)); + return v; } -inline void UNALIGNED_STORE32(void *p, uint32 v) { - memcpy(p, &v, sizeof v); +inline uint32_t UNALIGNED_LOAD32(const void *p) { + // Compiles to a single mov/ldr on clang/gcc/msvc. + uint32_t v; + std::memcpy(&v, p, sizeof(v)); + return v; } -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); +inline uint64_t UNALIGNED_LOAD64(const void *p) { + // Compiles to a single mov/ldr on clang/gcc/msvc. + uint64_t v; + std::memcpy(&v, p, sizeof(v)); + return v; } -#endif - -// The following guarantees declaration of the byte swap functions. 
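The memcpy-based helpers above are the portable idiom for unaligned access: a fixed-size std::memcpy is defined for any address and, at -O1 and up, the major compilers collapse it to the same single load or store a pointer cast would emit. A minimal before/after comparison (illustrative only):

#include <cstdint>
#include <cstring>

uint32_t LoadU32(const void* p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));  // defined for any alignment of p
  return v;
}

// The cast-and-dereference alternative is undefined behavior when p is not
// suitably aligned (and can also break under strict aliasing), even though
// it often appears to work:
//   uint32_t v = *reinterpret_cast<const uint32_t*>(p);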
-#ifdef WORDS_BIGENDIAN - -#ifdef HAVE_SYS_BYTEORDER_H -#include -#endif - -#ifdef HAVE_SYS_ENDIAN_H -#include -#endif - -#ifdef _MSC_VER -#include -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(HAVE_BYTESWAP_H) -#include - -#elif defined(bswap32) -// FreeBSD defines bswap{16,32,64} in (already #included). -#define bswap_16(x) bswap16(x) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) - -#elif defined(BSWAP_64) -// Solaris 10 defines BSWAP_{16,32,64} in (already #included). -#define bswap_16(x) BSWAP_16(x) -#define bswap_32(x) BSWAP_32(x) -#define bswap_64(x) BSWAP_64(x) - -#else - -inline uint16 bswap_16(uint16 x) { - return (x << 8) | (x >> 8); +inline void UNALIGNED_STORE16(void *p, uint16_t v) { + // Compiles to a single mov/strh on clang/gcc/msvc. + std::memcpy(p, &v, sizeof(v)); } -inline uint32 bswap_32(uint32 x) { - x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); - return (x >> 16) | (x << 16); +inline void UNALIGNED_STORE32(void *p, uint32_t v) { + // Compiles to a single mov/str on clang/gcc/msvc. + std::memcpy(p, &v, sizeof(v)); } -inline uint64 bswap_64(uint64 x) { - x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); - x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); - return (x >> 32) | (x << 32); +inline void UNALIGNED_STORE64(void *p, uint64_t v) { + // Compiles to a single mov/str on clang/gcc/msvc. + std::memcpy(p, &v, sizeof(v)); } -#endif - -#endif // WORDS_BIGENDIAN - // Convert to little-endian storage, opposite of network format. // Convert x from host to little endian: x = LittleEndian.FromHost(x); // convert x from little endian to host: x = LittleEndian.ToHost(x); @@ -296,87 +175,194 @@ inline uint64 bswap_64(uint64 x) { // x = LittleEndian.Load16(p); class LittleEndian { public: - // Conversion functions. -#ifdef WORDS_BIGENDIAN - - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } - - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } - - static bool IsLittleEndian() { return false; } - -#else // !defined(WORDS_BIGENDIAN) - - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } - - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } + // Functions to do unaligned loads and stores in little-endian order. + static inline uint16_t Load16(const void *ptr) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + const uint8_t* const buffer = reinterpret_cast(ptr); + return (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8); +#else + // memcpy() turns into a single instruction early in the optimization + // pipeline (relatively to a series of byte accesses). So, using memcpy + // instead of byte accesses may lead to better decisions in more stages of + // the optimization pipeline. + uint16_t value; + std::memcpy(&value, ptr, 2); + return value; +#endif + } - static bool IsLittleEndian() { return true; } + static inline uint32_t Load32(const void *ptr) { + // Compiles to a single mov/str on recent clang and gcc. 
+#if SNAPPY_IS_BIG_ENDIAN + const uint8_t* const buffer = reinterpret_cast(ptr); + return (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8) | + (static_cast(buffer[2]) << 16) | + (static_cast(buffer[3]) << 24); +#else + // See Load16() for the rationale of using memcpy(). + uint32_t value; + std::memcpy(&value, ptr, 4); + return value; +#endif + } -#endif // !defined(WORDS_BIGENDIAN) + static inline uint64_t Load64(const void *ptr) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + const uint8_t* const buffer = reinterpret_cast(ptr); + return (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8) | + (static_cast(buffer[2]) << 16) | + (static_cast(buffer[3]) << 24) | + (static_cast(buffer[4]) << 32) | + (static_cast(buffer[5]) << 40) | + (static_cast(buffer[6]) << 48) | + (static_cast(buffer[7]) << 56); +#else + // See Load16() for the rationale of using memcpy(). + uint64_t value; + std::memcpy(&value, ptr, 8); + return value; +#endif + } - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); + static inline void Store16(void *dst, uint16_t value) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + uint8_t* const buffer = reinterpret_cast(dst); + buffer[0] = static_cast(value); + buffer[1] = static_cast(value >> 8); +#else + // See Load16() for the rationale of using memcpy(). + std::memcpy(dst, &value, 2); +#endif } - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); + static void Store32(void *dst, uint32_t value) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + uint8_t* const buffer = reinterpret_cast(dst); + buffer[0] = static_cast(value); + buffer[1] = static_cast(value >> 8); + buffer[2] = static_cast(value >> 16); + buffer[3] = static_cast(value >> 24); +#else + // See Load16() for the rationale of using memcpy(). + std::memcpy(dst, &value, 4); +#endif } - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); + static void Store64(void* dst, uint64_t value) { + // Compiles to a single mov/str on recent clang and gcc. +#if SNAPPY_IS_BIG_ENDIAN + uint8_t* const buffer = reinterpret_cast(dst); + buffer[0] = static_cast(value); + buffer[1] = static_cast(value >> 8); + buffer[2] = static_cast(value >> 16); + buffer[3] = static_cast(value >> 24); + buffer[4] = static_cast(value >> 32); + buffer[5] = static_cast(value >> 40); + buffer[6] = static_cast(value >> 48); + buffer[7] = static_cast(value >> 56); +#else + // See Load16() for the rationale of using memcpy(). + std::memcpy(dst, &value, 8); +#endif } - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); + static inline constexpr bool IsLittleEndian() { +#if SNAPPY_IS_BIG_ENDIAN + return false; +#else + return true; +#endif // SNAPPY_IS_BIG_ENDIAN } }; // Some bit-manipulation functions. class Bits { public: + // Return floor(log2(n)) for positive integer n. + static int Log2FloorNonZero(uint32_t n); + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); + static int Log2Floor(uint32_t n); // Return the first set least / most significant bit, 0-indexed. Returns an // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except // that it's 0-indexed. 
- static int FindLSBSetNonZero(uint32 n); - static int FindLSBSetNonZero64(uint64 n); + static int FindLSBSetNonZero(uint32_t n); + + static int FindLSBSetNonZero64(uint64_t n); private: - DISALLOW_COPY_AND_ASSIGN(Bits); + // No copying + Bits(const Bits&); + void operator=(const Bits&); }; -#ifdef HAVE_BUILTIN_CTZ +#if HAVE_BUILTIN_CTZ + +inline int Bits::Log2FloorNonZero(uint32_t n) { + assert(n != 0); + // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof + // represents subtraction in base 2 and observes that there's no carry. + // + // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). + // Using "31 ^" here instead of "31 -" allows the optimizer to strip the + // function body down to _bit_scan_reverse(x). + return 31 ^ __builtin_clz(n); +} -inline int Bits::Log2Floor(uint32 n) { - return n == 0 ? -1 : 31 ^ __builtin_clz(n); +inline int Bits::Log2Floor(uint32_t n) { + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); } -inline int Bits::FindLSBSetNonZero(uint32 n) { +inline int Bits::FindLSBSetNonZero(uint32_t n) { + assert(n != 0); return __builtin_ctz(n); } -inline int Bits::FindLSBSetNonZero64(uint64 n) { - return __builtin_ctzll(n); +#elif defined(_MSC_VER) + +inline int Bits::Log2FloorNonZero(uint32_t n) { + assert(n != 0); + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + _BitScanReverse(&where, n); + return static_cast(where); +} + +inline int Bits::Log2Floor(uint32_t n) { + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + if (_BitScanReverse(&where, n)) + return static_cast(where); + return -1; +} + +inline int Bits::FindLSBSetNonZero(uint32_t n) { + assert(n != 0); + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + if (_BitScanForward(&where, n)) + return static_cast(where); + return 32; } #else // Portable versions. -inline int Bits::Log2Floor(uint32 n) { - if (n == 0) - return -1; +inline int Bits::Log2FloorNonZero(uint32_t n) { + assert(n != 0); + int log = 0; - uint32 value = n; + uint32_t value = n; for (int i = 4; i >= 0; --i) { int shift = (1 << i); - uint32 x = value >> shift; + uint32_t x = value >> shift; if (x != 0) { value = x; log += shift; @@ -386,10 +372,16 @@ inline int Bits::Log2Floor(uint32 n) { return log; } -inline int Bits::FindLSBSetNonZero(uint32 n) { +inline int Bits::Log2Floor(uint32_t n) { + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); +} + +inline int Bits::FindLSBSetNonZero(uint32_t n) { + assert(n != 0); + int rc = 31; for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; + const uint32_t x = n << shift; if (x != 0) { n = x; rc -= shift; @@ -399,23 +391,48 @@ inline int Bits::FindLSBSetNonZero(uint32 n) { return rc; } +#endif // End portable versions. + +#if HAVE_BUILTIN_CTZ + +inline int Bits::FindLSBSetNonZero64(uint64_t n) { + assert(n != 0); + return __builtin_ctzll(n); +} + +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) +// _BitScanForward64() is only available on x64 and ARM64. + +inline int Bits::FindLSBSetNonZero64(uint64_t n) { + assert(n != 0); + // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long. + unsigned long where; + if (_BitScanForward64(&where, n)) + return static_cast(where); + return 64; +} + +#else // Portable version. + // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). 
-inline int Bits::FindLSBSetNonZero64(uint64 n) { - const uint32 bottombits = static_cast(n); +inline int Bits::FindLSBSetNonZero64(uint64_t n) { + assert(n != 0); + + const uint32_t bottombits = static_cast(n); if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); + // Bottom bits are zero, so scan the top bits. + return 32 + FindLSBSetNonZero(static_cast(n >> 32)); } else { return FindLSBSetNonZero(bottombits); } } -#endif // End portable versions. +#endif // HAVE_BUILTIN_CTZ // Variable-length integer encoding. class Varint { public: - // Maximum lengths of varint encoding of uint32. + // Maximum lengths of varint encoding of uint32_t. static const int kMax32 = 5; // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. @@ -424,23 +441,23 @@ class Varint { // past the last byte of the varint32. Else returns NULL. On success, // "result <= limit". static const char* Parse32WithLimit(const char* ptr, const char* limit, - uint32* OUTPUT); + uint32_t* OUTPUT); // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". // EFFECTS Encodes "v" into "ptr" and returns a pointer to the // byte just past the last encoded byte. - static char* Encode32(char* ptr, uint32 v); + static char* Encode32(char* ptr, uint32_t v); // EFFECTS Appends the varint representation of "value" to "*s". - static void Append32(string* s, uint32 value); + static void Append32(std::string* s, uint32_t value); }; inline const char* Varint::Parse32WithLimit(const char* p, const char* l, - uint32* OUTPUT) { + uint32_t* OUTPUT) { const unsigned char* ptr = reinterpret_cast(p); const unsigned char* limit = reinterpret_cast(l); - uint32 b, result; + uint32_t b, result; if (ptr >= limit) return NULL; b = *(ptr++); result = b & 127; if (b < 128) goto done; if (ptr >= limit) return NULL; @@ -457,30 +474,30 @@ inline const char* Varint::Parse32WithLimit(const char* p, return reinterpret_cast(ptr); } -inline char* Varint::Encode32(char* sptr, uint32 v) { +inline char* Varint::Encode32(char* sptr, uint32_t v) { // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(sptr); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; + uint8_t* ptr = reinterpret_cast(sptr); + static const uint8_t B = 128; + if (v < (1 << 7)) { + *(ptr++) = static_cast(v); + } else if (v < (1 << 14)) { + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast(v >> 7); + } else if (v < (1 << 21)) { + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast((v >> 7) | B); + *(ptr++) = static_cast(v >> 14); + } else if (v < (1 << 28)) { + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast((v >> 7) | B); + *(ptr++) = static_cast((v >> 14) | B); + *(ptr++) = static_cast(v >> 21); } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; + *(ptr++) = static_cast(v | B); + *(ptr++) = static_cast((v>>7) | B); + *(ptr++) = static_cast((v>>14) | B); + *(ptr++) = static_cast((v>>21) | B); + *(ptr++) = static_cast(v >> 28); } return reinterpret_cast(ptr); } @@ -489,7 +506,7 @@ inline char* Varint::Encode32(char* sptr, uint32 v) { // replace this function with one that resizes the string 
without // filling the new space with zeros (if applicable) -- // it will be non-portable but faster. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { +inline void STLStringResizeUninitialized(std::string* s, size_t new_size) { s->resize(new_size); } @@ -505,7 +522,7 @@ inline void STLStringResizeUninitialized(string* s, size_t new_size) { // (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) // proposes this as the method. It will officially be part of the standard // for C++0x. This should already work on all current implementations. -inline char* string_as_array(string* str) { +inline char* string_as_array(std::string* str) { return str->empty() ? NULL : &*str->begin(); } diff --git a/snappy-stubs-public.h.in b/snappy-stubs-public.h.in index 96989ac..02947fa 100644 --- a/snappy-stubs-public.h.in +++ b/snappy-stubs-public.h.in @@ -1,5 +1,4 @@ // Copyright 2011 Google Inc. All Rights Reserved. -// Author: sesse@google.com (Steinar H. Gunderson) // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -36,64 +35,28 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#if @ac_cv_have_stdint_h@ -#include -#endif +#include -#if @ac_cv_have_stddef_h@ -#include -#endif - -#if @ac_cv_have_sys_uio_h@ +#if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H #include -#endif +#endif // HAVE_SYS_UIO_H -#define SNAPPY_MAJOR @SNAPPY_MAJOR@ -#define SNAPPY_MINOR @SNAPPY_MINOR@ -#define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@ +#define SNAPPY_MAJOR ${PROJECT_VERSION_MAJOR} +#define SNAPPY_MINOR ${PROJECT_VERSION_MINOR} +#define SNAPPY_PATCHLEVEL ${PROJECT_VERSION_PATCH} #define SNAPPY_VERSION \ ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) -#include - namespace snappy { -#if @ac_cv_have_stdint_h@ -typedef int8_t int8; -typedef uint8_t uint8; -typedef int16_t int16; -typedef uint16_t uint16; -typedef int32_t int32; -typedef uint32_t uint32; -typedef int64_t int64; -typedef uint64_t uint64; -#else -typedef signed char int8; -typedef unsigned char uint8; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -#endif - -typedef std::string string; - -#ifndef DISALLOW_COPY_AND_ASSIGN -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) -#endif - -#if !@ac_cv_have_sys_uio_h@ +#if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H // Windows does not have an iovec type, yet the concept is universally useful. // It is simple to define it ourselves, so we put it inside our own namespace. struct iovec { - void* iov_base; - size_t iov_len; + void* iov_base; + size_t iov_len; }; -#endif +#endif // !HAVE_SYS_UIO_H } // namespace snappy diff --git a/snappy-test.cc b/snappy-test.cc index 01d5541..aae6072 100644 --- a/snappy-test.cc +++ b/snappy-test.cc @@ -28,239 +28,130 @@ // // Various stubs for the unit tests for the open-source version of Snappy. -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_WINDOWS_H -// Needed to be able to use std::max without workarounds in the source code. 
-// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts -#define NOMINMAX -#include -#endif - #include "snappy-test.h" #include +#include +#include +#include +#include +#include -DEFINE_bool(run_microbenchmarks, true, - "Run microbenchmarks before doing anything else."); +namespace file { -namespace snappy { +OptionsStub::OptionsStub() = default; +OptionsStub::~OptionsStub() = default; -string ReadTestDataFile(const string& base, size_t size_limit) { - string contents; - const char* srcdir = getenv("srcdir"); // This is set by Automake. - string prefix; - if (srcdir) { - prefix = string(srcdir) + "/"; - } - file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() - ).CheckSuccess(); - if (size_limit > 0) { - contents = contents.substr(0, size_limit); - } - return contents; -} - -string ReadTestDataFile(const string& base) { - return ReadTestDataFile(base, 0); +const OptionsStub &Defaults() { + static OptionsStub defaults; + return defaults; } -string StringPrintf(const char* format, ...) { - char buf[4096]; - va_list ap; - va_start(ap, format); - vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - return buf; -} +StatusStub::StatusStub() = default; +StatusStub::StatusStub(const StatusStub &) = default; +StatusStub &StatusStub::operator=(const StatusStub &) = default; +StatusStub::~StatusStub() = default; -bool benchmark_running = false; -int64 benchmark_real_time_us = 0; -int64 benchmark_cpu_time_us = 0; -string *benchmark_label = NULL; -int64 benchmark_bytes_processed = 0; +bool StatusStub::ok() { return true; } -void ResetBenchmarkTiming() { - benchmark_real_time_us = 0; - benchmark_cpu_time_us = 0; -} +StatusStub GetContents(const std::string &filename, std::string *output, + const OptionsStub & /* options */) { + std::FILE *fp = std::fopen(filename.c_str(), "rb"); + if (fp == nullptr) { + std::perror(filename.c_str()); + std::exit(1); + } -#ifdef WIN32 -LARGE_INTEGER benchmark_start_real; -FILETIME benchmark_start_cpu; -#else // WIN32 -struct timeval benchmark_start_real; -struct rusage benchmark_start_cpu; -#endif // WIN32 - -void StartBenchmarkTiming() { -#ifdef WIN32 - QueryPerformanceCounter(&benchmark_start_real); - FILETIME dummy; - CHECK(GetProcessTimes( - GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_start_cpu)); -#else - gettimeofday(&benchmark_start_real, NULL); - if (getrusage(RUSAGE_SELF, &benchmark_start_cpu) == -1) { - perror("getrusage(RUSAGE_SELF)"); - exit(1); + output->clear(); + while (!std::feof(fp)) { + char buffer[4096]; + size_t bytes_read = std::fread(buffer, 1, sizeof(buffer), fp); + if (bytes_read == 0 && std::ferror(fp)) { + std::perror("fread"); + std::exit(1); + } + output->append(buffer, bytes_read); } -#endif - benchmark_running = true; + + std::fclose(fp); + return StatusStub(); } -void StopBenchmarkTiming() { - if (!benchmark_running) { - return; +StatusStub SetContents(const std::string &file_name, const std::string &content, + const OptionsStub & /* options */) { + std::FILE *fp = std::fopen(file_name.c_str(), "wb"); + if (fp == nullptr) { + std::perror(file_name.c_str()); + std::exit(1); } -#ifdef WIN32 - LARGE_INTEGER benchmark_stop_real; - LARGE_INTEGER benchmark_frequency; - QueryPerformanceCounter(&benchmark_stop_real); - QueryPerformanceFrequency(&benchmark_frequency); - - double elapsed_real = static_cast( - benchmark_stop_real.QuadPart - benchmark_start_real.QuadPart) / - benchmark_frequency.QuadPart; - benchmark_real_time_us += elapsed_real * 1e6 + 0.5; 
- - FILETIME benchmark_stop_cpu, dummy; - CHECK(GetProcessTimes( - GetCurrentProcess(), &dummy, &dummy, &dummy, &benchmark_stop_cpu)); - - ULARGE_INTEGER start_ulargeint; - start_ulargeint.LowPart = benchmark_start_cpu.dwLowDateTime; - start_ulargeint.HighPart = benchmark_start_cpu.dwHighDateTime; - - ULARGE_INTEGER stop_ulargeint; - stop_ulargeint.LowPart = benchmark_stop_cpu.dwLowDateTime; - stop_ulargeint.HighPart = benchmark_stop_cpu.dwHighDateTime; - - benchmark_cpu_time_us += - (stop_ulargeint.QuadPart - start_ulargeint.QuadPart + 5) / 10; -#else // WIN32 - struct timeval benchmark_stop_real; - gettimeofday(&benchmark_stop_real, NULL); - benchmark_real_time_us += - 1000000 * (benchmark_stop_real.tv_sec - benchmark_start_real.tv_sec); - benchmark_real_time_us += - (benchmark_stop_real.tv_usec - benchmark_start_real.tv_usec); - - struct rusage benchmark_stop_cpu; - if (getrusage(RUSAGE_SELF, &benchmark_stop_cpu) == -1) { - perror("getrusage(RUSAGE_SELF)"); - exit(1); + size_t bytes_written = std::fwrite(content.data(), 1, content.size(), fp); + if (bytes_written != content.size()) { + std::perror("fwrite"); + std::exit(1); } - benchmark_cpu_time_us += 1000000 * (benchmark_stop_cpu.ru_utime.tv_sec - - benchmark_start_cpu.ru_utime.tv_sec); - benchmark_cpu_time_us += (benchmark_stop_cpu.ru_utime.tv_usec - - benchmark_start_cpu.ru_utime.tv_usec); -#endif // WIN32 - benchmark_running = false; + std::fclose(fp); + return StatusStub(); } -void SetBenchmarkLabel(const string& str) { - if (benchmark_label) { - delete benchmark_label; +} // namespace file + +namespace snappy { + +std::string ReadTestDataFile(const std::string& base, size_t size_limit) { + std::string contents; + const char* srcdir = getenv("srcdir"); // This is set by Automake. + std::string prefix; + if (srcdir) { + prefix = std::string(srcdir) + "/"; } - benchmark_label = new string(str); + file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() + ).ok(); + if (size_limit > 0) { + contents = contents.substr(0, size_limit); + } + return contents; } -void SetBenchmarkBytesProcessed(int64 bytes) { - benchmark_bytes_processed = bytes; +std::string StrFormat(const char* format, ...) { + char buffer[4096]; + std::va_list ap; + va_start(ap, format); + std::vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + return buffer; } -struct BenchmarkRun { - int64 real_time_us; - int64 cpu_time_us; -}; +LogMessage::~LogMessage() { std::cerr << std::endl; } -struct BenchmarkCompareCPUTime { - bool operator() (const BenchmarkRun& a, const BenchmarkRun& b) const { - return a.cpu_time_us < b.cpu_time_us; - } -}; - -void Benchmark::Run() { - for (int test_case_num = start_; test_case_num <= stop_; ++test_case_num) { - // Run a few iterations first to find out approximately how fast - // the benchmark is. - const int kCalibrateIterations = 100; - ResetBenchmarkTiming(); - StartBenchmarkTiming(); - (*function_)(kCalibrateIterations, test_case_num); - StopBenchmarkTiming(); - - // Let each test case run for about 200ms, but at least as many - // as we used to calibrate. - // Run five times and pick the median. 
- const int kNumRuns = 5; - const int kMedianPos = kNumRuns / 2; - int num_iterations = 0; - if (benchmark_real_time_us > 0) { - num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us; - } - num_iterations = std::max(num_iterations, kCalibrateIterations); - BenchmarkRun benchmark_runs[kNumRuns]; +LogMessage &LogMessage::operator<<(const std::string &message) { + std::cerr << message; + return *this; +} - for (int run = 0; run < kNumRuns; ++run) { - ResetBenchmarkTiming(); - StartBenchmarkTiming(); - (*function_)(num_iterations, test_case_num); - StopBenchmarkTiming(); +LogMessage &LogMessage::operator<<(int number) { + std::cerr << number; + return *this; +} - benchmark_runs[run].real_time_us = benchmark_real_time_us; - benchmark_runs[run].cpu_time_us = benchmark_cpu_time_us; - } +#ifdef _MSC_VER +// ~LogMessageCrash calls std::abort() and therefore never exits. This is by +// design, so temporarily disable warning C4722. +#pragma warning(push) +#pragma warning(disable : 4722) +#endif - string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num); - string human_readable_speed; - - std::nth_element(benchmark_runs, - benchmark_runs + kMedianPos, - benchmark_runs + kNumRuns, - BenchmarkCompareCPUTime()); - int64 real_time_us = benchmark_runs[kMedianPos].real_time_us; - int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us; - if (cpu_time_us <= 0) { - human_readable_speed = "?"; - } else { - int64 bytes_per_second = - benchmark_bytes_processed * 1000000 / cpu_time_us; - if (bytes_per_second < 1024) { - human_readable_speed = StringPrintf("%dB/s", bytes_per_second); - } else if (bytes_per_second < 1024 * 1024) { - human_readable_speed = StringPrintf( - "%.1fkB/s", bytes_per_second / 1024.0f); - } else if (bytes_per_second < 1024 * 1024 * 1024) { - human_readable_speed = StringPrintf( - "%.1fMB/s", bytes_per_second / (1024.0f * 1024.0f)); - } else { - human_readable_speed = StringPrintf( - "%.1fGB/s", bytes_per_second / (1024.0f * 1024.0f * 1024.0f)); - } - } +LogMessageCrash::~LogMessageCrash() { + std::cerr << std::endl; + std::abort(); +} - fprintf(stderr, -#ifdef WIN32 - "%-18s %10I64d %10I64d %10d %s %s\n", -#else - "%-18s %10lld %10lld %10d %s %s\n", +#ifdef _MSC_VER +#pragma warning(pop) #endif - heading.c_str(), - static_cast(real_time_us * 1000 / num_iterations), - static_cast(cpu_time_us * 1000 / num_iterations), - num_iterations, - human_readable_speed.c_str(), - benchmark_label->c_str()); - } -} -#ifdef HAVE_LIBZ +#if HAVE_LIBZ ZLib::ZLib() : comp_init_(false), diff --git a/snappy-test.h b/snappy-test.h index cebb4ee..65f3725 100644 --- a/snappy-test.h +++ b/snappy-test.h @@ -31,241 +31,110 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ -#include -#include +#if HAVE_CONFIG_H +#include "config.h" +#endif #include "snappy-stubs-internal.h" -#include -#include - -#ifdef HAVE_SYS_MMAN_H +#if HAVE_SYS_MMAN_H #include #endif -#ifdef HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H #include #endif -#ifdef HAVE_SYS_TIME_H +#if HAVE_SYS_TIME_H #include #endif -#ifdef HAVE_WINDOWS_H +#if HAVE_WINDOWS_H +// Needed to be able to use std::max without workarounds in the source code. 
+// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts +#define NOMINMAX #include #endif -#include - -#ifdef HAVE_GTEST - -#include -#undef TYPED_TEST -#define TYPED_TEST TEST -#define INIT_GTEST(argc, argv) ::testing::InitGoogleTest(argc, *argv) - -#else - -// Stubs for if the user doesn't have Google Test installed. - -#define TEST(test_case, test_subcase) \ - void Test_ ## test_case ## _ ## test_subcase() -#define INIT_GTEST(argc, argv) - -#define TYPED_TEST TEST -#define EXPECT_EQ CHECK_EQ -#define EXPECT_NE CHECK_NE -#define EXPECT_FALSE(cond) CHECK(!(cond)) - -#endif - -#ifdef HAVE_GFLAGS - -#include - -// This is tricky; both gflags and Google Test want to look at the command line -// arguments. Google Test seems to be the most happy with unknown arguments, -// though, so we call it first and hope for the best. -#define InitGoogle(argv0, argc, argv, remove_flags) \ - INIT_GTEST(argc, argv); \ - google::ParseCommandLineFlags(argc, argv, remove_flags); - -#else - -// If we don't have the gflags package installed, these can only be -// changed at compile time. -#define DEFINE_int32(flag_name, default_value, description) \ - static int FLAGS_ ## flag_name = default_value; +#define InitGoogle(argv0, argc, argv, remove_flags) ((void)(0)) -#define InitGoogle(argv0, argc, argv, remove_flags) \ - INIT_GTEST(argc, argv) - -#endif - -#ifdef HAVE_LIBZ +#if HAVE_LIBZ #include "zlib.h" #endif -#ifdef HAVE_LIBLZO2 +#if HAVE_LIBLZO2 #include "lzo/lzo1x.h" #endif -#ifdef HAVE_LIBLZF -extern "C" { -#include "lzf.h" -} +#if HAVE_LIBLZ4 +#include "lz4.h" #endif -#ifdef HAVE_LIBQUICKLZ -#include "quicklz.h" -#endif +namespace file { + +// Stubs the class file::Options. +// +// This class should not be instantiated explicitly. It should only be used by +// passing file::Defaults() to file::GetContents() / file::SetContents(). +class OptionsStub { + public: + OptionsStub(); + OptionsStub(const OptionsStub &) = delete; + OptionsStub &operator=(const OptionsStub &) = delete; + ~OptionsStub(); +}; -namespace { +const OptionsStub &Defaults(); -namespace file { - int Defaults() { return 0; } - - class DummyStatus { - public: - void CheckSuccess() { } - }; - - DummyStatus GetContents( - const std::string& filename, std::string* data, int unused) { - FILE* fp = fopen(filename.c_str(), "rb"); - if (fp == NULL) { - perror(filename.c_str()); - exit(1); - } - - data->clear(); - while (!feof(fp)) { - char buf[4096]; - size_t ret = fread(buf, 1, 4096, fp); - if (ret == 0 && ferror(fp)) { - perror("fread"); - exit(1); - } - data->append(std::string(buf, ret)); - } - - fclose(fp); - - return DummyStatus(); - } +// Stubs the class absl::Status. +// +// This class should not be instantiated explicitly. It should only be used by +// passing the result of file::GetContents() / file::SetContents() to +// CHECK_OK(). 
+class StatusStub { + public: + StatusStub(); + StatusStub(const StatusStub &); + StatusStub &operator=(const StatusStub &); + ~StatusStub(); - inline DummyStatus SetContents( - const std::string& filename, const std::string& str, int unused) { - FILE* fp = fopen(filename.c_str(), "wb"); - if (fp == NULL) { - perror(filename.c_str()); - exit(1); - } + bool ok(); +}; - int ret = fwrite(str.data(), str.size(), 1, fp); - if (ret != 1) { - perror("fwrite"); - exit(1); - } +StatusStub GetContents(const std::string &file_name, std::string *output, + const OptionsStub & /* options */); - fclose(fp); +StatusStub SetContents(const std::string &file_name, const std::string &content, + const OptionsStub & /* options */); - return DummyStatus(); - } } // namespace file -} // namespace - namespace snappy { #define FLAGS_test_random_seed 301 -typedef string TypeParam; -void Test_CorruptedTest_VerifyCorrupted(); -void Test_Snappy_SimpleTests(); -void Test_Snappy_MaxBlowup(); -void Test_Snappy_RandomData(); -void Test_Snappy_FourByteOffset(); -void Test_SnappyCorruption_TruncatedVarint(); -void Test_SnappyCorruption_UnterminatedVarint(); -void Test_SnappyCorruption_OverflowingVarint(); -void Test_Snappy_ReadPastEndOfBuffer(); -void Test_Snappy_FindMatchLength(); -void Test_Snappy_FindMatchLengthRandom(); +std::string ReadTestDataFile(const std::string& base, size_t size_limit); -string ReadTestDataFile(const string& base, size_t size_limit); - -string ReadTestDataFile(const string& base); - -// A sprintf() variant that returns a std::string. +// A std::sprintf() variant that returns a std::string. // Not safe for general use due to truncation issues. -string StringPrintf(const char* format, ...); - -// A simple, non-cryptographically-secure random generator. -class ACMRandom { - public: - explicit ACMRandom(uint32 seed) : seed_(seed) {} - - int32 Next(); - - int32 Uniform(int32 n) { - return Next() % n; - } - uint8 Rand8() { - return static_cast((Next() >> 1) & 0x000000ff); - } - bool OneIn(int X) { return Uniform(X) == 0; } - - // Skewed: pick "base" uniformly from range [0,max_log] and then - // return "base" random bits. The effect is to pick a number in the - // range [0,2^max_log-1] with bias towards smaller numbers. - int32 Skewed(int max_log); - - private: - static const uint32 M = 2147483647L; // 2^31-1 - uint32 seed_; -}; - -inline int32 ACMRandom::Next() { - static const uint64 A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 - // We are computing - // seed_ = (seed_ * A) % M, where M = 2^31-1 - // - // seed_ must not be zero or M, or else all subsequent computed values - // will be zero or M respectively. For all other values, seed_ will end - // up cycling through every number in [1,M-1] - uint64 product = seed_ * A; - - // Compute (product % M) using the fact that ((x << 31) % M) == x. - seed_ = (product >> 31) + (product & M); - // The first reduction may overflow by 1 bit, so we may need to repeat. - // mod == M is not possible; using > allows the faster sign-bit-based test. - if (seed_ > M) { - seed_ -= M; - } - return seed_; -} - -inline int32 ACMRandom::Skewed(int max_log) { - const int32 base = (Next() - 1) % (max_log+1); - return (Next() - 1) & ((1u << base)-1); -} +std::string StrFormat(const char* format, ...); // A wall-time clock. This stub is not super-accurate, nor resistant to the // system time changing. 
class CycleTimer { public: - CycleTimer() : real_time_us_(0) {} + inline CycleTimer() : real_time_us_(0) {} + inline ~CycleTimer() = default; - void Start() { + inline void Start() { #ifdef WIN32 QueryPerformanceCounter(&start_); #else - gettimeofday(&start_, NULL); + ::gettimeofday(&start_, nullptr); #endif } - void Stop() { + inline void Stop() { #ifdef WIN32 LARGE_INTEGER stop; LARGE_INTEGER frequency; @@ -276,65 +145,78 @@ class CycleTimer { frequency.QuadPart; real_time_us_ += elapsed * 1e6 + 0.5; #else - struct timeval stop; - gettimeofday(&stop, NULL); + struct ::timeval stop; + ::gettimeofday(&stop, nullptr); real_time_us_ += 1000000 * (stop.tv_sec - start_.tv_sec); real_time_us_ += (stop.tv_usec - start_.tv_usec); #endif } - double Get() { - return real_time_us_ * 1e-6; - } + inline double Get() { return real_time_us_ * 1e-6; } private: - int64 real_time_us_; + int64_t real_time_us_; #ifdef WIN32 LARGE_INTEGER start_; #else - struct timeval start_; + struct ::timeval start_; #endif }; -// Minimalistic microbenchmark framework. +// Logging. + +class LogMessage { + public: + inline LogMessage() = default; + ~LogMessage(); -typedef void (*BenchmarkFunction)(int, int); + LogMessage &operator<<(const std::string &message); + LogMessage &operator<<(int number); +}; -class Benchmark { +class LogMessageCrash : public LogMessage { public: - Benchmark(const string& name, BenchmarkFunction function) : - name_(name), function_(function) {} + inline LogMessageCrash() = default; + ~LogMessageCrash(); +}; - Benchmark* DenseRange(int start, int stop) { - start_ = start; - stop_ = stop; - return this; - } +// This class is used to explicitly ignore values in the conditional +// logging macros. This avoids compiler warnings like "value computed +// is not used" and "statement has no effect". - void Run(); +class LogMessageVoidify { + public: + inline LogMessageVoidify() = default; + inline ~LogMessageVoidify() = default; - private: - const string name_; - const BenchmarkFunction function_; - int start_, stop_; + // This has to be an operator with a precedence lower than << but + // higher than ?: + inline void operator&(const LogMessage &) {} }; -#define BENCHMARK(benchmark_name) \ - Benchmark* Benchmark_ ## benchmark_name = \ - (new Benchmark(#benchmark_name, benchmark_name)) -extern Benchmark* Benchmark_BM_UFlat; -extern Benchmark* Benchmark_BM_UIOVec; -extern Benchmark* Benchmark_BM_UValidate; -extern Benchmark* Benchmark_BM_ZFlat; +// Asserts, both versions activated in debug mode only, +// and ones that are always active. -void ResetBenchmarkTiming(); -void StartBenchmarkTiming(); -void StopBenchmarkTiming(); -void SetBenchmarkLabel(const string& str); -void SetBenchmarkBytesProcessed(int64 bytes); +#define CRASH_UNLESS(condition) \ + SNAPPY_PREDICT_TRUE(condition) \ + ? (void)0 \ + : snappy::LogMessageVoidify() & snappy::LogMessageCrash() -#ifdef HAVE_LIBZ +#define LOG(level) LogMessage() +#define VLOG(level) \ + true ? (void)0 : snappy::LogMessageVoidify() & snappy::LogMessage() + +#define CHECK(cond) CRASH_UNLESS(cond) +#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) +#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) +#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) +#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) +#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) +#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) +#define CHECK_OK(cond) (cond).ok() + +#if HAVE_LIBZ // Object-oriented wrapper around zlib. 
class ZLib { @@ -457,127 +339,4 @@ class ZLib { } // namespace snappy -DECLARE_bool(run_microbenchmarks); - -static inline void RunSpecifiedBenchmarks() { - if (!FLAGS_run_microbenchmarks) { - return; - } - - fprintf(stderr, "Running microbenchmarks.\n"); -#ifndef NDEBUG - fprintf(stderr, "WARNING: Compiled with assertions enabled, will be slow.\n"); -#endif -#ifndef __OPTIMIZE__ - fprintf(stderr, "WARNING: Compiled without optimization, will be slow.\n"); -#endif - fprintf(stderr, "Benchmark Time(ns) CPU(ns) Iterations\n"); - fprintf(stderr, "---------------------------------------------------\n"); - - snappy::Benchmark_BM_UFlat->Run(); - snappy::Benchmark_BM_UIOVec->Run(); - snappy::Benchmark_BM_UValidate->Run(); - snappy::Benchmark_BM_ZFlat->Run(); - - fprintf(stderr, "\n"); -} - -#ifndef HAVE_GTEST - -static inline int RUN_ALL_TESTS() { - fprintf(stderr, "Running correctness tests.\n"); - snappy::Test_CorruptedTest_VerifyCorrupted(); - snappy::Test_Snappy_SimpleTests(); - snappy::Test_Snappy_MaxBlowup(); - snappy::Test_Snappy_RandomData(); - snappy::Test_Snappy_FourByteOffset(); - snappy::Test_SnappyCorruption_TruncatedVarint(); - snappy::Test_SnappyCorruption_UnterminatedVarint(); - snappy::Test_SnappyCorruption_OverflowingVarint(); - snappy::Test_Snappy_ReadPastEndOfBuffer(); - snappy::Test_Snappy_FindMatchLength(); - snappy::Test_Snappy_FindMatchLengthRandom(); - fprintf(stderr, "All tests passed.\n"); - - return 0; -} - -#endif // HAVE_GTEST - -// For main(). -namespace snappy { - -// Logging. - -#define LOG(level) LogMessage() -#define VLOG(level) true ? (void)0 : \ - snappy::LogMessageVoidify() & snappy::LogMessage() - -class LogMessage { - public: - LogMessage() { } - ~LogMessage() { - std::cerr << std::endl; - } - - LogMessage& operator<<(const std::string& msg) { - std::cerr << msg; - return *this; - } - LogMessage& operator<<(int x) { - std::cerr << x; - return *this; - } -}; - -// Asserts, both versions activated in debug mode only, -// and ones that are always active. - -#define CRASH_UNLESS(condition) \ - PREDICT_TRUE(condition) ? (void)0 : \ - snappy::LogMessageVoidify() & snappy::LogMessageCrash() - -#ifdef _MSC_VER -// ~LogMessageCrash calls abort() and therefore never exits. This is by design -// so temporarily disable warning C4722. -#pragma warning(push) -#pragma warning(disable:4722) -#endif - -class LogMessageCrash : public LogMessage { - public: - LogMessageCrash() { } - ~LogMessageCrash() { - std::cerr << std::endl; - abort(); - } -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -// This class is used to explicitly ignore values in the conditional -// logging macros. This avoids compiler warnings like "value computed -// is not used" and "statement has no effect". 
- -class LogMessageVoidify { - public: - LogMessageVoidify() { } - // This has to be an operator with a precedence lower than << but - // higher than ?: - void operator&(const LogMessage&) { } -}; - -#define CHECK(cond) CRASH_UNLESS(cond) -#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) -#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) -#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) -#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) -#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) -#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) -#define CHECK_OK(cond) (cond).CheckSuccess() - -} // namespace snappy - #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_ diff --git a/snappy.cc b/snappy.cc index 1ba247b..d6d709a 100644 --- a/snappy.cc +++ b/snappy.cc @@ -26,51 +26,175 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "snappy.h" #include "snappy-internal.h" #include "snappy-sinksource.h" - -#ifndef SNAPPY_HAVE_SSE2 -#if defined(__SSE2__) || defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -#define SNAPPY_HAVE_SSE2 1 +#include "snappy.h" +#if !defined(SNAPPY_HAVE_BMI2) +// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 +// specifically, but it does define __AVX2__ when AVX2 support is available. +// Fortunately, AVX2 was introduced in Haswell, just like BMI2. +// +// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, +// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which +// case issuing BMI2 instructions results in a compiler error. +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +#define SNAPPY_HAVE_BMI2 1 #else -#define SNAPPY_HAVE_SSE2 0 +#define SNAPPY_HAVE_BMI2 0 #endif +#endif // !defined(SNAPPY_HAVE_BMI2) + +#if !defined(SNAPPY_HAVE_X86_CRC32) +#if defined(__SSE4_2__) +#define SNAPPY_HAVE_X86_CRC32 1 +#else +#define SNAPPY_HAVE_X86_CRC32 0 #endif +#endif // !defined(SNAPPY_HAVE_X86_CRC32) -#if SNAPPY_HAVE_SSE2 -#include +#if !defined(SNAPPY_HAVE_NEON_CRC32) +#if SNAPPY_HAVE_NEON && defined(__ARM_FEATURE_CRC32) +#define SNAPPY_HAVE_NEON_CRC32 1 +#else +#define SNAPPY_HAVE_NEON_CRC32 0 +#endif +#endif // !defined(SNAPPY_HAVE_NEON_CRC32) + +#if SNAPPY_HAVE_BMI2 || SNAPPY_HAVE_X86_CRC32 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include +#elif SNAPPY_HAVE_NEON_CRC32 +#include #endif -#include #include +#include +#include +#include +#include +#include +#include +#include #include +#include #include - namespace snappy { +namespace { + +// The amount of slop bytes writers are using for unconditional copies. +constexpr int kSlopBytes = 64; + +using internal::char_table; using internal::COPY_1_BYTE_OFFSET; using internal::COPY_2_BYTE_OFFSET; -using internal::LITERAL; -using internal::char_table; +using internal::COPY_4_BYTE_OFFSET; using internal::kMaximumTagLength; +using internal::LITERAL; +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE +using internal::V128; +using internal::V128_Load; +using internal::V128_LoadU; +using internal::V128_Shuffle; +using internal::V128_StoreU; +using internal::V128_DupChar; +#endif + +// We translate the information encoded in a tag through a lookup table to a +// format that requires fewer instructions to decode. Effectively we store +// the length minus the tag part of the offset. The lowest significant byte +// thus stores the length. 
While total length - offset is given by +// entry - ExtractOffset(type). The nice thing is that the subtraction +// immediately sets the flags for the necessary check that offset >= length. +// This folds the cmp with sub. We engineer the long literals and copy-4 to +// always fail this check, so their presence doesn't affect the fast path. +// To prevent literals from triggering the guard against offset < length (offset +// does not apply to literals) the table is giving them a spurious offset of +// 256. +inline constexpr int16_t MakeEntry(int16_t len, int16_t offset) { + return len - (offset << 8); +} + +inline constexpr int16_t LengthMinusOffset(int data, int type) { + return type == 3 ? 0xFF // copy-4 (or type == 3) + : type == 2 ? MakeEntry(data + 1, 0) // copy-2 + : type == 1 ? MakeEntry((data & 7) + 4, data >> 3) // copy-1 + : data < 60 ? MakeEntry(data + 1, 1) // note spurious offset. + : 0xFF; // long literal +} + +inline constexpr int16_t LengthMinusOffset(uint8_t tag) { + return LengthMinusOffset(tag >> 2, tag & 3); +} + +template +struct index_sequence {}; + +template +struct make_index_sequence : make_index_sequence {}; + +template +struct make_index_sequence<0, Is...> : index_sequence {}; + +template +constexpr std::array MakeTable(index_sequence) { + return std::array{LengthMinusOffset(seq)...}; +} + +alignas(64) const std::array kLengthMinusOffset = + MakeTable(make_index_sequence<256>{}); + +// Given a table of uint16_t whose size is mask / 2 + 1, return a pointer to the +// relevant entry, if any, for the given bytes. Any hash function will do, +// but a good hash function reduces the number of collisions and thus yields +// better compression for compressible input. +// +// REQUIRES: mask is 2 * (table_size - 1), and table_size is a power of two. +inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) { + // Our choice is quicker-and-dirtier than the typical hash function; + // empirically, that seems beneficial. The upper bits of kMagic * bytes are a + // higher-quality hash than the lower bits, so when using kMagic * bytes we + // also shift right to get a higher-quality end result. There's no similar + // issue with a CRC because all of the output bits of a CRC are equally good + // "hashes." So, a CPU instruction for CRC, if available, tends to be a good + // choice. +#if SNAPPY_HAVE_NEON_CRC32 + // We use mask as the second arg to the CRC function, as it's about to + // be used anyway; it'd be equally correct to use 0 or some constant. + // Mathematically, _mm_crc32_u32 (or similar) is a function of the + // xor of its arguments. + const uint32_t hash = __crc32cw(bytes, mask); +#elif SNAPPY_HAVE_X86_CRC32 + const uint32_t hash = _mm_crc32_u32(bytes, mask); +#else + constexpr uint32_t kMagic = 0x1e35a7bd; + const uint32_t hash = (kMagic * bytes) >> (31 - kMaxHashTableBits); +#endif + return reinterpret_cast(reinterpret_cast(table) + + (hash & mask)); +} -// Any hash function will produce a valid compressed bitstream, but a good -// hash function reduces the number of collisions and thus yields better -// compression for compressible input, and more speed for incompressible -// input. Of course, it doesn't hurt if the hash function is reasonably fast -// either, as it gets called a lot. 
-static inline uint32 HashBytes(uint32 bytes, int shift) { - uint32 kMul = 0x1e35a7bd; - return (bytes * kMul) >> shift; +inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes, + uint32_t mask) { + constexpr uint32_t kMagic = 2654435761U; + const uint32_t hash = (kMagic * bytes) >> (32 - kMaxHashTableBits); + return reinterpret_cast(reinterpret_cast(table) + + (hash & mask)); } -static inline uint32 Hash(const char* p, int shift) { - return HashBytes(UNALIGNED_LOAD32(p), shift); + +inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes, + uint32_t mask) { + constexpr uint64_t kMagic = 58295818150454627ULL; + const uint32_t hash = (kMagic * bytes) >> (64 - kMaxHashTableBits); + return reinterpret_cast(reinterpret_cast(table) + + (hash & mask)); } -size_t MaxCompressedLength(size_t source_len) { +} // namespace + +size_t MaxCompressedLength(size_t source_bytes) { // Compressed data can be defined as: // compressed := item* literal* // item := literal* copy @@ -91,28 +215,34 @@ size_t MaxCompressedLength(size_t source_len) { // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. // // This last factor dominates the blowup, so the final estimate is: - return 32 + source_len + source_len/6; + return 32 + source_bytes + source_bytes / 6; } namespace { void UnalignedCopy64(const void* src, void* dst) { char tmp[8]; - memcpy(tmp, src, 8); - memcpy(dst, tmp, 8); + std::memcpy(tmp, src, 8); + std::memcpy(dst, tmp, 8); } void UnalignedCopy128(const void* src, void* dst) { - // TODO(alkis): Remove this when we upgrade to a recent compiler that emits - // SSE2 moves for memcpy(dst, src, 16). -#if SNAPPY_HAVE_SSE2 - __m128i x = _mm_loadu_si128(static_cast(src)); - _mm_storeu_si128(static_cast<__m128i*>(dst), x); -#else + // std::memcpy() gets vectorized when the appropriate compiler options are + // used. For example, x86 compilers targeting SSE2+ will optimize to an SSE2 + // load and store. char tmp[16]; - memcpy(tmp, src, 16); - memcpy(dst, tmp, 16); -#endif + std::memcpy(tmp, src, 16); + std::memcpy(dst, tmp, 16); +} + +template +inline void ConditionalUnalignedCopy128(const char* src, char* dst) { + if (use_16bytes_chunk) { + UnalignedCopy128(src, dst); + } else { + UnalignedCopy64(src, dst); + UnalignedCopy64(src + 8, dst + 8); + } } // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used @@ -124,30 +254,194 @@ void UnalignedCopy128(const void* src, void* dst) { // After IncrementalCopySlow(src, op, op_limit), the result will have eleven // copies of "ab" // ababababababababababab -// Note that this does not match the semantics of either memcpy() or memmove(). +// Note that this does not match the semantics of either std::memcpy() or +// std::memmove(). inline char* IncrementalCopySlow(const char* src, char* op, char* const op_limit) { + // TODO: Remove pragma when LLVM is aware this + // function is only called in cold regions and when cold regions don't get + // vectorized or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif while (op < op_limit) { *op++ = *src++; } return op_limit; } -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + +// Computes the bytes for shuffle control mask (please read comments on +// 'pattern_generation_masks' as well) for the given index_offset and +// pattern_size. For example, when the 'offset' is 6, it will generate a +// repeating pattern of size 6. 
So, the first 16 byte indexes will correspond to +// the pattern-bytes {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3} and the +// next 16 byte indexes will correspond to the pattern-bytes {4, 5, 0, 1, 2, 3, +// 4, 5, 0, 1, 2, 3, 4, 5, 0, 1}. These byte index sequences are generated by +// calling MakePatternMaskBytes(0, 6, index_sequence<16>()) and +// MakePatternMaskBytes(16, 6, index_sequence<16>()) respectively. + + +template +inline constexpr std::array MakePatternMaskBytes( + int index_offset, int pattern_size, index_sequence) { + return {static_cast((index_offset + indexes) % pattern_size)...}; +} + +// Computes the shuffle control mask bytes array for given pattern-sizes and +// returns an array. +template +inline constexpr std::array, + sizeof...(pattern_sizes_minus_one)> +MakePatternMaskBytesTable(int index_offset, + index_sequence) { + return { + MakePatternMaskBytes(index_offset, pattern_sizes_minus_one + 1, + make_index_sequence())...}; +} +// This is an array of shuffle control masks that can be used as the source +// operand for PSHUFB to permute the contents of the destination XMM register +// into a repeating byte pattern. +alignas(16) constexpr std::array, + 16> pattern_generation_masks = + MakePatternMaskBytesTable( + /*index_offset=*/0, + /*pattern_sizes_minus_one=*/make_index_sequence<16>()); + +// Similar to 'pattern_generation_masks', this table is used to "rotate" the +// pattern so that we can copy the *next 16 bytes* consistent with the pattern. +// Basically, pattern_reshuffle_masks is a continuation of +// pattern_generation_masks. It follows that, pattern_reshuffle_masks is same as +// pattern_generation_masks for offsets 1, 2, 4, 8 and 16. +alignas(16) constexpr std::array, + 16> pattern_reshuffle_masks = + MakePatternMaskBytesTable( + /*index_offset=*/16, + /*pattern_sizes_minus_one=*/make_index_sequence<16>()); + +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +static inline V128 LoadPattern(const char* src, const size_t pattern_size) { + V128 generation_mask = V128_Load(reinterpret_cast( + pattern_generation_masks[pattern_size - 1].data())); + // Uninitialized bytes are masked out by the shuffle mask. + // TODO: remove annotation and macro defs once MSan is fixed. + SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(src + pattern_size, 16 - pattern_size); + return V128_Shuffle(V128_LoadU(reinterpret_cast(src)), + generation_mask); +} +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +static inline std::pair +LoadPatternAndReshuffleMask(const char* src, const size_t pattern_size) { + V128 pattern = LoadPattern(src, pattern_size); + + // This mask will generate the next 16 bytes in-place. Doing so enables us to + // write data by at most 4 V128_StoreU. + // + // For example, suppose pattern is: abcdefabcdefabcd + // Shuffling with this mask will generate: efabcdefabcdefab + // Shuffling again will generate: cdefabcdefabcdef + V128 reshuffle_mask = V128_Load(reinterpret_cast( + pattern_reshuffle_masks[pattern_size - 1].data())); + return {pattern, reshuffle_mask}; +} +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + +// Fallback for when we need to copy while extending the pattern, for example +// copying 10 bytes from 3 positions back abc -> abcabcabcabca. +// +// REQUIRES: [dst - offset, dst + 64) is a valid address range. 
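A scalar model of what these masks implement can be easier to read than the shuffles: extending a pattern is just an overlapping byte-at-a-time copy, which the masks let a single PSHUFB reproduce 16 bytes at a time. The sketch below (ExtendPattern is an illustrative name, not part of the patch) shows the same effect:

#include <cstdio>
#include <cstring>

// Make the 64 bytes at dst repeat the `offset` bytes that precede dst,
// e.g. "abc" with offset 3 becomes "abcabcabc...".
void ExtendPattern(char* dst, int offset) {
  // (dst - offset)[i] always reads a byte that was already present or already
  // written, which is what makes the overlapping copy act as a repeater.
  for (int i = 0; i < 64; ++i) dst[i] = (dst - offset)[i];
}

int main() {
  char buf[3 + 64];
  std::memcpy(buf, "abc", 3);
  ExtendPattern(buf + 3, 3);
  std::printf("%.16s\n", buf);  // abcabcabcabcabca
}

The vector path produces the same 64 bytes with four 16-byte stores: shuffling the loaded pattern once yields the first 16 bytes, and the reshuffle mask rotates that register in place to yield each subsequent 16.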
+SNAPPY_ATTRIBUTE_ALWAYS_INLINE +static inline bool Copy64BytesWithPatternExtension(char* dst, size_t offset) { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + if (SNAPPY_PREDICT_TRUE(offset <= 16)) { + switch (offset) { + case 0: + return false; + case 1: { + // TODO: Ideally we should memset, move back once the + // codegen issues are fixed. + V128 pattern = V128_DupChar(dst[-1]); + for (int i = 0; i < 4; i++) { + V128_StoreU(reinterpret_cast(dst + 16 * i), pattern); + } + return true; + } + case 2: + case 4: + case 8: + case 16: { + V128 pattern = LoadPattern(dst - offset, offset); + for (int i = 0; i < 4; i++) { + V128_StoreU(reinterpret_cast(dst + 16 * i), pattern); + } + return true; + } + default: { + auto pattern_and_reshuffle_mask = + LoadPatternAndReshuffleMask(dst - offset, offset); + V128 pattern = pattern_and_reshuffle_mask.first; + V128 reshuffle_mask = pattern_and_reshuffle_mask.second; + for (int i = 0; i < 4; i++) { + V128_StoreU(reinterpret_cast(dst + 16 * i), pattern); + pattern = V128_Shuffle(pattern, reshuffle_mask); + } + return true; + } + } + } +#else + if (SNAPPY_PREDICT_TRUE(offset < 16)) { + if (SNAPPY_PREDICT_FALSE(offset == 0)) return false; + // Extend the pattern to the first 16 bytes. + // The simpler formulation of `dst[i - offset]` induces undefined behavior. + for (int i = 0; i < 16; i++) dst[i] = (dst - offset)[i]; + // Find a multiple of pattern >= 16. + static std::array pattern_sizes = []() { + std::array res; + for (int i = 1; i < 16; i++) res[i] = (16 / i + 1) * i; + return res; + }(); + offset = pattern_sizes[offset]; + for (int i = 1; i < 4; i++) { + std::memcpy(dst + i * 16, dst + i * 16 - offset, 16); + } + return true; + } +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + + // Very rare. + for (int i = 0; i < 4; i++) { + std::memcpy(dst + i * 16, dst + i * 16 - offset, 16); + } + return true; +} + +// Copy [src, src+(op_limit-op)) to [op, op_limit) but faster than // IncrementalCopySlow. buf_limit is the address past the end of the writable // region of the buffer. inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, char* const buf_limit) { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + constexpr int big_pattern_size_lower_bound = 16; +#else + constexpr int big_pattern_size_lower_bound = 8; +#endif + // Terminology: // // slop = buf_limit - op // pat = op - src - // len = limit - op + // len = op_limit - op assert(src < op); + assert(op < op_limit); assert(op_limit <= buf_limit); - // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that - // to optimize this function but we have to also handle these cases in case - // the input does not satisfy these conditions. + // NOTE: The copy tags use 3 or 6 bits to store the copy length, so len <= 64. + assert(op_limit - op <= 64); + // NOTE: In practice the compressor always emits len >= 4, so it is ok to + // assume that to optimize this function, but this is not guaranteed by the + // compression format, so we have to also handle len < 4 in case the input + // does not satisfy these conditions. size_t pattern_size = op - src; // The cases are split into different branches to allow the branch predictor, @@ -171,47 +465,139 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // input. In general if we always predict len <= 16 it would be an ok // prediction. // - // In order to be fast we want a pattern >= 8 bytes and an unrolled loop - // copying 2x 8 bytes at a time. - - // Handle the uncommon case where pattern is less than 8 bytes. 
- if (PREDICT_FALSE(pattern_size < 8)) { - // Expand pattern to at least 8 bytes. The worse case scenario in terms of - // buffer usage is when the pattern is size 3. ^ is the original position - // of op. x are irrelevant bytes copied by the last UnalignedCopy64. + // In order to be fast we want a pattern >= 16 bytes (or 8 bytes in non-SSE) + // and an unrolled loop copying 1x 16 bytes (or 2x 8 bytes in non-SSE) at a + // time. + + // Handle the uncommon case where pattern is less than 16 (or 8 in non-SSE) + // bytes. + if (pattern_size < big_pattern_size_lower_bound) { +#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB + // to permute the register's contents in-place into a repeating sequence of + // the first "pattern_size" bytes. + // For example, suppose: + // src == "abc" + // op == op + 3 + // After V128_Shuffle(), "pattern" will have five copies of "abc" + // followed by one byte of slop: abcabcabcabcabca. // - // abc - // abcabcxxxxx - // abcabcabcabcxxxxx - // ^ - // The last x is 14 bytes after ^. - if (PREDICT_TRUE(op <= buf_limit - 14)) { + // The non-SSE fallback implementation suffers from store-forwarding stalls + // because its loads and stores partly overlap. By expanding the pattern + // in-place, we avoid the penalty. + + // Typically, the op_limit is the gating factor so try to simplify the loop + // based on that. + if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 15)) { + auto pattern_and_reshuffle_mask = + LoadPatternAndReshuffleMask(src, pattern_size); + V128 pattern = pattern_and_reshuffle_mask.first; + V128 reshuffle_mask = pattern_and_reshuffle_mask.second; + // There is at least one, and at most four 16-byte blocks. Writing four + // conditionals instead of a loop allows FDO to layout the code with + // respect to the actual probabilities of each length. + // TODO: Replace with loop with trip count hint. + V128_StoreU(reinterpret_cast(op), pattern); + + if (op + 16 < op_limit) { + pattern = V128_Shuffle(pattern, reshuffle_mask); + V128_StoreU(reinterpret_cast(op + 16), pattern); + } + if (op + 32 < op_limit) { + pattern = V128_Shuffle(pattern, reshuffle_mask); + V128_StoreU(reinterpret_cast(op + 32), pattern); + } + if (op + 48 < op_limit) { + pattern = V128_Shuffle(pattern, reshuffle_mask); + V128_StoreU(reinterpret_cast(op + 48), pattern); + } + return op_limit; + } + char* const op_end = buf_limit - 15; + if (SNAPPY_PREDICT_TRUE(op < op_end)) { + auto pattern_and_reshuffle_mask = + LoadPatternAndReshuffleMask(src, pattern_size); + V128 pattern = pattern_and_reshuffle_mask.first; + V128 reshuffle_mask = pattern_and_reshuffle_mask.second; + // This code path is relatively cold however so we save code size + // by avoiding unrolling and vectorizing. + // + // TODO: Remove pragma when when cold regions don't get + // vectorized or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + do { + V128_StoreU(reinterpret_cast(op), pattern); + pattern = V128_Shuffle(pattern, reshuffle_mask); + op += 16; + } while (SNAPPY_PREDICT_TRUE(op < op_end)); + } + return IncrementalCopySlow(op - pattern_size, op, op_limit); +#else // !SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE + // If plenty of buffer space remains, expand the pattern to at least 8 + // bytes. The way the following loop is written, we need 8 bytes of buffer + // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 + // bytes if pattern_size is 2. 
Precisely encoding that is probably not + // worthwhile; instead, invoke the slow path if we cannot write 11 bytes + // (because 11 are required in the worst case). + if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { while (pattern_size < 8) { UnalignedCopy64(src, op); op += pattern_size; pattern_size *= 2; } - if (PREDICT_TRUE(op >= op_limit)) return op_limit; + if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; } else { return IncrementalCopySlow(src, op, op_limit); } +#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE } - assert(pattern_size >= 8); + assert(pattern_size >= big_pattern_size_lower_bound); + constexpr bool use_16bytes_chunk = big_pattern_size_lower_bound == 16; - // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single - // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe - // because expanding the pattern to at least 8 bytes guarantees that - // op - src >= 8. - while (op <= buf_limit - 16) { - UnalignedCopy64(src, op); - UnalignedCopy64(src + 8, op + 8); - src += 16; - op += 16; - if (PREDICT_TRUE(op >= op_limit)) return op_limit; + // Copy 1x 16 bytes (or 2x 8 bytes in non-SSE) at a time. Because op - src can + // be < 16 in non-SSE, a single UnalignedCopy128 might overwrite data in op. + // UnalignedCopy64 is safe because expanding the pattern to at least 8 bytes + // guarantees that op - src >= 8. + // + // Typically, the op_limit is the gating factor so try to simplify the loop + // based on that. + if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 15)) { + // There is at least one, and at most four 16-byte blocks. Writing four + // conditionals instead of a loop allows FDO to layout the code with respect + // to the actual probabilities of each length. + // TODO: Replace with loop with trip count hint. + ConditionalUnalignedCopy128(src, op); + if (op + 16 < op_limit) { + ConditionalUnalignedCopy128(src + 16, op + 16); + } + if (op + 32 < op_limit) { + ConditionalUnalignedCopy128(src + 32, op + 32); + } + if (op + 48 < op_limit) { + ConditionalUnalignedCopy128(src + 48, op + 48); + } + return op_limit; + } + + // Fall back to doing as much as we can with the available slop in the + // buffer. This code path is relatively cold however so we save code size by + // avoiding unrolling and vectorizing. + // + // TODO: Remove pragma when when cold regions don't get vectorized + // or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + for (char* op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { + ConditionalUnalignedCopy128(src, op); } + if (op >= op_limit) return op_limit; + // We only take this branch if we didn't have enough slop and we can do a // single 8 byte copy. - if (PREDICT_FALSE(op <= buf_limit - 8)) { + if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { UnalignedCopy64(src, op); src += 8; op += 8; @@ -221,12 +607,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, } // namespace -static inline char* EmitLiteral(char* op, - const char* literal, - int len, - bool allow_fast_path) { +template +static inline char* EmitLiteral(char* op, const char* literal, int len) { // The vast majority of copies are below 16 bytes, for which a - // call to memcpy is overkill. This fast path can sometimes + // call to std::memcpy() is overkill. This fast path can sometimes // copy up to 15 bytes too much, but that is okay in the // main loop, since we have a bit to go on for both sides: // @@ -235,7 +619,7 @@ static inline char* EmitLiteral(char* op, // if not, allow_fast_path = false. 
// - The output will always have 32 spare bytes (see // MaxCompressedLength). - assert(len > 0); // Zero-length literals are disallowed + assert(len > 0); // Zero-length literals are disallowed int n = len - 1; if (allow_fast_path && len <= 16) { // Fits in tag byte @@ -249,74 +633,95 @@ static inline char* EmitLiteral(char* op, // Fits in tag byte *op++ = LITERAL | (n << 2); } else { - // Encode in upcoming bytes - char* base = op; - int count = 0; - op++; - while (n > 0) { - *op++ = n & 0xff; - n >>= 8; - count++; - } + int count = (Bits::Log2Floor(n) >> 3) + 1; assert(count >= 1); assert(count <= 4); - *base = LITERAL | ((59+count) << 2); + *op++ = LITERAL | ((59 + count) << 2); + // Encode in upcoming bytes. + // Write 4 bytes, though we may care about only 1 of them. The output buffer + // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds + // here and there is a std::memcpy() of size 'len' below. + LittleEndian::Store32(op, n); + op += count; + } + // When allow_fast_path is true, we can overwrite up to 16 bytes. + if (allow_fast_path) { + char* destination = op; + const char* source = literal; + const char* end = destination + len; + do { + std::memcpy(destination, source, 16); + destination += 16; + source += 16; + } while (destination < end); + } else { + std::memcpy(op, literal, len); } - memcpy(op, literal, len); return op + len; } -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template +static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { assert(len <= 64); assert(len >= 4); assert(offset < 65536); assert(len_less_than_12 == (len < 12)); - if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) { - // offset fits in 11 bits. The 3 highest go in the top of the first byte, - // and the rest go in the second byte. - *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); - *op++ = offset & 0xff; + if (len_less_than_12) { + uint32_t u = (len << 2) + (offset << 8); + uint32_t copy1 = COPY_1_BYTE_OFFSET - (4 << 2) + ((offset >> 3) & 0xe0); + uint32_t copy2 = COPY_2_BYTE_OFFSET - (1 << 2); + // It turns out that offset < 2048 is a difficult to predict branch. + // `perf record` shows this is the highest percentage of branch misses in + // benchmarks. This code produces branch free code, the data dependency + // chain that bottlenecks the throughput is so long that a few extra + // instructions are completely free (IPC << 6 because of data deps). + u += offset < 2048 ? copy1 : copy2; + LittleEndian::Store32(op, u); + op += offset < 2048 ? 2 : 3; } else { // Write 4 bytes, though we only care about 3 of them. The output buffer // is required to have some slack, so the extra byte won't overrun it. - uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); + uint32_t u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); LittleEndian::Store32(op, u); op += 3; } return op; } -static inline char* EmitCopy(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template +static inline char* EmitCopy(char* op, size_t offset, size_t len) { assert(len_less_than_12 == (len < 12)); if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len, true); + return EmitCopyAtMost64(op, offset, len); } else { // A special case for len <= 64 might help, but so far measurements suggest // it's in the noise. // Emit 64 byte copies but make sure to keep at least four bytes reserved. 
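To make the reservation concrete before the loop below: a copy shorter than 4 bytes cannot be encoded, so the splitting logic never lets the final fragment drop under 4. A sketch of the arithmetic (SplitCopy is illustrative only):

#include <cstdio>

// Split a match of len bytes (len >= 4) the way EmitCopy does: 64-byte copies
// while at least 68 remain, one 60-byte copy if 65..67 remain, then the rest.
void SplitCopy(int len) {
  while (len >= 68) {  // emitting 64 here still leaves >= 4 bytes
    std::printf("copy 64\n");
    len -= 64;
  }
  if (len > 64) {  // 65..67 left: emit 60 so the remainder is 5..7, not 1..3
    std::printf("copy 60\n");
    len -= 60;
  }
  std::printf("copy %d\n", len);  // always 4..64, encodable as a single tag
}

int main() { SplitCopy(131); }  // prints: copy 64, copy 60, copy 7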
- while (PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64, false); + while (SNAPPY_PREDICT_FALSE(len >= 68)) { + op = EmitCopyAtMost64(op, offset, 64); len -= 64; } // One or two copies will now finish the job. if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60, false); + op = EmitCopyAtMost64(op, offset, 60); len -= 60; } // Emit remainder. - op = EmitCopyAtMost64(op, offset, len, len < 12); + if (len < 12) { + op = EmitCopyAtMost64(op, offset, len); + } else { + op = EmitCopyAtMost64(op, offset, len); + } return op; } } bool GetUncompressedLength(const char* start, size_t n, size_t* result) { - uint32 v = 0; + uint32_t v = 0; const char* limit = start + n; if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { *result = v; @@ -326,76 +731,47 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) { } } -namespace internal { -uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) { - // Use smaller hash table when input.size() is smaller, since we - // fill the table, incurring O(hash table size) overhead for - // compression, and if the input is short, we won't need that - // many hash table entries anyway. - assert(kMaxHashTableSize >= 256); - size_t htsize = 256; - while (htsize < kMaxHashTableSize && htsize < input_size) { - htsize <<= 1; - } - - uint16* table; - if (htsize <= ARRAYSIZE(small_table_)) { - table = small_table_; - } else { - if (large_table_ == NULL) { - large_table_ = new uint16[kMaxHashTableSize]; - } - table = large_table_; +namespace { +uint32_t CalculateTableSize(uint32_t input_size) { + static_assert( + kMaxHashTableSize >= kMinHashTableSize, + "kMaxHashTableSize should be greater or equal to kMinHashTableSize."); + if (input_size > kMaxHashTableSize) { + return kMaxHashTableSize; } - - *table_size = htsize; - memset(table, 0, htsize * sizeof(*table)); - return table; -} -} // end namespace internal - -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will -// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have -// empirically found that overlapping loads such as -// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) -// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. -// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. - -#ifdef ARCH_K8 - -typedef uint64 EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); + if (input_size < kMinHashTableSize) { + return kMinHashTableSize; + } + // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. + // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). + return 2u << Bits::Log2Floor(input_size - 1); } +} // namespace -static inline uint32 GetUint32AtOffset(uint64 v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return v >> (LittleEndian::IsLittleEndian() ? 
8 * offset : 32 - 8 * offset); +namespace internal { +WorkingMemory::WorkingMemory(size_t input_size) { + const size_t max_fragment_size = std::min(input_size, kBlockSize); + const size_t table_size = CalculateTableSize(max_fragment_size); + size_ = table_size * sizeof(*table_) + max_fragment_size + + MaxCompressedLength(max_fragment_size); + mem_ = std::allocator().allocate(size_); + table_ = reinterpret_cast(mem_); + input_ = mem_ + table_size * sizeof(*table_); + output_ = input_ + max_fragment_size; } -#else - -typedef const char* EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; +WorkingMemory::~WorkingMemory() { + std::allocator().deallocate(mem_, size_); } -static inline uint32 GetUint32AtOffset(const char* v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); +uint16_t* WorkingMemory::GetHashTable(size_t fragment_size, + int* table_size) const { + const size_t htsize = CalculateTableSize(fragment_size); + memset(table_, 0, htsize * sizeof(*table_)); + *table_size = htsize; + return table_; } - -#endif +} // end namespace internal // Flat array compression that does not emit the "uncompressed length" // prefix. Compresses "input" string to the "*op" buffer. @@ -409,29 +785,25 @@ static inline uint32 GetUint32AtOffset(const char* v, int offset) { // Returns an "end" pointer into "op" buffer. // "end - op" is the compressed size of "input". namespace internal { -char* CompressFragment(const char* input, - size_t input_size, - char* op, - uint16* table, - const int table_size) { +char* CompressFragment(const char* input, size_t input_size, char* op, + uint16_t* table, const int table_size) { // "ip" is the input pointer, and "op" is the output pointer. const char* ip = input; assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two - const int shift = 32 - Bits::Log2Floor(table_size); - assert(static_cast(kuint32max >> shift) == table_size - 1); + assert((table_size & (table_size - 1)) == 0); // table must be power of two + const uint32_t mask = 2 * (table_size - 1); const char* ip_end = input + input_size; const char* base_ip = ip; - // Bytes in [next_emit, ip) will be emitted as literal bytes. Or - // [next_emit, ip_end) after the main loop. - const char* next_emit = ip; const size_t kInputMarginBytes = 15; - if (PREDICT_TRUE(input_size >= kInputMarginBytes)) { + if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { const char* ip_limit = input + input_size - kInputMarginBytes; - for (uint32 next_hash = Hash(++ip, shift); ; ) { - assert(next_emit < ip); + for (uint32_t preload = LittleEndian::Load32(ip + 1);;) { + // Bytes in [next_emit, ip) will be emitted as literal bytes. Or + // [next_emit, ip_end) after the main loop. + const char* next_emit = ip++; + uint64_t data = LittleEndian::Load64(ip); // The body of this loop calls EmitLiteral once and then EmitCopy one or // more times. (The exception is that when we're close to exhausting // the input we goto emit_remainder.) @@ -457,34 +829,66 @@ char* CompressFragment(const char* input, // The "skip" variable keeps track of how many bytes there are since the // last match; dividing it by 32 (ie. right-shifting by five) gives the // number of bytes to move ahead for each iteration. 
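Concretely, the schedule this produces can be tabulated with a few lines mirroring the skip arithmetic in the loop below (an illustrative sketch, not part of the patch):

#include <cstdio>

int main() {
  unsigned skip = 32, bytes_ahead = 0;
  for (int probe = 1; probe <= 64; ++probe) {
    unsigned step = skip >> 5;  // bytes_between_hash_lookups
    skip += step;
    bytes_ahead += step;
    if (probe % 16 == 0)
      std::printf("after %2d misses: %3u bytes ahead, step %u\n", probe,
                  bytes_ahead, step);
  }
}

The first 32 misses advance one byte each, the next 16 advance two bytes, and so on, so incompressible regions are abandoned quickly while compressible input is still sampled densely.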
- uint32 skip = 32; + uint32_t skip = 32; - const char* next_ip = ip; const char* candidate; - do { - ip = next_ip; - uint32 hash = next_hash; - assert(hash == Hash(ip, shift)); - uint32 bytes_between_hash_lookups = skip >> 5; + if (ip_limit - ip >= 16) { + auto delta = ip - base_ip; + for (int j = 0; j < 4; ++j) { + for (int k = 0; k < 4; ++k) { + int i = 4 * j + k; + // These for-loops are meant to be unrolled. So we can freely + // special case the first iteration to use the value already + // loaded in preload. + uint32_t dword = i == 0 ? preload : static_cast(data); + assert(dword == LittleEndian::Load32(ip + i)); + uint16_t* table_entry = TableEntry(table, dword, mask); + candidate = base_ip + *table_entry; + assert(candidate >= base_ip); + assert(candidate < ip + i); + *table_entry = delta + i; + if (SNAPPY_PREDICT_FALSE(LittleEndian::Load32(candidate) == dword)) { + *op = LITERAL | (i << 2); + UnalignedCopy128(next_emit, op + 1); + ip += i; + op = op + i + 2; + goto emit_match; + } + data >>= 8; + } + data = LittleEndian::Load64(ip + 4 * j + 4); + } + ip += 16; + skip += 16; + } + while (true) { + assert(static_cast(data) == LittleEndian::Load32(ip)); + uint16_t* table_entry = TableEntry(table, data, mask); + uint32_t bytes_between_hash_lookups = skip >> 5; skip += bytes_between_hash_lookups; - next_ip = ip + bytes_between_hash_lookups; - if (PREDICT_FALSE(next_ip > ip_limit)) { + const char* next_ip = ip + bytes_between_hash_lookups; + if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { + ip = next_emit; goto emit_remainder; } - next_hash = Hash(next_ip, shift); - candidate = base_ip + table[hash]; + candidate = base_ip + *table_entry; assert(candidate >= base_ip); assert(candidate < ip); - table[hash] = ip - base_ip; - } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) != - UNALIGNED_LOAD32(candidate))); + *table_entry = ip - base_ip; + if (SNAPPY_PREDICT_FALSE(static_cast(data) == + LittleEndian::Load32(candidate))) { + break; + } + data = LittleEndian::Load32(next_ip); + ip = next_ip; + } // Step 2: A 4-byte match has been found. We'll later see if more // than 4 bytes match. But, prior to the match, input // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit, true); + op = EmitLiteral(op, next_emit, ip - next_emit); // Step 3: Call EmitCopy, and then see if another EmitCopy could // be our next move. Repeat until we find no match for the @@ -494,54 +898,239 @@ char* CompressFragment(const char* input, // though we don't yet know how big the literal will be. We handle that // by proceeding to the next iteration of the main loop. We also can exit // this loop via goto if we get close to exhausting the input. - EightBytesReference input_bytes; - uint32 candidate_bytes = 0; - + emit_match: do { // We have a 4-byte match at ip, and no need to emit any // "literal bytes" prior to ip. 
          const char* base = ip;
          std::pair<size_t, bool> p =
-              FindMatchLength(candidate + 4, ip + 4, ip_end);
+              FindMatchLength(candidate + 4, ip + 4, ip_end, &data);
          size_t matched = 4 + p.first;
          ip += matched;
          size_t offset = base - candidate;
          assert(0 == memcmp(base, candidate, matched));
-          op = EmitCopy(op, offset, matched, p.second);
-          next_emit = ip;
-          if (PREDICT_FALSE(ip >= ip_limit)) {
+          if (p.second) {
+            op = EmitCopy</*len_less_than_12=*/true>(op, offset, matched);
+          } else {
+            op = EmitCopy</*len_less_than_12=*/false>(op, offset, matched);
+          }
+          if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
            goto emit_remainder;
          }
+          // Expect 5 bytes to match
+          assert((data & 0xFFFFFFFFFF) ==
+                 (LittleEndian::Load64(ip) & 0xFFFFFFFFFF));
          // We are now looking for a 4-byte match again. We read
-          // table[Hash(ip, shift)] for that. To improve compression,
-          // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)].
-          input_bytes = GetEightBytesAt(ip - 1);
-          uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
-          table[prev_hash] = ip - base_ip - 1;
-          uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
-          candidate = base_ip + table[cur_hash];
-          candidate_bytes = UNALIGNED_LOAD32(candidate);
-          table[cur_hash] = ip - base_ip;
-        } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
-
-        next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
-        ++ip;
+          // table[Hash(ip, mask)] for that. To improve compression,
+          // we also update table[Hash(ip - 1, mask)] and table[Hash(ip, mask)].
+          *TableEntry(table, LittleEndian::Load32(ip - 1), mask) =
+              ip - base_ip - 1;
+          uint16_t* table_entry = TableEntry(table, data, mask);
+          candidate = base_ip + *table_entry;
+          *table_entry = ip - base_ip;
+          // Measurements on the benchmarks have shown the following
+          // probabilities for the loop to exit (i.e. the avg. number of
+          // iterations is the reciprocal).
+          // BM_Flat/6  txt1    p = 0.3-0.4
+          // BM_Flat/7  txt2    p = 0.35
+          // BM_Flat/8  txt3    p = 0.3-0.4
+          // BM_Flat/9  txt3    p = 0.34-0.4
+          // BM_Flat/10 pb      p = 0.4
+          // BM_Flat/11 gaviota p = 0.1
+          // BM_Flat/12 cp      p = 0.5
+          // BM_Flat/13 c       p = 0.3
+        } while (static_cast<uint32_t>(data) == LittleEndian::Load32(candidate));
+        // Because the least significant 5 bytes matched, we can reuse data
+        // for the next iteration.
+        preload = data >> 8;
      }
    }

- emit_remainder:
+emit_remainder:
  // Emit the remaining bytes as a literal
-  if (next_emit < ip_end) {
-    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
+  if (ip < ip_end) {
+    op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+  }
+
+  return op;
+}
+
+char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
+                                 uint16_t* table, const int table_size,
+                                 uint16_t* table2, const int table_size2) {
+  (void)table_size2;
+  assert(table_size == table_size2);
+  // "ip" is the input pointer, and "op" is the output pointer.
+ const char* ip = input; + assert(input_size <= kBlockSize); + assert((table_size & (table_size - 1)) == 0); // table must be power of two + const uint32_t mask = 2 * (table_size - 1); + const char* ip_end = input + input_size; + const char* base_ip = ip; + + const size_t kInputMarginBytes = 15; + if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { + const char* ip_limit = input + input_size - kInputMarginBytes; + + for (;;) { + const char* next_emit = ip++; + uint64_t data = LittleEndian::Load64(ip); + uint32_t skip = 512; + + const char* candidate; + uint32_t candidate_length; + while (true) { + assert(static_cast(data) == LittleEndian::Load32(ip)); + uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask); + uint32_t bytes_between_hash_lookups = skip >> 9; + skip++; + const char* next_ip = ip + bytes_between_hash_lookups; + if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { + ip = next_emit; + goto emit_remainder; + } + candidate = base_ip + *table_entry2; + assert(candidate >= base_ip); + assert(candidate < ip); + + *table_entry2 = ip - base_ip; + if (SNAPPY_PREDICT_FALSE(static_cast(data) == + LittleEndian::Load32(candidate))) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + break; + } + + uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask); + candidate = base_ip + *table_entry; + assert(candidate >= base_ip); + assert(candidate < ip); + + *table_entry = ip - base_ip; + if (SNAPPY_PREDICT_FALSE(static_cast(data) == + LittleEndian::Load32(candidate))) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + table_entry2 = + TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask); + auto candidate2 = base_ip + *table_entry2; + size_t candidate_length2 = + FindMatchLengthPlain(candidate2, ip + 1, ip_end); + if (candidate_length2 > candidate_length) { + *table_entry2 = ip - base_ip; + candidate = candidate2; + candidate_length = candidate_length2; + ++ip; + } + break; + } + data = LittleEndian::Load64(next_ip); + ip = next_ip; + } + // Backtrack to the point it matches fully. + while (ip > next_emit && candidate > base_ip && + *(ip - 1) == *(candidate - 1)) { + --ip; + --candidate; + ++candidate_length; + } + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) = + ip - base_ip + 1; + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) = + ip - base_ip + 2; + *TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) = + ip - base_ip + 1; + // Step 2: A 4-byte or 8-byte match has been found. + // We'll later see if more than 4 bytes match. But, prior to the match, + // input bytes [next_emit, ip) are unmatched. Emit them as + // "literal bytes." + assert(next_emit + 16 <= ip_end); + if (ip - next_emit > 0) { + op = EmitLiteral(op, next_emit, + ip - next_emit); + } + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + do { + // We have a 4-byte match at ip, and no need to emit any + // "literal bytes" prior to ip. 
+ const char* base = ip; + ip += candidate_length; + size_t offset = base - candidate; + if (candidate_length < 12) { + op = + EmitCopy(op, offset, candidate_length); + } else { + op = EmitCopy(op, offset, + candidate_length); + } + if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { + goto emit_remainder; + } + // We are now looking for a 4-byte match again. We read + // table[Hash(ip, mask)] for that. To improve compression, + // we also update several previous table entries. + if (ip - base_ip > 7) { + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) = + ip - base_ip - 7; + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) = + ip - base_ip - 4; + } + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) = + ip - base_ip - 3; + *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) = + ip - base_ip - 2; + *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) = + ip - base_ip - 2; + *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) = + ip - base_ip - 1; + + uint16_t* table_entry = + TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask); + candidate = base_ip + *table_entry; + *table_entry = ip - base_ip; + if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + continue; + } + table_entry = + TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask); + candidate = base_ip + *table_entry; + *table_entry = ip - base_ip; + if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) { + candidate_length = + FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4; + continue; + } + break; + } while (true); + } + } + +emit_remainder: + // Emit the remaining bytes as a literal + if (ip < ip_end) { + op = EmitLiteral(op, ip, ip_end - ip); } return op; } } // end namespace internal -// Called back at avery compression call to trace parameters and sizes. -static inline void Report(const char *algorithm, size_t compressed_size, - size_t uncompressed_size) {} +static inline void Report(int token, const char *algorithm, size_t +compressed_size, size_t uncompressed_size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)token; + (void)algorithm; + (void)compressed_size; + (void)uncompressed_size; +} // Signature of output types needed by decompression code. // The decompression code is templatized on a type that obeys this @@ -553,12 +1142,28 @@ static inline void Report(const char *algorithm, size_t compressed_size, // // Called before decompression // void SetExpectedLength(size_t length); // +// // For performance a writer may choose to donate the cursor variable to the +// // decompression function. The decompression will inject it in all its +// // function calls to the writer. Keeping the important output cursor as a +// // function local stack variable allows the compiler to keep it in +// // register, which greatly aids performance by avoiding loads and stores of +// // this variable in the fast path loop iterations. +// T GetOutputPtr() const; +// +// // At end of decompression the loop donates the ownership of the cursor +// // variable back to the writer by calling this function. 
+// void SetOutputPtr(T op); +// // // Called after decompression // bool CheckLength() const; // // // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); +// // Each function get a pointer to the op (output pointer), that the writer +// // can use and update. Note it's important that these functions get fully +// // inlined so that no actual address of the local variable needs to be +// // taken. +// bool Append(const char* ip, size_t length, T* op); +// bool AppendFromSelf(uint32_t offset, size_t length, T* op); // // // The rules for how TryFastAppend differs from Append are somewhat // // convoluted: @@ -580,27 +1185,341 @@ static inline void Report(const char *algorithm, size_t compressed_size, // // as it is unlikely that one would implement a fast path accepting // // this much data. // // -// bool TryFastAppend(const char* ip, size_t available, size_t length); +// bool TryFastAppend(const char* ip, size_t available, size_t length, T* op); // }; -namespace internal { +static inline uint32_t ExtractLowBytes(const uint32_t& v, int n) { + assert(n >= 0); + assert(n <= 4); +#if SNAPPY_HAVE_BMI2 + return _bzhi_u32(v, 8 * n); +#else + // This needs to be wider than uint32_t otherwise `mask << 32` will be + // undefined. + uint64_t mask = 0xffffffff; + return v & ~(mask << (8 * n)); +#endif +} + +static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) { + assert(shift < 32); + static const uint8_t masks[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; + return (value & masks[shift]) != 0; +} + +inline bool Copy64BytesWithPatternExtension(ptrdiff_t dst, size_t offset) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)dst; + return offset != 0; +} + +// Copies between size bytes and 64 bytes from src to dest. size cannot exceed +// 64. More than size bytes, but never exceeding 64, might be copied if doing +// so gives better performance. [src, src + size) must not overlap with +// [dst, dst + size), but [src, src + 64) may overlap with [dst, dst + 64). +void MemCopy64(char* dst, const void* src, size_t size) { + // Always copy this many bytes. If that's below size then copy the full 64. + constexpr int kShortMemCopy = 32; + (void)kShortMemCopy; + assert(size <= 64); + assert(std::less_equal()(static_cast(src) + size, + dst) || + std::less_equal()(dst + size, src)); + + // We know that src and dst are at least size bytes apart. However, because we + // might copy more than size bytes the copy still might overlap past size. + // E.g. if src and dst appear consecutively in memory (src + size >= dst). + // TODO: Investigate wider copies on other platforms. +#if defined(__x86_64__) && defined(__AVX__) + assert(kShortMemCopy <= 32); + __m256i data = _mm256_lddqu_si256(static_cast(src)); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), data); + // Profiling shows that nearly all copies are short. + if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) { + data = _mm256_lddqu_si256(static_cast(src) + 1); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst) + 1, data); + } + // RVV acceleration available on RISC-V when compiled with -march=rv64gcv +#elif defined(__riscv) && SNAPPY_HAVE_RVV + // Cast pointers to the type we will operate on. 
+ unsigned char* dst_ptr = reinterpret_cast(dst); + const unsigned char* src_ptr = reinterpret_cast(src); + size_t remaining_bytes = size; + // Loop as long as there are bytes remaining to be copied. + while (remaining_bytes > 0) { + // Set vector configuration: e8 (8-bit elements), m2 (LMUL=2). + // Use e8m2 configuration to maximize throughput. + size_t vl = VSETVL_E8M2(remaining_bytes); + // Load data from the current source pointer. + vuint8m2_t vec = VLE8_V_U8M2(src_ptr, vl); + // Store data to the current destination pointer. + VSE8_V_U8M2(dst_ptr, vec, vl); + // Update pointers and the remaining count. + src_ptr += vl; + dst_ptr += vl; + remaining_bytes -= vl; + } + +#else + std::memmove(dst, src, kShortMemCopy); + // Profiling shows that nearly all copies are short. + if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) { + std::memmove(dst + kShortMemCopy, + static_cast(src) + kShortMemCopy, + 64 - kShortMemCopy); + } +#endif +} + +void MemCopy64(ptrdiff_t dst, const void* src, size_t size) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)dst; + (void)src; + (void)size; +} + +void ClearDeferred(const void** deferred_src, size_t* deferred_length, + uint8_t* safe_source) { + *deferred_src = safe_source; + *deferred_length = 0; +} + +void DeferMemCopy(const void** deferred_src, size_t* deferred_length, + const void* src, size_t length) { + *deferred_src = src; + *deferred_length = length; +} -// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits -static const uint32 wordmask[] = { - 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +inline size_t AdvanceToNextTagARMOptimized(const uint8_t** ip_p, size_t* tag) { + const uint8_t*& ip = *ip_p; + // This section is crucial for the throughput of the decompression loop. + // The latency of an iteration is fundamentally constrained by the + // following data chain on ip. + // ip -> c = Load(ip) -> delta1 = (c & 3) -> ip += delta1 or delta2 + // delta2 = ((c >> 2) + 1) ip++ + // This is different from X86 optimizations because ARM has conditional add + // instruction (csinc) and it removes several register moves. + const size_t tag_type = *tag & 3; + const bool is_literal = (tag_type == 0); + if (is_literal) { + size_t next_literal_tag = (*tag >> 2) + 1; + *tag = ip[next_literal_tag]; + ip += next_literal_tag + 1; + } else { + *tag = ip[tag_type]; + ip += tag_type + 1; + } + return tag_type; +} + +SNAPPY_ATTRIBUTE_ALWAYS_INLINE +inline size_t AdvanceToNextTagX86Optimized(const uint8_t** ip_p, size_t* tag) { + const uint8_t*& ip = *ip_p; + // This section is crucial for the throughput of the decompression loop. + // The latency of an iteration is fundamentally constrained by the + // following data chain on ip. + // ip -> c = Load(ip) -> ip1 = ip + 1 + (c & 3) -> ip = ip1 or ip2 + // ip2 = ip + 2 + (c >> 2) + // This amounts to 8 cycles. + // 5 (load) + 1 (c & 3) + 1 (lea ip1, [ip + (c & 3) + 1]) + 1 (cmov) + size_t literal_len = *tag >> 2; + size_t tag_type = *tag; + bool is_literal; +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // TODO clang misses the fact that the (c & 3) already correctly + // sets the zero flag. + asm("and $3, %k[tag_type]\n\t" + : [tag_type] "+r"(tag_type), "=@ccz"(is_literal) + :: "cc"); +#else + tag_type &= 3; + is_literal = (tag_type == 0); +#endif + // TODO + // This is code is subtle. Loading the values first and then cmov has less + // latency then cmov ip and then load. 
However clang would move the loads + // in an optimization phase, volatile prevents this transformation. + // Note that we have enough slop bytes (64) that the loads are always valid. + size_t tag_literal = + static_cast(ip)[1 + literal_len]; + size_t tag_copy = static_cast(ip)[tag_type]; + *tag = is_literal ? tag_literal : tag_copy; + const uint8_t* ip_copy = ip + 1 + tag_type; + const uint8_t* ip_literal = ip + 2 + literal_len; + ip = is_literal ? ip_literal : ip_copy; +#if defined(__GNUC__) && defined(__x86_64__) + // TODO Clang is "optimizing" zero-extension (a totally free + // operation) this means that after the cmov of tag, it emits another movzb + // tag, byte(tag). It really matters as it's on the core chain. This dummy + // asm, persuades clang to do the zero-extension at the load (it's automatic) + // removing the expensive movzb. + asm("" ::"r"(tag_copy)); +#endif + return tag_type; +} + +// Extract the offset for copy-1 and copy-2 returns 0 for literals or copy-4. +inline uint32_t ExtractOffset(uint32_t val, size_t tag_type) { + // For x86 non-static storage works better. For ARM static storage is better. + // TODO: Once the array is recognized as a register, improve the + // readability for x86. +#if defined(__x86_64__) + constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull; + uint16_t result; + memcpy(&result, + reinterpret_cast(&kExtractMasksCombined) + 2 * tag_type, + sizeof(result)); + return val & result; +#elif defined(__aarch64__) + constexpr uint64_t kExtractMasksCombined = 0x0000FFFF00FF0000ull; + return val & static_cast( + (kExtractMasksCombined >> (tag_type * 16)) & 0xFFFF); +#else + static constexpr uint32_t kExtractMasks[4] = {0, 0xFF, 0xFFFF, 0}; + return val & kExtractMasks[tag_type]; +#endif }; -} // end namespace internal +// Core decompression loop, when there is enough data available. +// Decompresses the input buffer [ip, ip_limit) into the output buffer +// [op, op_limit_min_slop). Returning when either we are too close to the end +// of the input buffer, or we exceed op_limit_min_slop or when a exceptional +// tag is encountered (literal of length > 60) or a copy-4. +// Returns {ip, op} at the points it stopped decoding. +// TODO This function probably does not need to be inlined, as it +// should decode large chunks at a time. This allows runtime dispatch to +// implementations based on CPU capability (BMI2 / perhaps 32 / 64 byte memcpy). +template +std::pair DecompressBranchless( + const uint8_t* ip, const uint8_t* ip_limit, ptrdiff_t op, T op_base, + ptrdiff_t op_limit_min_slop) { + // If deferred_src is invalid point it here. + uint8_t safe_source[64]; + const void* deferred_src; + size_t deferred_length; + ClearDeferred(&deferred_src, &deferred_length, safe_source); + + // We unroll the inner loop twice so we need twice the spare room. + op_limit_min_slop -= kSlopBytes; + if (2 * (kSlopBytes + 1) < ip_limit - ip && op < op_limit_min_slop) { + const uint8_t* const ip_limit_min_slop = ip_limit - 2 * kSlopBytes - 1; + ip++; + // ip points just past the tag and we are touching at maximum kSlopBytes + // in an iteration. + size_t tag = ip[-1]; +#if defined(__clang__) && defined(__aarch64__) + // Workaround for https://bugs.llvm.org/show_bug.cgi?id=51317 + // when loading 1 byte, clang for aarch64 doesn't realize that it(ldrb) + // comes with free zero-extension, so clang generates another + // 'and xn, xm, 0xff' before it use that as the offset. 
This 'and' is + // redundant and can be removed by adding this dummy asm, which gives + // clang a hint that we're doing the zero-extension at the load. + asm("" ::"r"(tag)); +#endif + do { + // The throughput is limited by instructions, unrolling the inner loop + // twice reduces the amount of instructions checking limits and also + // leads to reduced mov's. + + SNAPPY_PREFETCH(ip + 128); + for (int i = 0; i < 2; i++) { + const uint8_t* old_ip = ip; + assert(tag == ip[-1]); + // For literals tag_type = 0, hence we will always obtain 0 from + // ExtractLowBytes. For literals offset will thus be kLiteralOffset. + ptrdiff_t len_minus_offset = kLengthMinusOffset[tag]; + uint32_t next; +#if defined(__aarch64__) + size_t tag_type = AdvanceToNextTagARMOptimized(&ip, &tag); + // We never need more than 16 bits. Doing a Load16 allows the compiler + // to elide the masking operation in ExtractOffset. + next = LittleEndian::Load16(old_ip); +#else + size_t tag_type = AdvanceToNextTagX86Optimized(&ip, &tag); + next = LittleEndian::Load32(old_ip); +#endif + size_t len = len_minus_offset & 0xFF; + ptrdiff_t extracted = ExtractOffset(next, tag_type); + ptrdiff_t len_min_offset = len_minus_offset - extracted; + if (SNAPPY_PREDICT_FALSE(len_minus_offset > extracted)) { + if (SNAPPY_PREDICT_FALSE(len & 0x80)) { + // Exceptional case (long literal or copy 4). + // Actually doing the copy here is negatively impacting the main + // loop due to compiler incorrectly allocating a register for + // this fallback. Hence we just break. + break_loop: + ip = old_ip; + goto exit; + } + // Only copy-1 or copy-2 tags can get here. + assert(tag_type == 1 || tag_type == 2); + std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len; + // Guard against copies before the buffer start. + // Execute any deferred MemCopy since we write to dst here. + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + ClearDeferred(&deferred_src, &deferred_length, safe_source); + if (SNAPPY_PREDICT_FALSE(delta < 0 || + !Copy64BytesWithPatternExtension( + op_base + op, len - len_min_offset))) { + goto break_loop; + } + // We aren't deferring this copy so add length right away. + op += len; + continue; + } + std::ptrdiff_t delta = (op + deferred_length) + len_min_offset - len; + if (SNAPPY_PREDICT_FALSE(delta < 0)) { + // Due to the spurious offset in literals have this will trigger + // at the start of a block when op is still smaller than 256. + if (tag_type != 0) goto break_loop; + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + DeferMemCopy(&deferred_src, &deferred_length, old_ip, len); + continue; + } + + // For copies we need to copy from op_base + delta, for literals + // we need to copy from ip instead of from the stream. + const void* from = + tag_type ? reinterpret_cast(op_base + delta) : old_ip; + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + DeferMemCopy(&deferred_src, &deferred_length, from, len); + } + } while (ip < ip_limit_min_slop && + static_cast(op + deferred_length) < op_limit_min_slop); + exit: + ip--; + assert(ip <= ip_limit); + } + // If we deferred a copy then we can perform. If we are up to date then we + // might not have enough slop bytes and could run past the end. 
+ if (deferred_length) { + MemCopy64(op_base + op, deferred_src, deferred_length); + op += deferred_length; + ClearDeferred(&deferred_src, &deferred_length, safe_source); + } + return {ip, op}; +} // Helper class for decompression class SnappyDecompressor { private: - Source* reader_; // Underlying source of bytes to decompress - const char* ip_; // Points to next buffered byte - const char* ip_limit_; // Points just past buffered bytes - uint32 peeked_; // Bytes peeked from reader (need to skip) - bool eof_; // Hit end of input without an error? - char scratch_[kMaximumTagLength]; // See RefillTag(). + Source* reader_; // Underlying source of bytes to decompress + const char* ip_; // Points to next buffered byte + const char* ip_limit_; // Points just past buffered bytes + // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from + // buffer. + const char* ip_limit_min_maxtaglen_; + uint64_t peeked_; // Bytes peeked from reader (need to skip) + bool eof_; // Hit end of input without an error? + char scratch_[kMaximumTagLength]; // See RefillTag(). // Ensure that all of the tag metadata for the next tag is available // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even @@ -609,14 +1528,14 @@ class SnappyDecompressor { // Returns true on success, false on error or end of input. bool RefillTag(); + void ResetLimit(const char* ip) { + ip_limit_min_maxtaglen_ = + ip_limit_ - std::min(ip_limit_ - ip, kMaximumTagLength - 1); + } + public: explicit SnappyDecompressor(Source* reader) - : reader_(reader), - ip_(NULL), - ip_limit_(NULL), - peeked_(0), - eof_(false) { - } + : reader_(reader), ip_(NULL), ip_limit_(NULL), peeked_(0), eof_(false) {} ~SnappyDecompressor() { // Advance past any bytes we peeked at from the reader @@ -624,18 +1543,16 @@ class SnappyDecompressor { } // Returns true iff we have hit the end of the input without an error. - bool eof() const { - return eof_; - } + bool eof() const { return eof_; } // Read the uncompressed length stored at the start of the compressed data. - // On succcess, stores the length in *result and returns true. + // On success, stores the length in *result and returns true. // On failure, returns false. - bool ReadUncompressedLength(uint32* result) { - assert(ip_ == NULL); // Must not have read anything yet + bool ReadUncompressedLength(uint32_t* result) { + assert(ip_ == NULL); // Must not have read anything yet // Length is encoded in 1..5 bytes *result = 0; - uint32 shift = 0; + uint32_t shift = 0; while (true) { if (shift >= 32) return false; size_t n; @@ -643,8 +1560,8 @@ class SnappyDecompressor { if (n == 0) return false; const unsigned char c = *(reinterpret_cast(ip)); reader_->Skip(1); - uint32 val = c & 0x7f; - if (((val << shift) >> shift) != val) return false; + uint32_t val = c & 0x7f; + if (LeftShiftOverflows(static_cast(val), shift)) return false; *result |= val << shift; if (c < 128) { break; @@ -657,36 +1574,47 @@ class SnappyDecompressor { // Process the next item found in the input. // Returns true if successful, false on error or end of input. template - void DecompressAllTags(Writer* writer) { +#if defined(__GNUC__) && defined(__x86_64__) + __attribute__((aligned(32))) +#endif + void + DecompressAllTags(Writer* writer) { const char* ip = ip_; - // For position-independent executables, accessing global arrays can be - // slow. Move wordmask array onto the stack to mitigate this. 
-    uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
-    // Do not use memcpy to copy internal::wordmask to
-    // wordmask. LLVM converts stack arrays to global arrays if it detects
-    // const stack arrays and this hurts the performance of position
-    // independent code. This change is temporary and can be reverted when
-    // https://reviews.llvm.org/D30759 is approved.
-    wordmask[0] = internal::wordmask[0];
-    wordmask[1] = internal::wordmask[1];
-    wordmask[2] = internal::wordmask[2];
-    wordmask[3] = internal::wordmask[3];
-    wordmask[4] = internal::wordmask[4];
-
+    ResetLimit(ip);
+    auto op = writer->GetOutputPtr();
    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
    // scope to optimize the expression based on the local
    // context, which overall increases speed.
-  #define MAYBE_REFILL() \
-    if (ip_limit_ - ip < kMaximumTagLength) { \
-      ip_ = ip; \
-      if (!RefillTag()) return; \
-      ip = ip_; \
-    }
-
+#define MAYBE_REFILL()                                       \
+  if (SNAPPY_PREDICT_FALSE(ip >= ip_limit_min_maxtaglen_)) { \
+    ip_ = ip;                                                \
+    if (SNAPPY_PREDICT_FALSE(!RefillTag())) goto exit;       \
+    ip = ip_;                                                \
+    ResetLimit(ip);                                          \
+  }                                                          \
+  preload = static_cast<uint8_t>(*ip)
+
+    // At the start of the for loop below the least significant byte of preload
+    // contains the tag.
+    uint32_t preload;
    MAYBE_REFILL();
-    for ( ;; ) {
-      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
+    for (;;) {
+      {
+        ptrdiff_t op_limit_min_slop;
+        auto op_base = writer->GetBase(&op_limit_min_slop);
+        if (op_base) {
+          auto res =
+              DecompressBranchless(reinterpret_cast<const uint8_t*>(ip),
+                                   reinterpret_cast<const uint8_t*>(ip_limit_),
+                                   op - op_base, op_base, op_limit_min_slop);
+          ip = reinterpret_cast<const char*>(res.first);
+          op = op_base + res.second;
+          MAYBE_REFILL();
+        }
+      }
+      const uint8_t c = static_cast<uint8_t>(preload);
+      ip++;

      // Ratio of iterations that have LITERAL vs non-LITERAL for different
      // inputs.
@@ -700,67 +1628,102 @@ class SnappyDecompressor {
      //  txt[1-4]  25%  75%
      //  pb        24%  76%
      //  bin       24%  76%
-      if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
+      if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
        size_t literal_length = (c >> 2) + 1u;
-        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
+        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length, &op)) {
          assert(literal_length < 61);
          ip += literal_length;
-          // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
+          // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend()
          // will not return true unless there's already at least five spare
          // bytes in addition to the literal.
+          preload = static_cast<uint8_t>(*ip);
          continue;
        }
-        if (PREDICT_FALSE(literal_length >= 61)) {
+        if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
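          // Worked example: a tag byte c == 0xF0 encodes (c >> 2) + 1 == 61,
          // i.e. a long literal whose length is stored in 61 - 60 == 1
          // trailing byte; a trailing byte of 0x2A then yields a literal
          // length of 0x2A + 1 == 43 bytes, as computed below.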
          const size_t literal_length_length = literal_length - 60;
          literal_length =
-              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
+              ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) +
+              1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
-          if (!writer->Append(ip, avail)) return;
+          if (!writer->Append(ip, avail, &op)) goto exit;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
-          if (avail == 0) return;  // Premature end of input
+          if (avail == 0) goto exit;
          ip_limit_ = ip + avail;
+          ResetLimit(ip);
        }
-        if (!writer->Append(ip, literal_length)) {
-          return;
-        }
+        if (!writer->Append(ip, literal_length, &op)) goto exit;
        ip += literal_length;
        MAYBE_REFILL();
      } else {
-        const size_t entry = char_table[c];
-        const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
-        const size_t length = entry & 0xff;
-        ip += entry >> 11;
-
-        // copy_offset/256 is encoded in bits 8..10. By just fetching
-        // those bits, we get copy_offset (since the bit-field starts at
-        // bit 8).
-        const size_t copy_offset = entry & 0x700;
-        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
-          return;
+        if (SNAPPY_PREDICT_FALSE((c & 3) == COPY_4_BYTE_OFFSET)) {
+          const size_t copy_offset = LittleEndian::Load32(ip);
+          const size_t length = (c >> 2) + 1;
+          ip += 4;
+
+          if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+        } else {
+          const ptrdiff_t entry = kLengthMinusOffset[c];
+          preload = LittleEndian::Load32(ip);
+          const uint32_t trailer = ExtractLowBytes(preload, c & 3);
+          const uint32_t length = entry & 0xff;
+          assert(length > 0);
+
+          // copy_offset/256 is encoded in bits 8..10. By just fetching
+          // those bits, we get copy_offset (since the bit-field starts at
+          // bit 8).
+          const uint32_t copy_offset = trailer - entry + length;
+          if (!writer->AppendFromSelf(copy_offset, length, &op)) goto exit;
+
+          ip += (c & 3);
+          // By using the result of the previous load we reduce the critical
+          // dependency chain of ip to 4 cycles.
+          preload >>= (c & 3) * 8;
+          if (ip < ip_limit_min_maxtaglen_) continue;
+        }
        MAYBE_REFILL();
      }
    }
-  #undef MAYBE_REFILL
+#undef MAYBE_REFILL
+  exit:
+    writer->SetOutputPtr(op);
  }
};

+constexpr uint32_t CalculateNeeded(uint8_t tag) {
+  return ((tag & 3) == 0 && tag >= (60 * 4))
+             ? (tag >> 2) - 58
+             : (0x05030201 >> ((tag * 8) & 31)) & 0xFF;
+}
+
+#if __cplusplus >= 201402L
+constexpr bool VerifyCalculateNeeded() {
+  // Verify all 256 possible tag bytes, not just tag 0.
+  for (int i = 0; i < 256; i++) {
+    if (CalculateNeeded(i) != static_cast<uint32_t>(char_table[i] >> 11) + 1)
+      return false;
+  }
+  return true;
+}
+
+// Make sure CalculateNeeded is correct by verifying it against the established
+// table encoding the number of added bytes needed.
+static_assert(VerifyCalculateNeeded(), "");
+#endif  // c++14

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);  // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
@@ -772,26 +1735,31 @@ bool SnappyDecompressor::RefillTag() {
  // Read the tag character
  assert(ip < ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
-  const uint32 entry = char_table[c];
-  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
+  // At this point make sure that the data for the next tag is consecutive.
+  // For copy 1 this means the next 2 bytes (tag and 1 byte offset)
+  // For copy 2 the next 3 bytes (tag and 2 byte offset)
+  // For copy 4 the next 5 bytes (tag and 4 byte offset)
+  // For all small literals we only need 1 byte, but for literals 60...63 the
+  // length is encoded in 1...4 extra bytes.
+  const uint32_t needed = CalculateNeeded(c);
  assert(needed <= sizeof(scratch_));

  // Read more bytes from reader if needed
-  uint32 nbuf = ip_limit_ - ip;
+  uint64_t nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents. We store the needed bytes in "scratch_". They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
-    memmove(scratch_, ip, nbuf);
+    std::memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
-      uint32 to_add = std::min(needed - nbuf, length);
-      memcpy(scratch_ + nbuf, src, to_add);
+      uint64_t to_add = std::min<uint64_t>(needed - nbuf, length);
+      std::memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
@@ -801,7 +1769,7 @@ bool SnappyDecompressor::RefillTag() {
  } else if (nbuf < kMaximumTagLength) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
-    memmove(scratch_, ip, nbuf);
+    std::memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
@@ -817,7 +1785,7 @@
template <typename Writer>
static bool InternalUncompress(Source* r, Writer* writer) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
-  uint32 uncompressed_len = 0;
+  uint32_t uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;

  return InternalUncompressAllTags(&decompressor, writer, r->Available(),
@@ -826,10 +1794,10 @@ static bool InternalUncompress(Source* r, Writer* writer) {

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
-                                      Writer* writer,
-                                      uint32 compressed_len,
-                                      uint32 uncompressed_len) {
-  Report("snappy_uncompress", compressed_len, uncompressed_len);
+                                      Writer* writer, uint32_t compressed_len,
+                                      uint32_t uncompressed_len) {
+  int token = 0;
+  Report(token, "snappy_uncompress", compressed_len, uncompressed_len);

  writer->SetExpectedLength(uncompressed_len);

@@ -839,23 +1807,28 @@ static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
  return (decompressor->eof() && writer->CheckLength());
}

-bool GetUncompressedLength(Source* source, uint32* result) {
+bool GetUncompressedLength(Source* source, uint32_t* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
+  return Compress(reader, writer, CompressionOptions{});
+}
+
+size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
+  assert(options.level == 1 || options.level == 2);
+  int token = 0;
  size_t written = 0;
  size_t N = reader->Available();
+  assert(N <= 0xFFFFFFFFu);
  const size_t uncompressed_size = N;
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
-  writer->Append(ulength, p-ulength);
+  writer->Append(ulength, p - ulength);
  written += (p - ulength);

-  internal::WorkingMemory wmem;
-  char* scratch = NULL;
-  char* scratch_output = NULL;
+  internal::WorkingMemory wmem(N);

  while (N > 0) {
    // Get next block to compress (without
copying if possible) @@ -871,20 +1844,14 @@ size_t Compress(Source* reader, Sink* writer) { pending_advance = num_to_read; fragment_size = num_to_read; } else { - // Read into scratch buffer - if (scratch == NULL) { - // If this is the last iteration, we want to allocate N bytes - // of space, otherwise the max possible kBlockSize space. - // num_to_read contains exactly the correct value - scratch = new char[num_to_read]; - } - memcpy(scratch, fragment, bytes_read); + char* scratch = wmem.GetScratchInput(); + std::memcpy(scratch, fragment, bytes_read); reader->Skip(bytes_read); while (bytes_read < num_to_read) { fragment = reader->Peek(&fragment_size); size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); + std::memcpy(scratch + bytes_read, fragment, n); bytes_read += n; reader->Skip(n); } @@ -896,23 +1863,26 @@ size_t Compress(Source* reader, Sink* writer) { // Get encoding table for compression int table_size; - uint16* table = wmem.GetHashTable(num_to_read, &table_size); + uint16_t* table = wmem.GetHashTable(num_to_read, &table_size); // Compress input_fragment and append to dest - const int max_output = MaxCompressedLength(num_to_read); + int max_output = MaxCompressedLength(num_to_read); + // Since we encode kBlockSize regions followed by a region + // which is <= kBlockSize in length, a previously allocated + // scratch_output[] region is big enough for this iteration. // Need a scratch buffer for the output, in case the byte sink doesn't // have room for us directly. - if (scratch_output == NULL) { - scratch_output = new char[max_output]; - } else { - // Since we encode kBlockSize regions followed by a region - // which is <= kBlockSize in length, a previously allocated - // scratch_output[] region is big enough for this iteration. + char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); + char* end = nullptr; + if (options.level == 1) { + end = internal::CompressFragment(fragment, fragment_size, dest, table, + table_size); + } else if (options.level == 2) { + end = internal::CompressFragmentDoubleHash( + fragment, fragment_size, dest, table, table_size >> 1, + table + (table_size >> 1), table_size >> 1); } - char* dest = writer->GetAppendBuffer(max_output, scratch_output); - char* end = internal::CompressFragment(fragment, fragment_size, - dest, table, table_size); writer->Append(dest, end - dest); written += (end - dest); @@ -920,11 +1890,7 @@ size_t Compress(Source* reader, Sink* writer) { reader->Skip(pending_advance); } - Report("snappy_compress", written, uncompressed_size); - - delete[] scratch; - delete[] scratch_output; - + Report(token, "snappy_compress", written, uncompressed_size); return written; } @@ -932,19 +1898,88 @@ size_t Compress(Source* reader, Sink* writer) { // IOVec interfaces // ----------------------------------------------------------------------- +// A `Source` implementation that yields the contents of an `iovec` array. Note +// that `total_size` is the total number of bytes to be read from the elements +// of `iov` (_not_ the total number of elements in `iov`). +class SnappyIOVecReader : public Source { + public: + SnappyIOVecReader(const struct iovec* iov, size_t total_size) + : curr_iov_(iov), + curr_pos_(total_size > 0 ? reinterpret_cast(iov->iov_base) + : nullptr), + curr_size_remaining_(total_size > 0 ? iov->iov_len : 0), + total_size_remaining_(total_size) { + // Skip empty leading `iovec`s. 
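+    // (An iovec array may legally contain zero-length entries anywhere, and
+    // Peek() must never report an empty fragment while data remains, so such
+    // entries are skipped eagerly here and in Advance().)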
+ if (total_size > 0 && curr_size_remaining_ == 0) Advance(); + } + + ~SnappyIOVecReader() override = default; + + size_t Available() const override { return total_size_remaining_; } + + const char* Peek(size_t* len) override { + *len = curr_size_remaining_; + return curr_pos_; + } + + void Skip(size_t n) override { + while (n >= curr_size_remaining_ && n > 0) { + n -= curr_size_remaining_; + Advance(); + } + curr_size_remaining_ -= n; + total_size_remaining_ -= n; + curr_pos_ += n; + } + + private: + // Advances to the next nonempty `iovec` and updates related variables. + void Advance() { + do { + assert(total_size_remaining_ >= curr_size_remaining_); + total_size_remaining_ -= curr_size_remaining_; + if (total_size_remaining_ == 0) { + curr_pos_ = nullptr; + curr_size_remaining_ = 0; + return; + } + ++curr_iov_; + curr_pos_ = reinterpret_cast(curr_iov_->iov_base); + curr_size_remaining_ = curr_iov_->iov_len; + } while (curr_size_remaining_ == 0); + } + + // The `iovec` currently being read. + const struct iovec* curr_iov_; + // The location in `curr_iov_` currently being read. + const char* curr_pos_; + // The amount of unread data in `curr_iov_`. + size_t curr_size_remaining_; + // The amount of unread data in the entire input array. + size_t total_size_remaining_; +}; + // A type that writes to an iovec. // Note that this is not a "ByteSink", but a type that matches the // Writer template argument to SnappyDecompressor::DecompressAllTags(). class SnappyIOVecWriter { private: + // output_iov_end_ is set to iov + count and used to determine when + // the end of the iovs is reached. + const struct iovec* output_iov_end_; + +#if !defined(NDEBUG) const struct iovec* output_iov_; - const size_t output_iov_count_; +#endif // !defined(NDEBUG) + + // Current iov that is being written into. + const struct iovec* curr_iov_; - // We are currently writing into output_iov_[curr_iov_index_]. - size_t curr_iov_index_; + // Pointer to current iov's write location. + char* curr_iov_output_; - // Bytes written to output_iov_[curr_iov_index_] so far. - size_t curr_iov_written_; + // Remaining bytes to write into curr_iov_output. + size_t curr_iov_remaining_; // Total bytes decompressed into output_iov_ so far. size_t total_written_; @@ -952,53 +1987,61 @@ class SnappyIOVecWriter { // Maximum number of bytes that will be decompressed into output_iov_. size_t output_limit_; - inline char* GetIOVecPointer(size_t index, size_t offset) { - return reinterpret_cast(output_iov_[index].iov_base) + - offset; + static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { + return reinterpret_cast(iov->iov_base) + offset; } public: // Does not take ownership of iov. iov must be valid during the // entire lifetime of the SnappyIOVecWriter. inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count) - : output_iov_(iov), - output_iov_count_(iov_count), - curr_iov_index_(0), - curr_iov_written_(0), + : output_iov_end_(iov + iov_count), +#if !defined(NDEBUG) + output_iov_(iov), +#endif // !defined(NDEBUG) + curr_iov_(iov), + curr_iov_output_(iov_count ? reinterpret_cast(iov->iov_base) + : nullptr), + curr_iov_remaining_(iov_count ? 
iov->iov_len : 0), total_written_(0), output_limit_(-1) { } - inline void SetExpectedLength(size_t len) { - output_limit_ = len; - } + inline void SetExpectedLength(size_t len) { output_limit_ = len; } - inline bool CheckLength() const { - return total_written_ == output_limit_; - } + inline bool CheckLength() const { return total_written_ == output_limit_; } - inline bool Append(const char* ip, size_t len) { + inline bool Append(const char* ip, size_t len, char**) { if (total_written_ + len > output_limit_) { return false; } + return AppendNoCheck(ip, len); + } + + char* GetOutputPtr() { return nullptr; } + char* GetBase(ptrdiff_t*) { return nullptr; } + void SetOutputPtr(char* op) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. + (void)op; + } + + inline bool AppendNoCheck(const char* ip, size_t len) { while (len > 0) { - assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); - if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) { + if (curr_iov_remaining_ == 0) { // This iovec is full. Go to the next one. - if (curr_iov_index_ + 1 >= output_iov_count_) { + if (curr_iov_ + 1 >= output_iov_end_) { return false; } - curr_iov_written_ = 0; - ++curr_iov_index_; + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; } - const size_t to_write = std::min( - len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_); - memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_), - ip, - to_write); - curr_iov_written_ += to_write; + const size_t to_write = std::min(len, curr_iov_remaining_); + std::memcpy(curr_iov_output_, ip, to_write); + curr_iov_output_ += to_write; + curr_iov_remaining_ -= to_write; total_written_ += to_write; ip += to_write; len -= to_write; @@ -1007,14 +2050,15 @@ class SnappyIOVecWriter { return true; } - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { + inline bool TryFastAppend(const char* ip, size_t available, size_t len, + char**) { const size_t space_left = output_limit_ - total_written_; if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 && - output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) { + curr_iov_remaining_ >= 16) { // Fast path, used for the majority (about 95%) of invocations. - char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_); - UnalignedCopy128(ip, ptr); - curr_iov_written_ += len; + UnalignedCopy128(ip, curr_iov_output_); + curr_iov_output_ += len; + curr_iov_remaining_ -= len; total_written_ += len; return true; } @@ -1022,8 +2066,10 @@ class SnappyIOVecWriter { return false; } - inline bool AppendFromSelf(size_t offset, size_t len) { - if (offset > total_written_ || offset == 0) { + inline bool AppendFromSelf(size_t offset, size_t len, char**) { + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. + if (offset - 1u >= total_written_) { return false; } const size_t space_left = output_limit_ - total_written_; @@ -1032,8 +2078,8 @@ class SnappyIOVecWriter { } // Locate the iovec from which we need to start the copy. 
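+    // Example: iovecs of sizes {8, 4} with 10 bytes written so far sit at
+    // (curr_iov_ == &iov[1], 2 bytes used). For offset == 5 the walk below
+    // borrows 3 bytes from iov[0], landing at (iov[0], offset 8 - 3 == 5).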
- size_t from_iov_index = curr_iov_index_; - size_t from_iov_offset = curr_iov_written_; + const iovec* from_iov = curr_iov_; + size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; while (offset > 0) { if (from_iov_offset >= offset) { from_iov_offset -= offset; @@ -1041,47 +2087,48 @@ class SnappyIOVecWriter { } offset -= from_iov_offset; - assert(from_iov_index > 0); - --from_iov_index; - from_iov_offset = output_iov_[from_iov_index].iov_len; + --from_iov; +#if !defined(NDEBUG) + assert(from_iov >= output_iov_); +#endif // !defined(NDEBUG) + from_iov_offset = from_iov->iov_len; } // Copy bytes starting from the iovec pointed to by from_iov_index to // the current iovec. while (len > 0) { - assert(from_iov_index <= curr_iov_index_); - if (from_iov_index != curr_iov_index_) { - const size_t to_copy = std::min( - output_iov_[from_iov_index].iov_len - from_iov_offset, - len); - Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy); + assert(from_iov <= curr_iov_); + if (from_iov != curr_iov_) { + const size_t to_copy = + std::min(from_iov->iov_len - from_iov_offset, len); + AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); len -= to_copy; if (len > 0) { - ++from_iov_index; + ++from_iov; from_iov_offset = 0; } } else { - assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); - size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len - - curr_iov_written_, - len); + size_t to_copy = curr_iov_remaining_; if (to_copy == 0) { // This iovec is full. Go to the next one. - if (curr_iov_index_ + 1 >= output_iov_count_) { + if (curr_iov_ + 1 >= output_iov_end_) { return false; } - ++curr_iov_index_; - curr_iov_written_ = 0; + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; continue; } if (to_copy > len) { to_copy = len; } - IncrementalCopySlow( - GetIOVecPointer(from_iov_index, from_iov_offset), - GetIOVecPointer(curr_iov_index_, curr_iov_written_), - GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy); - curr_iov_written_ += to_copy; + assert(to_copy > 0); + + IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), + curr_iov_output_, curr_iov_output_ + to_copy, + curr_iov_output_ + curr_iov_remaining_); + curr_iov_output_ += to_copy; + curr_iov_remaining_ -= to_copy; from_iov_offset += to_copy; total_written_ += to_copy; len -= to_copy; @@ -1118,59 +2165,74 @@ class SnappyArrayWriter { char* base_; char* op_; char* op_limit_; + // If op < op_limit_min_slop_ then it's safe to unconditionally write + // kSlopBytes starting at op. + char* op_limit_min_slop_; public: inline explicit SnappyArrayWriter(char* dst) : base_(dst), op_(dst), - op_limit_(dst) { - } + op_limit_(dst), + op_limit_min_slop_(dst) {} // Safe default see invariant. inline void SetExpectedLength(size_t len) { op_limit_ = op_ + len; + // Prevent pointer from being past the buffer. 
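+    // Because len is at least kSlopBytes - 1 in the interesting case, the
+    // assignment below leaves op_limit_min_slop_ == op_limit_ - (kSlopBytes -
+    // 1), so any op < op_limit_min_slop_ satisfies op + kSlopBytes <=
+    // op_limit_.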
+    op_limit_min_slop_ = op_limit_ - std::min<size_t>(kSlopBytes - 1, len);
  }

-  inline bool CheckLength() const {
-    return op_ == op_limit_;
+  inline bool CheckLength() const { return op_ == op_limit_; }
+
+  char* GetOutputPtr() { return op_; }
+  char* GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop = op_limit_min_slop_ - base_;
+    return base_;
  }
+  void SetOutputPtr(char* op) { op_ = op; }

-  inline bool Append(const char* ip, size_t len) {
-    char* op = op_;
+  inline bool Append(const char* ip, size_t len, char** op_p) {
+    char* op = *op_p;
    const size_t space_left = op_limit_ - op;
-    if (space_left < len) {
-      return false;
-    }
-    memcpy(op, ip, len);
-    op_ = op + len;
+    if (space_left < len) return false;
+    std::memcpy(op, ip, len);
+    *op_p = op + len;
    return true;
  }

-  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
-    char* op = op_;
+  inline bool TryFastAppend(const char* ip, size_t available, size_t len,
+                            char** op_p) {
+    char* op = *op_p;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy128(ip, op);
-      op_ = op + len;
+      *op_p = op + len;
      return true;
    } else {
      return false;
    }
  }

-  inline bool AppendFromSelf(size_t offset, size_t len) {
-    char* const op_end = op_ + len;
+  SNAPPY_ATTRIBUTE_ALWAYS_INLINE
+  inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) {
+    assert(len > 0);
+    char* const op = *op_p;
+    assert(op >= base_);
+    char* const op_end = op + len;
    // Check if we try to append from before the start of the buffer.
-    // Normally this would just be a check for "produced < offset",
-    // but "produced <= offset - 1u" is equivalent for every case
-    // except the one where offset==0, where the right side will wrap around
-    // to a very big number. This is convenient, as offset==0 is another
-    // invalid case that we also want to catch, so that we do not go
-    // into an infinite loop.
-    if (Produced() <= offset - 1u || op_end > op_limit_) return false;
-    op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_);
+    if (SNAPPY_PREDICT_FALSE(static_cast<size_t>(op - base_) < offset))
+      return false;
+    if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) ||
+                             op >= op_limit_min_slop_ || offset < len)) {
+      if (op_end > op_limit_ || offset == 0) return false;
+      *op_p = IncrementalCopy(op - offset, op, op_end, op_limit_);
+      return true;
+    }
+    std::memmove(op, op - offset, kSlopBytes);
+    *op_p = op_end;
    return true;
  }
  inline size_t Produced() const {
@@ -1180,8 +2242,9 @@ class SnappyArrayWriter {
  inline void Flush() {}
};

-bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
-  ByteArraySource reader(compressed, n);
+bool RawUncompress(const char* compressed, size_t compressed_length,
+                   char* uncompressed) {
+  ByteArraySource reader(compressed, compressed_length);
  return RawUncompress(&reader, uncompressed);
}

@@ -1190,9 +2253,10 @@ bool RawUncompress(Source* compressed, char* uncompressed) {
  return InternalUncompress(compressed, &output);
}

-bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
+bool Uncompress(const char* compressed, size_t compressed_length,
+                std::string* uncompressed) {
  size_t ulength;
-  if (!GetUncompressedLength(compressed, n, &ulength)) {
+  if (!GetUncompressedLength(compressed, compressed_length, &ulength)) {
    return false;
  }
  // On 32-bit builds: max_size() < kuint32max.
Check for that instead
@@ -1201,7 +2265,8 @@
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
-  return RawUncompress(compressed, n, string_as_array(uncompressed));
+  return RawUncompress(compressed, compressed_length,
+                       string_as_array(uncompressed));
}

// A Writer that drops everything on the floor and just does validation
@@ -1211,32 +2276,44 @@ class SnappyDecompressionValidator {
  size_t produced_;

 public:
-  inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
-  inline void SetExpectedLength(size_t len) {
-    expected_ = len;
-  }
-  inline bool CheckLength() const {
-    return expected_ == produced_;
+  inline SnappyDecompressionValidator() : expected_(0), produced_(0) {}
+  inline void SetExpectedLength(size_t len) { expected_ = len; }
+  size_t GetOutputPtr() { return produced_; }
+  size_t GetBase(ptrdiff_t* op_limit_min_slop) {
+    *op_limit_min_slop =
+        std::numeric_limits<ptrdiff_t>::max() - kSlopBytes + 1;
+    return 1;
  }
-  inline bool Append(const char* ip, size_t len) {
-    produced_ += len;
-    return produced_ <= expected_;
+  void SetOutputPtr(size_t op) { produced_ = op; }
+  inline bool CheckLength() const { return expected_ == produced_; }
+  inline bool Append(const char* ip, size_t len, size_t* produced) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)ip;
+
+    *produced += len;
+    return *produced <= expected_;
  }
-  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
+  inline bool TryFastAppend(const char* ip, size_t available, size_t length,
+                            size_t* produced) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)ip;
+    (void)available;
+    (void)length;
+    (void)produced;
+
    return false;
  }
-  inline bool AppendFromSelf(size_t offset, size_t len) {
+  inline bool AppendFromSelf(size_t offset, size_t len, size_t* produced) {
    // See SnappyArrayWriter::AppendFromSelf for an explanation of
    // the "offset - 1u" trick.
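+    // Concretely, with unsigned arithmetic offset == 0 wraps to SIZE_MAX, so
+    // one compare rejects both offset == 0 and offset > produced:
+    //   offset == 0, produced == 3: 3 <= SIZE_MAX    -> reject
+    //   offset == 5, produced == 3: 3 <= 4           -> reject
+    //   offset == 2, produced == 3: 3 <= 1 is false  -> accept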
- if (produced_ <= offset - 1u) return false; - produced_ += len; - return produced_ <= expected_; + if (*produced <= offset - 1u) return false; + *produced += len; + return *produced <= expected_; } inline void Flush() {} }; -bool IsValidCompressedBuffer(const char* compressed, size_t n) { - ByteArraySource reader(compressed, n); +bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length) { + ByteArraySource reader(compressed, compressed_length); SnappyDecompressionValidator writer; return InternalUncompress(&reader, &writer); } @@ -1246,26 +2323,77 @@ bool IsValidCompressed(Source* compressed) { return InternalUncompress(compressed, &writer); } -void RawCompress(const char* input, - size_t input_length, - char* compressed, +void RawCompress(const char* input, size_t input_length, char* compressed, size_t* compressed_length) { + RawCompress(input, input_length, compressed, compressed_length, + CompressionOptions{}); +} + +void RawCompress(const char* input, size_t input_length, char* compressed, + size_t* compressed_length, CompressionOptions options) { ByteArraySource reader(input, input_length); UncheckedByteArraySink writer(compressed); - Compress(&reader, &writer); + Compress(&reader, &writer, options); // Compute how many bytes were added *compressed_length = (writer.CurrentDestination() - compressed); } -size_t Compress(const char* input, size_t input_length, string* compressed) { +void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length) { + RawCompressFromIOVec(iov, uncompressed_length, compressed, compressed_length, + CompressionOptions{}); +} + +void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length, + CompressionOptions options) { + SnappyIOVecReader reader(iov, uncompressed_length); + UncheckedByteArraySink writer(compressed); + Compress(&reader, &writer, options); + + // Compute how many bytes were added. + *compressed_length = writer.CurrentDestination() - compressed; +} + +size_t Compress(const char* input, size_t input_length, + std::string* compressed) { + return Compress(input, input_length, compressed, CompressionOptions{}); +} + +size_t Compress(const char* input, size_t input_length, std::string* compressed, + CompressionOptions options) { // Pre-grow the buffer to the max length of the compressed output STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); size_t compressed_length; RawCompress(input, input_length, string_as_array(compressed), - &compressed_length); - compressed->resize(compressed_length); + &compressed_length, options); + compressed->erase(compressed_length); + return compressed_length; +} + +size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed) { + return CompressFromIOVec(iov, iov_cnt, compressed, CompressionOptions{}); +} + +size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed, CompressionOptions options) { + // Compute the number of bytes to be compressed. + size_t uncompressed_length = 0; + for (size_t i = 0; i < iov_cnt; ++i) { + uncompressed_length += iov[i].iov_len; + } + + // Pre-grow the buffer to the max length of the compressed output. 
+ STLStringResizeUninitialized(compressed, MaxCompressedLength( + uncompressed_length)); + + size_t compressed_length; + RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed), + &compressed_length, options); + compressed->erase(compressed_length); return compressed_length; } @@ -1290,13 +2418,14 @@ class SnappyScatteredWriter { size_t full_size_; // Pointer into current output block - char* op_base_; // Base of output block - char* op_ptr_; // Pointer to next unfilled byte in block - char* op_limit_; // Pointer just past block + char* op_base_; // Base of output block + char* op_ptr_; // Pointer to next unfilled byte in block + char* op_limit_; // Pointer just past block + // If op < op_limit_min_slop_ then it's safe to unconditionally write + // kSlopBytes starting at op. + char* op_limit_min_slop_; - inline size_t Size() const { - return full_size_ + (op_ptr_ - op_base_); - } + inline size_t Size() const { return full_size_ + (op_ptr_ - op_base_); } bool SlowAppend(const char* ip, size_t len); bool SlowAppendFromSelf(size_t offset, size_t len); @@ -1307,59 +2436,79 @@ class SnappyScatteredWriter { full_size_(0), op_base_(NULL), op_ptr_(NULL), - op_limit_(NULL) { + op_limit_(NULL), + op_limit_min_slop_(NULL) {} + char* GetOutputPtr() { return op_ptr_; } + char* GetBase(ptrdiff_t* op_limit_min_slop) { + *op_limit_min_slop = op_limit_min_slop_ - op_base_; + return op_base_; } + void SetOutputPtr(char* op) { op_ptr_ = op; } inline void SetExpectedLength(size_t len) { assert(blocks_.empty()); expected_ = len; } - inline bool CheckLength() const { - return Size() == expected_; - } + inline bool CheckLength() const { return Size() == expected_; } // Return the number of bytes actually uncompressed so far - inline size_t Produced() const { - return Size(); - } + inline size_t Produced() const { return Size(); } - inline bool Append(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; + inline bool Append(const char* ip, size_t len, char** op_p) { + char* op = *op_p; + size_t avail = op_limit_ - op; if (len <= avail) { // Fast path - memcpy(op_ptr_, ip, len); - op_ptr_ += len; + std::memcpy(op, ip, len); + *op_p = op + len; return true; } else { - return SlowAppend(ip, len); + op_ptr_ = op; + bool res = SlowAppend(ip, len); + *op_p = op_ptr_; + return res; } } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - char* op = op_ptr_; + inline bool TryFastAppend(const char* ip, size_t available, size_t length, + char** op_p) { + char* op = *op_p; const int space_left = op_limit_ - op; if (length <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { // Fast path, used for the majority (about 95%) of invocations. UnalignedCopy128(ip, op); - op_ptr_ = op + length; + *op_p = op + length; return true; } else { return false; } } - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ptr_ + len; - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) { - // Fast path: src and dst in current block. - op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_); + inline bool AppendFromSelf(size_t offset, size_t len, char** op_p) { + char* op = *op_p; + assert(op >= op_base_); + // Check if we try to append from before the start of the buffer. 
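+    // Note that offset < len is the overlapping case: the copy source and
+    // destination overlap, so the bytes must be produced by pattern
+    // extension (e.g. offset == 1 replicates one byte len times); the slow
+    // paths below handle it via IncrementalCopy or SlowAppendFromSelf.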
+ if (SNAPPY_PREDICT_FALSE((kSlopBytes < 64 && len > kSlopBytes) || + static_cast(op - op_base_) < offset || + op >= op_limit_min_slop_ || offset < len)) { + if (offset == 0) return false; + if (SNAPPY_PREDICT_FALSE(static_cast(op - op_base_) < offset || + op + len > op_limit_)) { + op_ptr_ = op; + bool res = SlowAppendFromSelf(offset, len); + *op_p = op_ptr_; + return res; + } + *op_p = IncrementalCopy(op - offset, op, op + len, op_limit_); return true; } - return SlowAppendFromSelf(offset, len); + // Fast path + char* const op_end = op + len; + std::memmove(op, op - offset, kSlopBytes); + *op_p = op_end; + return true; } // Called at the end of the decompress. We ask the allocator @@ -1367,12 +2516,12 @@ class SnappyScatteredWriter { inline void Flush() { allocator_.Flush(Produced()); } }; -template +template bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { size_t avail = op_limit_ - op_ptr_; while (len > avail) { // Completely fill this block - memcpy(op_ptr_, ip, avail); + std::memcpy(op_ptr_, ip, avail); op_ptr_ += avail; assert(op_limit_ - op_ptr_ == 0); full_size_ += (op_ptr_ - op_base_); @@ -1380,25 +2529,25 @@ bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { ip += avail; // Bounds check - if (full_size_ + len > expected_) { - return false; - } + if (full_size_ + len > expected_) return false; // Make new block size_t bsize = std::min(kBlockSize, expected_ - full_size_); op_base_ = allocator_.Allocate(bsize); op_ptr_ = op_base_; op_limit_ = op_base_ + bsize; + op_limit_min_slop_ = op_limit_ - std::min(kSlopBytes - 1, bsize); + blocks_.push_back(op_base_); avail = bsize; } - memcpy(op_ptr_, ip, len); + std::memcpy(op_ptr_, ip, len); op_ptr_ += len; return true; } -template +template bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, size_t len) { // Overflow check @@ -1413,19 +2562,26 @@ bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, // nice if we do not rely on that, since we can get better compression if we // allow cross-block copies and thus might want to change the compressor in // the future. + // TODO Replace this with a properly optimized path. This is not + // triggered right now. But this is so super slow, that it would regress + // performance unacceptably if triggered. size_t src = cur - offset; + char* op = op_ptr_; while (len-- > 0) { - char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)]; - Append(&c, 1); + char c = blocks_[src >> kBlockLog][src & (kBlockSize - 1)]; + if (!Append(&c, 1, &op)) { + op_ptr_ = op; + return false; + } src++; } + op_ptr_ = op; return true; } class SnappySinkAllocator { public: - explicit SnappySinkAllocator(Sink* dest): dest_(dest) {} - ~SnappySinkAllocator() {} + explicit SnappySinkAllocator(Sink* dest) : dest_(dest) {} char* Allocate(int size) { Datablock block(new char[size], size); @@ -1440,10 +2596,9 @@ class SnappySinkAllocator { // to the blocks. 
void Flush(size_t size) {
    size_t size_written = 0;
-    size_t block_size;
-    for (int i = 0; i < blocks_.size(); ++i) {
-      block_size = std::min(blocks_[i].size, size - size_written);
-      dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
+    for (Datablock& block : blocks_) {
+      size_t block_size = std::min<size_t>(block.size, size - size_written);
+      dest_->AppendAndTakeOwnership(block.data, block_size,
                                    &SnappySinkAllocator::Deleter, NULL);
      size_written += block_size;
    }
@@ -1458,6 +2613,10 @@ class SnappySinkAllocator {
  };

  static void Deleter(void* arg, const char* bytes, size_t size) {
+    // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+    (void)arg;
+    (void)size;
+
    delete[] bytes;
  }

@@ -1477,15 +2636,15 @@ size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
bool Uncompress(Source* compressed, Sink* uncompressed) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(compressed);
-  uint32 uncompressed_len = 0;
+  uint32_t uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
    return false;
  }

  char c;
  size_t allocated_size;
-  char* buf = uncompressed->GetAppendBufferVariable(
-      1, uncompressed_len, &c, 1, &allocated_size);
+  char* buf = uncompressed->GetAppendBufferVariable(1, uncompressed_len, &c, 1,
+                                                    &allocated_size);

  const size_t compressed_len = compressed->Available();
  // If we can get a flat buffer, then use it, otherwise do block by block
@@ -1504,4 +2663,4 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
  }
}

-}  // end namespace snappy
+}  // namespace snappy
diff --git a/snappy.h b/snappy.h
index 4568db8..2f1b802 100644
--- a/snappy.h
+++ b/snappy.h
@@ -40,6 +40,8 @@
 #define THIRD_PARTY_SNAPPY_SNAPPY_H__

 #include <stddef.h>
+#include <stdint.h>
+
 #include <string>

 #include "snappy-stubs-public.h"
@@ -48,13 +50,38 @@ namespace snappy {
  class Source;
  class Sink;

+  struct CompressionOptions {
+    // Compression level.
+    // Level 1 is the fastest.
+    // Level 2 is a little slower but provides better compression. Level 2 is
+    // **EXPERIMENTAL** for the time being. It might happen that we decide to
+    // fall back to level 1 in the future.
+    // Levels 3+ are currently not supported. We plan to support levels up to
+    // 9 in the future.
+    // For comparison with other compression algorithms: level 1 is roughly
+    // equivalent to LZ4's fast mode (level 1); level 2 is roughly equivalent
+    // to LZ4's level 2 mode and compresses to somewhere between zstd:-3 and
+    // zstd:-2, but generally with faster decompression than snappy level 1
+    // and zstd:-3.
+    int level = DefaultCompressionLevel();
+
+    constexpr CompressionOptions() = default;
+    constexpr CompressionOptions(int compression_level)
+        : level(compression_level) {}
+
+    static constexpr int MinCompressionLevel() { return 1; }
+    static constexpr int MaxCompressionLevel() { return 2; }
+    static constexpr int DefaultCompressionLevel() { return 1; }
+  };
+
  // ------------------------------------------------------------------------
  // Generic compression/decompression routines.
  // ------------------------------------------------------------------------

-  // Compress the bytes read from "*source" and append to "*sink". Return the
+  // Compress the bytes read from "*reader" and append to "*writer". Return the
  // number of bytes written.
+  // The first overload is kept to preserve the ABI.
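+  // Illustrative use of the options overload (a sketch; error handling
+  // omitted):
+  //
+  //   snappy::ByteArraySource source(input, input_size);
+  //   snappy::UncheckedByteArraySink sink(output_buffer);
+  //   size_t written = snappy::Compress(
+  //       &source, &sink, snappy::CompressionOptions{/*level=*/2});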
+ size_t Compress(Source* reader, Sink* writer); + size_t Compress(Source* reader, Sink* writer, + CompressionOptions options); // Find the uncompressed length of the given stream, as given by the header. // Note that the true length could deviate from this; the stream could e.g. @@ -63,26 +90,41 @@ namespace snappy { // Also note that this leaves "*source" in a state that is unsuitable for // further operations, such as RawUncompress(). You will need to rewind // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); + bool GetUncompressedLength(Source* source, uint32_t* result); // ------------------------------------------------------------------------ // Higher-level string based routines (should be sufficient for most users) // ------------------------------------------------------------------------ - // Sets "*output" to the compressed version of "input[0,input_length-1]". - // Original contents of *output are lost. + // Sets "*compressed" to the compressed version of "input[0..input_length-1]". + // Original contents of *compressed are lost. // - // REQUIRES: "input[]" is not an alias of "*output". - size_t Compress(const char* input, size_t input_length, string* output); + // REQUIRES: "input[]" is not an alias of "*compressed". + // First version is to preserve ABI. + size_t Compress(const char* input, size_t input_length, + std::string* compressed); + size_t Compress(const char* input, size_t input_length, + std::string* compressed, CompressionOptions options); - // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". + // Same as `Compress` above but taking an `iovec` array as input. Note that + // this function preprocesses the inputs to compute the sum of + // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use + // `RawCompressFromIOVec` below. + // First version is to preserve ABI. + size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed); + size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt, + std::string* compressed, + CompressionOptions options); + + // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed". // Original contents of "*uncompressed" are lost. // // REQUIRES: "compressed[]" is not an alias of "*uncompressed". // // returns false if the message is corrupted and could not be decompressed bool Uncompress(const char* compressed, size_t compressed_length, - string* uncompressed); + std::string* uncompressed); // Decompresses "compressed" to "*uncompressed". // @@ -116,10 +158,19 @@ namespace snappy { // RawCompress(input, input_length, output, &output_length); // ... Process(output, output_length) ... // delete [] output; - void RawCompress(const char* input, - size_t input_length, - char* compressed, + void RawCompress(const char* input, size_t input_length, char* compressed, size_t* compressed_length); + void RawCompress(const char* input, size_t input_length, char* compressed, + size_t* compressed_length, CompressionOptions options); + + // Same as `RawCompress` above but taking an `iovec` array as input. Note that + // `uncompressed_length` is the total number of bytes to be read from the + // elements of `iov` (_not_ the number of elements in `iov`). 
+ void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length); + void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length, + char* compressed, size_t* compressed_length, + CompressionOptions options); // Given data in "compressed[0..compressed_length-1]" generated by // calling the Snappy::Compress routine, this routine @@ -193,11 +244,14 @@ namespace snappy { // Note that there might be older data around that is compressed with larger // block sizes, so the decompression code should not rely on the // non-existence of long backreferences. - static const int kBlockLog = 16; - static const size_t kBlockSize = 1 << kBlockLog; + static constexpr int kBlockLog = 16; + static constexpr size_t kBlockSize = 1 << kBlockLog; + + static constexpr int kMinHashTableBits = 8; + static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits; - static const int kMaxHashTableBits = 14; - static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; + static constexpr int kMaxHashTableBits = 15; + static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits; } // end namespace snappy #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ diff --git a/snappy.pc.in b/snappy.pc.in deleted file mode 100644 index 982d240..0000000 --- a/snappy.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: @PACKAGE@ -Description: A fast compression/decompression library -Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -l@PACKAGE@ -Cflags: -I${includedir} diff --git a/snappy_benchmark.cc b/snappy_benchmark.cc new file mode 100644 index 0000000..d6e35d3 --- /dev/null +++ b/snappy_benchmark.cc @@ -0,0 +1,398 @@ +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "snappy-internal.h" +#include "snappy-sinksource.h" +#include "snappy-test.h" +#include "snappy.h" +#include "snappy_test_data.h" + +namespace snappy { + +namespace { + +void FilesAndLevels(benchmark::internal::Benchmark* benchmark) { + for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) { + for (int level = snappy::CompressionOptions::MinCompressionLevel(); + level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) { + benchmark->ArgPair(i, level); + } + } +} + +void BM_UFlat(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress( + contents.data(), contents.size(), &zcontents, + snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); + char* dst = new char[contents.size()]; + + for (auto s : state) { + CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst)); + benchmark::DoNotOptimize(dst); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); + + delete[] dst; +} +BENCHMARK(BM_UFlat)->Apply(FilesAndLevels); + +struct SourceFiles { + SourceFiles() { + for (int i = 0; i < kFiles; i++) { + std::string contents = ReadTestDataFile(kTestDataFiles[i].filename, + kTestDataFiles[i].size_limit); + max_size = std::max(max_size, contents.size()); + sizes[i] = contents.size(); + snappy::Compress(contents.data(), contents.size(), &zcontents[i]); + } + } + static constexpr int kFiles = ARRAYSIZE(kTestDataFiles); + std::string zcontents[kFiles]; + size_t sizes[kFiles]; + size_t max_size = 0; +}; + +void BM_UFlatMedley(benchmark::State& state) { + static const SourceFiles* const source = new SourceFiles(); + + std::vector dst(source->max_size); + + for (auto s : state) { + for (int i = 0; i < SourceFiles::kFiles; i++) { + CHECK(snappy::RawUncompress(source->zcontents[i].data(), + source->zcontents[i].size(), dst.data())); + benchmark::DoNotOptimize(dst); + } + } + + int64_t source_sizes = 0; + for (int i = 0; i < SourceFiles::kFiles; i++) { + source_sizes += static_cast(source->sizes[i]); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + source_sizes); +} +BENCHMARK(BM_UFlatMedley); + +void BM_UValidate(benchmark::State& state) { + // Pick file to process based on state.range(0). 
+ int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress( + contents.data(), contents.size(), &zcontents, + snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); + + for (auto s : state) { + CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size())); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); +} +BENCHMARK(BM_UValidate)->Apply(FilesAndLevels); + +void BM_UValidateMedley(benchmark::State& state) { + static const SourceFiles* const source = new SourceFiles(); + + for (auto s : state) { + for (int i = 0; i < SourceFiles::kFiles; i++) { + CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(), + source->zcontents[i].size())); + } + } + + int64_t source_sizes = 0; + for (int i = 0; i < SourceFiles::kFiles; i++) { + source_sizes += static_cast(source->sizes[i]); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + source_sizes); +} +BENCHMARK(BM_UValidateMedley); + +void BM_UIOVecSource(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + int level = state.range(1); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + // Create `iovec`s of the `contents`. + const int kNumEntries = 10; + struct iovec iov[kNumEntries]; + size_t used_so_far = 0; + for (int i = 0; i < kNumEntries; ++i) { + iov[i].iov_base = const_cast(contents.data()) + used_so_far; + if (used_so_far == contents.size()) { + iov[i].iov_len = 0; + continue; + } + if (i == kNumEntries - 1) { + iov[i].iov_len = contents.size() - used_so_far; + } else { + iov[i].iov_len = contents.size() / kNumEntries; + } + used_so_far += iov[i].iov_len; + } + + char* dst = new char[snappy::MaxCompressedLength(contents.size())]; + size_t zsize = 0; + for (auto s : state) { + snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize, + snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(iov); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + const double compression_ratio = + static_cast(zsize) / std::max(1, contents.size()); + state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, + 100.0 * compression_ratio)); + VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", + kTestDataFiles[file_index].label, contents.size(), + zsize); + + delete[] dst; +} +BENCHMARK(BM_UIOVecSource)->Apply(FilesAndLevels); + +void BM_UIOVecSink(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress(contents.data(), contents.size(), &zcontents); + + // Uncompress into an iovec containing ten entries. 
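+  // Each of the first nine entries covers contents.size() / 10 bytes; the
+  // last entry picks up the remainder so the ten slices exactly tile the
+  // buffer.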
+ const int kNumEntries = 10; + struct iovec iov[kNumEntries]; + char* dst = new char[contents.size()]; + size_t used_so_far = 0; + for (int i = 0; i < kNumEntries; ++i) { + iov[i].iov_base = dst + used_so_far; + if (used_so_far == contents.size()) { + iov[i].iov_len = 0; + continue; + } + + if (i == kNumEntries - 1) { + iov[i].iov_len = contents.size() - used_so_far; + } else { + iov[i].iov_len = contents.size() / kNumEntries; + } + used_so_far += iov[i].iov_len; + } + + for (auto s : state) { + CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov, + kNumEntries)); + benchmark::DoNotOptimize(iov); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); + + delete[] dst; +} +BENCHMARK(BM_UIOVecSink)->DenseRange(0, 4); + +void BM_UFlatSink(benchmark::State& state) { + // Pick file to process based on state.range(0). + int file_index = state.range(0); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + + std::string zcontents; + snappy::Compress( + contents.data(), contents.size(), &zcontents, + snappy::CompressionOptions{/*level=*/static_cast(state.range(1))}); + char* dst = new char[contents.size()]; + + for (auto s : state) { + snappy::ByteArraySource source(zcontents.data(), zcontents.size()); + snappy::UncheckedByteArraySink sink(dst); + CHECK(snappy::Uncompress(&source, &sink)); + benchmark::DoNotOptimize(sink); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + state.SetLabel(kTestDataFiles[file_index].label); + + std::string s(dst, contents.size()); + CHECK_EQ(contents, s); + + delete[] dst; +} + +BENCHMARK(BM_UFlatSink)->Apply(FilesAndLevels); + +void BM_ZFlat(benchmark::State& state) { + // Pick file to process based on state.range(0). 
+ int file_index = state.range(0); + int level = state.range(1); + + CHECK_GE(file_index, 0); + CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles)); + std::string contents = + ReadTestDataFile(kTestDataFiles[file_index].filename, + kTestDataFiles[file_index].size_limit); + char* dst = new char[snappy::MaxCompressedLength(contents.size())]; + + size_t zsize = 0; + for (auto s : state) { + snappy::RawCompress(contents.data(), contents.size(), dst, &zsize, + snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(dst); + } + state.SetBytesProcessed(static_cast(state.iterations()) * + static_cast(contents.size())); + const double compression_ratio = + static_cast(zsize) / std::max(1, contents.size()); + state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label, + 100.0 * compression_ratio)); + VLOG(0) << StrFormat("compression for %s: %d -> %d bytes", + kTestDataFiles[file_index].label, contents.size(), + zsize); + delete[] dst; +} + +BENCHMARK(BM_ZFlat)->Apply(FilesAndLevels); + +void BM_ZFlatAll(benchmark::State& state) { + const int num_files = ARRAYSIZE(kTestDataFiles); + int level = state.range(0); + + std::vector contents(num_files); + std::vector dst(num_files); + + int64_t total_contents_size = 0; + for (int i = 0; i < num_files; ++i) { + contents[i] = ReadTestDataFile(kTestDataFiles[i].filename, + kTestDataFiles[i].size_limit); + dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())]; + total_contents_size += contents[i].size(); + } + + size_t zsize = 0; + for (auto s : state) { + for (int i = 0; i < num_files; ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize, snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(dst); + } + } + + state.SetBytesProcessed(static_cast(state.iterations()) * + total_contents_size); + + for (char* dst_item : dst) { + delete[] dst_item; + } + state.SetLabel(StrFormat("%d kTestDataFiles", num_files)); +} +BENCHMARK(BM_ZFlatAll)->DenseRange(1, 2); + +void BM_ZFlatIncreasingTableSize(benchmark::State& state) { + CHECK_GT(ARRAYSIZE(kTestDataFiles), 0); + int level = state.range(0); + const std::string base_content = ReadTestDataFile( + kTestDataFiles[0].filename, kTestDataFiles[0].size_limit); + + std::vector contents; + std::vector dst; + int64_t total_contents_size = 0; + for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits; + ++table_bits) { + std::string content = base_content; + content.resize(1 << table_bits); + dst.push_back(new char[snappy::MaxCompressedLength(content.size())]); + total_contents_size += content.size(); + contents.push_back(std::move(content)); + } + + size_t zsize = 0; + for (auto s : state) { + for (size_t i = 0; i < contents.size(); ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize, snappy::CompressionOptions{/*level=*/level}); + benchmark::DoNotOptimize(dst); + } + } + + state.SetBytesProcessed(static_cast(state.iterations()) * + total_contents_size); + + for (char* dst_item : dst) { + delete[] dst_item; + } + state.SetLabel(StrFormat("%d tables", contents.size())); +} +BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(1, 2); + +} // namespace + +} // namespace snappy diff --git a/snappy_compress_fuzzer.cc b/snappy_compress_fuzzer.cc new file mode 100644 index 0000000..93254a2 --- /dev/null +++ b/snappy_compress_fuzzer.cc @@ -0,0 +1,64 @@ +// Copyright 2019 Google Inc. All Rights Reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// libFuzzer harness for fuzzing snappy compression code.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cassert>
+#include <string>
+
+#include "snappy.h"
+
+// Entry point for LibFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  std::string input(reinterpret_cast<const char*>(data), size);
+  for (int level = snappy::CompressionOptions::MinCompressionLevel();
+       level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) {
+    std::string compressed;
+    size_t compressed_size =
+        snappy::Compress(input.data(), input.size(), &compressed,
+                         snappy::CompressionOptions{/*level=*/level});
+
+    (void)compressed_size;  // Variable only used in debug builds.
+    assert(compressed_size == compressed.size());
+    assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
+    assert(
+        snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+
+    std::string uncompressed_after_compress;
+    bool uncompress_succeeded = snappy::Uncompress(
+        compressed.data(), compressed.size(), &uncompressed_after_compress);
+
+    (void)uncompress_succeeded;  // Variable only used in debug builds.
+    assert(uncompress_succeeded);
+    assert(input == uncompressed_after_compress);
+  }
+  return 0;
+}
diff --git a/snappy_test_data.cc b/snappy_test_data.cc
new file mode 100644
index 0000000..8b54153
--- /dev/null
+++ b/snappy_test_data.cc
@@ -0,0 +1,57 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Support code for reading test data.
+
+#include "snappy_test_data.h"
+
+#include <cstddef>
+#include <cstdlib>
+#include <string>
+
+#include "snappy-test.h"
+
+namespace snappy {
+
+std::string ReadTestDataFile(const char* base, size_t size_limit) {
+  std::string srcdir;
+  const char* srcdir_env = std::getenv("srcdir");  // This is set by Automake.
+  if (srcdir_env) {
+    srcdir = std::string(srcdir_env) + "/";
+  }
+
+  std::string contents;
+  CHECK_OK(file::GetContents(srcdir + "testdata/" + base, &contents,
+                             file::Defaults()));
+  if (size_limit > 0) {
+    contents = contents.substr(0, size_limit);
+  }
+  return contents;
+}
+
+} // namespace snappy
diff --git a/snappy_test_data.h b/snappy_test_data.h
new file mode 100644
index 0000000..b01f74b
--- /dev/null
+++ b/snappy_test_data.h
@@ -0,0 +1,68 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// List of test case files.
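+// A size_limit of 0 means the whole file is used; ReadTestDataFile() only
+// truncates its result when size_limit > 0.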
+
+#ifndef THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
+#define THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
+
+#include <cstddef>
+#include <string>
+
+namespace snappy {
+
+std::string ReadTestDataFile(const char* base, size_t size_limit);
+
+// TODO: Replace anonymous namespace with inline variable when we can
+// rely on C++17.
+namespace {
+
+constexpr struct {
+  const char* label;
+  const char* filename;
+  size_t size_limit;
+} kTestDataFiles[] = {
+    { "html", "html", 0 },
+    { "urls", "urls.10K", 0 },
+    { "jpg", "fireworks.jpeg", 0 },
+    { "jpg_200", "fireworks.jpeg", 200 },
+    { "pdf", "paper-100k.pdf", 0 },
+    { "html4", "html_x_4", 0 },
+    { "txt1", "alice29.txt", 0 },
+    { "txt2", "asyoulik.txt", 0 },
+    { "txt3", "lcet10.txt", 0 },
+    { "txt4", "plrabn12.txt", 0 },
+    { "pb", "geo.protodata", 0 },
+    { "gaviota", "kppkn.gtb", 0 },
+};
+
+} // namespace
+
+} // namespace snappy
+
+#endif  // THIRD_PARTY_SNAPPY_SNAPPY_TEST_DATA_H__
diff --git a/snappy_test_tool.cc b/snappy_test_tool.cc
new file mode 100644
index 0000000..a7c779b
--- /dev/null
+++ b/snappy_test_tool.cc
@@ -0,0 +1,471 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
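+//
+// Command-line tool that benchmarks snappy against zlib, LZO, and LZ4 on
+// each file given as an argument. A hypothetical invocation (flag names are
+// defined below):
+//
+//   snappy_test_tool --zlib=false --lz4=false testdata/html
+//
+// With --write_compressed or --write_uncompressed, the tool instead writes
+// .comp / .uncomp files next to each input.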
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "snappy-test.h"
+
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+#include "snappy.h"
+#include "snappy_test_data.h"
+
+SNAPPY_FLAG(int32_t, start_len, -1,
+            "Starting prefix size for testing (-1: just full file contents)");
+SNAPPY_FLAG(int32_t, end_len, -1,
+            "Ending prefix size for testing (-1: just full file contents)");
+SNAPPY_FLAG(int32_t, bytes, 10485760,
+            "How many bytes to compress/uncompress per file for timing");
+
+SNAPPY_FLAG(bool, zlib, true,
+            "Run zlib compression (http://www.zlib.net)");
+SNAPPY_FLAG(bool, lzo, true,
+            "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
+SNAPPY_FLAG(bool, lz4, true,
+            "Run LZ4 compression (https://github.com/lz4/lz4)");
+SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
+
+SNAPPY_FLAG(bool, write_compressed, false,
+            "Write compressed versions of each file to <file>.comp");
+SNAPPY_FLAG(bool, write_uncompressed, false,
+            "Write uncompressed versions of each file to <file>.uncomp");
+
+namespace snappy {
+
+namespace {
+
+#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+// To test against code that reads beyond its input, this class copies a
+// string to a newly allocated group of pages, the last of which
+// is made unreadable via mprotect. Note that we need to allocate the
+// memory with mmap(), as POSIX allows mprotect() only on memory allocated
+// with mmap(), and some malloc/posix_memalign implementations expect to
+// be able to read previously allocated memory while doing heap allocations.
+class DataEndingAtUnreadablePage {
+ public:
+  explicit DataEndingAtUnreadablePage(const std::string& s) {
+    const size_t page_size = sysconf(_SC_PAGESIZE);
+    const size_t size = s.size();
+    // Round up space for string to a multiple of page_size.
+    size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
+    alloc_size_ = space_for_string + page_size;
+    mem_ = mmap(NULL, alloc_size_,
+                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    CHECK_NE(MAP_FAILED, mem_);
+    protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
+    char* dst = protected_page_ - size;
+    std::memcpy(dst, s.data(), size);
+    data_ = dst;
+    size_ = size;
+    // Make guard page unreadable.
+    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
+  }
+
+  ~DataEndingAtUnreadablePage() {
+    const size_t page_size = sysconf(_SC_PAGESIZE);
+    // Undo the mprotect.
+    CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
+    CHECK_EQ(0, munmap(mem_, alloc_size_));
+  }
+
+  const char* data() const { return data_; }
+  size_t size() const { return size_; }
+
+ private:
+  size_t alloc_size_;
+  void* mem_;
+  char* protected_page_;
+  const char* data_;
+  size_t size_;
+};
+
+#else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+// Fallback for systems without mmap.
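+// (std::string exposes the same data()/size() interface but provides no
+// guard page, so reads past the end go undetected on these platforms.)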
+using DataEndingAtUnreadablePage = std::string; + +#endif + +enum CompressorType { ZLIB, LZO, LZ4, SNAPPY }; + +const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"}; + +size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) { + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: + return ZLib::MinCompressbufSize(input_size); +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: + return input_size + input_size/64 + 16 + 3; +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: + return LZ4_compressBound(input_size); +#endif // LZ4_VERSION_NUMBER + + case SNAPPY: + return snappy::MaxCompressedLength(input_size); + + default: + LOG(FATAL) << "Unknown compression type number " << comp; + return 0; + } +} + +// Returns true if we successfully compressed, false otherwise. +// +// If compressed_is_preallocated is set, do not resize the compressed buffer. +// This is typically what you want for a benchmark, in order to not spend +// time in the memory allocator. If you do set this flag, however, +// "compressed" must be preinitialized to at least MinCompressbufSize(comp) +// number of bytes, and may contain junk bytes at the end after return. +bool Compress(const char* input, size_t input_size, CompressorType comp, + std::string* compressed, bool compressed_is_preallocated) { + if (!compressed_is_preallocated) { + compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); + } + + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: { + ZLib zlib; + uLongf destlen = compressed->size(); + int ret = zlib.Compress( + reinterpret_cast(string_as_array(compressed)), + &destlen, + reinterpret_cast(input), + input_size); + CHECK_EQ(Z_OK, ret); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + return true; + } +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: { + unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; + lzo_uint destlen; + int ret = lzo1x_1_15_compress( + reinterpret_cast(input), + input_size, + reinterpret_cast(string_as_array(compressed)), + &destlen, + mem); + CHECK_EQ(LZO_E_OK, ret); + delete[] mem; + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: { + int destlen = compressed->size(); + destlen = LZ4_compress_default(input, string_as_array(compressed), + input_size, destlen); + CHECK_NE(destlen, 0); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } +#endif // LZ4_VERSION_NUMBER + + case SNAPPY: { + size_t destlen; + snappy::RawCompress(input, input_size, + string_as_array(compressed), + &destlen); + CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); + if (!compressed_is_preallocated) { + compressed->resize(destlen); + } + break; + } + + default: { + return false; // the asked-for library wasn't compiled in + } + } + return true; +} + +bool Uncompress(const std::string& compressed, CompressorType comp, int size, + std::string* output) { + // TODO: Switch to [[maybe_unused]] when we can assume C++17. 
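+  // The (void) cast below silences unused-parameter warnings in
+  // configurations where no backend that checks `size` is compiled in.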
+ (void)size; + switch (comp) { +#ifdef ZLIB_VERSION + case ZLIB: { + output->resize(size); + ZLib zlib; + uLongf destlen = output->size(); + int ret = zlib.Uncompress( + reinterpret_cast(string_as_array(output)), + &destlen, + reinterpret_cast(compressed.data()), + compressed.size()); + CHECK_EQ(Z_OK, ret); + CHECK_EQ(static_cast(size), destlen); + break; + } +#endif // ZLIB_VERSION + +#ifdef LZO_VERSION + case LZO: { + output->resize(size); + lzo_uint destlen; + int ret = lzo1x_decompress( + reinterpret_cast(compressed.data()), + compressed.size(), + reinterpret_cast(string_as_array(output)), + &destlen, + NULL); + CHECK_EQ(LZO_E_OK, ret); + CHECK_EQ(static_cast(size), destlen); + break; + } +#endif // LZO_VERSION + +#ifdef LZ4_VERSION_NUMBER + case LZ4: { + output->resize(size); + int destlen = output->size(); + destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output), + compressed.size(), destlen); + CHECK_NE(destlen, 0); + CHECK_EQ(size, destlen); + break; + } +#endif // LZ4_VERSION_NUMBER + case SNAPPY: { + snappy::RawUncompress(compressed.data(), compressed.size(), + string_as_array(output)); + break; + } + + default: { + return false; // the asked-for library wasn't compiled in + } + } + return true; +} + +void Measure(const char* data, size_t length, CompressorType comp, int repeats, + int block_size) { + // Run tests a few time and pick median running times + static const int kRuns = 5; + double ctime[kRuns]; + double utime[kRuns]; + int compressed_size = 0; + + { + // Chop the input into blocks + int num_blocks = (length + block_size - 1) / block_size; + std::vector input(num_blocks); + std::vector input_length(num_blocks); + std::vector compressed(num_blocks); + std::vector output(num_blocks); + for (int b = 0; b < num_blocks; ++b) { + int input_start = b * block_size; + int input_limit = std::min((b+1)*block_size, length); + input[b] = data+input_start; + input_length[b] = input_limit-input_start; + } + + // Pre-grow the output buffers so we don't measure string append time. + for (std::string& compressed_block : compressed) { + compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); + } + + // First, try one trial compression to make sure the code is compiled in + if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { + LOG(WARNING) << "Skipping " << names[comp] << ": " + << "library not compiled in"; + return; + } + + for (int run = 0; run < kRuns; ++run) { + CycleTimer ctimer, utimer; + + // Pre-grow the output buffers so we don't measure string append time. + for (std::string& compressed_block : compressed) { + compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp)); + } + + ctimer.Start(); + for (int b = 0; b < num_blocks; ++b) { + for (int i = 0; i < repeats; ++i) + Compress(input[b], input_length[b], comp, &compressed[b], true); + } + ctimer.Stop(); + + // Compress once more, with resizing, so we don't leave junk + // at the end that will confuse the decompressor. 
+ for (int b = 0; b < num_blocks; ++b) { + Compress(input[b], input_length[b], comp, &compressed[b], false); + } + + for (int b = 0; b < num_blocks; ++b) { + output[b].resize(input_length[b]); + } + + utimer.Start(); + for (int i = 0; i < repeats; ++i) { + for (int b = 0; b < num_blocks; ++b) + Uncompress(compressed[b], comp, input_length[b], &output[b]); + } + utimer.Stop(); + + ctime[run] = ctimer.Get(); + utime[run] = utimer.Get(); + } + + compressed_size = 0; + for (const std::string& compressed_item : compressed) { + compressed_size += compressed_item.size(); + } + } + + std::sort(ctime, ctime + kRuns); + std::sort(utime, utime + kRuns); + const int med = kRuns/2; + + float comp_rate = (length / ctime[med]) * repeats / 1048576.0; + float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; + std::string x = names[comp]; + x += ":"; + std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate) + : std::string("?"); + std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " + "comp %5.1f MB/s uncomp %5s MB/s\n", + x.c_str(), + block_size/(1<<20), + static_cast(length), static_cast(compressed_size), + (compressed_size * 100.0) / std::max(1, length), + comp_rate, + urate.c_str()); +} + +void CompressFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + + std::string compressed; + Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); + + CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, + file::Defaults())); +} + +void UncompressFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + + size_t uncompLength; + CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(), + &uncompLength)); + + std::string uncompressed; + uncompressed.resize(uncompLength); + CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); + + CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, + file::Defaults())); +} + +void MeasureFile(const char* fname) { + std::string fullinput; + CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); + std::printf("%-40s :\n", fname); + + int start_len = (snappy::GetFlag(FLAGS_start_len) < 0) + ? 
fullinput.size() + : snappy::GetFlag(FLAGS_start_len); + int end_len = fullinput.size(); + if (snappy::GetFlag(FLAGS_end_len) >= 0) { + end_len = std::min(fullinput.size(), snappy::GetFlag(FLAGS_end_len)); + } + for (int len = start_len; len <= end_len; ++len) { + const char* const input = fullinput.data(); + int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1); + if (snappy::GetFlag(FLAGS_zlib)) + Measure(input, len, ZLIB, repeats, 1024 << 10); + if (snappy::GetFlag(FLAGS_lzo)) + Measure(input, len, LZO, repeats, 1024 << 10); + if (snappy::GetFlag(FLAGS_lz4)) + Measure(input, len, LZ4, repeats, 1024 << 10); + if (snappy::GetFlag(FLAGS_snappy)) + Measure(input, len, SNAPPY, repeats, 4096 << 10); + + // For block-size based measurements + if (0 && snappy::GetFlag(FLAGS_snappy)) { + Measure(input, len, SNAPPY, repeats, 8<<10); + Measure(input, len, SNAPPY, repeats, 16<<10); + Measure(input, len, SNAPPY, repeats, 32<<10); + Measure(input, len, SNAPPY, repeats, 64<<10); + Measure(input, len, SNAPPY, repeats, 256<<10); + Measure(input, len, SNAPPY, repeats, 1024<<10); + } + } +} + +} // namespace + +} // namespace snappy + +int main(int argc, char** argv) { + InitGoogle(argv[0], &argc, &argv, true); + + for (int arg = 1; arg < argc; ++arg) { + if (snappy::GetFlag(FLAGS_write_compressed)) { + snappy::CompressFile(argv[arg]); + } else if (snappy::GetFlag(FLAGS_write_uncompressed)) { + snappy::UncompressFile(argv[arg]); + } else { + snappy::MeasureFile(argv[arg]); + } + } + return 0; +} diff --git a/snappy_uncompress_fuzzer.cc b/snappy_uncompress_fuzzer.cc new file mode 100644 index 0000000..385bfb5 --- /dev/null +++ b/snappy_uncompress_fuzzer.cc @@ -0,0 +1,58 @@ +// Copyright 2019 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// libFuzzer harness for fuzzing snappy's decompression code. + +#include +#include + +#include +#include + +#include "snappy.h" + +// Entry point for LibFuzzer. 
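+// (The target is typically built with Clang's -fsanitize=fuzzer; libFuzzer
+// then calls this function repeatedly with mutated inputs.)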
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + std::string input(reinterpret_cast(data), size); + + // Avoid self-crafted decompression bombs. + size_t uncompressed_size; + constexpr size_t kMaxUncompressedSize = 1 << 20; + bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength( + input.data(), input.size(), &uncompressed_size); + if (!get_uncompressed_length_succeeded || + (uncompressed_size > kMaxUncompressedSize)) { + return 0; + } + + std::string uncompressed; + // The return value of snappy::Uncompress() is ignored because decompression + // will fail on invalid inputs. + snappy::Uncompress(input.data(), input.size(), &uncompressed); + return 0; +} diff --git a/snappy_unittest.cc b/snappy_unittest.cc index 19062e4..923a0ec 100644 --- a/snappy_unittest.cc +++ b/snappy_unittest.cc @@ -26,44 +26,32 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include -#include - - #include +#include +#include +#include +#include #include #include #include -#include "snappy.h" -#include "snappy-internal.h" #include "snappy-test.h" + +#include "gtest/gtest.h" + +#include "snappy-internal.h" #include "snappy-sinksource.h" +#include "snappy.h" +#include "snappy_test_data.h" -DEFINE_int32(start_len, -1, - "Starting prefix size for testing (-1: just full file contents)"); -DEFINE_int32(end_len, -1, - "Starting prefix size for testing (-1: just full file contents)"); -DEFINE_int32(bytes, 10485760, - "How many bytes to compress/uncompress per file for timing"); - -DEFINE_bool(zlib, false, - "Run zlib compression (http://www.zlib.net)"); -DEFINE_bool(lzo, false, - "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)"); -DEFINE_bool(snappy, true, "Run snappy compression"); - -DEFINE_bool(write_compressed, false, - "Write compressed versions of each file to .comp"); -DEFINE_bool(write_uncompressed, false, - "Write uncompressed versions of each file to .uncomp"); - -DEFINE_bool(snappy_dump_decompression_table, false, +SNAPPY_FLAG(bool, snappy_dump_decompression_table, false, "If true, we print the decompression table during tests."); namespace snappy { -#ifdef HAVE_FUNC_MMAP +namespace { + +#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF // To test against code that reads beyond its input, this class copies a // string to a newly allocated group of pages, the last of which @@ -73,8 +61,8 @@ namespace snappy { // be able to read previously allocated memory while doing heap allocations. class DataEndingAtUnreadablePage { public: - explicit DataEndingAtUnreadablePage(const string& s) { - const size_t page_size = getpagesize(); + explicit DataEndingAtUnreadablePage(const std::string& s) { + const size_t page_size = sysconf(_SC_PAGESIZE); const size_t size = s.size(); // Round up space for string to a multiple of page_size. size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); @@ -84,7 +72,7 @@ class DataEndingAtUnreadablePage { CHECK_NE(MAP_FAILED, mem_); protected_page_ = reinterpret_cast(mem_) + space_for_string; char* dst = protected_page_ - size; - memcpy(dst, s.data(), size); + std::memcpy(dst, s.data(), size); data_ = dst; size_ = size; // Make guard page unreadable. @@ -92,8 +80,9 @@ class DataEndingAtUnreadablePage { } ~DataEndingAtUnreadablePage() { + const size_t page_size = sysconf(_SC_PAGESIZE); // Undo the mprotect. 
- CHECK_EQ(0, mprotect(protected_page_, getpagesize(), PROT_READ|PROT_WRITE)); + CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); CHECK_EQ(0, munmap(mem_, alloc_size_)); } @@ -108,258 +97,15 @@ class DataEndingAtUnreadablePage { size_t size_; }; -#else // HAVE_FUNC_MMAP +#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF // Fallback for systems without mmap. -typedef string DataEndingAtUnreadablePage; +using DataEndingAtUnreadablePage = std::string; #endif -enum CompressorType { - ZLIB, LZO, SNAPPY -}; - -const char* names[] = { - "ZLIB", "LZO", "SNAPPY" -}; - -static size_t MinimumRequiredOutputSpace(size_t input_size, - CompressorType comp) { - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: - return ZLib::MinCompressbufSize(input_size); -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: - return input_size + input_size/64 + 16 + 3; -#endif // LZO_VERSION - - case SNAPPY: - return snappy::MaxCompressedLength(input_size); - - default: - LOG(FATAL) << "Unknown compression type number " << comp; - return 0; - } -} - -// Returns true if we successfully compressed, false otherwise. -// -// If compressed_is_preallocated is set, do not resize the compressed buffer. -// This is typically what you want for a benchmark, in order to not spend -// time in the memory allocator. If you do set this flag, however, -// "compressed" must be preinitialized to at least MinCompressbufSize(comp) -// number of bytes, and may contain junk bytes at the end after return. -static bool Compress(const char* input, size_t input_size, CompressorType comp, - string* compressed, bool compressed_is_preallocated) { - if (!compressed_is_preallocated) { - compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); - } - - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: { - ZLib zlib; - uLongf destlen = compressed->size(); - int ret = zlib.Compress( - reinterpret_cast(string_as_array(compressed)), - &destlen, - reinterpret_cast(input), - input_size); - CHECK_EQ(Z_OK, ret); - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - return true; - } -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: { - unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS]; - lzo_uint destlen; - int ret = lzo1x_1_15_compress( - reinterpret_cast(input), - input_size, - reinterpret_cast(string_as_array(compressed)), - &destlen, - mem); - CHECK_EQ(LZO_E_OK, ret); - delete[] mem; - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - break; - } -#endif // LZO_VERSION - - case SNAPPY: { - size_t destlen; - snappy::RawCompress(input, input_size, - string_as_array(compressed), - &destlen); - CHECK_LE(destlen, snappy::MaxCompressedLength(input_size)); - if (!compressed_is_preallocated) { - compressed->resize(destlen); - } - break; - } - - default: { - return false; // the asked-for library wasn't compiled in - } - } - return true; -} - -static bool Uncompress(const string& compressed, CompressorType comp, - int size, string* output) { - switch (comp) { -#ifdef ZLIB_VERSION - case ZLIB: { - output->resize(size); - ZLib zlib; - uLongf destlen = output->size(); - int ret = zlib.Uncompress( - reinterpret_cast(string_as_array(output)), - &destlen, - reinterpret_cast(compressed.data()), - compressed.size()); - CHECK_EQ(Z_OK, ret); - CHECK_EQ(static_cast(size), destlen); - break; - } -#endif // ZLIB_VERSION - -#ifdef LZO_VERSION - case LZO: { - output->resize(size); - lzo_uint destlen; - int ret = lzo1x_decompress( - reinterpret_cast(compressed.data()), - 
compressed.size(), - reinterpret_cast(string_as_array(output)), - &destlen, - NULL); - CHECK_EQ(LZO_E_OK, ret); - CHECK_EQ(static_cast(size), destlen); - break; - } -#endif // LZO_VERSION - - case SNAPPY: { - snappy::RawUncompress(compressed.data(), compressed.size(), - string_as_array(output)); - break; - } - - default: { - return false; // the asked-for library wasn't compiled in - } - } - return true; -} - -static void Measure(const char* data, - size_t length, - CompressorType comp, - int repeats, - int block_size) { - // Run tests a few time and pick median running times - static const int kRuns = 5; - double ctime[kRuns]; - double utime[kRuns]; - int compressed_size = 0; - - { - // Chop the input into blocks - int num_blocks = (length + block_size - 1) / block_size; - std::vector input(num_blocks); - std::vector input_length(num_blocks); - std::vector compressed(num_blocks); - std::vector output(num_blocks); - for (int b = 0; b < num_blocks; b++) { - int input_start = b * block_size; - int input_limit = std::min((b+1)*block_size, length); - input[b] = data+input_start; - input_length[b] = input_limit-input_start; - - // Pre-grow the output buffer so we don't measure string append time. - compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp)); - } - - // First, try one trial compression to make sure the code is compiled in - if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) { - LOG(WARNING) << "Skipping " << names[comp] << ": " - << "library not compiled in"; - return; - } - - for (int run = 0; run < kRuns; run++) { - CycleTimer ctimer, utimer; - - for (int b = 0; b < num_blocks; b++) { - // Pre-grow the output buffer so we don't measure string append time. - compressed[b].resize(MinimumRequiredOutputSpace(block_size, comp)); - } - - ctimer.Start(); - for (int b = 0; b < num_blocks; b++) - for (int i = 0; i < repeats; i++) - Compress(input[b], input_length[b], comp, &compressed[b], true); - ctimer.Stop(); - - // Compress once more, with resizing, so we don't leave junk - // at the end that will confuse the decompressor. - for (int b = 0; b < num_blocks; b++) { - Compress(input[b], input_length[b], comp, &compressed[b], false); - } - - for (int b = 0; b < num_blocks; b++) { - output[b].resize(input_length[b]); - } - - utimer.Start(); - for (int i = 0; i < repeats; i++) - for (int b = 0; b < num_blocks; b++) - Uncompress(compressed[b], comp, input_length[b], &output[b]); - utimer.Stop(); - - ctime[run] = ctimer.Get(); - utime[run] = utimer.Get(); - } - - compressed_size = 0; - for (size_t i = 0; i < compressed.size(); i++) { - compressed_size += compressed[i].size(); - } - } - - std::sort(ctime, ctime + kRuns); - std::sort(utime, utime + kRuns); - const int med = kRuns/2; - - float comp_rate = (length / ctime[med]) * repeats / 1048576.0; - float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; - string x = names[comp]; - x += ":"; - string urate = (uncomp_rate >= 0) - ? 
StringPrintf("%.1f", uncomp_rate) - : string("?"); - printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " - "comp %5.1f MB/s uncomp %5s MB/s\n", - x.c_str(), - block_size/(1<<20), - static_cast(length), static_cast(compressed_size), - (compressed_size * 100.0) / std::max(1, length), - comp_rate, - urate.c_str()); -} - -static int VerifyString(const string& input) { - string compressed; +int VerifyString(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -367,15 +113,15 @@ static int VerifyString(const string& input) { snappy::MaxCompressedLength(input.size())); CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - string uncompressed; + std::string uncompressed; DataEndingAtUnreadablePage c(compressed); CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); CHECK_EQ(uncompressed, input); return uncompressed.size(); } -static void VerifyStringSink(const string& input) { - string compressed; +void VerifyStringSink(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -383,7 +129,7 @@ static void VerifyStringSink(const string& input) { snappy::MaxCompressedLength(input.size())); CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - string uncompressed; + std::string uncompressed; uncompressed.resize(input.size()); snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed)); DataEndingAtUnreadablePage c(compressed); @@ -392,41 +138,67 @@ static void VerifyStringSink(const string& input) { CHECK_EQ(uncompressed, input); } -static void VerifyIOVec(const string& input) { - string compressed; - DataEndingAtUnreadablePage i(input); - const size_t written = snappy::Compress(i.data(), i.size(), &compressed); - CHECK_EQ(written, compressed.size()); - CHECK_LE(compressed.size(), - snappy::MaxCompressedLength(input.size())); - CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - - // Try uncompressing into an iovec containing a random number of entries - // ranging from 1 to 10. - char* buf = new char[input.size()]; - ACMRandom rnd(input.size()); - size_t num = rnd.Next() % 10 + 1; +struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) { + std::minstd_rand0 rng(input.size()); + std::uniform_int_distribution uniform_1_to_10(1, 10); + num = uniform_1_to_10(rng); if (input.size() < num) { num = input.size(); } struct iovec* iov = new iovec[num]; - int used_so_far = 0; + size_t used_so_far = 0; + std::bernoulli_distribution one_in_five(1.0 / 5); for (size_t i = 0; i < num; ++i) { + assert(used_so_far < input.size()); iov[i].iov_base = buf + used_so_far; if (i == num - 1) { iov[i].iov_len = input.size() - used_so_far; } else { // Randomly choose to insert a 0 byte entry. 
- if (rnd.OneIn(5)) { + if (one_in_five(rng)) { iov[i].iov_len = 0; } else { - iov[i].iov_len = rnd.Uniform(input.size()); + std::uniform_int_distribution uniform_not_used_so_far( + 0, input.size() - used_so_far - 1); + iov[i].iov_len = uniform_not_used_so_far(rng); } } used_so_far += iov[i].iov_len; } - CHECK(snappy::RawUncompressToIOVec( - compressed.data(), compressed.size(), iov, num)); + return iov; +} + +int VerifyIOVecSource(const std::string& input) { + std::string compressed; + std::string copy = input; + char* buf = const_cast(copy.data()); + size_t num = 0; + struct iovec* iov = GetIOVec(input, buf, num); + const size_t written = snappy::CompressFromIOVec(iov, num, &compressed); + CHECK_EQ(written, compressed.size()); + CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size())); + CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); + + std::string uncompressed; + DataEndingAtUnreadablePage c(compressed); + CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); + CHECK_EQ(uncompressed, input); + delete[] iov; + return uncompressed.size(); +} + +void VerifyIOVecSink(const std::string& input) { + std::string compressed; + DataEndingAtUnreadablePage i(input); + const size_t written = snappy::Compress(i.data(), i.size(), &compressed); + CHECK_EQ(written, compressed.size()); + CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size())); + CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); + char* buf = new char[input.size()]; + size_t num = 0; + struct iovec* iov = GetIOVec(input, buf, num); + CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov, + num)); CHECK(!memcmp(buf, input.data(), input.size())); delete[] iov; delete[] buf; @@ -434,22 +206,22 @@ static void VerifyIOVec(const string& input) { // Test that data compressed by a compressor that does not // obey block sizes is uncompressed properly. -static void VerifyNonBlockedCompression(const string& input) { +void VerifyNonBlockedCompression(const std::string& input) { if (input.length() > snappy::kBlockSize) { // We cannot test larger blocks than the maximum block size, obviously. 
return; } - string prefix; + std::string prefix; Varint::Append32(&prefix, input.size()); // Setup compression table - snappy::internal::WorkingMemory wmem; + snappy::internal::WorkingMemory wmem(input.size()); int table_size; - uint16* table = wmem.GetHashTable(input.size(), &table_size); + uint16_t* table = wmem.GetHashTable(input.size(), &table_size); // Compress entire input in one shot - string compressed; + std::string compressed; compressed += prefix; compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size())); char* dest = string_as_array(&compressed) + prefix.size(); @@ -457,13 +229,13 @@ static void VerifyNonBlockedCompression(const string& input) { dest, table, table_size); compressed.resize(end - compressed.data()); - // Uncompress into string - string uncomp_str; + // Uncompress into std::string + std::string uncomp_str; CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str)); CHECK_EQ(uncomp_str, input); // Uncompress using source/sink - string uncomp_str2; + std::string uncomp_str2; uncomp_str2.resize(input.size()); snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2)); snappy::ByteArraySource source(compressed.data(), compressed.size()); @@ -475,62 +247,64 @@ static void VerifyNonBlockedCompression(const string& input) { static const int kNumBlocks = 10; struct iovec vec[kNumBlocks]; const int block_size = 1 + input.size() / kNumBlocks; - string iovec_data(block_size * kNumBlocks, 'x'); - for (int i = 0; i < kNumBlocks; i++) { + std::string iovec_data(block_size * kNumBlocks, 'x'); + for (int i = 0; i < kNumBlocks; ++i) { vec[i].iov_base = string_as_array(&iovec_data) + i * block_size; vec[i].iov_len = block_size; } CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), vec, kNumBlocks)); - CHECK_EQ(string(iovec_data.data(), input.size()), input); + CHECK_EQ(std::string(iovec_data.data(), input.size()), input); } } // Expand the input so that it is at least K times as big as block size -static string Expand(const string& input) { +std::string Expand(const std::string& input) { static const int K = 3; - string data = input; + std::string data = input; while (data.size() < K * snappy::kBlockSize) { data += input; } return data; } -static int Verify(const string& input) { +int Verify(const std::string& input) { VLOG(1) << "Verifying input of size " << input.size(); // Compress using string based routines const int result = VerifyString(input); + // Compress using `iovec`-based routines. + CHECK_EQ(VerifyIOVecSource(input), result); + // Verify using sink based routines VerifyStringSink(input); VerifyNonBlockedCompression(input); - VerifyIOVec(input); + VerifyIOVecSink(input); if (!input.empty()) { - const string expanded = Expand(input); + const std::string expanded = Expand(input); VerifyNonBlockedCompression(expanded); - VerifyIOVec(input); + VerifyIOVecSink(input); } return result; } - -static bool IsValidCompressedBuffer(const string& c) { +bool IsValidCompressedBuffer(const std::string& c) { return snappy::IsValidCompressedBuffer(c.data(), c.size()); } -static bool Uncompress(const string& c, string* u) { +bool Uncompress(const std::string& c, std::string* u) { return snappy::Uncompress(c.data(), c.size(), u); } // This test checks to ensure that snappy doesn't coredump if it gets // corrupted data. 
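// The expected contract, sketched from the caller's side (hypothetical
// snippet; both functions are part of snappy's public API):
//
//   std::string out;
//   if (snappy::IsValidCompressedBuffer(c.data(), c.size()) &&
//       snappy::Uncompress(c.data(), c.size(), &out)) {
//     // use `out`
//   }  // otherwise reject the input -- but never crash or over-allocate.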
TEST(CorruptedTest, VerifyCorrupted) { - string source = "making sure we don't crash with corrupted input"; + std::string source = "making sure we don't crash with corrupted input"; VLOG(1) << source; - string dest; - string uncmp; + std::string dest; + std::string uncmp; snappy::Compress(source.data(), source.size(), &dest); // Mess around with the data. It's hard to simulate all possible @@ -545,8 +319,8 @@ TEST(CorruptedTest, VerifyCorrupted) { // This is testing for a security bug - a buffer that decompresses to 100k // but we lie in the snappy header and only reserve 0 bytes of memory :) source.resize(100000); - for (size_t i = 0; i < source.length(); ++i) { - source[i] = 'A'; + for (char& source_char : source) { + source_char = 'A'; } snappy::Compress(source.data(), source.size(), &dest); dest[0] = dest[1] = dest[2] = dest[3] = 0; @@ -577,14 +351,14 @@ TEST(CorruptedTest, VerifyCorrupted) { // try reading stuff in from a bad file. for (int i = 1; i <= 3; ++i) { - string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(), - 0); - string uncmp; + std::string data = + ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0); + std::string uncmp; // check that we don't return a crazy length size_t ulen; CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen) || (ulen < (1<<20))); - uint32 ulen2; + uint32_t ulen2; snappy::ByteArraySource source(data.data(), data.size()); CHECK(!snappy::GetUncompressedLength(&source, &ulen2) || (ulen2 < (1<<20))); @@ -597,7 +371,7 @@ TEST(CorruptedTest, VerifyCorrupted) { // These mirror the compression code in snappy.cc, but are copied // here so that we can bypass some limitations in the how snappy.cc // invokes these routines. -static void AppendLiteral(string* dst, const string& literal) { +void AppendLiteral(std::string* dst, const std::string& literal) { if (literal.empty()) return; int n = literal.size() - 1; if (n < 60) { @@ -612,12 +386,12 @@ static void AppendLiteral(string* dst, const string& literal) { n >>= 8; } dst->push_back(0 | ((59+count) << 2)); - *dst += string(number, count); + *dst += std::string(number, count); } *dst += literal; } -static void AppendCopy(string* dst, int offset, int length) { +void AppendCopy(std::string* dst, int offset, int length) { while (length > 0) { // Figure out how much to copy in one shot int to_copy; @@ -654,51 +428,114 @@ TEST(Snappy, SimpleTests) { Verify("ab"); Verify("abc"); - Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); - Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); + Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); +} + +// Regression test for cr/345340892. +TEST(Snappy, AppendSelfPatternExtensionEdgeCases) { + Verify("abcabcabcabcabcabcab"); + Verify("abcabcabcabcabcabcab0123456789ABCDEF"); + + Verify("abcabcabcabcabcabcabcabcabcabcabcabc"); + Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF"); +} + +// Regression test for cr/345340892. 
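+// (Exhaustively covers copies whose offset is smaller than their length,
+// i.e. copies that extend a repeating pattern over itself.)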
+TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) { + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) { + for (int length = 1; length <= 64; ++length) { + for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) { + const int size = pattern_size + length + extra_bytes_after_pattern; + std::string input; + input.resize(size); + for (int i = 0; i < pattern_size; ++i) { + input[i] = 'a' + i; + } + for (int i = 0; i < length; ++i) { + input[pattern_size + i] = input[i]; + } + for (int i = 0; i < extra_bytes_after_pattern; ++i) { + input[pattern_size + length + i] = + static_cast(uniform_byte(rng)); + } + Verify(input); + } + } + } } // Verify max blowup (lots of four-byte copies) TEST(Snappy, MaxBlowup) { - string input; - for (int i = 0; i < 20000; i++) { - ACMRandom rnd(i); - uint32 bytes = static_cast(rnd.Next()); - input.append(reinterpret_cast(&bytes), sizeof(bytes)); - } - for (int i = 19999; i >= 0; i--) { - ACMRandom rnd(i); - uint32 bytes = static_cast(rnd.Next()); - input.append(reinterpret_cast(&bytes), sizeof(bytes)); + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + std::string input; + for (int i = 0; i < 80000; ++i) + input.push_back(static_cast(uniform_byte(rng))); + + for (int i = 0; i < 80000; i += 4) { + std::string four_bytes(input.end() - i - 4, input.end() - i); + input.append(four_bytes); } Verify(input); } -TEST(Snappy, RandomData) { - ACMRandom rnd(FLAGS_test_random_seed); +// Issue #201, when output is more than 4GB, we had a data corruption bug. +// We cannot run this test always because of CI constraints. +TEST(Snappy, DISABLED_MoreThan4GB) { + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + std::string input; + input.resize((1ull << 32) - 1); + for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i) + input[i] = static_cast(uniform_byte(rng)); + Verify(input); +} - const int num_ops = 20000; - for (int i = 0; i < num_ops; i++) { +TEST(Snappy, RandomData) { + std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); + std::uniform_int_distribution uniform_0_to_3(0, 3); + std::uniform_int_distribution uniform_0_to_8(0, 8); + std::uniform_int_distribution uniform_byte(0, 255); + std::uniform_int_distribution uniform_4k(0, 4095); + std::uniform_int_distribution uniform_64k(0, 65535); + std::bernoulli_distribution one_in_ten(1.0 / 10); + + constexpr int num_ops = 20000; + for (int i = 0; i < num_ops; ++i) { if ((i % 1000) == 0) { VLOG(0) << "Random op " << i << " of " << num_ops; } - string x; - size_t len = rnd.Uniform(4096); + std::string x; + size_t len = uniform_4k(rng); if (i < 100) { - len = 65536 + rnd.Uniform(65536); + len = 65536 + uniform_64k(rng); } while (x.size() < len) { int run_len = 1; - if (rnd.OneIn(10)) { - run_len = rnd.Skewed(8); + if (one_in_ten(rng)) { + int skewed_bits = uniform_0_to_8(rng); + // int is guaranteed to hold at least 16 bits, this uses at most 8 bits. + std::uniform_int_distribution skewed_low(0, + (1 << skewed_bits) - 1); + run_len = skewed_low(rng); + } + char c = static_cast(uniform_byte(rng)); + if (i >= 100) { + int skewed_bits = uniform_0_to_3(rng); + // int is guaranteed to hold at least 16 bits, this uses at most 3 bits. + std::uniform_int_distribution skewed_low(0, + (1 << skewed_bits) - 1); + c = static_cast(skewed_low(rng)); } - char c = (i < 100) ? 
rnd.Uniform(256) : rnd.Skewed(3); while (run_len-- > 0 && x.size() < len) { - x += c; + x.push_back(c); } } @@ -712,20 +549,20 @@ TEST(Snappy, FourByteOffset) { // copy manually. // The two fragments that make up the input string. - string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; - string fragment2 = "some other string"; + std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; + std::string fragment2 = "some other string"; // How many times each fragment is emitted. const int n1 = 2; const int n2 = 100000 / fragment2.size(); - const int length = n1 * fragment1.size() + n2 * fragment2.size(); + const size_t length = n1 * fragment1.size() + n2 * fragment2.size(); - string compressed; + std::string compressed; Varint::Append32(&compressed, length); AppendLiteral(&compressed, fragment1); - string src = fragment1; - for (int i = 0; i < n2; i++) { + std::string src = fragment1; + for (int i = 0; i < n2; ++i) { AppendLiteral(&compressed, fragment2); src += fragment2; } @@ -733,14 +570,34 @@ TEST(Snappy, FourByteOffset) { src += fragment1; CHECK_EQ(length, src.size()); - string uncompressed; + std::string uncompressed; CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed)); CHECK_EQ(uncompressed, src); } -TEST(Snappy, IOVecEdgeCases) { +TEST(Snappy, IOVecSourceEdgeCases) { + // Validate that empty leading, trailing, and in-between iovecs are handled: + // [] [] ['a'] [] ['b'] []. + std::string data = "ab"; + char* buf = const_cast(data.data()); + size_t used_so_far = 0; + static const int kLengths[] = {0, 0, 1, 0, 1, 0}; + struct iovec iov[ARRAYSIZE(kLengths)]; + for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { + iov[i].iov_base = buf + used_so_far; + iov[i].iov_len = kLengths[i]; + used_so_far += kLengths[i]; + } + std::string compressed; + snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed); + std::string uncompressed; + snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed); + CHECK_EQ(data, uncompressed); +} + +TEST(Snappy, IOVecSinkEdgeCases) { // Test some tricky edge cases in the iovec output that are not necessarily // exercised by random tests. @@ -755,7 +612,7 @@ TEST(Snappy, IOVecEdgeCases) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 22); // A literal whose output crosses three blocks. 
@@ -816,7 +673,7 @@ TEST(Snappy, IOVecLiteralOverflow) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 8); AppendLiteral(&compressed, "12345678"); @@ -838,7 +695,7 @@ TEST(Snappy, IOVecCopyOverflow) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 8); AppendLiteral(&compressed, "123"); @@ -852,21 +709,20 @@ TEST(Snappy, IOVecCopyOverflow) { } } -static bool CheckUncompressedLength(const string& compressed, - size_t* ulength) { +bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) { const bool result1 = snappy::GetUncompressedLength(compressed.data(), compressed.size(), ulength); snappy::ByteArraySource source(compressed.data(), compressed.size()); - uint32 length; + uint32_t length; const bool result2 = snappy::GetUncompressedLength(&source, &length); CHECK_EQ(result1, result2); return result1; } TEST(SnappyCorruption, TruncatedVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\xf0'); CHECK(!CheckUncompressedLength(compressed, &ulength)); @@ -876,7 +732,7 @@ TEST(SnappyCorruption, TruncatedVarint) { } TEST(SnappyCorruption, UnterminatedVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\x80'); compressed.push_back('\x80'); @@ -891,7 +747,7 @@ TEST(SnappyCorruption, UnterminatedVarint) { } TEST(SnappyCorruption, OverflowingVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\xfb'); compressed.push_back('\xff'); @@ -908,14 +764,14 @@ TEST(Snappy, ReadPastEndOfBuffer) { // Check that we do not read past end of input // Make a compressed string that ends with a single-byte literal - string compressed; + std::string compressed; Varint::Append32(&compressed, 1); AppendLiteral(&compressed, "x"); - string uncompressed; + std::string uncompressed; DataEndingAtUnreadablePage c(compressed); CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); - CHECK_EQ(uncompressed, string("x")); + CHECK_EQ(uncompressed, std::string("x")); } // Check for an infinite loop caused by a copy with offset==0 @@ -934,17 +790,14 @@ TEST(Snappy, ZeroOffsetCopyValidation) { EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4)); } -namespace { - int TestFindMatchLength(const char* s1, const char *s2, unsigned length) { + uint64_t data; std::pair p = - snappy::internal::FindMatchLength(s1, s2, s2 + length); + snappy::internal::FindMatchLength(s1, s2, s2 + length, &data); CHECK_EQ(p.first < 8, p.second); return p.first; } -} // namespace - TEST(Snappy, FindMatchLength) { // Exercise all different code paths through the function. // 64-bit version: @@ -1036,35 +889,37 @@ TEST(Snappy, FindMatchLength) { } TEST(Snappy, FindMatchLengthRandom) { - const int kNumTrials = 10000; - const int kTypicalLength = 10; - ACMRandom rnd(FLAGS_test_random_seed); - - for (int i = 0; i < kNumTrials; i++) { - string s, t; - char a = rnd.Rand8(); - char b = rnd.Rand8(); - while (!rnd.OneIn(kTypicalLength)) { - s.push_back(rnd.OneIn(2) ? a : b); - t.push_back(rnd.OneIn(2) ? 
a : b); + constexpr int kNumTrials = 10000; + constexpr int kTypicalLength = 10; + std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); + std::uniform_int_distribution uniform_byte(0, 255); + std::bernoulli_distribution one_in_two(1.0 / 2); + std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength); + + for (int i = 0; i < kNumTrials; ++i) { + std::string s, t; + char a = static_cast(uniform_byte(rng)); + char b = static_cast(uniform_byte(rng)); + while (!one_in_typical_length(rng)) { + s.push_back(one_in_two(rng) ? a : b); + t.push_back(one_in_two(rng) ? a : b); } DataEndingAtUnreadablePage u(s); DataEndingAtUnreadablePage v(t); - int matched = TestFindMatchLength(u.data(), v.data(), t.size()); + size_t matched = TestFindMatchLength(u.data(), v.data(), t.size()); if (matched == t.size()) { EXPECT_EQ(s, t); } else { EXPECT_NE(s[matched], t[matched]); - for (int j = 0; j < matched; j++) { + for (size_t j = 0; j < matched; ++j) { EXPECT_EQ(s[j], t[j]); } } } } -static uint16 MakeEntry(unsigned int extra, - unsigned int len, - unsigned int copy_offset) { +uint16_t MakeEntry(unsigned int extra, unsigned int len, + unsigned int copy_offset) { // Check that all of the fields fit within the allocated space assert(extra == (extra & 0x7)); // At most 3 bits assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits @@ -1081,329 +936,88 @@ TEST(Snappy, VerifyCharTable) { using snappy::internal::COPY_4_BYTE_OFFSET; using snappy::internal::char_table; - uint16 dst[256]; + uint16_t dst[256]; // Place invalid entries in all places to detect missing initialization int assigned = 0; - for (int i = 0; i < 256; i++) { + for (int i = 0; i < 256; ++i) { dst[i] = 0xffff; } // Small LITERAL entries. We store (len-1) in the top 6 bits. - for (unsigned int len = 1; len <= 60; len++) { - dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0); + for (uint8_t len = 1; len <= 60; ++len) { + dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0); assigned++; } // Large LITERAL entries. We use 60..63 in the high 6 bits to // encode the number of bytes of length info that follow the opcode. - for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) { + for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) { // We set the length field in the lookup table to 1 because extra // bytes encode len-1. - dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0); + dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0); assigned++; } // COPY_1_BYTE_OFFSET. // // The tag byte in the compressed data stores len-4 in 3 bits, and - // offset/256 in 5 bits. offset%256 is stored in the next byte. + // offset/256 in 3 bits. offset%256 is stored in the next byte. // // This format is used for length in range [4..11] and offset in // range [0..2047] - for (unsigned int len = 4; len < 12; len++) { - for (unsigned int offset = 0; offset < 2048; offset += 256) { - dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] = - MakeEntry(1, len, offset>>8); + for (uint8_t len = 4; len < 12; ++len) { + for (uint16_t offset = 0; offset < 2048; offset += 256) { + uint8_t offset_high = static_cast(offset >> 8); + dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] = + MakeEntry(1, len, offset_high); assigned++; } } // COPY_2_BYTE_OFFSET. // Tag contains len-1 in top 6 bits, and offset in next two bytes. 
-static uint16 MakeEntry(unsigned int extra,
-                        unsigned int len,
-                        unsigned int copy_offset) {
+uint16_t MakeEntry(unsigned int extra, unsigned int len,
+                   unsigned int copy_offset) {
   // Check that all of the fields fit within the allocated space
   assert(extra       == (extra & 0x7));          // At most 3 bits
   assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
@@ -1081,329 +936,88 @@ TEST(Snappy, VerifyCharTable) {
   using snappy::internal::COPY_4_BYTE_OFFSET;
   using snappy::internal::char_table;
 
-  uint16 dst[256];
+  uint16_t dst[256];
 
   // Place invalid entries in all places to detect missing initialization
   int assigned = 0;
-  for (int i = 0; i < 256; i++) {
+  for (int i = 0; i < 256; ++i) {
     dst[i] = 0xffff;
   }
 
   // Small LITERAL entries.  We store (len-1) in the top 6 bits.
-  for (unsigned int len = 1; len <= 60; len++) {
-    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
+  for (uint8_t len = 1; len <= 60; ++len) {
+    dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
     assigned++;
   }
 
   // Large LITERAL entries.  We use 60..63 in the high 6 bits to
   // encode the number of bytes of length info that follow the opcode.
-  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
+  for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
     // We set the length field in the lookup table to 1 because extra
     // bytes encode len-1.
-    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
+    dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
     assigned++;
   }
 
   // COPY_1_BYTE_OFFSET.
   //
   // The tag byte in the compressed data stores len-4 in 3 bits, and
-  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
+  // offset/256 in 3 bits.  offset%256 is stored in the next byte.
   //
   // This format is used for length in range [4..11] and offset in
   // range [0..2047]
-  for (unsigned int len = 4; len < 12; len++) {
-    for (unsigned int offset = 0; offset < 2048; offset += 256) {
-      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
-          MakeEntry(1, len, offset>>8);
+  for (uint8_t len = 4; len < 12; ++len) {
+    for (uint16_t offset = 0; offset < 2048; offset += 256) {
+      uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
+      dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
+          MakeEntry(1, len, offset_high);
       assigned++;
     }
   }
 
   // COPY_2_BYTE_OFFSET.
   // Tag contains len-1 in top 6 bits, and offset in next two bytes.
-  for (unsigned int len = 1; len <= 64; len++) {
-    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
+  for (uint8_t len = 1; len <= 64; ++len) {
+    dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
     assigned++;
   }
 
   // COPY_4_BYTE_OFFSET.
   // Tag contents len-1 in top 6 bits, and offset in next four bytes.
-  for (unsigned int len = 1; len <= 64; len++) {
-    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
+  for (uint8_t len = 1; len <= 64; ++len) {
+    dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
     assigned++;
   }
 
   // Check that each entry was initialized exactly once.
   EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
-  for (int i = 0; i < 256; i++) {
+  for (int i = 0; i < 256; ++i) {
     EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
   }
 
-  if (FLAGS_snappy_dump_decompression_table) {
-    printf("static const uint16 char_table[256] = {\n  ");
-    for (int i = 0; i < 256; i++) {
-      printf("0x%04x%s",
-             dst[i],
-             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
+  if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
+    std::printf("static const uint16_t char_table[256] = {\n  ");
+    for (int i = 0; i < 256; ++i) {
+      std::printf("0x%04x%s",
+                  dst[i],
+                  ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n  " : ", ")));
     }
-    printf("};\n");
+    std::printf("};\n");
   }
 
   // Check that computed table matched recorded table.
-  for (int i = 0; i < 256; i++) {
+  for (int i = 0; i < 256; ++i) {
     EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
   }
 }
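To make that tag-byte layout concrete, here is a hypothetical decoder for a single COPY_1_BYTE_OFFSET element, matching the corrected comment above (the low two bits select the element type; this helper is illustrative and not part of snappy):

    #include <cassert>
    #include <cstdint>

    // Unpacks a COPY_1_BYTE_OFFSET element: type in bits [0,2), len-4 in
    // bits [2,5), offset/256 in bits [5,8), offset%256 in the next byte.
    void DecodeCopy1(uint8_t tag, uint8_t next_byte,
                     unsigned* len, unsigned* offset) {
      assert((tag & 0x03) == 0x01);  // COPY_1_BYTE_OFFSET type code
      *len = ((tag >> 2) & 0x07) + 4;                          // lengths 4..11
      *offset = (static_cast<unsigned>(tag >> 5) << 8) | next_byte;  // 0..2047
    }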
-static void CompressFile(const char* fname) {
-  string fullinput;
-  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
-
-  string compressed;
-  Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
-
-  CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed,
-                             file::Defaults()));
-}
-
-static void UncompressFile(const char* fname) {
-  string fullinput;
-  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
-
-  size_t uncompLength;
-  CHECK(CheckUncompressedLength(fullinput, &uncompLength));
-
-  string uncompressed;
-  uncompressed.resize(uncompLength);
-  CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
-
-  CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed,
-                             file::Defaults()));
-}
-
-static void MeasureFile(const char* fname) {
-  string fullinput;
-  CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
-  printf("%-40s :\n", fname);
-
-  int start_len = (FLAGS_start_len < 0) ? fullinput.size() : FLAGS_start_len;
-  int end_len = fullinput.size();
-  if (FLAGS_end_len >= 0) {
-    end_len = std::min<int>(fullinput.size(), FLAGS_end_len);
-  }
-  for (int len = start_len; len <= end_len; len++) {
-    const char* const input = fullinput.data();
-    int repeats = (FLAGS_bytes + len) / (len + 1);
-    if (FLAGS_zlib)     Measure(input, len, ZLIB, repeats, 1024<<10);
-    if (FLAGS_lzo)      Measure(input, len, LZO, repeats, 1024<<10);
-    if (FLAGS_snappy)   Measure(input, len, SNAPPY, repeats, 4096<<10);
-
-    // For block-size based measurements
-    if (0 && FLAGS_snappy) {
-      Measure(input, len, SNAPPY, repeats, 8<<10);
-      Measure(input, len, SNAPPY, repeats, 16<<10);
-      Measure(input, len, SNAPPY, repeats, 32<<10);
-      Measure(input, len, SNAPPY, repeats, 64<<10);
-      Measure(input, len, SNAPPY, repeats, 256<<10);
-      Measure(input, len, SNAPPY, repeats, 1024<<10);
-    }
-  }
-}
-
-static struct {
-  const char* label;
-  const char* filename;
-  size_t size_limit;
-} files[] = {
-  { "html", "html", 0 },
-  { "urls", "urls.10K", 0 },
-  { "jpg", "fireworks.jpeg", 0 },
-  { "jpg_200", "fireworks.jpeg", 200 },
-  { "pdf", "paper-100k.pdf", 0 },
-  { "html4", "html_x_4", 0 },
-  { "txt1", "alice29.txt", 0 },
-  { "txt2", "asyoulik.txt", 0 },
-  { "txt3", "lcet10.txt", 0 },
-  { "txt4", "plrabn12.txt", 0 },
-  { "pb", "geo.protodata", 0 },
-  { "gaviota", "kppkn.gtb", 0 },
-};
-
-static void BM_UFlat(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-  char* dst = new char[contents.size()];
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
-  }
-  StopBenchmarkTiming();
-
-  delete[] dst;
-}
-BENCHMARK(BM_UFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
-
-static void BM_UValidate(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
-  }
-  StopBenchmarkTiming();
-}
-BENCHMARK(BM_UValidate)->DenseRange(0, 4);
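The removed BM_UIOVec below timed snappy::RawUncompressToIOVec, which scatters the uncompressed output across caller-supplied buffers rather than one flat array. A minimal usage sketch, with illustrative buffer names, assuming a POSIX struct iovec:

    #include <sys/uio.h>
    #include <string>
    #include "snappy.h"

    // Decompresses into two caller-owned buffers; the iovec entries must
    // together provide room for the full uncompressed length, or this fails.
    bool UncompressSplit(const std::string& zdata,
                         char* front, size_t front_len,
                         char* back, size_t back_len) {
      struct iovec iov[2];
      iov[0].iov_base = front;
      iov[0].iov_len = front_len;
      iov[1].iov_base = back;
      iov[1].iov_len = back_len;
      return snappy::RawUncompressToIOVec(zdata.data(), zdata.size(), iov, 2);
    }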
-
-static void BM_UIOVec(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-
-  // Uncompress into an iovec containing ten entries.
-  const int kNumEntries = 10;
-  struct iovec iov[kNumEntries];
-  char *dst = new char[contents.size()];
-  int used_so_far = 0;
-  for (int i = 0; i < kNumEntries; ++i) {
-    iov[i].iov_base = dst + used_so_far;
-    if (used_so_far == contents.size()) {
-      iov[i].iov_len = 0;
-      continue;
-    }
-
-    if (i == kNumEntries - 1) {
-      iov[i].iov_len = contents.size() - used_so_far;
-    } else {
-      iov[i].iov_len = contents.size() / kNumEntries;
-    }
-    used_so_far += iov[i].iov_len;
-  }
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
-                                       kNumEntries));
-  }
-  StopBenchmarkTiming();
-
-  delete[] dst;
-}
-BENCHMARK(BM_UIOVec)->DenseRange(0, 4);
-
-static void BM_UFlatSink(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  string zcontents;
-  snappy::Compress(contents.data(), contents.size(), &zcontents);
-  char* dst = new char[contents.size()];
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  SetBenchmarkLabel(files[arg].label);
-  StartBenchmarkTiming();
-  while (iters-- > 0) {
-    snappy::ByteArraySource source(zcontents.data(), zcontents.size());
-    snappy::UncheckedByteArraySink sink(dst);
-    CHECK(snappy::Uncompress(&source, &sink));
+TEST(Snappy, TestBenchmarkFiles) {
+  for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
+    Verify(ReadTestDataFile(kTestDataFiles[i].filename,
+                            kTestDataFiles[i].size_limit));
   }
-  StopBenchmarkTiming();
-
-  string s(dst, contents.size());
-  CHECK_EQ(contents, s);
-
-  delete[] dst;
 }
-BENCHMARK(BM_UFlatSink)->DenseRange(0, ARRAYSIZE(files) - 1);
-
-static void BM_ZFlat(int iters, int arg) {
-  StopBenchmarkTiming();
-
-  // Pick file to process based on "arg"
-  CHECK_GE(arg, 0);
-  CHECK_LT(arg, ARRAYSIZE(files));
-  string contents = ReadTestDataFile(files[arg].filename,
-                                     files[arg].size_limit);
-
-  char* dst = new char[snappy::MaxCompressedLength(contents.size())];
-
-  SetBenchmarkBytesProcessed(static_cast<int64>(iters) *
-                             static_cast<int64>(contents.size()));
-  StartBenchmarkTiming();
-
-  size_t zsize = 0;
-  while (iters-- > 0) {
-    snappy::RawCompress(contents.data(), contents.size(), dst, &zsize);
-  }
-  StopBenchmarkTiming();
-  const double compression_ratio =
-      static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
-  SetBenchmarkLabel(StringPrintf("%s (%.2f %%)",
-                                 files[arg].label, 100.0 * compression_ratio));
-  VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes",
-                          files[arg].label, contents.size(), zsize);
-  delete[] dst;
-}
-BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1);
+}  // namespace
 
 }  // namespace snappy
-
-int main(int argc, char** argv) {
-  InitGoogle(argv[0], &argc, &argv, true);
-  RunSpecifiedBenchmarks();
-
-  if (argc >= 2) {
-    for (int arg = 1; arg < argc; arg++) {
-      if (FLAGS_write_compressed) {
-        snappy::CompressFile(argv[arg]);
-      } else if (FLAGS_write_uncompressed) {
-        snappy::UncompressFile(argv[arg]);
-      } else {
-        snappy::MeasureFile(argv[arg]);
-      }
-    }
-    return 0;
-  }
-
-  return RUN_ALL_TESTS();
-}
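With the intrusive BM_* harness gone from this file, benchmarking presumably moves to the Google Benchmark submodule added below (the CI workflow in this change runs a separate snappy_benchmark binary). A hypothetical port of BM_UFlat in that style, with a synthetic input standing in for ReadTestDataFile, might look like:

    #include <cstdint>
    #include <string>
    #include <vector>

    #include "benchmark/benchmark.h"
    #include "snappy.h"

    // Illustrative sketch only; not the actual contents of snappy_benchmark.cc.
    void BM_UFlat(benchmark::State& state) {
      std::string contents(1 << 20, 'a');  // synthetic, compressible input
      std::string zcontents;
      snappy::Compress(contents.data(), contents.size(), &zcontents);
      std::vector<char> dst(contents.size());
      for (auto _ : state) {
        snappy::RawUncompress(zcontents.data(), zcontents.size(), dst.data());
      }
      state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
                              static_cast<int64_t>(contents.size()));
    }
    BENCHMARK(BM_UFlat);
    BENCHMARK_MAIN();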
diff --git a/third_party/benchmark b/third_party/benchmark
new file mode 160000
index 0000000..b20cea6
--- /dev/null
+++ b/third_party/benchmark
@@ -0,0 +1 @@
+Subproject commit b20cea674170b2ba45da0dfaf03953cdea473d0d
diff --git a/third_party/googletest b/third_party/googletest
new file mode 160000
index 0000000..b796f7d
--- /dev/null
+++ b/third_party/googletest
@@ -0,0 +1 @@
+Subproject commit b796f7d44681514f58a683a3a71ff17c94edb0c1