Add zlib-ng support from system or built-in subproject
zlib-ng is an actively maintained and optimized fork of zlib that includes SIMD optimizations for: - Intel and AMD's SSE2/3/4, AVX (and more), - ARM's NEON, - Power ISA's Altivec, and - RISC-V's "V" Vector (RVV) extensions. It's enabled by default and will either use the system lib or, when compiling an optimized build, use the built-in subproject, provided CMake and Meson version 1.3.0 or newer are available. If those aren't provided or available, then the existing zlib system lib provides the deflate functionality.
This commit is contained in:
parent
dcbadda15f
commit
550f48e49e
12 changed files with 429 additions and 28 deletions
3
contrib/benchmark_zlibs/.gitignore
vendored
Normal file
3
contrib/benchmark_zlibs/.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
build
|
||||
subprojects/zlib-ng
|
||||
|
85
contrib/benchmark_zlibs/meson.build
Normal file
85
contrib/benchmark_zlibs/meson.build
Normal file
|
@ -0,0 +1,85 @@
|
|||
project(
|
||||
'benchmark_zlibs',
|
||||
'cpp',
|
||||
license: 'GPL-2.0-or-later',
|
||||
meson_version: '>= 1.3.0',
|
||||
default_options: [
|
||||
'cpp_std=c++17',
|
||||
'buildtype=release',
|
||||
'b_ndebug=if-release',
|
||||
'b_staticpic=false',
|
||||
'b_pie=false',
|
||||
'warning_level=3',
|
||||
'zlib-ng:c_std=c11',
|
||||
],
|
||||
)
|
||||
|
||||
# zlib
|
||||
zlib_dep = dependency('zlib')
|
||||
|
||||
# zlib-ng
|
||||
# The zlib-ng benchmark target is always built from the CMake subproject
# configured below, so both the CMake binary and Meson's CMake module are hard
# requirements. Requiring them here stops configuration with a clear message
# rather than failing later when the module's methods are called.
cmake_bin = find_program('cmake', required: true)
cmake_module = import('cmake', required: true)
cmake_options = cmake_module.subproject_options()
|
||||
|
||||
zlib_ng_options = get_option('use_zlib_ng')
|
||||
zlib_ng_is_native = zlib_ng_options.contains('native')
|
||||
|
||||
zlib_ng_defines = {
|
||||
'ZLIB_COMPAT': true,
|
||||
'WITH_OPTIM': true,
|
||||
'ZLIB_BUILD_STATIC': true,
|
||||
'PIC': get_option('b_staticpic'),
|
||||
'BUILD_SHARED_LIBS': false,
|
||||
'WITH_GTEST': false,
|
||||
'ZLIB_ENABLE_TESTS': false,
|
||||
'WITH_NATIVE_INSTRUCTIONS': zlib_ng_is_native,
|
||||
'WITH_SANITIZER': get_option('b_sanitize'),
|
||||
}
|
||||
|
||||
foreach instruction_set : [
|
||||
'avx2',
|
||||
'avx512',
|
||||
'avx512vnni',
|
||||
'sse2',
|
||||
'ssse3',
|
||||
'sse42',
|
||||
'pclmulqdq',
|
||||
'vpclmulqdq',
|
||||
'acle',
|
||||
'neon',
|
||||
'armv6',
|
||||
'altivec',
|
||||
'power8',
|
||||
'rvv',
|
||||
'crc32_vx',
|
||||
'dfltcc_deflate',
|
||||
'dfltcc_inflate',
|
||||
]
|
||||
cmake_define_key = 'WITH_' + instruction_set.to_upper()
|
||||
cmake_define_value = (
|
||||
zlib_ng_is_native
|
||||
or zlib_ng_options.contains(instruction_set)
|
||||
)
|
||||
zlib_ng_defines += {cmake_define_key: cmake_define_value}
|
||||
endforeach
|
||||
|
||||
cmake_options.add_cmake_defines(zlib_ng_defines)
|
||||
|
||||
zlib_ng_subproject = cmake_module.subproject(
|
||||
'zlib-ng',
|
||||
options: cmake_options,
|
||||
)
|
||||
zlib_ng_dep = zlib_ng_subproject.get_variable('zlib_dep')
|
||||
|
||||
executable(
|
||||
'benchmark_zlib',
|
||||
'src/main.cpp',
|
||||
dependencies: zlib_dep,
|
||||
)
|
||||
|
||||
executable(
|
||||
'benchmark_zlib_ng',
|
||||
'src/main.cpp',
|
||||
dependencies: zlib_ng_dep,
|
||||
)
|
1
contrib/benchmark_zlibs/meson_options.txt
Symbolic link
1
contrib/benchmark_zlibs/meson_options.txt
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../meson_options.txt
|
112
contrib/benchmark_zlibs/src/main.cpp
Normal file
112
contrib/benchmark_zlibs/src/main.cpp
Normal file
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
*
|
||||
* Copyright (C) 2023-2023 The DOSBox Staging Team
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
using namespace std::chrono;
|
||||
|
||||
constexpr auto OneMegabyte = 1024 * 1024;
|
||||
|
||||
using data_array_t = std::array<Bytef, OneMegabyte>;
|
||||
|
||||
// Builds the benchmark's input buffer: a ramp of byte values 0..255 that
// repeats until the whole array is filled.
static data_array_t generate_easy_data_in()
{
    data_array_t buffer = {};

    size_t position = 0;
    for (auto& byte : buffer) {
        byte = static_cast<Bytef>(position % 256);
        ++position;
    }

    return buffer;
}
|
||||
|
||||
static void print_results(const size_t num_bytes, const microseconds elapsed_us_us)
|
||||
{
|
||||
// Calculate and print compression speed in MB/s
|
||||
const auto elapsed_us_s = elapsed_us_us.count() / 1000000.0;
|
||||
const auto speed_mb_s = static_cast<double>(num_bytes) / OneMegabyte /
|
||||
elapsed_us_s;
|
||||
printf("%7.1f MB/s\n", speed_mb_s);
|
||||
}
|
||||
|
||||
// Benchmarks deflate() by compressing 'data_in' in a single call, at the
// default compression level, for a fixed number of rounds. A progress dot is
// printed every ten percent and the overall throughput is printed at the end.
// Only the deflate() call is timed; stream setup and teardown are excluded.
static void compress_data(data_array_t data_in)
{
    static data_array_t data_out = {};

    // Stops the benchmark when a zlib call returns an unexpected code.
    // assert() compiles to nothing under NDEBUG, in which case a failed
    // round would otherwise still be counted in the reported throughput.
    const auto check_rcode = [](const int actual, const int expected,
                                const char* call_name) {
        if (actual != expected) {
            std::fprintf(stderr,
                         "\n%s returned %d, expected %d\n",
                         call_name,
                         actual,
                         expected);
            std::exit(EXIT_FAILURE);
        }
    };

    // Loop and tally counters
    constexpr auto num_rounds  = 200;
    constexpr auto ten_percent = num_rounds / 10;

    auto remaining_rounds = num_rounds;

    // Tallied as size_t to match data_in.size() and print_results()
    size_t total_bytes_compressed = 0;

    auto elapsed_us = microseconds(0);

    while (remaining_rounds > 0) {
        // Initialize the stream
        z_stream stream = {};
        auto rcode = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
        check_rcode(rcode, Z_OK, "deflateInit");

        // Configure the stream. zlib's byte counters are not size_t, so
        // the conversion is spelled out instead of narrowing implicitly.
        stream.avail_in  = static_cast<uInt>(data_in.size());
        stream.next_in   = data_in.data();
        stream.avail_out = static_cast<uInt>(data_out.size());
        stream.next_out  = data_out.data();

        // Compress and record elapsed. The result is checked after the
        // clock is stopped so the check is never part of the timing.
        const auto start = high_resolution_clock::now();
        rcode = deflate(&stream, Z_FINISH);
        const auto end = high_resolution_clock::now();
        check_rcode(rcode, Z_STREAM_END, "deflate");

        // Update tallies
        --remaining_rounds;
        total_bytes_compressed += data_in.size();
        elapsed_us += duration_cast<microseconds>(end - start);

        // Close the stream
        rcode = deflateEnd(&stream);
        check_rcode(rcode, Z_OK, "deflateEnd");

        // Log every ten percent done
        if (remaining_rounds % ten_percent == 0) {
            printf(".");
        }
    }

    print_results(total_bytes_compressed, elapsed_us);
}
|
||||
|
||||
int main()
|
||||
{
|
||||
setvbuf(stdout, nullptr, _IONBF, 0);
|
||||
|
||||
printf("easy data:");
|
||||
compress_data(generate_easy_data_in());
|
||||
|
||||
return 0;
|
||||
}
|
1
contrib/benchmark_zlibs/subprojects/zlib-ng.wrap
Symbolic link
1
contrib/benchmark_zlibs/subprojects/zlib-ng.wrap
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../subprojects/zlib-ng.wrap
|
111
meson.build
111
meson.build
|
@ -4,7 +4,7 @@ project(
|
|||
'cpp',
|
||||
version: '0.81.0-alpha',
|
||||
license: 'GPL-2.0-or-later',
|
||||
meson_version: '>= 0.57.0',
|
||||
meson_version: '>= 0.59.0',
|
||||
default_options: [
|
||||
'cpp_std=c++17',
|
||||
'buildtype=release',
|
||||
|
@ -25,6 +25,7 @@ project(
|
|||
'gtest:warning_level=0',
|
||||
'libjpeg-turbo:b_staticpic=true',
|
||||
'libpng:b_staticpic=true',
|
||||
'zlib-ng:c_std=c11',
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -139,8 +140,11 @@ else
|
|||
)
|
||||
endif
|
||||
|
||||
if get_option('buildtype') in ['release', 'minsize']
|
||||
# For release and small build types, we're not anticipating
|
||||
is_optimized_buildtype = (
|
||||
get_option('buildtype') in ['release', 'minsize', 'debugoptimized']
|
||||
)
|
||||
if is_optimized_buildtype
|
||||
# For optimized build types, we're not anticipating
|
||||
# needing debuggable floating point signals.
|
||||
# These safety measures are still enabled in debug builds,
|
||||
# so if an issue is reported where these happen help, then
|
||||
|
@ -526,13 +530,102 @@ sdl2_dep = dependency(
|
|||
include_type: 'system',
|
||||
)
|
||||
|
||||
zlib_dep = dependency(
|
||||
'zlib',
|
||||
version: ['>= 1.2.11', '< 2'],
|
||||
default_options: default_wrap_options,
|
||||
static: ('zlib' in static_libs_list or prefers_static_libs),
|
||||
include_type: 'system',
|
||||
# zlib
|
||||
# ~~~~
|
||||
zlib_dep = disabler()
|
||||
zlib_ng_options = get_option('use_zlib_ng')
|
||||
zlib_is_static = 'zlib' in static_libs_list or prefers_static_libs
|
||||
|
||||
try_system_zlib_ng = 'auto' in zlib_ng_options or 'system' in zlib_ng_options
|
||||
try_builtin_zlib_ng = 'auto' in zlib_ng_options or 'built-in' in zlib_ng_options
|
||||
|
||||
system_zlib_ng_dep = disabler()
|
||||
if try_system_zlib_ng
|
||||
system_zlib_ng_dep = dependency(
|
||||
'zlib-ng',
|
||||
required: false,
|
||||
fallback: [],
|
||||
static: zlib_is_static,
|
||||
include_type: 'system',
|
||||
)
|
||||
endif
|
||||
# Pick the zlib-ng provider.
#
# A system zlib-ng is flagged through C_SYSTEM_ZLIB_NG and leaves zlib_dep
# untouched. The built-in zlib-ng is configured in zlib-compatible mode
# (ZLIB_COMPAT) and is assigned to zlib_dep, replacing zlib outright.
#
# The Meson version is checked with version_compare() because a plain string
# comparison is lexicographic and would rank '1.10.0' below '1.3.0'.
if system_zlib_ng_dep.found()
    summary('zlib-ng provider', 'system library')
    conf_data.set10('C_SYSTEM_ZLIB_NG', system_zlib_ng_dep.found())
elif (
    # Otherwise consider the built-in, which is a whole-sale replacement for zlib
    try_builtin_zlib_ng
    and is_optimized_buildtype
    and meson.version().version_compare('>= 1.3.0')
)
    cmake_bin = find_program('cmake', required: false)
    cmake_module = import('cmake', required: false)
    if cmake_bin.found() and cmake_module.found()
        cmake_options = cmake_module.subproject_options()
        zlib_ng_is_native = zlib_ng_options.contains('native')

        # Fixed CMake settings for a static, test-free, zlib-compatible build
        zlib_ng_defines = {
            'ZLIB_COMPAT': true,
            'WITH_OPTIM': true,
            'ZLIB_BUILD_STATIC': true,
            'PIC': get_option('b_staticpic'),
            'BUILD_SHARED_LIBS': false,
            'WITH_GTEST': false,
            'ZLIB_ENABLE_TESTS': false,
            'WITH_NATIVE_INSTRUCTIONS': zlib_ng_is_native,
            'WITH_SANITIZER': get_option('b_sanitize'),
        }

        # Each instruction set maps to a WITH_<NAME> define that is enabled
        # when 'native' is selected or the set is listed in use_zlib_ng.
        foreach instruction_set : [
            'avx2',
            'avx512',
            'avx512vnni',
            'sse2',
            'ssse3',
            'sse42',
            'pclmulqdq',
            'vpclmulqdq',
            'acle',
            'neon',
            'armv6',
            'altivec',
            'power8',
            'rvv',
            'crc32_vx',
            'dfltcc_deflate',
            'dfltcc_inflate',
        ]
            cmake_define_key = 'WITH_' + instruction_set.to_upper()
            cmake_define_value = (
                zlib_ng_is_native
                or zlib_ng_options.contains(instruction_set)
            )
            zlib_ng_defines += {cmake_define_key: cmake_define_value}
        endforeach

        cmake_options.add_cmake_defines(zlib_ng_defines)

        zlib_ng_subproject = cmake_module.subproject(
            'zlib-ng',
            options: cmake_options,
        )
        zlib_dep = zlib_ng_subproject.get_variable('zlib_dep')
        summary('zlib provider', 'built-in (zlib-ng)')
    endif
endif
|
||||
|
||||
# Otherwise use the system's zlib
|
||||
if not zlib_dep.found()
|
||||
zlib_dep = dependency(
|
||||
'zlib',
|
||||
version: ['>= 1.2.11', '< 2'],
|
||||
required: true,
|
||||
fallback: [],
|
||||
static: zlib_is_static,
|
||||
include_type: 'system',
|
||||
)
|
||||
summary('zlib provider', 'system library')
|
||||
endif
|
||||
|
||||
# SpeexDSP
|
||||
# ~~~~~~~~
|
||||
|
|
|
@ -64,7 +64,99 @@ option(
|
|||
type: 'combo',
|
||||
choices: ['auto', 'true', 'false'],
|
||||
value: 'auto',
|
||||
description: 'Let ManyMouse use the X Input 2.0 protocol.'
|
||||
description: 'Let ManyMouse use the X Input 2.0 protocol.',
|
||||
)
|
||||
|
||||
# The built-in zlib-ng is available when compiling with optimizations
|
||||
# such as when -Dbuildtype is set to 'release','minsize', or
|
||||
# 'debugoptimized'.
|
||||
#
|
||||
# You will need CMake(*) and Meson 1.3.0 or newer. If your package manager's
|
||||
# version of Meson is too old, you can install it using pip.
|
||||
#
|
||||
# If you're compiling from sources just for your own local system,
|
||||
# then you can leave this setting as-is for excellent performance.
|
||||
#
|
||||
# If you're compiling a package targeting a range of hardware
|
||||
# and you have no qualms with built-ins, then set this to the
|
||||
# most conservative set of SIMD instructions for your supported
|
||||
# range of hardware. For example, if your binary needs to run on
|
||||
# Intel's Core 2 Duo (and newer), then:
|
||||
# meson setup -Duse_zlib_ng=built-in,sse2,ssse3
|
||||
#
|
||||
# If you're a repo packager that dislikes built-ins or are
|
||||
# working under a policy that prohibits them, then use:
|
||||
# meson setup -Duse_zlib_ng=false
|
||||
#
|
||||
# Note: As zlib-ng is, itself, an optimization that adds time
|
||||
# and complexity to the build process, we therefore only use it
|
||||
# for optimized build types. Maintainers, developers, and CI
|
||||
# jobs compiling debug builds aren't burdened with this as
|
||||
# these builds aren't concerned with maximizing performance.
|
||||
#
|
||||
# (*) A Meson project that depends on CMake!? Well, zlib-ng
|
||||
# is a CMake project and thankfully Meson has a module
|
||||
# that can both configure and build it. To eliminate this
|
||||
# dependency on CMake, feel free to contribute a wrap
|
||||
# for zlib-ng here: https://github.com/mesonbuild/wrapdb
|
||||
#
|
||||
option(
|
||||
'use_zlib_ng',
|
||||
type: 'array',
|
||||
choices: [
|
||||
# Auto (default) try zlib-ng from the system
|
||||
# and then built-in, in that order.
|
||||
'auto',
|
||||
|
||||
# Only try using zlib-ng from the system.
|
||||
'system',
|
||||
|
||||
# Only try using zlib-ng from built-in.
|
||||
'built-in',
|
||||
|
||||
# Disable zlib-ng; use good old zlib.
|
||||
'false',
|
||||
|
||||
# Native (default) enables those instruction sets supported
|
||||
# by Meson's build-target machine, which is often the local
|
||||
# system (unless you're cross-compiling).
|
||||
# Only guaranteed when the built-in is used.
|
||||
'native',
|
||||
|
||||
# Enable x86 and x86-64 instruction sets.
|
||||
# Only guaranteed when the built-in is used.
|
||||
'avx2',
|
||||
'avx512',
|
||||
'avx512vnni',
|
||||
'sse2',
|
||||
'ssse3',
|
||||
'sse42',
|
||||
'pclmulqdq',
|
||||
'vpclmulqdq',
|
||||
|
||||
# Enable Arm instruction sets.
|
||||
# Only guaranteed when the built-in is used.
|
||||
'acle',
|
||||
'neon',
|
||||
'armv6',
|
||||
|
||||
# Enable POWER and PowerPC instruction sets.
|
||||
# Only guaranteed when the built-in is used.
|
||||
'altivec',
|
||||
'power8',
|
||||
|
||||
# Enable the RISC-V "V" (vector) instruction set.
|
||||
# Only guaranteed when the built-in is used.
|
||||
'rvv',
|
||||
|
||||
# Enable the IBM Z instruction sets.
|
||||
# Only guaranteed when the built-in is used.
|
||||
'crc32_vx',
|
||||
'dfltcc_deflate',
|
||||
'dfltcc_inflate',
|
||||
],
|
||||
value: ['auto', 'native'],
|
||||
description: 'Enable zlib-ng either from the system or built-in',
|
||||
)
|
||||
|
||||
option(
|
||||
|
@ -84,9 +176,10 @@ option(
|
|||
)
|
||||
|
||||
option(
|
||||
'pagesize', type : 'integer',
|
||||
value : 0,
|
||||
description: 'Set host memory pagesize in bytes (skip detection)'
|
||||
'pagesize',
|
||||
type: 'integer',
|
||||
value: 0,
|
||||
description: 'Set host memory pagesize in bytes (skip detection)',
|
||||
)
|
||||
|
||||
# Per-page write-or-execute (W^X) permissions
|
||||
|
@ -124,7 +217,7 @@ option(
|
|||
'per_page_w_or_x',
|
||||
type: 'feature',
|
||||
value: 'auto',
|
||||
description: 'Flag dynamic core memory write-or-execute (W^X) per-page.'
|
||||
description: 'Flag dynamic core memory write-or-execute (W^X) per-page.',
|
||||
)
|
||||
|
||||
# Use this option for selectively switching dependencies to look for static
|
||||
|
|
|
@ -172,6 +172,9 @@
|
|||
* between operating systems.
|
||||
*/
|
||||
|
||||
// Define to 1 when zlib-ng support is provided by the system
|
||||
#mesondefine C_SYSTEM_ZLIB_NG
|
||||
|
||||
// Defined if function clock_gettime is available
|
||||
#mesondefine HAVE_CLOCK_GETTIME
|
||||
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
zlib_or_ng_dep = system_zlib_ng_dep.found() ? system_zlib_ng_dep : zlib_dep
|
||||
|
||||
libzmbv = static_library(
|
||||
'zmbv',
|
||||
'zmbv.cpp',
|
||||
include_directories: incdir,
|
||||
dependencies: [libmisc_dep, zlib_dep]
|
||||
dependencies: [libmisc_dep, zlib_or_ng_dep]
|
||||
)
|
||||
|
||||
libzmbv_dep = declare_dependency(link_with: libzmbv)
|
||||
|
|
|
@ -22,7 +22,21 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if defined(C_SYSTEM_ZLIB_NG)
|
||||
#include <zlib-ng.h>
|
||||
#define deflateInit2 zng_deflateInit2
|
||||
#define deflateReset zng_deflateReset
|
||||
#define deflate zng_deflate
|
||||
#define deflateEnd zng_deflateEnd
|
||||
#define inflateInit zng_inflateInit
|
||||
#define inflateReset zng_inflateReset
|
||||
#define inflate zng_inflate
|
||||
#define z_stream zng_stream
|
||||
#else
|
||||
#include <zlib.h>
|
||||
#endif
|
||||
|
||||
#define CODEC_4CC "ZMBV"
|
||||
|
||||
|
|
7
subprojects/zlib-ng.wrap
Normal file
7
subprojects/zlib-ng.wrap
Normal file
|
@ -0,0 +1,7 @@
|
|||
[wrap-git]
|
||||
url = https://github.com/zlib-ng/zlib-ng.git
|
||||
revision = 2.1.5
|
||||
method = cmake
|
||||
|
||||
[provide]
|
||||
zlib-ng = zlib_dep
|
|
@ -1,13 +0,0 @@
|
|||
[wrap-file]
|
||||
directory = zlib-1.3
|
||||
source_url = http://zlib.net/fossils/zlib-1.3.tar.gz
|
||||
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/zlib_1.3-3/zlib-1.3.tar.gz
|
||||
source_filename = zlib-1.3.tar.gz
|
||||
source_hash = ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e
|
||||
patch_filename = zlib_1.3-3_patch.zip
|
||||
patch_url = https://wrapdb.mesonbuild.com/v2/zlib_1.3-3/get_patch
|
||||
patch_hash = dcb86003e945761dc47aed34b6179003c5e68ddbca4cf71ebc5875ae57b76b8e
|
||||
wrapdb_version = 1.3-3
|
||||
|
||||
[provide]
|
||||
zlib = zlib_dep
|
Loading…
Add table
Add a link
Reference in a new issue