Add zlib-ng support from system or built-in subproject

zlib-ng is an actively maintained and optimized fork of zlib
that includes SIMD optimizations for:
- Intel and AMD's SSE2/3/4, AVX (and more)
- ARM's NEON,
- Power ISA's Altivec, and
- RISC-V's "V" Vector (RVV) extensions

It's enabled by default and will either use the system lib
or, when compiling an optimized build, use the built-in
subproject provided CMake and Meson version 1.3.0 are available.

If those aren't provided or available, then the existing zlib
system lib provides the deflate functionality.
This commit is contained in:
kcgen 2023-12-12 23:50:14 -08:00 committed by kcgen
parent dcbadda15f
commit 550f48e49e
12 changed files with 429 additions and 28 deletions

3
contrib/benchmark_zlibs/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
build
subprojects/zlib-ng

View file

@ -0,0 +1,85 @@
# Stand-alone Meson project for the zlib vs. zlib-ng benchmark executables.
# It defaults to a release build with static PIC and PIE switched off, and
# passes the C11 standard to the 'zlib-ng' subproject.
project(
'benchmark_zlibs',
'cpp',
license: 'GPL-2.0-or-later',
meson_version: '>= 1.3.0',
default_options: [
'cpp_std=c++17',
'buildtype=release',
'b_ndebug=if-release',
'b_staticpic=false',
'b_pie=false',
'warning_level=3',
'zlib-ng:c_std=c11',
],
)
# zlib
zlib_dep = dependency('zlib')

# zlib-ng
#
# This benchmark always builds zlib-ng from its CMake-based subproject, so
# the cmake executable and Meson's cmake module are hard requirements.
# Requiring them here stops the setup with a clear message instead of
# failing later, when the module's methods are called unconditionally.
cmake_bin = find_program('cmake', required: true)
cmake_module = import('cmake', required: true)
cmake_options = cmake_module.subproject_options()

zlib_ng_options = get_option('use_zlib_ng')
zlib_ng_is_native = zlib_ng_options.contains('native')

# CMake definitions handed to the zlib-ng subproject
zlib_ng_defines = {
    'ZLIB_COMPAT': true,
    'WITH_OPTIM': true,
    'ZLIB_BUILD_STATIC': true,
    'PIC': get_option('b_staticpic'),
    'BUILD_SHARED_LIBS': false,
    'WITH_GTEST': false,
    'ZLIB_ENABLE_TESTS': false,
    'WITH_NATIVE_INSTRUCTIONS': zlib_ng_is_native,
    'WITH_SANITIZER': get_option('b_sanitize'),
}

# Each instruction set gets a WITH_<NAME> define that is true when 'native'
# was selected or when the set is listed explicitly in 'use_zlib_ng'.
foreach instruction_set : [
    'avx2',
    'avx512',
    'avx512vnni',
    'sse2',
    'ssse3',
    'sse42',
    'pclmulqdq',
    'vpclmulqdq',
    'acle',
    'neon',
    'armv6',
    'altivec',
    'power8',
    'rvv',
    'crc32_vx',
    'dfltcc_deflate',
    'dfltcc_inflate',
]
    cmake_define_key = 'WITH_' + instruction_set.to_upper()
    cmake_define_value = (
        zlib_ng_is_native
        or zlib_ng_options.contains(instruction_set)
    )
    zlib_ng_defines += {cmake_define_key: cmake_define_value}
endforeach

cmake_options.add_cmake_defines(zlib_ng_defines)

zlib_ng_subproject = cmake_module.subproject(
    'zlib-ng',
    options: cmake_options,
)
zlib_ng_dep = zlib_ng_subproject.get_variable('zlib_dep')
# Build the same benchmark source once per deflate provider, so the two
# executables differ only in the library they link against.
benchmark_providers = {
    'benchmark_zlib': zlib_dep,
    'benchmark_zlib_ng': zlib_ng_dep,
}
foreach benchmark_name, provider_dep : benchmark_providers
    executable(
        benchmark_name,
        'src/main.cpp',
        dependencies: provider_dep,
    )
endforeach

View file

@ -0,0 +1 @@
../../meson_options.txt

View file

@ -0,0 +1,112 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
*
* Copyright (C) 2023-2023 The DOSBox Staging Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <array>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <zlib.h>
// Makes the chrono names used below (microseconds, duration_cast and
// high_resolution_clock) available without qualification.
using namespace std::chrono;
// Number of bytes in the benchmark's buffers
constexpr auto OneMegabyte = 1024 * 1024;
// Fixed-size, one-megabyte buffer of zlib's byte type
using data_array_t = std::array<Bytef, OneMegabyte>;
// Returns a buffer filled with the repeating byte sequence 0, 1, ..., 255.
// Going by the function's name, this is meant as easily-compressed input.
static data_array_t generate_easy_data_in()
{
    data_array_t buffer = {};

    size_t position = 0;
    for (auto& byte : buffer) {
        byte = static_cast<Bytef>(position % 256);
        ++position;
    }
    return buffer;
}
// Prints the throughput, in megabytes per second, of having processed
// num_bytes within the given elapsed time.
static void print_results(const size_t num_bytes, const microseconds elapsed_us_us)
{
    constexpr auto microseconds_per_second = 1000000.0;

    const auto elapsed_seconds = elapsed_us_us.count() / microseconds_per_second;
    const auto megabytes       = static_cast<double>(num_bytes) / OneMegabyte;

    printf("%7.1f MB/s\n", megabytes / elapsed_seconds);
}
// Exits with a diagnostic when a zlib call returns something other than the
// expected code. Unlike assert(), this check is kept when NDEBUG is defined.
static void require_zlib_result(const int actual, const int expected,
                                const char* zlib_call)
{
    if (actual == expected) {
        return;
    }
    fprintf(stderr, "\n%s returned %d (expected %d)\n", zlib_call, actual, expected);
    exit(EXIT_FAILURE);
}

// Repeatedly deflates data_in at zlib's default compression level and prints
// the resulting throughput.
//
// Only the deflate() call itself is timed; stream setup and teardown are
// excluded. A '.' is printed after every tenth of the rounds as progress.
//
// NOTE(review): data_in is presumably taken by value so that data() yields
// the non-const pointer assigned to next_in -- confirm before changing it.
static void compress_data(data_array_t data_in)
{
    static data_array_t data_out = {};

    // Loop and tally counters
    constexpr auto num_rounds  = 200;
    constexpr auto ten_percent = num_rounds / 10;
    static_assert(ten_percent > 0, "num_rounds must be at least 10");

    auto remaining_rounds = num_rounds;

    // Tallied as size_t to match data_in.size() and print_results()
    size_t total_bytes_compressed = 0;

    auto elapsed_us = microseconds(0);

    while (remaining_rounds > 0) {
        // Initialize the stream
        z_stream stream = {};
        auto rcode = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
        require_zlib_result(rcode, Z_OK, "deflateInit");

        // Configure the stream
        stream.avail_in  = static_cast<uInt>(data_in.size());
        stream.next_in   = data_in.data();
        stream.avail_out = static_cast<uInt>(data_out.size());
        stream.next_out  = data_out.data();

        // Compress and record elapsed
        const auto start = high_resolution_clock::now();
        rcode = deflate(&stream, Z_FINISH);
        const auto end = high_resolution_clock::now();
        require_zlib_result(rcode, Z_STREAM_END, "deflate");

        // Update tallies
        --remaining_rounds;
        total_bytes_compressed += data_in.size();
        elapsed_us += duration_cast<microseconds>(end - start);

        // Close the stream
        rcode = deflateEnd(&stream);
        require_zlib_result(rcode, Z_OK, "deflateEnd");

        // Log every ten percent done
        if (remaining_rounds % ten_percent == 0) {
            printf(".");
        }
    }
    print_results(total_bytes_compressed, elapsed_us);
}
// Benchmark entry point: compresses the generated "easy" data set and
// reports the throughput on a line labelled "easy data:".
int main()
{
    // Disable stdout buffering (_IONBF) so output is written as it's printed
    setvbuf(stdout, nullptr, _IONBF, 0);

    printf("easy data:");
    compress_data(generate_easy_data_in());

    return EXIT_SUCCESS;
}

View file

@ -0,0 +1 @@
../../../subprojects/zlib-ng.wrap

View file

@ -4,7 +4,7 @@ project(
'cpp',
version: '0.81.0-alpha',
license: 'GPL-2.0-or-later',
meson_version: '>= 0.57.0',
meson_version: '>= 0.59.0',
default_options: [
'cpp_std=c++17',
'buildtype=release',
@ -25,6 +25,7 @@ project(
'gtest:warning_level=0',
'libjpeg-turbo:b_staticpic=true',
'libpng:b_staticpic=true',
'zlib-ng:c_std=c11',
],
)
@ -139,8 +140,11 @@ else
)
endif
if get_option('buildtype') in ['release', 'minsize']
# For release and small build types, we're not anticipating
is_optimized_buildtype = (
get_option('buildtype') in ['release', 'minsize', 'debugoptimized']
)
if is_optimized_buildtype
# For optimized build types, we're not anticipating
# needing debuggable floating point signals.
# These safety measures are still enabled in debug builds,
# so if an issue is reported where these happen help, then
@ -526,13 +530,102 @@ sdl2_dep = dependency(
include_type: 'system',
)
zlib_dep = dependency(
'zlib',
version: ['>= 1.2.11', '< 2'],
default_options: default_wrap_options,
static: ('zlib' in static_libs_list or prefers_static_libs),
include_type: 'system',
# zlib
# ~~~~
# Interpret the 'use_zlib_ng' option: 'auto' permits both the system and the
# built-in zlib-ng, whereas 'system' and 'built-in' permit only that source.
zlib_ng_options = get_option('use_zlib_ng')

try_system_zlib_ng = (
    zlib_ng_options.contains('auto')
    or zlib_ng_options.contains('system')
)
try_builtin_zlib_ng = (
    zlib_ng_options.contains('auto')
    or zlib_ng_options.contains('built-in')
)

zlib_is_static = ('zlib' in static_libs_list) or prefers_static_libs

# Both dependencies start out as disablers, so they report as not found
# until a real dependency replaces them.
zlib_dep = disabler()
system_zlib_ng_dep = disabler()

# Probe for the system's zlib-ng; it's optional and has no subproject fallback
if try_system_zlib_ng
    system_zlib_ng_dep = dependency(
        'zlib-ng',
        static: zlib_is_static,
        include_type: 'system',
        required: false,
        fallback: [],
    )
endif
# Pick the zlib-ng provider: the system library when it was found, otherwise
# the built-in CMake subproject when it's allowed and the tooling is present.
if (system_zlib_ng_dep.found())
    summary('zlib-ng provider', 'system library')
    conf_data.set10('C_SYSTEM_ZLIB_NG', system_zlib_ng_dep.found())
elif (
    # Otherwise consider the built-in, which is a whole-sale replacement for zlib
    try_builtin_zlib_ng
    and is_optimized_buildtype
    # Compare as versions, not as strings: a plain string comparison would
    # rank '1.10.0' below '1.3.0' and wrongly skip the built-in.
    and meson.version().version_compare('>= 1.3.0')
)
    cmake_bin = find_program('cmake', required: false)
    cmake_module = import('cmake', required: false)

    if cmake_bin.found() and cmake_module.found()
        cmake_options = cmake_module.subproject_options()

        zlib_ng_is_native = zlib_ng_options.contains('native')

        # CMake definitions handed to the zlib-ng subproject
        zlib_ng_defines = {
            'ZLIB_COMPAT': true,
            'WITH_OPTIM': true,
            'ZLIB_BUILD_STATIC': true,
            'PIC': get_option('b_staticpic'),
            'BUILD_SHARED_LIBS': false,
            'WITH_GTEST': false,
            'ZLIB_ENABLE_TESTS': false,
            'WITH_NATIVE_INSTRUCTIONS': zlib_ng_is_native,
            'WITH_SANITIZER': get_option('b_sanitize'),
        }

        # Each instruction set gets a WITH_<NAME> define that is true when
        # 'native' was selected or when the set is listed explicitly.
        foreach instruction_set : [
            'avx2',
            'avx512',
            'avx512vnni',
            'sse2',
            'ssse3',
            'sse42',
            'pclmulqdq',
            'vpclmulqdq',
            'acle',
            'neon',
            'armv6',
            'altivec',
            'power8',
            'rvv',
            'crc32_vx',
            'dfltcc_deflate',
            'dfltcc_inflate',
        ]
            cmake_define_key = 'WITH_' + instruction_set.to_upper()
            cmake_define_value = (
                zlib_ng_is_native
                or zlib_ng_options.contains(instruction_set)
            )
            zlib_ng_defines += {cmake_define_key: cmake_define_value}
        endforeach

        cmake_options.add_cmake_defines(zlib_ng_defines)

        zlib_ng_subproject = cmake_module.subproject(
            'zlib-ng',
            options: cmake_options,
        )
        zlib_dep = zlib_ng_subproject.get_variable('zlib_dep')
        summary('zlib provider', 'built-in (zlib-ng)')
    endif
endif
# When zlib_dep still isn't found at this point, require the system's zlib.
# The fallback list is empty, so no subproject is used in its place.
if not zlib_dep.found()
    zlib_dep = dependency(
        'zlib',
        required: true,
        version: ['>= 1.2.11', '< 2'],
        static: zlib_is_static,
        include_type: 'system',
        fallback: [],
    )
    summary('zlib provider', 'system library')
endif
# SpeexDSP
# ~~~~~~~~

View file

@ -64,7 +64,99 @@ option(
type: 'combo',
choices: ['auto', 'true', 'false'],
value: 'auto',
description: 'Let ManyMouse use the X Input 2.0 protocol.'
description: 'Let ManyMouse use the X Input 2.0 protocol.',
)
# The built-in zlib-ng is available when compiling with optimizations
# such as when -Dbuildtype is set to 'release','minsize', or
# 'debugoptimized'.
#
# You will need CMake(*) and Meson 1.3.0. If your package manager's
# version of Meson is too old, you can install it using pip.
#
# If you're compiling from sources just for your own local system,
# then you can leave this setting as-is for excellent performance.
#
# If you're compiling a package targeting a range of hardware
# and you have no qualms with built-ins, then set this to the
# most conservative set of SIMD instructions for your supported
# range of hardware. For example, if your binary needs to run on
# Intel's Core 2 Duo (and newer), then:
# meson setup -Duse_zlib_ng=built-in,sse2,ssse3
#
# If you're a repo packager that dislikes built-ins or are
# working under a policy that prohibits them, then use:
# meson setup -Duse_zlib_ng=false
#
# Note: As zlib-ng is, itself, an optimization that adds time
# and complexity to the build process, we therefore only use it
# for optimized build types. Maintainers, developers, and CI
# jobs compiling debug builds aren't burdened with this as
# these builds aren't concerned with maximizing performance.
#
# (*) A Meson project that depends on CMake!? Well, zlib-ng
# is a CMake project and thankfully Meson has a module
# that can both configure and build it. To eliminate this
# dependency on CMake, feel free to contribute a wrap
# for zlib-ng here: https://github.com/mesonbuild/wrapdb
#
# Array option selecting where zlib-ng may come from ('auto', 'system',
# 'built-in', or 'false') together with the instruction sets to enable
# ('native' or individually named sets).
option(
'use_zlib_ng',
type: 'array',
choices: [
# Auto (default): try zlib-ng from the system first
# and then the built-in, in that order.
'auto',
# Only try using zlib-ng from the system.
'system',
# Only try using zlib-ng from the built-in.
'built-in',
# Disable zlib-ng; use good old zlib.
'false',
# Native (default): enables the instruction sets supported
# by Meson's build-target machine, which is often the local
# system (unless you're cross-compiling).
# Only guaranteed when the built-in is used.
'native',
# Enable x86 and x86-64 instruction sets.
# Only guaranteed when the built-in is used.
'avx2',
'avx512',
'avx512vnni',
'sse2',
'ssse3',
'sse42',
'pclmulqdq',
'vpclmulqdq',
# Enable Arm instruction sets.
# Only guaranteed when the built-in is used.
'acle',
'neon',
'armv6',
# Enable POWER and PowerPC instruction sets.
# Only guaranteed when the built-in is used.
'altivec',
'power8',
# Enable the RISC-V "V" (vector) instruction set.
# Only guaranteed when the built-in is used.
'rvv',
# Enable the IBM Z instruction sets.
# Only guaranteed when the built-in is used.
'crc32_vx',
'dfltcc_deflate',
'dfltcc_inflate',
],
# The defaults combine 'auto' provider selection with 'native' instruction sets
value: ['auto', 'native'],
description: 'Enable zlib-ng either from the system or built-in',
)
option(
@ -84,9 +176,10 @@ option(
)
option(
'pagesize', type : 'integer',
value : 0,
description: 'Set host memory pagesize in bytes (skip detection)'
'pagesize',
type: 'integer',
value: 0,
description: 'Set host memory pagesize in bytes (skip detection)',
)
# Per-page write-or-execute (W^X) permissions
@ -124,7 +217,7 @@ option(
'per_page_w_or_x',
type: 'feature',
value: 'auto',
description: 'Flag dynamic core memory write-or-execute (W^X) per-page.'
description: 'Flag dynamic core memory write-or-execute (W^X) per-page.',
)
# Use this option for selectively switching dependencies to look for static

View file

@ -172,6 +172,9 @@
* between operating systems.
*/
// Define to 1 when zlib-ng support is provided by the system
#mesondefine C_SYSTEM_ZLIB_NG
// Defined if function clock_gettime is available
#mesondefine HAVE_CLOCK_GETTIME

View file

@ -1,8 +1,10 @@
zlib_or_ng_dep = system_zlib_ng_dep.found() ? system_zlib_ng_dep : zlib_dep
libzmbv = static_library(
'zmbv',
'zmbv.cpp',
include_directories: incdir,
dependencies: [libmisc_dep, zlib_dep]
dependencies: [libmisc_dep, zlib_or_ng_dep]
)
libzmbv_dep = declare_dependency(link_with: libzmbv)

View file

@ -22,7 +22,21 @@
#include <cstdint>
#include <vector>
#include "config.h"
// When C_SYSTEM_ZLIB_NG is defined, include zlib-ng's own header and map the
// zlib names listed below onto their zng_-prefixed counterparts. Otherwise,
// include the regular zlib header and use its names directly.
//
// NOTE(review): these are object-like macros, so every later occurrence of
// these identifiers (in any code that follows this point) is replaced.
#if defined(C_SYSTEM_ZLIB_NG)
#include <zlib-ng.h>
// Deflate (compression) entry points
#define deflateInit2 zng_deflateInit2
#define deflateReset zng_deflateReset
#define deflate zng_deflate
#define deflateEnd zng_deflateEnd
// Inflate (decompression) entry points
#define inflateInit zng_inflateInit
#define inflateReset zng_inflateReset
#define inflate zng_inflate
// Stream state type
#define z_stream zng_stream
#else
#include <zlib.h>
#endif
#define CODEC_4CC "ZMBV"

7
subprojects/zlib-ng.wrap Normal file
View file

@ -0,0 +1,7 @@
[wrap-git]
url = https://github.com/zlib-ng/zlib-ng.git
revision = 2.1.5
method = cmake
[provide]
zlib-ng = zlib_dep

View file

@ -1,13 +0,0 @@
[wrap-file]
directory = zlib-1.3
source_url = http://zlib.net/fossils/zlib-1.3.tar.gz
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/zlib_1.3-3/zlib-1.3.tar.gz
source_filename = zlib-1.3.tar.gz
source_hash = ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e
patch_filename = zlib_1.3-3_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/zlib_1.3-3/get_patch
patch_hash = dcb86003e945761dc47aed34b6179003c5e68ddbca4cf71ebc5875ae57b76b8e
wrapdb_version = 1.3-3
[provide]
zlib = zlib_dep