/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
/* clang-format off */

#define xMDBX_ALLOY 1  /* alloyed build */

#define MDBX_BUILD_SOURCERY 8916c04a1d0598afd3f4e15336ada8cc6684b27d706e5f1ddb4a870da3d86f91_v0_13_10_0_gcc5debac

#define LIBMDBX_INTERNALS
#define MDBX_DEPRECATED

#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif

/* Undefine NDEBUG when debugging is enforced by MDBX_DEBUG, or when assertions
 * are forced by MDBX_FORCE_ASSERTIONS. */
#if (defined(MDBX_DEBUG) && MDBX_DEBUG > 0) || (defined(MDBX_FORCE_ASSERTIONS) && MDBX_FORCE_ASSERTIONS)
#undef NDEBUG
#ifndef MDBX_DEBUG
/* Keep debugging itself switched off when only the assert-checks were
 * requested. */
#define MDBX_DEBUG 0
#endif
#endif

/*----------------------------------------------------------------------------*/

/** Disables the use of GNU/Linux libc extensions.
 * \ingroup build_option
 * \note This option cannot be moved to options.h, because the dependent
 * control macros must be prepared before options.h is included.
 *
 * Defaults to 0. When non-zero, _GNU_SOURCE is undefined; otherwise
 * _GNU_SOURCE is defined on Linux targets unless it already is. */
#ifndef MDBX_DISABLE_GNU_SOURCE
#define MDBX_DISABLE_GNU_SOURCE 0
#endif
#if MDBX_DISABLE_GNU_SOURCE
#undef _GNU_SOURCE
#elif (defined(__linux__) || defined(__gnu_linux__)) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif /* MDBX_DISABLE_GNU_SOURCE */

/* Must be defined before any include: request 64-bit file offsets, except on
 * Android builds and when the value was already chosen by the user. */
#if !defined(_FILE_OFFSET_BITS) && !defined(__ANDROID_API__) && !defined(ANDROID)
#define _FILE_OFFSET_BITS 64
#endif /* _FILE_OFFSET_BITS */

/* On Apple targets define _DARWIN_C_SOURCE unless it is already defined. */
#if defined(__APPLE__) && !defined(_DARWIN_C_SOURCE)
#define _DARWIN_C_SOURCE
#endif /* _DARWIN_C_SOURCE */

/* On MinGW set __USE_MINGW_ANSI_STDIO to 1 unless the user already set it. */
#if (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)) && !defined(__USE_MINGW_ANSI_STDIO)
#define __USE_MINGW_ANSI_STDIO 1
#endif /* MinGW */

/* Platform feature-selection macros; like the ones above they are set before
 * the first system header is included.
 * Windows: default API level, CRT warning suppression, UNICODE.
 * Everything else: _POSIX_C_SOURCE defaults to 200809L. */
#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS)

#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 /* Windows 7 */
#endif                      /* _WIN32_WINNT */

#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif /* _CRT_SECURE_NO_WARNINGS */
#if !defined(UNICODE)
#define UNICODE
#endif /* UNICODE */

/* MDBX_BUILD_SHARED_LIBRARY and MDBX_WITHOUT_MSVC_CRT are evaluated as 0 by
 * the preprocessor when they are not defined at this point. */
#if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && !defined(xMDBX_TOOLS) && MDBX_WITHOUT_MSVC_CRT
#define _NO_CRT_STDIO_INLINE
#endif /* _NO_CRT_STDIO_INLINE */

#elif !defined(_POSIX_C_SOURCE)
#define _POSIX_C_SOURCE 200809L
#endif /* Windows */

#ifdef __cplusplus

#ifndef NOMINMAX
#define NOMINMAX
#endif /* NOMINMAX */

/* Workaround for modern libstdc++ with CLANG < 4.x */
#if defined(__SIZEOF_INT128__) && !defined(__GLIBCXX_TYPE_INT_N_0) && defined(__clang__) && __clang_major__ < 4
#define __GLIBCXX_BITSIZE_INT_N_0 128
#define __GLIBCXX_TYPE_INT_N_0 __int128
#endif /* Workaround for modern libstdc++ with CLANG < 4.x */

#ifdef _MSC_VER
/* Workaround for MSVC' header `extern "C"` vs `std::` redefinition bug */
#if defined(__SANITIZE_ADDRESS__) && !defined(_DISABLE_VECTOR_ANNOTATION)
#define _DISABLE_VECTOR_ANNOTATION
#endif /* _DISABLE_VECTOR_ANNOTATION */
#endif /* _MSC_VER */

#endif /* __cplusplus */

/* MSVC only: reject compilers that are too old, then disable a set of
 * warnings; some suppressions are gated on the compiler version. */
#ifdef _MSC_VER
#if _MSC_FULL_VER < 190024234
/* libmdbx has not been tested with compilers older than 19.00.24234 (Visual
 * Studio 2015 Update 3). You may remove this #error and continue at your own
 * risk; in that case please do not raise issues that relate ONLY to old
 * compilers.
 *
 * NOTE:
 *   Unfortunately, there are several different builds of "Visual Studio" that
 *   are called "Visual Studio 2015 Update 3".
 *
 *   The 190024234 is used here because it is the minimal version of Visual
 *   Studio that was used to build and test libmdbx in recent years. Soon this
 *   value will be increased to 19.0.24241.7, since building and testing with
 *   "Visual Studio 2015" will be performed only at https://ci.appveyor.com.
 *
 *   Please ask Microsoft (not us) for information about version differences
 *   and about how and where to obtain the latest "Visual Studio 2015" build
 *   with all fixes.
 */
#error "At least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required."
#endif
#if _MSC_VER > 1800
#pragma warning(disable : 4464) /* relative include path contains '..' */
#endif
#if _MSC_VER > 1913
#pragma warning(disable : 5045) /* will insert Spectre mitigation... */
#endif
#if _MSC_VER > 1914
#pragma warning(disable : 5105) /* winbase.h(9531): warning C5105: macro expansion                                     \
                                   producing 'defined' has undefined behavior */
#endif
#if _MSC_VER < 1920
/* avoid "error C2219: syntax error: type qualifier must be after '*'" */
#define __restrict
#endif
#if _MSC_VER > 1930
#pragma warning(disable : 6235) /* <expression> is always a constant */
#pragma warning(disable : 6237) /* <expression> is never evaluated and might                                           \
                                   have side effects */
#pragma warning(disable : 5286) /* implicit conversion from enum type 'type 1' to enum type 'type 2' */
#pragma warning(disable : 5287) /* operands are different enum types 'type 1' and 'type 2' */
#endif
#pragma warning(disable : 4710) /* 'xyz': function not inlined */
#pragma warning(disable : 4711) /* function 'xyz' selected for automatic                                               \
                                   inline expansion */
#pragma warning(disable : 4201) /* nonstandard extension used: nameless                                                \
                                   struct/union */
#pragma warning(disable : 4702) /* unreachable code */
#pragma warning(disable : 4706) /* assignment within conditional expression */
#pragma warning(disable : 4127) /* conditional expression is constant */
#pragma warning(disable : 4324) /* 'xyz': structure was padded due to                                                  \
                                   alignment specifier */
#pragma warning(disable : 4310) /* cast truncates constant value */
#pragma warning(disable : 4820) /* bytes padding added after data member for                                           \
                                   alignment */
#pragma warning(disable : 4548) /* expression before comma has no effect;                                              \
                                   expected expression with side - effect */
#pragma warning(disable : 4366) /* the result of the unary '&' operator may be                                         \
                                   unaligned */
#pragma warning(disable : 4200) /* nonstandard extension used: zero-sized                                              \
                                   array in struct/union */
#pragma warning(disable : 4204) /* nonstandard extension used: non-constant                                            \
                                   aggregate initializer */
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif                          /* _MSC_VER (warnings) */

/* Compilers reporting __GNUC__ below 9: ignore the -Wattributes diagnostic
 * for the rest of the translation unit. */
#if defined(__GNUC__) && __GNUC__ < 9
#pragma GCC diagnostic ignored "-Wattributes"
#endif /* GCC < 9 */

/*----------------------------------------------------------------------------*/
/* Microsoft compiler generates a lot of warning for self includes... */

/* MSVC only: save the warning state and drop to warning level 1 while the
 * headers below are included, additionally disabling three specific
 * warnings. NOTE(review): the matching `#pragma warning(pop)` is not within
 * this chunk; confirm it follows the includes. */
#ifdef _MSC_VER
#pragma warning(push, 1)
#pragma warning(disable : 4548) /* expression before comma has no effect;                                              \
                                   expected expression with side - effect */
#pragma warning(disable : 4530) /* C++ exception handler used, but unwind                                              \
                                 * semantics are not enabled. Specify /EHsc */
#pragma warning(disable : 4577) /* 'noexcept' used with no exception handling                                          \
                                 * mode specified; termination on exception is                                         \
                                 * not guaranteed. Specify /EHsc */
#endif                          /* _MSC_VER (warnings) */

/*----------------------------------------------------------------------------*/
/* basic C99 includes */

#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/*----------------------------------------------------------------------------*/
/* feature testing */

/* Fallbacks for the compiler feature-probing operators. A probe the compiler
 * does not provide is defined to expand to (0), i.e. "not available", so the
 * rest of the file can use every probe unconditionally inside #if. All
 * fallbacks are parenthesized the same way. */
#ifndef __has_warning
#define __has_warning(x) (0)
#endif

#ifndef __has_include
#define __has_include(x) (0)
#endif

#ifndef __has_attribute
#define __has_attribute(x) (0)
#endif

#ifndef __has_cpp_attribute
#define __has_cpp_attribute(x) (0)
#endif

#ifndef __has_feature
#define __has_feature(x) (0)
#endif

#ifndef __has_extension
#define __has_extension(x) (0)
#endif

#ifndef __has_builtin
#define __has_builtin(x) (0)
#endif

/* Mirror the sanitizer feature probes into the __SANITIZE_*__ macros tested
 * elsewhere in this file. A macro that is already defined is left untouched,
 * so a compiler that both answers the probe and predefines the macro does not
 * get it redefined. */
#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__)
#define __SANITIZE_THREAD__ 1
#endif

#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__)
#define __SANITIZE_ADDRESS__ 1
#endif

/* Version predicates usable in #if: each one is true when the corresponding
 * toolchain component is present and its (major, minor) version is at least
 * (maj, min); when the component is absent the predicate is (0). Predicates
 * that are already defined are kept as they are. */

#if !defined(__GNUC_PREREQ)
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
#define __GNUC_PREREQ(maj, min) (0)
#else
#define __GNUC_PREREQ(maj, min) ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#endif
#endif /* __GNUC_PREREQ */

#if !defined(__CLANG_PREREQ)
#if !defined(__clang__)
#define __CLANG_PREREQ(maj, min) (0)
#else
#define __CLANG_PREREQ(maj, min) ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min))
#endif
#endif /* __CLANG_PREREQ */

#if !defined(__GLIBC_PREREQ)
#if !defined(__GLIBC__) || !defined(__GLIBC_MINOR__)
#define __GLIBC_PREREQ(maj, min) (0)
#else
#define __GLIBC_PREREQ(maj, min) ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
#endif
#endif /* __GLIBC_PREREQ */

/*----------------------------------------------------------------------------*/
/* pre-requirements */

/* Compile-time sanity check of the integer model:
 *  - (-6 & 5) is zero only with a two's complement representation;
 *  - bytes must be 8 bits wide;
 *  - unsigned int must hold at least 32 bits;
 *  - ULONG_MAX must be divisible by 0xFFFF, which holds when the width of
 *    unsigned long is a multiple of 16 bits. */
#if (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
#error "Sanity checking failed: Two's complement, reasonably sized integer types"
#endif

/* Fall back to INTPTR_MAX when the included headers provide no SSIZE_MAX. */
#ifndef SSIZE_MAX
#define SSIZE_MAX INTPTR_MAX
#endif

/* Non-fatal diagnostics for toolchains older than the tested versions. */

#if defined(__GNUC__) && !__GNUC_PREREQ(4, 2)
/* libmdbx has not been tested with compilers older than GCC 4.2.
 * You may ignore this warning at your own risk; in that case please do not
 * raise issues that relate ONLY to old compilers.
 */
#warning "libmdbx required GCC >= 4.2"
#endif

#if defined(__clang__) && !__CLANG_PREREQ(3, 8)
/* libmdbx has not been tested with CLANG older than 3.8.
 * You may ignore this warning at your own risk; in that case please do not
 * raise issues that relate ONLY to old compilers.
 */
#warning "libmdbx required CLANG >= 3.8"
#endif

#if defined(__GLIBC__) && !__GLIBC_PREREQ(2, 12)
/* libmdbx has not been tested with anything older than glibc 2.12.
 * You may ignore this warning at your own risk; in that case please do not
 * raise issues that relate ONLY to old systems.
 */
#warning "libmdbx was only tested with GLIBC >= 2.12."
#endif

/* Warn whenever __SANITIZE_THREAD__ is defined: per the message below,
 * ThreadSanitizer reports many false positives on libmdbx. */
#ifdef __SANITIZE_THREAD__
#warning "libmdbx don't compatible with ThreadSanitizer, you will get a lot of false-positive issues."
#endif /* __SANITIZE_THREAD__ */

/*----------------------------------------------------------------------------*/
/* C11' alignas() */

/* MDBX_ALIGNAS(N): alignment specifier requesting N-byte alignment.
 * <stdalign.h> is included when the compiler can find it, then the first
 * available spelling is chosen, in this order:
 *   1. alignas()              - the alignas macro is defined, or C++;
 *   2. _Alignas()             - C11 or newer;
 *   3. __declspec(align())    - MSVC;
 *   4. __attribute__((__aligned__())) - GNU-compatible compilers.
 * The build fails when none of them is available. */
#if __has_include(<stdalign.h>)
#include <stdalign.h>
#endif
#if defined(alignas) || defined(__cplusplus)
#define MDBX_ALIGNAS(N) alignas(N)
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define MDBX_ALIGNAS(N) _Alignas(N)
#elif defined(_MSC_VER)
#define MDBX_ALIGNAS(N) __declspec(align(N))
#elif __has_attribute(__aligned__) || defined(__GNUC__)
#define MDBX_ALIGNAS(N) __attribute__((__aligned__(N)))
#else
#error "FIXME: Required alignas() or equivalent."
#endif /* MDBX_ALIGNAS */

/*----------------------------------------------------------------------------*/
/* Systems macros and includes */

/* __extern_C: linkage prefix for declarations shared between C and C++.
 * It expands to `extern "C"` in C++ and to nothing in C. */
#if !defined(__extern_C)
#if !defined(__cplusplus)
#define __extern_C
#else
#define __extern_C extern "C"
#endif
#endif /* __extern_C */

/* Provide `nullptr` as an alias of NULL where the language lacks it: in C,
 * and in pre-C++11 C++ on compilers other than MSVC. The whole language test
 * is parenthesized so that `!defined(nullptr)` guards every case; without the
 * parentheses `&&` binds tighter than `||`, and an existing `nullptr` macro
 * would be redefined on every non-MSVC C build. */
#if !defined(nullptr) && (!defined(__cplusplus) || (__cplusplus < 201103L && !defined(_MSC_VER)))
#define nullptr NULL
#endif

/* Apple targets: pull in the availability and target-condition headers and
 * default the minimal required OS version when the headers did not set it. */
#if defined(__APPLE__) || defined(_DARWIN_C_SOURCE)
#include <AvailabilityMacros.h>
#include <TargetConditionals.h>
#ifndef MAC_OS_X_VERSION_MIN_REQUIRED
#define MAC_OS_X_VERSION_MIN_REQUIRED 1070 /* Mac OS X 10.7, 2011 */
#endif
#endif /* Apple OSX & iOS */

/* BSD-family and Apple/Mach targets get the sysctl and mount headers plus the
 * VM parameter header of the particular system; every other target gets
 * <malloc.h> and, except on Solaris and Windows, <mntent.h>. */
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__BSD__) || defined(__bsdi__) ||    \
    defined(__DragonFly__) || defined(__APPLE__) || defined(__MACH__)
#include <sys/cdefs.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <vm/vm_param.h>
#elif defined(__OpenBSD__) || defined(__NetBSD__)
#include <uvm/uvm_param.h>
#else
/* Set for the remaining systems of this group. NOTE(review): judging by the
 * name, sysctl() takes a non-const MIB array there; confirm at the use site. */
#define SYSCTL_LEGACY_NONCONST_MIB
#endif
#ifndef __MACH__
#include <sys/vmmeter.h>
#endif
#else
#include <malloc.h>
#if !(defined(__sun) || defined(__SVR4) || defined(__svr4__) || defined(_WIN32) || defined(_WIN64))
#include <mntent.h>
#endif /* !Solaris */
#endif /* !xBSD */

/* Included on FreeBSD, or wherever the compiler can find the header. */
#if defined(__FreeBSD__) || __has_include(<malloc_np.h>)
#include <malloc_np.h>
#endif

/* Included on Apple/Mach targets, or wherever the compiler can find it. */
#if defined(__APPLE__) || defined(__MACH__) || __has_include(<malloc/malloc.h>)
#include <malloc/malloc.h>
#endif /* MacOS */

/* Mach host-information interfaces and the UUID header. */
#if defined(__MACH__)
#include <mach/host_info.h>
#include <mach/mach_host.h>
#include <mach/mach_port.h>
#include <uuid/uuid.h>
#endif

/* Linux-specific headers. */
#if defined(__linux__) || defined(__gnu_linux__)
#include <sched.h>
#include <sys/sendfile.h>
#include <sys/statfs.h>
#endif /* Linux */

/* Give _XOPEN_SOURCE a value of 0 when nothing has defined it so far, so that
 * it can be tested with #if. */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 0
#endif

/* Same for _XOPEN_SOURCE_EXTENDED; <utmpx.h> is included only when the macro
 * was already defined before this point. */
#ifndef _XOPEN_SOURCE_EXTENDED
#define _XOPEN_SOURCE_EXTENDED 0
#else
#include <utmpx.h>
#endif /* _XOPEN_SOURCE_EXTENDED */

#if defined(__sun) || defined(__SVR4) || defined(__svr4__)
#include <kstat.h>
#include <sys/mnttab.h>
/* On Solaris it is easier to add the missing prototype than to find a
 * combination of #defines that breaks nothing. */
__extern_C key_t ftok(const char *, int);
#endif /* SunOS/Solaris */

/* Core operating-system headers: the Windows API on Windows, otherwise the
 * POSIX set, which requires memory-mapped file support. */
#if defined(_WIN32) || defined(_WIN64) /*-------------------------------------*/

/* Default the API level to Windows 7; refuse anything below Windows 2000. */
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 /* Windows 7 */
#elif _WIN32_WINNT < 0x0500
#error At least 'Windows 2000' API is required for libmdbx.
#endif /* _WIN32_WINNT */
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif /* WIN32_LEAN_AND_MEAN */
#include <windows.h>
#include <winnt.h>
#include <winternl.h>

/* Included after windows.h to avoid build problems with MINGW and the like. */
#include <excpt.h>
#include <tlhelp32.h>

#else /*----------------------------------------------------------------------*/

#include <unistd.h>
/* <unistd.h> must advertise memory-mapped files. */
#if !defined(_POSIX_MAPPED_FILES) || _POSIX_MAPPED_FILES < 1
#error "libmdbx requires the _POSIX_MAPPED_FILES feature"
#endif /* _POSIX_MAPPED_FILES */

#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <sys/file.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/time.h>
#include <sys/uio.h>

#endif /*---------------------------------------------------------------------*/

/* Android: the logging header, plus <sys/sendfile.h> from API level 21 on. */
#if defined(__ANDROID_API__) || defined(ANDROID)
#include <android/log.h>
#if __ANDROID_API__ >= 21
#include <sys/sendfile.h>
#endif
#endif /* Android */

/* Optional headers: included when the build system says they exist
 * (HAVE_*_H) or when the compiler can find them. */
#if defined(HAVE_SYS_STAT_H) || __has_include(<sys/stat.h>)
#include <sys/stat.h>
#endif
#if defined(HAVE_SYS_TYPES_H) || __has_include(<sys/types.h>)
#include <sys/types.h>
#endif
#if defined(HAVE_SYS_FILE_H) || __has_include(<sys/file.h>)
#include <sys/file.h>
#endif

/*----------------------------------------------------------------------------*/
/* Byteorder */

/* x86 family detection. Any of the known 32-bit or 64-bit x86 compiler macros
 * selects the block; inside it two normalized macros are provided:
 *   __ia32__  - set for both x86 and x86-64;
 *   __amd64__ - set for x86-64 only. */
#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__amd64) || defined(_M_X64) ||           \
    defined(_M_AMD64) || defined(__i686__) || defined(__i686) || defined(i686) || defined(__i586__) ||                 \
    defined(__i586) || defined(i586) || defined(__i486__) || defined(__i486) || defined(i486) ||                       \
    defined(__i386__) || defined(__i386) || defined(__386) || defined(i386) || defined(_M_IX86) ||                     \
    defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || defined(__INTEL__) || defined(__IA32__)

/* LY: neutral __ia32__ for x86 and x86-64 */
#if !defined(__ia32__)
#define __ia32__ 1
#endif /* __ia32__ */

/* LY: trusty __amd64__ for every AMD64/x86-64 flavour */
#if (defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) || defined(_M_AMD64)) &&          \
    !defined(__amd64__)
#define __amd64__ 1
#endif /* __amd64__ */

#endif /* all x86 */

/* Byte-order detection, used only when the compiler does not predefine the
 * full __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ / __ORDER_BIG_ENDIAN__ trio.
 * Step 1: include the first endianness header this platform offers.
 * Step 2: map the macros found there (either the `__`-prefixed or the
 *         `_`-prefixed set) onto the three names above.
 * Step 3: if neither set exists, define the order constants here and infer
 *         the byte order from architecture/OS macros, or fail the build. */
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || !defined(__ORDER_BIG_ENDIAN__)

#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || defined(__ANDROID_API__) || defined(HAVE_ENDIAN_H) ||            \
    __has_include(<endian.h>)
#include <endian.h>
#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || defined(HAVE_MACHINE_ENDIAN_H) ||             \
    __has_include(<machine/endian.h>)
#include <machine/endian.h>
#elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>)
#include <sys/isa_defs.h>
#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) ||                                                     \
    (__has_include(<sys/types.h>) && __has_include(<sys/endian.h>))
#include <sys/endian.h>
#include <sys/types.h>
#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) ||                    \
    defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>)
#include <sys/param.h>
#endif /* OS */

#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER
#elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
#define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN
#define __BYTE_ORDER__ _BYTE_ORDER
#else
#define __ORDER_LITTLE_ENDIAN__ 1234
#define __ORDER_BIG_ENDIAN__ 4321

/* Little-endian: explicit endianness macros, little-endian ARM/MIPS variants,
 * Elbrus, Blackfin, Itanium, x86 (__ia32__) and Windows-like environments. */
#if defined(__LITTLE_ENDIAN__) || (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || defined(__ARMEL__) ||          \
    defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) ||  \
    defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || defined(__elbrus_4c__) || defined(__elbrus_8c__) ||    \
    defined(__bfin__) || defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || defined(__IA64__) ||              \
    defined(__ia64) || defined(_M_IA64) || defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) ||         \
    defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || defined(__WINDOWS__)
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__

/* Big-endian: explicit endianness macros, big-endian ARM/MIPS variants,
 * m68k, PA-RISC, SPARC and the IBM 370/390/z families. */
#elif defined(__BIG_ENDIAN__) || (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || defined(__ARMEB__) ||           \
    defined(__THUMBEB__) || defined(__AARCH64EB__) || defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) ||  \
    defined(__m68k__) || defined(M68000) || defined(__hppa__) || defined(__hppa) || defined(__HPPA__) ||               \
    defined(__sparc__) || defined(__sparc) || defined(__370__) || defined(__THW_370__) || defined(__s390__) ||         \
    defined(__s390x__) || defined(__SYSC_ZARCH__)
#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__

#else
#error __BYTE_ORDER__ should be defined.
#endif /* Arch */

#endif
#endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */

/* MDBX_WORDBITS: 32 only when neither pointers-as-integers (uintptr_t) nor
 * unsigned long exceed 32 bits and the target is not 64-bit Windows;
 * 64 in every other case. */
#if UINTPTR_MAX <= 0xffffFFFFul && ULONG_MAX <= 0xffffFFFFul && !defined(_WIN64)
#define MDBX_WORDBITS 32
#else
#define MDBX_WORDBITS 64
#endif /* MDBX_WORDBITS */

/*----------------------------------------------------------------------------*/
/* Availability of CMOV or equivalent */

/* MDBX_HAVE_CMOV: 1 when the target is taken to have a conditional-move
 * instruction or an equivalent, 0 otherwise. A value supplied by the user is
 * kept. The order of the tests matters: Thumb-2 is accepted before plain
 * Thumb is rejected, and both are tested before generic ARM. */
#ifndef MDBX_HAVE_CMOV
#if defined(__e2k__)
#define MDBX_HAVE_CMOV 1
#elif defined(__thumb2__) || defined(__thumb2)
#define MDBX_HAVE_CMOV 1
#elif defined(__thumb__) || defined(__thumb) || defined(__TARGET_ARCH_THUMB)
#define MDBX_HAVE_CMOV 0
#elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || defined(__aarch64) || defined(__arm__) ||        \
    defined(__arm) || defined(__CC_ARM)
#define MDBX_HAVE_CMOV 1
/* NOTE(review): only `__riscv__` and `__riscv64` are tested here; check
 * whether the targeted RISC-V toolchains still predefine these spellings or
 * only `__riscv`, otherwise this branch may never be taken. */
#elif (defined(__riscv__) || defined(__riscv64)) && (defined(__riscv_b) || defined(__riscv_bitmanip))
#define MDBX_HAVE_CMOV 1
/* x86: i686-class 32-bit targets and every x86-64 target. */
#elif defined(i686) || defined(__i686) || defined(__i686__) || (defined(_M_IX86) && _M_IX86 > 600) ||                  \
    defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(__amd64) || defined(_M_X64) ||           \
    defined(_M_AMD64)
#define MDBX_HAVE_CMOV 1
#else
#define MDBX_HAVE_CMOV 0
#endif
#endif /* MDBX_HAVE_CMOV */

/*----------------------------------------------------------------------------*/
/* Compiler's includes for builtins/intrinsics */

/* Include the intrinsics headers that belong to the compiler in use. The
 * first matching branch wins; a compiler matching none of them is rejected
 * by the final #error. */
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#include <intrin.h>
#elif __GNUC_PREREQ(4, 4) || defined(__clang__)
/* GCC >= 4.4 or clang: headers depend on the target architecture. */
#if defined(__e2k__)
#include <e2kintrin.h>
#include <x86intrin.h>
#endif /* __e2k__ */
#if defined(__ia32__)
#include <cpuid.h>
#include <x86intrin.h>
#endif /* __ia32__ */
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
#include <mbarrier.h>
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64))
#include <machine/sys/inline.h>
#elif defined(__IBMC__) && defined(__powerpc)
#include <atomic.h>
#elif defined(_AIX)
#include <builtins.h>
#include <sys/atomic_op.h>
#elif (defined(__osf__) && defined(__DECC)) || defined(__alpha)
#include <c_asm.h>
#include <machine/builtins.h>
#elif defined(__MWERKS__)
/* CodeWarrior - troubles ? */
#pragma gcc_extensions
#elif defined(__SNC__)
/* Sony PS3 - troubles ? */
#elif defined(__hppa__) || defined(__hppa)
#include <machine/inline.h>
#else
#error Unsupported C compiler, please use GNU C 4.4 or newer
#endif /* Compiler */

/* __noop: an empty statement for compilers other than MSVC, written as a
 * do/while(0) so that it can be followed by a semicolon like any other
 * statement. Nothing is defined under MSVC. */
#if !defined(_MSC_VER) && !defined(__noop)
#define __noop                                                                                                         \
  do {                                                                                                                 \
  } while (0)
#endif /* __noop */

/* Under MinGW drop any __fallthrough macro that is already defined, so that
 * the definition below is used instead. */
#if defined(__fallthrough) && (defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__))
#undef __fallthrough
#endif /* __fallthrough workaround for MinGW */

/* __fallthrough: marks an intentional fall-through between switch cases.
 * The first supported spelling is used: the standard [[fallthrough]], the GNU
 * __attribute__((__fallthrough__)), clang's [[clang::fallthrough]], or
 * nothing at all. */
#ifndef __fallthrough
/* NOTE(review): `__clang__ > 4` compares the `__clang__` macro itself rather
 * than `__clang_major__`; confirm which one was intended, since it decides
 * whether clang takes this branch before C++17. */
#if defined(__cplusplus) && (__has_cpp_attribute(fallthrough) && (!defined(__clang__) || __clang__ > 4)) ||            \
    __cplusplus >= 201703L
#define __fallthrough [[fallthrough]]
#elif __GNUC_PREREQ(8, 0) && defined(__cplusplus) && __cplusplus >= 201103L
#define __fallthrough [[fallthrough]]
/* GCC >= 7; when __LCC__ is defined its version must additionally be at
 * least 1.24.12, 1.25.5 or 1.26. */
#elif __GNUC_PREREQ(7, 0) && (!defined(__LCC__) || (__LCC__ == 124 && __LCC_MINOR__ >= 12) ||                          \
                              (__LCC__ == 125 && __LCC_MINOR__ >= 5) || (__LCC__ >= 126))
#define __fallthrough __attribute__((__fallthrough__))
#elif defined(__clang__) && defined(__cplusplus) && __cplusplus >= 201103L && __has_feature(cxx_attributes) &&         \
    __has_warning("-Wimplicit-fallthrough")
#define __fallthrough [[clang::fallthrough]]
#else
#define __fallthrough
#endif
#endif /* __fallthrough */

/* __unreachable(): tells the compiler that control never reaches this point.
 * Uses __builtin_unreachable() when available, __assume(0) under MSVC, and an
 * endless loop as the last resort. */
#if !defined(__unreachable)
#if __has_builtin(__builtin_unreachable) || __GNUC_PREREQ(4, 5)
#define __unreachable() __builtin_unreachable()
#elif !defined(_MSC_VER)
#define __unreachable()                                                                                                \
  do {                                                                                                                 \
  } while (1)
#else
#define __unreachable() __assume(0)
#endif
#endif /* __unreachable */

/* __prefetch(ptr): prefetch hint for the given address. It maps to
 * __builtin_prefetch() on GNU-compatible compilers; elsewhere it only
 * evaluates its argument and discards the value. */
#if !defined(__prefetch)
#if !(defined(__GNUC__) || defined(__clang__) || __has_builtin(__builtin_prefetch))
#define __prefetch(ptr)                                                                                                \
  do {                                                                                                                 \
    (void)(ptr);                                                                                                       \
  } while (0)
#else
#define __prefetch(ptr) __builtin_prefetch(ptr)
#endif
#endif /* __prefetch */

/* offsetof(type, member): fallback through the compiler builtin for the case
 * when the included headers did not provide the macro. */
#if !defined(offsetof)
#define offsetof(type, member) __builtin_offsetof(type, member)
#endif /* offsetof */

/* container_of(ptr, type, member): given a pointer to `member` embedded in
 * an object of `type`, yields a pointer to the enclosing object by stepping
 * back offsetof(type, member) bytes. */
#if !defined(container_of)
#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member)))
#endif /* container_of */

/*----------------------------------------------------------------------------*/
/* useful attributes */

/* __always_inline: inline specifier that also asks the compiler to force
 * inlining where such a request exists (GNU attribute, MSVC __forceinline);
 * otherwise a plain __inline. */
#if !defined(__always_inline)
#if defined(__GNUC__) || __has_attribute(__always_inline__)
#define __always_inline __inline __attribute__((__always_inline__))
#elif !defined(_MSC_VER)
#define __always_inline __inline
#else
#define __always_inline __forceinline
#endif
#endif /* __always_inline */

/* __noinline: forbids inlining of the function it decorates, using the GNU
 * attribute or the MSVC __declspec; empty when neither is available. */
#if !defined(__noinline)
#if defined(__GNUC__) || __has_attribute(__noinline__)
#define __noinline __attribute__((__noinline__))
#elif !defined(_MSC_VER)
#define __noinline
#else
#define __noinline __declspec(noinline)
#endif
#endif /* __noinline */

/* __must_check_result: asks for a warning when a caller ignores the
 * function's return value; empty where the GNU attribute is unavailable. */
#if !defined(__must_check_result)
#if !defined(__GNUC__) && !__has_attribute(__warn_unused_result__)
#define __must_check_result
#else
#define __must_check_result __attribute__((__warn_unused_result__))
#endif
#endif /* __must_check_result */

/* __nothrow: declares that a function does not throw.
 * C++ uses an exception specification (`throw()` before C++17,
 * `noexcept(true)` from C++17 on); C uses the GNU attribute when available
 * and otherwise expands to nothing. */
#ifndef __nothrow
#if defined(__cplusplus)
#if __cplusplus < 201703L
#define __nothrow throw()
#else
#define __nothrow noexcept(true)
#endif /* __cplusplus < 201703L */
#elif defined(__GNUC__) || __has_attribute(__nothrow__)
#define __nothrow __attribute__((__nothrow__))
/* NOTE(review): this branch cannot be taken, because every C++ build is
 * already handled by the first `#if defined(__cplusplus)`; confirm whether
 * `__declspec(nothrow)` was meant for MSVC in C mode instead. */
#elif defined(_MSC_VER) && defined(__cplusplus)
#define __nothrow __declspec(nothrow)
#else
#define __nothrow
#endif
#endif /* __nothrow */

/* __hidden: gives a symbol hidden visibility through the GNU attribute;
 * empty on compilers without it. */
#if !defined(__hidden)
#if !defined(__GNUC__) && !__has_attribute(__visibility__)
#define __hidden
#else
#define __hidden __attribute__((__visibility__("hidden")))
#endif
#endif /* __hidden */

/* __optimize(ops): per-function optimization options. The GNU attribute is
 * emitted only in optimized builds (__OPTIMIZE__ defined) and only for GCC
 * proper or a compiler that reports the attribute; in every other case the
 * macro expands to nothing. */
#if !defined(__optimize)
#if defined(__OPTIMIZE__) && ((defined(__GNUC__) && !defined(__clang__)) || __has_attribute(__optimize__))
#define __optimize(ops) __attribute__((__optimize__(ops)))
#else
#define __optimize(ops)
#endif
#endif /* __optimize */

/* __hot: marks frequently executed functions. It is empty unless the build
 * is optimized (__OPTIMIZE__ defined). In optimized builds:
 *  - clang on Linux without the `hot` attribute: place the function in a
 *    dedicated section and request O3 via __optimize();
 *  - GNU-compatible compilers: the `hot` attribute;
 *  - anything else: only __optimize("O3"). */
#ifndef __hot
#if defined(__OPTIMIZE__)
#if defined(__clang__) && !__has_attribute(__hot__) && __has_attribute(__section__) &&                                 \
    (defined(__linux__) || defined(__gnu_linux__))
/* just put frequently used functions in separate section */
/* NOTE(review): the section is named "text.hot" without a leading dot;
 * confirm that this, and not ".text.hot", is intended. */
#define __hot __attribute__((__section__("text.hot"))) __optimize("O3")
#elif defined(__GNUC__) || __has_attribute(__hot__)
#define __hot __attribute__((__hot__))
#else
#define __hot __optimize("O3")
#endif
#else
#define __hot
#endif
#endif /* __hot */

/* __cold: marks rarely executed functions. It is empty unless the build is
 * optimized (__OPTIMIZE__ defined). In optimized builds:
 *  - clang on Linux without the `cold` attribute: place the function in a
 *    dedicated section and request Os via __optimize();
 *  - GNU-compatible compilers: the `cold` attribute;
 *  - anything else: only __optimize("Os"). */
#ifndef __cold
#if defined(__OPTIMIZE__)
#if defined(__clang__) && !__has_attribute(__cold__) && __has_attribute(__section__) &&                                \
    (defined(__linux__) || defined(__gnu_linux__))
/* just put infrequently used functions in separate section */
/* NOTE(review): the section is named "text.unlikely" without a leading dot;
 * confirm that this, and not ".text.unlikely", is intended. */
#define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os")
#elif defined(__GNUC__) || __has_attribute(__cold__)
#define __cold __attribute__((__cold__))
#else
#define __cold __optimize("Os")
#endif
#else
#define __cold
#endif
#endif /* __cold */

/* __flatten: GNU `flatten` attribute, emitted only in optimized builds on
 * compilers that have it; empty in every other case. */
#if !defined(__flatten)
#if !defined(__OPTIMIZE__) || !(defined(__GNUC__) || __has_attribute(__flatten__))
#define __flatten
#else
#define __flatten __attribute__((__flatten__))
#endif
#endif /* __flatten */

/* likely(cond) / unlikely(cond): branch-prediction hints. Both normalize the
 * condition to 0 or 1 with `!!`. The hint is passed to __builtin_expect()
 * when the builtin exists and the code is not being scanned by Coverity;
 * otherwise only the normalized condition remains. */

#if !defined(likely)
#if defined(__COVERITY__) || !(defined(__GNUC__) || __has_builtin(__builtin_expect))
#define likely(x) (!!(x))
#else
#define likely(cond) __builtin_expect(!!(cond), 1)
#endif
#endif /* likely */

#if !defined(unlikely)
#if defined(__COVERITY__) || !(defined(__GNUC__) || __has_builtin(__builtin_expect))
#define unlikely(x) (!!(x))
#else
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#endif
#endif /* unlikely */

/* __anonymous_struct_extension__: expands to the GNU `__extension__` keyword
 * on GNU-compatible compilers and to nothing elsewhere. */
#if !defined(__anonymous_struct_extension__)
#if !defined(__GNUC__)
#define __anonymous_struct_extension__
#else
#define __anonymous_struct_extension__ __extension__
#endif
#endif /* __anonymous_struct_extension__ */

/* MDBX_WEAK_IMPORT_ATTRIBUTE: attribute for symbols that may be missing at
 * load time. Choice, in order:
 *   1. an existing WEAK_IMPORT_ATTRIBUTE macro;
 *   2. both `weak` and `weak_import`, when the compiler reports both;
 *   3. `weak` alone, when reported, or on GCC >= 4 targeting ELF;
 *   4. nothing. */
#ifndef MDBX_WEAK_IMPORT_ATTRIBUTE
#ifdef WEAK_IMPORT_ATTRIBUTE
#define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE
#elif __has_attribute(__weak__) && __has_attribute(__weak_import__)
#define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__, __weak_import__))
#elif __has_attribute(__weak__) || (defined(__GNUC__) && __GNUC__ >= 4 && defined(__ELF__))
#define MDBX_WEAK_IMPORT_ATTRIBUTE __attribute__((__weak__))
#else
#define MDBX_WEAK_IMPORT_ATTRIBUTE
#endif
#endif /* MDBX_WEAK_IMPORT_ATTRIBUTE */

/* __thread: on MSVC and Digital Mars C map the GNU-style thread-local
 * storage keyword to __declspec(thread), unless it is already a macro. */
#if (defined(_MSC_VER) || defined(__DMC__)) && !defined(__thread)
#define __thread __declspec(thread)
#endif /* __thread */

/* MDBX_EXCLUDE_FOR_GPROF: when ENABLE_GPROF is defined, attaches the two
 * no-instrumentation attributes to a function; empty otherwise. */
#if !defined(MDBX_EXCLUDE_FOR_GPROF)
#if !defined(ENABLE_GPROF)
#define MDBX_EXCLUDE_FOR_GPROF
#else
#define MDBX_EXCLUDE_FOR_GPROF __attribute__((__no_instrument_function__, __no_profile_instrument_function__))
#endif /* ENABLE_GPROF */
#endif /* MDBX_EXCLUDE_FOR_GPROF */

/*----------------------------------------------------------------------------*/

/* expect_with_probability(expr, value, prob): yields `expr`, telling the
 * compiler that it equals `value` with probability `prob` when
 * __builtin_expect_with_probability() is available (reported by the
 * compiler, or GCC >= 9); otherwise it is just `expr` in parentheses. */
#if !defined(expect_with_probability)
#if !(defined(__builtin_expect_with_probability) || __has_builtin(__builtin_expect_with_probability) ||                \
      __GNUC_PREREQ(9, 0))
#define expect_with_probability(expr, value, prob) (expr)
#else
#define expect_with_probability(expr, value, prob) __builtin_expect_with_probability(expr, value, prob)
#endif
#endif /* expect_with_probability */

/* MDBX_GOOFY_MSVC_STATIC_ANALYZER: 1 when the code is processed with
 * _PREFAST_ defined, 0 otherwise; a user-supplied value is kept. */
#if !defined(MDBX_GOOFY_MSVC_STATIC_ANALYZER)
#if defined(_PREFAST_)
#define MDBX_GOOFY_MSVC_STATIC_ANALYZER 1
#else
#define MDBX_GOOFY_MSVC_STATIC_ANALYZER 0
#endif
#endif /* MDBX_GOOFY_MSVC_STATIC_ANALYZER */

/* MDBX_ANALYSIS_ASSUME(expr): an assumption for the analyzer; outside the
 * analyzer and newer MSVC it is a plain assert().
 * MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id): suppresses one warning for the
 * next line via a prefast or warning pragma; empty for other compilers. */
#if !MDBX_GOOFY_MSVC_STATIC_ANALYZER && !(defined(_MSC_VER) && _MSC_VER > 1919)
#define MDBX_ANALYSIS_ASSUME(expr) assert(expr)
#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id)
#else
#define MDBX_ANALYSIS_ASSUME(expr) __analysis_assume(expr)
#if defined(_PREFAST_)
#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) __pragma(prefast(suppress : warn_id))
#else
#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) __pragma(warning(suppress : warn_id))
#endif
#endif /* MDBX_GOOFY_MSVC_STATIC_ANALYZER */

/* FLEXIBLE_ARRAY_MEMBERS: 1 when flexible array members are taken to be
 * available (C99 or newer, or MSVC compiling C), 0 otherwise. */
#if !defined(FLEXIBLE_ARRAY_MEMBERS)
#if !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) && (defined(__cplusplus) || !defined(_MSC_VER))
#define FLEXIBLE_ARRAY_MEMBERS 0
#else
#define FLEXIBLE_ARRAY_MEMBERS 1
#endif
#endif /* FLEXIBLE_ARRAY_MEMBERS */

/*----------------------------------------------------------------------------*/
/* Valgrind and Address Sanitizer */

/* Valgrind client requests.
 * Without ENABLE_MEMCHECK, and unless RUNNING_ON_VALGRIND is already defined,
 * every request used by the library becomes a no-op stub: the CHECK_* queries
 * and RUNNING_ON_VALGRIND evaluate to (0), the rest expand to nothing.
 * With ENABLE_MEMCHECK the real <valgrind/memcheck.h> is used, and only the
 * two address-error-reporting requests are stubbed when the header lacks
 * them. */
#if !defined(ENABLE_MEMCHECK)
#if !defined(RUNNING_ON_VALGRIND)
#define RUNNING_ON_VALGRIND (0)
#define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a, s) (0)
#define VALGRIND_CHECK_MEM_IS_DEFINED(a, s) (0)
#define VALGRIND_MAKE_MEM_NOACCESS(a, s)
#define VALGRIND_MAKE_MEM_DEFINED(a, s)
#define VALGRIND_MAKE_MEM_UNDEFINED(a, s)
#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_CREATE_MEMPOOL(h, r, z)
#define VALGRIND_DESTROY_MEMPOOL(h)
#define VALGRIND_MEMPOOL_TRIM(h, a, s)
#define VALGRIND_MEMPOOL_ALLOC(h, a, s)
#define VALGRIND_MEMPOOL_FREE(h, a)
#define VALGRIND_MEMPOOL_CHANGE(h, a, b, s)
#endif /* !RUNNING_ON_VALGRIND */
#else
#include <valgrind/memcheck.h>
#if !defined(VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE)
/* LY: available since Valgrind 3.10 */
#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(a, s)
#endif
#endif /* ENABLE_MEMCHECK */

/* AddressSanitizer manual poisoning. With ASan enabled the real interface
 * header is used; otherwise, unless the macros already exist, both become
 * stubs that only evaluate and discard their arguments. */
#if !defined(__SANITIZE_ADDRESS__)
#if !defined(ASAN_POISON_MEMORY_REGION)
#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#endif
#else
#include <sanitizer/asan_interface.h>
#endif /* __SANITIZE_ADDRESS__ */

/*----------------------------------------------------------------------------*/

/* ARRAY_LENGTH(array) yields the number of elements of a true array
 * (not of a pointer). In C++ the helper template rejects non-array
 * arguments at compile time; in C the classic sizeof-division is used.
 * The macro argument is parenthesized so that expressions such as `*ptr`
 * are indexed as a whole. */
#ifndef ARRAY_LENGTH
#ifdef __cplusplus
template <typename T, size_t N> char (&__ArraySizeHelper(T (&array)[N]))[N];
#define ARRAY_LENGTH(array) (sizeof(::__ArraySizeHelper(array)))
#else
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
#endif
#endif /* ARRAY_LENGTH */

/* ARRAY_END(array) yields a pointer one past the last element of a true
 * array. The argument is parenthesized before indexing: without that
 * `ARRAY_END(*p)` would expand to `&*p[N]`, i.e. index the pointer
 * instead of the array it points to. */
#ifndef ARRAY_END
#define ARRAY_END(array) (&(array)[ARRAY_LENGTH(array)])
#endif /* ARRAY_END */

/* Token pasting: CONCAT() pastes its arguments as written, while XCONCAT()
 * macro-expands them first. */
#define CONCAT(a, b) a##b
#define XCONCAT(a, b) CONCAT(a, b)

/* Packs four byte-sized values into a 32-bit word, `a` being the most
 * significant byte and `d` the least significant one.
 * Every operand is converted to uint32_t before being combined. */
#define MDBX_TETRAD(a, b, c, d)                                                                                        \
  ((uint32_t)(a) << 24 | (uint32_t)(b) << 16 | (uint32_t)(c) << 8 | (uint32_t)(d))

/* Packs the first four characters of `str` via MDBX_TETRAD().
 * The argument is parenthesized so that pointer expressions
 * (e.g. `name + 1`) are indexed as a whole. */
#define MDBX_STRING_TETRAD(str) MDBX_TETRAD((str)[0], (str)[1], (str)[2], (str)[3])

/* String literal "FIXME: <file>, <line>" naming the place of expansion. */
#define FIXME "FIXME: " __FILE__ ", " MDBX_STRINGIFY(__LINE__)

/* STATIC_ASSERT_MSG(expr, msg) is a compile-time assertion.
 * The implementation is picked in this order:
 *  1. the static_assert macro, when defined;
 *  2. the _STATIC_ASSERT macro, when defined (the message is dropped);
 *  3. MSVC: _STATIC_ASSERT from <crtdbg.h> (the message is dropped);
 *  4. the C11 _Static_assert keyword;
 *  5. a `switch` with `case 0:` and `case (expr):` labels, which fails to
 *     compile because of a duplicated label when `expr` is zero. This last
 *     form is a statement, so it can be used only inside a function body. */
#ifndef STATIC_ASSERT_MSG
#if defined(static_assert)
#define STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)
#elif defined(_STATIC_ASSERT)
#define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr)
#elif defined(_MSC_VER)
#include <crtdbg.h>
#define STATIC_ASSERT_MSG(expr, msg) _STATIC_ASSERT(expr)
#elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __has_feature(c_static_assert)
#define STATIC_ASSERT_MSG(expr, msg) _Static_assert(expr, msg)
#else
#define STATIC_ASSERT_MSG(expr, msg)                                                                                   \
  switch (0) {                                                                                                         \
  case 0:                                                                                                              \
  case (expr):;                                                                                                        \
  }
#endif
#endif /* STATIC_ASSERT_MSG */

/* STATIC_ASSERT(expr) is STATIC_ASSERT_MSG() with the stringified
 * expression used as the message. */
#ifndef STATIC_ASSERT
#define STATIC_ASSERT(expr) STATIC_ASSERT_MSG(expr, #expr)
#endif

/*----------------------------------------------------------------------------*/

#if defined(_MSC_VER) && _MSC_VER >= 1900
/* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros
 * for internal format-args checker.
 * Therefore the pointer-sized format macros are redefined with the "I"
 * length prefix, and the size_t format macros are defined with the "z"
 * length prefix. */
#undef PRIuPTR
#undef PRIiPTR
#undef PRIdPTR
#undef PRIxPTR
#define PRIuPTR "Iu"
#define PRIiPTR "Ii"
#define PRIdPTR "Id"
#define PRIxPTR "Ix"
#define PRIuSIZE "zu"
#define PRIiSIZE "zi"
#define PRIdSIZE "zd"
#define PRIxSIZE "zx"
#endif /* fix PRI*PTR for _MSC_VER */

/* Fallback for builds where PRIuSIZE has not been defined yet:
 * the size_t format macros alias the pointer-sized ones. */
#ifndef PRIuSIZE
#define PRIuSIZE PRIuPTR
#define PRIiSIZE PRIiPTR
#define PRIdSIZE PRIdPTR
#define PRIxSIZE PRIxPTR
#endif /* PRI*SIZE fallback via PRI*PTR */

/* Restores the MSVC warning state; presumably paired with a
 * `#pragma warning(push)` earlier in the file (not visible here). */
#ifdef _MSC_VER
#pragma warning(pop)
#endif

/*----------------------------------------------------------------------------*/

/* Silences the "nested-anon-types" diagnostic when the compiler reports
 * knowing it, using the pragma dialect of the detected compiler. */
#if __has_warning("-Wnested-anon-types")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wnested-anon-types"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wnested-anon-types"
#else
#pragma warning disable "nested-anon-types"
#endif
#endif /* -Wnested-anon-types */

/* Silences the "constant-logical-operand" diagnostic when the compiler
 * reports knowing it, using the pragma dialect of the detected compiler. */
#if __has_warning("-Wconstant-logical-operand")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wconstant-logical-operand"
#else
#pragma warning disable "constant-logical-operand"
#endif
#endif /* -Wconstant-logical-operand */

/* Silences "alignment reduction ignored" diagnostics:
 *  - LCC up to 1.21 via diag_suppress (see the bug reference below);
 *  - ICC by disabling warnings 3453 and 1366;
 *  - any other compiler that reports knowing the warning, via its own
 *    pragma dialect. */
#if defined(__LCC__) && (__LCC__ <= 121)
/* bug #2798 */
#pragma diag_suppress alignment_reduction_ignored
#elif defined(__ICC)
#pragma warning(disable : 3453 1366)
#elif __has_warning("-Walignment-reduction-ignored")
#if defined(__clang__)
#pragma clang diagnostic ignored "-Walignment-reduction-ignored"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Walignment-reduction-ignored"
#else
#pragma warning disable "alignment-reduction-ignored"
#endif
#endif /* -Walignment-reduction-ignored */

/* MDBX_INTERNAL is the linkage specifier put on library-internal
 * declarations: `static` when everything is compiled as a single
 * translation unit (xMDBX_ALLOY is defined), empty otherwise. */
#ifdef xMDBX_ALLOY
/* Amalgamated build */
#define MDBX_INTERNAL static
#else
/* Non-amalgamated build */
#define MDBX_INTERNAL
#endif /* xMDBX_ALLOY */

#include "mdbx.h"

/*----------------------------------------------------------------------------*/
/* Basic constants and types */

/* Forward declaration: struct iov_ctx is left incomplete here, so only
 * pointers to it can be used by the declarations that follow. */
typedef struct iov_ctx iov_ctx_t;
///

/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */

/* Pull in <sys/cachectl.h> when the compiler can confirm its presence,
 * or unconditionally when one of the MIPS-related predefined macros
 * listed below is set. */
#if __has_include(<sys/cachectl.h>)
#include <sys/cachectl.h>
#elif defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) ||        \
    defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi)
/* MIPS should have explicit cache control */
#include <sys/cachectl.h>
#endif

/* Compiler-level barrier: uses whichever barrier intrinsic the detected
 * compiler provides. For GCC/Clang this is an empty asm statement with a
 * "memory" clobber, i.e. no machine instruction is emitted.
 * The build fails with #error when the compiler is not recognized. */
MDBX_MAYBE_UNUSED static inline void osal_compiler_barrier(void) {
#if defined(__clang__) || defined(__GNUC__)
  __asm__ __volatile__("" ::: "memory");
#elif defined(_MSC_VER)
  _ReadWriteBarrier();
#elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
  __memory_barrier();
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
  __compiler_barrier();
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64))
  _Asm_sched_fence(/* LY: no-arg meaning 'all expect ALU', e.g. 0x3D3D */);
#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
  __fence();
#else
#error "Could not guess the kind of compiler, please report to us."
#endif
}

/* Hardware memory barrier.
 * Preference order: C11 atomic_thread_fence(seq_cst) when
 * MDBX_HAVE_C11ATOMICS is defined, then the __atomic/__c11_atomic fence
 * builtins with __ATOMIC_SEQ_CST, then __sync_synchronize(), then the
 * platform/compiler specific intrinsics below.
 * The build fails with #error when nothing suitable is recognized. */
MDBX_MAYBE_UNUSED static inline void osal_memory_barrier(void) {
#ifdef MDBX_HAVE_C11ATOMICS
  atomic_thread_fence(memory_order_seq_cst);
#elif defined(__ATOMIC_SEQ_CST)
#ifdef __clang__
  __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
#else
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
#endif
#elif defined(__clang__) || defined(__GNUC__)
  __sync_synchronize();
#elif defined(_WIN32) || defined(_WIN64)
  MemoryBarrier();
#elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
#if defined(__ia32__)
  _mm_mfence();
#else
  __mf();
#endif
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
  __machine_rw_barrier();
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && (defined(HP_IA64) || defined(__ia64))
  _Asm_mf();
#elif defined(_AIX) || defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
  /* NOTE(review): confirm that __lwsync() is as strong as the seq_cst
   * fences used by the branches above. */
  __lwsync();
#else
#error "Could not guess the kind of compiler, please report to us."
#endif
}

/*----------------------------------------------------------------------------*/
/* system-dependent definitions */

#if defined(_WIN32) || defined(_WIN64)
/* Windows flavour of the basic OS-abstraction types and macros. */
#define HAVE_SYS_STAT_H
#define HAVE_SYS_TYPES_H
typedef HANDLE osal_thread_t;
typedef unsigned osal_thread_key_t;
#define MAP_FAILED nullptr
/* Upper 32 bits of `v` as DWORD; 0 when `v` is not wider than 4 bytes. */
#define HIGH_DWORD(v) ((DWORD)((sizeof(v) > 4) ? ((uint64_t)(v) >> 32) : 0))
/* Calling convention and result type of a thread start routine. */
#define THREAD_CALL WINAPI
#define THREAD_RESULT DWORD
/* A mutex handle together with a pair of event handles. */
typedef struct {
  HANDLE mutex;
  HANDLE event[2];
} osal_condpair_t;
typedef CRITICAL_SECTION osal_fastmutex_t;

/* For non-MSVC compilers targeting Windows: `__try` expands to nothing and
 * `__except(COND)` becomes `if (false)`, so the guarded block always runs
 * and the handler block is never executed (COND is not evaluated). */
#if !defined(_MSC_VER) && !defined(__try)
#define __try
#define __except(COND) if (/* (void)(COND), */ false)
#endif /* stub for MSVC's __try/__except */

/* Memory allocation primitives for Windows.
 * With MDBX_WITHOUT_MSVC_CRT the process heap is used directly,
 * otherwise the names alias the CRT functions.
 * NOTE(review): the default for MDBX_WITHOUT_MSVC_CRT is provided by the
 * build-options section; confirm the macro is already defined when this
 * check is reached. */
#if MDBX_WITHOUT_MSVC_CRT

#ifndef osal_malloc
/* Allocates `bytes` of uninitialized memory from the process heap. */
static inline void *osal_malloc(size_t bytes) { return HeapAlloc(GetProcessHeap(), 0, bytes); }
#endif /* osal_malloc */

#ifndef osal_calloc
/* Allocates a zero-filled array of `nelem` items of `size` bytes each from
 * the process heap. Returns NULL when `nelem * size` does not fit into
 * size_t, as calloc() does, instead of allocating a truncated buffer. */
static inline void *osal_calloc(size_t nelem, size_t size) {
  if (size != 0 && nelem > SIZE_MAX / size)
    return NULL;
  return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, nelem * size);
}
#endif /* osal_calloc */

#ifndef osal_realloc
/* Resizes a process-heap block; a null `ptr` makes it a plain allocation. */
static inline void *osal_realloc(void *ptr, size_t bytes) {
  return ptr ? HeapReAlloc(GetProcessHeap(), 0, ptr, bytes) : HeapAlloc(GetProcessHeap(), 0, bytes);
}
#endif /* osal_realloc */

#ifndef osal_free
/* Returns a block to the process heap. */
static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
#endif /* osal_free */

#else /* MDBX_WITHOUT_MSVC_CRT */

#define osal_malloc malloc
#define osal_calloc calloc
#define osal_realloc realloc
#define osal_free free
#define osal_strdup _strdup

#endif /* MDBX_WITHOUT_MSVC_CRT */

/* On Windows, unless already defined as macros, snprintf/vsnprintf are
 * mapped onto the underscore-prefixed variants. */
#ifndef snprintf
#define snprintf _snprintf /* ntdll */
#endif

#ifndef vsnprintf
#define vsnprintf _vsnprintf /* ntdll */
#endif

#else /*----------------------------------------------------------------------*/

/* POSIX flavour of the basic OS-abstraction types and macros. */
typedef pthread_t osal_thread_t;
typedef pthread_key_t osal_thread_key_t;
#define INVALID_HANDLE_VALUE (-1)
/* Calling convention (none) and result type of a thread start routine. */
#define THREAD_CALL
#define THREAD_RESULT void *
/* A mutex together with a pair of condition variables. */
typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond[2];
} osal_condpair_t;
typedef pthread_mutex_t osal_fastmutex_t;
/* The allocation primitives alias the libc functions. */
#define osal_malloc malloc
#define osal_calloc calloc
#define osal_realloc realloc
#define osal_free free
#define osal_strdup strdup
#endif /* Platform */

/* osal_malloc_usable_size(ptr) maps onto the platform facility that
 * reports the size of an allocated block: malloc_usable_size() for
 * glibc 2.12+, FreeBSD or when it is provided as a macro; malloc_size()
 * on Apple platforms; _msize() for MSVC with the CRT in use.
 * On other platforms the macro is left undefined. */
#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size)
#define osal_malloc_usable_size(ptr) malloc_usable_size(ptr)
#elif defined(__APPLE__)
#define osal_malloc_usable_size(ptr) malloc_size(ptr)
#elif defined(_MSC_VER) && !MDBX_WITHOUT_MSVC_CRT
#define osal_malloc_usable_size(ptr) _msize(ptr)
#endif /* osal_malloc_usable_size */

/*----------------------------------------------------------------------------*/
/* OS abstraction layer stuff */

/* Character type of pathnames and the matching printf conversion
 * (without the leading '%'): wide characters on Windows, plain char
 * elsewhere. */
#if defined(_WIN32) || defined(_WIN64)
typedef wchar_t pathchar_t;
#define MDBX_PRIsPATH "ls"
#else
typedef char pathchar_t;
#define MDBX_PRIsPATH "s"
#endif

/* Gives up the rest of the current time slice.
 * Windows: performs an alertable zero-length sleep and returns true when
 * SleepEx() reports WAIT_IO_COMPLETION.
 * Other systems: calls sched_yield() and returns true when it returns
 * a non-zero value. */
MDBX_MAYBE_UNUSED static inline bool osal_yield(void) {
#if defined(_WIN32) || defined(_WIN64)
  const DWORD wake_reason = SleepEx(0, true);
  return wake_reason == WAIT_IO_COMPLETION;
#else
  const int rc = sched_yield();
  return rc != 0;
#endif
}

/* Descriptor of a memory-mapped file. */
typedef struct osal_mmap {
  /* The same address, viewed either as an untyped pointer
   * or as a pointer to struct shared_lck. */
  union {
    void *base;
    struct shared_lck *lck;
  };
  mdbx_filehandle_t fd; /* handle of the mapped file */
  size_t limit;   /* mapping length, but NOT a size of file nor DB */
  size_t current; /* mapped region size, i.e. the size of file and DB */
  uint64_t filesize /* in-process cache of a file size */;
#if defined(_WIN32) || defined(_WIN64)
  HANDLE section; /* memory-mapped section handle */
#endif
} osal_mmap_t;

/* MDBX_HAVE_PWRITEV: 1 when pwritev() is assumed to be available,
 * 0 otherwise. Unless predefined, it is derived as follows:
 *  - Windows: 0;
 *  - Android: 0 for API levels below 24 (where the file-offset width is
 *    additionally required to match MDBX_WORDBITS), 1 otherwise;
 *  - Apple: 1 only when the minimum required macOS version is 11.0+;
 *  - elsewhere: 1 when _SC_IOV_MAX is defined or IOV_MAX is above 1. */
#ifndef MDBX_HAVE_PWRITEV
#if defined(_WIN32) || defined(_WIN64)

#define MDBX_HAVE_PWRITEV 0

#elif defined(__ANDROID_API__)

#if __ANDROID_API__ < 24
/* https://android-developers.googleblog.com/2017/09/introducing-android-native-development.html
 * https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md */
#define MDBX_HAVE_PWRITEV 0
#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS != MDBX_WORDBITS
#error "_FILE_OFFSET_BITS != MDBX_WORDBITS and __ANDROID_API__ < 24" (_FILE_OFFSET_BITS != MDBX_WORDBITS)
#elif defined(__FILE_OFFSET_BITS) && __FILE_OFFSET_BITS != MDBX_WORDBITS
#error "__FILE_OFFSET_BITS != MDBX_WORDBITS and __ANDROID_API__ < 24" (__FILE_OFFSET_BITS != MDBX_WORDBITS)
#endif
#else
#define MDBX_HAVE_PWRITEV 1
#endif

#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)

#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) &&                                          \
    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
/* FIXME: add checks for IOS versions, etc */
#define MDBX_HAVE_PWRITEV 1
#else
#define MDBX_HAVE_PWRITEV 0
#endif

#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
#define MDBX_HAVE_PWRITEV 1
#else
#define MDBX_HAVE_PWRITEV 0
#endif
#endif /* MDBX_HAVE_PWRITEV */

/* A single queued write request of the I/O ring.
 * Windows: carries an OVERLAPPED and a scatter-gather list of
 * FILE_SEGMENT_ELEMENT with one extra slot reserved for the terminator.
 * Elsewhere: carries the file offset and, when MDBX_HAVE_PWRITEV is set,
 * the count of `struct iovec` elements.
 * ior_sgv_element stays undefined when there is no vectored I/O; then only
 * the `single` member of the union exists. */
typedef struct ior_item {
#if defined(_WIN32) || defined(_WIN64)
  OVERLAPPED ov;
#define ior_sgv_gap4terminator 1
#define ior_sgv_element FILE_SEGMENT_ELEMENT
#else
  size_t offset;
#if MDBX_HAVE_PWRITEV
  size_t sgvcnt;
#define ior_sgv_gap4terminator 0
#define ior_sgv_element struct iovec
#endif /* MDBX_HAVE_PWRITEV */
#endif /* !Windows */
  /* payload: either a single buffer or the head of a scatter-gather list */
  union {
    MDBX_val single;
#if defined(ior_sgv_element)
    ior_sgv_element sgv[1 + ior_sgv_gap4terminator];
#endif /* ior_sgv_element */
  };
} ior_item_t;

/* State of the write queue ("I/O ring").
 * The accessor macros ior_last_sgvcnt() and ior_last_bytes() hide where the
 * scatter-gather count and byte count of the last item are kept, which
 * differs between Windows, pwritev-capable systems and the rest. */
typedef struct osal_ioring {
  unsigned slots_left; /* see osal_ioring_left() and osal_ioring_used() */
  unsigned allocated;  /* see osal_ioring_used() and osal_ioring_prepare() */
#if defined(_WIN32) || defined(_WIN64)
#define IOR_STATE_LOCKED 1
  HANDLE overlapped_fd;
  unsigned pagesize;
  unsigned last_sgvcnt;
  size_t last_bytes;
  uint8_t direct, state, pagesize_ln2;
  unsigned event_stack;
  HANDLE *event_pool;
  volatile LONG async_waiting;
  volatile LONG async_completed;
  HANDLE async_done;

#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
#define ior_last_bytes(ior, item) (ior)->last_bytes
#elif MDBX_HAVE_PWRITEV
  unsigned last_bytes;
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
#define ior_last_bytes(ior, item) (ior)->last_bytes
#else
  /* no vectored I/O: each item holds exactly one buffer */
#define ior_last_sgvcnt(ior, item) (1)
#define ior_last_bytes(ior, item) (item)->single.iov_len
#endif /* !Windows */
  ior_item_t *last;
  ior_item_t *pool;
  char *boundary;
} osal_ioring_t;

/* Actually this is not an ioring for now, but it is on the way to be one. */

/* Initializes the ring; on Windows it additionally takes the direct-I/O
 * flag and the file handle for overlapped operations. */
MDBX_INTERNAL int osal_ioring_create(osal_ioring_t *
#if defined(_WIN32) || defined(_WIN64)
                                     ,
                                     bool enable_direct, mdbx_filehandle_t overlapped_fd
#endif /* Windows */
);
MDBX_INTERNAL int osal_ioring_resize(osal_ioring_t *, size_t items);
MDBX_INTERNAL void osal_ioring_destroy(osal_ioring_t *);
MDBX_INTERNAL void osal_ioring_reset(osal_ioring_t *);
/* Queues `bytes` of `data` to be written at the file `offset`. */
MDBX_INTERNAL int osal_ioring_add(osal_ioring_t *ctx, const size_t offset, void *data, const size_t bytes);
/* Result of osal_ioring_write(): an error code paired with a counter
 * named `wops` (presumably the number of write operations issued). */
typedef struct osal_ioring_write_result {
  int err;
  unsigned wops;
} osal_ioring_write_result_t;
MDBX_INTERNAL osal_ioring_write_result_t osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd);

/* Invokes `callback` with the caller-supplied `ctx` and the offset, data
 * pointer and size of queued chunks. */
MDBX_INTERNAL void osal_ioring_walk(osal_ioring_t *ior, iov_ctx_t *ctx,
                                    void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));

/* Returns the `slots_left` counter of the ring. */
MDBX_MAYBE_UNUSED static inline unsigned osal_ioring_left(const osal_ioring_t *ior) {
  const unsigned vacant = ior->slots_left;
  return vacant;
}

/* Returns the difference between the `allocated` and `slots_left`
 * counters of the ring. */
MDBX_MAYBE_UNUSED static inline unsigned osal_ioring_used(const osal_ioring_t *ior) {
  const unsigned total = ior->allocated;
  const unsigned vacant = ior->slots_left;
  return total - vacant;
}

/* Makes sure the ring has room for the upcoming batch.
 * The requested item count is raised to at least 32; on Windows in direct
 * mode it is also raised to `bytes >> pagesize_ln2`; finally it is capped
 * at 65536. Returns MDBX_SUCCESS right away when `allocated` already
 * covers that, otherwise the result of osal_ioring_resize(). */
MDBX_MAYBE_UNUSED static inline int osal_ioring_prepare(osal_ioring_t *ior, size_t items, size_t bytes) {
  if (items < 32)
    items = 32;
#if defined(_WIN32) || defined(_WIN64)
  if (ior->direct) {
    const size_t pages_needed = bytes >> ior->pagesize_ln2;
    if (items < pages_needed)
      items = pages_needed;
  }
#else
  (void)bytes;
#endif
  if (items > 65536)
    items = 65536;
  return likely(ior->allocated >= items) ? MDBX_SUCCESS : osal_ioring_resize(ior, items);
}

/*----------------------------------------------------------------------------*/
/* libc compatibility stuff */

/* osal_asprintf()/osal_vasprintf(): either aliases of the libc functions or
 * declarations of the library's own implementation.
 * NOTE(review): the condition below requires __GLIBC__ to be undefined
 * while __GLIBC_PREREQ(2, 1) is true; confirm that this combination is
 * intentional, since it decides whether the libc functions are ever used. */
#if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) && (defined(_GNU_SOURCE) || defined(_BSD_SOURCE))
#define osal_asprintf asprintf
#define osal_vasprintf vasprintf
#else
MDBX_MAYBE_UNUSED MDBX_INTERNAL MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...);
MDBX_INTERNAL int osal_vasprintf(char **strp, const char *fmt, va_list ap);
#endif

/* Where only the MADV_CORE/MADV_NOCORE names are available, provide the
 * MADV_DODUMP/MADV_DONTDUMP names as aliases of them. */
#if !defined(MADV_DODUMP) && defined(MADV_CORE)
#define MADV_DODUMP MADV_CORE
#endif /* MADV_CORE -> MADV_DODUMP */

#if !defined(MADV_DONTDUMP) && defined(MADV_NOCORE)
#define MADV_DONTDUMP MADV_NOCORE
#endif /* MADV_NOCORE -> MADV_DONTDUMP */

/* Declaration only; presumably injects a random delay, with `tiny`
 * selecting a shorter one (TODO confirm against the definition). */
MDBX_MAYBE_UNUSED MDBX_INTERNAL void osal_jitter(bool tiny);

/* max bytes to write in one call:
 * 0x10000000 on 64-bit Windows, 0x04000000 on 32-bit Windows,
 * 0x3f000000 elsewhere */
#if defined(_WIN64)
#define MAX_WRITE UINT32_C(0x10000000)
#elif defined(_WIN32)
#define MAX_WRITE UINT32_C(0x04000000)
#else
#define MAX_WRITE UINT32_C(0x3f000000)

/* Selects the fcntl() record-locking flavour for non-Windows systems.
 * When the explicit 64-bit commands are available (and not on Android) the
 * *64 commands and `struct flock64` are used, together with fcntl64() if
 * it is known to be provided; OFF_T_MAX is then set (unless predefined)
 * to a 64-bit limit with the low 20 bits cleared.
 * Otherwise the plain commands, fcntl() and `struct flock` are used. */
#if defined(F_GETLK64) && defined(F_SETLK64) && defined(F_SETLKW64) && !defined(__ANDROID_API__)
#define MDBX_F_SETLK F_SETLK64
#define MDBX_F_SETLKW F_SETLKW64
#define MDBX_F_GETLK F_GETLK64
#if (__GLIBC_PREREQ(2, 28) && (defined(__USE_LARGEFILE64) || defined(__LARGEFILE64_SOURCE) ||                          \
                               defined(_USE_LARGEFILE64) || defined(_LARGEFILE64_SOURCE))) ||                          \
    defined(fcntl64)
#define MDBX_FCNTL fcntl64
#else
#define MDBX_FCNTL fcntl
#endif
#define MDBX_STRUCT_FLOCK struct flock64
#ifndef OFF_T_MAX
#define OFF_T_MAX UINT64_C(0x7fffFFFFfff00000)
#endif /* OFF_T_MAX */
#else
#define MDBX_F_SETLK F_SETLK
#define MDBX_F_SETLKW F_SETLKW
#define MDBX_F_GETLK F_GETLK
#define MDBX_FCNTL fcntl
#define MDBX_STRUCT_FLOCK struct flock
#endif /* MDBX_F_SETLK, MDBX_F_SETLKW, MDBX_F_GETLK */

/* Selects the open-file-description (OFD) lock commands: the explicit
 * 64-bit ones when available (and not on Android), the plain ones
 * otherwise. In the latter case OFF_T_MAX is derived (unless already
 * defined) from the width of off_t, with the low 20 bits cleared.
 * The mask is 64 bits wide on purpose: a size_t-wide mask would also clear
 * the upper half of the limit on targets having 32-bit size_t together
 * with 64-bit off_t. */
#if defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && defined(F_OFD_GETLK64) && !defined(__ANDROID_API__)
#define MDBX_F_OFD_SETLK F_OFD_SETLK64
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW64
#define MDBX_F_OFD_GETLK F_OFD_GETLK64
#else
#define MDBX_F_OFD_SETLK F_OFD_SETLK
#define MDBX_F_OFD_SETLKW F_OFD_SETLKW
#define MDBX_F_OFD_GETLK F_OFD_GETLK
#ifndef OFF_T_MAX
#define OFF_T_MAX (((sizeof(off_t) > 4) ? INT64_MAX : INT32_MAX) & ~(uint64_t)0xFffff)
#endif /* OFF_T_MAX */
#endif /* MDBX_F_OFD_SETLK64, MDBX_F_OFD_SETLKW64, MDBX_F_OFD_GETLK64 */

#endif /* !Windows */

/* When no platform alias has been set up for osal_strdup, it is declared
 * here as a function exported by the library. */
#ifndef osal_strdup
LIBMDBX_API char *osal_strdup(const char *str);
#endif

/* Returns the last system error of the calling thread as int:
 * GetLastError() on Windows, errno elsewhere. */
MDBX_MAYBE_UNUSED static inline int osal_get_errno(void) {
#if defined(_WIN32) || defined(_WIN64)
  const DWORD last_error = GetLastError();
  return (int)last_error;
#else
  return errno;
#endif
}

/* Aligned allocation pair; each one is declared only when it is not
 * provided as a macro. The allocator returns an int status and stores the
 * block address through `result`. */
#ifndef osal_memalign_alloc
MDBX_INTERNAL int osal_memalign_alloc(size_t alignment, size_t bytes, void **result);
#endif
#ifndef osal_memalign_free
MDBX_INTERNAL void osal_memalign_free(void *ptr);
#endif

/* Operations on osal_condpair_t; each returns an int status.
 * For signal/wait the `part` flag presumably selects one of the two
 * condition objects of the pair (TODO confirm against the definitions). */
MDBX_INTERNAL int osal_condpair_init(osal_condpair_t *condpair);
MDBX_INTERNAL int osal_condpair_lock(osal_condpair_t *condpair);
MDBX_INTERNAL int osal_condpair_unlock(osal_condpair_t *condpair);
MDBX_INTERNAL int osal_condpair_signal(osal_condpair_t *condpair, bool part);
MDBX_INTERNAL int osal_condpair_wait(osal_condpair_t *condpair, bool part);
MDBX_INTERNAL int osal_condpair_destroy(osal_condpair_t *condpair);

/* Operations on osal_fastmutex_t; each returns an int status. */
MDBX_INTERNAL int osal_fastmutex_init(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);

/* File I/O primitives; each returns an int status.
 * The p-prefixed ones take an explicit 64-bit file offset, whereas
 * osal_write() takes none. */
MDBX_INTERNAL int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov, size_t sgvcnt, uint64_t offset);
MDBX_INTERNAL int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count, uint64_t offset);
MDBX_INTERNAL int osal_pwrite(mdbx_filehandle_t fd, const void *buf, size_t count, uint64_t offset);
MDBX_INTERNAL int osal_write(mdbx_filehandle_t fd, const void *buf, size_t count);

/* Thread creation and joining. The start routine uses the platform's
 * calling convention (THREAD_CALL) and result type (THREAD_RESULT). */
MDBX_INTERNAL int osal_thread_create(osal_thread_t *thread, THREAD_RESULT(THREAD_CALL *start_routine)(void *),
                                     void *arg);
MDBX_INTERNAL int osal_thread_join(osal_thread_t thread);

/* Bit flags accepted as the `mode_bits` argument of osal_fsync() and
 * osal_msync(). Every non-zero value is a distinct single bit, so the
 * flags can be OR-ed together. */
enum osal_syncmode_bits {
  MDBX_SYNC_NONE = 0,
  MDBX_SYNC_KICK = 1 << 0,
  MDBX_SYNC_DATA = 1 << 1,
  MDBX_SYNC_SIZE = 1 << 2,
  MDBX_SYNC_IODQ = 1 << 3
};

/* File-level operations on an open handle; each returns an int status.
 * osal_filesize() stores the size through `length`. */
MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, const enum osal_syncmode_bits mode_bits);
MDBX_INTERNAL int osal_fsetsize(mdbx_filehandle_t fd, const uint64_t length);
MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
MDBX_INTERNAL int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);

/* The purpose a file is opened for, passed to osal_openfile().
 * The two OVERLAPPED variants exist only on Windows, hence the numeric
 * values of the enumerators following them differ between platforms. */
enum osal_openfile_purpose {
  MDBX_OPEN_DXB_READ,
  MDBX_OPEN_DXB_LAZY,
  MDBX_OPEN_DXB_DSYNC,
#if defined(_WIN32) || defined(_WIN64)
  MDBX_OPEN_DXB_OVERLAPPED,
  MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
#endif /* Windows */
  MDBX_OPEN_LCK,
  MDBX_OPEN_COPY,
  MDBX_OPEN_DELETE
};

/* Tells whether `c` is a directory separator: '/' everywhere,
 * and additionally '\\' on Windows. */
MDBX_MAYBE_UNUSED static inline bool osal_isdirsep(pathchar_t c) {
#if defined(_WIN32) || defined(_WIN64)
  if (c == '\\')
    return true;
#endif
  return c == '/';
}

/* Pathname helpers and file-level primitives.
 * Pathnames use the platform character type pathchar_t. */
MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, size_t len);
MDBX_INTERNAL pathchar_t *osal_fileext(const pathchar_t *pathname, size_t len);
MDBX_INTERNAL int osal_fileexists(const pathchar_t *pathname);
/* Opens `pathname` for the given purpose and stores the handle
 * through `fd`. */
MDBX_INTERNAL int osal_openfile(const enum osal_openfile_purpose purpose, const MDBX_env *env,
                                const pathchar_t *pathname, mdbx_filehandle_t *fd, mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL int osal_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL int osal_removefile(const pathchar_t *pathname);
MDBX_INTERNAL int osal_removedirectory(const pathchar_t *pathname);
MDBX_INTERNAL int osal_is_pipe(mdbx_filehandle_t fd);
MDBX_INTERNAL int osal_lockfile(mdbx_filehandle_t fd, bool wait);

/* Bits for the `options` argument of osal_mmap(). */
#define MMAP_OPTION_SETLENGTH 1
#define MMAP_OPTION_SEMAPHORE 2
/* Memory-mapping primitives operating on osal_mmap_t; each returns
 * an int status. */
MDBX_INTERNAL int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit, const unsigned options,
                            const pathchar_t *pathname4logging);
MDBX_INTERNAL int osal_munmap(osal_mmap_t *map);
/* Flag bits named after what osal_mresize() is allowed to do. */
#define MDBX_MRESIZE_MAY_MOVE 0x00000100
#define MDBX_MRESIZE_MAY_UNMAP 0x00000200
MDBX_INTERNAL int osal_mresize(const int flags, osal_mmap_t *map, size_t size, size_t limit);
#if defined(_WIN32) || defined(_WIN64)
/* Windows only: a counted array of thread handles used by the
 * suspend/resume pair below; 31 handles are stored inline. */
typedef struct {
  unsigned limit, count;
  HANDLE handles[31];
} mdbx_handle_array_t;
MDBX_INTERNAL int osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
MDBX_INTERNAL int osal_resume_threads_after_remap(mdbx_handle_array_t *array);
#endif /* Windows */
MDBX_INTERNAL int osal_msync(const osal_mmap_t *map, size_t offset, size_t length, enum osal_syncmode_bits mode_bits);
/* Filesystem property checks for an open handle. */
MDBX_INTERNAL int osal_check_fs_rdonly(mdbx_filehandle_t handle, const pathchar_t *pathname, int err);
MDBX_INTERNAL int osal_check_fs_incore(mdbx_filehandle_t handle);
MDBX_INTERNAL int osal_check_fs_local(mdbx_filehandle_t handle, int flags);

/* Returns the identifier of the current process as uint32_t.
 * Compile-time checks ensure that the platform process-id types are not
 * wider than 32 bits. */
MDBX_MAYBE_UNUSED static inline uint32_t osal_getpid(void) {
#if defined(_WIN32) || defined(_WIN64)
  STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t));
  return (uint32_t)GetCurrentProcessId();
#else
  STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t));
  STATIC_ASSERT(sizeof(pid_t) <= sizeof(uint32_t));
  return (uint32_t)getpid();
#endif
}

/* Returns the identifier of the calling thread converted to uintptr_t.
 * A compile-time check ensures that mdbx_tid_t fits into uintptr_t. */
MDBX_MAYBE_UNUSED static inline uintptr_t osal_thread_self(void) {
#if defined(_WIN32) || defined(_WIN64)
  const mdbx_tid_t self = GetCurrentThreadId();
#else
  const mdbx_tid_t self = pthread_self();
#endif
  STATIC_ASSERT(sizeof(uintptr_t) >= sizeof(self));
  return (uintptr_t)self;
}

#if !defined(_WIN32) && !defined(_WIN64)
/* osal_check_tid4bionic(): a real check implemented elsewhere for
 * Android/Bionic, and a stub that always returns 0 for other systems. */
#if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC)
MDBX_INTERNAL int osal_check_tid4bionic(void);
#else
static inline int osal_check_tid4bionic(void) {
  return 0;
}
#endif /* __ANDROID_API__ || ANDROID) || BIONIC */

/* Locks `mutex` after running osal_check_tid4bionic(): a non-zero result
 * of the check is returned as-is and the mutex is left untouched;
 * otherwise the result of pthread_mutex_lock() is returned. */
MDBX_MAYBE_UNUSED static inline int osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
  const int check = osal_check_tid4bionic();
  if (unlikely(check))
    return check;
  return pthread_mutex_lock(mutex);
}
#endif /* !Windows */

/* Time sources and conversions. Judging by the names, osal_monotime()
 * reads a monotonic clock in internal 64-bit units and the two converters
 * translate between those units and 16.16 fixed-point seconds
 * (TODO confirm against the definitions).
 * osal_cputime() optionally reports page faults through its argument. */
MDBX_INTERNAL uint64_t osal_monotime(void);
MDBX_INTERNAL uint64_t osal_cputime(size_t *optional_page_faults);
MDBX_INTERNAL uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
MDBX_INTERNAL uint32_t osal_monotime_to_16dot16(uint64_t monotime);

/* Same as osal_monotime_to_16dot16(), except that a non-zero `monotime`
 * never yields zero: when the conversion result is 0, the value 1 is
 * returned for a non-zero input and 0 for a zero input. */
MDBX_MAYBE_UNUSED static inline uint32_t osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
  const uint32_t converted = osal_monotime_to_16dot16(monotime);
  if (converted != 0)
    return converted;
  /* fix underflow */
  return (monotime > 0) ? 1u : 0u;
}

/*----------------------------------------------------------------------------*/

/* Set-up and tear-down hooks of the OS abstraction layer
 * (presumably invoked once at library load/unload; TODO confirm). */
MDBX_INTERNAL void osal_ctor(void);
MDBX_INTERNAL void osal_dtor(void);

#if defined(_WIN32) || defined(_WIN64)
/* Windows only: produces a wide-character string from `src` and stores
 * it through `pdst`; returns an int status. */
MDBX_INTERNAL int osal_mb2w(const char *const src, wchar_t **const pdst);
#endif /* Windows */

/* A 128-bit value accessible either as two 64-bit words (x, y)
 * or as four 32-bit words (a, b, c, d) sharing the same storage. */
typedef union bin128 {
  __anonymous_struct_extension__ struct {
    uint64_t x, y;
  };
  __anonymous_struct_extension__ struct {
    uint32_t a, b, c, d;
  };
} bin128_t;

/* Returns a 128-bit identifier computed for the given environment. */
MDBX_INTERNAL bin128_t osal_guid(const MDBX_env *);

/*----------------------------------------------------------------------------*/

/* Reverses the byte order of a 64-bit value.
 * Uses a compiler builtin/intrinsic or a libc macro when one is known to
 * be available, otherwise falls back to portable shifts and masks. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t osal_bswap64(uint64_t v) {
#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || __has_builtin(__builtin_bswap64)
  return __builtin_bswap64(v);
#elif defined(_MSC_VER) && !defined(__clang__)
  return _byteswap_uint64(v);
#elif defined(__bswap_64)
  return __bswap_64(v);
#elif defined(bswap_64)
  return bswap_64(v);
#else
  /* swap neighbouring bytes, then neighbouring 16-bit pairs,
   * then the two 32-bit halves */
  v = ((v & UINT64_C(0x00ff00ff00ff00ff)) << 8) | ((v >> 8) & UINT64_C(0x00ff00ff00ff00ff));
  v = ((v & UINT64_C(0x0000ffff0000ffff)) << 16) | ((v >> 16) & UINT64_C(0x0000ffff0000ffff));
  return (v << 32) | (v >> 32);
#endif
}

/* Reverses the byte order of a 32-bit value.
 * Uses a compiler builtin/intrinsic or a libc macro when one is known to
 * be available, otherwise falls back to portable shifts and masks. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t osal_bswap32(uint32_t v) {
#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || __has_builtin(__builtin_bswap32)
  return __builtin_bswap32(v);
#elif defined(_MSC_VER) && !defined(__clang__)
  return _byteswap_ulong(v);
#elif defined(__bswap_32)
  return __bswap_32(v);
#elif defined(bswap_32)
  return bswap_32(v);
#else
  /* swap neighbouring bytes, then the two 16-bit halves */
  v = ((v & UINT32_C(0x00ff00ff)) << 8) | ((v >> 8) & UINT32_C(0x00ff00ff));
  return (v << 16) | (v >> 16);
#endif
}

/*******************************************************************************
 *******************************************************************************
 *
 * BUILD TIME
 *
 *         ####   #####    #####     #     ####   #    #   ####
 *        #    #  #    #     #       #    #    #  ##   #  #
 *        #    #  #    #     #       #    #    #  # #  #   ####
 *        #    #  #####      #       #    #    #  #  # #       #
 *        #    #  #          #       #    #    #  #   ##  #    #
 *         ####   #          #       #     ####   #    #   ####
 *
 *
 */

/** \defgroup build_option Build options
 * The libmdbx build options.
 @{ */

/** Use fcntl(F_FULLFSYNC), at the cost of a 5-10 times slowdown */
#define MDBX_OSX_WANNA_DURABILITY 0
/** Use fsync(), with a chance of data loss on power failure */
#define MDBX_OSX_WANNA_SPEED 1

#ifndef MDBX_APPLE_SPEED_INSTEADOF_DURABILITY
/** Chooses between \ref MDBX_OSX_WANNA_DURABILITY and
 * \ref MDBX_OSX_WANNA_SPEED for OSX & iOS.
 * Durability is the default. */
#define MDBX_APPLE_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY
#endif /* MDBX_APPLE_SPEED_INSTEADOF_DURABILITY */

/** Controls checking the PID to detect reuse of a DB environment after
 * fork(). When not set explicitly the value is chosen automatically, and
 * MDBX_ENV_CHECKPID_CONFIG records whether the value was auto-selected
 * ("AUTO=" prefix) or given by the build. */
#ifndef MDBX_ENV_CHECKPID
#if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64)
/* PID check could be omitted:
 *  - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork()
 *    mapped pages will not be available for child process.
 *  - in Windows where fork() not available. */
#define MDBX_ENV_CHECKPID 0
#else
#define MDBX_ENV_CHECKPID 1
#endif
#define MDBX_ENV_CHECKPID_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
#elif !(MDBX_ENV_CHECKPID == 0 || MDBX_ENV_CHECKPID == 1)
#error MDBX_ENV_CHECKPID must be defined as 0 or 1
#else
#define MDBX_ENV_CHECKPID_CONFIG MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
#endif /* MDBX_ENV_CHECKPID */

/** Controls checking of the transaction owner thread, to catch misuse of
 * transactions from other threads. Enabled by default;
 * MDBX_TXN_CHECKOWNER_CONFIG records whether the value is the default
 * ("AUTO=" prefix) or was given by the build. */
#ifndef MDBX_TXN_CHECKOWNER
#define MDBX_TXN_CHECKOWNER 1
#define MDBX_TXN_CHECKOWNER_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
#elif !(MDBX_TXN_CHECKOWNER == 0 || MDBX_TXN_CHECKOWNER == 1)
#error MDBX_TXN_CHECKOWNER must be defined as 0 or 1
#else
#define MDBX_TXN_CHECKOWNER_CONFIG MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
#endif /* MDBX_TXN_CHECKOWNER */

/** Tells whether the system has a battery-backed Real-Time Clock or just a
 * fake one. By default the clock is not trusted on Linux, NetBSD and
 * OpenBSD, and is trusted elsewhere. MDBX_TRUST_RTC_CONFIG records whether
 * the value was auto-selected ("AUTO=" prefix) or given by the build. */
#ifndef MDBX_TRUST_RTC
#if defined(__linux__) || defined(__gnu_linux__) || defined(__NetBSD__) || defined(__OpenBSD__)
#define MDBX_TRUST_RTC 0 /* a lot of embedded systems have a fake RTC */
#else
#define MDBX_TRUST_RTC 1
#endif
#define MDBX_TRUST_RTC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TRUST_RTC)
#elif !(MDBX_TRUST_RTC == 0 || MDBX_TRUST_RTC == 1)
#error MDBX_TRUST_RTC must be defined as 0 or 1
#else
#define MDBX_TRUST_RTC_CONFIG MDBX_STRINGIFY(MDBX_TRUST_RTC)
#endif /* MDBX_TRUST_RTC */

/** Controls online database auto-compactification during
 * write-transactions. Enabled (1) by default. */
#ifndef MDBX_ENABLE_REFUND
#define MDBX_ENABLE_REFUND 1
#elif !(MDBX_ENABLE_REFUND == 0 || MDBX_ENABLE_REFUND == 1)
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
#endif /* MDBX_ENABLE_REFUND */

/** Controls profiling of GC search and updates. Disabled (0) by default. */
#ifndef MDBX_ENABLE_PROFGC
#define MDBX_ENABLE_PROFGC 0
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
#endif /* MDBX_ENABLE_PROFGC */

/** Controls gathering statistics for page operations.
 * Enabled (1) by default. */
#ifndef MDBX_ENABLE_PGOP_STAT
#define MDBX_ENABLE_PGOP_STAT 1
#elif !(MDBX_ENABLE_PGOP_STAT == 0 || MDBX_ENABLE_PGOP_STAT == 1)
#error MDBX_ENABLE_PGOP_STAT must be defined as 0 or 1
#endif /* MDBX_ENABLE_PGOP_STAT */

/** Controls using Unix' mincore() to determine whether DB-pages
 * are resident in memory.
 * By default it is enabled when MINCORE_INCORE is defined or the target is
 * not Windows. MDBX_USE_MINCORE_CONFIG records whether the value was
 * auto-selected ("AUTO=" prefix) or given by the build, the same way as
 * for the other options having a *_CONFIG companion. */
#ifndef MDBX_USE_MINCORE
#if defined(MINCORE_INCORE) || !(defined(_WIN32) || defined(_WIN64))
#define MDBX_USE_MINCORE 1
#else
#define MDBX_USE_MINCORE 0
#endif
#define MDBX_USE_MINCORE_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_MINCORE)
#elif !(MDBX_USE_MINCORE == 0 || MDBX_USE_MINCORE == 1)
#error MDBX_USE_MINCORE must be defined as 0 or 1
#else
#define MDBX_USE_MINCORE_CONFIG MDBX_STRINGIFY(MDBX_USE_MINCORE)
#endif /* MDBX_USE_MINCORE */

/** Enables chunking of the long list of retired pages during commit of
 * huge transactions, to avoid using sequences of pages.
 * Enabled (1) by default. */
#ifndef MDBX_ENABLE_BIGFOOT
#define MDBX_ENABLE_BIGFOOT 1
#elif !(MDBX_ENABLE_BIGFOOT == 0 || MDBX_ENABLE_BIGFOOT == 1)
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
#endif /* MDBX_ENABLE_BIGFOOT */

/** Disables some checks to reduce the overhead, which also reduces the
 * probability of detecting database corruption to values closer to LMDB.
 * Validation is kept enabled (the option is 0) by default. */
#ifndef MDBX_DISABLE_VALIDATION
#define MDBX_DISABLE_VALIDATION 0
#elif !(MDBX_DISABLE_VALIDATION == 0 || MDBX_DISABLE_VALIDATION == 1)
#error MDBX_DISABLE_VALIDATION must be defined as 0 or 1
#endif /* MDBX_DISABLE_VALIDATION */

/* Judging by the name: controls preallocation of space for radix-sorting
 * of page number lists (PNL) -- TODO confirm against the usage.
 * Enabled (1) by default; must be 0 or 1. */
#ifndef MDBX_PNL_PREALLOC_FOR_RADIXSORT
#define MDBX_PNL_PREALLOC_FOR_RADIXSORT 1
#elif !(MDBX_PNL_PREALLOC_FOR_RADIXSORT == 0 || MDBX_PNL_PREALLOC_FOR_RADIXSORT == 1)
#error MDBX_PNL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */

/* Judging by the name: the same kind of control for dirty page lists (DPL)
 * -- TODO confirm against the usage.
 * Enabled (1) by default; must be 0 or 1. */
#ifndef MDBX_DPL_PREALLOC_FOR_RADIXSORT
#define MDBX_DPL_PREALLOC_FOR_RADIXSORT 1
#elif !(MDBX_DPL_PREALLOC_FOR_RADIXSORT == 0 || MDBX_DPL_PREALLOC_FOR_RADIXSORT == 1)
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */

/** Controls dirty pages tracking, spilling and persisting in `MDBX_WRITEMAP`
 * mode, i.e. disables in-memory database updating with consequent
 * flush-to-disk/msync syscall.
 *
 * 0/OFF = Don't track dirty pages at all, don't spill them, and use msync()
 * to persist data. This is the default on Linux and other systems where the
 * kernel provides proper LRU tracking and effective flushing on demand.
 *
 * 1/ON = Track dirty pages with LRU labels for spilling and persist them
 * explicitly by write(). This may be reasonable for goofy systems (Windows)
 * which have low performance of msync() and/or zany LRU tracking.
 * This is the default on Windows. */
#ifndef MDBX_AVOID_MSYNC
#if defined(_WIN32) || defined(_WIN64)
#define MDBX_AVOID_MSYNC 1
#else
#define MDBX_AVOID_MSYNC 0
#endif
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
#endif /* MDBX_AVOID_MSYNC */

/** Controls the support of sparse sets of DBI handles, which reduces the
 * overhead of starting and processing transactions.
 * Enabled (1) by default. */
#ifndef MDBX_ENABLE_DBI_SPARSE
#define MDBX_ENABLE_DBI_SPARSE 1
#elif !(MDBX_ENABLE_DBI_SPARSE == 0 || MDBX_ENABLE_DBI_SPARSE == 1)
#error MDBX_ENABLE_DBI_SPARSE must be defined as 0 or 1
#endif /* MDBX_ENABLE_DBI_SPARSE */

/** Controls the mechanism of deferred release and the support of a fast
 * path for opening DBI handles without acquiring locks.
 * Enabled (1) by default. */
#ifndef MDBX_ENABLE_DBI_LOCKFREE
#define MDBX_ENABLE_DBI_LOCKFREE 1
#elif !(MDBX_ENABLE_DBI_LOCKFREE == 0 || MDBX_ENABLE_DBI_LOCKFREE == 1)
#error MDBX_ENABLE_DBI_LOCKFREE must be defined as 0 or 1
#endif /* MDBX_ENABLE_DBI_LOCKFREE */

/** Controls the sort order of internal page number lists.
 * This is a mostly experimental/advanced option which is not intended for
 * regular MDBX users. The default is 0.
 * \warning The database format depends on this option, and libmdbx builds
 * with different values of this option are incompatible. */
#ifndef MDBX_PNL_ASCENDING
#define MDBX_PNL_ASCENDING 0
#elif !(MDBX_PNL_ASCENDING == 0 || MDBX_PNL_ASCENDING == 1)
#error MDBX_PNL_ASCENDING must be defined as 0 or 1
#endif /* MDBX_PNL_ASCENDING */

/** Avoid dependence on the MSVC CRT and use ntdll.dll instead.
 * When not predefined, defaults to 1 only if `MDBX_BUILD_CXX` is defined
 * and evaluates to zero, otherwise to 0.
 * A predefined value must be 0 or 1. */
#ifndef MDBX_WITHOUT_MSVC_CRT
#if defined(MDBX_BUILD_CXX) && !MDBX_BUILD_CXX
#define MDBX_WITHOUT_MSVC_CRT 1
#else
#define MDBX_WITHOUT_MSVC_CRT 0
#endif
#elif !(MDBX_WITHOUT_MSVC_CRT == 0 || MDBX_WITHOUT_MSVC_CRT == 1)
#error MDBX_WITHOUT_MSVC_CRT must be defined as 0 or 1
#endif /* MDBX_WITHOUT_MSVC_CRT */

/** Size of the buffer used while copying an environment/database file.
 * Defaults to 1 MiB. A predefined value must be within 64 KiB..1 GiB
 * and be a multiple of 64 KiB. */
#ifndef MDBX_ENVCOPY_WRITEBUF
#define MDBX_ENVCOPY_WRITEBUF 1048576u
#elif MDBX_ENVCOPY_WRITEBUF < 65536u || MDBX_ENVCOPY_WRITEBUF > 1073741824u || MDBX_ENVCOPY_WRITEBUF % 65536u
#error MDBX_ENVCOPY_WRITEBUF must be defined in range 65536..1073741824 and be multiple of 65536
#endif /* MDBX_ENVCOPY_WRITEBUF */

/** Forces assertion checking.
 * Disabled (0) unless predefined; a predefined value must be 0 or 1. */
#ifndef MDBX_FORCE_ASSERTIONS
#define MDBX_FORCE_ASSERTIONS 0
#elif !(MDBX_FORCE_ASSERTIONS == 0 || MDBX_FORCE_ASSERTIONS == 1)
#error MDBX_FORCE_ASSERTIONS must be defined as 0 or 1
#endif /* MDBX_FORCE_ASSERTIONS */

/** Presumed malloc size overhead for each allocation
 * to adjust allocations to be more aligned.
 *
 * When not predefined, defaults to the size of two pointers, using the
 * compiler-provided `__SIZEOF_POINTER__` if available and `sizeof(void *)`
 * otherwise. A predefined value must be within 0..64 and be a multiple of 4. */
#ifndef MDBX_ASSUME_MALLOC_OVERHEAD
#ifdef __SIZEOF_POINTER__
#define MDBX_ASSUME_MALLOC_OVERHEAD (__SIZEOF_POINTER__ * 2u)
#else
#define MDBX_ASSUME_MALLOC_OVERHEAD (sizeof(void *) * 2u)
#endif
#elif MDBX_ASSUME_MALLOC_OVERHEAD < 0 || MDBX_ASSUME_MALLOC_OVERHEAD > 64 || MDBX_ASSUME_MALLOC_OVERHEAD % 4
#error MDBX_ASSUME_MALLOC_OVERHEAD must be defined in range 0..64 and be multiple of 4
#endif /* MDBX_ASSUME_MALLOC_OVERHEAD */

/** If defined then enables integration with Valgrind,
 * a memory analyzing tool.
 * The conditional below is intentionally empty: it defines nothing and
 * only serves as an anchor for this description. */
#ifndef ENABLE_MEMCHECK
#endif /* ENABLE_MEMCHECK */

/** If defined then enables the use of C11 atomics,
 *  otherwise their availability is detected automatically.
 *  The conditional below is intentionally empty: it defines nothing and
 *  only serves as an anchor for this description. */
#ifndef MDBX_HAVE_C11ATOMICS
#endif /* MDBX_HAVE_C11ATOMICS */

/** If defined then enables the use of GCC's `__builtin_cpu_supports()`
 * for runtime dispatching depending on the CPU's capabilities.
 * \note Defining `MDBX_HAVE_BUILTIN_CPU_SUPPORTS` to `0` should be avoided
 * unless building for a particular single-target platform, since on AMD64/x86
 * this disables the dynamic choice (at runtime) of SSE2 / AVX2 / AVX512
 * instructions with fallback to non-accelerated baseline code.
 *
 * When not predefined, the option is forced to 0 for Apple, BIONIC and e2k
 * targets, set to 1 when the builtin is detected, and set to 0 otherwise.
 * A predefined value must be 0 or 1. */
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
#if defined(__APPLE__) || defined(BIONIC)
/* Never use any modern features on Apple's or Google's OSes
 * since there are a lot of troubles with compatibility and/or performance */
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
#elif defined(__e2k__)
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
#elif __has_builtin(__builtin_cpu_supports) || defined(__BUILTIN_CPU_SUPPORTS__) ||                                    \
    (defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
#else
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
#endif
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */

/** If enabled, then instead of returning the `MDBX_REMOTE` error only a warning is issued when
 * a database being opened in non-read-only mode is located in a file system exported via NFS.
 * Disabled (0) unless predefined; a predefined value must be 0 or 1. */
#ifndef MDBX_ENABLE_NON_READONLY_EXPORT
#define MDBX_ENABLE_NON_READONLY_EXPORT 0
#elif !(MDBX_ENABLE_NON_READONLY_EXPORT == 0 || MDBX_ENABLE_NON_READONLY_EXPORT == 1)
#error MDBX_ENABLE_NON_READONLY_EXPORT must be defined as 0 or 1
#endif /* MDBX_ENABLE_NON_READONLY_EXPORT */

//------------------------------------------------------------------------------

/** Win32 File Locking API for \ref MDBX_LOCKING */
#define MDBX_LOCKING_WIN32FILES -1

/** SystemV IPC semaphores for \ref MDBX_LOCKING */
#define MDBX_LOCKING_SYSV 5

/** POSIX-1 Shared anonymous semaphores for \ref MDBX_LOCKING */
#define MDBX_LOCKING_POSIX1988 1988

/** POSIX-2001 Shared Mutexes for \ref MDBX_LOCKING */
#define MDBX_LOCKING_POSIX2001 2001

/** POSIX-2008 Robust Mutexes for \ref MDBX_LOCKING */
#define MDBX_LOCKING_POSIX2008 2008

/** Advanced: Chooses the locking implementation (autodetection by default).
 *
 * On Windows the Win32 file locking is always selected. Otherwise, when not
 * predefined, the choice is: POSIX-2008 robust mutexes or POSIX-2001 shared
 * mutexes if process-shared threads primitives are available (except on
 * FreeBSD), POSIX-1988 semaphores on Solaris/SVR4, and SystemV semaphores
 * as the last resort. On non-Windows targets `MDBX_LOCKING_CONFIG` describes
 * the result, with the "AUTO=" prefix when it was autodetected. */
#if defined(_WIN32) || defined(_WIN64)
#define MDBX_LOCKING MDBX_LOCKING_WIN32FILES
#else
#ifndef MDBX_LOCKING
#if defined(_POSIX_THREAD_PROCESS_SHARED) && _POSIX_THREAD_PROCESS_SHARED >= 200112L && !defined(__FreeBSD__)

/* Some platforms define the EOWNERDEAD error code even though they
 * don't support Robust Mutexes. If in doubt, compile with -DMDBX_LOCKING=2001. */
#if defined(EOWNERDEAD) && _POSIX_THREAD_PROCESS_SHARED >= 200809L &&                                                  \
    ((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) && _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) ||                          \
     (defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) && _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) ||                          \
     defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) &&                                             \
    (!defined(__GLIBC__) || __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */) &&                   \
    !defined(__OHOS__) /* Harmony OS doesn't support robust mutexes at the end of 2025 */
#define MDBX_LOCKING MDBX_LOCKING_POSIX2008
#else
#define MDBX_LOCKING MDBX_LOCKING_POSIX2001
#endif
#elif defined(__sun) || defined(__SVR4) || defined(__svr4__)
#define MDBX_LOCKING MDBX_LOCKING_POSIX1988
#else
#define MDBX_LOCKING MDBX_LOCKING_SYSV
#endif
#define MDBX_LOCKING_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_LOCKING)
#else
#define MDBX_LOCKING_CONFIG MDBX_STRINGIFY(MDBX_LOCKING)
#endif /* MDBX_LOCKING */
#endif /* !Windows */

/** Advanced: Using POSIX OFD-locks (autodetection by default).
 *
 * When not predefined, enabled if the complete set of `F_OFD_*` (or
 * `F_OFD_*64`) fcntl commands is defined, `MDBX_SAFE4QEMU` is not defined
 * and the target is not Solaris. A predefined value must be 0 or 1.
 * `MDBX_USE_OFDLOCKS_CONFIG` describes the result, with the "AUTO=" prefix
 * when it was autodetected. */
#ifndef MDBX_USE_OFDLOCKS
#if ((defined(F_OFD_SETLK) && defined(F_OFD_SETLKW) && defined(F_OFD_GETLK)) ||                                        \
     (defined(F_OFD_SETLK64) && defined(F_OFD_SETLKW64) && defined(F_OFD_GETLK64))) &&                                 \
    !defined(MDBX_SAFE4QEMU) && !defined(__sun) /* OFD-locks are broken on Solaris */
#define MDBX_USE_OFDLOCKS 1
#else
#define MDBX_USE_OFDLOCKS 0
#endif
#define MDBX_USE_OFDLOCKS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
#elif !(MDBX_USE_OFDLOCKS == 0 || MDBX_USE_OFDLOCKS == 1)
#error MDBX_USE_OFDLOCKS must be defined as 0 or 1
#else
#define MDBX_USE_OFDLOCKS_CONFIG MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
#endif /* MDBX_USE_OFDLOCKS */

/** Advanced: Using sendfile() syscall (autodetection by default).
 *
 * When not predefined, enabled on Linux without `__ANDROID_API__`, and on
 * Android when `__ANDROID_API__` is 21 or greater; disabled otherwise.
 * A predefined value must be 0 or 1. */
#ifndef MDBX_USE_SENDFILE
#if ((defined(__linux__) || defined(__gnu_linux__)) && !defined(__ANDROID_API__)) ||                                   \
    (defined(__ANDROID_API__) && __ANDROID_API__ >= 21)
#define MDBX_USE_SENDFILE 1
#else
#define MDBX_USE_SENDFILE 0
#endif
#elif !(MDBX_USE_SENDFILE == 0 || MDBX_USE_SENDFILE == 1)
#error MDBX_USE_SENDFILE must be defined as 0 or 1
#endif /* MDBX_USE_SENDFILE */

/** Advanced: Using copy_file_range() syscall (autodetection by default).
 *
 * When not predefined, enabled only for glibc 2.27 or newer with
 * `_GNU_SOURCE` defined. A predefined value must be 0 or 1. */
#ifndef MDBX_USE_COPYFILERANGE
#if __GLIBC_PREREQ(2, 27) && defined(_GNU_SOURCE)
#define MDBX_USE_COPYFILERANGE 1
#else
#define MDBX_USE_COPYFILERANGE 0
#endif
#elif !(MDBX_USE_COPYFILERANGE == 0 || MDBX_USE_COPYFILERANGE == 1)
#error MDBX_USE_COPYFILERANGE must be defined as 0 or 1
#endif /* MDBX_USE_COPYFILERANGE */

/** Advanced: Using posix_fallocate() or fcntl(F_PREALLOCATE) on OSX (autodetection by default).
 *
 * When not predefined, disabled on Apple targets, enabled when
 * `_POSIX_C_SOURCE` is at least 200112L or for glibc 2.10+ with
 * `_GNU_SOURCE`, and disabled otherwise. A predefined value must be 0 or 1.
 * `MDBX_USE_FALLOCATE_CONFIG` describes the result, with the "AUTO=" prefix
 * when it was autodetected. */
#ifndef MDBX_USE_FALLOCATE
#if defined(__APPLE__)
#define MDBX_USE_FALLOCATE 0 /* Too slow and unclean, but not required to prevent SIGBUS */
#elif (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || (__GLIBC_PREREQ(2, 10) && defined(_GNU_SOURCE))
#define MDBX_USE_FALLOCATE 1
#else
#define MDBX_USE_FALLOCATE 0
#endif
#define MDBX_USE_FALLOCATE_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_FALLOCATE)
#elif !(MDBX_USE_FALLOCATE == 0 || MDBX_USE_FALLOCATE == 1)
#error MDBX_USE_FALLOCATE must be defined as 0 or 1
#else
#define MDBX_USE_FALLOCATE_CONFIG MDBX_STRINGIFY(MDBX_USE_FALLOCATE)
#endif /* MDBX_USE_FALLOCATE */

//------------------------------------------------------------------------------

/* MDBX_CPU_WRITEBACK_INCOHERENT: when not predefined, defaults to 0 for
 * ia32, e2k and HP-PA targets (and for Doxygen), and to 1 for any other CPU.
 * A predefined value must be 0 or 1.
 * NOTE(review): presumably 1 means that CPU write-back is not coherent and
 * needs explicit handling by the library - confirm against the usage sites. */
#ifndef MDBX_CPU_WRITEBACK_INCOHERENT
#if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || defined(__hppa__) || defined(DOXYGEN)
#define MDBX_CPU_WRITEBACK_INCOHERENT 0
#else
#define MDBX_CPU_WRITEBACK_INCOHERENT 1
#endif
#elif !(MDBX_CPU_WRITEBACK_INCOHERENT == 0 || MDBX_CPU_WRITEBACK_INCOHERENT == 1)
#error MDBX_CPU_WRITEBACK_INCOHERENT must be defined as 0 or 1
#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */

/* MDBX_MMAP_INCOHERENT_FILE_WRITE: when not predefined, defaults to 1 on
 * OpenBSD and to 0 elsewhere. A predefined value must be 0 or 1.
 * NOTE(review): presumably 1 means that data written to a file via write()
 * is not coherently visible through a memory mapping - confirm against
 * the usage sites. */
#ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
#ifdef __OpenBSD__
#define MDBX_MMAP_INCOHERENT_FILE_WRITE 1
#else
#define MDBX_MMAP_INCOHERENT_FILE_WRITE 0
#endif
#elif !(MDBX_MMAP_INCOHERENT_FILE_WRITE == 0 || MDBX_MMAP_INCOHERENT_FILE_WRITE == 1)
#error MDBX_MMAP_INCOHERENT_FILE_WRITE must be defined as 0 or 1
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */

/* MDBX_MMAP_INCOHERENT_CPU_CACHE: when not predefined, defaults to 1 for
 * the MIPS-related targets listed below and to 0 elsewhere.
 * A predefined value must be 0 or 1. */
#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
#if defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) ||          \
    defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi)
/* MIPS has cache coherency issues. */
#define MDBX_MMAP_INCOHERENT_CPU_CACHE 1
#else
/* LY: assume no relevant mmap/dcache issues. */
#define MDBX_MMAP_INCOHERENT_CPU_CACHE 0
#endif
#elif !(MDBX_MMAP_INCOHERENT_CPU_CACHE == 0 || MDBX_MMAP_INCOHERENT_CPU_CACHE == 1)
#error MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined as 0 or 1
#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */

/** Assume the system needs an explicit syscall to sync/flush/write modified
 * mapped memory.
 *
 * When not predefined, enabled if either `MDBX_MMAP_INCOHERENT_FILE_WRITE` or
 * `MDBX_MMAP_INCOHERENT_CPU_CACHE` is non-zero, or if the target is not
 * Linux; disabled otherwise. A predefined value must be 0 or 1.
 *
 * `MDBX_MMAP_NEEDS_JOLT_CONFIG` describes the result, with the "AUTO=" prefix
 * when it was autodetected. It is defined on both paths, so that it stays
 * usable when the option is given explicitly, in the same way as the
 * other `*_CONFIG` macros of the neighbouring options. */
#ifndef MDBX_MMAP_NEEDS_JOLT
#if MDBX_MMAP_INCOHERENT_FILE_WRITE || MDBX_MMAP_INCOHERENT_CPU_CACHE || !(defined(__linux__) || defined(__gnu_linux__))
#define MDBX_MMAP_NEEDS_JOLT 1
#else
#define MDBX_MMAP_NEEDS_JOLT 0
#endif
#define MDBX_MMAP_NEEDS_JOLT_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_MMAP_NEEDS_JOLT)
#elif !(MDBX_MMAP_NEEDS_JOLT == 0 || MDBX_MMAP_NEEDS_JOLT == 1)
#error MDBX_MMAP_NEEDS_JOLT must be defined as 0 or 1
#else
#define MDBX_MMAP_NEEDS_JOLT_CONFIG MDBX_STRINGIFY(MDBX_MMAP_NEEDS_JOLT)
#endif /* MDBX_MMAP_NEEDS_JOLT */

/* MDBX_64BIT_ATOMIC: when not predefined, defaults to 1 if `MDBX_WORDBITS`
 * is at least 64 (and for Doxygen), and to 0 otherwise.
 * A predefined value must be 0 or 1.
 * `MDBX_64BIT_ATOMIC_CONFIG` describes the result, with the "AUTO=" prefix
 * when it was autodetected. */
#ifndef MDBX_64BIT_ATOMIC
#if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
#define MDBX_64BIT_ATOMIC 1
#else
#define MDBX_64BIT_ATOMIC 0
#endif
#define MDBX_64BIT_ATOMIC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
#elif !(MDBX_64BIT_ATOMIC == 0 || MDBX_64BIT_ATOMIC == 1)
#error MDBX_64BIT_ATOMIC must be defined as 0 or 1
#else
#define MDBX_64BIT_ATOMIC_CONFIG MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
#endif /* MDBX_64BIT_ATOMIC */

/* MDBX_64BIT_CAS: controls whether a 64-bit compare-and-swap is assumed
 * to be available.
 *
 * When not predefined, the value is autodetected in the following order:
 *  - from the first available of `__GCC_ATOMIC_LLONG_LOCK_FREE`,
 *    `__CLANG_ATOMIC_LLONG_LOCK_FREE` and `ATOMIC_LLONG_LOCK_FREE`
 *    (1 only if the macro is greater than 1, otherwise 0);
 *  - 1 for MSVC, Apple targets and Doxygen;
 *  - otherwise the same as MDBX_64BIT_ATOMIC.
 *
 * A predefined value must be 0 or 1. This check is placed on the outer
 * conditional chain: inside the `#ifndef` branch the macro is still undefined,
 * so a check placed there could never fire and a predefined value would be
 * left unchecked.
 *
 * `MDBX_64BIT_CAS_CONFIG` describes the result, with the "AUTO=" prefix
 * when it was autodetected. */
#ifndef MDBX_64BIT_CAS
#if defined(__GCC_ATOMIC_LLONG_LOCK_FREE)
#if __GCC_ATOMIC_LLONG_LOCK_FREE > 1
#define MDBX_64BIT_CAS 1
#else
#define MDBX_64BIT_CAS 0
#endif
#elif defined(__CLANG_ATOMIC_LLONG_LOCK_FREE)
#if __CLANG_ATOMIC_LLONG_LOCK_FREE > 1
#define MDBX_64BIT_CAS 1
#else
#define MDBX_64BIT_CAS 0
#endif
#elif defined(ATOMIC_LLONG_LOCK_FREE)
#if ATOMIC_LLONG_LOCK_FREE > 1
#define MDBX_64BIT_CAS 1
#else
#define MDBX_64BIT_CAS 0
#endif
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
#define MDBX_64BIT_CAS 1
#else
#define MDBX_64BIT_CAS MDBX_64BIT_ATOMIC
#endif
#define MDBX_64BIT_CAS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_CAS)
#elif !(MDBX_64BIT_CAS == 0 || MDBX_64BIT_CAS == 1)
#error MDBX_64BIT_CAS must be defined as 0 or 1
#else
#define MDBX_64BIT_CAS_CONFIG MDBX_STRINGIFY(MDBX_64BIT_CAS)
#endif /* MDBX_64BIT_CAS */

/* MDBX_UNALIGNED_OK: the maximal width, in bytes, of a word which may be
 * accessed unaligned: 0 = no unaligned access allowed, 4 = ok for 32-bit
 * words, 8 = ok for 64-bit words, 32 = any unaligned access allowed.
 *
 * When not predefined, the value is chosen by the target below.
 * A predefined value of 1 is replaced with 32. */
#ifndef MDBX_UNALIGNED_OK
#if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) || defined(ENABLE_UBSAN)
#define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
#elif defined(__ARM_FEATURE_UNALIGNED)
#define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
#elif defined(__e2k__) || defined(__elbrus__)
#if __iset__ > 4
#define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
#else
#define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
#endif
#elif defined(__ia32__)
#define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
#elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0)
/* expecting that the optimization will be done well by the compiler, also
 * this hushes false-positives from UBSAN (undefined behaviour sanitizer) */
#define MDBX_UNALIGNED_OK 0
#else
#define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
#endif
#elif MDBX_UNALIGNED_OK == 1
#undef MDBX_UNALIGNED_OK
#define MDBX_UNALIGNED_OK 32 /* any unaligned access allowed */
#endif                       /* MDBX_UNALIGNED_OK */

/* MDBX_CACHELINE_SIZE: the assumed CPU cache line size in bytes.
 * When not predefined, it is taken from `SYSTEM_CACHE_ALIGNMENT_SIZE` if
 * that is defined, otherwise 128 for IA-64 targets and 64 for any other. */
#ifndef MDBX_CACHELINE_SIZE
#if defined(SYSTEM_CACHE_ALIGNMENT_SIZE)
#define MDBX_CACHELINE_SIZE SYSTEM_CACHE_ALIGNMENT_SIZE
#elif defined(__ia64__) || defined(__ia64) || defined(_M_IA64)
#define MDBX_CACHELINE_SIZE 128
#else
#define MDBX_CACHELINE_SIZE 64
#endif
#endif /* MDBX_CACHELINE_SIZE */

/* Max length of the iov-vector passed to a writev() call, used for auxiliary
 * writes. Defaults to 64 and is lowered to `IOV_MAX` when the system limit
 * is defined and is less than the chosen value. */
#ifndef MDBX_AUXILARY_IOV_MAX
#define MDBX_AUXILARY_IOV_MAX 64
#endif
#if defined(IOV_MAX) && IOV_MAX < MDBX_AUXILARY_IOV_MAX
#undef MDBX_AUXILARY_IOV_MAX
#define MDBX_AUXILARY_IOV_MAX IOV_MAX
#endif /* MDBX_AUXILARY_IOV_MAX */

/* Extra/custom information provided during the library build;
 * an empty string unless predefined. */
#ifndef MDBX_BUILD_METADATA
#define MDBX_BUILD_METADATA ""
#endif /* MDBX_BUILD_METADATA */
/** @} end of build options */
/*******************************************************************************
 *******************************************************************************
 ******************************************************************************/

#ifndef DOXYGEN

/* In case MDBX_DEBUG is undefined, set it according to NDEBUG:
 * 0 when NDEBUG is defined and 1 otherwise.
 * Only the values 0, 1 and 2 are accepted. */
#ifndef MDBX_DEBUG
#ifdef NDEBUG
#define MDBX_DEBUG 0
#else
#define MDBX_DEBUG 1
#endif
#endif
#if MDBX_DEBUG < 0 || MDBX_DEBUG > 2
#error "The MDBX_DEBUG must be defined to 0, 1 or 2"
#endif /* MDBX_DEBUG */

#else

/* !!! Actually these are fake definitions for Doxygen only !!! */

/** Controls enabling of debugging features.
 *
 *  - `MDBX_DEBUG = 0` (by default) Disables any debugging features at all,
 *                     including logging and assertion controls.
 *                     Changing the logging level and corresponding debug
 *                     flags by \ref mdbx_setup_debug() will have no effect.
 *  - `MDBX_DEBUG > 0` Enables code for the debugging features (logging,
 *                     assertions checking and internal audit).
 *                     Simultaneously sets the default logging level
 *                     to the `MDBX_DEBUG` value.
 *                     Also enables \ref MDBX_DBG_AUDIT if `MDBX_DEBUG >= 2`.
 *
 * \ingroup build_option */
#define MDBX_DEBUG 0...2

/** Disables using of GNU libc extensions. */
#define MDBX_DISABLE_GNU_SOURCE 0 or 1

#endif /* DOXYGEN */

/* Sanity checks: both options are used by the conditionals of the atomic
 * types below, so they must be defined by this point. */
#ifndef MDBX_64BIT_ATOMIC
#error "The MDBX_64BIT_ATOMIC must be defined before"
#endif /* MDBX_64BIT_ATOMIC */

#ifndef MDBX_64BIT_CAS
#error "The MDBX_64BIT_CAS must be defined before"
#endif /* MDBX_64BIT_CAS */

/* Selects the backend for atomic operations:
 *  - C++ with the <cstdatomic> header available: include it and define
 *    MDBX_HAVE_C11ATOMICS;
 *  - C11 (or the `c_atomic` extension) with a suitable compiler: include
 *    <stdatomic.h> and define MDBX_HAVE_C11ATOMICS;
 *  - other GCC-compatible compilers: nothing is included or defined here;
 *  - MSVC: enable the Interlocked intrinsics and hush the related warnings;
 *  - Apple: include <libkern/OSAtomic.h>;
 *  - anything else is a build error.
 * NOTE(review): <cstdatomic> was a pre-standard header name, while standard
 * C++11 provides <atomic> - confirm which one is intended here. */
#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
#include <cstdatomic>
#define MDBX_HAVE_C11ATOMICS
#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) &&                           \
    !defined(__STDC_NO_ATOMICS__) &&                                                                                   \
    (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || !(defined(__GNUC__) || defined(__clang__)))
#include <stdatomic.h>
#define MDBX_HAVE_C11ATOMICS
#elif defined(__GNUC__) || defined(__clang__)
#elif defined(_MSC_VER)
#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
#pragma warning(disable : 4133) /* 'function': incompatible types - from                                               \
                                   'size_t' to 'LONGLONG' */
#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to                                             \
                                   'std::size_t', possible loss of data */
#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to                                             \
                                   'long', possible loss of data */
#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
#elif defined(__APPLE__)
#include <libkern/OSAtomic.h>
#else
#error FIXME atomic-ops
#endif

/* Memory ordering requested for the internal atomic operations.
 * With C11 atomics, mo_AcquireRelease is mapped to `memory_order_release`
 * for stores and to `memory_order_acquire` for loads by the mo_c11_store()
 * and mo_c11_load() macros. */
typedef enum mdbx_memory_order {
  mo_Relaxed,
  mo_AcquireRelease
  /* , mo_SequentialConsistency */
} mdbx_memory_order_t;

/* A 32-bit atomic cell: `weak` gives plain volatile access to the value,
 * and `c11a` (available only with C11 atomics) views the same storage
 * as an _Atomic object. */
typedef union {
  volatile uint32_t weak;
#ifdef MDBX_HAVE_C11ATOMICS
  volatile _Atomic uint32_t c11a;
#endif /* MDBX_HAVE_C11ATOMICS */
} mdbx_atomic_uint32_t;

/* A 64-bit atomic cell: `weak` gives plain volatile access to the value.
 * `c11a` is available with C11 atomics when either MDBX_64BIT_CAS or
 * MDBX_64BIT_ATOMIC is enabled. Unless C11 atomics are available and both
 * of these options are enabled, the value is also accessible as a pair of
 * 32-bit atomic halves, declared in byte-order dependent sequence so that
 * `low` overlays the least significant half of `weak`. */
typedef union {
  volatile uint64_t weak;
#if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC)
  volatile _Atomic uint64_t c11a;
#endif
#if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC
  __anonymous_struct_extension__ struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    mdbx_atomic_uint32_t low, high;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    mdbx_atomic_uint32_t high, low;
#else
#error "FIXME: Unsupported byte order"
#endif /* __BYTE_ORDER__ */
  };
#endif
} mdbx_atomic_uint64_t;

#ifdef MDBX_HAVE_C11ATOMICS

/* Crutches for C11 atomic compiler's bugs.
 * MDBX_c11a_ro() and MDBX_c11a_rw() yield a pointer to the member of an
 * mdbx_atomic_uintXX_t which should be passed to the C11 atomic functions
 * for read-only and read-write access respectively:
 *  - old LCC for e2k: the plain `weak` member is used instead of `c11a`;
 *  - clang: the read-only pointer is explicitly cast to a pointer to
 *    a volatile _Atomic object;
 *  - otherwise: the `c11a` member is used as is.
 * NOTE(review): `__clang__` is conventionally defined as 1, so the
 * `__clang__ < 8` condition looks like it holds for every clang version;
 * `__clang_major__` may have been intended - confirm. */
#if defined(__e2k__) && defined(__LCC__) && __LCC__ < /* FIXME */ 127
#define MDBX_c11a_ro(type, ptr) (&(ptr)->weak)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->weak)
#elif defined(__clang__) && __clang__ < 8
#define MDBX_c11a_ro(type, ptr) ((volatile _Atomic(type) *)&(ptr)->c11a)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
#else
#define MDBX_c11a_ro(type, ptr) (&(ptr)->c11a)
#define MDBX_c11a_rw(type, ptr) (&(ptr)->c11a)
#endif /* Crutches for C11 atomic compiler's bugs */

/* Maps an mdbx_memory_order_t value to the C11 memory order for a store:
 * relaxed for mo_Relaxed, release for mo_AcquireRelease,
 * and seq_cst for any other value. */
#define mo_c11_store(fence)                                                                                            \
  (((fence) == mo_Relaxed)          ? memory_order_relaxed                                                             \
   : ((fence) == mo_AcquireRelease) ? memory_order_release                                                             \
                                    : memory_order_seq_cst)
/* Maps an mdbx_memory_order_t value to the C11 memory order for a load:
 * relaxed for mo_Relaxed, acquire for mo_AcquireRelease,
 * and seq_cst for any other value. */
#define mo_c11_load(fence)                                                                                             \
  (((fence) == mo_Relaxed)          ? memory_order_relaxed                                                             \
   : ((fence) == mo_AcquireRelease) ? memory_order_acquire                                                             \
                                    : memory_order_seq_cst)

#endif /* MDBX_HAVE_C11ATOMICS */

/* The lowest 64-bit value whose upper 32 bits are all ones.
 * MAX_TXNID is defined as this threshold minus one, i.e. the values at or
 * above the threshold are outside of the valid range of transaction IDs. */
#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000)

/* The definitions below are packed with 4-byte alignment,
 * until the pack setting is changed or popped. */
#pragma pack(push, 4)

/* A stamp that identifies a file as an MDBX file.
 * There's nothing special about this value other than that it is easily
 * recognizable, and it will reflect any byte order mismatches. */
#define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11)

/* FROZEN: The version number for a database's datafile format. */
#define MDBX_DATA_VERSION 3

/* The complete 64-bit stamps of a datafile: the 56-bit MDBX_MAGIC is shifted
 * left by 8 bits, and the lowest byte holds the format version plus 64
 * when MDBX_PNL_ASCENDING is enabled. The legacy-compatible stamp uses
 * the version 2, and the legacy-devel stamp uses 255 as the lowest byte. */
#define MDBX_DATA_MAGIC ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + MDBX_DATA_VERSION)
#define MDBX_DATA_MAGIC_LEGACY_COMPAT ((MDBX_MAGIC << 8) + MDBX_PNL_ASCENDING * 64 + 2)
#define MDBX_DATA_MAGIC_LEGACY_DEVEL ((MDBX_MAGIC << 8) + 255)

/* handle for the DB used to track free pages. */
#define FREE_DBI 0
/* handle for the default DB. */
#define MAIN_DBI 1
/* Number of DBs in metapage (free and main) - also hardcoded elsewhere */
#define CORE_DBS 2

/* Number of meta pages - also hardcoded elsewhere */
#define NUM_METAS 3

/* A page number in the database.
 *
 * MDBX uses 32 bit for page numbers. This limits database
 * size up to 2^44 bytes, in case of 4K pages.
 *
 * MIN_PAGENO is the number of the first page after the meta pages. */
typedef uint32_t pgno_t;
typedef mdbx_atomic_uint32_t atomic_pgno_t;
#define PRIaPGNO PRIu32
#define MAX_PAGENO UINT32_C(0x7FFFffff)
#define MIN_PAGENO NUM_METAS

/* An invalid page number.
 * Mainly used to denote an empty tree. */
#define P_INVALID (~(pgno_t)0)

/* A transaction ID.
 *
 * Valid IDs are within MIN_TXNID..MAX_TXNID, and INITIAL_TXNID leaves
 * room for one ID per meta page.
 * NOTE(review): PRIaTXN is a signed format (PRIi64) while txnid_t is
 * unsigned, so INVALID_TXNID would presumably be printed as -1 - confirm
 * that this is intentional. */
typedef uint64_t txnid_t;
typedef mdbx_atomic_uint64_t atomic_txnid_t;
#define PRIaTXN PRIi64
#define MIN_TXNID UINT64_C(1)
#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1)
#define INITIAL_TXNID (MIN_TXNID + NUM_METAS - 1)
#define INVALID_TXNID UINT64_MAX

/* Used for offsets within a single page. */
typedef uint16_t indx_t;

/* Description of a single b-tree (a table): its flags, shape,
 * page counters and root page. Two such records (for GC and for the main
 * table) are embedded into the meta page, see meta_t. */
typedef struct tree {
  uint16_t flags;       /* see mdbx_dbi_open */
  uint16_t height;      /* height of this tree */
  uint32_t dupfix_size; /* key-size for MDBX_DUPFIXED (DUPFIX pages) */
  pgno_t root;          /* the root page of this tree */
  pgno_t branch_pages;  /* number of branch pages */
  pgno_t leaf_pages;    /* number of leaf pages */
  pgno_t large_pages;   /* number of large pages */
  uint64_t sequence;    /* table sequence counter */
  uint64_t items;       /* number of data items */
  uint64_t mod_txnid;   /* txnid of last committed modification */
} tree_t;

/* Database size-related parameters.
 * Each of the two unions provides a pair of names for the same field:
 * `now` is also accessible as `end_pgno`, and `first_unallocated`
 * is also accessible as `next_pgno`. */
typedef struct geo {
  uint16_t grow_pv;   /* datafile growth step as a 16-bit packed (exponential
                           quantized) value */
  uint16_t shrink_pv; /* datafile shrink threshold as a 16-bit packed
                           (exponential quantized) value */
  pgno_t lower;       /* minimal size of datafile in pages */
  pgno_t upper;       /* maximal size of datafile in pages */
  union {
    pgno_t now; /* current size of datafile in pages */
    pgno_t end_pgno;
  };
  union {
    pgno_t first_unallocated; /* first unused page in the datafile,
                         but actually the file may be shorter. */
    pgno_t next_pgno;
  };
} geo_t;

/* Meta page content.
 * A meta page is the start point for accessing a database snapshot.
 * Pages 0-2 are meta pages.
 *
 * The txnid is stored twice, as `txnid_a` before and as `txnid_b` after
 * the bulk of the content, forming a two-phase-update pair. */
typedef struct meta {
  /* Stamp identifying this as an MDBX file.
   * It must be set to MDBX_MAGIC with MDBX_DATA_VERSION. */
  uint32_t magic_and_version[2];

  /* txnid that committed this meta, the first of a two-phase-update pair */
  union {
    mdbx_atomic_uint32_t txnid_a[2];
    uint64_t unsafe_txnid;
  };

  uint16_t reserve16;   /* extra flags, zero (nothing) for now */
  uint8_t validator_id; /* ID of checksum and page validation method,
                         * zero (nothing) for now */
  int8_t extra_pagehdr; /* extra bytes in the page header,
                         * zero (nothing) for now */

  geo_t geometry; /* database size-related parameters */

  /* The trees of GC and of the main table. The anonymous struct overlays
   * the first fields of `trees.gc`, so that `pagesize` shares its storage
   * with `trees.gc.dupfix_size`. */
  union {
    struct {
      tree_t gc, main;
    } trees;
    __anonymous_struct_extension__ struct {
      uint16_t gc_flags;
      uint16_t gc_height;
      uint32_t pagesize;
    };
  };

  MDBX_canary canary;

  /* Data signature: SIGN_IS_STEADY() treats any value greater than
   * DATASIGN_WEAK as a steady one. */
#define DATASIGN_NONE 0u
#define DATASIGN_WEAK 1u
#define SIGN_IS_STEADY(sign) ((sign) > DATASIGN_WEAK)
  union {
    uint32_t sign[2];
    uint64_t unsafe_sign;
  };

  /* txnid that committed this meta, the second of a two-phase-update pair */
  mdbx_atomic_uint32_t txnid_b[2];

  /* Number of non-meta pages which were put in GC after COW. May be 0 in case
   * DB was previously handled by libmdbx without corresponding feature.
   * This value in couple with reader.snapshot_pages_retired allows fast
   * estimation of "how much reader is restraining GC recycling". */
  uint32_t pages_retired[2];

  /* The analogue of /proc/sys/kernel/random/boot_id or similar, used to
   * determine whether the system was rebooted after the last use of the
   * database files. If there was no reboot, then there is no need to rollback
   * to the last steady sync point. Zeros mean that no relevant information
   * is available from the system. */
  bin128_t bootid;

  /* The database GUID, since v0.13.1 */
  bin128_t dxbid;
} meta_t;

/* The definitions below are packed without padding,
 * until the pack setting is popped. */
#pragma pack(1)

/* Page flags (the P_xxx bits) followed by the page_xxx aliases which name
 * the combinations of the flag bits that denote a particular page type.
 * P_ILL_BITS is the 16-bit mask of all bits other than P_BRANCH, P_LEAF,
 * P_DUPFIX, P_LARGE and P_SPILLED. */
typedef enum page_type {
  P_BRANCH = 0x01u /* branch page */,
  P_LEAF = 0x02u /* leaf page */,
  P_LARGE = 0x04u /* large/overflow page */,
  P_META = 0x08u /* meta page */,
  P_LEGACY_DIRTY = 0x10u /* legacy P_DIRTY flag prior to v0.10 958fd5b9 */,
  P_BAD = P_LEGACY_DIRTY /* explicit flag for invalid/bad page */,
  P_DUPFIX = 0x20u /* for MDBX_DUPFIXED records */,
  P_SUBP = 0x40u /* for MDBX_DUPSORT sub-pages */,
  P_SPILLED = 0x2000u /* spilled in parent txn */,
  P_LOOSE = 0x4000u /* page was dirtied then freed, can be reused */,
  P_FROZEN = 0x8000u /* used for retire page with known status */,
  P_ILL_BITS = (uint16_t)~(P_BRANCH | P_LEAF | P_DUPFIX | P_LARGE | P_SPILLED),

  page_broken = 0,
  page_large = P_LARGE,
  page_branch = P_BRANCH,
  page_leaf = P_LEAF,
  page_dupfix_leaf = P_DUPFIX,
  page_sub_leaf = P_SUBP | P_LEAF,
  page_sub_dupfix_leaf = P_SUBP | P_DUPFIX,
  page_sub_broken = P_SUBP,
} page_type_t;

/* Common header for all page types. The page type depends on flags.
 *
 * P_BRANCH and P_LEAF pages have unsorted 'node_t's at the end, with
 * sorted entries[] entries referring to them. Exception: P_DUPFIX pages
 * omit entries and pack sorted MDBX_DUPFIXED values after the page header.
 *
 * P_LARGE records occupy one or more contiguous pages where only the
 * first has a page header. They hold the real data of N_BIG nodes.
 *
 * P_SUBP sub-pages are small leaf "pages" with duplicate data.
 * A node with flag N_DUP but not N_TREE contains a sub-page.
 * (Duplicate data can also go in tables, which use normal pages.)
 *
 * P_META pages contain meta_t, the start point of an MDBX snapshot.
 *
 * Each non-metapage up to the last allocated one is reachable exactly once
 * in the snapshot: Either used by a database or listed in a GC record.
 * NOTE(review): this comment used to refer to `meta_t.mm_last_pg`, which is
 * not a field of the visible meta_t; presumably the bound is derived from
 * `meta_t.geometry.first_unallocated` - TODO confirm. */
typedef struct page {
  uint64_t txnid;        /* txnid which created page, maybe zero in legacy DB */
  uint16_t dupfix_ksize; /* key size if this is a DUPFIX page */
  uint16_t flags;
  union {
    uint32_t pages; /* number of overflow pages */
    __anonymous_struct_extension__ struct {
      indx_t lower; /* lower bound of free space */
      indx_t upper; /* upper bound of free space */
    };
  };
  pgno_t pgno; /* page number */

#if FLEXIBLE_ARRAY_MEMBERS
  indx_t entries[] /* dynamic size */;
#endif /* FLEXIBLE_ARRAY_MEMBERS */
} page_t;

/* Size of the page header, excluding dynamic data at the end:
 * 8 (txnid) + 2 (dupfix_ksize) + 2 (flags) + 4 (pages or lower/upper)
 * + 4 (pgno) bytes. */
#define PAGEHDRSZ 20u

/* Header for a single key/data pair within a page.
 * Used in pages of type P_BRANCH and P_LEAF without P_DUPFIX.
 * We guarantee 2-byte alignment for 'node_t's.
 *
 * Leaf node flags describe node contents.  N_BIG says the node's
 * data part is the page number of an overflow page with actual data.
 * N_DUP and N_TREE can be combined giving duplicate data in
 * a sub-page/table, and named databases (just N_TREE).
 *
 * The fields are declared in reverse order for non-little-endian targets.
 * `dsize` and `child_pgno` share the same storage. */
typedef struct node {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  union {
    uint32_t dsize;
    uint32_t child_pgno;
  };
  uint8_t flags; /* see node_flags */
  uint8_t extra;
  uint16_t ksize; /* key size */
#else
  uint16_t ksize; /* key size */
  uint8_t extra;
  uint8_t flags; /* see node_flags */
  union {
    uint32_t child_pgno;
    uint32_t dsize;
  };
#endif /* __BYTE_ORDER__ */

#if FLEXIBLE_ARRAY_MEMBERS
  uint8_t payload[] /* key and data are appended here */;
#endif /* FLEXIBLE_ARRAY_MEMBERS */
} node_t;

/* Size of the node header, excluding dynamic data at the end:
 * 4 (dsize or child_pgno) + 1 (flags) + 1 (extra) + 2 (ksize) bytes. */
#define NODESIZE 8u

/* Flags of a node, stored in node_t.flags. */
typedef enum node_flags {
  N_BIG = 0x01 /* data put on large page */,
  N_TREE = 0x02 /* data is a b-tree */,
  N_DUP = 0x04 /* data has duplicates */
} node_flags_t;

/* Restores the pack setting saved by the preceding `#pragma pack(push, 4)`. */
#pragma pack(pop)

/* Returns the low 8 bits of the page flags. The 16-bit flags are narrowed
 * to uint8_t, so the bits above 0xFF (e.g. P_SPILLED, P_LOOSE, P_FROZEN)
 * are not part of the result. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t page_type(const page_t *mp) {
  const uint16_t all_flags = mp->flags;
  return (uint8_t)all_flags;
}

/* Returns the low 8 bits of the page flags like page_type() does, but for
 * sub-pages both the P_SUBP and the legacy P_DIRTY flags are dropped
 * for compatibility. Intended for assertions only. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t page_type_compat(const page_t *mp) {
  if (unlikely(mp->flags & P_SUBP))
    return mp->flags & ~(P_SUBP | P_LEGACY_DIRTY);
  return mp->flags;
}

/* Tells whether the P_LEAF flag is set for the page. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_leaf(const page_t *mp) {
  const unsigned leaf_bit = mp->flags & P_LEAF;
  return leaf_bit ? true : false;
}

/* Tells whether the P_DUPFIX flag is set for the page. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_dupfix_leaf(const page_t *mp) {
  const unsigned dupfix_bit = mp->flags & P_DUPFIX;
  return dupfix_bit ? true : false;
}

/* Tells whether the P_BRANCH flag is set for the page. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_branch(const page_t *mp) {
  const unsigned branch_bit = mp->flags & P_BRANCH;
  return branch_bit ? true : false;
}

/* Tells whether the P_LARGE flag is set for the page. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_largepage(const page_t *mp) {
  const unsigned large_bit = mp->flags & P_LARGE;
  return large_bit ? true : false;
}

/* Tells whether the P_SUBP flag is set for the page. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_subpage(const page_t *mp) {
  const unsigned subpage_bit = mp->flags & P_SUBP;
  return subpage_bit ? true : false;
}

/* The version number for a database's lockfile format. */
#define MDBX_LOCK_VERSION 6

/* For each supported MDBX_LOCKING implementation defines the distinct
 * MDBX_LCK_SIGN value and the osal_ipclock_t type of the inter-process
 * lock object. An unsupported MDBX_LOCKING value is a build error. */
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES

#define MDBX_LCK_SIGN UINT32_C(0xF10C)
typedef void osal_ipclock_t;
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV

#define MDBX_LCK_SIGN UINT32_C(0xF18D)
typedef mdbx_pid_t osal_ipclock_t;

#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008

#define MDBX_LCK_SIGN UINT32_C(0x8017)
typedef pthread_mutex_t osal_ipclock_t;

#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988

#define MDBX_LCK_SIGN UINT32_C(0xFC29)
typedef sem_t osal_ipclock_t;

#else
#error "FIXME"
#endif /* MDBX_LOCKING */

/* Statistics for profiling of the GC operation */
typedef struct gc_prof_stat {
  /* Monotonic "wall-clock" time
   * spent on reading and searching within the GC */
  uint64_t rtime_monotonic;
  /* User-mode CPU time spent on preparing the pages
   * fetched from the GC, including paging-in from a disk. */
  uint64_t xtime_cpu;
  /* Number of read/search iterations within the GC during page allocation */
  uint32_t rsteps;
  /* Number of requests for allocation of page sequences,
   * i.e. when allocation of more than one page is requested */
  uint32_t xpages;
  /* Slow path execution count */
  uint32_t spe_counter;
  /* page faults (hard page faults) */
  uint32_t majflt;
  /* For investigating pnl_merge() */
  struct {
    uint64_t time;
    uint64_t volume;
    uint32_t calls;
  } pnl_merge;
} gc_prof_stat_t;

/* Statistics of pages operations for all transactions,
 * including incomplete and aborted. */
typedef struct pgops {
  mdbx_atomic_uint64_t newly;   /* Quantity of a new pages added */
  mdbx_atomic_uint64_t cow;     /* Quantity of pages copied for update */
  mdbx_atomic_uint64_t clone;   /* Quantity of parent's dirty pages clones
                                   for nested transactions */
  mdbx_atomic_uint64_t split;   /* Page splits */
  mdbx_atomic_uint64_t merge;   /* Page merges */
  mdbx_atomic_uint64_t spill;   /* Quantity of spilled dirty pages */
  mdbx_atomic_uint64_t unspill; /* Quantity of unspilled/reloaded pages */
  mdbx_atomic_uint64_t wops;    /* Number of explicit write operations (not a pages) to a disk */
  mdbx_atomic_uint64_t msync;   /* Number of explicit msync/flush-to-disk operations */
  mdbx_atomic_uint64_t fsync;   /* Number of explicit fsync/flush-to-disk operations */

  mdbx_atomic_uint64_t prefault; /* Number of prefault write operations */
  mdbx_atomic_uint64_t mincore;  /* Number of mincore() calls */

  mdbx_atomic_uint32_t incoherence; /* number of https://libmdbx.dqdkfa.ru/dead-github/issues/269
                                       caught */
  mdbx_atomic_uint32_t reserved;

  /* Statistics for profiling of the GC.
   * Logically this data might be worth moving into a separate structure,
   * but the difference would be purely cosmetic. */
  struct {
    /* Costs of maintaining the user data */
    gc_prof_stat_t work;
    /* Costs of maintaining and updating the GC itself */
    gc_prof_stat_t self;
    /* Number of GC update iterations,
     * more than 1 if there were retries/restarts */
    uint32_t wloops;
    /* Iterations of coalescing of GC records */
    uint32_t coalescences;
    /* Wipes of steady commit points in the MDBX_UTTERLY_NOSYNC mode */
    uint32_t wipes;
    /* Flushes of data to a disk outside of the MDBX_UTTERLY_NOSYNC mode */
    uint32_t flushes;
    /* Attempts to kick the laggard readers */
    uint32_t kicks;
  } gc_prof;
} pgop_stat_t;

/* Reader Lock Table
 *
 * Readers don't acquire any locks for their data access. Instead, they
 * simply record their transaction ID in the reader table. The reader
 * mutex is needed just to find an empty slot in the reader table. The
 * slot's address is saved in thread-specific data so that subsequent
 * read transactions started by the same thread need no further locking to
 * proceed.
 *
 * If MDBX_NOSTICKYTHREADS is set, the slot address is not saved in
 * thread-specific data. No reader table is used if the database is on a
 * read-only filesystem.
 *
 * Since the database uses multi-version concurrency control, readers don't
 * actually need any locking. This table is used to keep track of which
 * readers are using data from which old transactions, so that we'll know
 * when a particular old transaction is no longer in use. Old transactions
 * that have discarded any data pages can then have those pages reclaimed
 * for use by a later write transaction.
 *
 * The lock table is constructed such that reader slots are aligned with the
 * processor's cache line size. Any slot is only ever used by one thread.
 * This alignment guarantees that there will be no contention or cache
 * thrashing as threads update their own slot info, and also eliminates
 * any need for locking when accessing a slot.
 *
 * A writer thread will scan every slot in the table to determine the oldest
 * outstanding reader transaction. Any freed pages older than this will be
 * reclaimed by the writer. The writer doesn't use any locks when scanning
 * this table. This means that there's no guarantee that the writer will
 * see the most up-to-date reader info, but that's not required for correct
 * operation - all we need is to know the upper bound on the oldest reader,
 * we don't care at all about the newest reader. So the only consequence of
 * reading stale information here is that old pages might hang around a
 * while longer before being reclaimed. That's actually good anyway, because
 * the longer we delay reclaiming old pages, the more likely it is that a
 * string of contiguous pages can be found after coalescing old pages from
 * many old transactions together. */

/* The actual reader record, with cacheline padding. */
typedef struct reader_slot {
  /* Current Transaction ID when this transaction began, or INVALID_TXNID.
   * Multiple readers that start at the same time will probably have the
   * same ID here. Again, it's not important to exclude them from
   * anything; all we need to know is which version of the DB they
   * started from so we can avoid overwriting any data used in that
   * particular version. */
  atomic_txnid_t txnid;

  /* The information we store in a single slot of the reader table.
   * In addition to a transaction ID, we also record the process and
   * thread ID that owns a slot, so that we can detect stale information,
   * e.g. threads or processes that went away without cleaning up.
   *
   * NOTE: We currently don't check for stale records.
   * We simply re-init the table when we know that we're the only process
   * opening the lock file. */

  /* Pseudo thread_id for marking the ousted read transactions. */
#define MDBX_TID_TXN_OUSTED (UINT64_MAX - 1)

  /* Pseudo thread_id for marking the parked read transactions. */
#define MDBX_TID_TXN_PARKED UINT64_MAX

  /* The thread ID of the thread owning this txn. */
  mdbx_atomic_uint64_t tid;

  /* The process ID of the process owning this reader txn. */
  mdbx_atomic_uint32_t pid;

  /* The number of pages used in the reader's MVCC snapshot,
   * i.e. the value of meta->geometry.first_unallocated and
   * txn->geo.first_unallocated */
  atomic_pgno_t snapshot_pages_used;
  /* Number of retired pages at the time this reader starts transaction. So,
   * at any time the difference meta.pages_retired -
   * reader.snapshot_pages_retired will give the number of pages which this
   * reader restraining from reuse. */
  mdbx_atomic_uint64_t snapshot_pages_retired;
} reader_slot_t;

/* The header for the reader table (a memory-mapped lock file).
 * The structure is split into MDBX_CACHELINE_SIZE-aligned groups by the
 * MDBX_ALIGNAS() markers below. The field layout is part of the lock-file
 * format: selected sizes and offsets are folded into MDBX_LOCK_FORMAT. */
typedef struct shared_lck {
  /* Stamp identifying this as an MDBX file.
   * It must be set to MDBX_MAGIC combined with MDBX_LOCK_VERSION. */
  uint64_t magic_and_version;

  /* Format of this lock file. Must be set to MDBX_LOCK_FORMAT. */
  uint32_t os_and_format;

  /* Flags with which the environment was opened. */
  mdbx_atomic_uint32_t envmode;

  /* Threshold of un-synced-with-disk pages for auto-sync feature,
   * zero means no-threshold, i.e. auto-sync is disabled. */
  atomic_pgno_t autosync_threshold;

  /* Low 32-bit of txnid with which meta-pages was synced,
   * i.e. for sync-polling in the MDBX_NOMETASYNC mode. */
#define MDBX_NOMETASYNC_LAZY_UNK (UINT32_MAX / 3)
#define MDBX_NOMETASYNC_LAZY_FD (MDBX_NOMETASYNC_LAZY_UNK + UINT32_MAX / 8)
#define MDBX_NOMETASYNC_LAZY_WRITEMAP (MDBX_NOMETASYNC_LAZY_UNK - UINT32_MAX / 8)
  mdbx_atomic_uint32_t meta_sync_txnid;

  /* Period for timed auto-sync feature, i.e. at the every steady checkpoint
   * the mti_unsynced_timeout sets to the current_time + autosync_period.
   * The time value is represented in a suitable system-dependent form, for
   * example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC).
   * Zero means timed auto-sync is disabled.
   * NOTE(review): no field named mti_unsynced_timeout is visible in this
   * structure; the name above is probably historical (eoos_timestamp?). */
  mdbx_atomic_uint64_t autosync_period;

  /* Marker to distinguish uniqueness of DB/CLK. */
  mdbx_atomic_uint64_t bait_uniqueness;

  /* Paired counter of processes that have mlock()ed part of mmapped DB.
   * The (mlcnt[0] - mlcnt[1]) > 0 means at least one process
   * lock at least one page, so therefore madvise() could return EINVAL. */
  mdbx_atomic_uint32_t mlcnt[2];

  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/

  /* Statistics of costly ops of all (running, completed and aborted)
   * transactions */
  pgop_stat_t pgops;

  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/

#if MDBX_LOCKING > 0
  /* Write transaction lock. */
  osal_ipclock_t wrt_lock;
#endif /* MDBX_LOCKING > 0 */

  /* NOTE(review): presumably a cached value of the oldest reader txnid;
   * confirm against the code that updates it. */
  atomic_txnid_t cached_oldest;

  /* Timestamp of entering an out-of-sync state. Value is represented in a
   * suitable system-dependent form, for example clock_gettime(CLOCK_BOOTTIME)
   * or clock_gettime(CLOCK_MONOTONIC). */
  mdbx_atomic_uint64_t eoos_timestamp;

  /* Number of un-synced-with-disk pages for auto-sync feature. */
  mdbx_atomic_uint64_t unsynced_pages;

  /* Timestamp of the last readers check. */
  mdbx_atomic_uint64_t readers_check_timestamp;

  /* Number of page which was discarded last time by madvise(DONTNEED). */
  atomic_pgno_t discarded_tail;

  /* Shared anchor for tracking readahead edge and enabled/disabled status. */
  pgno_t readahead_anchor;

  /* Shared cache for mincore() results */
  struct {
    pgno_t begin[4];
    uint64_t mask[4];
  } mincore_cache;

  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/

#if MDBX_LOCKING > 0
  /* Readers table lock. */
  osal_ipclock_t rdt_lock;
#endif /* MDBX_LOCKING > 0 */

  /* The number of slots that have been used in the reader table.
   * This always records the maximum count, it is not decremented
   * when readers release their slots. */
  mdbx_atomic_uint32_t rdt_length;
  /* NOTE(review): purpose not documented here; presumably signals that the
   * reader table needs to be re-scanned - confirm against the users. */
  mdbx_atomic_uint32_t rdt_refresh_flag;

#if FLEXIBLE_ARRAY_MEMBERS
  MDBX_ALIGNAS(MDBX_CACHELINE_SIZE) /* cacheline ----------------------------*/
  reader_slot_t rdt[] /* dynamic size */;

/* Lockfile format signature: version, features and field layout.
 * Computed from MDBX_LCK_SIGN, sizeof(reader_slot_t) and several field
 * offsets, so changing the layout of these structures changes the value. */
#define MDBX_LOCK_FORMAT                                                                                               \
  (MDBX_LCK_SIGN * 27733 + (unsigned)sizeof(reader_slot_t) * 13 +                                                      \
   (unsigned)offsetof(reader_slot_t, snapshot_pages_used) * 251 + (unsigned)offsetof(lck_t, cached_oldest) * 83 +      \
   (unsigned)offsetof(lck_t, rdt_length) * 37 + (unsigned)offsetof(lck_t, rdt) * 29)
#endif /* FLEXIBLE_ARRAY_MEMBERS */
} lck_t;

/* Lock-file stamp: MDBX_MAGIC shifted left by 8 bits plus MDBX_LOCK_VERSION. */
#define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION)

/* Upper bound for the number of reader slots. */
#define MDBX_READERS_LIMIT 32767

/* Smallest map size: MIN_PAGENO pages of the minimal page size. */
#define MIN_MAPSIZE (MDBX_MIN_PAGESIZE * MIN_PAGENO)
/* Map size ceiling used when MDBX_WORDBITS < 64; smaller on Windows. */
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
/* Map size ceiling used when MDBX_WORDBITS >= 64: (MAX_PAGENO + 1) pages of
 * the maximal page size, computed in 64-bit arithmetic. */
#define MAX_MAPSIZE64 ((MAX_PAGENO + 1) * (uint64_t)MDBX_MAX_PAGESIZE)

/* Select the effective map-size ceiling and the page-list length limit
 * according to the word size of the target. */
#if MDBX_WORDBITS >= 64
#define MAX_MAPSIZE MAX_MAPSIZE64
#define PAGELIST_LIMIT ((size_t)MAX_PAGENO)
#else
#define MAX_MAPSIZE MAX_MAPSIZE32
#define PAGELIST_LIMIT (MAX_MAPSIZE32 / MDBX_MIN_PAGESIZE)
#endif /* MDBX_WORDBITS */

/* The golden ratio as a double-precision literal. */
#define MDBX_GOLD_RATIO_DBL 1.6180339887498948482
/* 2^20 bytes, typed as size_t. */
#define MEGABYTE ((size_t)1 << 20)

/*----------------------------------------------------------------------------*/

/* Holds the logger callback in one of two function-pointer flavours
 * (MDBX_debug_func or MDBX_debug_func_nofmt); `ptr` overlays both as a
 * generic pointer.
 * NOTE(review): which member is active is decided elsewhere - not visible
 * in this part of the file. */
union logger_union {
  void *ptr;
  MDBX_debug_func *fmt;
  MDBX_debug_func_nofmt *nofmt;
};

/* Process-wide library state, exposed as the single `globals` object.
 * The macros below read `loglevel` (LOG_ENABLED) and `runtime_flags`
 * (AUDIT_ENABLED, ASSERT_ENABLED, jitter4testing). Platform-specific members
 * exist only on the corresponding platform. */
struct libmdbx_globals {
  bin128_t bootid;
  unsigned sys_pagesize, sys_allocation_granularity;
  /* NOTE(review): presumably log2 of sys_pagesize - confirm at the
   * initialization site. */
  uint8_t sys_pagesize_ln2;
  /* Bit set of MDBX_DBG_* flags (tested against MDBX_DBG_AUDIT,
   * MDBX_DBG_ASSERT and MDBX_DBG_JITTER in this file). */
  uint8_t runtime_flags;
  /* Current logging threshold compared against MDBX_LOG_* levels. */
  uint8_t loglevel;
#if defined(_WIN32) || defined(_WIN64)
  bool running_under_Wine;
#elif defined(__linux__) || defined(__gnu_linux__)
  bool running_on_WSL1 /* Windows Subsystem 1 for Linux */;
  uint32_t linux_kernel_version;
#endif /* Linux */
  union logger_union logger;
  osal_fastmutex_t debug_lock;
  size_t logger_buffer_size;
  char *logger_buffer;
};

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* The single instance of the library-wide state; defined elsewhere. */
extern struct libmdbx_globals globals;
#if defined(_WIN32) || defined(_WIN64)
/* Windows-only table declared here; struct libmdbx_imports is defined
 * outside this part of the file. */
extern struct libmdbx_imports imports;
#endif /* Windows */

/* Identity helper that converts any object pointer argument to
 * `const void *` (through the implicit conversion at the call) and returns
 * it unchanged. Judging by the name it is meant for passing pointers to
 * "%p" without -Wpedantic format warnings.
 * The same-named macro makes the identifier visible to #ifndef, so the
 * helper is defined only once. */
#ifndef __Wpedantic_format_voidptr
MDBX_MAYBE_UNUSED static inline const void *__Wpedantic_format_voidptr(const void *ptr) { return ptr; }
#define __Wpedantic_format_voidptr(ARG) __Wpedantic_format_voidptr(ARG)
#endif /* __Wpedantic_format_voidptr */

/* printf-style logging entry point: `level` is an MDBX_LOG_* value,
 * `function` and `line` identify the call site (the DEBUG_EXTRA_PRINT macro
 * passes nullptr and 0), `fmt` is argument 4 and the variadic arguments
 * start at position 5, as declared by MDBX_PRINTF_ARGS(4, 5).
 * NOTE(review): the attribute macro appears both before the name and after
 * the parameter list; presumably only one position is non-empty for a given
 * compiler - confirm at the MDBX_PRINTF_ARGS definition. */
MDBX_INTERNAL void MDBX_PRINTF_ARGS(4, 5) debug_log(int level, const char *function, int line, const char *fmt, ...)
    MDBX_PRINTF_ARGS(4, 5);
/* Same as debug_log(), but takes the arguments as a va_list. */
MDBX_INTERNAL void debug_log_va(int level, const char *function, int line, const char *fmt, va_list args);

/* LOG_ENABLED(LVL): true when messages of level LVL should be emitted.
 * In debug builds only the runtime threshold globals.loglevel is checked;
 * otherwise levels that are not below MDBX_LOG_VERBOSE are additionally
 * rejected by the `LVL < MDBX_LOG_VERBOSE` term.
 * AUDIT_ENABLED(): runtime MDBX_DBG_AUDIT flag in debug builds, constant 0
 * otherwise. */
#if MDBX_DEBUG
#define LOG_ENABLED(LVL) unlikely(LVL <= globals.loglevel)
#define AUDIT_ENABLED() unlikely((globals.runtime_flags & (unsigned)MDBX_DBG_AUDIT))
#else /* MDBX_DEBUG */
#define LOG_ENABLED(LVL) (LVL < MDBX_LOG_VERBOSE && LVL <= globals.loglevel)
#define AUDIT_ENABLED() (0)
#endif /* LOG_ENABLED() & AUDIT_ENABLED() */

/* ASSERT_ENABLED(): constant 1 when MDBX_FORCE_ASSERTIONS is set, the
 * runtime MDBX_DBG_ASSERT flag in debug builds, constant 0 otherwise. */
#if MDBX_FORCE_ASSERTIONS
#define ASSERT_ENABLED() (1)
#elif MDBX_DEBUG
#define ASSERT_ENABLED() likely((globals.runtime_flags & (unsigned)MDBX_DBG_ASSERT))
#else
#define ASSERT_ENABLED() (0)
#endif /* ASSERT_ENABLED() */

/* Level-specific logging macros. Each one tests LOG_ENABLED() for its level
 * and, when enabled, forwards to debug_log(). __VA_ARGS__ is pasted after a
 * comma, so at least one argument must follow the format string.
 *
 * DEBUG_EXTRA passes `fmt` as is (no newline is appended) together with the
 * caller's function name and line. */
#define DEBUG_EXTRA(fmt, ...)                                                                                          \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_EXTRA))                                                                                   \
      debug_log(MDBX_LOG_EXTRA, __func__, __LINE__, fmt, __VA_ARGS__);                                                 \
  } while (0)

/* Like DEBUG_EXTRA, but passes nullptr/0 instead of the function name and
 * line number. */
#define DEBUG_EXTRA_PRINT(fmt, ...)                                                                                    \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_EXTRA))                                                                                   \
      debug_log(MDBX_LOG_EXTRA, nullptr, 0, fmt, __VA_ARGS__);                                                         \
  } while (0)

/* TRACE .. ERROR append "\n" by string-literal concatenation, therefore
 * `fmt` must be a string literal. */
#define TRACE(fmt, ...)                                                                                                \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_TRACE))                                                                                   \
      debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__);                                            \
  } while (0)

#define DEBUG(fmt, ...)                                                                                                \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_DEBUG))                                                                                   \
      debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__);                                            \
  } while (0)

#define VERBOSE(fmt, ...)                                                                                              \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_VERBOSE))                                                                                 \
      debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__);                                          \
  } while (0)

#define NOTICE(fmt, ...)                                                                                               \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_NOTICE))                                                                                  \
      debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__);                                           \
  } while (0)

#define WARNING(fmt, ...)                                                                                              \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_WARN))                                                                                    \
      debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__);                                             \
  } while (0)

#undef ERROR /* wingdi.h defines ERROR in a public header; drop it so it can be redefined below. */

#define ERROR(fmt, ...)                                                                                                \
  do {                                                                                                                 \
    if (LOG_ENABLED(MDBX_LOG_ERROR))                                                                                   \
      debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__);                                            \
  } while (0)

/* Report a fatal condition: calls debug_log() with MDBX_LOG_FATAL
 * unconditionally (no LOG_ENABLED() check). "\n" is appended by
 * string-literal concatenation, so `fmt` must be a string literal, and at
 * least one argument must follow it.
 * The expansion deliberately carries no trailing semicolon: the call site
 * supplies it, so `FATAL(...);` is exactly one statement and stays safe
 * inside an unbraced if/else. */
#define FATAL(fmt, ...) debug_log(MDBX_LOG_FATAL, __func__, __LINE__, fmt "\n", __VA_ARGS__)

/* ASSERT_FAIL(env, msg, func, line): report a failed check.
 * Debug builds forward everything, including `env`, to mdbx_assert_fail().
 * Other builds call the non-returning assert_fail() and only evaluate `env`
 * to keep it "used". */
#if MDBX_DEBUG
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
#else /* MDBX_DEBUG */
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line);
#define ASSERT_FAIL(env, msg, func, line)                                                                              \
  do {                                                                                                                 \
    (void)(env);                                                                                                       \
    assert_fail(msg, func, line);                                                                                      \
  } while (0)
#endif /* MDBX_DEBUG */

/* Always-on check: if `expr` is false, report `msg` through ASSERT_FAIL()
 * with the caller's function name and line. Not gated by ASSERT_ENABLED(). */
#define ENSURE_MSG(env, expr, msg)                                                                                     \
  do {                                                                                                                 \
    if (unlikely(!(expr)))                                                                                             \
      ASSERT_FAIL(env, msg, __func__, __LINE__);                                                                       \
  } while (0)

/* ENSURE_MSG() with the stringified expression as the message. */
#define ENSURE(env, expr) ENSURE_MSG(env, expr, #expr)

/* assert(3) variant in environment context; `expr` is evaluated only when
 * ASSERT_ENABLED() is true. */
#define eASSERT(env, expr)                                                                                             \
  do {                                                                                                                 \
    if (ASSERT_ENABLED())                                                                                              \
      ENSURE(env, expr);                                                                                               \
  } while (0)

/* assert(3) variant in cursor context */
#define cASSERT(mc, expr) eASSERT((mc)->txn->env, expr)

/* assert(3) variant in transaction context */
#define tASSERT(txn, expr) eASSERT((txn)->env, expr)

/* Outside of the tools build the standard assert() is replaced by eASSERT()
 * with a null environment. Since eASSERT() expands to a do/while statement,
 * assert() can only be used as a statement here, not inside an expression. */
#ifndef xMDBX_TOOLS /* Avoid using internal eASSERT() */
#undef assert
#define assert(expr) eASSERT(nullptr, expr)
#endif

/* Testing aid: in debug builds calls osal_jitter(tiny) when the
 * MDBX_DBG_JITTER bit is set in globals.runtime_flags; in other builds it
 * does nothing. */
MDBX_MAYBE_UNUSED static inline void jitter4testing(bool tiny) {
#if !MDBX_DEBUG
  (void)tiny; /* nothing to do, just keep the parameter "used" */
#else
  const unsigned jitter_requested = globals.runtime_flags & (unsigned)MDBX_DBG_JITTER;
  if (jitter_requested != 0)
    osal_jitter(tiny);
#endif
}

/* NOTE(review): presumably dumps the content of a page for debugging;
 * the definition is outside this part of the file. */
MDBX_MAYBE_UNUSED MDBX_INTERNAL void page_list(page_t *mp);

/* Returns a textual caption for a page type; `buf4unknown` is a caller
 * provided 16-byte buffer, judging by its name used for unknown types. */
MDBX_INTERNAL const char *pagetype_caption(const uint8_t type, char buf4unknown[16]);
/* Key size which fits in a DKBUF (debug key buffer). */
#define DKBUF_MAX 127
/* DKBUF declares the local buffer `dbg_kbuf`; DKEY() dumps into its first
 * DKBUF_MAX * 2 + 1 bytes and DVAL() into the following DKBUF_MAX * 2 + 1
 * bytes, so one key and one value can be printed by the same call. */
#define DKBUF char dbg_kbuf[DKBUF_MAX * 4 + 2]
#define DKEY(x) mdbx_dump_val(x, dbg_kbuf, DKBUF_MAX * 2 + 1)
#define DVAL(x) mdbx_dump_val(x, dbg_kbuf + DKBUF_MAX * 2 + 1, DKBUF_MAX * 2 + 1)

/* Debug-only flavours: in non-debug builds no buffer is declared and both
 * dump macros yield the literal "-" without evaluating their argument. */
#if MDBX_DEBUG
#define DKBUF_DEBUG DKBUF
#define DKEY_DEBUG(x) DKEY(x)
#define DVAL_DEBUG(x) DVAL(x)
#else
#define DKBUF_DEBUG ((void)(0))
#define DKEY_DEBUG(x) ("-")
#define DVAL_DEBUG(x) ("-")
#endif

MDBX_INTERNAL void log_error(const int err, const char *func, unsigned line);

/* Pass-through helper: hands `err` to log_error() together with the call
 * site unless it equals MDBX_SUCCESS, and always returns `err` unchanged. */
MDBX_MAYBE_UNUSED static inline int log_if_error(const int err, const char *func, unsigned line) {
  if (likely(err == MDBX_SUCCESS))
    return err;

  log_error(err, func, line);
  return err;
}

/* Wraps an error code so that failures are logged with the caller's
 * function name and line number. */
#define LOG_IFERR(err) log_if_error((err), __func__, __LINE__)

/* Test if all the flags f are set in a flag word w.
 * Note: f is evaluated twice. */
#define F_ISSET(w, f) (((w) & (f)) == (f))

/* Round n up to an even number.
 * The signed constant -2L is intentional: when n + 1UL is wider than long,
 * -2L is sign-extended so the mask keeps all the high bits. */
#define EVEN_CEIL(n) (((n) + 1UL) & -2L) /* sign-extending -2 to match n+1U */

/* Round n down to an even number. */
#define EVEN_FLOOR(n) ((n) & ~(size_t)1)

/*
 *                /
 *                | -1, a < b
 * CMP2INT(a,b) = <  0, a == b
 *                |  1, a > b
 *                \
 *
 * Note: the arguments may be evaluated more than once.
 */
#define CMP2INT(a, b) (((a) != (b)) ? (((a) < (b)) ? -1 : 1) : 0)

/* Pointer displacement without casting to char* to avoid pointer-aliasing.
 * The arithmetic is done on intptr_t and the result is a void pointer. */
#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp))))

/* Pointer distance as signed number of bytes */
#define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less)))

/* Traced wrappers around the AddressSanitizer region macros: each emits a
 * TRACE() record with the address, size and source line, then invokes the
 * corresponding ASAN_* macro. `addr` and `size` are expanded twice.
 * NOTE(review): __LINE__ has type int but is printed with "%u". */
#define MDBX_ASAN_POISON_MEMORY_REGION(addr, size)                                                                     \
  do {                                                                                                                 \
    TRACE("POISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), (size_t)(size), __LINE__);                            \
    ASAN_POISON_MEMORY_REGION(addr, size);                                                                             \
  } while (0)

#define MDBX_ASAN_UNPOISON_MEMORY_REGION(addr, size)                                                                   \
  do {                                                                                                                 \
    TRACE("UNPOISON_MEMORY_REGION(%p, %zu) at %u", (void *)(addr), (size_t)(size), __LINE__);                          \
    ASAN_UNPOISON_MEMORY_REGION(addr, size);                                                                           \
  } while (0)

/* Returns the absolute value of `value` as size_t without a branch.
 *
 * `expanded_sign` is all-ones for a negative input and zero otherwise; this
 * relies on the right shift of a negative signed value being arithmetic,
 * which is implementation-defined in C. `(v + s) ^ s` then yields the
 * two's-complement negation for negative inputs and the value itself for
 * non-negative ones.
 *
 * The assertion excludes only INTPTR_MIN, the most negative value of the
 * parameter's own type. Comparing against INT_MIN would spuriously reject
 * valid inputs at or below INT_MIN when intptr_t is wider than int. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t branchless_abs(intptr_t value) {
  assert(value > INTPTR_MIN);
  const size_t expanded_sign = (size_t)(value >> (sizeof(value) * CHAR_BIT - 1));
  return ((size_t)value + expanded_sign) ^ expanded_sign;
}

/* True when `x` has at most one bit set: clearing the lowest set bit must
 * leave nothing. Note that zero also passes this test. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline bool is_powerof2(size_t x) {
  const size_t without_lowest_bit = x & (x - 1);
  return without_lowest_bit == 0;
}

/* Rounds `value` down to a multiple of `granularity`, which is asserted to
 * satisfy is_powerof2(). */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t floor_powerof2(size_t value, size_t granularity) {
  assert(is_powerof2(granularity));
  const size_t low_bits = granularity - 1;
  return value & ~low_bits;
}

/* Rounds `value` up to a multiple of `granularity` by adding
 * granularity - 1 and then rounding down with floor_powerof2(). */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t ceil_powerof2(size_t value, size_t granularity) {
  const size_t bumped = value + granularity - 1;
  return floor_powerof2(bumped, granularity);
}

/* NOTE(review): judging by the name, returns log2 of a value that is a
 * power of two; the definition is outside this part of the file. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL unsigned log2n_powerof2(size_t value_uintptr);

/* Pure 64-bit to 64-bit function (declared const).
 * NOTE(review): presumably a bit-mixing/hash function, as the name suggests
 * a rotate-xor-multiply scheme - confirm at the definition. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint64_t rrxmrrxmsx_0(uint64_t v);

/* Cached monotonic timestamp together with the number of further uses
 * allowed before it has to be refreshed. */
struct monotime_cache {
  uint64_t value;
  int expire_countdown;
};

/* Returns the time elapsed since `begin_timestamp` using a cached clock
 * reading. The clock is sampled through osal_monotime() only when the
 * countdown has reached zero, after which the sample is reused for the next
 * 42 / 3 calls; every other call merely decrements the countdown. */
MDBX_MAYBE_UNUSED static inline uint64_t monotime_since_cached(uint64_t begin_timestamp, struct monotime_cache *cache) {
  if (cache->expire_countdown == 0) {
    cache->value = osal_monotime();
    cache->expire_countdown = 42 / 3;
  } else {
    cache->expire_countdown -= 1;
  }
  return cache->value - begin_timestamp;
}

/* A PNL is a Page Number List, a sorted array of IDs.
 *
 * The first element of the array is a counter for how many actual page-numbers
 * are in the list. By default PNLs are sorted in descending order, which
 * allows cutting off a page with the lowest pgno (at the tail) by just
 * truncating the list. The sort order of PNLs is controlled by the
 * MDBX_PNL_ASCENDING build option. */
typedef pgno_t *pnl_t;
typedef const pgno_t *const_pnl_t;

/* Ordering predicates for two adjacent items, according to the configured
 * sort direction. Equal items count as disordered. */
#if MDBX_PNL_ASCENDING
#define MDBX_PNL_ORDERED(first, last) ((first) < (last))
#define MDBX_PNL_DISORDERED(first, last) ((first) >= (last))
#else
#define MDBX_PNL_ORDERED(first, last) ((first) > (last))
#define MDBX_PNL_DISORDERED(first, last) ((first) <= (last))
#endif

/* Allocation granularity of a PNL, in items (a power of two), and the
 * initial capacity derived from it. */
#define MDBX_PNL_GRANULATE_LOG2 10
#define MDBX_PNL_GRANULATE (1 << MDBX_PNL_GRANULATE_LOG2)
#define MDBX_PNL_INITIAL (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t))

/* Accessors. The allocated length is kept in the element just before the
 * list pointer ([-1]), the item count in element [0], and the items occupy
 * elements [1] .. [count]. */
#define MDBX_PNL_ALLOCLEN(pl) ((pl)[-1])
#define MDBX_PNL_GETSIZE(pl) ((size_t)((pl)[0]))
/* Stores a new item count; asserts that it is below INT_MAX. */
#define MDBX_PNL_SETSIZE(pl, size)                                                                                     \
  do {                                                                                                                 \
    const size_t __size = size;                                                                                        \
    assert(__size < INT_MAX);                                                                                          \
    (pl)[0] = (pgno_t)__size;                                                                                          \
  } while (0)
#define MDBX_PNL_FIRST(pl) ((pl)[1])
#define MDBX_PNL_LAST(pl) ((pl)[MDBX_PNL_GETSIZE(pl)])
/* Pointers to the first item and to one past the last item. */
#define MDBX_PNL_BEGIN(pl) (&(pl)[1])
#define MDBX_PNL_END(pl) (&(pl)[MDBX_PNL_GETSIZE(pl) + 1])

/* Direction-dependent accessors: EDGE points to the item with the lowest
 * page number, LEAST/MOST are the lowest/highest page numbers. */
#if MDBX_PNL_ASCENDING
#define MDBX_PNL_EDGE(pl) ((pl) + 1)
#define MDBX_PNL_LEAST(pl) MDBX_PNL_FIRST(pl)
#define MDBX_PNL_MOST(pl) MDBX_PNL_LAST(pl)
#else
#define MDBX_PNL_EDGE(pl) ((pl) + MDBX_PNL_GETSIZE(pl))
#define MDBX_PNL_LEAST(pl) MDBX_PNL_LAST(pl)
#define MDBX_PNL_MOST(pl) MDBX_PNL_FIRST(pl)
#endif

/* Size in bytes of the counter plus the items, and the emptiness test. */
#define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
#define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)

/* Converts a requested PNL capacity (in items) into the number of bytes to
 * allocate. Three extra pgno_t slots are added, the total including the
 * assumed malloc overhead is rounded up to a multiple of
 * MDBX_PNL_GRANULATE items, and the overhead is then subtracted again.
 * With MDBX_PNL_PREALLOC_FOR_RADIXSORT the capacity is doubled first. */
MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) {
  assert(size > 0 && size <= PAGELIST_LIMIT);
  /* The largest possible request must stay far away from size_t overflow. */
  STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD +
                    (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + MDBX_PNL_GRANULATE + 3) * sizeof(pgno_t) <
                SIZE_MAX / 4 * 3);
#if MDBX_PNL_PREALLOC_FOR_RADIXSORT
  size *= 2;
#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */
  const size_t payload = sizeof(pgno_t) * (size + 3);
  const size_t chunk = MDBX_PNL_GRANULATE * sizeof(pgno_t);
  return ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + payload, chunk) - MDBX_ASSUME_MALLOC_OVERHEAD;
}

MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size(const size_t bytes) {
  size_t size = bytes / sizeof(pgno_t);
  assert(size > 3 && size <= PAGELIST_LIMIT + /* alignment gap */ 65536);
  size -= 3;
#if MDBX_PNL_PREALLOC_FOR_RADIXSORT
  size >>= 1;
#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */
  return (pgno_t)size;
}

/* PNL storage management; the definitions are outside this part of the
 * file.
 * NOTE(review): presumably pnl_alloc() creates a list with room for `size`
 * items and pnl_free() releases it - confirm ownership and failure
 * semantics at the definitions. */
MDBX_INTERNAL pnl_t pnl_alloc(size_t size);

MDBX_INTERNAL void pnl_free(pnl_t pnl);

/* Called by pnl_need() when the current allocation is smaller than `wanna`
 * items; returns an MDBX error code (MDBX_SUCCESS on success, judging by
 * how pnl_need() uses it). */
MDBX_INTERNAL int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna);

/* Makes sure the list at *ppnl has room for `num` more items.
 * Returns MDBX_SUCCESS immediately when the allocated length already covers
 * the current size plus `num`; otherwise returns the result of
 * pnl_reserve() for the required total. */
MDBX_MAYBE_UNUSED static inline int __must_check_result pnl_need(pnl_t __restrict *__restrict ppnl, size_t num) {
  assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl));
  assert(num <= PAGELIST_LIMIT);
  const size_t wanna = MDBX_PNL_GETSIZE(*ppnl) + num;
  if (likely(MDBX_PNL_ALLOCLEN(*ppnl) >= wanna))
    return MDBX_SUCCESS;
  return pnl_reserve(ppnl, wanna);
}

/* Appends `pgno` at the tail of a list whose capacity was reserved in
 * advance (asserted: size < allocated length). No sorting is performed.
 * When auditing is enabled, every existing item is asserted to differ from
 * the new one. */
MDBX_MAYBE_UNUSED static inline void pnl_append_prereserved(__restrict pnl_t pnl, pgno_t pgno) {
  assert(MDBX_PNL_GETSIZE(pnl) < MDBX_PNL_ALLOCLEN(pnl));
  if (AUDIT_ENABLED()) {
    size_t i = MDBX_PNL_GETSIZE(pnl);
    while (i > 0) {
      assert(pgno != pnl[i]);
      --i;
    }
  }
  const size_t tail = MDBX_PNL_GETSIZE(pnl) + 1;
  pnl[0] += 1;
  pnl[tail] = pgno;
}

/* Out-of-line PNL operations; the definitions are outside this part of the
 * file, so the notes below are inferred from names and signatures only.
 *
 * NOTE(review): presumably releases excess capacity of the list. */
MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl);

/* The three *_span functions take a starting page number and a count `n`
 * and return an MDBX error code that must be checked.
 * NOTE(review): presumably they add the span pgno .. pgno + n - 1 to the
 * list - confirm the exact encoding used by spill_append_span(). */
MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n);

MDBX_INTERNAL int __must_check_result pnl_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n);

MDBX_INTERNAL int __must_check_result pnl_insert_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n);

/* Search without the validity assertions performed by pnl_search(). */
MDBX_INTERNAL size_t pnl_search_nochk(const pnl_t pnl, pgno_t pgno);

/* Sort without the validity assertion performed by pnl_sort(). */
MDBX_INTERNAL void pnl_sort_nochk(pnl_t pnl);

/* Validity check of a list against `limit`; used in assertions by
 * pnl_check_allocated() and pnl_sort(). */
MDBX_INTERNAL bool pnl_check(const const_pnl_t pnl, const size_t limit);

/* Validates a list that may be absent: a null list is accepted; otherwise
 * the allocated length must be no less than the item count and pnl_check()
 * must succeed for `limit`. */
MDBX_MAYBE_UNUSED static inline bool pnl_check_allocated(const const_pnl_t pnl, const size_t limit) {
  if (pnl == nullptr)
    return true;
  if (MDBX_PNL_ALLOCLEN(pnl) < MDBX_PNL_GETSIZE(pnl))
    return false;
  return pnl_check(pnl, limit);
}

/* Sorts the list via pnl_sort_nochk() and then asserts that the result
 * passes pnl_check() for `limit4check`. */
MDBX_MAYBE_UNUSED static inline void pnl_sort(pnl_t pnl, size_t limit4check) {
  (void)limit4check; /* referenced only by the assertion below */
  pnl_sort_nochk(pnl);
  assert(pnl_check(pnl, limit4check));
}

/* Checked front-end for pnl_search_nochk(): asserts that the list is valid
 * for `limit` and that `pgno` is below `limit`, then returns whatever
 * pnl_search_nochk() returns for `pgno`. */
MDBX_MAYBE_UNUSED static inline size_t pnl_search(const pnl_t pnl, pgno_t pgno, size_t limit) {
  assert(pnl_check_allocated(pnl, limit));
  assert(pgno < limit);
  (void)limit;

  if (MDBX_HAVE_CMOV) {
    /* The cmov-accelerated binary search may read (but never use) one
     * element past the end of the data. That element lies inside the
     * allocated region but is not initialized, so it is marked as defined
     * for Valgrind during the search and as undefined again afterwards. */
    VALGRIND_MAKE_MEM_DEFINED(MDBX_PNL_END(pnl), sizeof(pgno_t));
    const size_t found = pnl_search_nochk(pnl, pgno);
    VALGRIND_MAKE_MEM_UNDEFINED(MDBX_PNL_END(pnl), sizeof(pgno_t));
    return found;
  }

  return pnl_search_nochk(pnl, pgno);
}

/* NOTE(review): presumably merges the items of `src` into `dst`; the
 * meaning of the returned size_t is not visible here - confirm at the
 * definition. */
MDBX_INTERNAL size_t pnl_merge(pnl_t dst, const pnl_t src);

#ifdef __cplusplus
}
#endif /* __cplusplus */

/* Symbol name built by pasting "mdbx_sourcery_" with the MDBX_BUILD_SOURCERY
 * token; it is declared here only for the tools build.
 * NOTE(review): presumably used to tie tools to the exact library build at
 * link time - confirm at the definition site. */
#define mdbx_sourcery_anchor XCONCAT(mdbx_sourcery_, MDBX_BUILD_SOURCERY)
#if defined(xMDBX_TOOLS)
extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
#endif

/* True for any return code other than the two non-error results
 * MDBX_RESULT_TRUE and MDBX_RESULT_FALSE. Note: rc is evaluated twice. */
#define MDBX_IS_ERROR(rc) ((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)

/*----------------------------------------------------------------------------*/

/* Saturating conversion of a 64-bit signed value to a page number.
 * Values inside [MIN_PAGENO, MAX_PAGENO + 1] are returned as is; anything
 * below the range yields MIN_PAGENO and anything above it yields
 * MAX_PAGENO. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t int64pgno(int64_t i64) {
  if (unlikely(i64 < (int64_t)MIN_PAGENO))
    return MIN_PAGENO;
  if (unlikely(i64 > (int64_t)MAX_PAGENO + 1))
    return MAX_PAGENO;
  return (pgno_t)i64;
}

/* Adds `augend` pages to `base`, with the sum computed in 64-bit signed
 * arithmetic and clamped by int64pgno(). */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pgno_add(size_t base, size_t augend) {
  assert(base <= MAX_PAGENO + 1 && augend < MAX_PAGENO);
  const int64_t sum = (int64_t)base + (int64_t)augend;
  return int64pgno(sum);
}

/* Subtracts `subtrahend` pages from `base`, with the difference computed in
 * 64-bit signed arithmetic and clamped by int64pgno(). */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pgno_sub(size_t base, size_t subtrahend) {
  assert(base >= MIN_PAGENO && base <= MAX_PAGENO + 1 && subtrahend < MAX_PAGENO);
  const int64_t difference = (int64_t)base - (int64_t)subtrahend;
  return int64pgno(difference);
}

/*----------------------------------------------------------------------------*/

/* Forward declarations of internal structures; their definitions are
 * outside this part of the file. Note that `subcur_t` names
 * `struct inner_cursor`. */
typedef struct dp dp_t;
typedef struct dpl dpl_t;
typedef struct kvx kvx_t;
typedef struct meta_ptr meta_ptr_t;
typedef struct inner_cursor subcur_t;
typedef struct cursor_couple cursor_couple_t;
typedef struct defer_free_item defer_free_item_t;

/* Compact summary of the NUM_METAS meta pages: one txnid per meta page plus
 * four packed byte-sized fields. The accessor macros below only extract
 * bits; what each bit means is established by the code that fills the
 * structure (not visible here). */
typedef struct troika {
  uint8_t fsm, recent, prefer_steady, tail_and_flags;
#if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */
  uint32_t unused_pad;
#endif
/* Low three bits of `fsm`. */
#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7u)
/* Bit 6 (64) of `tail_and_flags`. */
#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64u)
/* Bit 7 (128) of `tail_and_flags`. */
#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128u)
/* Low two bits of `tail_and_flags`. */
#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3u)
  txnid_t txnid[NUM_METAS];
} troika_t;

/* Small by-value result types that pair a payload with a status. */

/* Result of getting a page: the page pointer and an error code. */
typedef struct page_get_result {
  page_t *page;
  int err;
} pgr_t;

/* Result of a node search: the node pointer and whether the match was
 * exact. */
typedef struct node_search_result {
  node_t *node;
  bool exact;
} nsr_t;

/* Result of binding a reader slot: an error code and the slot pointer. */
typedef struct bind_reader_slot_result {
  int err;
  reader_slot_t *rslot;
} bsr_t;

#ifndef __cplusplus

/* osal_memory_fence(order, write): memory fence for the given ordering.
 * With C11 atomics it is atomic_thread_fence() with the order mapped by
 * mo_c11_store() for writes or mo_c11_load() for reads.
 * Otherwise it is always a compiler barrier, followed by
 * osal_memory_barrier() only for writes whose order exceeds a threshold:
 * mo_Relaxed when MDBX_CPU_WRITEBACK_INCOHERENT is set, mo_AcquireRelease
 * otherwise. Note: the fallback expands `write` and `order` unparenthesized. */
#ifdef MDBX_HAVE_C11ATOMICS
#define osal_memory_fence(order, write) atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order))
#else /* MDBX_HAVE_C11ATOMICS */
#define osal_memory_fence(order, write)                                                                                \
  do {                                                                                                                 \
    osal_compiler_barrier();                                                                                           \
    if (write && order > (MDBX_CPU_WRITEBACK_INCOHERENT ? mo_Relaxed : mo_AcquireRelease))                             \
      osal_memory_barrier();                                                                                           \
  } while (0)
#endif /* MDBX_HAVE_C11ATOMICS */

/* Macro flavours of the atomic load/store helpers, used only for the LCC
 * compiler with C11 atomics. The store macros are statement expressions
 * that evaluate `value` once and yield the stored value. When these macros
 * are defined, the inline functions of the same names are skipped by their
 * #ifndef guards. */
#if defined(MDBX_HAVE_C11ATOMICS) && defined(__LCC__)
#define atomic_store32(p, value, order)                                                                                \
  ({                                                                                                                   \
    const uint32_t value_to_store = (value);                                                                           \
    atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value_to_store, mo_c11_store(order));                             \
    value_to_store;                                                                                                    \
  })
#define atomic_load32(p, order) atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order))
#define atomic_store64(p, value, order)                                                                                \
  ({                                                                                                                   \
    const uint64_t value_to_store = (value);                                                                           \
    atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value_to_store, mo_c11_store(order));                             \
    value_to_store;                                                                                                    \
  })
#define atomic_load64(p, order) atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order))
#endif /* LCC && MDBX_HAVE_C11ATOMICS */

/* Stores `value` into *p with the requested memory order and returns the
 * stored value.
 * With C11 atomics this is atomic_store_explicit() (after asserting that
 * the object is lock-free). Otherwise it is a plain write to p->weak,
 * preceded by a compiler barrier unless the order is mo_Relaxed and
 * followed by osal_memory_fence(order, true). */
#ifndef atomic_store32
MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_store32(mdbx_atomic_uint32_t *p, const uint32_t value,
                                                                 enum mdbx_memory_order order) {
  STATIC_ASSERT(sizeof(mdbx_atomic_uint32_t) == 4);
#ifdef MDBX_HAVE_C11ATOMICS
  assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p)));
  atomic_store_explicit(MDBX_c11a_rw(uint32_t, p), value, mo_c11_store(order));
#else  /* MDBX_HAVE_C11ATOMICS */
  if (order != mo_Relaxed)
    osal_compiler_barrier();
  p->weak = value;
  osal_memory_fence(order, true);
#endif /* MDBX_HAVE_C11ATOMICS */
  return value;
}
#endif /* atomic_store32 */

/* Loads and returns the 32-bit value of *p with the requested memory order.
 * With C11 atomics this is atomic_load_explicit() (after asserting that the
 * object is lock-free). Otherwise it is osal_memory_fence(order, false)
 * followed by a plain read of p->weak, and then a compiler barrier unless
 * the order is mo_Relaxed. */
#ifndef atomic_load32
MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_load32(const volatile mdbx_atomic_uint32_t *p,
                                                                enum mdbx_memory_order order) {
  STATIC_ASSERT(sizeof(mdbx_atomic_uint32_t) == 4);
#ifdef MDBX_HAVE_C11ATOMICS
  assert(atomic_is_lock_free(MDBX_c11a_ro(uint32_t, p)));
  return atomic_load_explicit(MDBX_c11a_ro(uint32_t, p), mo_c11_load(order));
#else  /* MDBX_HAVE_C11ATOMICS */
  osal_memory_fence(order, false);
  const uint32_t value = p->weak;
  if (order != mo_Relaxed)
    osal_compiler_barrier();
  return value;
#endif /* MDBX_HAVE_C11ATOMICS */
}
#endif /* atomic_load32 */

/*------------------------------------------------------------------------------
 * Safe reading/writing of volatile 64-bit fields on 32-bit architectures. */

/* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
 * #define xMDBX_TXNID_STEP (UINT32_MAX / 3) */
/* Unless overridden, the txnid step is 1 when a 64-bit CAS is available
 * (MDBX_64BIT_CAS) and 2 otherwise. */
#ifndef xMDBX_TXNID_STEP
#if MDBX_64BIT_CAS
#define xMDBX_TXNID_STEP 1u
#else
#define xMDBX_TXNID_STEP 2u
#endif
#endif /* xMDBX_TXNID_STEP */

/* Stores the 64-bit `value` into *p with the requested memory order and
 * returns the stored value.
 *
 * With MDBX_64BIT_ATOMIC the store is a single operation: either
 * atomic_store_explicit() or a plain write to p->weak bracketed by a
 * compiler barrier (skipped for mo_Relaxed) and osal_memory_fence().
 *
 * Without it the value is written as two 32-bit halves: the low half first
 * with mo_Relaxed, then the high half with the requested order. The store
 * is therefore not atomic as a whole; atomic_load64() compensates by
 * re-reading until two consecutive reads agree. The jitter4testing() calls
 * inject delays between the halves for testing. */
#ifndef atomic_store64
MDBX_MAYBE_UNUSED static __always_inline uint64_t atomic_store64(mdbx_atomic_uint64_t *p, const uint64_t value,
                                                                 enum mdbx_memory_order order) {
  STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8);
#if MDBX_64BIT_ATOMIC
#if __GNUC_PREREQ(11, 0)
  STATIC_ASSERT(__alignof__(mdbx_atomic_uint64_t) >= sizeof(uint64_t));
#endif /* GNU C >= 11 */
#ifdef MDBX_HAVE_C11ATOMICS
  assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p)));
  atomic_store_explicit(MDBX_c11a_rw(uint64_t, p), value, mo_c11_store(order));
#else  /* MDBX_HAVE_C11ATOMICS */
  if (order != mo_Relaxed)
    osal_compiler_barrier();
  p->weak = value;
  osal_memory_fence(order, true);
#endif /* MDBX_HAVE_C11ATOMICS */
#else  /* !MDBX_64BIT_ATOMIC */
  osal_compiler_barrier();
  atomic_store32(&p->low, (uint32_t)value, mo_Relaxed);
  jitter4testing(true);
  atomic_store32(&p->high, (uint32_t)(value >> 32), order);
  jitter4testing(true);
#endif /* !MDBX_64BIT_ATOMIC */
  return value;
}
#endif /* atomic_store64 */

/* Loads and returns the 64-bit value of *p with the requested memory order.
 *
 * With MDBX_64BIT_ATOMIC the load is a single operation: either
 * atomic_load_explicit() or osal_memory_fence() plus a plain read of
 * p->weak (followed by a compiler barrier unless the order is mo_Relaxed).
 *
 * Without it the value is assembled from two 32-bit halves, high half
 * first, and the read is repeated until two consecutive reads produce the
 * same 64-bit value, which filters out values torn by a concurrent
 * atomic_store64(). In this mode the function is not forced inline. The
 * jitter4testing() calls inject delays between the halves for testing. */
#ifndef atomic_load64
MDBX_MAYBE_UNUSED static
#if MDBX_64BIT_ATOMIC
    __always_inline
#endif /* MDBX_64BIT_ATOMIC */
        uint64_t atomic_load64(const volatile mdbx_atomic_uint64_t *p, enum mdbx_memory_order order) {
  STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8);
#if MDBX_64BIT_ATOMIC
#ifdef MDBX_HAVE_C11ATOMICS
  assert(atomic_is_lock_free(MDBX_c11a_ro(uint64_t, p)));
  return atomic_load_explicit(MDBX_c11a_ro(uint64_t, p), mo_c11_load(order));
#else  /* MDBX_HAVE_C11ATOMICS */
  osal_memory_fence(order, false);
  const uint64_t value = p->weak;
  if (order != mo_Relaxed)
    osal_compiler_barrier();
  return value;
#endif /* MDBX_HAVE_C11ATOMICS */
#else  /* !MDBX_64BIT_ATOMIC */
  osal_compiler_barrier();
  uint64_t value = (uint64_t)atomic_load32(&p->high, order) << 32;
  jitter4testing(true);
  value |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed : mo_AcquireRelease);
  jitter4testing(true);
  /* Re-read until the value is stable across two consecutive reads. */
  for (;;) {
    osal_compiler_barrier();
    uint64_t again = (uint64_t)atomic_load32(&p->high, order) << 32;
    jitter4testing(true);
    again |= atomic_load32(&p->low, (order == mo_Relaxed) ? mo_Relaxed : mo_AcquireRelease);
    jitter4testing(true);
    if (likely(value == again))
      return value;
    value = again;
  }
#endif /* !MDBX_64BIT_ATOMIC */
}
#endif /* atomic_load64 */

/* Emits a spin-wait/yield hint for the current CPU. The instruction or
 * intrinsic is selected at compile time per platform; when none of the listed
 * targets matches, the call falls back to osal_yield(). */
MDBX_MAYBE_UNUSED static __always_inline void atomic_yield(void) {
#if defined(_WIN32) || defined(_WIN64)
  YieldProcessor();
#elif defined(__ia32__) || defined(__e2k__)
  __builtin_ia32_pause();
#elif defined(__ia64__)
#if defined(__HP_cc__) || defined(__HP_aCC__)
  _Asm_hint(_HINT_PAUSE);
#else
  __asm__ __volatile__("hint @pause");
#endif
#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || defined(__ARM_ARCH_6K__)
#ifdef __CC_ARM
  __yield();
#else
  __asm__ __volatile__("yield");
#endif
#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && __mips_isa_rev >= 2
  __asm__ __volatile__("pause");
#elif defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) ||        \
    defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi)
  /* NOTE(review): presumably the raw encoding of the MIPS `pause` instruction
   * for assemblers that lack the mnemonic - confirm */
  __asm__ __volatile__(".word 0x00000140");
#else
  osal_yield();
#endif
}

#if MDBX_64BIT_CAS
/* 64-bit compare-and-swap: if `*p` equals `c`, replaces it with `v`.
 * Returns true when the swap was performed. Only defined when MDBX_64BIT_CAS
 * is set. In the C11 branch the value observed on failure is written into the
 * local copy of `c` and discarded. */
MDBX_MAYBE_UNUSED static __always_inline bool atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) {
#ifdef MDBX_HAVE_C11ATOMICS
  STATIC_ASSERT(sizeof(long long) >= sizeof(uint64_t));
  assert(atomic_is_lock_free(MDBX_c11a_rw(uint64_t, p)));
  return atomic_compare_exchange_strong(MDBX_c11a_rw(uint64_t, p), &c, v);
#elif defined(__GNUC__) || defined(__clang__)
  return __sync_bool_compare_and_swap(&p->weak, c, v);
#elif defined(_MSC_VER)
  /* the intrinsic returns the previous value; equality with `c` means success */
  return c == (uint64_t)_InterlockedCompareExchange64((volatile __int64 *)&p->weak, v, c);
#elif defined(__APPLE__)
  return OSAtomicCompareAndSwap64Barrier(c, v, &p->weak);
#else
#error FIXME: Unsupported compiler
#endif
}
#endif /* MDBX_64BIT_CAS */

/* 32-bit compare-and-swap: if `*p` equals `c`, replaces it with `v`.
 * Returns true when the swap was performed. In the C11 branch the value
 * observed on failure is written into the local copy of `c` and discarded. */
MDBX_MAYBE_UNUSED static __always_inline bool atomic_cas32(mdbx_atomic_uint32_t *p, uint32_t c, uint32_t v) {
#ifdef MDBX_HAVE_C11ATOMICS
  STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t));
  assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p)));
  return atomic_compare_exchange_strong(MDBX_c11a_rw(uint32_t, p), &c, v);
#elif defined(__GNUC__) || defined(__clang__)
  return __sync_bool_compare_and_swap(&p->weak, c, v);
#elif defined(_MSC_VER)
  STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t));
  /* the intrinsic returns the previous value; equality with `c` means success */
  return c == (uint32_t)_InterlockedCompareExchange((volatile long *)&p->weak, v, c);
#elif defined(__APPLE__)
  return OSAtomicCompareAndSwap32Barrier(c, v, &p->weak);
#else
#error FIXME: Unsupported compiler
#endif
}

/* Atomically adds `v` to `*p` (with unsigned wrap-around) and returns the
 * value that `*p` held BEFORE the addition, i.e. fetch-and-add semantics on
 * every backend. */
MDBX_MAYBE_UNUSED static __always_inline uint32_t atomic_add32(mdbx_atomic_uint32_t *p, uint32_t v) {
#ifdef MDBX_HAVE_C11ATOMICS
  STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t));
  assert(atomic_is_lock_free(MDBX_c11a_rw(uint32_t, p)));
  return atomic_fetch_add(MDBX_c11a_rw(uint32_t, p), v);
#elif defined(__GNUC__) || defined(__clang__)
  return __sync_fetch_and_add(&p->weak, v);
#elif defined(_MSC_VER)
  STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile uint32_t));
  return (uint32_t)_InterlockedExchangeAdd((volatile long *)&p->weak, v);
#elif defined(__APPLE__)
  /* OSAtomicAdd32Barrier() returns the NEW value, unlike the other backends;
   * subtract the addend (modulo 2^32) to recover the previous value. */
  return (uint32_t)OSAtomicAdd32Barrier(v, &p->weak) - v;
#else
#error FIXME: Unsupported compiler
#endif
}

/* Atomic subtraction expressed as the addition of `0 - (v)`; the expansion
 * yields whatever atomic_add32() returns. */
#define atomic_sub32(p, v) atomic_add32(p, 0 - (v))

/* Computes the transaction id that follows `txnid`: advances by
 * xMDBX_TXNID_STEP and, when 64-bit CAS is unavailable, skips any value whose
 * low 32 bits are all ones. */
MDBX_MAYBE_UNUSED static __always_inline uint64_t safe64_txnid_next(uint64_t txnid) {
  uint64_t successor = txnid + xMDBX_TXNID_STEP;
#if !MDBX_64BIT_CAS
  /* keep the low half away from UINT32_MAX so that the increments done by
   * safe64_reset() cannot overflow it */
  if ((uint32_t)successor == UINT32_MAX)
    successor += 1;
#endif
  return successor;
}

/* Atomically make target value >= SAFE64_INVALID_THRESHOLD.
 *
 * `single_writer` selects the simpler path: either one 64-bit store of
 * UINT64_MAX or a store of UINT32_MAX into the high half only.
 * Otherwise the method depends on the build configuration; without 64-bit
 * CAS the low half is additionally incremented before and after the high
 * half is set (see the comment inside that branch). */
MDBX_MAYBE_UNUSED static __always_inline void safe64_reset(mdbx_atomic_uint64_t *p, bool single_writer) {
  if (single_writer) {
#if MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64
    atomic_store64(p, UINT64_MAX, mo_AcquireRelease);
#else
    /* setting only the high half is enough to cross the threshold */
    atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease);
#endif /* MDBX_64BIT_ATOMIC && MDBX_WORDBITS >= 64 */
  } else {
#if MDBX_64BIT_CAS && MDBX_64BIT_ATOMIC
    /* atomically make value >= SAFE64_INVALID_THRESHOLD by 64-bit operation */
    atomic_store64(p, UINT64_MAX, mo_AcquireRelease);
#elif MDBX_64BIT_CAS
    /* atomically make value >= SAFE64_INVALID_THRESHOLD by 32-bit operation */
    atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease);
#else
    /* it is safe to increment low-part to avoid ABA, since xMDBX_TXNID_STEP > 1
     * and overflow was preserved in safe64_txnid_next() */
    STATIC_ASSERT(xMDBX_TXNID_STEP > 1);
    atomic_add32(&p->low, 1) /* avoid ABA in safe64_reset_compare() */;
    atomic_store32(&p->high, UINT32_MAX, mo_AcquireRelease);
    atomic_add32(&p->low, 1) /* avoid ABA in safe64_reset_compare() */;
#endif /* MDBX_64BIT_CAS && MDBX_64BIT_ATOMIC */
  }
  assert(p->weak >= SAFE64_INVALID_THRESHOLD);
  jitter4testing(true);
}

/* Resets `*p` to an invalid value only if it still equals `compare`.
 * Returns true when the reset was performed and false when the value had
 * changed in the meantime. */
MDBX_MAYBE_UNUSED static __always_inline bool safe64_reset_compare(mdbx_atomic_uint64_t *p, uint64_t compare) {
  /* LY: This function is used to reset `txnid` from hsr-handler in case
   *     the asynchronously cancellation of read transaction. Therefore,
   *     there may be a collision between the cleanup performed here and
   *     asynchronous termination and restarting of the read transaction
   *     in another process/thread. In general we MUST NOT reset the `txnid`
   *     if a new transaction was started (i.e. if `txnid` was changed). */
#if MDBX_64BIT_CAS
  bool rc = atomic_cas64(p, compare, UINT64_MAX);
#else
  /* LY: There is no gold ratio here since shared mutex is too costly,
   *     in such way we must acquire/release it for every update of txnid,
   *     i.e. twice for each read transaction). */
  bool rc = false;
  /* step 1: the low half must match, then CAS the high half to UINT32_MAX */
  if (likely(atomic_load32(&p->low, mo_AcquireRelease) == (uint32_t)compare &&
             atomic_cas32(&p->high, (uint32_t)(compare >> 32), UINT32_MAX))) {
    /* step 2: re-check the low half; if it changed meanwhile, try to put the
     * previous high half back and report failure */
    if (unlikely(atomic_load32(&p->low, mo_AcquireRelease) != (uint32_t)compare))
      atomic_cas32(&p->high, UINT32_MAX, (uint32_t)(compare >> 32));
    else
      rc = true;
  }
#endif /* MDBX_64BIT_CAS */
  jitter4testing(true);
  return rc;
}

/* Writes `v` into `*p`, which is asserted to currently hold an invalid value
 * (>= SAFE64_INVALID_THRESHOLD). When a single 64-bit store is not used, the
 * low half is written first, while the value as a whole still stays above
 * the threshold, and the high half is written last. */
MDBX_MAYBE_UNUSED static __always_inline void safe64_write(mdbx_atomic_uint64_t *p, const uint64_t v) {
  assert(p->weak >= SAFE64_INVALID_THRESHOLD);
#if MDBX_64BIT_ATOMIC && MDBX_64BIT_CAS
  atomic_store64(p, v, mo_AcquireRelease);
#else  /* MDBX_64BIT_ATOMIC */
  osal_compiler_barrier();
  /* update low-part but still value >= SAFE64_INVALID_THRESHOLD */
  atomic_store32(&p->low, (uint32_t)v, mo_Relaxed);
  assert(p->weak >= SAFE64_INVALID_THRESHOLD);
  jitter4testing(true);
  /* update high-part from SAFE64_INVALID_THRESHOLD to actual value */
  atomic_store32(&p->high, (uint32_t)(v >> 32), mo_AcquireRelease);
#endif /* MDBX_64BIT_ATOMIC */
  assert(p->weak == v);
  jitter4testing(true);
}

/* Reads the 64-bit value of `*p`. With MDBX_64BIT_ATOMIC the loop condition is
 * constant-false, so exactly one atomic load is done; otherwise the load is
 * repeated until it agrees with a plain read of p->weak. */
MDBX_MAYBE_UNUSED static __always_inline uint64_t safe64_read(const mdbx_atomic_uint64_t *p) {
  jitter4testing(true);
  uint64_t v;
  do
    v = atomic_load64(p, mo_AcquireRelease);
  while (!MDBX_64BIT_ATOMIC && unlikely(v != p->weak));
  return v;
}

#if 0 /* unused for now */
/* Disabled helpers (compiled out by the enclosing `#if 0`). */

/* Tells whether the value `v` is below the invalid threshold; on narrower
 * words only the high 32 bits are compared against UINT32_MAX. */
MDBX_MAYBE_UNUSED static __always_inline bool safe64_is_valid(uint64_t v) {
#if MDBX_WORDBITS >= 64
  return v < SAFE64_INVALID_THRESHOLD;
#else
  return (v >> 32) != UINT32_MAX;
#endif /* MDBX_WORDBITS */
}

/* Same check as above, applied to the current content of `*p`. */
MDBX_MAYBE_UNUSED static __always_inline bool
 safe64_is_valid_ptr(const mdbx_atomic_uint64_t *p) {
#if MDBX_64BIT_ATOMIC
  return atomic_load64(p, mo_AcquireRelease) < SAFE64_INVALID_THRESHOLD;
#else
  return atomic_load32(&p->high, mo_AcquireRelease) != UINT32_MAX;
#endif /* MDBX_64BIT_ATOMIC */
}
#endif /* unused for now */

/* non-atomic write with safety for reading a half-updated value.
 * With MDBX_64BIT_ATOMIC this is a single relaxed 64-bit store; otherwise the
 * target is first reset to an invalid value (single-writer path) and then
 * rewritten by safe64_write(). */
MDBX_MAYBE_UNUSED static __always_inline void safe64_update(mdbx_atomic_uint64_t *p, const uint64_t v) {
#if MDBX_64BIT_ATOMIC
  atomic_store64(p, v, mo_Relaxed);
#else
  safe64_reset(p, true);
  safe64_write(p, v);
#endif /* MDBX_64BIT_ATOMIC */
}

/* non-atomic increment with safety for reading a half-updated value.
 * Adds `v` (asserted to be non-zero) to `*p` as a separate safe64_read()
 * followed by safe64_update(); the read-modify-write as a whole is not
 * atomic. Forced inline only when MDBX_64BIT_ATOMIC is set. */
MDBX_MAYBE_UNUSED static
#if MDBX_64BIT_ATOMIC
    __always_inline
#endif /* MDBX_64BIT_ATOMIC */
    void safe64_inc(mdbx_atomic_uint64_t *p, const uint64_t v) {
  assert(v > 0);
  safe64_update(p, safe64_read(p) + v);
}

#endif /* !__cplusplus */

/* Internal prototypes.
 * The per-section comments below name a source file; presumably that is where
 * each group of functions is defined - confirm against the split sources. */

/* audit.c */
MDBX_INTERNAL int audit_ex(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc);

/* mvcc-readers.c
 * Declarations only; each function is declared exactly once.
 * NOTE(review): `mvcc_shapshot_oldest` looks like a misspelling of "snapshot",
 * but the identifier is part of the internal interface and is kept as is. */
MDBX_INTERNAL bsr_t mvcc_bind_slot(MDBX_env *env);
MDBX_MAYBE_UNUSED MDBX_INTERNAL pgno_t mvcc_largest_this(MDBX_env *env, pgno_t largest);
MDBX_INTERNAL txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady);
MDBX_INTERNAL pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page);
MDBX_INTERNAL txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler);
MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rlocked, int *dead);

/* dxb.c */
MDBX_INTERNAL int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits);
MDBX_INTERNAL int __must_check_result dxb_read_header(MDBX_env *env, meta_t *meta, const int lck_exclusive,
                                                      const mdbx_mode_t mode_bits);
/* Mode argument of dxb_resize().
 * NOTE(review): `impilict_shrink` looks like a misspelling of "implicit";
 * renaming it would require updating every user of the enumerator. */
enum resize_mode { implicit_grow, impilict_shrink, explicit_resize };
MDBX_INTERNAL int __must_check_result dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const pgno_t size_pgno,
                                                 pgno_t limit_pgno, const enum resize_mode mode);
MDBX_INTERNAL int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, const bool enable, const bool force_whole);
MDBX_INTERNAL int __must_check_result dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending,
                                                      troika_t *const troika);
/* dxb_sanitize_tail() is a real function only for memcheck/ASan builds;
 * otherwise it is an inline no-op that merely consumes its arguments. */
#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
MDBX_INTERNAL void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn);
#else
static inline void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) {
  (void)env;
  (void)txn;
}
#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */

/* txn.c */
MDBX_INTERNAL bool txn_refund(MDBX_txn *txn);
MDBX_INTERNAL txnid_t txn_snapshot_oldest(const MDBX_txn *const txn);
MDBX_INTERNAL int txn_abort(MDBX_txn *txn);
MDBX_INTERNAL int txn_renew(MDBX_txn *txn, unsigned flags);
MDBX_INTERNAL int txn_park(MDBX_txn *txn, bool autounpark);
MDBX_INTERNAL int txn_unpark(MDBX_txn *txn);
MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits);
MDBX_INTERNAL void txn_done_cursors(MDBX_txn *txn, const bool merge);

/* Initializer with one name per txn_end operation number, listed in the same
 * order as the TXN_END_* enumerators below and terminated by nullptr. */
#define TXN_END_NAMES                                                                                                  \
  {"committed", "empty-commit", "abort", "reset", "fail-begin", "fail-beginchild", "ousted", nullptr}
/* The `mode` argument of txn_end(): an operation number in the low bits
 * (extracted with TXN_END_OPMASK) combined with the option bits below. */
enum {
  /* txn_end operation number, for logging */
  TXN_END_COMMITTED,
  TXN_END_PURE_COMMIT,
  TXN_END_ABORT,
  TXN_END_RESET,
  TXN_END_FAIL_BEGIN,
  TXN_END_FAIL_BEGINCHILD,
  TXN_END_OUSTED,

  TXN_END_OPMASK = 0x07 /* mask for txn_end() operation number */,
  TXN_END_UPDATE = 0x10 /* update env state (DBIs) */,
  TXN_END_FREE = 0x20 /* free txn unless it is env.basal_txn */,
  TXN_END_EOTDONE = 0x40 /* txn's cursors already closed */,
  TXN_END_SLOT = 0x80 /* release any reader slot if NOSTICKYTHREADS */
};
MDBX_INTERNAL int txn_end(MDBX_txn *txn, unsigned mode);
MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx);
MDBX_INTERNAL void txn_take_gcprof(const MDBX_txn *txn, MDBX_commit_latency *latency);
MDBX_INTERNAL void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len);

/* Declarations only: the functions below are not defined in this part of the
 * file. Those carrying `__must_check_result` presumably make the compiler
 * complain about an ignored return value - confirm against the macro. */

/* env.c */
MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode);
MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika);
MDBX_INTERNAL int env_sync(MDBX_env *env, bool force, bool nonblock);
MDBX_INTERNAL int env_close(MDBX_env *env, bool resurrect_after_fork);
MDBX_INTERNAL MDBX_txn *env_owned_wrtxn(const MDBX_env *env);
MDBX_INTERNAL int __must_check_result env_page_auxbuffer(MDBX_env *env);
MDBX_INTERNAL unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize);

/* api-opt.c */
MDBX_INTERNAL void env_options_init(MDBX_env *env);
MDBX_INTERNAL void env_options_adjust_defaults(MDBX_env *env);
MDBX_INTERNAL void env_options_adjust_dp_limit(MDBX_env *env);
MDBX_INTERNAL pgno_t default_dp_limit(const MDBX_env *env);

/* tree.c */
MDBX_INTERNAL int tree_drop(MDBX_cursor *mc, const bool may_have_tables);
MDBX_INTERNAL int __must_check_result tree_rebalance(MDBX_cursor *mc);
MDBX_INTERNAL int __must_check_result tree_propagate_key(MDBX_cursor *mc, const MDBX_val *key);
MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env);
MDBX_INTERNAL void recalculate_subpage_thresholds(MDBX_env *env);

/* table.c */
MDBX_INTERNAL int __must_check_result tbl_fetch(MDBX_txn *txn, size_t dbi);
MDBX_INTERNAL int __must_check_result tbl_setup(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *const db);

/* coherency.c */
MDBX_INTERNAL bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report);
MDBX_INTERNAL int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp);
MDBX_INTERNAL int coherency_check_written(const MDBX_env *env, const txnid_t txnid, const volatile meta_t *meta,
                                          const intptr_t pgno, uint64_t *timestamp);
MDBX_INTERNAL int coherency_timeout(uint64_t *timestamp, intptr_t pgno, const MDBX_env *env);

/* List of txnid: a txl_t is a plain pointer to txnid_t elements.
 * NOTE(review): the in-memory layout (where length/capacity are kept) is not
 * visible here - see the implementation of txl_alloc()/txl_append(). */
typedef txnid_t *txl_t;
typedef const txnid_t *const_txl_t;

/* Sizing constants for txnid lists. Both the initial and the maximal size are
 * reduced by 2 and by the assumed malloc overhead expressed in elements. */
enum txl_rules {
  txl_granulate = 32,
  txl_initial = txl_granulate - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t),
  txl_max = (1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(txnid_t)
};

MDBX_INTERNAL txl_t txl_alloc(void);

MDBX_INTERNAL void txl_free(txl_t txl);

/* takes the list by pointer-to-pointer, so it may replace `*ptxl` */
MDBX_INTERNAL int __must_check_result txl_append(txl_t __restrict *ptxl, txnid_t id);

MDBX_INTERNAL void txl_sort(txl_t txl);

MDBX_INTERNAL bool txl_contain(const txl_t txl, txnid_t id);

/*------------------------------------------------------------------------------
 * Unaligned access */

/* Returns the largest power of two that divides both `alignment_baseline` and
 * `field_offset`, i.e. the alignment guaranteed for a field placed at
 * `field_offset` inside an object aligned to `alignment_baseline`.
 * Returns 0 when both arguments are 0. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED static inline size_t field_alignment(size_t alignment_baseline,
                                                                                   size_t field_offset) {
  const size_t merge = alignment_baseline | field_offset;
  /* Isolate the lowest set bit using size_t arithmetic only: the two's
   * complement negation is spelled `~x + 1` so that nothing is truncated to
   * `int` (which lost bits above 31 and could overflow on INT_MIN) and no
   * unary minus is applied to an unsigned operand. */
  return merge & (~merge + 1);
}

/* read-thunk for UB-sanitizer: fetches the single byte `ptr` points at. */
MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t peek_u8(const uint8_t *__restrict ptr) {
  const uint8_t byte = ptr[0];
  return byte;
}

/* write-thunk for UB-sanitizer: stores the byte `v` at `ptr`. */
static inline void poke_u8(uint8_t *__restrict ptr, const uint8_t v) {
  ptr[0] = v;
}

/* Copies exactly 2 bytes from `src` to `dst` one byte at a time, so neither
 * pointer needs any particular alignment. The regions must not overlap
 * (both are __restrict). Returns `dst`. */
static inline void *bcopy_2(void *__restrict dst, const void *__restrict src) {
  uint8_t *__restrict d = (uint8_t *)dst;
  /* keep the const qualifier of the source instead of casting it away */
  const uint8_t *__restrict s = (const uint8_t *)src;
  d[0] = s[0];
  d[1] = s[1];
  return d;
}

/* Copies exactly 4 bytes from `src` to `dst` one byte at a time, so neither
 * pointer needs any particular alignment. The regions must not overlap
 * (both are __restrict). Returns `dst`. */
static inline void *bcopy_4(void *const __restrict dst, const void *const __restrict src) {
  uint8_t *__restrict d = (uint8_t *)dst;
  /* keep the const qualifier of the source instead of casting it away */
  const uint8_t *__restrict s = (const uint8_t *)src;
  d[0] = s[0];
  d[1] = s[1];
  d[2] = s[2];
  d[3] = s[3];
  return d;
}

/* Copies exactly 8 bytes from `src` to `dst` one byte at a time, so neither
 * pointer needs any particular alignment. The regions must not overlap
 * (both are __restrict). Returns `dst`. */
static inline void *bcopy_8(void *const __restrict dst, const void *const __restrict src) {
  uint8_t *__restrict d = (uint8_t *)dst;
  /* keep the const qualifier of the source instead of casting it away */
  const uint8_t *__restrict s = (const uint8_t *)src;
  d[0] = s[0];
  d[1] = s[1];
  d[2] = s[2];
  d[3] = s[3];
  d[4] = s[4];
  d[5] = s[5];
  d[6] = s[6];
  d[7] = s[7];
  return d;
}

/* Reads a uint16_t from `ptr`, which is asserted to be aligned to
 * `expected_alignment` bytes. A direct load is used when the build allows
 * unaligned 16-bit access or the alignment is sufficient; otherwise the value
 * is fetched through an __unaligned pointer or assembled by bcopy_2(). */
MDBX_NOTHROW_PURE_FUNCTION static inline uint16_t unaligned_peek_u16(const size_t expected_alignment,
                                                                     const void *const ptr) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  if (MDBX_UNALIGNED_OK < 2 && (expected_alignment % sizeof(uint16_t)) != 0) {
#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
    return *(const __unaligned uint16_t *)ptr;
#else
    uint16_t assembled;
    bcopy_2((uint8_t *)&assembled, (const uint8_t *)ptr);
    return assembled;
#endif /* _MSC_VER || __unaligned */
  }
  return *(const uint16_t *)ptr;
}

/* Stores the uint16_t `v` at `ptr`, which is asserted to be aligned to
 * `expected_alignment` bytes. A direct store is used when the build allows
 * unaligned 16-bit access or the alignment is sufficient; otherwise the store
 * goes through an __unaligned pointer or byte-wise via bcopy_2(). */
static inline void unaligned_poke_u16(const size_t expected_alignment, void *const __restrict ptr, const uint16_t v) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  if (MDBX_UNALIGNED_OK >= 2 || (expected_alignment % sizeof(v)) == 0) {
    *(uint16_t *)ptr = v;
    return;
  }
#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
  *((uint16_t __unaligned *)ptr) = v;
#else
  bcopy_2((uint8_t *)ptr, (const uint8_t *)&v);
#endif /* _MSC_VER || __unaligned */
}

/* Reads a uint32_t from `ptr`, which is asserted to be aligned to
 * `expected_alignment` bytes. Three strategies, in order of preference:
 * a direct 32-bit load; two 16-bit loads combined according to the byte
 * order; an __unaligned load or a byte-wise copy via bcopy_4(). */
MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t unaligned_peek_u32(const size_t expected_alignment,
                                                                     const void *const __restrict ptr) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(uint32_t)) == 0)
    return *(const uint32_t *)ptr;

  if ((expected_alignment % sizeof(uint16_t)) == 0) {
    /* the index of each half depends on the target endianness */
    const uint16_t *const halves = (const uint16_t *)ptr;
    const uint16_t lo = halves[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__];
    const uint16_t hi = halves[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__];
    return (uint32_t)hi << 16 | lo;
  }

#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
  return *(const __unaligned uint32_t *)ptr;
#else
  uint32_t assembled;
  bcopy_4((uint8_t *)&assembled, (const uint8_t *)ptr);
  return assembled;
#endif /* _MSC_VER || __unaligned */
}

/* Stores the uint32_t `v` at `ptr`, which is asserted to be aligned to
 * `expected_alignment` bytes. Three strategies, in order of preference:
 * a direct 32-bit store; two 16-bit stores placed according to the byte
 * order; an __unaligned store or a byte-wise copy via bcopy_4(). */
static inline void unaligned_poke_u32(const size_t expected_alignment, void *const __restrict ptr, const uint32_t v) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  if (MDBX_UNALIGNED_OK >= 4 || (expected_alignment % sizeof(v)) == 0) {
    *(uint32_t *)ptr = v;
    return;
  }

  if ((expected_alignment % sizeof(uint16_t)) == 0) {
    /* the index of each half depends on the target endianness */
    uint16_t *const halves = (uint16_t *)ptr;
    halves[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint16_t)v;
    halves[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = (uint16_t)(v >> 16);
    return;
  }

#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
  *((uint32_t __unaligned *)ptr) = v;
#else
  bcopy_4((uint8_t *)ptr, (const uint8_t *)&v);
#endif /* _MSC_VER || __unaligned */
}

/* Reads a uint64_t from `ptr`, which is asserted to be aligned to
 * `expected_alignment` bytes. Three strategies, in order of preference:
 * a direct 64-bit load; two 32-bit loads combined according to the byte
 * order; an __unaligned load or a byte-wise copy via bcopy_8(). */
MDBX_NOTHROW_PURE_FUNCTION static inline uint64_t unaligned_peek_u64(const size_t expected_alignment,
                                                                     const void *const __restrict ptr) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(uint64_t)) == 0)
    return *(const uint64_t *)ptr;

  if ((expected_alignment % sizeof(uint32_t)) == 0) {
    /* the index of each half depends on the target endianness */
    const uint32_t *const halves = (const uint32_t *)ptr;
    const uint32_t lo = halves[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__];
    const uint32_t hi = halves[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__];
    return (uint64_t)hi << 32 | lo;
  }

#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
  return *(const __unaligned uint64_t *)ptr;
#else
  uint64_t assembled;
  bcopy_8((uint8_t *)&assembled, (const uint8_t *)ptr);
  return assembled;
#endif /* _MSC_VER || __unaligned */
}

/* Volatile flavour of the 64-bit read: `ptr` is asserted to be aligned to
 * `expected_alignment`, which itself must be a multiple of 4. Uses a direct
 * volatile 64-bit load when allowed; otherwise an __unaligned volatile load,
 * or two volatile 32-bit loads (low half read first) merged by byte order. */
static inline uint64_t unaligned_peek_u64_volatile(const size_t expected_alignment,
                                                   const volatile void *const __restrict ptr) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  assert(expected_alignment % sizeof(uint32_t) == 0);
  if (MDBX_UNALIGNED_OK < 8 && (expected_alignment % sizeof(uint64_t)) != 0) {
#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
    return *(const volatile __unaligned uint64_t *)ptr;
#else
    const volatile uint32_t *const halves = (const volatile uint32_t *)ptr;
    const uint32_t lo = halves[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__];
    const uint32_t hi = halves[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__];
    return (uint64_t)hi << 32 | lo;
#endif /* _MSC_VER || __unaligned */
  }
  return *(const volatile uint64_t *)ptr;
}

/* Stores the uint64_t `v` at `ptr`, which is asserted to be aligned to
 * `expected_alignment` bytes. Three strategies, in order of preference:
 * a direct 64-bit store; two 32-bit stores placed according to the byte
 * order; an __unaligned store or a byte-wise copy via bcopy_8(). */
static inline void unaligned_poke_u64(const size_t expected_alignment, void *const __restrict ptr, const uint64_t v) {
  assert((uintptr_t)ptr % expected_alignment == 0);
  if (MDBX_UNALIGNED_OK >= 8 || (expected_alignment % sizeof(v)) == 0) {
    *(uint64_t *)ptr = v;
    return;
  }

  if ((expected_alignment % sizeof(uint32_t)) == 0) {
    /* the index of each half depends on the target endianness */
    uint32_t *const halves = (uint32_t *)ptr;
    halves[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__] = (uint32_t)v;
    halves[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__] = (uint32_t)(v >> 32);
    return;
  }

#if defined(__unaligned) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64)
  *((uint64_t __unaligned *)ptr) = v;
#else
  bcopy_8((uint8_t *)ptr, (const uint8_t *)&v);
#endif /* _MSC_VER || __unaligned */
}

/* Field accessors for structures that may sit at an arbitrary address:
 * `ptr` is the address of the structure, `struct` its type and `field` the
 * member name. Each macro offsets `ptr` by offsetof(struct, field) via
 * ptr_disp() and reads/writes with an expected alignment of 1, i.e. without
 * assuming any alignment of the field. */
#define UNALIGNED_PEEK_8(ptr, struct, field) peek_u8(ptr_disp(ptr, offsetof(struct, field)))
#define UNALIGNED_POKE_8(ptr, struct, field, value) poke_u8(ptr_disp(ptr, offsetof(struct, field)), value)

#define UNALIGNED_PEEK_16(ptr, struct, field) unaligned_peek_u16(1, ptr_disp(ptr, offsetof(struct, field)))
#define UNALIGNED_POKE_16(ptr, struct, field, value)                                                                   \
  unaligned_poke_u16(1, ptr_disp(ptr, offsetof(struct, field)), value)

#define UNALIGNED_PEEK_32(ptr, struct, field) unaligned_peek_u32(1, ptr_disp(ptr, offsetof(struct, field)))
#define UNALIGNED_POKE_32(ptr, struct, field, value)                                                                   \
  unaligned_poke_u32(1, ptr_disp(ptr, offsetof(struct, field)), value)

#define UNALIGNED_PEEK_64(ptr, struct, field) unaligned_peek_u64(1, ptr_disp(ptr, offsetof(struct, field)))
#define UNALIGNED_POKE_64(ptr, struct, field, value)                                                                   \
  unaligned_poke_u64(1, ptr_disp(ptr, offsetof(struct, field)), value)

MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t peek_pgno(const void *const __restrict ptr) {
  if (sizeof(pgno_t) == sizeof(uint32_t))
    return (pgno_t)unaligned_peek_u32(1, ptr);
  else if (sizeof(pgno_t) == sizeof(uint64_t))
    return (pgno_t)unaligned_peek_u64(1, ptr);
  else {
    pgno_t pgno;
    memcpy(&pgno, ptr, sizeof(pgno));
    return pgno;
  }
}

/* Writes a page number to a possibly unaligned location. The accessor is
 * picked by the size of pgno_t (32-bit, 64-bit, or memcpy() for anything
 * else); no alignment of `ptr` is assumed. */
static inline void poke_pgno(void *const __restrict ptr, const pgno_t pgno) {
  if (sizeof(pgno) == sizeof(uint32_t)) {
    unaligned_poke_u32(1, ptr, pgno);
    return;
  }
  if (sizeof(pgno) == sizeof(uint64_t)) {
    unaligned_poke_u64(1, ptr, pgno);
    return;
  }
  memcpy(ptr, &pgno, sizeof(pgno));
}
#if defined(_WIN32) || defined(_WIN64)

/* Slim reader/writer lock storage: the same memory is viewed either as a pair
 * of reader/writer counters or as the native RTL_SRWLOCK.
 * NOTE(review): presumably the counter view backs a fallback implementation
 * used when the native SRW lock API is unavailable - confirm. */
typedef union osal_srwlock {
  __anonymous_struct_extension__ struct {
    long volatile readerCount;
    long volatile writerCount;
  };
  RTL_SRWLOCK native;
} osal_srwlock_t;

/* Signature shared by all SRW lock operations (init/acquire/release). */
typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *);

/* Local copies of Windows SDK definitions, compiled only when targeting a
 * Windows version prior to Vista (_WIN32_WINNT < 0x0600).
 * NOTE(review): presumably the SDK headers do not provide them for such
 * targets - confirm. */
#if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */
typedef enum _FILE_INFO_BY_HANDLE_CLASS {
  FileBasicInfo,
  FileStandardInfo,
  FileNameInfo,
  FileRenameInfo,
  FileDispositionInfo,
  FileAllocationInfo,
  FileEndOfFileInfo,
  FileStreamInfo,
  FileCompressionInfo,
  FileAttributeTagInfo,
  FileIdBothDirectoryInfo,
  FileIdBothDirectoryRestartInfo,
  FileIoPriorityHintInfo,
  FileRemoteProtocolInfo,
  MaximumFileInfoByHandleClass
} FILE_INFO_BY_HANDLE_CLASS,
    *PFILE_INFO_BY_HANDLE_CLASS;

typedef struct _FILE_END_OF_FILE_INFO {
  LARGE_INTEGER EndOfFile;
} FILE_END_OF_FILE_INFO, *PFILE_END_OF_FILE_INFO;

/* bits of FILE_REMOTE_PROTOCOL_INFO.Flags */
#define REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK 0x00000001
#define REMOTE_PROTOCOL_INFO_FLAG_OFFLINE 0x00000002

typedef struct _FILE_REMOTE_PROTOCOL_INFO {
  USHORT StructureVersion;
  USHORT StructureSize;
  DWORD Protocol;
  USHORT ProtocolMajorVersion;
  USHORT ProtocolMinorVersion;
  USHORT ProtocolRevision;
  USHORT Reserved;
  DWORD Flags;
  struct {
    DWORD Reserved[8];
  } GenericReserved;
  struct {
    DWORD Reserved[16];
  } ProtocolSpecificReserved;
} FILE_REMOTE_PROTOCOL_INFO, *PFILE_REMOTE_PROTOCOL_INFO;

#endif /* _WIN32_WINNT < 0x0600 (prior to Windows Vista) */

/* Function-pointer types named after the Win32 functions they mirror; they
 * are the member types of struct libmdbx_imports.
 * NOTE(review): presumably resolved at run time by windows_import() - confirm. */
typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)(_In_ HANDLE hFile,
                                                        _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
                                                        _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize);

typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)(
    _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, _In_ DWORD nVolumeNameSize,
    _Out_opt_ LPDWORD lpVolumeSerialNumber, _Out_opt_ LPDWORD lpMaximumComponentLength,
    _Out_opt_ LPDWORD lpFileSystemFlags, _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize);

typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, _Out_ LPWSTR lpszFilePath,
                                                      _In_ DWORD cchFilePath, _In_ DWORD dwFlags);

/* Pointer type mirroring Win32 SetFileInformationByHandle(). The information
 * buffer is read by the callee, so it is annotated as an input (the Win32
 * documentation marks all four parameters as [in]). */
typedef BOOL(WINAPI *MDBX_SetFileInformationByHandle)(_In_ HANDLE hFile,
                                                      _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
                                                      _In_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize);

/* Further function-pointer types mirroring native/Win32 entry points, plus
 * the few supporting type definitions they need. */
typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)(IN HANDLE FileHandle, IN OUT HANDLE Event,
                                              IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext,
                                              OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode,
                                              IN OUT PVOID InputBuffer, IN ULONG InputBufferLength,
                                              OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength);

typedef uint64_t(WINAPI *MDBX_GetTickCount64)(void);

/* local definition, used when the SDK does not target Windows 8 or newer */
#if !defined(_WIN32_WINNT_WIN8) || _WIN32_WINNT < _WIN32_WINNT_WIN8
typedef struct _WIN32_MEMORY_RANGE_ENTRY {
  PVOID VirtualAddress;
  SIZE_T NumberOfBytes;
} WIN32_MEMORY_RANGE_ENTRY, *PWIN32_MEMORY_RANGE_ENTRY;
#endif /* Windows 8.x */

typedef BOOL(WINAPI *MDBX_PrefetchVirtualMemory)(HANDLE hProcess, ULONG_PTR NumberOfEntries,
                                                 PWIN32_MEMORY_RANGE_ENTRY VirtualAddresses, ULONG Flags);

typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT;

typedef NTSTATUS(NTAPI *MDBX_NtExtendSection)(IN HANDLE SectionHandle, IN PLARGE_INTEGER NewSectionSize);

typedef LSTATUS(WINAPI *MDBX_RegGetValueA)(HKEY hkey, LPCSTR lpSubKey, LPCSTR lpValue, DWORD dwFlags, LPDWORD pdwType,
                                           PVOID pvData, LPDWORD pcbData);

/* the GUID is received into a bin128_t */
typedef long(WINAPI *MDBX_CoCreateGuid)(bin128_t *guid);

/* Declared directly rather than through a function pointer.
 * NOTE(review): no calling convention is spelled out here, unlike the NTAPI
 * pointer types above - verify this matches the system declaration on
 * 32-bit x86. */
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);

typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle, PUCHAR OverlappedRangeStart, ULONG Length);

/* Table of function pointers: five SRW lock operations followed by one entry
 * per MDBX_* function-pointer type declared above.
 * NOTE(review): presumably filled in by windows_import(), and an entry may be
 * null when the running system lacks the function - confirm. */
struct libmdbx_imports {
  osal_srwlock_t_function srwl_Init;
  osal_srwlock_t_function srwl_AcquireShared;
  osal_srwlock_t_function srwl_ReleaseShared;
  osal_srwlock_t_function srwl_AcquireExclusive;
  osal_srwlock_t_function srwl_ReleaseExclusive;
  MDBX_NtExtendSection NtExtendSection;
  MDBX_GetFileInformationByHandleEx GetFileInformationByHandleEx;
  MDBX_GetVolumeInformationByHandleW GetVolumeInformationByHandleW;
  MDBX_GetFinalPathNameByHandleW GetFinalPathNameByHandleW;
  MDBX_SetFileInformationByHandle SetFileInformationByHandle;
  MDBX_NtFsControlFile NtFsControlFile;
  MDBX_PrefetchVirtualMemory PrefetchVirtualMemory;
  MDBX_GetTickCount64 GetTickCount64;
  MDBX_RegGetValueA RegGetValueA;
  MDBX_SetFileIoOverlappedRange SetFileIoOverlappedRange;
  MDBX_CoCreateGuid CoCreateGuid;
};

MDBX_INTERNAL void windows_import(void);
#endif /* Windows */

/* Magic numbers identifying object kinds; all fit into int32_t.
 * NOTE(review): presumably stored in the `signature` field of the respective
 * object (struct MDBX_txn has one) to detect invalid handles; the three
 * cursor values look like distinct lifecycle states - confirm. */
enum signatures {
  env_signature = INT32_C(0x1A899641),
  txn_signature = INT32_C(0x13D53A31),
  cur_signature_live = INT32_C(0x7E05D5B1),
  cur_signature_ready4dispose = INT32_C(0x2817A047),
  cur_signature_wait4eot = INT32_C(0x10E297A7)
};

/*----------------------------------------------------------------------------*/

/* A dirty-page list item: a page pointer paired with its page number and a
 * page count.
 * NOTE(review): `npages` presumably counts the consecutive pages covered by
 * the entry - confirm. */
struct dp {
  page_t *ptr;
  pgno_t pgno, npages;
};

/* Tuning constants of the dirty-page list. dpl_reserve_gap is the sum of the
 * two gap constants (16 + 2 = 18) and is also the declared length of
 * dpl.items[].
 * NOTE(review): dpl_insertion_threshold presumably selects between insertion
 * and a full sort - confirm against the DPL implementation. */
enum dpl_rules {
  dpl_gap_edging = 2,
  dpl_gap_mergesort = 16,
  dpl_reserve_gap = dpl_gap_mergesort + dpl_gap_edging,
  dpl_insertion_threshold = 42
};

/* A DPL (dirty-page list) is a lazy-sorted array of dirty-page items (dp_t). */
struct dpl {
  /* NOTE(review): presumably the length of the already sorted prefix - confirm */
  size_t sorted;
  /* NOTE(review): presumably the number of items in use - confirm */
  size_t length;
  /* number of pages, but not an entries. */
  size_t pages_including_loose;
  /* allocated size excluding the dpl_reserve_gap */
  size_t detent;
  /* dynamic size with holes at zero and after the last; the declared length
   * covers only the reserve gap */
  dp_t items[dpl_reserve_gap];
};

/*----------------------------------------------------------------------------*/
/* Internal structures */

/* Comparing/ordering and length constraints: a comparator function plus the
 * minimal and maximal permitted length. The comparator is intentionally the
 * first member (see the layout notes at clc2_t). */
typedef struct clc {
  MDBX_cmp_func *cmp; /* comparator */
  size_t lmin, lmax;  /* min/max length constraints */
} clc_t;

/* Auxiliary information about a table.
 *
 * The set of requirements:
 * 1. Transactions and the main cursor need all the fields.
 * 2. A nested dupsort-cursor needs the value comparator, which from inside
 *    that cursor looks like a key comparator. Plus a stub for the value
 *    comparator, which must not be used in normal situations but is required
 *    at least to catch such accesses.
 * 3. Using comparators must look the same for a cursor and for a nested
 *    dupsort-cursor.
 * 4. It is desirable to minimize the amount of data placed inside a nested
 *    dupsort-cursor.
 * 5. It is desirable for the size of the whole structure to be a power of two.
 *
 * Solution:
 *  - store nothing extra in the dupsort-cursor, only the tree;
 *  - put into cursors only a pointer to clc_t, which points to the
 *    corresponding clc-field in the common kvx-table bound to the env;
 *  - place the comparator at the beginning of clc_t; in kvx_t place the clc
 *    for keys first, then the one for values, and the DB name at the end
 *    of kvx_t.
 *  - then in a cursor clc[0] holds the information for keys and clc[1] the
 *    one for values, while the value comparator of a dupsort-cursor lands on
 *    the MDBX_val with the name, which leads to SIGSEGV on an attempt to use
 *    such a comparator.
 *  - the size of kvx_t becomes equal to 8 words.
 *
 * Tricks and other penny-pinching:
 *  - do not store dbi inside the cursor; instead compute it as the difference
 *    between the cursor's dbi_state and the beginning of the dbi_state table
 *    in the transaction. The point is to reduce the number of fields set up
 *    when a cursor is initialized. This costs nothing, since dbi is needed
 *    for the subsequent access to the arrays in the transaction, i.e.
 *    computing dbi dereferences the same pointer to txn and reads the same
 *    cache line with the pointers. */
typedef struct clc2 {
  clc_t k; /* for keys */
  clc_t v; /* for values */
} clc2_t;

/* Per-table comparator/constraint pair followed by the table name; the name
 * is deliberately the last member. */
struct kvx {
  clc2_t clc;
  MDBX_val name; /* table name */
};

/* Non-shared DBI state flags inside transaction.
 * Each value is a distinct bit that fits into the uint8_t elements of
 * MDBX_txn.dbi_state; bit 0x20 is not assigned here. */
enum dbi_state {
  DBI_DIRTY = 0x01 /* DB was written in this txn */,
  DBI_STALE = 0x02 /* Named-DB record is older than txnID */,
  DBI_FRESH = 0x04 /* Named-DB handle opened in this txn */,
  DBI_CREAT = 0x08 /* Named-DB handle created in this txn */,
  DBI_VALID = 0x10 /* Handle is valid, see also DB_VALID */,
  DBI_OLDEN = 0x40 /* Handle was closed/reopened outside txn */,
  DBI_LINDO = 0x80 /* Lazy initialization done for DBI-slot */,
};

/* Internal transaction flags and masks which complement the MDBX_TXN_* values.
 * static_checks() verifies that the state mask does not overlap with the
 * begin-masks and that txn_shrink_allowed overlaps with none of them. */
enum txn_flags {
  /* mask of the begin-flags of read-only transactions */
  txn_ro_begin_flags = MDBX_TXN_RDONLY | MDBX_TXN_RDONLY_PREPARE,
  /* mask of the begin-flags of read-write transactions */
  txn_rw_begin_flags = MDBX_TXN_NOMETASYNC | MDBX_TXN_NOSYNC | MDBX_TXN_TRY,
  txn_shrink_allowed = UINT32_C(0x40000000),
  txn_parked = MDBX_TXN_PARKED,
  txn_gc_drained = 0x40 /* GC was depleted up to oldest reader */,
  /* mask of the bits describing a state of a transaction */
  txn_state_flags = MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD |
                    MDBX_TXN_INVALID | txn_gc_drained
};

/* A database transaction.
 * Every operation requires a transaction handle.
 *
 * The common part is followed by an anonymous union which holds either the
 * `to` member (used by read transactions) or the `tw` member (used by write
 * transactions). */
struct MDBX_txn {
  int32_t signature;
  uint32_t flags; /* Transaction Flags */
  size_t n_dbi;   /* upper bound for DBI indexes within this txn,
                     see dbi_state() and TXN_FOREACH_DBI_FROM() */
  size_t owner;   /* thread ID that owns this transaction */

  MDBX_txn *parent; /* parent of a nested txn */
  MDBX_txn *nested; /* nested txn under this txn,
                       set together with MDBX_TXN_HAS_CHILD */
  geo_t geo;

  /* The ID of this transaction. IDs are integers incrementing from
   * INITIAL_TXNID. Only committed write transactions increment the ID. If a
   * transaction aborts, the ID may be re-used by the next writer. */
  txnid_t txnid, front_txnid;

  MDBX_env *env; /* the DB environment */
  tree_t *dbs;   /* Array of tree_t records for each known DB */

#if MDBX_ENABLE_DBI_SPARSE
  /* Bitmap of DBI-slots: dbi_state() treats a slot as absent (zero state)
   * when the corresponding bit is not set. */
  unsigned *__restrict dbi_sparse;
#endif /* MDBX_ENABLE_DBI_SPARSE */

  /* Array of non-shared txn's flags of DBI.
   * The __restrict qualifier is useful and safe here as currently understood,
   * since an overlap is possible only with the dbi_state of cursors,
   * and it happens on reading before any subsequent modification/write. */
  uint8_t *__restrict dbi_state;

  /* Array of sequence numbers for each DB handle. */
  uint32_t *__restrict dbi_seqs;

  /* Array of the heads of singly-linked lists used for tracking cursors. */
  MDBX_cursor **cursors;

  /* "Canary" markers/counters */
  MDBX_canary canary;

  /* User-settable context */
  void *userctx;

  union {
    struct {
      /* For read txns: This thread/txn's reader table slot, or nullptr. */
      reader_slot_t *reader;
    } to;
    struct {
      troika_t troika;
      pnl_t __restrict repnl; /* Reclaimed GC pages */
      struct {
        /* The list of reclaimed txn-ids from GC */
        txl_t __restrict retxl;
        txnid_t last_reclaimed; /* ID of last used record */
        uint64_t time_acc;
      } gc;
      bool prefault_write_activated;
#if MDBX_ENABLE_REFUND
      pgno_t loose_refund_wl /* FIXME: describe */;
#endif /* MDBX_ENABLE_REFUND */
      /* a sequence to spilling dirty page with LRU policy */
      unsigned dirtylru;
      /* dirtylist room: Dirty array size - dirty pages visible to this txn.
       * Includes ancestor txns' dirty pages not hidden by other txns'
       * dirty/spilled pages. Thus commit(nested txn) has room to merge
       * dirtylist into parent after freeing hidden parent pages. */
      size_t dirtyroom;
      /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
      dpl_t *__restrict dirtylist;
      /* The list of pages that became unused during this transaction. */
      pnl_t __restrict retired_pages;
      /* The list of loose pages that became unused and may be reused
       * in this transaction, linked through `page_next()`. */
      page_t *__restrict loose_pages;
      /* Number of loose pages (tw.loose_pages) */
      size_t loose_count;
      /* NOTE(review): the three members of this union share the same storage,
       * i.e. both writemap_* counters alias each other and the `spilled`
       * struct — confirm which one is actually used in each mode. */
      union {
        struct {
          size_t least_removed;
          /* The sorted list of dirty pages we temporarily wrote to disk
           * because the dirty list was full. page numbers in here are
           * shifted left by 1, deleted slots have the LSB set. */
          pnl_t __restrict list;
        } spilled;
        size_t writemap_dirty_npages;
        size_t writemap_spilled_npages;
      };
      /* In write txns, next is located the array of cursors for each DB */
    } tw;
  };
};

/* Number of items in the pg[] and ki[] stacks of a cursor:
 * 16 plus a quarter of the number of bits in a machine word. */
#define CURSOR_STACK_SIZE (16 + MDBX_WORDBITS / 4)

/* A cursor: holds a stack of pages together with the indices within them,
 * plus the pointers to the per-DBI data of the owning transaction and
 * environment. */
struct MDBX_cursor {
  int32_t signature;
  union {
    /* There are some tricks here in order to check the cursor state by
     * a single simple comparison in all the main scenarios, and at the same
     * time not to complicate the code iterating the cursor stack at all.
     *
     * Therefore the solution is:
     *  - the `flags` and `top` fields are made signed, and their negative
     *    values are used to denote the unset/non-initialized state
     *    of a cursor;
     *  - to invalidate/reset a cursor it is enough to store a negative
     *    value into the combined `top_and_flags` field;
     *  - all the state checks are reduced to a comparison of one of
     *    the flags/top/top_and_flags fields, which depending on a scenario
     *    are treated either as signed or as unsigned. */
    __anonymous_struct_extension__ struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      int8_t flags;
      /* index of the stack top, negative for a non-initialized cursor */
      int8_t top;
#else
      int8_t top;
      int8_t flags;
#endif
    };
    int16_t top_and_flags;
  };
  /* checking flags, including the bits for verifying the type
   * of leaf pages. */
  uint8_t checking;

  /* Points to txn->dbi_state[] for the DBI of this cursor.
   * The __restrict qualifier is useful and safe here as currently understood,
   * since an overlap is possible only with the dbi_state of a transaction,
   * and it happens on reading before any subsequent modification/write. */
  uint8_t *__restrict dbi_state;
  /* Link of the cursor tracking list within a transaction */
  MDBX_txn *txn;
  /* Points to tree->dbs[] for the DBI of this cursor.
   * NOTE(review): translated literally; presumably txn->dbs[] is meant. */
  tree_t *tree;
  /* Points to env->kvs[] for the DBI of this cursor. */
  clc2_t *clc;
  subcur_t *__restrict subcur;
  page_t *pg[CURSOR_STACK_SIZE]; /* stack of pushed pages */
  indx_t ki[CURSOR_STACK_SIZE];  /* stack of page indices */
  MDBX_cursor *next;
  /* The state as of the start of a nested transaction */
  MDBX_cursor *backup;
};

/* A cursor bundled with its own tree_t record (nested_tree). */
struct inner_cursor {
  MDBX_cursor cursor;
  tree_t nested_tree;
};

/* A couple of cursors allocated together: the outer one and the inner one,
 * with a user context placed between them. */
struct cursor_couple {
  MDBX_cursor outer;
  void *userctx; /* User-settable context */
  subcur_t inner;
};

/* Internal environment flags and the masks of the MDBX_* environment flags.
 * static_checks() verifies that ENV_INTERNAL_FLAGS and ENV_USABLE_FLAGS
 * have no common bits. */
enum env_flags {
  /* Failed to update the meta page. Probably an I/O error. */
  ENV_FATAL_ERROR = INT32_MIN /* 0x80000000 */,
  /* Some fields are initialized. */
  ENV_ACTIVE = UINT32_C(0x20000000),
  /* me_txkey is set */
  ENV_TXKEY = UINT32_C(0x10000000),
  /* Legacy MDBX_MAPASYNC (prior v0.9) */
  DEPRECATED_MAPASYNC = UINT32_C(0x100000),
  /* Legacy MDBX_COALESCE (prior v0.12) */
  DEPRECATED_COALESCE = UINT32_C(0x2000000),
  /* All the flags which are for internal use only */
  ENV_INTERNAL_FLAGS = ENV_FATAL_ERROR | ENV_ACTIVE | ENV_TXKEY,
  /* Only a subset of the mdbx_env flags can be changed
   * at runtime. Changing other flags requires closing the
   * environment and re-opening it with the new flags. */
  ENV_CHANGEABLE_FLAGS = MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | DEPRECATED_MAPASYNC | MDBX_NOMEMINIT |
                         DEPRECATED_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE | MDBX_VALIDATION,
  /* The flags which could not be changed at runtime */
  ENV_CHANGELESS_FLAGS = MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS | MDBX_NORDAHEAD |
                         MDBX_LIFORECLAIM | MDBX_EXCLUSIVE,
  /* The union of both masks above */
  ENV_USABLE_FLAGS = ENV_CHANGEABLE_FLAGS | ENV_CHANGELESS_FLAGS
};

/* The database environment.
 *
 * The fields are grouped into a "mostly static" part, a debugging part and
 * a "mostly volatile" part, followed by a placeholder which is sized
 * from lck_t and is intended for the lck-less mode. */
struct MDBX_env {
  /* ----------------------------------------------------- mostly static part */
  mdbx_atomic_uint32_t signature;
  uint32_t flags;
  unsigned ps;          /* DB page size, initialized from me_os_psize */
  osal_mmap_t dxb_mmap; /* The main data file */
/* lazy_fd is an alias for the file handle inside dxb_mmap */
#define lazy_fd dxb_mmap.fd
  mdbx_filehandle_t dsync_fd, fd4meta;
#if defined(_WIN32) || defined(_WIN64)
  HANDLE dxb_lock_event;
#endif                  /* Windows */
  osal_mmap_t lck_mmap; /* The lock file */
  lck_t *lck;

  uint16_t leaf_nodemax;   /* max size of a leaf-node */
  uint16_t branch_nodemax; /* max size of a branch-node */
  uint16_t subpage_limit;
  uint16_t subpage_room_threshold;
  uint16_t subpage_reserve_prereq;
  uint16_t subpage_reserve_limit;
  atomic_pgno_t mlocked_pgno;
  uint8_t ps2ln;                                /* log2 of DB page size */
  int8_t stuck_meta;                            /* recovery-only: target meta page or less than zero */
  uint16_t merge_threshold, merge_threshold_gc; /* pages emptier than this are
                                                   candidates for merging */
  unsigned max_readers;                         /* size of the reader table */
  MDBX_dbi max_dbi;                             /* size of the DB table */
  uint32_t pid;                                 /* process ID of this env */
  osal_thread_key_t me_txkey;                   /* thread-key for readers */
  struct {                                      /* path to the DB files */
    pathchar_t *lck, *dxb, *specified;
    void *buffer;
  } pathname;
  void *page_auxbuf;              /* scratch area for DUPSORT put() */
  MDBX_txn *basal_txn;            /* preallocated write transaction */
  kvx_t *kvs;                     /* array of auxiliary key-value properties */
  uint8_t *__restrict dbs_flags;  /* array of flags from tree_t.flags */
  mdbx_atomic_uint32_t *dbi_seqs; /* array of dbi sequence numbers */
  unsigned maxgc_large1page;      /* Number of pgno_t fit in a single large page */
  unsigned maxgc_per_branch;
  uint32_t registered_reader_pid; /* have liveness lock in reader table */
  void *userctx;                  /* User-settable context */
  MDBX_hsr_func *hsr_callback;    /* Callback for kicking laggard readers */
  size_t madv_threshold;

  /* Tunable options of the environment */
  struct {
    unsigned dp_reserve_limit;
    unsigned rp_augment_limit;
    unsigned dp_limit;
    unsigned dp_initial;
    uint64_t gc_time_limit;
    uint8_t dp_loose_limit;
    uint8_t spill_max_denominator;
    uint8_t spill_min_denominator;
    uint8_t spill_parent4child_denominator;
    unsigned merge_threshold_16dot16_percent;
#if !(defined(_WIN32) || defined(_WIN64))
    unsigned writethrough_threshold;
#endif /* Windows */
    bool prefault_write;
    bool prefer_waf_insteadof_balance; /* Strive to minimize WAF instead of
                                          balancing pages fullment */
    bool need_dp_limit_adjust;
    struct {
      uint16_t limit;
      uint16_t room_threshold;
      uint16_t reserve_prereq;
      uint16_t reserve_limit;
    } subpage;

    union {
      unsigned all; /* all the bits of non_auto at once */
      /* tracks options with non-auto values but tuned by user */
      struct {
        unsigned dp_limit : 1;
        unsigned rp_augment_limit : 1;
        unsigned prefault_write : 1;
      } non_auto;
    } flags;
  } options;

  /* struct geo_in_bytes used for accepting db-geo params from user for the new
   * database creation, i.e. when mdbx_env_set_geometry() was called before
   * mdbx_env_open(). */
  struct {
    size_t lower;  /* minimal size of datafile */
    size_t upper;  /* maximal size of datafile */
    size_t now;    /* current size of datafile */
    size_t grow;   /* step to grow datafile */
    size_t shrink; /* threshold to shrink datafile */
  } geo_in_bytes;

#if MDBX_LOCKING == MDBX_LOCKING_SYSV
  union {
    key_t key;
    int semid;
  } me_sysv_ipc;
#endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */
  bool incore;

#if MDBX_ENABLE_DBI_LOCKFREE
  defer_free_item_t *defer_free;
#endif /* MDBX_ENABLE_DBI_LOCKFREE */

  /* -------------------------------------------------------------- debugging */

#if MDBX_DEBUG
  MDBX_assert_func *assert_func; /*  Callback for assertion failures */
#endif
#ifdef ENABLE_MEMCHECK
  int valgrind_handle;
#endif
#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
  pgno_t poison_edge;
#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */

/* The spilling debug is disabled unless it was requested explicitly */
#ifndef xMDBX_DEBUG_SPILLING
#define xMDBX_DEBUG_SPILLING 0
#endif
#if xMDBX_DEBUG_SPILLING == 2
  size_t debug_dirtied_est, debug_dirtied_act;
#endif /* xMDBX_DEBUG_SPILLING */

  /* --------------------------------------------------- mostly volatile part */

  MDBX_txn *txn; /* current write transaction */
  osal_fastmutex_t dbi_lock;
  unsigned n_dbi; /* number of DBs opened */

  unsigned shadow_reserve_len;
  page_t *__restrict shadow_reserve; /* list of malloc'ed blocks for re-use */

  osal_ioring_t ioring;

#if defined(_WIN32) || defined(_WIN64)
  osal_srwlock_t remap_guard;
  /* Workaround for LockFileEx and WriteFile multithread bug */
  CRITICAL_SECTION windowsbug_lock;
  char *pathname_char; /* cache of multi-byte representation of pathname
                             to the DB files */
#else
  osal_fastmutex_t remap_guard;
#endif

  /* ------------------------------------------------- stub for lck-less mode */
  /* sized as lck_t plus almost a whole cache line, counted in 64-bit items */
  mdbx_atomic_uint64_t lckless_placeholder[(sizeof(lck_t) + MDBX_CACHELINE_SIZE - 1) / sizeof(mdbx_atomic_uint64_t)];
};

/*----------------------------------------------------------------------------*/

/* A pseudo-error code which is used internally and is not exposed outside
 * of libmdbx. */
#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 33)

/* The default number of slots in the reader table.
 * This value was chosen somewhat arbitrarily: 61 is a prime number, and that
 * many reader slots plus a couple of mutexes fit into a single 4KB page.
 * Applications should set the table size using mdbx_env_set_maxreaders(). */
#define DEFAULT_READERS 61

/* Masks of table (DB) flags and the internal per-handle markers.
 * static_checks() verifies that the persistent flags fit into a single byte
 * and are a subset of the usable ones, and that the internal flags do not
 * overlap with the usable ones. */
enum db_flags {
  /* the flags which define the properties of a table */
  DB_PERSISTENT_FLAGS =
      MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP,

  /* mdbx_dbi_open() flags */
  DB_USABLE_FLAGS = DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE,

  DB_VALID = 0x80u /* DB handle is valid, for dbs_flags */,
  DB_POISON = 0x7fu /* update pending */,
  DB_INTERNAL_FLAGS = DB_VALID
};

#if !defined(__cplusplus) || CONSTEXPR_ENUM_FLAGS_OPERATIONS
/* A never-called holder for compile-time checks of the assumptions made
 * about flag masks, sizes and alignment of the internal structures.
 * It also defines KVX_SIZE_LN2, i.e. the log2 of sizeof(kvx_t). */
MDBX_MAYBE_UNUSED static void static_checks(void) {
  STATIC_ASSERT(MDBX_WORDBITS == sizeof(void *) * CHAR_BIT);
  STATIC_ASSERT(UINT64_C(0x80000000) == (uint32_t)ENV_FATAL_ERROR);
  STATIC_ASSERT_MSG(INT16_MAX - CORE_DBS == MDBX_MAX_DBI, "Oops, MDBX_MAX_DBI or CORE_DBS?");
  /* only MDBX_DB_ACCEDE and MDBX_CREATE may be shared between
   * the DB-flags and the env-flags */
  STATIC_ASSERT_MSG((unsigned)(MDBX_DB_ACCEDE | MDBX_CREATE) ==
                        ((DB_USABLE_FLAGS | DB_INTERNAL_FLAGS) & (ENV_USABLE_FLAGS | ENV_INTERNAL_FLAGS)),
                    "Oops, some flags overlapped or wrong");
  STATIC_ASSERT_MSG((DB_INTERNAL_FLAGS & DB_USABLE_FLAGS) == 0, "Oops, some flags overlapped or wrong");
  STATIC_ASSERT_MSG((DB_PERSISTENT_FLAGS & ~DB_USABLE_FLAGS) == 0, "Oops, some flags overlapped or wrong");
  STATIC_ASSERT(DB_PERSISTENT_FLAGS <= UINT8_MAX);
  STATIC_ASSERT_MSG((ENV_INTERNAL_FLAGS & ENV_USABLE_FLAGS) == 0, "Oops, some flags overlapped or wrong");

  /* the state bits of a transaction must not overlap with the begin-flags,
   * and txn_shrink_allowed must not overlap with any of them */
  STATIC_ASSERT_MSG((txn_state_flags & (txn_rw_begin_flags | txn_ro_begin_flags)) == 0,
                    "Oops, some txn flags overlapped or wrong");
  STATIC_ASSERT_MSG(((txn_rw_begin_flags | txn_ro_begin_flags | txn_state_flags) & txn_shrink_allowed) == 0,
                    "Oops, some txn flags overlapped or wrong");

  /* layout of the lock-file structures: the size of a reader slot and
   * the cache-line alignment of the lck_t fields */
  STATIC_ASSERT(sizeof(reader_slot_t) == 32);
#if MDBX_LOCKING > 0
  STATIC_ASSERT(offsetof(lck_t, wrt_lock) % MDBX_CACHELINE_SIZE == 0);
  STATIC_ASSERT(offsetof(lck_t, rdt_lock) % MDBX_CACHELINE_SIZE == 0);
#else
  STATIC_ASSERT(offsetof(lck_t, cached_oldest) % MDBX_CACHELINE_SIZE == 0);
  STATIC_ASSERT(offsetof(lck_t, rdt_length) % MDBX_CACHELINE_SIZE == 0);
#endif /* MDBX_LOCKING */
  STATIC_ASSERT(offsetof(lck_t, rdt) % MDBX_CACHELINE_SIZE == 0);

#if FLEXIBLE_ARRAY_MEMBERS
  /* the header-size constants must match the offsets of the trailing arrays */
  STATIC_ASSERT(NODESIZE == offsetof(node_t, payload));
  STATIC_ASSERT(PAGEHDRSZ == offsetof(page_t, entries));
#endif /* FLEXIBLE_ARRAY_MEMBERS */
  /* clc_t takes 3 machine words, kvx_t takes 8 ones */
  STATIC_ASSERT(sizeof(clc_t) == 3 * sizeof(void *));
  STATIC_ASSERT(sizeof(kvx_t) == 8 * sizeof(void *));

/* log2 of sizeof(kvx_t): 64 bytes with 64-bit words, otherwise 32 bytes */
#if MDBX_WORDBITS == 64
#define KVX_SIZE_LN2 6
#else
#define KVX_SIZE_LN2 5
#endif
  STATIC_ASSERT(sizeof(kvx_t) == (1u << KVX_SIZE_LN2));
}
#endif /* Disabled for MSVC 19.0 (VisualStudio 2015) */

/******************************************************************************/

/* The mask of flags which are valid for mdbx_node_add():
 * the N_DUP and N_TREE node flags plus MDBX_RESERVE and MDBX_APPEND. */
#define NODE_ADD_FLAGS (N_DUP | N_TREE | MDBX_RESERVE | MDBX_APPEND)

/* Returns the child page number stored in a branch node;
 * the child_pgno field is read as an unaligned 32-bit value. */
MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_pgno(const node_t *const __restrict node) {
  const pgno_t child = UNALIGNED_PEEK_32(node, node_t, child_pgno);
  return child;
}

/* Stores the child page number into a branch node.
 * The number is expected to be within MIN_PAGENO..MAX_PAGENO, which is
 * checked only by an assertion. */
static inline void node_set_pgno(node_t *const __restrict node, pgno_t pgno) {
  assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO);
  const uint32_t raw = (uint32_t)pgno;
  UNALIGNED_POKE_32(node, node_t, child_pgno, raw);
}

/* Returns the data size stored in a leaf node;
 * the dsize field is read as an unaligned 32-bit value. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t node_ds(const node_t *const __restrict node) {
  const size_t data_bytes = UNALIGNED_PEEK_32(node, node_t, dsize);
  return data_bytes;
}

/* Stores the data size into a leaf node as a 32-bit value.
 * The size is expected to be less than INT_MAX, which is checked
 * only by an assertion. */
static inline void node_set_ds(node_t *const __restrict node, size_t size) {
  assert(size < INT_MAX);
  const uint32_t raw = (uint32_t)size;
  UNALIGNED_POKE_32(node, node_t, dsize, raw);
}

/* Returns the key size stored in a node;
 * the ksize field is read as an unaligned 16-bit value. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t node_ks(const node_t *const __restrict node) {
  const size_t key_bytes = UNALIGNED_PEEK_16(node, node_t, ksize);
  return key_bytes;
}

/* Stores the key size into a node as a 16-bit value.
 * The size is expected to be less than INT16_MAX, which is checked
 * only by an assertion. */
static inline void node_set_ks(node_t *const __restrict node, size_t size) {
  assert(size < INT16_MAX);
  const uint16_t raw = (uint16_t)size;
  UNALIGNED_POKE_16(node, node_t, ksize, raw);
}

/* Returns the flags byte of a node. */
MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t node_flags(const node_t *const __restrict node) {
  const uint8_t bits = UNALIGNED_PEEK_8(node, node_t, flags);
  return bits;
}

/* Stores the flags byte into a node. */
static inline void node_set_flags(node_t *const __restrict node, uint8_t flags) {
  const uint8_t bits = flags;
  UNALIGNED_POKE_8(node, node_t, flags, bits);
}

/* Returns the address of the node's key, which is located right after
 * the node header, i.e. NODESIZE bytes from the beginning of the node. */
MDBX_NOTHROW_PURE_FUNCTION static inline void *node_key(const node_t *const __restrict node) {
  void *const key_ptr = ptr_disp(node, NODESIZE);
  return key_ptr;
}

/* Returns the address of the node's data, which follows the key,
 * i.e. node_ks() bytes from the address of the key. */
MDBX_NOTHROW_PURE_FUNCTION static inline void *node_data(const node_t *const __restrict node) {
  const size_t key_bytes = node_ks(node);
  return ptr_disp(node_key(node), key_bytes);
}

/* Returns the size of a node in a leaf page for the given lengths of a key
 * and a value: the node header plus the payload rounded by EVEN_CEIL(). */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t node_size_len(const size_t key_len, const size_t value_len) {
  const size_t payload = key_len + value_len;
  return NODESIZE + EVEN_CEIL(payload);
}
/* The same as node_size_len(), but takes the lengths from the given key and
 * value, each of which may be a null pointer meaning the zero length. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t node_size(const MDBX_val *key, const MDBX_val *value) {
  const size_t key_len = key ? key->iov_len : 0;
  const size_t value_len = value ? value->iov_len : 0;
  return node_size_len(key_len, value_len);
}

/* Returns the page number stored in the data area of a node.
 * The node is expected to have the N_BIG flag, which is checked
 * only by an assertion. */
MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_largedata_pgno(const node_t *const __restrict node) {
  assert(node_flags(node) & N_BIG);
  const pgno_t large_pgno = peek_pgno(node_data(node));
  return large_pgno;
}

/* The slow path of node_read(), which is called when the flags of a node
 * are equal to N_BIG. Defined elsewhere; the result must be checked. */
MDBX_INTERNAL int __must_check_result node_read_bigdata(MDBX_cursor *mc, const node_t *node, MDBX_val *data,
                                                        const page_t *mp);

/* Fills `data` with the address and the size of the node's data.
 * Returns MDBX_SUCCESS at once unless the node flags are equal to N_BIG,
 * in which case the result of node_read_bigdata() is returned. */
static inline int __must_check_result node_read(MDBX_cursor *mc, const node_t *node, MDBX_val *data, const page_t *mp) {
  data->iov_base = node_data(node);
  data->iov_len = node_ds(node);
  if (unlikely(node_flags(node) == N_BIG))
    return node_read_bigdata(mc, node, data, mp);
  return MDBX_SUCCESS;
}

/*----------------------------------------------------------------------------*/

/* Node-level primitives which are defined elsewhere.
 * NOTE(review): the one-line summaries below are inferred only from the names
 * and the signatures, so confirm them against the definitions. */

/* Presumably searches for a node by the given key using the cursor. */
MDBX_INTERNAL nsr_t node_search(MDBX_cursor *mc, const MDBX_val *key);

/* Presumably adds a branch node with the given key and child page number
 * at the index `indx`. The result must be checked. */
MDBX_INTERNAL int __must_check_result node_add_branch(MDBX_cursor *mc, size_t indx, const MDBX_val *key, pgno_t pgno);

/* Presumably adds a leaf node with the given key and data at the index `indx`.
 * The result must be checked. */
MDBX_INTERNAL int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx, const MDBX_val *key, MDBX_val *data,
                                                    unsigned flags);

/* Presumably adds a key-only item of a dupfix-page at the index `indx`.
 * The result must be checked. */
MDBX_INTERNAL int __must_check_result node_add_dupfix(MDBX_cursor *mc, size_t indx, const MDBX_val *key);

/* Presumably deletes the node the cursor is positioned at. */
MDBX_INTERNAL void node_del(MDBX_cursor *mc, size_t ksize);

/* Presumably shrinks the node at the index `indx` of the page `mp`. */
MDBX_INTERNAL node_t *node_shrink(page_t *mp, size_t indx, node_t *node);

#if MDBX_ENABLE_DBI_SPARSE

/* The fallback used by dbi_bitmap_ctz() when neither the GNU C builtins nor
 * the MSVC intrinsics are available. Defined elsewhere. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn,
                                                                                           intptr_t bmi);

/* Returns the number of trailing zero bits, i.e. the index of the least
 * significant set bit, of the non-zero bitmap item `bmi`.
 *
 * The implementation is chosen at compile time:
 *  - the GNU C builtins, selected by the size of a dbi_sparse item;
 *  - the MSVC bit-scan intrinsics;
 *  - dbi_bitmap_ctz_fallback() otherwise.
 *
 * `txn` is used only for the assertion and for taking the size of
 * a dbi_sparse item. */
static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) {
  tASSERT(txn, bmi != 0);
  STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->dbi_sparse[0]));
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
  if (sizeof(txn->dbi_sparse[0]) <= sizeof(int))
    return __builtin_ctz((int)bmi);
  if (sizeof(txn->dbi_sparse[0]) == sizeof(long))
    return __builtin_ctzl((long)bmi);
#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || __has_builtin(__builtin_ctzll)
  return __builtin_ctzll(bmi);
#endif /* have(long long) && long long == uint64_t */
#endif /* GNU C */

#if defined(_MSC_VER)
  unsigned long index;
  if (sizeof(txn->dbi_sparse[0]) > 4) {
#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64)
    _BitScanForward64(&index, bmi);
    return index;
#else
    /* No 64-bit scan is available here, so the halves are scanned separately:
     * the low half has the priority, and the high half is used only when
     * the low one is empty, with its bits being numbered starting from 32. */
    if ((uint32_t)bmi == 0) {
      _BitScanForward(&index, (uint32_t)((uint64_t)bmi >> 32));
      return (size_t)index + 32;
    }
#endif
  }
  _BitScanForward(&index, (uint32_t)bmi);
  return index;
#endif /* MSVC */

  return dbi_bitmap_ctz_fallback(txn, bmi);
}

/* Iterates over the DBI-slots of the transaction TXN starting from FROM,
 * executing the statement that follows for each index I whose bit is set in
 * the dbi_sparse bitmap and whose dbi_state[I] is non-zero.
 *
 * LY: The macro is deliberately made of a single loop in order to keep
 * the `break` statement usable inside the body.
 *
 * The loop declares the `bitmap_chunk` (the number of bits in a bitmap item),
 * `bitmap_item` (the rest of the current bitmap item) and I variables.
 * Empty bitmap items are skipped as a whole, and the runs of zero bits are
 * skipped by dbi_bitmap_ctz().
 *
 * All the macro arguments are parenthesized, and the TXN argument (instead of
 * some `txn` variable from the scope of a caller) is passed
 * to dbi_bitmap_ctz(). */
#define TXN_FOREACH_DBI_FROM(TXN, I, FROM)                                                                             \
  for (size_t bitmap_chunk = CHAR_BIT * sizeof((TXN)->dbi_sparse[0]), bitmap_item = (TXN)->dbi_sparse[0] >> (FROM),    \
              I = (FROM);                                                                                              \
       I < (TXN)->n_dbi; ++I)                                                                                          \
    if (bitmap_item == 0) {                                                                                            \
      I = (I - 1) | (bitmap_chunk - 1);                                                                                \
      bitmap_item = (TXN)->dbi_sparse[(1 + I) / bitmap_chunk];                                                         \
      if (!bitmap_item)                                                                                                \
        /* coverity[const_overflow] */                                                                                 \
        I += bitmap_chunk;                                                                                             \
      continue;                                                                                                        \
    } else if ((bitmap_item & 1) == 0) {                                                                               \
      size_t bitmap_skip = dbi_bitmap_ctz((TXN), bitmap_item);                                                         \
      bitmap_item >>= bitmap_skip;                                                                                     \
      I += bitmap_skip - 1;                                                                                            \
      continue;                                                                                                        \
    } else if (bitmap_item >>= 1, (TXN)->dbi_state[I])

#else

/* Iterates over the DBI-slots of the transaction TXN starting from FROM,
 * executing the statement that follows for each index I whose dbi_state[I]
 * is non-zero. This is the variant without the dbi_sparse bitmap.
 * All the macro arguments are parenthesized, so any pointer expression
 * could be passed as TXN. */
#define TXN_FOREACH_DBI_FROM(TXN, I, FROM)                                                                             \
  for (size_t I = (FROM); I < (TXN)->n_dbi; ++I)                                                                       \
    if ((TXN)->dbi_state[I])

#endif /* MDBX_ENABLE_DBI_SPARSE */

/* Shortcuts for TXN_FOREACH_DBI_FROM(): the first one iterates starting from
 * the zero index, the second one starts from CORE_DBS. */
#define TXN_FOREACH_DBI_ALL(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, 0)
#define TXN_FOREACH_DBI_USER(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, CORE_DBS)

/* The slow path of dbi_check(), which is called when the lazy initialization
 * of the DBI-slot is not done yet or the handle has been changed.
 * Defined elsewhere. */
MDBX_INTERNAL int dbi_import(MDBX_txn *txn, const size_t dbi);

/* The result of dbi_snap(): a sequence number coupled with flags.
 * NOTE(review): presumably these are snapshots of env->dbi_seqs[dbi] and
 * env->dbs_flags[dbi] — confirm against the definition of dbi_snap(). */
struct dbi_snap_result {
  uint32_t sequence;
  unsigned flags;
};
MDBX_INTERNAL struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi);

/* Defined elsewhere. NOTE(review): the meaning of `keep` is not visible from
 * this declaration — see the definition. */
MDBX_INTERNAL int dbi_update(MDBX_txn *txn, int keep);

/* Returns the state flags (see enum dbi_state) of the given DBI within the
 * transaction, or zero when `dbi` is not less than txn->n_dbi or, in the case
 * of MDBX_ENABLE_DBI_SPARSE, the corresponding bit of the dbi_sparse bitmap
 * is not set. */
static inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) {
  STATIC_ASSERT((int)DBI_DIRTY == MDBX_DBI_DIRTY && (int)DBI_STALE == MDBX_DBI_STALE &&
                (int)DBI_FRESH == MDBX_DBI_FRESH && (int)DBI_CREAT == MDBX_DBI_CREAT);

  bool present = dbi < txn->n_dbi;
#if MDBX_ENABLE_DBI_SPARSE
  if (present) {
    /* the bitmap is read only for an index within the bounds */
    const size_t bits_per_item = CHAR_BIT * sizeof(txn->dbi_sparse[0]);
    const size_t item = txn->dbi_sparse[dbi / bits_per_item];
    present = ((item >> dbi % bits_per_item) & 1) != 0;
  }
#endif /* MDBX_ENABLE_DBI_SPARSE */
  return likely(present) ? txn->dbi_state[dbi] : 0;
}

/* Returns true when the current sequence number of the DBI within the
 * environment differs from the one stored in the transaction.
 * The DBI-slot is expected to be lazily initialized (DBI_LINDO), which is
 * checked only by an assertion. */
static inline bool dbi_changed(const MDBX_txn *txn, const size_t dbi) {
  const MDBX_env *const env = txn->env;
  eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO);
  const uint32_t env_seq = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease);
  const uint32_t txn_seq = txn->dbi_seqs[dbi];
  return unlikely(env_seq != txn_seq);
}

/* Checks the given DBI within the transaction.
 * On the fast path, i.e. when the lazy initialization of the DBI-slot is done
 * and the handle was not changed, returns MDBX_SUCCESS for a valid handle or
 * MDBX_BAD_DBI otherwise. In all other cases returns the result
 * of dbi_import(). */
static inline int dbi_check(const MDBX_txn *txn, const size_t dbi) {
  const uint8_t state = dbi_state(txn, dbi);
  const bool ready = (state & DBI_LINDO) != 0 && !dbi_changed(txn, dbi);
  if (unlikely(!ready))
    /* The slow path: lazy post-initialization and import */
    return dbi_import((MDBX_txn *)txn, dbi);

  return (state & DBI_VALID) ? MDBX_SUCCESS : MDBX_BAD_DBI;
}

/* Returns the next sequence number for the given DBI, i.e. the current one
 * plus 1, but never zero: 1 is returned instead of zero on a wrap-around. */
static inline uint32_t dbi_seq_next(const MDBX_env *const env, size_t dbi) {
  const uint32_t next = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease) + 1;
  if (next == 0)
    return 1;
  return next;
}

/* Defined elsewhere. NOTE(review): judging by the signature, opens a table by
 * its name with the given flags and comparators, and stores the handle
 * into `*dbi` — confirm against the definition. */
MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDBX_dbi *dbi,
                           MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp);

/* Defined elsewhere. NOTE(review): judging by the signature, binds the given
 * flags and comparators to an already known DBI — confirm against
 * the definition. */
MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp,
                           MDBX_cmp_func *datacmp);

/* An item of a singly-linked chain of deferred releases,
 * see MDBX_env::defer_free and dbi_defer_release(). */
typedef struct defer_free_item {
  struct defer_free_item *next; /* the next item of a chain */
  uint64_t timestamp;
} defer_free_item_t;

/* DBI release helpers which are defined elsewhere.
 * NOTE(review): the semantics is not visible from these declarations;
 * judging by the names, dbi_defer_release() takes a chain of items to be
 * released later, dbi_close_release() releases a closed handle, and dbi_dig()
 * gets the tree_t of a DBI with `fallback` as a spare storage — confirm
 * against the definitions. */
MDBX_INTERNAL int dbi_defer_release(MDBX_env *const env, defer_free_item_t *const chain);
MDBX_INTERNAL int dbi_close_release(MDBX_env *env, MDBX_dbi dbi);
MDBX_INTERNAL const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback);

/* The result of dbi_rename_locked(): an error code coupled with a pointer to
 * a defer_free_item_t.
 * NOTE(review): presumably `defer` is an item to be released by the caller
 * later — confirm against the definition of dbi_rename_locked(). */
struct dbi_rename_result {
  defer_free_item_t *defer;
  int err;
};

MDBX_INTERNAL struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name);

/* Conversions between a number of pages and a 16-bit `pv` value, which are
 * defined elsewhere.
 * NOTE(review): presumably `pv` is a packed representation of a number of
 * pages, and pv2pages_verify() is a self-check of both conversions — confirm
 * against the definitions. */
MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL pgno_t pv2pages(uint16_t pv);

MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint16_t pages2pv(size_t pages);

MDBX_MAYBE_UNUSED MDBX_INTERNAL bool pv2pages_verify(void);

/*------------------------------------------------------------------------------
 * Nodes, Keys & Values length limitation factors:
 *
 * BRANCH_NODE_MAX
 *   A branch-page must contain at least two nodes, each holding a key and a
 *   child page number. But a page can't be split if it contains fewer than
 *   4 keys, i.e. a page should not overflow before adding the fourth key.
 *   Therefore, at least 3 branch-nodes should fit in a single branch-page.
 *   Further, the first node of a branch-page doesn't contain a key, i.e. the
 *   first node always requires space just for itself. Thus:
 *       PAGESPACE = pagesize - page_hdr_len;
 *       BRANCH_NODE_MAX = even_floor(
 *         (PAGESPACE - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t));
 *       KEYLEN_MAX = BRANCH_NODE_MAX - node_hdr_len;
 *
 * LEAF_NODE_MAX
 *   A leaf-node must fit into a single leaf-page, while its value could be
 *   placed on a large/overflow page. However, it may be required to insert a
 *   nearly page-sized node between two large nodes which already fill up a
 *   page. In this case the page must be split into two if some pair of nodes
 *   fits on one page, or otherwise into THREE pages with a single node on
 *   each of them. Such 1-into-3 page splitting is costly and complex since it
 *   requires TWO insertions into the parent page, which could lead to
 *   splitting it too, and so on up to the root. Therefore double-splitting is
 *   avoided here and the maximum node size is half of a leaf page space:
 *       LEAF_NODE_MAX = even_floor(PAGESPACE / 2 - sizeof(indx_t));
 *       DATALEN_NO_OVERFLOW = LEAF_NODE_MAX - NODESIZE - KEYLEN_MAX;
 *
 *  - Table-node must fit into one leaf-page:
 *       TABLE_NAME_MAX = LEAF_NODE_MAX - node_hdr_len - sizeof(tree_t);
 *
 *  - Dupsort values themselves are keys in a dupsort-table and couldn't be
 *    longer than the KEYLEN_MAX. But a dupsort node must not be greater than
 *    LEAF_NODE_MAX, since a dupsort value couldn't be placed on
 *    a large/overflow page:
 *       DUPSORT_DATALEN_MAX = min(KEYLEN_MAX,
 *                                 max(DATALEN_NO_OVERFLOW, sizeof(tree_t)));
 */

/* The space of a page available for the payload,
 * i.e. the page size without the page header. */
#define PAGESPACE(pagesize) ((pagesize) - PAGEHDRSZ)

/* The maximal size of a branch-node for the given page size:
 * the page space without an index slot and a key-less first node is divided
 * between two other nodes, each of which requires its own index slot. */
#define BRANCH_NODE_MAX(pagesize)                                                                                      \
  (EVEN_FLOOR((PAGESPACE(pagesize) - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t)))

/* The maximal size of a leaf-node for the given page size:
 * half of the page space without an index slot. */
#define LEAF_NODE_MAX(pagesize) (EVEN_FLOOR(PAGESPACE(pagesize) / 2) - sizeof(indx_t))

/* The number of pgno_t items which fit in the space of a single page,
 * minus one. */
#define MAX_GC1OVPAGE(pagesize) (PAGESPACE(pagesize) / sizeof(pgno_t) - 1)

/* Returns the maximal key length for the given page size and table flags:
 *  - 8 bytes for MDBX_INTEGERKEY;
 *  - otherwise the limit imposed by the size of a branch-node, which for
 *    tables with any of the dupsort-related flags is additionally capped by
 *    the space of a leaf-node holding a tree_t record.
 * The page size is expected to be a power of 2 within
 * MDBX_MIN_PAGESIZE..MDBX_MAX_PAGESIZE, which is checked only
 * by an assertion. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t keysize_max(size_t pagesize, MDBX_db_flags_t flags) {
  assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && is_powerof2(pagesize));
  STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE >= 8);
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE -
                    /* sizeof(uint64) as a key */ 8 >
                sizeof(tree_t));

  if (flags & MDBX_INTEGERKEY)
    return 8 /* sizeof(uint64_t) */;

  const intptr_t branch_limit = BRANCH_NODE_MAX(pagesize) - NODESIZE;
  const bool multi_value = (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) != 0;
  if (!multi_value)
    return branch_limit;

  const intptr_t leaf_limit = LEAF_NODE_MAX(pagesize) - NODESIZE - sizeof(tree_t);
  return (leaf_limit > branch_limit) ? branch_limit : leaf_limit;
}

/* The same as keysize_max(), but uses the branch_nodemax and leaf_nodemax
 * values of the environment instead of computing them from the page size.
 * The result is expected to be equal to keysize_max(env->ps, flags), which is
 * checked only by an assertion. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) {
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE -
                    /* sizeof(uint64) as a key */ 8 >
                sizeof(tree_t));

  size_t size_max = 8 /* sizeof(uint64_t) */;
  if (!(flags & MDBX_INTEGERKEY)) {
    const intptr_t branch_limit = env->branch_nodemax - NODESIZE;
    size_max = branch_limit;
    if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) {
      const intptr_t leaf_limit = env->leaf_nodemax - NODESIZE - sizeof(tree_t);
      if (leaf_limit <= branch_limit)
        size_max = leaf_limit;
    }
  }
  eASSERT(env, size_max == keysize_max(env->ps, flags));
  return size_max;
}

/* Returns the minimal key length for the given table flags:
 * 4 bytes for MDBX_INTEGERKEY, otherwise zero. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t keysize_min(MDBX_db_flags_t flags) {
  if (flags & MDBX_INTEGERKEY)
    return 4 /* sizeof(uint32_t) */;
  return 0;
}

/* Returns the minimal value length for the given table flags:
 * 4 bytes for MDBX_INTEGERDUP, sizeof(indx_t) for MDBX_DUPFIXED,
 * otherwise zero. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t valsize_min(MDBX_db_flags_t flags) {
  if (flags & MDBX_INTEGERDUP)
    return 4 /* sizeof(uint32_t) */;
  return (flags & MDBX_DUPFIXED) ? sizeof(indx_t) : 0;
}

/* Returns the maximal value length for the given page size and table flags:
 *  - 8 bytes for MDBX_INTEGERDUP;
 *  - the maximal length of an ordinary key for dupsort-tables;
 *  - otherwise the least of the hard limit 0x7FF00000 bytes, a quarter
 *    of PAGELIST_LIMIT pages, and half of MAX_MAPSIZE.
 * The page size is expected to be a power of 2 within
 * MDBX_MIN_PAGESIZE..MDBX_MAX_PAGESIZE, which is checked only
 * by an assertion. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t valsize_max(size_t pagesize, MDBX_db_flags_t flags) {
  assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && is_powerof2(pagesize));
  STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO);

  if (flags & MDBX_INTEGERDUP)
    return 8 /* sizeof(uint64_t) */;
  else if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP))
    return keysize_max(pagesize, 0);

  const unsigned shift = log2n_powerof2(pagesize);
  const size_t hard = 0x7FF00000ul;
  const size_t pages_limit = PAGELIST_LIMIT / 4;
  size_t limit = hard;
  if ((hard >> shift) >= pages_limit)
    limit = pages_limit << shift;

  if (limit < MAX_MAPSIZE / 2)
    return limit;
  return MAX_MAPSIZE / 2;
}

/* The same as valsize_max(), but uses the ps2ln value of the environment
 * instead of computing the log2 of the page size.
 * The result is expected to be equal to valsize_max(env->ps, flags), which is
 * checked only by an assertion. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) {
  STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO);

  size_t size_max;
  if (flags & MDBX_INTEGERDUP) {
    size_max = 8 /* sizeof(uint64_t) */;
  } else if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP)) {
    size_max = env_keysize_max(env, 0);
  } else {
    const size_t hard = 0x7FF00000ul;
    const size_t pages_limit = PAGELIST_LIMIT / 4;
    size_t limit = hard;
    if ((hard >> env->ps2ln) >= pages_limit)
      limit = pages_limit << env->ps2ln;
    size_max = (limit < MAX_MAPSIZE / 2) ? limit : MAX_MAPSIZE / 2;
  }
  eASSERT(env, size_max == valsize_max(env->ps, flags));
  return size_max;
}

/*----------------------------------------------------------------------------*/

/* Returns the space required within a leaf page for the given key and data,
 * including an index slot. When the node is greater than env->leaf_nodemax,
 * the data is considered to be put on a large/overflow page, so only the key
 * and a page number are counted. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t leaf_size(const MDBX_env *env, const MDBX_val *key,
                                                          const MDBX_val *data) {
  const size_t inplace_bytes = node_size(key, data);
  const size_t node_bytes = (inplace_bytes > env->leaf_nodemax)
                                ? /* put on large/overflow page */ node_size_len(key->iov_len, 0) + sizeof(pgno_t)
                                : inplace_bytes;
  return node_bytes + sizeof(indx_t);
}

/* Returns the space required within a branch page for a node with the given
 * key, including an index slot. Such a node consists of just the node header
 * plus the key, there is no data. A key which does not fit
 * into env->branch_nodemax leads to mdbx_panic(). */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t branch_size(const MDBX_env *env, const MDBX_val *key) {
  const size_t node_bytes = node_size(key, nullptr);
  if (likely(node_bytes <= env->branch_nodemax))
    return node_bytes + sizeof(indx_t);

  /* put on large/overflow page, not implemented */
  mdbx_panic("node_size(key) %zu > %u branch_nodemax", node_bytes, env->branch_nodemax);
  return node_size(key, nullptr) + sizeof(pgno_t) + sizeof(indx_t);
}

/* Converts the flags of a table into the flags of its nested one:
 * MDBX_DUPFIXED is kept as is, MDBX_INTEGERDUP is turned into MDBX_INTEGERKEY,
 * and MDBX_REVERSEDUP is turned into MDBX_REVERSEKEY. All other bits
 * are dropped. */
MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t flags_db2sub(uint16_t db_flags) {
  /* MDBX_INTEGERDUP => MDBX_INTEGERKEY */
#define SHIFT_INTEGERDUP_TO_INTEGERKEY 2
  STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == MDBX_INTEGERKEY);
  /* MDBX_REVERSEDUP => MDBX_REVERSEKEY */
#define SHIFT_REVERSEDUP_TO_REVERSEKEY 5
  STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == MDBX_REVERSEKEY);

  uint16_t sub_flags = db_flags & MDBX_DUPFIXED;
  if (db_flags & MDBX_INTEGERDUP)
    sub_flags |= MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY;
  if (db_flags & MDBX_REVERSEDUP)
    sub_flags |= MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY;
  return sub_flags;
}

/* Validates a combination of table flags.
 * Apart from the key-mode bits (MDBX_REVERSEKEY, MDBX_INTEGERKEY) only the
 * listed dupsort combinations or MDBX_DB_DEFAULTS are accepted; anything else
 * is reported via NOTICE() and rejected. For an accepted combination the
 * result is false only when both key-mode bits are set at once. */
static inline bool check_table_flags(unsigned flags) {
  const unsigned keymode_bits = MDBX_REVERSEKEY | MDBX_INTEGERKEY;
  switch (flags & ~keymode_bits) {
  case MDBX_DB_DEFAULTS:
  case MDBX_DUPSORT:
  case MDBX_DUPSORT | MDBX_REVERSEDUP:
  case MDBX_DUPSORT | MDBX_DUPFIXED:
  case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP:
  case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP:
  case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
    return (flags & keymode_bits) != keymode_bits;
  default:
    NOTICE("invalid db-flags 0x%x", flags);
    return false;
  }
}

/* Calls tbl_setup() only when kvx->clc.v.lmax is still zero;
 * otherwise returns MDBX_SUCCESS right away. */
static inline int tbl_setup_ifneed(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *const db) {
  if (likely(kvx->clc.v.lmax))
    return MDBX_SUCCESS;
  return tbl_setup(env, kvx, db);
}

/*----------------------------------------------------------------------------*/

/* Converts a page number (or page count) into a byte offset (or size)
 * by shifting it left by the page-size exponent env->ps2ln. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t pgno2bytes(const MDBX_env *env, size_t pgno) {
  eASSERT(env, (1u << env->ps2ln) == env->ps);
  const size_t bytes = pgno << env->ps2ln;
  return bytes;
}

/* Returns the address of page `pgno` inside the environment's data mapping,
 * i.e. env->dxb_mmap.base displaced by the page's byte offset. */
MDBX_NOTHROW_PURE_FUNCTION static inline page_t *pgno2page(const MDBX_env *env, size_t pgno) {
  const size_t offset = pgno2bytes(env, pgno);
  return ptr_disp(env->dxb_mmap.base, offset);
}

MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t bytes2pgno(const MDBX_env *env, size_t bytes) {
  eASSERT(env, (env->ps >> env->ps2ln) == 1);
  return (pgno_t)(bytes >> env->ps2ln);
}

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes);

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t pgno_align2os_bytes(const MDBX_env *env, size_t pgno);

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL pgno_t pgno_align2os_pgno(const MDBX_env *env, size_t pgno);

/* Number of pages needed to hold `bytes` of payload preceded by one page
 * header (PAGEHDRSZ), rounded up to whole pages. */
MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t largechunk_npages(const MDBX_env *env, size_t bytes) {
  const size_t last_byte = PAGEHDRSZ - 1 + bytes;
  return bytes2pgno(env, last_byte) + 1;
}

/* Builds an MDBX_val describing the key stored in `node`:
 * the pointer comes from node_key(), the length from node_ks(). */
MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val get_key(const node_t *node) {
  MDBX_val result;
  result.iov_base = node_key(node);
  result.iov_len = node_ks(node);
  return result;
}

/* Stores the key of `node` into *keyptr; does nothing when keyptr is null. */
static inline void get_key_optional(const node_t *node, MDBX_val *keyptr /* __may_null */) {
  if (!keyptr)
    return;
  *keyptr = get_key(node);
}

/* Address of the first byte following the page header (PAGEHDRSZ bytes). */
MDBX_NOTHROW_PURE_FUNCTION static inline void *page_data(const page_t *mp) {
  void *const payload = ptr_disp(mp, PAGEHDRSZ);
  return payload;
}

/* Inverse of taking the address of a page's `entries` member: recovers the
 * enclosing page_t from a pointer to its `entries` field. */
MDBX_NOTHROW_PURE_FUNCTION static inline const page_t *data_page(const void *data) {
  const page_t *const owner = container_of(data, page_t, entries);
  return owner;
}

/* Views the payload of page `mp` (the bytes after its header) as a meta_t. */
MDBX_NOTHROW_PURE_FUNCTION static inline meta_t *page_meta(page_t *mp) {
  meta_t *const meta = (meta_t *)page_data(mp);
  return meta;
}

/* Number of entries on the page, computed as half of mp->lower. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_numkeys(const page_t *mp) {
  const size_t count = mp->lower >> 1;
  return count;
}

/* Free space on the page: the gap between mp->lower and mp->upper. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_room(const page_t *mp) {
  const size_t gap = mp->upper - mp->lower;
  return gap;
}

/* Payload capacity of a page: the page size minus the page header. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_space(const MDBX_env *env) {
  STATIC_ASSERT(PAGEHDRSZ % 2 == 0);
  const size_t capacity = env->ps - PAGEHDRSZ;
  return capacity;
}

/* Occupied payload bytes on the page: capacity minus the free gap. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_used(const MDBX_env *env, const page_t *mp) {
  const size_t capacity = page_space(env);
  const size_t unused = page_room(mp);
  return capacity - unused;
}

/* Share of the page payload that is in use, expressed in tenths of a percent
 * (0..1000) and rounded to the nearest integer. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline unsigned page_fill_percentum_x10(const MDBX_env *env,
                                                                                            const page_t *mp) {
  const size_t capacity = page_space(env);
  const size_t occupied = page_used(env, mp);
  /* adding capacity / 2 before dividing rounds to nearest */
  const size_t permille = (occupied * 1000 + capacity / 2) / capacity;
  return (unsigned)permille;
}

/* Returns the i-th node of a leaf or branch page. mp->entries[i] holds the
 * node's offset counted from the end of the page header. */
MDBX_NOTHROW_PURE_FUNCTION static inline node_t *page_node(const page_t *mp, size_t i) {
  assert(page_type_compat(mp) == P_LEAF || page_type(mp) == P_BRANCH);
  assert(page_numkeys(mp) > i);
  assert(mp->entries[i] % 2 == 0);
  node_t *const node = ptr_disp(mp, mp->entries[i] + PAGEHDRSZ);
  return node;
}

/* Address of the i-th fixed-size item on a dupfix leaf page: items are laid
 * out back to back right after the page header, mp->dupfix_ksize bytes each.
 * `keysize` is used only to assert that it matches the page's item size. */
MDBX_NOTHROW_PURE_FUNCTION static inline void *page_dupfix_ptr(const page_t *mp, size_t i, size_t keysize) {
  assert(page_type_compat(mp) == (P_LEAF | P_DUPFIX));
  assert(i == (indx_t)i);
  assert(mp->dupfix_ksize == keysize);
  (void)keysize;
  void *const item = ptr_disp(mp, PAGEHDRSZ + mp->dupfix_ksize * (indx_t)i);
  return item;
}

/* Builds an MDBX_val for the i-th item of a dupfix leaf page; the length is
 * taken from the page (mp->dupfix_ksize), not from `keysize`. */
MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val page_dupfix_key(const page_t *mp, size_t i, size_t keysize) {
  MDBX_val item;
  item.iov_len = mp->dupfix_ksize;
  item.iov_base = page_dupfix_ptr(mp, i, keysize);
  return item;
}

/*----------------------------------------------------------------------------*/

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b);

/* cmp_int_align2 is declared as a separate comparator when
 * MDBX_UNALIGNED_OK < 2 or when debug/assertions are enabled;
 * otherwise the name is merely an alias of cmp_int_unaligned. */
#if MDBX_UNALIGNED_OK < 2 || (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG))
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int
/* Compare two items pointing at 2-byte aligned unsigned int's. */
cmp_int_align2(const MDBX_val *a, const MDBX_val *b);
#else
#define cmp_int_align2 cmp_int_unaligned
#endif /* MDBX_UNALIGNED_OK < 2 || debug */

/* cmp_int_align4 is declared as a separate comparator when
 * MDBX_UNALIGNED_OK < 4 or when debug/assertions are enabled;
 * otherwise the name is merely an alias of cmp_int_unaligned. */
#if MDBX_UNALIGNED_OK < 4 || (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG))
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int
/* Compare two items pointing at 4-byte aligned unsigned int's. */
cmp_int_align4(const MDBX_val *a, const MDBX_val *b);
#else
#define cmp_int_align4 cmp_int_unaligned
#endif /* MDBX_UNALIGNED_OK < 4 || debug */

/* Compare two items lexically */
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lexical(const MDBX_val *a, const MDBX_val *b);

/* Compare two items in reverse byte order */
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_reverse(const MDBX_val *a, const MDBX_val *b);

/* Fast non-lexically comparator */
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lenfast(const MDBX_val *a, const MDBX_val *b);

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL bool eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l);

/* Equality test for two values: lengths are compared first and the bytes are
 * compared by eq_fast_slowpath() only when the lengths match (which is hinted
 * to the compiler as the unlikely case). */
MDBX_NOTHROW_PURE_FUNCTION static inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) {
  if (unlikely(a->iov_len == b->iov_len))
    return eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len);
  return false;
}

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b);

MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b);

/* Selects the built-in key comparator for the given table flags:
 * MDBX_REVERSEKEY takes precedence, then MDBX_INTEGERKEY,
 * and lexical comparison is the fallback. */
static inline MDBX_cmp_func *builtin_keycmp(MDBX_db_flags_t flags) {
  if (flags & MDBX_REVERSEKEY)
    return cmp_reverse;
  if (flags & MDBX_INTEGERKEY)
    return cmp_int_align2;
  return cmp_lexical;
}

/* Selects the built-in data comparator for the given table flags:
 * without MDBX_DUPSORT the length-first comparator is used; with it,
 * MDBX_INTEGERDUP takes precedence over MDBX_REVERSEDUP, and lexical
 * comparison is the fallback. */
static inline MDBX_cmp_func *builtin_datacmp(MDBX_db_flags_t flags) {
  if (!(flags & MDBX_DUPSORT))
    return cmp_lenfast;
  if (flags & MDBX_INTEGERDUP)
    return cmp_int_unaligned;
  if (flags & MDBX_REVERSEDUP)
    return cmp_reverse;
  return cmp_lexical;
}

/*----------------------------------------------------------------------------*/

MDBX_INTERNAL uint32_t combine_durability_flags(const uint32_t a, const uint32_t b);

/* Returns env->lckless_placeholder viewed as an lck_t, with its address
 * rounded up to a multiple of MDBX_CACHELINE_SIZE. */
MDBX_CONST_FUNCTION static inline lck_t *lckless_stub(const MDBX_env *env) {
  const uintptr_t raw = (uintptr_t)&env->lckless_placeholder;
  /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */
  const uintptr_t aligned = (raw + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1);
  return (lck_t *)aligned;
}

#if !(defined(_WIN32) || defined(_WIN64))
/* Maps "not implemented / not supported" error codes to MDBX_RESULT_TRUE and
 * passes any other value through unchanged. Only the codes that the platform
 * actually defines are checked. */
MDBX_CONST_FUNCTION static inline int ignore_enosys(int err) {
  bool not_supported = false;
#ifdef ENOSYS
  not_supported = not_supported || err == ENOSYS;
#endif /* ENOSYS */
#ifdef ENOIMPL
  not_supported = not_supported || err == ENOIMPL;
#endif /* ENOIMPL */
#ifdef ENOTSUP
  not_supported = not_supported || err == ENOTSUP;
#endif /* ENOTSUP */
#ifdef ENOSUPP
  not_supported = not_supported || err == ENOSUPP;
#endif /* ENOSUPP */
#ifdef EOPNOTSUPP
  not_supported = not_supported || err == EOPNOTSUPP;
#endif /* EOPNOTSUPP */
  return not_supported ? MDBX_RESULT_TRUE : err;
}

/* Like ignore_enosys(), but EAGAIN is mapped to MDBX_RESULT_TRUE as well. */
MDBX_MAYBE_UNUSED MDBX_CONST_FUNCTION static inline int ignore_enosys_and_eagain(int err) {
  if (err == EAGAIN)
    return MDBX_RESULT_TRUE;
  return ignore_enosys(err);
}

/* Like ignore_enosys(), but EINVAL is mapped to MDBX_RESULT_TRUE as well. */
MDBX_MAYBE_UNUSED MDBX_CONST_FUNCTION static inline int ignore_enosys_and_einval(int err) {
  if (err == EINVAL)
    return MDBX_RESULT_TRUE;
  return ignore_enosys(err);
}

/* Like ignore_enosys(), but MDBX_EREMOTE is mapped to MDBX_RESULT_TRUE
 * as well. */
MDBX_MAYBE_UNUSED MDBX_CONST_FUNCTION static inline int ignore_enosys_and_eremote(int err) {
  if (err == MDBX_EREMOTE)
    return MDBX_RESULT_TRUE;
  return ignore_enosys(err);
}

#endif /* !(defined(_WIN32) || defined(_WIN64)) */

/* Validates an environment handle before use.
 * Returns:
 *  - MDBX_EINVAL   when env is null;
 *  - MDBX_EBADSIGN when the signature does not match env_signature;
 *  - MDBX_PANIC    when ENV_FATAL_ERROR is set, or (with MDBX_ENV_CHECKPID
 *                  and wanna_active) when env->pid is non-zero and differs
 *                  from the current process id; in the latter case
 *                  ENV_FATAL_ERROR is also raised on the environment;
 *  - MDBX_EPERM    when wanna_active is requested but ENV_ACTIVE is not set;
 *  - MDBX_SUCCESS  otherwise. */
static inline int check_env(const MDBX_env *env, const bool wanna_active) {
  if (unlikely(!env))
    return MDBX_EINVAL;

  if (unlikely(env->signature.weak != env_signature))
    return MDBX_EBADSIGN;

  if (unlikely(env->flags & ENV_FATAL_ERROR))
    return MDBX_PANIC;

  /* The remaining checks apply only when an active environment is required. */
  if (!wanna_active)
    return MDBX_SUCCESS;

#if MDBX_ENV_CHECKPID
  if (unlikely(env->pid != osal_getpid()) && env->pid) {
    ((MDBX_env *)env)->flags |= ENV_FATAL_ERROR;
    return MDBX_PANIC;
  }
#endif /* MDBX_ENV_CHECKPID */

  if (unlikely((env->flags & ENV_ACTIVE) == 0))
    return MDBX_EPERM;

  eASSERT(env, env->dxb_mmap.base != nullptr);
  return MDBX_SUCCESS;
}

/* Validates a transaction handle before use.
 *
 * bad_bits is a mask of txn->flags bits that make the transaction unusable
 * for the caller; zero skips the flag checks (the owner check below notes
 * zero as the abort/reset/txn-break case).
 *
 * Returns:
 *  - MDBX_EINVAL   when txn is null;
 *  - MDBX_EBADSIGN when the signature does not match txn_signature;
 *  - MDBX_EPERM    when bad_bits is non-zero and the environment has no
 *                  data mapping (dxb_mmap.base is null);
 *  - MDBX_EACCESS  when MDBX_TXN_RDONLY is both forbidden and set;
 *  - MDBX_BAD_TXN  when another forbidden bit is set and MDBX_TXN_PARKED is
 *                  not among bad_bits;
 *  - the result of txn_check_badbits_parked() when a forbidden bit is set
 *    and MDBX_TXN_PARKED is among bad_bits;
 *  - with MDBX_TXN_CHECKOWNER: MDBX_THREAD_MISMATCH or MDBX_BAD_TXN when
 *    the calling thread is not the owner (see the condition below);
 *  - MDBX_SUCCESS  otherwise. */
static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) {
  if (unlikely(!txn))
    return MDBX_EINVAL;

  if (unlikely(txn->signature != txn_signature))
    return MDBX_EBADSIGN;

  if (bad_bits) {
    if (unlikely(!txn->env->dxb_mmap.base))
      return MDBX_EPERM;

    if (unlikely(txn->flags & bad_bits)) {
      /* A read-only violation is reported with its own code, before the
       * generic handling of the other forbidden bits. */
      if ((bad_bits & MDBX_TXN_RDONLY) && unlikely(txn->flags & MDBX_TXN_RDONLY))
        return MDBX_EACCESS;
      if ((bad_bits & MDBX_TXN_PARKED) == 0)
        return MDBX_BAD_TXN;
      return txn_check_badbits_parked(txn, bad_bits);
    }
  }

  /* Unless finished, the transaction must agree with its environment
   * on MDBX_NOSTICKYTHREADS. */
  tASSERT(txn, (txn->flags & MDBX_TXN_FINISHED) ||
                   (txn->flags & MDBX_NOSTICKYTHREADS) == (txn->env->flags & MDBX_NOSTICKYTHREADS));
#if MDBX_TXN_CHECKOWNER
  /* The owner is verified unless the transaction is a non-finished
   * MDBX_NOSTICKYTHREADS one, or bad_bits is zero and the transaction is
   * a finished read-only one. A zero owner yields MDBX_BAD_TXN instead of
   * MDBX_THREAD_MISMATCH. */
  if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) != MDBX_NOSTICKYTHREADS &&
      !(bad_bits /* abort/reset/txn-break */ == 0 &&
        ((txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)) == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED))) &&
      unlikely(txn->owner != osal_thread_self()))
    return txn->owner ? MDBX_THREAD_MISMATCH : MDBX_BAD_TXN;
#endif /* MDBX_TXN_CHECKOWNER */

  return MDBX_SUCCESS;
}

/* check_txn() for write access: MDBX_TXN_RDONLY is always added to the
 * forbidden bits and MDBX_TXN_PARKED is always removed from them. */
static inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) {
  const int rw_bad_bits = (bad_bits | MDBX_TXN_RDONLY) & ~MDBX_TXN_PARKED;
  return check_txn(txn, rw_bad_bits);
}

/*----------------------------------------------------------------------------*/

MDBX_INTERNAL void mincore_clean_cache(const MDBX_env *const env);

MDBX_INTERNAL void update_mlcnt(const MDBX_env *env, const pgno_t new_aligned_mlocked_pgno,
                                const bool lock_not_release);

MDBX_INTERNAL void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, const size_t end_bytes);

MDBX_INTERNAL void munlock_all(const MDBX_env *env);

/*----------------------------------------------------------------------------*/
/* Cache coherence and mmap invalidation */
/* osal_flush_incoherent_cpu_writeback() expands to osal_memory_barrier() when
 * MDBX_CPU_WRITEBACK_INCOHERENT is non-zero and to osal_compiler_barrier()
 * otherwise. The option must already be defined at this point. */
#ifndef MDBX_CPU_WRITEBACK_INCOHERENT
#error "The MDBX_CPU_WRITEBACK_INCOHERENT must be defined before"
#elif MDBX_CPU_WRITEBACK_INCOHERENT
#define osal_flush_incoherent_cpu_writeback() osal_memory_barrier()
#else
#define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier()
#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */

/* Makes the memory range [addr, addr + nbytes) coherent on platforms that
 * need explicit help; what is done depends on two build options:
 *  - MDBX_MMAP_INCOHERENT_FILE_WRITE: the range is widened to `pagesize`
 *    boundaries and msync(MS_SYNC | MS_INVALIDATE) is issued for it;
 *  - MDBX_MMAP_INCOHERENT_CPU_CACHE: cacheflush(..., DCACHE) is issued.
 * When both options are zero the function does nothing.
 * The masking with -pagesize assumes pagesize is a power of two. */
MDBX_MAYBE_UNUSED static inline void osal_flush_incoherent_mmap(const void *addr, size_t nbytes,
                                                                const intptr_t pagesize) {
#ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
#error "The MDBX_MMAP_INCOHERENT_FILE_WRITE must be defined before"
#elif MDBX_MMAP_INCOHERENT_FILE_WRITE
  /* round the start down and the end up to pagesize boundaries */
  char *const begin = (char *)(-pagesize & (intptr_t)addr);
  char *const end = (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1));
  /* a failure is only asserted on, it is not reported to the caller */
  int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0;
  eASSERT(nullptr, err == 0);
  (void)err;
#else
  (void)pagesize;
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */

#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
#error "The MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined before"
#elif MDBX_MMAP_INCOHERENT_CPU_CACHE
#ifdef DCACHE
  /* MIPS has cache coherency issues.
   * Note: for any nbytes >= on-chip cache size, entire is flushed. */
  cacheflush((void *)addr, nbytes, DCACHE);
#else
#error "Oops, cacheflush() not available"
#endif /* DCACHE */
#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */

  /* silence unused-parameter warnings when nothing has to be done */
#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE
  (void)addr;
  (void)nbytes;
#endif
}

/* Cursor state.
 *
 * poor:
 *  - an unpositioned cursor with an unfilled stack;
 *  - must be skipped in all loops that track/adjust cursor positions;
 *  - only operations that imply setting an absolute position are allowed;
 *  - in all other cases ENODATA is returned.
 *
 *    Such cursors have top = -1 and flags < 0, which makes it cheap to detect
 *    and skip them in the tracking/adjusting loops by the condition
 *    probe_cursor->top < this_cursor->top.
 *
 * hollow:
 *  - a partially initialized cursor, but without a position available to
 *    the user, so no operation can be performed without absolute
 *    (non-relative) positioning;
 *  - ki[top] may be invalid, including being >= page_numkeys(pg[top]).
 *
 *    Such cursors have top >= 0, but flags < 0 (the z_hollow flag is set).
 *
 * pointed:
 *  - a fully initialized cursor with a specific position that has data;
 *  - the current row can be read or deleted, or a relative move
 *    can be performed;
 *  - may carry the z_after_delete, z_eof_hard and z_eof_soft flags;
 *  - the presence of z_eof_soft means the cursor has been moved beyond
 *    the data, so the current data can be neither read nor deleted.
 *
 *    Such cursors have top >= 0 and flags >= 0 (no z_hollow flag).
 *
 * filled:
 *  - a pointed cursor without the z_eof_soft flag;
 *  - there is data behind the cursor, CRUD operations at the current
 *    position are possible.
 *
 *    Such cursors have top >= 0 and (unsigned)flags < z_eof_soft.
 *
 * State changes.
 *
 *  - The cursor state is reset by top_and_flags |= z_poor_mark,
 *    which is equivalent to top = -1 together with flags |= z_poor_mark;
 *  - While positioning a cursor, top is set first and flags are set
 *    only at the very end, provided there were no errors.
 *  - A repeated first/last positioning may start by setting/zeroing only top
 *    without resetting flags, which lets the fast path inside
 *    tree_search_finalize() work.
 *
 *  - Trouble with the end of data:
 *     - mdbx_cursor_get(NEXT) performs two operations (a move and a read),
 *       so moving onto the last row always succeeds, and an error is
 *       returned only by the subsequent next().
 *       However, because of this duality the meaning of an error returned
 *       from mdbx_cursor_get(NEXT) is ambiguous, since it is unclear what
 *       the error refers to:
 *        - If it refers to reading the data, then the cursor has been moved
 *          and stands after the last row. Accordingly, reading at the current
 *          position is forbidden, and prev() returns the cursor to the last
 *          row;
 *        - If the error refers to the move, then the cursor has not been
 *          moved and stays on the last row. Accordingly, reading at the
 *          current position is allowed, and prev() puts the cursor on
 *          the row before the last one.
 *        - The piquant part is that users (one way or another) rely on both
 *          behaviours, while of course expecting mdbx_cursor_eof() to return
 *          true after an MDBX_NEXT error.
 *     - a similar situation then arises with MDBX_GET_RANGE, MDBX_LOWERBOUND,
 *       MDBX_GET_BOTH_RANGE and MDBX_UPPERBOUND. Here, when the search fails,
 *       the cursor may/must stand after the last row.
 *     - MDBX_LAST is added on top of that. Here the cursor must stand on
 *       the last row and allow reading at the current position,
 *       yet mdbx_cursor_eof() must return true.
 *
 *    The solution is two flags, z_eof_soft and z_eof_hard:
 *     - When only z_eof_soft is set, mdbx_cursor_eof() returns true, but
 *       reading the data at the current position is allowed, and prev()
 *       moves the cursor to the row before the last one.
 *     - When z_eof_hard is set, reading the data at the current position
 *       is not allowed, mdbx_cursor_eof() returns true as well,
 *       and prev() puts the cursor on the last row. */
enum cursor_state {
  /* This is a nested cursor for a nested tree/page and it is
     the inner element of struct cursor_couple. */
  z_inner = 0x01,

  /* A GC update is being prepared,
     so pages may be taken from the GC even for FREE_DBI. */
  z_gcu_preparation = 0x02,

  /* The cursor has just been created, so auto-positioning
     to the beginning/end is allowed instead of returning an error. */
  z_fresh = 0x04,

  /* The previous operation was a deletion, so the cursor already physically
     points to the next item and the corresponding move operation
     must be ignored. */
  z_after_delete = 0x08,

  /* NOTE(review): undocumented in the original; judging by the name and the
     state notes above, presumably turns off the fast path inside
     tree_search_finalize() -- confirm. */
  z_disable_tree_search_fastpath = 0x10,

  /* The cursor is logically at the end of the data but physically on the last
   * row, ki[top] == page_numkeys(pg[top]) - 1, and the data at the current
   * position can be read. */
  z_eof_soft = 0x20,

  /* The cursor is logically beyond the end of the data, so the next move
     "backward" must be ignored and/or result in positioning on the last row.
     No CRUD operations are allowed in the current state. */
  z_eof_hard = 0x40,

  /* There is no data behind the cursor, its position is logically undefined,
     no CRUD operations are allowed at the current position.
     Relative movement is forbidden. */
  z_hollow = -128 /* 0x80 */,

  /* Masks for resetting/setting the state. */
  z_clear_mask = z_inner | z_gcu_preparation,
  z_poor_mark = z_eof_hard | z_hollow | z_disable_tree_search_fastpath,
  z_fresh_mark = z_poor_mark | z_fresh
};

/* True when the cursor carries the z_inner flag. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_inner(const MDBX_cursor *mc) {
  const int inner_bit = mc->flags & z_inner;
  return inner_bit != 0;
}

/* True when the cursor stack is empty (top < 0). Assertions verify that the
 * combined top_and_flags word is negative in exactly that case and that a
 * poor cursor's sub-cursor, if any, has negative flags and top as well. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_poor(const MDBX_cursor *mc) {
  const bool poor = mc->top < 0;
  cASSERT(mc, poor == (mc->top_and_flags < 0));
  if (poor && mc->subcur) {
    cASSERT(mc, mc->subcur->cursor.flags < 0 && mc->subcur->cursor.top < 0);
  }
  return poor;
}

/* True when the cursor stack is not empty (top >= 0). Assertions verify that
 * the combined top_and_flags word is non-negative in exactly that case and
 * that the sub-cursor of a non-pointed cursor, if any, is poor. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_pointed(const MDBX_cursor *mc) {
  const bool pointed = mc->top >= 0;
  cASSERT(mc, pointed == (mc->top_and_flags >= 0));
  if (!pointed && mc->subcur) {
    cASSERT(mc, is_poor(&mc->subcur->cursor));
  }
  return pointed;
}

/* True when the cursor flags are negative (z_hollow is set).
 * Assertions: a hollow cursor's sub-cursor, if any, must be poor; a
 * non-hollow cursor must have a non-empty stack and, unless z_eof_hard is
 * set, an in-range index on its top page. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_hollow(const MDBX_cursor *mc) {
  const bool hollow = mc->flags < 0;
  if (hollow) {
    if (mc->subcur) {
      cASSERT(mc, is_poor(&mc->subcur->cursor));
    }
  } else {
    cASSERT(mc, mc->top >= 0);
    cASSERT(mc, (mc->flags & z_eof_hard) || mc->ki[mc->top] < page_numkeys(mc->pg[mc->top]));
  }
  return hollow;
}

/* True when the flags byte, taken as unsigned, is at least z_eof_soft,
 * i.e. when any of z_eof_soft, z_eof_hard or z_hollow is set. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_eof(const MDBX_cursor *mc) {
  const uint8_t state = (uint8_t)mc->flags;
  return state >= z_eof_soft;
}

/* True when the flags byte, taken as unsigned, is below z_eof_hard,
 * i.e. when neither z_eof_hard nor z_hollow is set.
 * NOTE(review): the cursor-state description preceding enum cursor_state
 * defines "filled" as (unsigned)flags < z_eof_soft, while the comparison
 * here is against z_eof_hard -- confirm which one is intended. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_filled(const MDBX_cursor *mc) {
  const uint8_t state = (uint8_t)mc->flags;
  return state < z_eof_hard;
}

/* True when the cursor has a sub-cursor and that sub-cursor is filled. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_filled(const MDBX_cursor *mc) {
  if (!mc->subcur)
    return false;
  return is_filled(&mc->subcur->cursor);
}

/* True when the cursor has a sub-cursor and that sub-cursor is pointed. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_pointed(const MDBX_cursor *mc) {
  if (!mc->subcur)
    return false;
  return is_pointed(&mc->subcur->cursor);
}

/* True when the cursor has no sub-cursor or its sub-cursor is hollow.
 * In debug/assertion builds a non-hollow sub-cursor additionally requires
 * the outer cursor to be filled and to stand on a node flagged N_DUP. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool inner_hollow(const MDBX_cursor *mc) {
  bool hollow = true;
  if (mc->subcur)
    hollow = is_hollow(&mc->subcur->cursor);
#if MDBX_DEBUG || MDBX_FORCE_ASSERTIONS
  if (!hollow) {
    cASSERT(mc, is_filled(mc));
    const page_t *top_page = mc->pg[mc->top];
    const node_t *current = page_node(top_page, mc->ki[mc->top]);
    cASSERT(mc, node_flags(current) & N_DUP);
  }
#endif /* MDBX_DEBUG || MDBX_FORCE_ASSERTIONS */
  return hollow;
}

/* Resets the sub-cursor, if any: zeroes the nested tree root and puts the
 * inner cursor into the poor state (keeping the z_inner mark). */
MDBX_MAYBE_UNUSED static inline void inner_gone(MDBX_cursor *mc) {
  if (!mc->subcur)
    return;

  TRACE("reset inner cursor %p", __Wpedantic_format_voidptr(&mc->subcur->cursor));
  mc->subcur->nested_tree.root = 0;
  mc->subcur->cursor.top_and_flags = z_inner | z_poor_mark;
}

/* Puts the cursor into the poor state.
 * An outer cursor gets z_poor_mark OR-ed into top_and_flags and its
 * sub-cursor is reset via inner_gone(); an inner cursor has its tree root
 * zeroed and top_and_flags overwritten with z_inner | z_poor_mark. */
MDBX_MAYBE_UNUSED static inline void be_poor(MDBX_cursor *mc) {
  const bool was_inner = is_inner(mc);
  if (!was_inner) {
    mc->top_and_flags |= z_poor_mark;
    inner_gone(mc);
  } else {
    mc->tree->root = 0;
    mc->top_and_flags = z_inner | z_poor_mark;
  }
  cASSERT(mc, is_poor(mc) && !is_pointed(mc) && !is_filled(mc));
  cASSERT(mc, was_inner == is_inner(mc));
}

/* Marks the cursor as filled by clearing every state flag except those in
 * z_clear_mask (z_inner and z_gcu_preparation). The cursor must already have
 * a non-empty stack and an in-range index on its top page. */
MDBX_MAYBE_UNUSED static inline void be_filled(MDBX_cursor *mc) {
  cASSERT(mc, mc->top >= 0);
  cASSERT(mc, mc->ki[mc->top] < page_numkeys(mc->pg[mc->top]));
  const bool was_inner = is_inner(mc);
  mc->flags &= z_clear_mask;
  cASSERT(mc, is_filled(mc));
  cASSERT(mc, was_inner == is_inner(mc));
}

/* True when `scan` is a different cursor whose stack is at least as deep
 * as the stack of `base`. `base` must have a non-empty stack. */
MDBX_MAYBE_UNUSED static inline bool is_related(const MDBX_cursor *base, const MDBX_cursor *scan) {
  cASSERT(base, base->top >= 0);
  if (base->top > scan->top)
    return false;
  return base != scan;
}

/* Cursor control/checking flags. */
enum cursor_checking {
  z_branch = 0x01 /* same as P_BRANCH for check_leaf_type() */,
  z_leaf = 0x02 /* same as P_LEAF for check_leaf_type() */,
  z_largepage = 0x04 /* same as P_LARGE for check_leaf_type() */,
  z_updating = 0x08 /* update/rebalance pending */,
  z_ignord = 0x10 /* don't check keys ordering */,
  z_dupfix = 0x20 /* same as P_DUPFIX for check_leaf_type() */,
  z_retiring = 0x40 /* refs to child pages may be invalid */,
  z_pagecheck = 0x80 /* perform page checking, see MDBX_VALIDATION */
};

MDBX_INTERNAL int __must_check_result cursor_validate(const MDBX_cursor *mc);

/* Returns the DBI index of the cursor, derived from the position of its
 * dbi_state pointer within the transaction's dbi_state array. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline size_t cursor_dbi(const MDBX_cursor *mc) {
  cASSERT(mc, mc->txn->signature == txn_signature);
  const size_t index = (size_t)(mc->dbi_state - mc->txn->dbi_state);
  cASSERT(mc, index < mc->txn->env->n_dbi);
  return index;
}

/* Forwards to dbi_changed() for the cursor's transaction and DBI index. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_dbi_changed(const MDBX_cursor *mc) {
  const size_t index = cursor_dbi(mc);
  return dbi_changed(mc->txn, index);
}

/* Accessor for the cursor's dbi_state pointer. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline uint8_t *cursor_dbi_state(const MDBX_cursor *mc) {
  uint8_t *const state = mc->dbi_state;
  return state;
}

/* True when the cursor is bound to the FREE_DBI slot of its transaction. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_is_gc(const MDBX_cursor *mc) {
  const uint8_t *const gc_slot = mc->txn->dbi_state + FREE_DBI;
  return mc->dbi_state == gc_slot;
}

/* True when the cursor is bound to the MAIN_DBI slot of its transaction. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_is_main(const MDBX_cursor *mc) {
  const uint8_t *const main_slot = mc->txn->dbi_state + MAIN_DBI;
  return mc->dbi_state == main_slot;
}

/* True when the cursor is bound to one of the first CORE_DBS slots
 * of its transaction. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool cursor_is_core(const MDBX_cursor *mc) {
  const uint8_t *const core_end = mc->txn->dbi_state + CORE_DBS;
  return mc->dbi_state < core_end;
}

/* Debugging output value of a cursor's DBI: negated for a sub-cursor
 * (a cursor carrying the z_inner flag). */
MDBX_MAYBE_UNUSED static inline int cursor_dbi_dbg(const MDBX_cursor *mc) {
  const int index = cursor_dbi(mc);
  if (mc->flags & z_inner)
    return -index;
  return index;
}

/* Pushes page `mp` with index `ki` onto the cursor stack.
 * When the stack is already full, the cursor is made poor, the transaction
 * is flagged with MDBX_TXN_ERROR and MDBX_CURSOR_FULL is returned;
 * otherwise MDBX_SUCCESS is returned. */
MDBX_MAYBE_UNUSED static inline int __must_check_result cursor_push(MDBX_cursor *mc, page_t *mp, indx_t ki) {
  TRACE("pushing page %" PRIaPGNO " on db %d cursor %p", mp->pgno, cursor_dbi_dbg(mc), __Wpedantic_format_voidptr(mc));

  const bool stack_full = mc->top >= CURSOR_STACK_SIZE - 1;
  if (unlikely(stack_full)) {
    be_poor(mc);
    mc->txn->flags |= MDBX_TXN_ERROR;
    return MDBX_CURSOR_FULL;
  }

  ++mc->top;
  mc->ki[mc->top] = ki;
  mc->pg[mc->top] = mp;
  return MDBX_SUCCESS;
}

/* Pops the top page off the cursor stack. The stack must not be empty. */
MDBX_MAYBE_UNUSED static inline void cursor_pop(MDBX_cursor *mc) {
  /* Verify the precondition before TRACE() dereferences mc->pg[mc->top]:
   * with an empty stack (top < 0) that would index the array at -1. */
  cASSERT(mc, mc->top >= 0);
  TRACE("popped page %" PRIaPGNO " off db %d cursor %p", mc->pg[mc->top]->pgno, cursor_dbi_dbg(mc),
        __Wpedantic_format_voidptr(mc));
  mc->top -= 1;
}

/* True when the page-type bits of `mp` agree with mc->checking on the
 * branch, leaf, large-page and dupfix bits. */
MDBX_NOTHROW_PURE_FUNCTION static inline bool check_leaf_type(const MDBX_cursor *mc, const page_t *mp) {
  const unsigned type_bits = z_branch | z_leaf | z_largepage | z_dupfix;
  const unsigned mismatch = (page_type(mp) ^ mc->checking) & type_bits;
  return mismatch == 0;
}

MDBX_INTERNAL int cursor_check(const MDBX_cursor *mc, int txn_bad_bits);

/* Cursor check that needs no access to the data and does not activate
 * parked transactions: MDBX_TXN_PARKED is excluded from the forbidden
 * transaction bits. */
static inline int cursor_check_pure(const MDBX_cursor *mc) {
  const int txn_bad_bits = MDBX_TXN_BLOCKED - MDBX_TXN_PARKED;
  return cursor_check(mc, txn_bad_bits);
}

/* Cursor check for reading data, with activation of parked transactions:
 * the whole MDBX_TXN_BLOCKED mask is passed as the forbidden bits. */
static inline int cursor_check_ro(const MDBX_cursor *mc) {
  const int txn_bad_bits = MDBX_TXN_BLOCKED;
  return cursor_check(mc, txn_bad_bits);
}

/* Cursor check for writing data: MDBX_TXN_PARKED is excluded from the
 * forbidden transaction bits and MDBX_TXN_RDONLY is added to them. */
static inline int cursor_check_rw(const MDBX_cursor *mc) {
  const int txn_bad_bits = (MDBX_TXN_BLOCKED - MDBX_TXN_PARKED) | MDBX_TXN_RDONLY;
  return cursor_check(mc, txn_bad_bits);
}

MDBX_INTERNAL MDBX_cursor *cursor_eot(MDBX_cursor *mc, MDBX_txn *txn, const bool merge);
MDBX_INTERNAL int cursor_shadow(MDBX_cursor *mc, MDBX_txn *nested, const size_t dbi);

MDBX_INTERNAL MDBX_cursor *cursor_cpstk(const MDBX_cursor *csrc, MDBX_cursor *cdst);

MDBX_INTERNAL int __must_check_result cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
                                                 const MDBX_cursor_op op);

MDBX_INTERNAL int __must_check_result cursor_check_multiple(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
                                                            unsigned flags);

MDBX_INTERNAL int __must_check_result cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
                                                          unsigned flags);

MDBX_INTERNAL int __must_check_result cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags);

MDBX_INTERNAL int __must_check_result cursor_validate_updating(MDBX_cursor *mc);

MDBX_INTERNAL int __must_check_result cursor_del(MDBX_cursor *mc, unsigned flags);

MDBX_INTERNAL int __must_check_result cursor_sibling_left(MDBX_cursor *mc);
MDBX_INTERNAL int __must_check_result cursor_sibling_right(MDBX_cursor *mc);

/* Result pair returned by cursor_seek(). */
typedef struct cursor_set_result {
  int err;    /* status code of the seek */
  bool exact; /* presumably whether an exact match was found -- TODO confirm against cursor_seek() */
} csr_t;

MDBX_INTERNAL csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op);

MDBX_INTERNAL int __must_check_result inner_first(MDBX_cursor *__restrict mc, MDBX_val *__restrict data);
MDBX_INTERNAL int __must_check_result inner_last(MDBX_cursor *__restrict mc, MDBX_val *__restrict data);
MDBX_INTERNAL int __must_check_result outer_first(MDBX_cursor *__restrict mc, MDBX_val *__restrict key,
                                                  MDBX_val *__restrict data);
MDBX_INTERNAL int __must_check_result outer_last(MDBX_cursor *__restrict mc, MDBX_val *__restrict key,
                                                 MDBX_val *__restrict data);

MDBX_INTERNAL int __must_check_result inner_next(MDBX_cursor *__restrict mc, MDBX_val *__restrict data);
MDBX_INTERNAL int __must_check_result inner_prev(MDBX_cursor *__restrict mc, MDBX_val *__restrict data);
MDBX_INTERNAL int __must_check_result outer_next(MDBX_cursor *__restrict mc, MDBX_val *__restrict key,
                                                 MDBX_val *__restrict data, MDBX_cursor_op op);
MDBX_INTERNAL int __must_check_result outer_prev(MDBX_cursor *__restrict mc, MDBX_val *__restrict key,
                                                 MDBX_val *__restrict data, MDBX_cursor_op op);

MDBX_INTERNAL int cursor_init4walk(cursor_couple_t *couple, const MDBX_txn *const txn, tree_t *const tree,
                                   kvx_t *const kvx);

MDBX_INTERNAL int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi);

MDBX_INTERNAL int __must_check_result cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, const page_t *mp);

MDBX_INTERNAL int __must_check_result cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, const MDBX_val *data);

/*----------------------------------------------------------------------------*/

/* Refreshes the sub-page pointer held by mc->subcur, if there is one.
 * Needed when the node containing the sub-page may have moved.
 * Called with mp = mc->pg[mc->top], ki = mc->ki[mc->top].
 * Only nodes flagged N_DUP without N_TREE (i.e. holding a sub-page rather
 * than a nested tree) are affected. */
MDBX_MAYBE_UNUSED static inline void cursor_inner_refresh(const MDBX_cursor *mc, const page_t *mp, unsigned ki) {
  cASSERT(mc, is_leaf(mp));
  const node_t *current = page_node(mp, ki);
  if ((node_flags(current) & (N_DUP | N_TREE)) != N_DUP)
    return;
  mc->subcur->cursor.pg[0] = node_data(current);
}

MDBX_MAYBE_UNUSED MDBX_INTERNAL bool cursor_is_tracked(const MDBX_cursor *mc);

/* Puts both cursors of the couple into the "fresh" state; the inner one
 * additionally keeps its z_inner mark. */
static inline void cursor_reset(cursor_couple_t *couple) {
  couple->inner.cursor.top_and_flags = z_fresh_mark | z_inner;
  couple->outer.top_and_flags = z_fresh_mark;
}

/* Detaches the couple from its transaction: both cursors become poor and
 * their txn and dbi_state pointers, as well as the outer tree pointer,
 * are cleared. */
static inline void cursor_drown(cursor_couple_t *couple) {
  /* outer cursor */
  couple->outer.top_and_flags = z_poor_mark;
  couple->outer.txn = nullptr;
  couple->outer.tree = nullptr;
  /* the clc pointer is kept, since it is used to compute the dbi
   * in mdbx_cursor_renew(). */
  couple->outer.dbi_state = nullptr;

  /* inner cursor */
  couple->inner.cursor.top_and_flags = z_poor_mark | z_inner;
  couple->inner.cursor.txn = nullptr;
  couple->inner.cursor.dbi_state = nullptr;
}

/* Sets the list length and writes the terminating sentinel item right after
 * the last element (index len + 1): it refers to a static stub page flagged
 * P_BAD and has pgno = P_INVALID and npages = 1. Returns `len`. */
static inline size_t dpl_setlen(dpl_t *dl, size_t len) {
  static const page_t dpl_stub_pageE = {INVALID_TXNID,
                                        0,
                                        P_BAD,
                                        {0},
                                        /* pgno */ ~(pgno_t)0};
  assert(dpl_stub_pageE.flags == P_BAD && dpl_stub_pageE.pgno == P_INVALID);
  const size_t tail = len + 1;
  dl->length = len;
  dl->items[tail].ptr = (page_t *)&dpl_stub_pageE;
  dl->items[tail].pgno = P_INVALID;
  dl->items[tail].npages = 1;
  return len;
}

/* Empties the list: length and sorted count become zero, the page counter
 * is reset, and the leading sentinel item (index 0) is set to refer to
 * a static stub page flagged P_BAD with pgno = 0 and npages = 1. */
static inline void dpl_clear(dpl_t *dl) {
  static const page_t dpl_stub_pageB = {INVALID_TXNID,
                                        0,
                                        P_BAD,
                                        {0},
                                        /* pgno */ 0};
  assert(dpl_stub_pageB.flags == P_BAD && dpl_stub_pageB.pgno == 0);
  const size_t empty_len = dpl_setlen(dl, 0);
  dl->sorted = empty_len;
  dl->pages_including_loose = 0;
  dl->items[0].ptr = (page_t *)&dpl_stub_pageB;
  dl->items[0].pgno = 0;
  dl->items[0].npages = 1;
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
}

MDBX_INTERNAL int __must_check_result dpl_alloc(MDBX_txn *txn);

MDBX_INTERNAL void dpl_free(MDBX_txn *txn);

MDBX_INTERNAL dpl_t *dpl_reserve(MDBX_txn *txn, size_t size);

MDBX_INTERNAL __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn);

/* Returns the transaction's dirty-page list in sorted form: the list itself
 * when it is already fully sorted, otherwise the result of
 * dpl_sort_slowpath(). */
static inline dpl_t *dpl_sort(const MDBX_txn *txn) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  dpl_t *const list = txn->tw.dirtylist;
  tASSERT(txn, list->length <= PAGELIST_LIMIT);
  tASSERT(txn, list->sorted <= list->length);
  tASSERT(txn, list->items[0].pgno == 0 && list->items[list->length + 1].pgno == P_INVALID);
  if (likely(list->sorted == list->length))
    return list;
  return dpl_sort_slowpath(txn);
}

MDBX_INTERNAL __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno);

MDBX_MAYBE_UNUSED MDBX_INTERNAL const page_t *debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno);

/* Number of pages spanned by the i-th item of the list, as recorded in the
 * item; an assertion checks it against the page itself. */
MDBX_NOTHROW_PURE_FUNCTION static inline unsigned dpl_npages(const dpl_t *dl, size_t i) {
  assert(0 <= (intptr_t)i && i <= dl->length);
  const unsigned span = dl->items[i].npages;
  assert(span == (is_largepage(dl->items[i].ptr) ? dl->items[i].ptr->pages : 1));
  return span;
}

/* Page number just past the i-th item: its first pgno plus its page count. */
MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t dpl_endpgno(const dpl_t *dl, size_t i) {
  const unsigned span = dpl_npages(dl, i);
  return span + dl->items[i].pgno;
}

/* Tells whether the page range [pgno, pgno + npages) overlaps any entry of the
 * transaction's dirty-page list.
 *
 * The list must already be fully sorted. dpl_search() yields the first entry
 * whose pgno is not below the requested one, so only that entry and its
 * predecessor can overlap the range. With assertions enabled the answer is
 * re-derived by a linear scan over all entries and compared. */
static inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  dpl_t *const list = txn->tw.dirtylist;
  tASSERT(txn, list->sorted == list->length);
  tASSERT(txn, list->items[0].pgno == 0 && list->items[list->length + 1].pgno == P_INVALID);

  const size_t at = dpl_search(txn, pgno);
  tASSERT(txn, at >= 1 && at <= list->length + 1);
  tASSERT(txn, pgno <= list->items[at].pgno);
  tASSERT(txn, pgno > list->items[at - 1].pgno);

  /* overlap with the found entry: the range runs into its first page */
  bool rc = pgno + npages > list->items[at].pgno;
  if (!rc) {
    /* overlap with the previous entry: it extends past the range start */
    rc = dpl_endpgno(list, at - 1) > pgno;
  }

  if (ASSERT_ENABLED()) {
    bool check = false;
    for (size_t i = 1; i <= list->length; ++i) {
      const page_t *const dp = list->items[i].ptr;
      /* entry begins before the range ends AND ends after the range begins */
      if (dp->pgno < pgno + npages && dpl_endpgno(list, i) > pgno)
        check = true;
    }
    tASSERT(txn, check == rc);
  }
  return rc;
}

/* Returns the dirty-list index of the entry that starts exactly at pgno,
 * or 0 when there is no such entry (index 0 is the head sentinel, so it
 * never denotes a real entry). */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t dpl_exist(const MDBX_txn *txn, pgno_t pgno) {
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  const dpl_t *const list = txn->tw.dirtylist;
  const size_t slot = dpl_search(txn, pgno);
  tASSERT(txn, (int)slot > 0);
  if (list->items[slot].pgno != pgno)
    return 0;
  return slot;
}

MDBX_INTERNAL void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages);

/* Removes the i-th entry from the transaction's dirty-page list, passing
 * the entry's own cached page count to dpl_remove_ex(). */
static inline void dpl_remove(const MDBX_txn *txn, size_t i) {
  const size_t npages = dpl_npages(txn->tw.dirtylist, i);
  dpl_remove_ex(txn, i, npages);
}

MDBX_INTERNAL int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, size_t npages);

MDBX_MAYBE_UNUSED MDBX_INTERNAL bool dpl_check(MDBX_txn *txn);

/* Returns the LRU age of the i-th dirty page: the transaction's current
 * dirtylru counter minus the stamp kept with the page (modulo 2^32).
 *
 * The stamp is read as a size_t located sizeof(size_t) bytes before the
 * address held in items[i].ptr and truncated to 32 bits. Not applicable to
 * read-only or WRITEMAP transactions; i must denote a real entry (1..length). */
MDBX_NOTHROW_PURE_FUNCTION static inline uint32_t dpl_age(const MDBX_txn *txn, size_t i) {
  tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
  const dpl_t *const list = txn->tw.dirtylist;
  assert(i <= list->length && (intptr_t)i > 0);
  size_t *const stamp = ptr_disp(list->items[i].ptr, -(ptrdiff_t)sizeof(size_t));
  return txn->tw.dirtylru - (uint32_t)*stamp;
}

MDBX_INTERNAL void dpl_lru_reduce(MDBX_txn *txn);

/* Advances the transaction's dirty-page LRU counter by one and returns the
 * resulting value.
 *
 * Once the counter exceeds UINT32_MAX / 3, and the transaction is not in
 * WRITEMAP mode, dpl_lru_reduce() is invoked before the value is returned. */
static inline uint32_t dpl_lru_turn(MDBX_txn *txn) {
  ++txn->tw.dirtylru;
  if (unlikely(txn->tw.dirtylru > UINT32_MAX / 3)) {
    if (!(txn->flags & MDBX_WRITEMAP))
      dpl_lru_reduce(txn);
  }
  return txn->tw.dirtylru;
}

MDBX_INTERNAL void dpl_sift(MDBX_txn *const txn, pnl_t pl, const bool spilled);

MDBX_INTERNAL void dpl_release_shadows(MDBX_txn *txn);

/* Context object handed to gc_update_init() and gc_update(). */
typedef struct gc_update_context {
  unsigned loop;
  pgno_t prev_first_unallocated;
  bool dense; /* gc_update_init() sets this to (txn->txnid <= MIN_TXNID) */
  size_t reserve_adj;
  size_t retired_stored;
  size_t amount, reserved, cleaned_slot, reused_slot, fill_idx;
  txnid_t cleaned_id, rid;
#if MDBX_ENABLE_BIGFOOT
  txnid_t bigfoot; /* gc_update_init() seeds this with txn->txnid */
#endif /* MDBX_ENABLE_BIGFOOT */
  /* gc_update_init() zero-fills every byte that precedes `cursor`
   * (offsetof(gcu_t, cursor)) and initialises the cursor separately via
   * cursor_init(); a field placed after this union would not be zeroed there. */
  union {
    MDBX_cursor cursor;
    cursor_couple_t couple;
  };
} gcu_t;

/* Prepares a GC-update context for the given transaction.
 *
 * All plain fields located before the embedded cursor are zeroed, `dense` is
 * set when the transaction id does not exceed MIN_TXNID, `bigfoot` (if built
 * in) starts at the transaction id, and the cursor is bound to FREE_DBI.
 * Returns whatever cursor_init() returns. */
static inline int gc_update_init(MDBX_txn *txn, gcu_t *ctx) {
  const size_t plain_bytes = offsetof(gcu_t, cursor);
  memset(ctx, 0, plain_bytes);
#if MDBX_ENABLE_BIGFOOT
  ctx->bigfoot = txn->txnid;
#endif /* MDBX_ENABLE_BIGFOOT */
  ctx->dense = !(txn->txnid > MIN_TXNID);
  return cursor_init(&ctx->cursor, txn, FREE_DBI);
}

#define ALLOC_DEFAULT 0
#define ALLOC_RESERVE 1
#define ALLOC_UNIMPORTANT 2
MDBX_INTERNAL pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags);

MDBX_INTERNAL pgr_t gc_alloc_single(const MDBX_cursor *const mc);
MDBX_INTERNAL int gc_update(MDBX_txn *txn, gcu_t *ctx);

MDBX_INTERNAL int lck_setup(MDBX_env *env, mdbx_mode_t mode);
#if MDBX_LOCKING > MDBX_LOCKING_SYSV
MDBX_INTERNAL int lck_ipclock_stubinit(osal_ipclock_t *ipc);
MDBX_INTERNAL int lck_ipclock_destroy(osal_ipclock_t *ipc);
#endif /* MDBX_LOCKING > MDBX_LOCKING_SYSV */

MDBX_INTERNAL int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, int global_uniqueness_flag);

MDBX_INTERNAL int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t current_pid);

MDBX_INTERNAL int lck_seize(MDBX_env *env);

MDBX_INTERNAL int lck_downgrade(MDBX_env *env);

MDBX_MAYBE_UNUSED MDBX_INTERNAL int lck_upgrade(MDBX_env *env, bool dont_wait);

MDBX_INTERNAL int lck_rdt_lock(MDBX_env *env);

MDBX_INTERNAL void lck_rdt_unlock(MDBX_env *env);

MDBX_INTERNAL int lck_txn_lock(MDBX_env *env, bool dont_wait);

MDBX_INTERNAL void lck_txn_unlock(MDBX_env *env);

MDBX_INTERNAL int lck_rpid_set(MDBX_env *env);

MDBX_INTERNAL int lck_rpid_clear(MDBX_env *env);

MDBX_INTERNAL int lck_rpid_check(MDBX_env *env, uint32_t pid);

/* Computes the signature value stored into a meta page that is being marked
 * steady.
 *
 * Real hashing is not implemented yet (see the disabled branch), so the
 * computation starts from DATASIGN_NONE. Any value not above DATASIGN_WEAK is
 * bit-inverted before being returned, hence the result is never
 * DATASIGN_NONE or DATASIGN_WEAK. */
static inline uint64_t meta_sign_calculate(const meta_t *meta) {
  uint64_t sign = DATASIGN_NONE;
#if 0 /* TODO */
  sign = hippeus_hash64(...);
#else
  (void)meta;
#endif
  /* LY: never returns DATASIGN_NONE or DATASIGN_WEAK */
  if (sign > DATASIGN_WEAK)
    return sign;
  return ~sign;
}

/* Reads the 64-bit signature of a (possibly concurrently updated) meta page
 * through the volatile-aware unaligned accessor. */
static inline uint64_t meta_sign_get(const volatile meta_t *meta) { return unaligned_peek_u64_volatile(4, meta->sign); }

/* Stores the value produced by meta_sign_calculate() into the meta's
 * signature field. */
static inline void meta_sign_as_steady(meta_t *meta) { unaligned_poke_u64(4, meta->sign, meta_sign_calculate(meta)); }

/* Tells whether the meta's current signature satisfies SIGN_IS_STEADY(). */
static inline bool meta_is_steady(const volatile meta_t *meta) { return SIGN_IS_STEADY(meta_sign_get(meta)); }

MDBX_INTERNAL troika_t meta_tap(const MDBX_env *env);
MDBX_INTERNAL unsigned meta_eq_mask(const troika_t *troika);
MDBX_INTERNAL bool meta_should_retry(const MDBX_env *env, troika_t *troika);
MDBX_MAYBE_UNUSED MDBX_INTERNAL bool troika_verify_fsm(void);

/* Reference to one meta page together with facts captured about it;
 * produced by meta_recent(), meta_prefer_steady() and meta_tail(). */
struct meta_ptr {
  txnid_t txnid; /* taken from the troika's txnid[] slot for this meta */
  /* The same pointer viewed with and without the volatile qualifier. */
  union {
    const volatile meta_t *ptr_v;
    const meta_t *ptr_c;
  };
  size_t is_steady; /* 0 or 1: the meta's bit extracted from troika->fsm */
};

MDBX_INTERNAL meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n);
MDBX_INTERNAL txnid_t meta_txnid(const volatile meta_t *meta);
MDBX_INTERNAL txnid_t recent_committed_txnid(const MDBX_env *env);
MDBX_INTERNAL int meta_sync(const MDBX_env *env, const meta_ptr_t head);

MDBX_INTERNAL const char *durable_caption(const meta_t *const meta);
MDBX_INTERNAL void meta_troika_dump(const MDBX_env *env, const troika_t *troika);

/* METAPAGE(env, n) yields the meta of page number n in the environment:
 * pgno2page() locates the page, page_meta() its meta part.
 * METAPAGE_END(env) is the same expression for n == NUM_METAS; the
 * assertions in meta_update_begin()/meta_update_end() use it as the exclusive
 * upper bound of the valid meta range. */
#define METAPAGE(env, n) page_meta(pgno2page(env, n))
#define METAPAGE_END(env) METAPAGE(env, NUM_METAS)

/* Builds a meta_ptr_t for the meta page that the troika marks as `recent`:
 * its cached txnid, its address within the environment and its steady bit
 * taken from troika->fsm. */
static inline meta_ptr_t meta_recent(const MDBX_env *env, const troika_t *troika) {
  const unsigned slot = troika->recent;
  meta_ptr_t r;
  r.is_steady = (troika->fsm >> slot) & 1;
  r.ptr_v = METAPAGE(env, slot);
  r.txnid = troika->txnid[slot];
  return r;
}

/* Builds a meta_ptr_t for the meta page that the troika marks as
 * `prefer_steady`: its cached txnid, its address within the environment and
 * its steady bit taken from troika->fsm. */
static inline meta_ptr_t meta_prefer_steady(const MDBX_env *env, const troika_t *troika) {
  const unsigned slot = troika->prefer_steady;
  meta_ptr_t r;
  r.is_steady = (troika->fsm >> slot) & 1;
  r.ptr_v = METAPAGE(env, slot);
  r.txnid = troika->txnid[slot];
  return r;
}

/* Builds a meta_ptr_t for the troika's tail meta page. The tail index lives
 * in the two low bits of troika->tail_and_flags; the result carries the cached
 * txnid, the meta's address and its steady bit from troika->fsm. */
static inline meta_ptr_t meta_tail(const MDBX_env *env, const troika_t *troika) {
  const uint8_t tail = troika->tail_and_flags & 3;
  MDBX_ANALYSIS_ASSUME(tail < NUM_METAS);
  meta_ptr_t r;
  r.is_steady = (troika->fsm >> tail) & 1;
  r.ptr_v = METAPAGE(env, tail);
  r.txnid = troika->txnid[tail];
  return r;
}

/* True when meta number n is the troika's `recent` or `prefer_steady` one. */
static inline bool meta_is_used(const troika_t *troika, unsigned n) {
  if (n == troika->recent)
    return true;
  return n == troika->prefer_steady;
}

/* True when the 16-byte boot id recorded in the meta equals the process-wide
 * globals.bootid AND that global boot id is not all-zero (a zero id never
 * matches anything). */
static inline bool meta_bootid_match(const meta_t *meta) {
  if (memcmp(&meta->bootid, &globals.bootid, 16) != 0)
    return false;
  return (globals.bootid.x | globals.bootid.y) != 0;
}

/* Decides whether a weak (non-steady) meta may be accepted.
 *
 * Under an exclusive lock the meta's boot id must match the current one.
 * Otherwise (the DB is already opened by someone) an lck mapping must be
 * present and its recorded envmode must not have MDBX_RDONLY set. */
static inline bool meta_weak_acceptable(const MDBX_env *env, const meta_t *meta, const int lck_exclusive) {
  if (lck_exclusive) {
    /* exclusive lock */
    return meta_bootid_match(meta);
  }
  /* db already opened */
  if (!env->lck_mmap.lck)
    return false;
  return (env->lck_mmap.lck->envmode.weak & MDBX_RDONLY) == 0;
}

/* Returns the transaction id of a non-volatile meta image: both copies
 * (txnid_a and txnid_b) are read, and the id is reported only when they agree;
 * a mismatch yields 0. */
MDBX_NOTHROW_PURE_FUNCTION static inline txnid_t constmeta_txnid(const meta_t *meta) {
  const txnid_t head = unaligned_peek_u64(4, &meta->txnid_a);
  const txnid_t tail = unaligned_peek_u64(4, &meta->txnid_b);
  if (likely(head == tail))
    return head;
  return 0;
}

/* Opens an in-place update of a live meta page (it must lie within the
 * environment's meta range) for transaction id `txnid`, which must be greater
 * than both ids currently stored.
 *
 * txnid_b is zeroed first and only then txnid_a receives the new id, so from
 * here until meta_update_end() the two copies disagree. The statement order is
 * significant; do not rearrange. */
static inline void meta_update_begin(const MDBX_env *env, meta_t *meta, txnid_t txnid) {
  eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env));
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) < txnid && unaligned_peek_u64(4, meta->txnid_b) < txnid);
  (void)env;
#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8
  /* single 64-bit atomic store per field */
  atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, 0, mo_AcquireRelease);
  atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_a, txnid, mo_AcquireRelease);
#else
  /* two 32-bit halves per field; the index expressions select the half by
   * byte order so that the low 32 bits of txnid are stored first */
  atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], 0, mo_AcquireRelease);
  atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], 0, mo_AcquireRelease);
  atomic_store32(&meta->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], (uint32_t)txnid, mo_AcquireRelease);
  atomic_store32(&meta->txnid_a[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], (uint32_t)(txnid >> 32), mo_AcquireRelease);
#endif
}

/* Completes an in-place update of a live meta page started by
 * meta_update_begin(): txnid_a must already hold `txnid` while txnid_b is
 * still lower. The current boot id is copied into the meta and then txnid_b is
 * set to `txnid`, making both copies agree again. */
static inline void meta_update_end(const MDBX_env *env, meta_t *meta, txnid_t txnid) {
  eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env));
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) == txnid);
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_b) < txnid);
  (void)env;
  jitter4testing(true);
  memcpy(&meta->bootid, &globals.bootid, 16);
#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8
  /* single 64-bit atomic store */
  atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, txnid, mo_AcquireRelease);
#else
  /* two 32-bit halves, the low 32 bits of txnid first (index chosen by byte order) */
  atomic_store32(&meta->txnid_b[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__], (uint32_t)txnid, mo_AcquireRelease);
  atomic_store32(&meta->txnid_b[__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__], (uint32_t)(txnid >> 32), mo_AcquireRelease);
#endif
}

/* Stamps a meta IMAGE (a buffer outside the mapped meta pages) with the given
 * transaction id and the current boot id.
 *
 * No ordering or atomicity is attempted: this is used only to fill a meta
 * image that will be written out, never the actual meta page, which is
 * what the assertion enforces whenever the DB is mapped. */
static inline void meta_set_txnid(const MDBX_env *env, meta_t *meta, const txnid_t txnid) {
  eASSERT(env, !env->dxb_mmap.base || meta < METAPAGE(env, 0) || meta >= METAPAGE_END(env));
  (void)env;
  unaligned_poke_u64(4, meta->txnid_a, txnid);
  unaligned_poke_u64(4, meta->txnid_b, txnid);
  memcpy(&meta->bootid, &globals.bootid, 16);
}

/* Three-way comparison of two transaction ids scaled by `s`:
 * 0 when a < b, 1 * s when a == b, 2 * s when a > b. */
static inline uint8_t meta_cmp2int(txnid_t a, txnid_t b, uint8_t s) {
  if (unlikely(a == b))
    return 1 * s;
  return (a > b) ? 2 * s : 0 * s;
}

/* "Is A more recent than B?" given the three-way txnid comparison code
 * (0: a < b, 1: a == b, 2: a > b) and the steady flags of both metas.
 * A wins on a greater txnid; on equal txnids A wins only when it is steady
 * and B is not. Returns 1 when A wins, 0 otherwise. */
static inline uint8_t meta_cmp2recent(uint8_t ab_cmp2int, bool a_steady, bool b_steady) {
  assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */);
  if (ab_cmp2int > 1)
    return 1;
  if (ab_cmp2int != 1)
    return 0;
  return a_steady && !b_steady;
}

/* "Is A the better steady choice than B?" given the three-way txnid
 * comparison code (0: a < b, 1: a == b, 2: a > b) and both steady flags.
 * Steadiness dominates: when the flags differ the steady one wins; when they
 * are equal A wins only on a strictly greater txnid. Returns 1 or 0. */
static inline uint8_t meta_cmp2steady(uint8_t ab_cmp2int, bool a_steady, bool b_steady) {
  assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */);
  if (a_steady != b_steady)
    return a_steady;
  return ab_cmp2int > 1;
}

/* True when meta A (txnid + steady flag) should be preferred over meta B as
 * the most recent one; see meta_cmp2recent() for the rule. */
static inline bool meta_choice_recent(txnid_t a_txnid, bool a_steady, txnid_t b_txnid, bool b_steady) {
  const uint8_t order = meta_cmp2int(a_txnid, b_txnid, 1);
  return meta_cmp2recent(order, a_steady, b_steady);
}

/* True when meta A (txnid + steady flag) should be preferred over meta B as
 * the steady one; see meta_cmp2steady() for the rule. */
static inline bool meta_choice_steady(txnid_t a_txnid, bool a_steady, txnid_t b_txnid, bool b_steady) {
  const uint8_t order = meta_cmp2int(a_txnid, b_txnid, 1);
  return meta_cmp2steady(order, a_steady, b_steady);
}

MDBX_INTERNAL meta_t *meta_init_triplet(const MDBX_env *env, void *buffer);

MDBX_INTERNAL int meta_validate(MDBX_env *env, meta_t *const meta, const page_t *const page, const unsigned meta_number,
                                unsigned *guess_pagesize);

MDBX_INTERNAL int __must_check_result meta_validate_copy(MDBX_env *env, const meta_t *meta, meta_t *dest);

MDBX_INTERNAL int __must_check_result meta_override(MDBX_env *env, size_t target, txnid_t txnid, const meta_t *shape);

MDBX_INTERNAL int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto);

#if !(defined(_WIN32) || defined(_WIN64))
#define MDBX_WRITETHROUGH_THRESHOLD_DEFAULT 2
#endif

/* Context shared by the iov_init() / iov_page() / iov_write() calls declared
 * right after this structure. */
struct iov_ctx {
  MDBX_env *env;
  osal_ioring_t *ior; /* queried by iov_empty() through osal_ioring_used() */
  mdbx_filehandle_t fd;
  int err;
/* The written-range tracking fields are compiled in unless the build
 * overrides MDBX_NEED_WRITTEN_RANGE with 0 (it defaults to 1 here). */
#ifndef MDBX_NEED_WRITTEN_RANGE
#define MDBX_NEED_WRITTEN_RANGE 1
#endif /* MDBX_NEED_WRITTEN_RANGE */
#if MDBX_NEED_WRITTEN_RANGE
  pgno_t flush_begin;
  pgno_t flush_end;
#endif /* MDBX_NEED_WRITTEN_RANGE */
  uint64_t coherency_timestamp;
};

MDBX_INTERNAL __must_check_result int iov_init(MDBX_txn *const txn, iov_ctx_t *ctx, size_t items, size_t npages,
                                               mdbx_filehandle_t fd, bool check_coherence);

/* True when the context's I/O ring reports nothing in use. */
static inline bool iov_empty(const iov_ctx_t *ctx) {
  return !osal_ioring_used(ctx->ior);
}

MDBX_INTERNAL __must_check_result int iov_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, size_t npages);

MDBX_INTERNAL __must_check_result int iov_write(iov_ctx_t *ctx);

MDBX_INTERNAL void spill_remove(MDBX_txn *txn, size_t idx, size_t npages);
MDBX_INTERNAL pnl_t spill_purge(MDBX_txn *txn);
MDBX_INTERNAL int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries,
                                 const intptr_t wanna_spill_npages, const size_t need);
/*----------------------------------------------------------------------------*/

/* Looks for page `pgno` in the transaction's spilled-pages list.
 *
 * Entries are keyed by (pgno << 1), so only an exact match of that key counts.
 * Returns the 1-based position of the matching entry, or 0 when the list is
 * absent or holds no such entry. */
static inline size_t spill_search(const MDBX_txn *txn, pgno_t pgno) {
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  const pnl_t list = txn->tw.spilled.list;
  if (likely(!list))
    return 0;

  const pgno_t key = pgno << 1;
  const size_t pos = pnl_search(list, key, (size_t)MAX_PAGENO + MAX_PAGENO + 1);
  if (pos > MDBX_PNL_GETSIZE(list))
    return 0;
  return (list[pos] == key) ? pos : 0;
}

/* Tells whether any page of the range [pgno, pgno + npages) has a live entry
 * in the transaction's spilled-pages list.
 *
 * The list stores (pgno << 1) keys, so the range maps onto the key interval
 * [pgno << 1, ((pgno + npages) << 1) - 1]. A single pnl_search() from the
 * proper end of that interval (depending on the list ordering) finds the
 * nearest entry, which is then tested against the opposite bound. With
 * assertions enabled the answer is verified page by page via spill_search(). */
static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) {
  const pnl_t pnl = txn->tw.spilled.list;
  if (likely(!pnl))
    return false;

  const size_t len = MDBX_PNL_GETSIZE(pnl);
  if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
    /* dump the list; entries with the low bit set are shown negated */
    DEBUG_EXTRA("PNL len %zu [", len);
    for (size_t k = 1; k <= len; ++k)
      DEBUG_EXTRA_PRINT(" %li", (pnl[k] & 1) ? -(long)(pnl[k] >> 1) : (long)(pnl[k] >> 1));
    DEBUG_EXTRA_PRINT("%s\n", "]");
  }

  const pgno_t first_key = pgno << 1;
  const pgno_t last_key = ((pgno + (pgno_t)npages) << 1) - 1;
#if MDBX_PNL_ASCENDING
  const size_t pos = pnl_search(pnl, first_key, (size_t)(MAX_PAGENO + 1) << 1);
  tASSERT(txn, pos && (pos == MDBX_PNL_GETSIZE(pnl) + 1 || first_key <= pnl[pos]));
  const bool rc = pos <= MDBX_PNL_GETSIZE(pnl) && pnl[pos] <= last_key;
#else
  const size_t pos = pnl_search(pnl, last_key, (size_t)MAX_PAGENO + MAX_PAGENO + 1);
  tASSERT(txn, pos && (pos == MDBX_PNL_GETSIZE(pnl) + 1 || last_key >= pnl[pos]));
  const bool rc = pos <= MDBX_PNL_GETSIZE(pnl) && pnl[pos] >= first_key;
#endif

  if (ASSERT_ENABLED()) {
    bool check = false;
    for (size_t off = 0; off < npages; ++off) {
      if (spill_search(txn, (pgno_t)(pgno + off)) != 0)
        check = true;
    }
    tASSERT(txn, check == rc);
  }
  return rc;
}

/* Makes room for `need` more dirty pages, spilling if required.
 *
 * Two excess figures are computed:
 *  - entries: need - dirtyroom - loose_count (only when a dirty list exists);
 *  - pages:   need + currently dirty pages - loose_count - dp_limit, where
 *             "currently dirty" is the dirty list's pages_including_loose or,
 *             without a dirty list, writemap_dirty_npages.
 * If neither is positive nothing has to be done and MDBX_SUCCESS is returned;
 * otherwise the result of spill_slowpath() is returned. */
static inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0, const size_t need) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, !m0 || cursor_is_tracked(m0));

  const intptr_t excess_entries = txn->tw.dirtylist ? (need - txn->tw.dirtyroom - txn->tw.loose_count) : 0;
  const intptr_t excess_npages =
      need + (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose : txn->tw.writemap_dirty_npages) -
      txn->tw.loose_count - txn->env->options.dp_limit;

  /* production mode */
  bool nothing_to_spill = excess_npages < 1 && excess_entries < 1;
#if xMDBX_DEBUG_SPILLING == 1
  /* debug mode: always try to spill if xMDBX_DEBUG_SPILLING == 1 */
  nothing_to_spill = nothing_to_spill && txn->txnid % 23 > 11;
#endif
  if (likely(nothing_to_spill))
    return MDBX_SUCCESS;

  return spill_slowpath(txn, m0, excess_entries, excess_npages, need);
}

MDBX_INTERNAL int __must_check_result tree_search_finalize(MDBX_cursor *mc, const MDBX_val *key, int flags);
MDBX_INTERNAL int tree_search_lowest(MDBX_cursor *mc);

/* Bit flags accepted by tree_search() and tree_search_finalize(); each
 * occupies its own bit so they can be OR-ed together. */
enum page_search_flags {
  Z_MODIFY = 1 << 0,
  Z_ROOTONLY = 1 << 1,
  Z_FIRST = 1 << 2,
  Z_LAST = 1 << 3,
};
MDBX_INTERNAL int __must_check_result tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags);

#define MDBX_SPLIT_REPLACE MDBX_APPENDDUP /* newkey is not new */
MDBX_INTERNAL int __must_check_result page_split(MDBX_cursor *mc, const MDBX_val *const newkey, MDBX_val *const newdata,
                                                 pgno_t newpgno, const unsigned naf);

/*----------------------------------------------------------------------------*/

MDBX_INTERNAL int MDBX_PRINTF_ARGS(2, 3) bad_page(const page_t *mp, const char *fmt, ...);

MDBX_INTERNAL void MDBX_PRINTF_ARGS(2, 3) poor_page(const page_t *mp, const char *fmt, ...);

/* True when the page carries a txnid lower than the transaction's own txnid. */
MDBX_NOTHROW_PURE_FUNCTION static inline bool is_frozen(const MDBX_txn *txn, const page_t *mp) {
  return txn->txnid > mp->txnid;
}

/* True when the page carries exactly the transaction's own txnid. */
MDBX_NOTHROW_PURE_FUNCTION static inline bool is_spilled(const MDBX_txn *txn, const page_t *mp) {
  return txn->txnid == mp->txnid;
}

/* True when the page carries a txnid greater than the transaction's own txnid. */
MDBX_NOTHROW_PURE_FUNCTION static inline bool is_shadowed(const MDBX_txn *txn, const page_t *mp) {
  return txn->txnid < mp->txnid;
}

/* True when the page's txnid does not exceed the transaction's front_txnid. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline bool is_correct(const MDBX_txn *txn, const page_t *mp) {
  return txn->front_txnid >= mp->txnid;
}

/* True when the page's txnid equals the transaction's front_txnid; page_touch()
 * uses this to tell pages that can be modified in place from those that cannot. */
MDBX_NOTHROW_PURE_FUNCTION static inline bool is_modifable(const MDBX_txn *txn, const page_t *mp) {
  return txn->front_txnid == mp->txnid;
}

MDBX_INTERNAL int __must_check_result page_check(const MDBX_cursor *const mc, const page_t *const mp);

MDBX_INTERNAL pgr_t page_get_any(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front);

MDBX_INTERNAL pgr_t page_get_three(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front);

MDBX_INTERNAL pgr_t page_get_large(const MDBX_cursor *const mc, const pgno_t pgno, const txnid_t front);

/* Convenience wrapper over page_get_three(): stores the resulting page pointer
 * into *mp (whatever the outcome) and returns the accompanying error code. */
static inline int __must_check_result page_get(const MDBX_cursor *mc, const pgno_t pgno, page_t **mp,
                                               const txnid_t front) {
  const pgr_t got = page_get_three(mc, pgno, front);
  *mp = got.page;
  return got.err;
}

/*----------------------------------------------------------------------------*/

MDBX_INTERNAL int __must_check_result page_dirty(MDBX_txn *txn, page_t *mp, size_t npages);
MDBX_INTERNAL pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
MDBX_INTERNAL pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
MDBX_INTERNAL int page_touch_modifable(MDBX_txn *txn, const page_t *const mp);
MDBX_INTERNAL int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, const page_t *const mp);

/* Makes the page on top of the cursor's stack writable for the cursor's
 * transaction.
 *
 * Dispatch:
 *  - page not modifiable in place    -> page_touch_unmodifable();
 *  - modifiable, no dirty list       -> nothing to do (WRITEMAP without
 *                                       MDBX_AVOID_MSYNC);
 *  - modifiable sub-page             -> nothing to do;
 *  - any other modifiable page       -> page_touch_modifable().
 * Must not be used on large pages. Returns MDBX_SUCCESS or the callee's code. */
static inline int page_touch(MDBX_cursor *mc) {
  MDBX_txn *txn = mc->txn;
  page_t *const mp = mc->pg[mc->top];

  tASSERT(txn, mc->txn->flags & MDBX_TXN_DIRTY);
  tASSERT(txn, F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY));
  tASSERT(txn, !is_largepage(mp));
  if (ASSERT_ENABLED()) {
    if (mc->flags & z_inner) {
      /* a nested cursor must be wired to its outer couple, which must be dirty too */
      subcur_t *nested = container_of(mc->tree, subcur_t, nested_tree);
      cursor_couple_t *pair = container_of(nested, cursor_couple_t, inner);
      tASSERT(txn, mc->tree == &pair->outer.subcur->nested_tree);
      tASSERT(txn, &mc->clc->k == &pair->outer.clc->v);
      tASSERT(txn, *pair->outer.dbi_state & DBI_DIRTY);
    }
    tASSERT(txn, dpl_check(txn));
  }

  if (!is_modifable(txn, mp))
    return page_touch_unmodifable(txn, mc, mp);

  if (!txn->tw.dirtylist) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC);
    return MDBX_SUCCESS;
  }
  if (is_subpage(mp))
    return MDBX_SUCCESS;
  return page_touch_modifable(txn, mp);
}

MDBX_INTERNAL void page_copy(page_t *const dst, const page_t *const src, const size_t size);
MDBX_INTERNAL pgr_t __must_check_result page_unspill(MDBX_txn *const txn, const page_t *const mp);

MDBX_INTERNAL page_t *page_shadow_alloc(MDBX_txn *txn, size_t num);

MDBX_INTERNAL void page_shadow_release(MDBX_env *env, page_t *dp, size_t npages);

MDBX_INTERNAL int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno, page_t *mp /* maybe null */,
                                 unsigned pageflags /* maybe unknown/zero */);

/* Retires a page that is at hand: forwards its own pgno and flags, together
 * with the page pointer, to page_retire_ex(). */
static inline int page_retire(MDBX_cursor *mc, page_t *mp) {
  const pgno_t pgno = mp->pgno;
  const unsigned pageflags = mp->flags;
  return page_retire_ex(mc, pgno, mp, pageflags);
}

/* Invalidates a page that is no longer needed by a write transaction.
 *
 * The header is marked first (txnid = INVALID_TXNID, flags = P_BAD). Then:
 *  - with a dirty list: the entry at index `di` is removed and one unit of
 *    dirtyroom is given back; unless the build avoids msync AND the
 *    transaction is in WRITEMAP mode, the shadow copy is released and the
 *    function returns right away;
 *  - without a dirty list (WRITEMAP, no MDBX_AVOID_MSYNC, di == 0):
 *    writemap_dirty_npages is decreased by npages, saturating at zero.
 * On the paths that do not release the page, its header is flagged undefined
 * and its payload inaccessible for Valgrind/ASAN. */
static inline void page_wash(MDBX_txn *txn, size_t di, page_t *const mp, const size_t npages) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  mp->txnid = INVALID_TXNID;
  mp->flags = P_BAD;

  if (txn->tw.dirtylist) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
    /* without MDBX_AVOID_MSYNC the page must be in the dirty list at index di */
    tASSERT(txn, MDBX_AVOID_MSYNC || (di && txn->tw.dirtylist->items[di].ptr == mp));
    if (!MDBX_AVOID_MSYNC || di) {
      dpl_remove_ex(txn, di, npages);
      txn->tw.dirtyroom++;
      /* room + used entries must add up to the limit inherited from the
       * parent transaction or, for a top-level one, from the environment */
      tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                       (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));
      if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP)) {
        page_shadow_release(txn->env, mp, npages);
        return;
      }
    }
  } else {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) && !MDBX_AVOID_MSYNC && !di);
    /* saturating subtraction: never let the counter wrap below zero */
    txn->tw.writemap_dirty_npages -= (txn->tw.writemap_dirty_npages > npages) ? npages : txn->tw.writemap_dirty_npages;
  }
  VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ);
  VALGRIND_MAKE_MEM_NOACCESS(page_data(mp), pgno2bytes(txn->env, npages) - PAGEHDRSZ);
  MDBX_ASAN_POISON_MEMORY_REGION(page_data(mp), pgno2bytes(txn->env, npages) - PAGEHDRSZ);
}

MDBX_INTERNAL size_t page_subleaf2_reserve(const MDBX_env *env, size_t host_page_room, size_t subpage_len,
                                           size_t item_len);

/* Lvalue of a `page_t *` link kept inside the page itself, located
 * (sizeof(void *) - sizeof(uint32_t)) bytes past the page's `entries` field.
 * NOTE(review): presumably used to chain pages into a singly linked list;
 * confirm against the callers. */
#define page_next(mp) (*(page_t **)ptr_disp((mp)->entries, sizeof(void *) - sizeof(uint32_t)))

MDBX_INTERNAL void rthc_ctor(void);
MDBX_INTERNAL void rthc_dtor(const uint32_t current_pid);
MDBX_INTERNAL void rthc_lock(void);
MDBX_INTERNAL void rthc_unlock(void);

MDBX_INTERNAL int rthc_register(MDBX_env *const env);
MDBX_INTERNAL int rthc_remove(MDBX_env *const env);
MDBX_INTERNAL int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found);

/* dtor called for thread, i.e. for all mdbx's environment objects */
MDBX_INTERNAL void rthc_thread_dtor(void *rthc);

/* Returns the calling thread's value for the given thread-local key, using
 * pthread_getspecific() on POSIX systems and TlsGetValue() on Windows. */
static inline void *thread_rthc_get(osal_thread_key_t key) {
#if !defined(_WIN32) && !defined(_WIN64)
  return pthread_getspecific(key);
#else
  return TlsGetValue(key);
#endif
}

MDBX_INTERNAL void thread_rthc_set(osal_thread_key_t key, const void *value);

#if !defined(_WIN32) && !defined(_WIN64)
MDBX_INTERNAL void rthc_afterfork(void);
MDBX_INTERNAL void workaround_glibc_bug21031(void);
#endif /* !Windows */

/* Destroys a thread-local key; success of the underlying call is enforced
 * through ENSURE(). On POSIX systems the deletion is followed by
 * workaround_glibc_bug21031(); on Windows the key is released with TlsFree(). */
static inline void thread_key_delete(osal_thread_key_t key) {
  TRACE("key = %" PRIuPTR, (uintptr_t)key);
#if !defined(_WIN32) && !defined(_WIN64)
  ENSURE(nullptr, pthread_key_delete(key) == 0);
  workaround_glibc_bug21031();
#else
  ENSURE(nullptr, TlsFree(key));
#endif
}

/* Table descriptor handed to a walk_func visitor: the table's name plus
 * pointers to its tree descriptor(s).
 * NOTE(review): the exact roles of `internal` vs `nested` are not visible
 * here; confirm against walk_pages(). */
typedef struct walk_tbl {
  MDBX_val name;
  tree_t *internal, *nested;
} walk_tbl_t;

/* Visitor callback type accepted by walk_pages(). It receives the page number
 * and a page count (`number`), the opaque user context, the depth, the table
 * being walked, the page size and type, an error code for the page, and byte
 * accounting (entries, payload, header, unused).
 * NOTE(review): the meaning of the int return value is defined by
 * walk_pages(), which is not visible here. */
typedef int walk_func(const size_t pgno, const unsigned number, void *const ctx, const int deep,
                      const walk_tbl_t *table, const size_t page_size, const page_type_t page_type,
                      const MDBX_error_t err, const size_t nentries, const size_t payload_bytes,
                      const size_t header_bytes, const size_t unused_bytes);

/* Option flags for walk_pages(); currently a single flag with value 1. */
typedef enum walk_options { dont_check_keys_ordering = 1 } walk_options_t;

MDBX_INTERNAL int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user, walk_options_t options);

///

#define MDBX_RADIXSORT_THRESHOLD 142

/* ---------------------------------------------------------------------------
 * LY: State of the art quicksort-based sorting, with internal stack
 * and network-sort for small chunks.
 * Thanks to John M. Gamble for the http://pages.ripco.net/~jgamble/nw.html */

/* SORT_CMP_SWAP(TYPE, CMP, a, b): a single comparator of a sorting network.
 *
 * `a` and `b` must be lvalues of type TYPE. When CMP(a, b) holds they are
 * left as they are, otherwise their values are exchanged. Both branches are
 * hinted as equally likely. The MDBX_HAVE_CMOV flavour always rewrites both
 * lvalues through conditional expressions (no branch on the comparison
 * result); the other flavour swaps under an `if`. Arguments are evaluated
 * more than once, so they must be free of side effects. */
#if MDBX_HAVE_CMOV
#define SORT_CMP_SWAP(TYPE, CMP, a, b)                                                                                 \
  do {                                                                                                                 \
    const TYPE swap_tmp = (a);                                                                                         \
    const bool swap_cmp = expect_with_probability(CMP(swap_tmp, b), 0, .5);                                            \
    (a) = swap_cmp ? swap_tmp : (b);                                                                                   \
    (b) = swap_cmp ? (b) : swap_tmp;                                                                                   \
  } while (0)
#else
#define SORT_CMP_SWAP(TYPE, CMP, a, b)                                                                                 \
  do {                                                                                                                 \
    if (expect_with_probability(!CMP(a, b), 0, .5)) {                                                                  \
      const TYPE swap_tmp = (a);                                                                                       \
      (a) = (b);                                                                                                       \
      (b) = swap_tmp;                                                                                                  \
    }                                                                                                                  \
  } while (0)
#endif

//  3 comparators, 3 parallel operations
//  o-----^--^--o
//        |  |
//  o--^--|--v--o
//     |  |
//  o--v--v-----o
//
//  [[1,2]]
//  [[0,2]]
//  [[0,1]]
/* Sorts the three elements begin[0..2] in place with a fixed sequence of
 * SORT_CMP_SWAP comparators (one per line, in the layer order listed above).
 * The comparator order is part of the algorithm; do not rearrange. */
#define SORT_NETWORK_3(TYPE, CMP, begin)                                                                               \
  do {                                                                                                                 \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]);                                                                      \
  } while (0)

//  5 comparators, 3 parallel operations
//  o--^--^--------o
//     |  |
//  o--v--|--^--^--o
//        |  |  |
//  o--^--v--|--v--o
//     |     |
//  o--v-----v-----o
//
//  [[0,1],[2,3]]
//  [[0,2],[1,3]]
//  [[1,2]]
/* Sorts the four elements begin[0..3] in place with a fixed sequence of
 * SORT_CMP_SWAP comparators (one per line, in the layer order listed above).
 * The comparator order is part of the algorithm; do not rearrange. */
#define SORT_NETWORK_4(TYPE, CMP, begin)                                                                               \
  do {                                                                                                                 \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]);                                                                      \
  } while (0)

//  9 comparators, 5 parallel operations
//  o--^--^-----^-----------o
//     |  |     |
//  o--|--|--^--v-----^--^--o
//     |  |  |        |  |
//  o--|--v--|--^--^--|--v--o
//     |     |  |  |  |
//  o--|-----v--|--v--|--^--o
//     |        |     |  |
//  o--v--------v-----v--v--o
//
//  [[0,4],[1,3]]
//  [[0,2]]
//  [[2,4],[0,1]]
//  [[2,3],[1,4]]
//  [[1,2],[3,4]]
/* Sorts the five elements begin[0..4] in place with a fixed sequence of
 * SORT_CMP_SWAP comparators (one per line, in the layer order listed above).
 * The comparator order is part of the algorithm; do not rearrange. */
#define SORT_NETWORK_5(TYPE, CMP, begin)                                                                               \
  do {                                                                                                                 \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[4]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]);                                                                      \
  } while (0)

//  12 comparators, 6 parallel operations
//  o-----^--^--^-----------------o
//        |  |  |
//  o--^--|--v--|--^--------^-----o
//     |  |     |  |        |
//  o--v--v-----|--|--^--^--|--^--o
//              |  |  |  |  |  |
//  o-----^--^--v--|--|--|--v--v--o
//        |  |     |  |  |
//  o--^--|--v-----v--|--v--------o
//     |  |           |
//  o--v--v-----------v-----------o
//
//  [[1,2],[4,5]]
//  [[0,2],[3,5]]
//  [[0,1],[3,4],[2,5]]
//  [[0,3],[1,4]]
//  [[2,4],[1,3]]
//  [[2,3]]
/* Sorts the six elements begin[0..5] in place with a fixed sequence of
 * SORT_CMP_SWAP comparators (one per line, in the layer order listed above).
 * The comparator order is part of the algorithm; do not rearrange. */
#define SORT_NETWORK_6(TYPE, CMP, begin)                                                                               \
  do {                                                                                                                 \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[4], begin[5]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[2]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[5]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[1]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[3], begin[4]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[5]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[0], begin[3]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[4]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[4]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[1], begin[3]);                                                                      \
    SORT_CMP_SWAP(TYPE, CMP, begin[2], begin[3]);                                                                      \
  } while (0)

//  16 comparators, 6 parallel operations
//  o--^--------^-----^-----------------o
//     |        |     |
//  o--|--^-----|--^--v--------^--^-----o
//     |  |     |  |           |  |
//  o--|--|--^--v--|--^-----^--|--v-----o
//     |  |  |     |  |     |  |
//  o--|--|--|-----v--|--^--v--|--^--^--o
//     |  |  |        |  |     |  |  |
//  o--v--|--|--^-----v--|--^--v--|--v--o
//        |  |  |        |  |     |
//  o-----v--|--|--------v--v-----|--^--o
//           |  |                 |  |
//  o--------v--v-----------------v--v--o
//
//  [[0,4],[1,5],[2,6]]
//  [[0,2],[1,3],[4,6]]
//  [[2,4],[3,5],[0,1]]
//  [[2,3],[4,5]]
//  [[1,4],[3,6]]
//  [[1,2],[3,4],[5,6]]
// Sorts exactly 7 elements of TYPE starting at `begin`, in place, by applying
// the comparator layers listed above through SORT_CMP_SWAP(TYPE, CMP, ...).
// `begin` may be an arbitrary pointer expression (it is parenthesized on every
// use), but it is evaluated many times, so it must be free of side effects.
#define SORT_NETWORK_7(TYPE, CMP, begin)                                                                               \
  do {                                                                                                                 \
    /* layer 1: [0,4] [1,5] [2,6] */                                                                                   \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[5]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[2], (begin)[6]);                                                                  \
    /* layer 2: [0,2] [1,3] [4,6] */                                                                                   \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[2]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[3]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[4], (begin)[6]);                                                                  \
    /* layer 3: [2,4] [3,5] [0,1] */                                                                                   \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[2], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[5]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[1]);                                                                  \
    /* layer 4: [2,3] [4,5] */                                                                                         \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[2], (begin)[3]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[4], (begin)[5]);                                                                  \
    /* layer 5: [1,4] [3,6] */                                                                                         \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[6]);                                                                  \
    /* layer 6: [1,2] [3,4] [5,6] */                                                                                   \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[2]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[5], (begin)[6]);                                                                  \
  } while (0)

//  19 comparators, 6 parallel operations
//  o--^--------^-----^-----------------o
//     |        |     |
//  o--|--^-----|--^--v--------^--^-----o
//     |  |     |  |           |  |
//  o--|--|--^--v--|--^-----^--|--v-----o
//     |  |  |     |  |     |  |
//  o--|--|--|--^--v--|--^--v--|--^--^--o
//     |  |  |  |     |  |     |  |  |
//  o--v--|--|--|--^--v--|--^--v--|--v--o
//        |  |  |  |     |  |     |
//  o-----v--|--|--|--^--v--v-----|--^--o
//           |  |  |  |           |  |
//  o--------v--|--v--|--^--------v--v--o
//              |     |  |
//  o-----------v-----v--v--------------o
//
//  [[0,4],[1,5],[2,6],[3,7]]
//  [[0,2],[1,3],[4,6],[5,7]]
//  [[2,4],[3,5],[0,1],[6,7]]
//  [[2,3],[4,5]]
//  [[1,4],[3,6]]
//  [[1,2],[3,4],[5,6]]
// Sorts exactly 8 elements of TYPE starting at `begin`, in place, by applying
// the comparator layers listed above through SORT_CMP_SWAP(TYPE, CMP, ...).
// `begin` may be an arbitrary pointer expression (it is parenthesized on every
// use), but it is evaluated many times, so it must be free of side effects.
#define SORT_NETWORK_8(TYPE, CMP, begin)                                                                               \
  do {                                                                                                                 \
    /* layer 1: [0,4] [1,5] [2,6] [3,7] */                                                                             \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[5]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[2], (begin)[6]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[7]);                                                                  \
    /* layer 2: [0,2] [1,3] [4,6] [5,7] */                                                                             \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[2]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[3]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[4], (begin)[6]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[5], (begin)[7]);                                                                  \
    /* layer 3: [2,4] [3,5] [0,1] [6,7] */                                                                             \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[2], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[5]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[1]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[6], (begin)[7]);                                                                  \
    /* layer 4: [2,3] [4,5] */                                                                                         \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[2], (begin)[3]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[4], (begin)[5]);                                                                  \
    /* layer 5: [1,4] [3,6] */                                                                                         \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[6]);                                                                  \
    /* layer 6: [1,2] [3,4] [5,6] */                                                                                   \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[1], (begin)[2]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[3], (begin)[4]);                                                                  \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[5], (begin)[6]);                                                                  \
  } while (0)

/* Sorts a tiny array by dispatching on its element count.
 *
 *  TYPE, CMP - forwarded as-is to SORT_CMP_SWAP / SORT_NETWORK_N;
 *  begin     - pointer expression to the first element; it is parenthesized
 *              wherever it is used or forwarded, but may be evaluated more
 *              than once, so it must be free of side effects;
 *  end       - not used by the expansion;
 *  len       - number of elements, expected to be within 0..8; any other
 *              value trips assert(false) and then reaches __unreachable().
 *
 * Expands to a bare `switch` statement, not to an expression. */
#define SORT_INNER(TYPE, CMP, begin, end, len)                                                                         \
  switch (len) {                                                                                                       \
  default:                                                                                                             \
    assert(false);                                                                                                     \
    __unreachable();                                                                                                   \
    /* falls through to the "nothing to do" cases */                                                                   \
  case 0:                                                                                                              \
  case 1:                                                                                                              \
    break;                                                                                                             \
  case 2:                                                                                                              \
    SORT_CMP_SWAP(TYPE, CMP, (begin)[0], (begin)[1]);                                                                  \
    break;                                                                                                             \
  case 3:                                                                                                              \
    SORT_NETWORK_3(TYPE, CMP, (begin));                                                                                \
    break;                                                                                                             \
  case 4:                                                                                                              \
    SORT_NETWORK_4(TYPE, CMP, (begin));                                                                                \
    break;                                                                                                             \
  case 5:                                                                                                              \
    SORT_NETWORK_5(TYPE, CMP, (begin));                                                                                \
    break;                                                                                                             \
  case 6:                                                                                                              \
    SORT_NETWORK_6(TYPE, CMP, (begin));                                                                                \
    break;                                                                                                             \
  case 7:                                                                                                              \
    SORT_NETWORK_7(TYPE, CMP, (begin));                                                                                \
    break;                                                                                                             \
  case 8:                                                                                                              \
    SORT_NETWORK_8(TYPE, CMP, (begin));                                                                                \
    break;                                                                                                             \
  }

/* Exchanges the values held by the two TYPE lvalues `a` and `b`.
 * Each argument is evaluated twice, so both must be free of side effects. */
#define SORT_SWAP(TYPE, a, b)                                                                                          \
  do {                                                                                                                 \
    const TYPE swap_held = (b);                                                                                        \
    (b) = (a);                                                                                                         \
    (a) = swap_held;                                                                                                   \
  } while (0)

/* Stores the range {low, high} into the slot `top` points at and advances
 * `top` to the next slot. Relies on a pointer named `top`, whose target has
 * `lo` and `hi` members, being in scope at the expansion site. */
#define SORT_PUSH(low, high)                                                                                           \
  do {                                                                                                                 \
    top[0].lo = (low);                                                                                                 \
    top[0].hi = (high);                                                                                                \
    top += 1;                                                                                                          \
  } while (0)

/* Steps `top` back by one slot and copies that slot's bounds into the lvalues
 * `low` and `high`. Relies on a pointer named `top`, whose target has `lo` and
 * `hi` members, being in scope at the expansion site. */
#define SORT_POP(low, high)                                                                                            \
  do {                                                                                                                 \
    top -= 1;                                                                                                          \
    (low) = top[0].lo;                                                                                                 \
    (high) = top[0].hi;                                                                                                \
  } while (0)

#define SORT_IMPL(NAME, EXPECT_LOW_CARDINALITY_OR_PRESORTED, TYPE, CMP)                                                \
                                                                                                                       \
  static inline bool NAME##_is_sorted(const TYPE *first, const TYPE *last) {                                           \
    while (++first <= last)                                                                                            \
      if (expect_with_probability(CMP(first[0], first[-1]), 1, .1))                                                    \
        return false;                                                                                                  \
    return true;                                                                                                       \
  }                                                                                                                    \
                                                                                                                       \
  typedef struct {                                                                                                     \
    TYPE *lo, *hi;                                                                                                     \
  } NAME##_stack;                                                                                                      \
                                                                                                                       \
  __hot static void NAME(TYPE *const __restrict begin, TYPE *const __restrict end) {                                   \
    NAME##_stack stack[sizeof(size_t) * CHAR_BIT], *__restrict top = stack;                                            \
                                                                                                                       \
    TYPE *__restrict hi = end - 1;                                                                                     \
    TYPE *__restrict lo = begin;                                                                                       \
    while (true) {                                                                                                     \
      const ptrdiff_t len = hi - lo;                                                                                   \
      if (len < 8) {                                                                                                   \
        SORT_INNER(TYPE, CMP, lo, hi + 1, len + 1);                                                                    \
        if (unlikely(top == stack))                                                                                    \
          break;                                                                                                       \
        SORT_POP(lo, hi);                                                                                              \
        continue;                                                                                                      \
      }                                                                                                                \
                                                                                                                       \
      TYPE *__restrict mid = lo + (len >> 1);                                                                          \
      SORT_CMP_SWAP(TYPE, CMP, *lo, *mid);                                                                             \
      SORT_CMP_SWAP(TYPE, CMP, *mid, *hi);                                                                             \
      SORT_CMP_SWAP(TYPE, CMP, *lo, *mid);                                                                             \
                                                                                                                       \
      TYPE *right = hi - 1;                                                                                            \
      TYPE *left = lo + 1;                                                                                             \
      while (1) {                                                                                                      \
        while (expect_with_probability(CMP(*left, *mid), 0, .5))                                                       \
          ++left;                                                                                                      \
        while (expect_with_probability(CMP(*mid, *right), 0, .5))                                                      \
          --right;                                                                                                     \
        if (unlikely(left > right)) {                                                                                  \
          if (EXPECT_LOW_CARDINALITY_OR_PRESORTED) {                                                                   \
            if (NAME##_is_sorted(lo, right))                                                                           \
              lo = right + 1;                                                                                          \
            if (NAME##_is_sorted(left, hi))                                                                            \
              hi = left;                                                                                               \
          }                                                                                                            \
          break;                                                                                                       \
        }                                                                                                              \
        SORT_SWAP(TYPE, *left, *right);                                                                                \
        mid = (mid == left) ? right : (mid == right) ? left : mid;                                                     \
        ++left;                                                                                                        \
        --right;                                                                                                       \
      }                                                                                                                \
                                                                                                                       \
      if (right - lo > hi - left) {                                                                                    \
        SORT_PUSH(lo, right);                                                                                          \
        lo = left;                                                                                                     \
      } else {                                                                                                         \
        SORT_PUSH(left, hi);                                                                                           \
        hi = right;                                                                                                    \
      }                                                                                                                \
    }                                                                                                                  \
                                                                                                                       \
    if (AUDIT_ENABLED()) {                                                                                             \
      for (TYPE *scan = begin + 1; scan < end; ++scan)                                                                 \
        assert(CMP(scan[-1], scan[0]));                                                                                \
    }                                                                                                                  \
  }

/*------------------------------------------------------------------------------
 * LY: radix sort for large chunks */

/* Generates NAME##_radixsort(begin, length): a byte-wise radix sort of `length`
 * items of TYPE at `begin`, ordered by the integer key EXTRACT_KEY(item_ptr)
 * (handled as size_t).
 *
 * Each round of the outer loop handles 16 bits of the key by two counting
 * passes: by the low byte from `begin` into the scratch buffer, then by the
 * next byte from the scratch buffer back into `begin`. Rounds are repeated
 * while the keys still differ in higher bits.
 *
 * The scratch buffer holds `length` items. If BUFFER_PREALLOCATED is true it is
 * taken at `begin + length + END_GAP` (the caller has to provide that room),
 * otherwise it is allocated by osal_malloc() and released before returning.
 *
 * Returns true when the items are sorted, or false when the scratch buffer
 * could not be allocated (nothing has been modified in that case).
 * Fewer than two items are trivially sorted and are left as they are.
 * NOTE(review): the counters and offsets are pgno_t, so `length` is assumed
 * to fit into pgno_t - confirm at the call sites. */
#define RADIXSORT_IMPL(NAME, TYPE, EXTRACT_KEY, BUFFER_PREALLOCATED, END_GAP)                                          \
                                                                                                                       \
  __hot static bool NAME##_radixsort(TYPE *const begin, const size_t length) {                                         \
    /* nothing to do for 0 or 1 items; moreover the do-while loops below                                               \
     * always touch begin[0], which does not exist in an empty array */                                                \
    if (unlikely(length < 2))                                                                                          \
      return true;                                                                                                     \
                                                                                                                       \
    TYPE *tmp;                                                                                                         \
    if (BUFFER_PREALLOCATED) {                                                                                         \
      tmp = begin + length + END_GAP;                                                                                  \
      /* memset(tmp, 0xDeadBeef, sizeof(TYPE) * length); */                                                            \
    } else {                                                                                                           \
      tmp = osal_malloc(sizeof(TYPE) * length);                                                                        \
      if (unlikely(!tmp))                                                                                              \
        return false;                                                                                                  \
    }                                                                                                                  \
                                                                                                                       \
    size_t key_shift = 0, key_diff_mask;                                                                               \
    do {                                                                                                               \
      /* histograms of the low (a) and the next (b) byte of the shifted key */                                         \
      struct {                                                                                                         \
        pgno_t a[256], b[256];                                                                                         \
      } counters;                                                                                                      \
      memset(&counters, 0, sizeof(counters));                                                                          \
                                                                                                                       \
      /* count the bytes and collect the bits in which adjacent keys differ */                                         \
      key_diff_mask = 0;                                                                                               \
      size_t prev_key = EXTRACT_KEY(begin) >> key_shift;                                                               \
      TYPE *r = begin, *end = begin + length;                                                                          \
      do {                                                                                                             \
        const size_t key = EXTRACT_KEY(r) >> key_shift;                                                                \
        counters.a[key & 255]++;                                                                                       \
        counters.b[(key >> 8) & 255]++;                                                                                \
        key_diff_mask |= prev_key ^ key;                                                                               \
        prev_key = key;                                                                                                \
      } while (++r != end);                                                                                            \
                                                                                                                       \
      /* turn the counts into starting offsets (exclusive prefix sums) */                                              \
      pgno_t ta = 0, tb = 0;                                                                                           \
      for (size_t i = 0; i < 256; ++i) {                                                                               \
        const pgno_t ia = counters.a[i];                                                                               \
        counters.a[i] = ta;                                                                                            \
        ta += ia;                                                                                                      \
        const pgno_t ib = counters.b[i];                                                                               \
        counters.b[i] = tb;                                                                                            \
        tb += ib;                                                                                                      \
      }                                                                                                                \
                                                                                                                       \
      /* pass 1: distribute by the low byte from `begin` into `tmp` */                                                 \
      r = begin;                                                                                                       \
      do {                                                                                                             \
        const size_t key = EXTRACT_KEY(r) >> key_shift;                                                                \
        tmp[counters.a[key & 255]++] = *r;                                                                             \
      } while (++r != end);                                                                                            \
                                                                                                                       \
      /* the keys differ within the low byte only: pass 1 has completed                                                \
       * the sort, so just copy the result back and stop */                                                            \
      if (unlikely(key_diff_mask < 256)) {                                                                             \
        memcpy(begin, tmp, ptr_dist(end, begin));                                                                      \
        break;                                                                                                         \
      }                                                                                                                \
      /* pass 2: distribute by the next byte from `tmp` back into `begin` */                                           \
      end = (r = tmp) + length;                                                                                        \
      do {                                                                                                             \
        const size_t key = EXTRACT_KEY(r) >> key_shift;                                                                \
        begin[counters.b[(key >> 8) & 255]++] = *r;                                                                    \
      } while (++r != end);                                                                                            \
                                                                                                                       \
      key_shift += 16;                                                                                                 \
      /* go on while the keys differ somewhere above the 16 bits just handled */                                       \
    } while (key_diff_mask >> 16);                                                                                     \
                                                                                                                       \
    if (!(BUFFER_PREALLOCATED))                                                                                        \
      osal_free(tmp);                                                                                                  \
    return true;                                                                                                       \
  }

/*------------------------------------------------------------------------------
 * LY: Binary search */

/* Optimizer workaround used by the branchless path of SEARCH_IMPL.
 * For clang newer than 4 with __ia32__ defined it expands to an empty
 * `volatile` asm statement taking `size` as a read-write register operand and
 * `flag` as a register input, so the compiler has to treat `size` as possibly
 * modified at that point. Otherwise it expands to an empty statement.
 * NOTE(review): the particular optimizer bug being dodged is not described
 * here - check the project history before changing this. */
#if defined(__clang__) && __clang_major__ > 4 && defined(__ia32__)
#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag)                                                                 \
  do                                                                                                                   \
    __asm __volatile(""                                                                                                \
                     : "+r"(size)                                                                                      \
                     : "r" /* the `b` constraint would be more suitable here,                                          \
                              but it makes clang allocate and push/pop one more                                        \
                              register, so `r` is used since it avoids that. */                                        \
                     (flag));                                                                                          \
  while (0)
#else
#define WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(size, flag)                                                                 \
  do {                                                                                                                 \
    /* nope for non-clang or non-x86 */;                                                                               \
  } while (0)
#endif /* Workaround for CLANG */

#define SEARCH_IMPL(NAME, TYPE_LIST, TYPE_ARG, CMP)                            \
  static __always_inline const TYPE_LIST *NAME(                                \
      const TYPE_LIST *it, size_t length, const TYPE_ARG item) {               \
    const TYPE_LIST *const begin = it, *const end = begin + length;            \
                                                                               \
    if (MDBX_HAVE_CMOV)                                                        \
      do {                                                                     \
        /* Адаптивно-упрощенный шаг двоичного поиска:                          \
         *  - без переходов при наличии cmov или аналога;                      \
         *  - допускает лишние итерации;                                       \
         *  - но ищет пока size > 2, что требует дозавершения поиска           \
         *    среди остающихся 0-1-2 элементов. */                             \
        const TYPE_LIST *const middle = it + (length >> 1);                    \
        length = (length + 1) >> 1;                                            \
        const bool flag = expect_with_probability(CMP(*middle, item), 0, .5);  \
        WORKAROUND_FOR_CLANG_OPTIMIZER_BUG(length, flag);                      \
        it = flag ? middle : it;                                               \
      } while (length > 2);                                                    \
    else                                                                       \
      while (length > 2) {                                                     \
        /* Вариант с использованием условного перехода. Основное отличие в     \
         * том, что при "не равно" (true от компаратора) переход делается на 1 \
         * ближе к концу массива. Алгоритмически это верно и обеспечивает      \
         * чуть-чуть более быструю сходимость, но зато требует больше          \
         * вычислений при true от компаратора. Также ВАЖНО(!) не допускается   \
         * спекулятивное выполнение при size == 0. */                          \
        const TYPE_LIST *const middle = it + (length >> 1);                    \
        length = (length + 1) >> 1;                                            \
        const bool flag = expect_with_probability(CMP(*middle, item), 0, .5);  \
        if (flag) {                                                            \
          it = middle + 1;                                                     \
          length -= 1;                                                         \
        }                                                                      \
      }                                                                        \
    it += length > 1 && expect_with_probability(CMP(*it, item), 0, .5);        \
    it += length > 0 && expect_with_probability(CMP(*it, item), 0, .5);        \
                                                                               \
    if (AUDIT_ENABLED()) {                                                     \
      for (const TYPE_LIST *scan = begin; scan < it; ++scan)                   \
        assert(CMP(*scan, item));                                              \
      for (const TYPE_LIST *scan = it; scan < end; ++scan)                     \
        assert(!CMP(*scan, item));                                             \
      (void)begin, (void)end;                                                  \
    }                                                                          \
                                                                               \
    return it;                                                                 \
  }
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Returns the page size used by default: the system page size taken from
 * `globals.sys_pagesize`, clamped into [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE].
 * The system page size is required to be a power of two. */
__cold size_t mdbx_default_pagesize(void) {
  const size_t pagesize = globals.sys_pagesize;
  ENSURE(nullptr, is_powerof2(pagesize));
  if (pagesize < MDBX_MIN_PAGESIZE)
    return MDBX_MIN_PAGESIZE;
  if (pagesize > MDBX_MAX_PAGESIZE)
    return MDBX_MAX_PAGESIZE;
  return pagesize;
}

/* Returns the minimal database size in bytes for the given page size,
 * computed as MIN_PAGENO pages.
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). Otherwise the value
 * must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_dbsize_min(intptr_t pagesize) {
  if (pagesize >= 1) {
    const bool in_range = pagesize >= (intptr_t)MDBX_MIN_PAGESIZE && pagesize <= (intptr_t)MDBX_MAX_PAGESIZE;
    if (unlikely(!in_range || !is_powerof2((size_t)pagesize)))
      return -1;
  } else {
    pagesize = (intptr_t)mdbx_default_pagesize();
  }

  return MIN_PAGENO * pagesize;
}

/* Returns the maximal database size in bytes for the given page size:
 * (MAX_PAGENO + 1) pages, but never more than MAX_MAPSIZE.
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). Otherwise the value
 * must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_dbsize_max(intptr_t pagesize) {
  if (pagesize >= 1) {
    const bool in_range = pagesize >= (intptr_t)MDBX_MIN_PAGESIZE && pagesize <= (intptr_t)MDBX_MAX_PAGESIZE;
    if (unlikely(!in_range || !is_powerof2((size_t)pagesize)))
      return -1;
  } else {
    pagesize = (intptr_t)mdbx_default_pagesize();
  }

  STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX);
  const uint64_t by_pgno = (1 + (uint64_t)MAX_PAGENO) * pagesize;
  if (by_pgno < MAX_MAPSIZE)
    return (intptr_t)by_pgno;
  return (intptr_t)MAX_MAPSIZE;
}

/* Returns the maximal transaction size in bytes for the given page size.
 * The result is the smaller of two bounds, each scaled down by
 * MDBX_GOLD_RATIO_DBL: PAGELIST_LIMIT pages and MAX_MAPSIZE bytes.
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). Otherwise the value
 * must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) {
  if (pagesize >= 1) {
    const bool in_range = pagesize >= (intptr_t)MDBX_MIN_PAGESIZE && pagesize <= (intptr_t)MDBX_MAX_PAGESIZE;
    if (unlikely(!in_range || !is_powerof2((size_t)pagesize)))
      return -1;
  } else {
    pagesize = (intptr_t)mdbx_default_pagesize();
  }

  STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX);
  const uint64_t bound_by_map = (uint64_t)(MAX_MAPSIZE / MDBX_GOLD_RATIO_DBL);
  const uint64_t bound_by_pagelist = pagesize * (uint64_t)(PAGELIST_LIMIT / MDBX_GOLD_RATIO_DBL);
  if (bound_by_pagelist < bound_by_map)
    return (intptr_t)bound_by_pagelist;
  return (intptr_t)bound_by_map;
}

/* Returns the maximal key size for the given page size and table flags,
 * as computed by keysize_max().
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). The effective page
 * size must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags) {
  const intptr_t effective = (pagesize < 1) ? (intptr_t)mdbx_default_pagesize() : pagesize;
  const bool acceptable = effective >= (intptr_t)MDBX_MIN_PAGESIZE && effective <= (intptr_t)MDBX_MAX_PAGESIZE &&
                          is_powerof2((size_t)effective);
  if (unlikely(!acceptable))
    return -1;

  return keysize_max(effective, flags);
}

/* Returns the maximal key size for the page size of the given environment
 * and the given table flags, or -1 when `env` is null or its signature
 * does not match `env_signature`. */
__cold int mdbx_env_get_maxkeysize_ex(const MDBX_env *env, MDBX_db_flags_t flags) {
  const bool env_is_valid = env && env->signature.weak == env_signature;
  if (likely(env_is_valid)) {
    const intptr_t limit = mdbx_limits_keysize_max((intptr_t)env->ps, flags);
    return (int)limit;
  }
  return -1;
}

__cold int mdbx_env_get_maxkeysize(const MDBX_env *env) { return mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT); }

/* Returns the minimal key size for the given table flags,
 * as computed by keysize_min(). */
__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) {
  const intptr_t lower_bound = keysize_min(flags);
  return lower_bound;
}

/* Returns the maximal value size for the given page size and table flags,
 * as computed by valsize_max().
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). The effective page
 * size must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags) {
  const intptr_t effective = (pagesize < 1) ? (intptr_t)mdbx_default_pagesize() : pagesize;
  const bool acceptable = effective >= (intptr_t)MDBX_MIN_PAGESIZE && effective <= (intptr_t)MDBX_MAX_PAGESIZE &&
                          is_powerof2((size_t)effective);
  if (unlikely(!acceptable))
    return -1;

  return valsize_max(effective, flags);
}

/* Returns the maximal value size for the page size of the given environment
 * and the given table flags, or -1 when `env` is null or its signature
 * does not match `env_signature`. */
__cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags) {
  const bool env_is_valid = env && env->signature.weak == env_signature;
  if (likely(env_is_valid)) {
    const intptr_t limit = mdbx_limits_valsize_max((intptr_t)env->ps, flags);
    return (int)limit;
  }
  return -1;
}

/* Returns the minimal value size for the given table flags,
 * as computed by valsize_min(). */
__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) {
  const intptr_t lower_bound = valsize_min(flags);
  return lower_bound;
}

/* Returns the maximal size of a key-value pair that fits in a single page
 * for the given page size and table flags.
 *
 * When any of MDBX_DUPSORT, MDBX_DUPFIXED, MDBX_INTEGERDUP or MDBX_REVERSEDUP
 * is set, the limit is derived from BRANCH_NODE_MAX(), otherwise from
 * LEAF_NODE_MAX(); NODESIZE is subtracted in both cases.
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). The effective page
 * size must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) {
  if (pagesize < 1)
    pagesize = (intptr_t)mdbx_default_pagesize();

  const bool acceptable = pagesize >= (intptr_t)MDBX_MIN_PAGESIZE && pagesize <= (intptr_t)MDBX_MAX_PAGESIZE &&
                          is_powerof2((size_t)pagesize);
  if (unlikely(!acceptable))
    return -1;

  const bool multivalue = (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) != 0;
  if (!multivalue)
    return LEAF_NODE_MAX(pagesize) - NODESIZE;

  return BRANCH_NODE_MAX(pagesize) - NODESIZE;
}

/* Returns mdbx_limits_pairsize4page_max() for the page size of the given
 * environment, or -1 when `env` is null or its signature does not match
 * `env_signature`. */
__cold int mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags) {
  const bool env_is_valid = env && env->signature.weak == env_signature;
  if (likely(env_is_valid)) {
    const intptr_t limit = mdbx_limits_pairsize4page_max((intptr_t)env->ps, flags);
    return (int)limit;
  }
  return -1;
}

/* Returns the maximal value size that fits in a single page for the given
 * page size and table flags.
 *
 * When any of MDBX_DUPSORT, MDBX_DUPFIXED, MDBX_INTEGERDUP or MDBX_REVERSEDUP
 * is set, the result is valsize_max(); otherwise it is PAGESPACE().
 *
 * A `pagesize` below 1 selects mdbx_default_pagesize(). The effective page
 * size must be a power of two within [MDBX_MIN_PAGESIZE, MDBX_MAX_PAGESIZE],
 * else -1 is returned. */
__cold intptr_t mdbx_limits_valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) {
  if (pagesize < 1)
    pagesize = (intptr_t)mdbx_default_pagesize();

  const bool acceptable = pagesize >= (intptr_t)MDBX_MIN_PAGESIZE && pagesize <= (intptr_t)MDBX_MAX_PAGESIZE &&
                          is_powerof2((size_t)pagesize);
  if (unlikely(!acceptable))
    return -1;

  const bool multivalue = (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) != 0;
  if (!multivalue)
    return PAGESPACE(pagesize);

  return valsize_max(pagesize, flags);
}

/* Returns mdbx_limits_valsize4page_max() for the page size of the given
 * environment, or -1 when `env` is null or its signature does not match
 * `env_signature`. */
__cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags) {
  const bool env_is_valid = env && env->signature.weak == env_signature;
  if (likely(env_is_valid)) {
    const intptr_t limit = mdbx_limits_valsize4page_max((intptr_t)env->ps, flags);
    return (int)limit;
  }
  return -1;
}

/*----------------------------------------------------------------------------*/

/* Estimates the resident-set size needed for `database_bytes` of mapped data:
 * the data itself, plus 1/64 of it, plus a fixed reserve of
 * (512 + MDBX_WORDBITS * 16) megabytes. */
static size_t estimate_rss(size_t database_bytes) {
  const size_t proportional_overhead = database_bytes / 64;
  const size_t fixed_reserve = (512 + MDBX_WORDBITS * 16) * MEGABYTE;
  return database_bytes + proportional_overhead + fixed_reserve;
}

/* Warms up the used part of the database mapping and optionally locks it
 * in memory.
 *
 * Depending on `flags` the function may: release previous memory locks
 * (MDBX_warmup_release), raise process resource limits to the estimated RSS
 * (MDBX_warmup_touchlimit), touch every system page of the used range
 * (MDBX_warmup_force, optionally through writev() to /dev/null with
 * MDBX_warmup_oomsafe) and lock the range in memory (MDBX_warmup_lock).
 * Readahead is always requested via dxb_set_readahead().
 *
 * \param env   Environment; may be null when `txn` is given.
 * \param txn   Transaction; may be null when `env` is given. When both are
 *              given, `txn->env` must be equal to `env`.
 * \param flags Bit mask of MDBX_warmup_* options.
 * \param timeout_seconds_16dot16 Time limit for the page-touching stage, in the
 *              units accepted by osal_16dot16_to_monotime(); zero disables the
 *              limit. It is honored only together with MDBX_warmup_force.
 *
 * \returns MDBX_SUCCESS, MDBX_RESULT_TRUE when the page-touching stage was
 *          stopped by the timeout, MDBX_EINVAL for invalid arguments, or
 *          another error code from the checks and system calls performed. */
__cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, MDBX_warmup_flags_t flags,
                           unsigned timeout_seconds_16dot16) {
  /* At least one of env/txn is required, and no unknown flag bits are allowed. */
  if (unlikely(env == nullptr && txn == nullptr))
    return LOG_IFERR(MDBX_EINVAL);
  if (unlikely(flags > (MDBX_warmup_force | MDBX_warmup_oomsafe | MDBX_warmup_lock | MDBX_warmup_touchlimit |
                        MDBX_warmup_release)))
    return LOG_IFERR(MDBX_EINVAL);

  if (txn) {
    int err = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_ERROR);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
  }
  if (env) {
    int err = check_env(env, false);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
    if (txn && unlikely(txn->env != env))
      return LOG_IFERR(MDBX_EINVAL);
  } else {
    /* Only the transaction was given: take the environment from it. */
    env = txn->env;
  }

  /* The deadline is computed only when page touching is requested;
   * zero means "no deadline". */
  const uint64_t timeout_monotime = (timeout_seconds_16dot16 && (flags & MDBX_warmup_force))
                                        ? osal_monotime() + osal_16dot16_to_monotime(timeout_seconds_16dot16)
                                        : 0;

  if (flags & MDBX_warmup_release)
    munlock_all(env);

  /* Determine the used part of the database: from the transaction if given,
   * otherwise from the most recent meta-page. */
  pgno_t used_pgno;
  if (txn) {
    used_pgno = txn->geo.first_unallocated;
  } else {
    const troika_t troika = meta_tap(env);
    used_pgno = meta_recent(env, &troika).ptr_v->geometry.first_unallocated;
  }
  const size_t used_range = pgno_align2os_bytes(env, used_pgno);
  const pgno_t mlock_pgno = bytes2pgno(env, used_range);

  int rc = MDBX_SUCCESS;
  if (flags & MDBX_warmup_touchlimit) {
    /* Raise process limits to the estimated RSS where they are lower.
     * A failure here is recorded in `rc` and reported by a warning. */
    const size_t estimated_rss = estimate_rss(used_range);
#if defined(_WIN32) || defined(_WIN64)
    SIZE_T current_ws_lower, current_ws_upper;
    if (GetProcessWorkingSetSize(GetCurrentProcess(), &current_ws_lower, &current_ws_upper) &&
        current_ws_lower < estimated_rss) {
      const SIZE_T ws_lower = estimated_rss;
      const SIZE_T ws_upper =
          (MDBX_WORDBITS == 32 && ws_lower > MEGABYTE * 2048) ? ws_lower : ws_lower + MDBX_WORDBITS * MEGABYTE * 32;
      if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_lower, ws_upper)) {
        rc = (int)GetLastError();
        WARNING("SetProcessWorkingSetSize(%zu, %zu) error %d", ws_lower, ws_upper, rc);
      }
    }
#endif /* Windows */
#ifdef RLIMIT_RSS
    struct rlimit rss;
    if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) {
      rss.rlim_cur = estimated_rss;
      if (rss.rlim_max < estimated_rss)
        rss.rlim_max = estimated_rss;
      if (setrlimit(RLIMIT_RSS, &rss)) {
        rc = errno;
        WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS", (size_t)rss.rlim_cur, (size_t)rss.rlim_max, rc);
      }
    }
#endif /* RLIMIT_RSS */
#ifdef RLIMIT_MEMLOCK
    if (flags & MDBX_warmup_lock) {
      struct rlimit memlock;
      if (getrlimit(RLIMIT_MEMLOCK, &memlock) == 0 && memlock.rlim_cur < estimated_rss) {
        memlock.rlim_cur = estimated_rss;
        if (memlock.rlim_max < estimated_rss)
          memlock.rlim_max = estimated_rss;
        if (setrlimit(RLIMIT_MEMLOCK, &memlock)) {
          rc = errno;
          WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_MEMLOCK", (size_t)memlock.rlim_cur,
                  (size_t)memlock.rlim_max, rc);
        }
      }
    }
#endif /* RLIMIT_MEMLOCK */
    /* Silences "unused variable" when none of the branches above is compiled. */
    (void)estimated_rss;
  }

#if defined(MLOCK_ONFAULT) &&                                                                                          \
    ((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 27)) || (defined(__ANDROID_API__) && __ANDROID_API__ >= 30)) &&        \
    (defined(__linux__) || defined(__gnu_linux__))
  /* Try mlock2(MLOCK_ONFAULT) first when locking is requested and less than
   * the used range is currently accounted as locked. */
  if ((flags & MDBX_warmup_lock) != 0 && globals.linux_kernel_version >= 0x04040000 &&
      atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) < mlock_pgno) {
    if (mlock2(env->dxb_mmap.base, used_range, MLOCK_ONFAULT)) {
      rc = errno;
      WARNING("mlock2(%zu, %s) error %d", used_range, "MLOCK_ONFAULT", rc);
    } else {
      update_mlcnt(env, mlock_pgno, true);
      rc = MDBX_SUCCESS;
    }
    /* Unless mlock2() failed with EINVAL, drop the lock flag so that the
     * regular locking stage at the end of the function is skipped. */
    if (rc != EINVAL)
      flags -= MDBX_warmup_lock;
  }
#endif /* MLOCK_ONFAULT */

  /* Request readahead for the used range; its error is reported
   * only if nothing has failed before. */
  int err = MDBX_ENOSYS;
  err = dxb_set_readahead(env, used_pgno, true, true);
  if (err != MDBX_SUCCESS && rc == MDBX_SUCCESS)
    rc = err;

  if ((flags & MDBX_warmup_force) != 0 && (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS)) {
    /* Touch one byte in every system page of the used range. */
    const volatile uint8_t *ptr = env->dxb_mmap.base;
    size_t offset = 0, unused = 42;
#if !(defined(_WIN32) || defined(_WIN64))
    if (flags & MDBX_warmup_oomsafe) {
      /* Pages are touched by the kernel on behalf of writev() to /dev/null,
       * so a fault is reported as an error code (EFAULT is mapped to ENOMEM)
       * rather than hitting this process directly. */
      const int null_fd = open("/dev/null", O_WRONLY);
      if (unlikely(null_fd < 0))
        rc = errno;
      else {
        struct iovec iov[MDBX_AUXILARY_IOV_MAX];
        for (;;) {
          /* Build a batch with one byte from each of the next pages. */
          unsigned i;
          for (i = 0; i < MDBX_AUXILARY_IOV_MAX && offset < used_range; ++i) {
            iov[i].iov_base = (void *)(ptr + offset);
            iov[i].iov_len = 1;
            offset += globals.sys_pagesize;
          }
          if (unlikely(writev(null_fd, iov, i) < 0)) {
            rc = errno;
            if (rc == EFAULT)
              rc = ENOMEM;
            break;
          }
          if (offset >= used_range) {
            rc = MDBX_SUCCESS;
            break;
          }
          if (timeout_seconds_16dot16 && osal_monotime() > timeout_monotime) {
            rc = MDBX_RESULT_TRUE;
            break;
          }
        }
        close(null_fd);
      }
    } else
#endif /* Windows */
      /* Direct reads through a volatile pointer; the sum is accumulated
       * only to give the reads a consumer. */
      for (;;) {
        unused += ptr[offset];
        offset += globals.sys_pagesize;
        if (offset >= used_range) {
          rc = MDBX_SUCCESS;
          break;
        }
        if (timeout_seconds_16dot16 && osal_monotime() > timeout_monotime) {
          rc = MDBX_RESULT_TRUE;
          break;
        }
      }
    (void)unused;
  }

  /* Regular locking stage: skipped when the lock flag was dropped above,
   * when an error or timeout has been recorded, or when enough pages are
   * already accounted as locked. */
  if ((flags & MDBX_warmup_lock) != 0 && (rc == MDBX_SUCCESS || rc == MDBX_ENOSYS) &&
      atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) < mlock_pgno) {
#if defined(_WIN32) || defined(_WIN64)
    if (VirtualLock(env->dxb_mmap.base, used_range)) {
      update_mlcnt(env, mlock_pgno, true);
      rc = MDBX_SUCCESS;
    } else {
      rc = (int)GetLastError();
      WARNING("%s(%zu) error %d", "VirtualLock", used_range, rc);
    }
#elif defined(_POSIX_MEMLOCK_RANGE)
    if (mlock(env->dxb_mmap.base, used_range) == 0) {
      update_mlcnt(env, mlock_pgno, true);
      rc = MDBX_SUCCESS;
    } else {
      rc = errno;
      WARNING("%s(%zu) error %d", "mlock", used_range, rc);
    }
#else
    rc = MDBX_ENOSYS;
#endif
  }

  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

/* Stores the environment's `lazy_fd` file handle into `*arg`.
 *
 * Returns MDBX_SUCCESS, the error from check_env(env, true),
 * or MDBX_EINVAL when `arg` is null. */
__cold int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) {
  const int err = check_env(env, true);
  if (likely(err == MDBX_SUCCESS)) {
    if (likely(arg != nullptr)) {
      *arg = env->lazy_fd;
      return MDBX_SUCCESS;
    }
    return LOG_IFERR(MDBX_EINVAL);
  }
  return LOG_IFERR(err);
}

/* Turns the given environment flags on or off.
 *
 * For an active environment (ENV_ACTIVE is set) only flags within
 * ENV_CHANGEABLE_FLAGS may be given, otherwise only flags within
 * ENV_USABLE_FLAGS; any other bit yields MDBX_EPERM. A read-only environment
 * yields MDBX_EACCESS.
 *
 * When the environment is active and env_owned_wrtxn() reports false,
 * the change is made between lck_txn_lock() and lck_txn_unlock().
 *
 * Returns MDBX_SUCCESS or an error code from the checks or from locking. */
__cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, bool onoff) {
  int err = check_env(env, false);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  const bool is_active = (env->flags & ENV_ACTIVE) != 0;
  if (unlikely(flags & (is_active ? ~ENV_CHANGEABLE_FLAGS : ~ENV_USABLE_FLAGS)))
    return LOG_IFERR(MDBX_EPERM);

  if (unlikely(env->flags & MDBX_RDONLY))
    return LOG_IFERR(MDBX_EACCESS);

  /* The lock is either acquired successfully or the function returns,
   * so "lock needed" also means "must unlock" below. */
  const bool lock_needed = is_active && !env_owned_wrtxn(env);
  if (lock_needed) {
    err = lck_txn_lock(env, false);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
  }

  if (!onoff)
    env->flags &= ~flags;
  else
    env->flags = combine_durability_flags(env->flags, flags);

  if (lock_needed)
    lck_txn_unlock(env);
  return MDBX_SUCCESS;
}

__cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *flags) {
  int rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(!flags))
    return LOG_IFERR(MDBX_EINVAL);

  *flags = env->flags & ENV_USABLE_FLAGS;
  return MDBX_SUCCESS;
}

/* Associates the application-supplied pointer `ctx` with the environment.
 *
 * Returns MDBX_SUCCESS or the error from check_env(env, false). */
__cold int mdbx_env_set_userctx(MDBX_env *env, void *ctx) {
  const int err = check_env(env, false);
  if (likely(err == MDBX_SUCCESS)) {
    env->userctx = ctx;
    return MDBX_SUCCESS;
  }
  return LOG_IFERR(err);
}

/* Returns the pointer stored in `env->userctx`, or null when `env` is null. */
__cold void *mdbx_env_get_userctx(const MDBX_env *env) {
  if (!env)
    return nullptr;
  return env->userctx;
}

/* Installs the assertion-failure callback for the environment.
 *
 * The callback is stored only in builds with a non-zero MDBX_DEBUG;
 * otherwise `func` is ignored and MDBX_ENOSYS is returned.
 *
 * Returns MDBX_SUCCESS, MDBX_ENOSYS, or the error from check_env(env, false). */
__cold int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) {
  const int err = check_env(env, false);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

#if !MDBX_DEBUG
  (void)func;
  return LOG_IFERR(MDBX_ENOSYS);
#else
  env->assert_func = func;
  return MDBX_SUCCESS;
#endif
}

/* Stores `hsr` as the environment's `hsr_callback`.
 *
 * Returns MDBX_SUCCESS or the error from check_env(env, false). */
__cold int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) {
  const int err = check_env(env, false);
  if (likely(err == MDBX_SUCCESS)) {
    env->hsr_callback = hsr;
    return MDBX_SUCCESS;
  }
  return LOG_IFERR(err);
}

/* Returns the environment's `hsr_callback`, or null when `env` is null
 * or its signature does not match `env_signature`. */
__cold MDBX_hsr_func *mdbx_env_get_hsr(const MDBX_env *env) {
  if (unlikely(!env || env->signature.weak != env_signature))
    return nullptr;
  return env->hsr_callback;
}

#if defined(_WIN32) || defined(_WIN64)
/* Windows only: stores the wide-character pathname kept in
 * `env->pathname.specified` into `*arg`.
 *
 * Returns MDBX_SUCCESS, the error from check_env(env, true),
 * or MDBX_EINVAL when `arg` is null. */
__cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) {
  const int err = check_env(env, true);
  if (likely(err == MDBX_SUCCESS)) {
    if (likely(arg != nullptr)) {
      *arg = env->pathname.specified;
      return MDBX_SUCCESS;
    }
    return LOG_IFERR(MDBX_EINVAL);
  }
  return LOG_IFERR(err);
}
#endif /* Windows */

/* Stores the environment's pathname as a `char` string into `*arg`.
 *
 * On non-Windows builds this is `env->pathname.specified` itself.
 * On Windows the wide-character pathname is converted on first use with
 * WideCharToMultiByte(CP_THREAD_ACP) and the result is cached in
 * `env->pathname_char`.
 *
 * Returns MDBX_SUCCESS, the error from check_env(env, true), MDBX_EINVAL when
 * `arg` is null, MDBX_ENOMEM on allocation failure, or a Windows error code
 * from the conversion. */
__cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) {
  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(!arg))
    return LOG_IFERR(MDBX_EINVAL);

#if defined(_WIN32) || defined(_WIN64)
  if (!env->pathname_char) {
    /* No cached conversion yet: compute the required length first. */
    *arg = nullptr;
    DWORD flags = /* WC_ERR_INVALID_CHARS */ 0x80;
    size_t mb_len =
        WideCharToMultiByte(CP_THREAD_ACP, flags, env->pathname.specified, -1, nullptr, 0, nullptr, nullptr);
    rc = mb_len ? MDBX_SUCCESS : (int)GetLastError();
    if (rc == ERROR_INVALID_FLAGS) {
      /* The flag was rejected: retry with no flags and keep `flags` zeroed
       * so that the actual conversion below uses the same setting. */
      mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, env->pathname.specified, -1, nullptr, 0, nullptr, nullptr);
      rc = mb_len ? MDBX_SUCCESS : (int)GetLastError();
    }
    if (unlikely(rc != MDBX_SUCCESS))
      return LOG_IFERR(rc);

    char *const mb_pathname = osal_malloc(mb_len);
    if (!mb_pathname)
      return LOG_IFERR(MDBX_ENOMEM);
    /* The conversion must produce exactly the length computed above. */
    if (mb_len != (size_t)WideCharToMultiByte(CP_THREAD_ACP, flags, env->pathname.specified, -1, mb_pathname,
                                              (int)mb_len, nullptr, nullptr)) {
      rc = (int)GetLastError();
      osal_free(mb_pathname);
      return LOG_IFERR(rc);
    }
    /* Publish the string with a compare-and-swap against null. If the cache
     * is already filled, or the swap reports a non-null previous value,
     * our copy is discarded. */
    if (env->pathname_char ||
        InterlockedCompareExchangePointer((PVOID volatile *)&env->pathname_char, mb_pathname, nullptr))
      osal_free(mb_pathname);
  }
  *arg = env->pathname_char;
#else
  *arg = env->pathname.specified;
#endif /* Windows */
  return MDBX_SUCCESS;
}

/*------------------------------------------------------------------------------
 * Legacy API */

#ifndef LIBMDBX_NO_EXPORTS_LEGACY_API

/* Exported out-of-line definitions of the legacy API. Each function below
 * only forwards its arguments to the `__inline_`-prefixed implementation of
 * the same name and returns its result unchanged. */

/* Transactions. */
LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret) {
  return __inline_mdbx_txn_begin(env, parent, flags, ret);
}

LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn) { return __inline_mdbx_txn_commit(txn); }

/* Environment statistics and information. */
LIBMDBX_API __cold int mdbx_env_stat(const MDBX_env *env, MDBX_stat *stat, size_t bytes) {
  return __inline_mdbx_env_stat(env, stat, bytes);
}

LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, size_t bytes) {
  return __inline_mdbx_env_info(env, info, bytes);
}

LIBMDBX_API int mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) {
  return __inline_mdbx_dbi_flags(txn, dbi, flags);
}

/* Environment sync and close. */
LIBMDBX_API __cold int mdbx_env_sync(MDBX_env *env) { return __inline_mdbx_env_sync(env); }

LIBMDBX_API __cold int mdbx_env_sync_poll(MDBX_env *env) { return __inline_mdbx_env_sync_poll(env); }

LIBMDBX_API __cold int mdbx_env_close(MDBX_env *env) { return __inline_mdbx_env_close(env); }

/* Environment option setters and getters. */
LIBMDBX_API __cold int mdbx_env_set_mapsize(MDBX_env *env, size_t size) {
  return __inline_mdbx_env_set_mapsize(env, size);
}

LIBMDBX_API __cold int mdbx_env_set_maxdbs(MDBX_env *env, MDBX_dbi dbs) {
  return __inline_mdbx_env_set_maxdbs(env, dbs);
}

LIBMDBX_API __cold int mdbx_env_get_maxdbs(const MDBX_env *env, MDBX_dbi *dbs) {
  return __inline_mdbx_env_get_maxdbs(env, dbs);
}

LIBMDBX_API __cold int mdbx_env_set_maxreaders(MDBX_env *env, unsigned readers) {
  return __inline_mdbx_env_set_maxreaders(env, readers);
}

LIBMDBX_API __cold int mdbx_env_get_maxreaders(const MDBX_env *env, unsigned *readers) {
  return __inline_mdbx_env_get_maxreaders(env, readers);
}

LIBMDBX_API __cold int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) {
  return __inline_mdbx_env_set_syncbytes(env, threshold);
}

LIBMDBX_API __cold int mdbx_env_get_syncbytes(const MDBX_env *env, size_t *threshold) {
  return __inline_mdbx_env_get_syncbytes(env, threshold);
}

LIBMDBX_API __cold int mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
  return __inline_mdbx_env_set_syncperiod(env, seconds_16dot16);
}

LIBMDBX_API __cold int mdbx_env_get_syncperiod(const MDBX_env *env, unsigned *seconds_16dot16) {
  return __inline_mdbx_env_get_syncperiod(env, seconds_16dot16);
}

/* Key conversion helpers. */
LIBMDBX_API __cold uint64_t mdbx_key_from_int64(const int64_t i64) { return __inline_mdbx_key_from_int64(i64); }

LIBMDBX_API __cold uint32_t mdbx_key_from_int32(const int32_t i32) { return __inline_mdbx_key_from_int32(i32); }

/* Page size limits. */
LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_min(void) { return __inline_mdbx_limits_pgsize_min(); }

LIBMDBX_API __cold intptr_t mdbx_limits_pgsize_max(void) { return __inline_mdbx_limits_pgsize_max(); }

#endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \note Please refer to the COPYRIGHT file for explanations license change,
/// credits and acknowledgments.
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* State shared between the tree-walking thread and the dedicated writer
 * thread of a compacting copy. Two write buffers are used in turn: the walker
 * fills the buffer selected by `head & 1`, the writer drains the buffer
 * selected by `tail & 1`. */
typedef struct compacting_context {
  MDBX_env *env;            /* source environment */
  MDBX_txn *txn;            /* transaction the copy is made from */
  MDBX_copy_flags_t flags;  /* copy options, e.g. MDBX_CP_THROTTLE_MVCC */
  pgno_t first_unallocated; /* page number to be assigned to the next page put into the copy */
  osal_condpair_t condpair; /* lock and signalling between the two threads */
  volatile unsigned head;   /* incremented by the walker each time it hands a buffer over */
  volatile unsigned tail;   /* incremented by the writer each time it is done with a buffer */
  uint8_t *write_buf[2];    /* the two write buffers */
  size_t write_len[2];      /* bytes filled in each buffer; the writer treats a zero length as EOF */
  /* Error code.  Never cleared if set.  Both threads can set nonzero
   * to fail the copy.  Not mutex-protected, expects atomic int. */
  volatile int error;
  mdbx_filehandle_t fd; /* destination handle written by the writer thread */
} ctx_t;

__cold static int compacting_walk_tree(ctx_t *ctx, tree_t *tree);

/* Dedicated writer thread for compacting copy. */
/* Dedicated writer thread for compacting copy.
 *
 * Waits on the condpair until a buffer has been handed over
 * (`tail != head`), writes it to `ctx->fd` and signals the other side.
 * A handed-over buffer of zero length is treated as end-of-data.
 * Any failure is stored into `ctx->error`, which also ends the loop.
 *
 * `arg` is the shared `ctx_t`. The thread result is always zero;
 * errors are reported only through `ctx->error`. */
__cold static THREAD_RESULT THREAD_CALL compacting_write_thread(void *arg) {
  ctx_t *const ctx = arg;

#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64))
  /* Block SIGPIPE in this thread, so that a broken pipe is seen
   * as an EPIPE error from the write instead of a signal. */
  sigset_t sigset;
  sigemptyset(&sigset);
  sigaddset(&sigset, SIGPIPE);
  ctx->error = pthread_sigmask(SIG_BLOCK, &sigset, nullptr);
#endif /* EPIPE */

  osal_condpair_lock(&ctx->condpair);
  while (!ctx->error) {
    /* Wait until the walker hands over a buffer or an error is raised. */
    while (ctx->tail == ctx->head && !ctx->error) {
      int err = osal_condpair_wait(&ctx->condpair, true);
      if (err != MDBX_SUCCESS) {
        ctx->error = err;
        goto bailout;
      }
    }
    const unsigned toggle = ctx->tail & 1;
    size_t wsize = ctx->write_len[toggle];
    if (wsize == 0) {
      ctx->tail += 1;
      break /* EOF */;
    }
    /* Mark the buffer as empty before writing its content out. */
    ctx->write_len[toggle] = 0;
    uint8_t *ptr = ctx->write_buf[toggle];
    if (!ctx->error) {
      int err = osal_write(ctx->fd, ptr, wsize);
      if (err != MDBX_SUCCESS) {
#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64))
        if (err == EPIPE) {
          /* Collect the pending SIGPIPE,
           * otherwise at least OS X gives it to the process on thread-exit. */
          int unused;
          sigwait(&sigset, &unused);
        }
#endif /* EPIPE */
        ctx->error = err;
        goto bailout;
      }
    }
    /* The buffer is free again: let the walker know. */
    ctx->tail += 1;
    osal_condpair_signal(&ctx->condpair, false);
  }
bailout:
  osal_condpair_unlock(&ctx->condpair);
  return (THREAD_RESULT)0;
}

/* Give buffer and/or MDBX_EOF to writer thread, await unused buffer. */
/* Give buffer and/or MDBX_EOF to writer thread, await unused buffer.
 *
 * Increments `ctx->head` to hand the current buffer over, signals the writer,
 * then waits while both buffers are in use. With MDBX_CP_THROTTLE_MVCC
 * the transaction is parked before each wait and unparked after it.
 *
 * Returns `ctx->error`, i.e. zero unless either thread has raised an error. */
__cold static int compacting_toggle_write_buffers(ctx_t *ctx) {
  osal_condpair_lock(&ctx->condpair);
  eASSERT(ctx->env, ctx->head - ctx->tail < 2 || ctx->error);
  ctx->head += 1;
  osal_condpair_signal(&ctx->condpair, true);
  while (!ctx->error && ctx->head - ctx->tail == 2 /* both buffers in use */) {
    if (ctx->flags & MDBX_CP_THROTTLE_MVCC)
      mdbx_txn_park(ctx->txn, false);
    int err = osal_condpair_wait(&ctx->condpair, false);
    /* Unpark only after a successful wait; a failure of either step
     * becomes the error of the whole copy. */
    if (err == MDBX_SUCCESS && (ctx->flags & MDBX_CP_THROTTLE_MVCC) != 0)
      err = mdbx_txn_unpark(ctx->txn, false);
    if (err != MDBX_SUCCESS)
      ctx->error = err;
  }
  osal_condpair_unlock(&ctx->condpair);
  return ctx->error;
}

/* Appends `bytes` bytes to the current write buffer, switching buffers via
 * compacting_toggle_write_buffers() whenever the current one has no room.
 *
 * \param src    Data to copy, or null to append zero bytes.
 * \param bytes  Number of bytes to append.
 * \param pgno   When non-zero, the data starts with a page header that is
 *               patched in the buffer: `pgno` is stored into it, a zero
 *               `txnid` is replaced by the transaction's one and, for a
 *               P_LARGE page, `pages` is set to `npages`. A buffer with less
 *               than PAGEHDRSZ bytes of room is not used for such a chunk.
 * \param npages Number of pages, used only for the P_LARGE header fixup.
 *
 * \returns MDBX_SUCCESS or the error from switching buffers. */
static int compacting_put_bytes(ctx_t *ctx, const void *src, size_t bytes, pgno_t pgno, pgno_t npages) {
  assert(pgno == 0 || bytes > PAGEHDRSZ);
  while (bytes > 0) {
    const size_t slot = ctx->head & 1;
    const size_t room = MDBX_ENVCOPY_WRITEBUF - ctx->write_len[slot];
    if (room < (pgno ? PAGEHDRSZ : 1)) {
      /* Not enough room: hand the buffer over and retry with the other one. */
      const int err = compacting_toggle_write_buffers(ctx);
      if (likely(err == MDBX_SUCCESS))
        continue;
      return err;
    }

    size_t chunk = room;
    if (bytes < room)
      chunk = bytes;
    void *const out = ctx->write_buf[slot] + ctx->write_len[slot];
    if (!src) {
      memset(out, 0, chunk);
    } else {
      memcpy(out, src, chunk);
      if (pgno) {
        /* Patch the header of the copy; done once, for the first chunk only. */
        assert(chunk > PAGEHDRSZ);
        page_t *mp = out;
        mp->pgno = pgno;
        if (mp->txnid == 0)
          mp->txnid = ctx->txn->txnid;
        if (mp->flags == P_LARGE) {
          assert(bytes <= pgno2bytes(ctx->env, npages));
          mp->pages = npages;
        }
        pgno = 0;
      }
      src = ptr_disp(src, chunk);
    }
    ctx->write_len[slot] += chunk;
    bytes -= chunk;
  }
  return MDBX_SUCCESS;
}

/* Emits one page (or a run of `npages` pages) into the copy under the next
 * unallocated page number, advancing `ctx->first_unallocated` by `npages`.
 *
 * The output consists of the first `head_bytes` of `mp`, then zero filling,
 * then the last `tail_bytes` of the page, `pgno2bytes(env, npages)` bytes
 * in total.
 *
 * \returns MDBX_SUCCESS or the first error from compacting_put_bytes(). */
static int compacting_put_page(ctx_t *ctx, const page_t *mp, const size_t head_bytes, const size_t tail_bytes,
                               const pgno_t npages) {
  if (tail_bytes == 0) {
    assert(head_bytes <= pgno2bytes(ctx->env, npages));
    assert((npages == 1 && page_type(mp) == (P_LEAF | P_DUPFIX)) || page_type(mp) == P_LARGE);
  } else {
    assert(head_bytes + tail_bytes <= ctx->env->ps);
    assert(npages == 1 && (page_type(mp) == P_BRANCH || page_type(mp) == P_LEAF));
  }

  const pgno_t pgno = ctx->first_unallocated;
  ctx->first_unallocated += npages;

  const size_t gap_bytes = pgno2bytes(ctx->env, npages) - (head_bytes + tail_bytes);
  const void *const tail_src = ptr_disp(mp, ctx->env->ps - tail_bytes);

  int err = compacting_put_bytes(ctx, mp, head_bytes, pgno, npages);
  if (likely(err == MDBX_SUCCESS))
    err = compacting_put_bytes(ctx, nullptr, gap_bytes, 0, 0);
  if (likely(err == MDBX_SUCCESS))
    err = compacting_put_bytes(ctx, tail_src, tail_bytes, 0, 0);
  return err;
}

/* Walks one b-tree and emits all of its pages into the copy.
 *
 * Pages are emitted children-first, so the new page number of each child is
 * known and patched into a private copy of its parent before the parent
 * itself is emitted. Large-value pages and nested trees referenced from
 * leaf nodes are emitted (recursively for trees) before the leaf.
 *
 * \param ctx          Compacting context.
 * \param mc           Cursor used for the traversal.
 * \param parent_pgno  In: root page number of the tree.
 *                     Out: page number of the root inside the copy.
 * \param parent_txnid Passed to page_get() when reading the root page.
 *
 * \returns MDBX_SUCCESS, MDBX_ENOMEM, MDBX_CORRUPTED, MDBX_CURSOR_FULL or an
 *          error from reading or emitting pages. */
__cold static int compacting_walk(ctx_t *ctx, MDBX_cursor *mc, pgno_t *const parent_pgno, txnid_t parent_txnid) {
  mc->top = 0;
  mc->ki[0] = 0;
  int rc = page_get(mc, *parent_pgno, &mc->pg[0], parent_txnid);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  /* NOTE(review): presumably positions the cursor stack at the first leaf - confirm. */
  rc = tree_search_finalize(mc, nullptr, Z_FIRST);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  /* Make cursor pages writable */
  /* One page per level of the cursor stack, plus one spare page for a leaf. */
  const intptr_t deep_limit = mc->top + 1;
  void *const buf = osal_malloc(pgno2bytes(ctx->env, deep_limit + 1));
  if (buf == nullptr)
    return MDBX_ENOMEM;

  void *ptr = buf;
  for (intptr_t i = 0; i <= mc->top; i++) {
    page_copy(ptr, mc->pg[i], ctx->env->ps);
    mc->pg[i] = ptr;
    ptr = ptr_disp(ptr, ctx->env->ps);
  }
  /* This is writable space for a leaf page. Usually not needed. */
  page_t *const leaf = ptr;

  while (mc->top >= 0) {
    page_t *mp = mc->pg[mc->top];
    const size_t nkeys = page_numkeys(mp);
    if (is_leaf(mp)) {
      if (!(mc->flags & z_inner) /* may have nested N_TREE or N_BIG nodes */) {
        for (size_t i = 0; i < nkeys; i++) {
          node_t *node = page_node(mp, i);
          if (node_flags(node) == N_BIG) {
            /* Need writable leaf */
            if (mp != leaf) {
              mc->pg[mc->top] = leaf;
              page_copy(leaf, mp, ctx->env->ps);
              mp = leaf;
              node = page_node(mp, i);
            }

            /* Emit the large-value pages and store their new page number in the node. */
            const pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid);
            if (unlikely((rc = lp.err) != MDBX_SUCCESS))
              goto bailout;
            const size_t datasize = node_ds(node);
            const pgno_t npages = largechunk_npages(ctx->env, datasize);
            poke_pgno(node_data(node), ctx->first_unallocated);
            rc = compacting_put_page(ctx, lp.page, PAGEHDRSZ + datasize, 0, npages);
            if (unlikely(rc != MDBX_SUCCESS))
              goto bailout;
          } else if (node_flags(node) & N_TREE) {
            if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) {
              ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size",
                    (unsigned)node_ds(node));
              rc = MDBX_CORRUPTED;
              goto bailout;
            }

            /* Need writable leaf */
            if (mp != leaf) {
              mc->pg[mc->top] = leaf;
              page_copy(leaf, mp, ctx->env->ps);
              mp = leaf;
              node = page_node(mp, i);
            }

            /* Walk the nested tree first, then write its updated descriptor
             * back into the node. With N_DUP the nested tree is walked with
             * the sub-cursor, otherwise through compacting_walk_tree(). */
            tree_t *nested = nullptr;
            if (node_flags(node) & N_DUP) {
              rc = cursor_dupsort_setup(mc, node, mp);
              if (likely(rc == MDBX_SUCCESS)) {
                nested = &mc->subcur->nested_tree;
                rc = compacting_walk(ctx, &mc->subcur->cursor, &nested->root, mp->txnid);
              }
            } else {
              cASSERT(mc, (mc->flags & z_inner) == 0 && mc->subcur == 0);
              cursor_couple_t *couple = container_of(mc, cursor_couple_t, outer);
              nested = &couple->inner.nested_tree;
              memcpy(nested, node_data(node), sizeof(tree_t));
              rc = compacting_walk_tree(ctx, nested);
            }
            if (unlikely(rc != MDBX_SUCCESS))
              goto bailout;
            memcpy(node_data(node), nested, sizeof(tree_t));
          }
        }
      }
    } else {
      /* Branch page: advance to the next child, if any is left, and go down
       * until a non-branch page is reached. */
      mc->ki[mc->top]++;
      if (mc->ki[mc->top] < nkeys) {
        for (;;) {
          const node_t *node = page_node(mp, mc->ki[mc->top]);
          rc = page_get(mc, node_pgno(node), &mp, mp->txnid);
          if (unlikely(rc != MDBX_SUCCESS))
            goto bailout;
          mc->top += 1;
          /* Going deeper than the initial descent would overrun `buf`. */
          if (unlikely(mc->top >= deep_limit)) {
            rc = MDBX_CURSOR_FULL;
            goto bailout;
          }
          mc->ki[mc->top] = 0;
          if (!is_branch(mp)) {
            mc->pg[mc->top] = mp;
            break;
          }
          /* Whenever we advance to a sibling branch page,
           * we must proceed all the way down to its first leaf. */
          page_copy(mc->pg[mc->top], mp, ctx->env->ps);
        }
        continue;
      }
    }

    /* The page at the top of the stack is complete: emit it. For a dupfix
     * leaf only the header and the packed keys are copied, otherwise the part
     * up to `lower` and the part from `upper` to the end of the page. */
    const pgno_t pgno = ctx->first_unallocated;
    if (likely(!is_dupfix_leaf(mp))) {
      rc = compacting_put_page(ctx, mp, PAGEHDRSZ + mp->lower, ctx->env->ps - (PAGEHDRSZ + mp->upper), 1);
    } else {
      rc = compacting_put_page(ctx, mp, PAGEHDRSZ + page_numkeys(mp) * mp->dupfix_ksize, 0, 1);
    }
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    if (mc->top) {
      /* Update parent if there is one */
      node_set_pgno(page_node(mc->pg[mc->top - 1], mc->ki[mc->top - 1]), pgno);
      cursor_pop(mc);
    } else {
      /* Otherwise we're done */
      *parent_pgno = pgno;
      break;
    }
  }

bailout:
  osal_free(buf);
  return rc;
}

/* Emits the whole tree described by `tree` into the copy using a temporary
 * cursor couple, and updates `tree->root` with the new root page number.
 * A zero `tree->mod_txnid` is replaced by the id of the copying transaction.
 *
 * Returns MDBX_SUCCESS immediately for an empty tree (root is P_INVALID),
 * otherwise the result of cursor setup or of compacting_walk(). */
__cold static int compacting_walk_tree(ctx_t *ctx, tree_t *tree) {
  if (likely(tree->root != P_INVALID)) {
    kvx_t kvx = {.clc = {.k = {.lmin = INT_MAX}, .v = {.lmin = INT_MAX}}};
    cursor_couple_t couple;
    memset(&couple, 0, sizeof(couple));
    couple.inner.cursor.signature = ~cur_signature_live;
    const int err = cursor_init4walk(&couple, ctx->txn, tree, &kvx);
    if (unlikely(err != MDBX_SUCCESS))
      return err;

    couple.inner.cursor.checking |= z_ignord | z_pagecheck;
    couple.outer.checking |= z_ignord | z_pagecheck;
    if (tree->mod_txnid == 0)
      tree->mod_txnid = ctx->txn->txnid;
    return compacting_walk(ctx, &couple.outer, &tree->root, tree->mod_txnid);
  }

  return MDBX_SUCCESS; /* empty db */
}

/* Finalizes the meta-page of a compacted copy: recomputes the current size
 * `geometry.now` from the number of pages actually used, keeps it within
 * [geometry.lower, geometry.upper] and signs the meta as steady. */
__cold static void compacting_fixup_meta(MDBX_env *env, meta_t *meta) {
  eASSERT(env, meta->trees.gc.mod_txnid || meta->trees.gc.root == P_INVALID);
  eASSERT(env, meta->trees.main.mod_txnid || meta->trees.main.root == P_INVALID);

  /* Calculate filesize taking in account shrink/growing thresholds */
  const pgno_t used = meta->geometry.first_unallocated;
  if (meta->geometry.now != used) {
    pgno_t target = used;
    /* The growth step is preferred as the rounding unit,
     * the shrink step is used when no growth step is set. */
    const size_t aligner = pv2pages(meta->geometry.grow_pv ? meta->geometry.grow_pv : meta->geometry.shrink_pv);
    if (aligner)
      target = pgno_align2os_pgno(env, used + aligner - used % aligner);
    meta->geometry.now = target;
  }

  if (meta->geometry.now < meta->geometry.lower)
    meta->geometry.now = meta->geometry.lower;
  if (meta->geometry.now > meta->geometry.upper)
    meta->geometry.now = meta->geometry.upper;

  /* Update signature */
  assert(meta->geometry.now >= meta->geometry.first_unallocated);
  meta_sign_as_steady(meta);
}

/* Make resizable */
/* Makes the geometry in `meta` resizable: lowers the size floor to MIN_PAGENO
 * and fills in growth and shrink steps where they are zero. The default
 * growth step is 1 + (upper - lower) / 42 pages; the default shrink step is
 * twice the growth step. */
__cold static void meta_make_sizeable(meta_t *meta) {
  meta->geometry.lower = MIN_PAGENO;

  if (!meta->geometry.grow_pv) {
    const pgno_t span = meta->geometry.upper - meta->geometry.lower;
    const pgno_t grow_step = 1 + span / 42;
    meta->geometry.grow_pv = pages2pv(grow_step);
  }

  if (!meta->geometry.shrink_pv) {
    const pgno_t shrink_step = pv2pages(meta->geometry.grow_pv) << 1;
    meta->geometry.shrink_pv = pages2pv(shrink_step);
  }
}

/* Writes a compacted copy of the database snapshot seen by `txn` into `fd`.
 *
 * `buffer` is laid out as the meta-pages area followed (at a system-page
 * aligned offset) by two write buffers of MDBX_ENVCOPY_WRITEBUF bytes each.
 * The new meta-pages are always prepared inside `buffer`; they are written
 * to `fd` by this function only when `dest_is_pipe` is true (a pipe cannot
 * be rewritten afterwards), otherwise they are just left in `buffer`.
 *
 * With MDBX_CP_THROTTLE_MVCC the transaction is parked around the writes
 * and is left parked on the final path of the function.
 *
 * Returns MDBX_SUCCESS or an error code; MDBX_CORRUPTED is returned when GC
 * records are malformed or the number of copied pages contradicts the
 * expectation computed from the GC content. */
__cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, uint8_t *buffer,
                                       const bool dest_is_pipe, const MDBX_copy_flags_t flags) {
  const size_t meta_bytes = pgno2bytes(env, NUM_METAS);
  uint8_t *const data_buffer = buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize);
  meta_t *const meta = meta_init_triplet(env, buffer);
  meta_set_txnid(env, meta, txn->txnid);

  if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE)
    meta_make_sizeable(meta);

  /* copy canary sequences if present */
  if (txn->canary.v) {
    meta->canary = txn->canary;
    meta->canary.v = constmeta_txnid(meta);
  }

  if (txn->dbs[MAIN_DBI].root == P_INVALID) {
    /* When the DB is empty, handle it specially to
     * fix any breakage like page leaks from ITS#8174. */
    meta->trees.main.flags = txn->dbs[MAIN_DBI].flags;
    compacting_fixup_meta(env, meta);
    if (dest_is_pipe) {
      if (flags & MDBX_CP_THROTTLE_MVCC)
        mdbx_txn_park(txn, false);
      int rc = osal_write(fd, buffer, meta_bytes);
      if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_THROTTLE_MVCC) != 0)
        rc = mdbx_txn_unpark(txn, false);
      if (unlikely(rc != MDBX_SUCCESS))
        return rc;
    }
  } else {
    /* Count free pages + GC pages. */
    cursor_couple_t couple;
    int rc = cursor_init(&couple.outer, txn, FREE_DBI);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    pgno_t gc_npages = txn->dbs[FREE_DBI].branch_pages + txn->dbs[FREE_DBI].leaf_pages + txn->dbs[FREE_DBI].large_pages;
    MDBX_val key, data;
    rc = outer_first(&couple.outer, &key, &data);
    while (rc == MDBX_SUCCESS) {
      const pnl_t pnl = data.iov_base;
      if (unlikely(data.iov_len % sizeof(pgno_t) || data.iov_len < MDBX_PNL_SIZEOF(pnl))) {
        ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-record length", data.iov_len);
        return MDBX_CORRUPTED;
      }
      if (unlikely(!pnl_check(pnl, txn->geo.first_unallocated))) {
        ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-record content");
        return MDBX_CORRUPTED;
      }
      gc_npages += MDBX_PNL_GETSIZE(pnl);
      rc = outer_next(&couple.outer, &key, &data, MDBX_NEXT);
    }
    /* the GC scan must end by exhausting the records, anything else is an error */
    if (unlikely(rc != MDBX_NOTFOUND))
      return rc;

    /* expected number of used pages after dropping everything GC accounts for */
    meta->geometry.first_unallocated = txn->geo.first_unallocated - gc_npages;
    meta->trees.main = txn->dbs[MAIN_DBI];

    ctx_t ctx;
    memset(&ctx, 0, sizeof(ctx));
    rc = osal_condpair_init(&ctx.condpair);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;

    memset(data_buffer, 0, 2 * (size_t)MDBX_ENVCOPY_WRITEBUF);
    ctx.write_buf[0] = data_buffer;
    ctx.write_buf[1] = data_buffer + (size_t)MDBX_ENVCOPY_WRITEBUF;
    ctx.first_unallocated = NUM_METAS;
    ctx.env = env;
    ctx.fd = fd;
    ctx.txn = txn;
    ctx.flags = flags;

    osal_thread_t thread;
    int thread_err = osal_thread_create(&thread, compacting_write_thread, &ctx);
    if (likely(thread_err == MDBX_SUCCESS)) {
      if (dest_is_pipe) {
        /* a pipe cannot be rewritten later, so the meta-pages go first */
        if (!meta->trees.main.mod_txnid)
          meta->trees.main.mod_txnid = txn->txnid;
        compacting_fixup_meta(env, meta);
        if (flags & MDBX_CP_THROTTLE_MVCC)
          mdbx_txn_park(txn, false);
        rc = osal_write(fd, buffer, meta_bytes);
        if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_THROTTLE_MVCC) != 0)
          rc = mdbx_txn_unpark(txn, false);
      }
      if (likely(rc == MDBX_SUCCESS))
        rc = compacting_walk_tree(&ctx, &meta->trees.main);
      if (ctx.write_len[ctx.head & 1])
        /* toggle to flush non-empty buffers */
        compacting_toggle_write_buffers(&ctx);

      if (likely(rc == MDBX_SUCCESS) && unlikely(meta->geometry.first_unallocated != ctx.first_unallocated)) {
        if (ctx.first_unallocated > meta->geometry.first_unallocated) {
          ERROR("the source DB %s: post-compactification used pages %" PRIaPGNO " %c expected %" PRIaPGNO,
                "has double-used pages or other corruption", ctx.first_unallocated, '>',
                meta->geometry.first_unallocated);
          rc = MDBX_CORRUPTED; /* corrupted DB */
        }
        if (ctx.first_unallocated < meta->geometry.first_unallocated) {
          WARNING("the source DB %s: post-compactification used pages %" PRIaPGNO " %c expected %" PRIaPGNO,
                  "has page leak(s)", ctx.first_unallocated, '<', meta->geometry.first_unallocated);
          if (dest_is_pipe)
            /* the root within already written meta-pages is wrong */
            rc = MDBX_CORRUPTED;
        }
        /* fixup meta */
        meta->geometry.first_unallocated = ctx.first_unallocated;
      }

      /* toggle with empty buffers to exit thread's loop */
      eASSERT(env, (ctx.write_len[ctx.head & 1]) == 0);
      compacting_toggle_write_buffers(&ctx);
      thread_err = osal_thread_join(thread);
      eASSERT(env, (ctx.tail == ctx.head && ctx.write_len[ctx.head & 1] == 0) || ctx.error);
    }
    /* The condpair was successfully initialized above, so it must be released
     * on both paths, including the one where the writer thread could not be
     * created (previously it leaked in that case). */
    osal_condpair_destroy(&ctx.condpair);
    if (unlikely(thread_err != MDBX_SUCCESS))
      return thread_err;
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    if (unlikely(ctx.error != MDBX_SUCCESS))
      return ctx.error;
    if (!dest_is_pipe)
      compacting_fixup_meta(env, meta);
  }

  if (flags & MDBX_CP_THROTTLE_MVCC)
    mdbx_txn_park(txn, false);

  /* Extend file if required */
  if (meta->geometry.now != meta->geometry.first_unallocated) {
    const size_t whole_size = pgno2bytes(env, meta->geometry.now);
    if (!dest_is_pipe)
      return osal_fsetsize(fd, whole_size);

    /* a pipe cannot be resized, so pad the stream with zero-filled chunks */
    const size_t used_size = pgno2bytes(env, meta->geometry.first_unallocated);
    memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF);
    for (size_t offset = used_size; offset < whole_size;) {
      const size_t chunk =
          ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) ? (size_t)MDBX_ENVCOPY_WRITEBUF : whole_size - offset;
      int rc = osal_write(fd, data_buffer, chunk);
      if (unlikely(rc != MDBX_SUCCESS))
        return rc;
      offset += chunk;
    }
  }
  return MDBX_SUCCESS;
}

//----------------------------------------------------------------------------

/* Copies the database to `fd` page-by-page without compaction.
 *
 * A snapshot of the meta-pages is taken into the beginning of `buffer` and
 * the copy of the head meta is signed as steady. The snapshot is written to
 * `fd` by this function only when `dest_is_pipe` is true; otherwise it is
 * just left in `buffer`. The data pages in the range from the end of the
 * meta-pages up to txn->geo.first_unallocated are then transferred using
 * sendfile() or copy_file_range() when enabled at build time and usable,
 * with a portable memcpy()+write fallback through the part of `buffer`
 * that follows the meta-pages. Finally the destination is extended up to
 * txn->geo.end_pgno (aligned to the OS page).
 *
 * For a read-only `txn` whose txnid differs from the most recent meta:
 *  - with MDBX_CP_RENEW_TXN the transaction is renewed;
 *  - otherwise a meta-page matching txn->txnid is searched in the snapshot
 *    and MDBX_MVCC_RETARDED is returned when none is found.
 *
 * Returns MDBX_SUCCESS or an error code. */
__cold static int copy_asis(MDBX_env *env, MDBX_txn *txn, mdbx_filehandle_t fd, uint8_t *buffer,
                            const bool dest_is_pipe, const MDBX_copy_flags_t flags) {
  bool should_unlock = false;
  if ((txn->flags & MDBX_TXN_RDONLY) != 0 && (flags & MDBX_CP_RENEW_TXN) != 0) {
    /* Try temporarily block writers until we snapshot the meta pages */
    int err = lck_txn_lock(env, true);
    if (likely(err == MDBX_SUCCESS))
      should_unlock = true;
    else if (unlikely(err != MDBX_BUSY))
      return err;
    /* on MDBX_BUSY continue without the lock, relying on the re-check below */
  }

  jitter4testing(false);
  int rc = MDBX_SUCCESS;
  const size_t meta_bytes = pgno2bytes(env, NUM_METAS);
  troika_t troika = meta_tap(env);
  /* Make a snapshot of the meta-pages now,
   * but write them only after the data has been flushed */
retry_snap_meta:
  memcpy(buffer, env->dxb_mmap.base, meta_bytes);
  const meta_ptr_t recent = meta_recent(env, &troika);
  meta_t *headcopy = /* LY: get pointer to the snapshot copy */
      ptr_disp(buffer, ptr_dist(recent.ptr_c, env->dxb_mmap.base));
  jitter4testing(false);
  if (txn->flags & MDBX_TXN_RDONLY) {
    if (recent.txnid != txn->txnid) {
      if (flags & MDBX_CP_RENEW_TXN)
        rc = mdbx_txn_renew(txn);
      else {
        /* Look for a snapshot meta-page of the transaction's own txnid;
         * a steady one replaces an already found candidate. Meta-pages
         * newer than the transaction get their txnid reset to zero. */
        rc = MDBX_MVCC_RETARDED;
        for (size_t n = 0; n < NUM_METAS; ++n) {
          meta_t *const meta = page_meta(ptr_disp(buffer, pgno2bytes(env, n)));
          if (troika.txnid[n] == txn->txnid && ((/* is_steady */ (troika.fsm >> n) & 1) || rc != MDBX_SUCCESS)) {
            rc = MDBX_SUCCESS;
            headcopy = meta;
          } else if (troika.txnid[n] > txn->txnid)
            meta_set_txnid(env, meta, 0);
        }
      }
    }
    if (should_unlock)
      lck_txn_unlock(env);
    else {
      /* writers were not blocked: retry if the meta-pages changed meanwhile */
      troika_t snap = meta_tap(env);
      if (memcmp(&troika, &snap, sizeof(troika_t)) && rc == MDBX_SUCCESS) {
        troika = snap;
        goto retry_snap_meta;
      }
    }
  }
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  if (txn->flags & MDBX_TXN_RDONLY)
    eASSERT(env, meta_txnid(headcopy) == txn->txnid);
  if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE)
    meta_make_sizeable(headcopy);
  /* Update signature to steady */
  meta_sign_as_steady(headcopy);

  /* Copy the data */
  const size_t whole_size = pgno_align2os_bytes(env, txn->geo.end_pgno);
  const size_t used_size = pgno2bytes(env, txn->geo.first_unallocated);
  jitter4testing(false);

  if (flags & MDBX_CP_THROTTLE_MVCC)
    mdbx_txn_park(txn, false);

  /* a pipe cannot be rewritten later, so the meta-pages go first */
  if (dest_is_pipe)
    rc = osal_write(fd, buffer, meta_bytes);

  uint8_t *const data_buffer = buffer + ceil_powerof2(meta_bytes, globals.sys_pagesize);
#if MDBX_USE_COPYFILERANGE
  /* static: once found unusable, copy_file_range() is not tried again */
  static bool copyfilerange_unavailable;
#if (defined(__linux__) || defined(__gnu_linux__))
  if (globals.linux_kernel_version >= 0x05030000 && globals.linux_kernel_version < 0x05130000)
    copyfilerange_unavailable = true;
#endif /* linux */
  bool not_the_same_filesystem = false;
  if (!copyfilerange_unavailable) {
    struct statfs statfs_info;
    if (fstatfs(fd, &statfs_info) || statfs_info.f_type == /* ECRYPTFS_SUPER_MAGIC */ 0xf15f)
      /* avoid using copy_file_range() on ecryptfs due to bugs */
      not_the_same_filesystem = true;
  }
#endif /* MDBX_USE_COPYFILERANGE */

  for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) {
    /* the source pages are read below, so the transaction must be unparked */
    if (flags & MDBX_CP_THROTTLE_MVCC) {
      rc = mdbx_txn_unpark(txn, false);
      if (unlikely(rc != MDBX_SUCCESS))
        break;
    }

#if MDBX_USE_SENDFILE
    /* static: once found unusable, sendfile() is not tried again */
    static bool sendfile_unavailable;
    if (dest_is_pipe && likely(!sendfile_unavailable)) {
      off_t in_offset = offset;
      const ssize_t written = sendfile(fd, env->lazy_fd, &in_offset, used_size - offset);
      if (likely(written > 0)) {
        offset = in_offset;
        if (flags & MDBX_CP_THROTTLE_MVCC)
          rc = mdbx_txn_park(txn, false);
        continue;
      }
      /* zero bytes transferred is reported as MDBX_ENODATA; other failures
       * abort the copy unless the error is one of the ignorable ones */
      rc = MDBX_ENODATA;
      if (written == 0 || ignore_enosys_and_eagain(rc = errno) != MDBX_RESULT_TRUE)
        break;
      sendfile_unavailable = true;
    }
#endif /* MDBX_USE_SENDFILE */

#if MDBX_USE_COPYFILERANGE
    if (!dest_is_pipe && !not_the_same_filesystem && likely(!copyfilerange_unavailable)) {
      off_t in_offset = offset, out_offset = offset;
      ssize_t bytes_copied = copy_file_range(env->lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0);
      if (likely(bytes_copied > 0)) {
        offset = in_offset;
        if (flags & MDBX_CP_THROTTLE_MVCC)
          rc = mdbx_txn_park(txn, false);
        continue;
      }
      rc = MDBX_ENODATA;
      if (bytes_copied == 0)
        break;
      rc = errno;
      if (rc == EXDEV || rc == /* workaround for ecryptfs bug(s),
                                  maybe useful for others FS */
                             EINVAL)
        not_the_same_filesystem = true;
      else if (ignore_enosys_and_eagain(rc) == MDBX_RESULT_TRUE)
        copyfilerange_unavailable = true;
      else
        break;
      /* otherwise fall through to the portable path for this chunk */
    }
#endif /* MDBX_USE_COPYFILERANGE */

    /* fallback to portable */
    const size_t chunk =
        ((size_t)MDBX_ENVCOPY_WRITEBUF < used_size - offset) ? (size_t)MDBX_ENVCOPY_WRITEBUF : used_size - offset;
    /* copy to avoid EFAULT in case swapped-out */
    memcpy(data_buffer, ptr_disp(env->dxb_mmap.base, offset), chunk);
    if (flags & MDBX_CP_THROTTLE_MVCC)
      mdbx_txn_park(txn, false);
    rc = osal_write(fd, data_buffer, chunk);
    offset += chunk;
  }

  /* Extend file if required */
  if (likely(rc == MDBX_SUCCESS) && whole_size != used_size) {
    if (!dest_is_pipe)
      rc = osal_fsetsize(fd, whole_size);
    else {
      /* a pipe cannot be resized, so pad the stream with zero-filled chunks */
      memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF);
      for (size_t offset = used_size; rc == MDBX_SUCCESS && offset < whole_size;) {
        const size_t chunk =
            ((size_t)MDBX_ENVCOPY_WRITEBUF < whole_size - offset) ? (size_t)MDBX_ENVCOPY_WRITEBUF : whole_size - offset;
        rc = osal_write(fd, data_buffer, chunk);
        offset += chunk;
      }
    }
  }

  return rc;
}

//----------------------------------------------------------------------------

/* Common implementation of copying the database seen by `txn` into `fd`.
 *
 * Validates the transaction/flags combination, allocates the working buffer
 * (meta-pages area plus one or two write buffers, depending on
 * MDBX_CP_COMPACT), and dispatches to copy_with_compacting() or copy_asis().
 *
 * For a non-pipe destination the meta-pages area is first filled with a
 * 0xFF stub, and the actual meta-pages are written at offset 0 only after
 * the data has been copied (and synced, unless MDBX_CP_DONT_FLUSH is set).
 *
 * Returns:
 *  - MDBX_BAD_TXN if the transaction has the MDBX_TXN_DIRTY flag;
 *  - MDBX_EINVAL if MDBX_CP_THROTTLE_MVCC or MDBX_CP_RENEW_TXN is requested
 *    for a transaction that is not read-only;
 *  - otherwise MDBX_SUCCESS or the first error encountered. */
__cold static int copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) {
  if (unlikely(txn->flags & MDBX_TXN_DIRTY))
    return MDBX_BAD_TXN;

  int rc = MDBX_SUCCESS;
  if (txn->flags & MDBX_TXN_RDONLY) {
    if (flags & MDBX_CP_THROTTLE_MVCC) {
      rc = mdbx_txn_park(txn, true);
      if (unlikely(rc != MDBX_SUCCESS))
        return rc;
    }
  } else if (unlikely(flags & (MDBX_CP_THROTTLE_MVCC | MDBX_CP_RENEW_TXN)))
    return MDBX_EINVAL;

  /* osal_is_pipe() yields either a boolean-like result or an error code */
  const int dest_is_pipe = osal_is_pipe(fd);
  if (MDBX_IS_ERROR(dest_is_pipe))
    return dest_is_pipe;

  if (!dest_is_pipe) {
    rc = osal_fseek(fd, 0);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
  }

  MDBX_env *const env = txn->env;
  /* meta-pages area + write buffer(s); compaction uses two write buffers */
  const size_t buffer_size =
      pgno_align2os_bytes(env, NUM_METAS) +
      ceil_powerof2(((flags & MDBX_CP_COMPACT) ? 2 * (size_t)MDBX_ENVCOPY_WRITEBUF : (size_t)MDBX_ENVCOPY_WRITEBUF),
                    globals.sys_pagesize);

  uint8_t *buffer = nullptr;
  rc = osal_memalign_alloc(globals.sys_pagesize, buffer_size, (void **)&buffer);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  if (!dest_is_pipe) {
    /* First write a stub in place of the meta-pages,
     * so that an incomplete copy can never be mistaken for a usable one. */
    memset(buffer, -1, pgno2bytes(env, NUM_METAS));
    rc = osal_write(fd, buffer, pgno2bytes(env, NUM_METAS));
  }

  if (likely(rc == MDBX_SUCCESS))
    rc = mdbx_txn_unpark(txn, false);
  if (likely(rc == MDBX_SUCCESS)) {
    memset(buffer, 0, pgno2bytes(env, NUM_METAS));
    rc = ((flags & MDBX_CP_COMPACT) ? copy_with_compacting : copy_asis)(env, txn, fd, buffer, dest_is_pipe, flags);

    if (likely(rc == MDBX_SUCCESS))
      rc = mdbx_txn_unpark(txn, false);
  }

  /* leave a read-only transaction parked or reset, as requested by flags */
  if (txn->flags & MDBX_TXN_RDONLY) {
    if (flags & MDBX_CP_THROTTLE_MVCC)
      mdbx_txn_park(txn, true);
    else if (flags & MDBX_CP_DISPOSE_TXN)
      mdbx_txn_reset(txn);
  }

  if (!dest_is_pipe) {
    /* flush the data before the meta-pages make the copy valid */
    if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0)
      rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_SIZE);

    /* Write actual meta */
    if (likely(rc == MDBX_SUCCESS))
      rc = osal_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0);

    if (likely(rc == MDBX_SUCCESS) && (flags & MDBX_CP_DONT_FLUSH) == 0)
      rc = osal_fsync(fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
  }

  osal_memalign_free(buffer);
  return rc;
}

/* Creates the file `dest_path`, locks it, and copies the database seen by
 * `txn` into it via copy2fd().
 *
 * On non-Windows platforms both an fcntl() record lock over the whole file
 * and (when LOCK_EX is available) a non-blocking flock() are attempted; some
 * combinations of failures are tolerated with a warning, depending on the
 * local/remote filesystem check.
 *
 * If anything fails after the file has been opened, the destination file is
 * removed. Returns MDBX_EINVAL for a null or empty path, otherwise
 * MDBX_SUCCESS or an error code. */
__cold static int copy2pathname(MDBX_txn *txn, const pathchar_t *dest_path, MDBX_copy_flags_t flags) {
  if (unlikely(!dest_path || *dest_path == '\0'))
    return MDBX_EINVAL;

  /* The destination path must exist, but the destination file must not.
   * We don't want the OS to cache the writes, since the source data is
   * already in the OS cache. */
  mdbx_filehandle_t newfd = INVALID_HANDLE_VALUE;
  int rc = osal_openfile(MDBX_OPEN_COPY, txn->env, dest_path, &newfd,
#if defined(_WIN32) || defined(_WIN64)
                         (mdbx_mode_t)-1
#else
                         S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
#endif
  );
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

#if defined(_WIN32) || defined(_WIN64)
  /* no locking required since the file opened with ShareMode == 0 */
#else
  /* try to take an exclusive fcntl() lock over the whole file */
  MDBX_STRUCT_FLOCK lock_op;
  memset(&lock_op, 0, sizeof(lock_op));
  lock_op.l_type = F_WRLCK;
  lock_op.l_whence = SEEK_SET;
  lock_op.l_start = 0;
  lock_op.l_len = OFF_T_MAX;
  const int err_fcntl = MDBX_FCNTL(newfd, MDBX_F_SETLK, &lock_op) ? errno : MDBX_SUCCESS;

  /* additionally try a non-blocking exclusive flock() when it is available */
  const int err_flock =
#ifdef LOCK_EX
      flock(newfd, LOCK_EX | LOCK_NB) ? errno : MDBX_SUCCESS;
#else
      MDBX_ENOSYS;
#endif /* LOCK_EX */

  const int err_check_fs_local =
      /* avoid call osal_check_fs_local() on success */
      (!err_fcntl && !err_flock && !MDBX_DEBUG) ? MDBX_SUCCESS :
#if !defined(__ANDROID_API__) || __ANDROID_API__ >= 24
                                                osal_check_fs_local(newfd, 0);
#else
                                                MDBX_ENOSYS;
#endif

  /* on Linux a flock() failure is tolerated only when the filesystem check
   * returned non-zero; on other platforms it is always tolerated */
  const bool flock_may_fail =
#if defined(__linux__) || defined(__gnu_linux__)
      err_check_fs_local != 0;
#else
      true;
#endif /* Linux */

  if (!err_fcntl &&
      (err_flock == EWOULDBLOCK || err_flock == EAGAIN || ignore_enosys_and_eremote(err_flock) == MDBX_RESULT_TRUE)) {
    /* fcntl-lock succeeded but flock() did not */
    rc = err_flock;
    if (flock_may_fail) {
      WARNING("ignore %s(%" MDBX_PRIsPATH ") error %d: since %s done, local/remote-fs check %d", "flock", dest_path,
              err_flock, "fcntl-lock", err_check_fs_local);
      rc = MDBX_SUCCESS;
    }
  } else if (!err_flock && err_check_fs_local == MDBX_RESULT_TRUE &&
             ignore_enosys_and_eremote(err_fcntl) == MDBX_RESULT_TRUE) {
    /* flock() succeeded and the fcntl-lock error is an ignorable one */
    WARNING("ignore %s(%" MDBX_PRIsPATH ") error %d: since %s done, local/remote-fs check %d", "fcntl-lock", dest_path,
            err_fcntl, "flock", err_check_fs_local);
  } else if (err_fcntl || err_flock) {
    ERROR("file-lock(%" MDBX_PRIsPATH ") failed: fcntl-lock %d, flock %d, local/remote-fs check %d", dest_path,
          err_fcntl, err_flock, err_check_fs_local);
    /* choose the error to report: ENOLCK first, then EWOULDBLOCK/EAGAIN,
     * then whichever lock error is not an ignorable one */
    if (err_fcntl == ENOLCK || err_flock == ENOLCK)
      rc = ENOLCK;
    else if (err_fcntl == EWOULDBLOCK || err_flock == EWOULDBLOCK)
      rc = EWOULDBLOCK;
    else if (EWOULDBLOCK != EAGAIN && (err_fcntl == EAGAIN || err_flock == EAGAIN))
      rc = EAGAIN;
    else
      rc = (err_fcntl && ignore_enosys_and_eremote(err_fcntl) != MDBX_RESULT_TRUE) ? err_fcntl : err_flock;
  }
#endif /* Windows / POSIX */

  if (rc == MDBX_SUCCESS)
    rc = copy2fd(txn, newfd, flags);

  if (newfd != INVALID_HANDLE_VALUE) {
    int err = osal_closefile(newfd);
    /* a close failure turns an otherwise successful copy into an error */
    if (rc == MDBX_SUCCESS && err != rc)
      rc = err;
    /* never leave a partial or invalid copy behind */
    if (rc != MDBX_SUCCESS)
      (void)osal_removefile(dest_path);
  }
  return rc;
}

//----------------------------------------------------------------------------

/* Copies the database snapshot seen by `txn` to the file descriptor `fd`.
 * With MDBX_CP_DISPOSE_TXN the transaction is aborted before returning,
 * whether or not the copy succeeded. Returns MDBX_SUCCESS or an error. */
__cold int mdbx_txn_copy2fd(MDBX_txn *txn, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) {
  int err = check_txn(txn, MDBX_TXN_BLOCKED);
  if (likely(err == MDBX_SUCCESS))
    err = copy2fd(txn, fd, flags);

  /* disposal is requested by the caller regardless of the outcome */
  if ((flags & MDBX_CP_DISPOSE_TXN) != 0)
    mdbx_txn_abort(txn);

  return LOG_IFERR(err);
}

/* Copies the environment to the file descriptor `fd` using a temporary
 * read-only transaction. MDBX_CP_DISPOSE_TXN and MDBX_CP_RENEW_TXN are
 * rejected with MDBX_EINVAL when passed by the caller; they are added
 * internally for the temporary transaction. */
__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, MDBX_copy_flags_t flags) {
  if (unlikely((flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN)) != 0))
    return LOG_IFERR(MDBX_EINVAL);

  int err = check_env(env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  MDBX_txn *snapshot = nullptr;
  err = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &snapshot);
  if (likely(err == MDBX_SUCCESS)) {
    err = copy2fd(snapshot, fd, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN);
    /* the temporary transaction is never handed out to the caller */
    mdbx_txn_abort(snapshot);
  }
  return LOG_IFERR(err);
}

/* Copies the database snapshot seen by `txn` into a new file `dest_path`.
 *
 * On Windows this entry point only converts the multi-byte path to a wide
 * string and forwards to mdbx_txn_copy2pathnameW(), which holds the shared
 * body below; elsewhere the body belongs directly to this function.
 *
 * With MDBX_CP_DISPOSE_TXN the transaction is aborted before returning,
 * whether or not the copy succeeded. Returns MDBX_SUCCESS or an error. */
__cold int mdbx_txn_copy2pathname(MDBX_txn *txn, const char *dest_path, MDBX_copy_flags_t flags) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *dest_pathW = nullptr;
  int rc = osal_mb2w(dest_path, &dest_pathW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_txn_copy2pathnameW(txn, dest_pathW, flags);
    /* the converted path is owned by this wrapper */
    osal_free(dest_pathW);
  }
  return LOG_IFERR(rc);
}

/* Wide-character variant (Windows only); the body is shared with the
 * non-Windows build of mdbx_txn_copy2pathname(). */
__cold int mdbx_txn_copy2pathnameW(MDBX_txn *txn, const wchar_t *dest_path, MDBX_copy_flags_t flags) {
#endif /* Windows */
  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (likely(rc == MDBX_SUCCESS))
    rc = copy2pathname(txn, dest_path, flags);
  /* disposal is requested by the caller regardless of the outcome */
  if (flags & MDBX_CP_DISPOSE_TXN)
    mdbx_txn_abort(txn);
  return LOG_IFERR(rc);
}

/* Copies the environment into a new file `dest_path` using a temporary
 * read-only transaction.
 *
 * On Windows this entry point only converts the multi-byte path to a wide
 * string and forwards to mdbx_env_copyW(), which holds the shared body
 * below; elsewhere the body belongs directly to this function.
 *
 * MDBX_CP_DISPOSE_TXN and MDBX_CP_RENEW_TXN are rejected with MDBX_EINVAL
 * when passed by the caller; they are added internally for the temporary
 * transaction. Returns MDBX_SUCCESS or an error code. */
__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path, MDBX_copy_flags_t flags) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *dest_pathW = nullptr;
  int rc = osal_mb2w(dest_path, &dest_pathW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_env_copyW(env, dest_pathW, flags);
    /* the converted path is owned by this wrapper */
    osal_free(dest_pathW);
  }
  return LOG_IFERR(rc);
}

/* Wide-character variant (Windows only); the body is shared with the
 * non-Windows build of mdbx_env_copy(). */
__cold int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest_path, MDBX_copy_flags_t flags) {
#endif /* Windows */
  if (unlikely(flags & (MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN)))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  MDBX_txn *txn = nullptr;
  rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  rc = copy2pathname(txn, dest_path, flags | MDBX_CP_DISPOSE_TXN | MDBX_CP_RENEW_TXN);
  /* the temporary transaction is never handed out to the caller */
  mdbx_txn_abort(txn);
  return LOG_IFERR(rc);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Allocates a new unbound cursor carrying the user `context` pointer.
 * The cursor is returned in the "ready for dispose" state and linked to
 * itself. Returns nullptr when the allocation fails. */
MDBX_cursor *mdbx_cursor_create(void *context) {
  cursor_couple_t *const pair = osal_calloc(1, sizeof(cursor_couple_t));
  if (likely(pair)) {
    /* tell Valgrind the whole couple is uninitialized, then mark as defined
     * only the fields listed after the reset below */
    VALGRIND_MAKE_MEM_UNDEFINED(pair, sizeof(cursor_couple_t));
    pair->outer.signature = cur_signature_ready4dispose;
    pair->outer.next = &pair->outer;
    pair->userctx = context;
    cursor_reset(pair);

    VALGRIND_MAKE_MEM_DEFINED(&pair->outer.backup, sizeof(pair->outer.backup));
    VALGRIND_MAKE_MEM_DEFINED(&pair->outer.tree, sizeof(pair->outer.tree));
    VALGRIND_MAKE_MEM_DEFINED(&pair->outer.clc, sizeof(pair->outer.clc));
    VALGRIND_MAKE_MEM_DEFINED(&pair->outer.dbi_state, sizeof(pair->outer.dbi_state));
    VALGRIND_MAKE_MEM_DEFINED(&pair->outer.subcur, sizeof(pair->outer.subcur));
    VALGRIND_MAKE_MEM_DEFINED(&pair->outer.txn, sizeof(pair->outer.txn));
    return &pair->outer;
  }
  return nullptr;
}

/* Re-binds the cursor `mc` to the transaction `txn`, keeping the table
 * (dbi) the cursor was associated with.
 *
 * The dbi is recovered from the position of mc->clc inside txn->env->kvs,
 * therefore `txn` has to be validated before it is dereferenced: previously
 * a null or invalid transaction handle crashed here instead of being
 * reported as an error.
 *
 * Returns MDBX_EINVAL for a null cursor, the check_txn() error for an
 * unusable transaction, otherwise the result of mdbx_cursor_bind(). */
int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) {
  if (unlikely(!mc))
    return LOG_IFERR(MDBX_EINVAL);

  /* same transaction requirements as mdbx_cursor_bind() applies */
  int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  return mdbx_cursor_bind(txn, mc, (kvx_t *)mc->clc - txn->env->kvs);
}

/* Resets the cursor state via cursor_reset() after validating the cursor.
 * Returns MDBX_SUCCESS or the validation error. */
int mdbx_cursor_reset(MDBX_cursor *mc) {
  const int err = cursor_check(mc, MDBX_TXN_FINISHED);
  if (likely(err == MDBX_SUCCESS)) {
    cursor_reset((cursor_couple_t *)mc);
    return MDBX_SUCCESS;
  }
  return LOG_IFERR(err);
}

/* Binds the cursor `mc` to the transaction `txn` and the table `dbi`.
 *
 * A cursor that is already live and bound to the same transaction and dbi
 * is left untouched; a live cursor bound elsewhere is unbound first.
 * On success the cursor is pushed onto the head of txn->cursors[dbi].
 *
 * Returns:
 *  - MDBX_EINVAL for a null cursor, for a cursor awaiting the end of its
 *    transaction, or for a cursor that carries a backup from a parent
 *    transaction;
 *  - MDBX_EBADSIGN for any other unexpected cursor signature;
 *  - MDBX_EACCESS when FREE_DBI is requested for a non-read-only txn;
 *  - the error of check_txn(), dbi_check(), mdbx_cursor_unbind()
 *    (MDBX_BAD_TXN is reported as MDBX_EINVAL) or cursor_init(). */
int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) {
  if (unlikely(!mc))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(mc->signature != cur_signature_ready4dispose && mc->signature != cur_signature_live)) {
    int rc = (mc->signature == cur_signature_wait4eot) ? MDBX_EINVAL : MDBX_EBADSIGN;
    return LOG_IFERR(rc);
  }

  int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(dbi == FREE_DBI && !(txn->flags & MDBX_TXN_RDONLY)))
    return LOG_IFERR(MDBX_EACCESS);

  rc = dbi_check(txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  /* Cursor from parent transaction: it must not be re-bound while its saved
   * state is pending. The `return` was missing here, so the error was only
   * logged and the binding went on. */
  if (unlikely(mc->backup))
    return LOG_IFERR(MDBX_EINVAL);

  if (mc->signature == cur_signature_live) {
    /* nothing to do when the binding is already the requested one */
    if (mc->txn == txn && cursor_dbi(mc) == dbi)
      return MDBX_SUCCESS;
    rc = mdbx_cursor_unbind(mc);
    if (unlikely(rc != MDBX_SUCCESS))
      return (rc == MDBX_BAD_TXN) ? MDBX_EINVAL : rc;
  }
  cASSERT(mc, mc->next == mc);

  rc = cursor_init(mc, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  /* link the cursor into the transaction's per-dbi list */
  mc->next = txn->cursors[dbi];
  txn->cursors[dbi] = mc;
  return MDBX_SUCCESS;
}

/* Detaches the cursor `mc` from its transaction, leaving it allocated in
 * the "ready for dispose" state.
 *
 * Returns:
 *  - MDBX_EINVAL for a null cursor or a cursor that carries a backup from
 *    a parent transaction;
 *  - MDBX_SUCCESS immediately if the cursor is already unbound;
 *  - MDBX_EBADSIGN for any other non-live signature;
 *  - the check_txn() error for an unusable transaction (MDBX_BAD_TXN may be
 *    replaced by MDBX_EINVAL, see the special case below);
 *  - MDBX_PROBLEM if the cursor's transaction pointer is null or has a
 *    wrong signature. */
int mdbx_cursor_unbind(MDBX_cursor *mc) {
  if (unlikely(!mc))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(mc->signature != cur_signature_live))
    return (mc->signature == cur_signature_ready4dispose) ? MDBX_SUCCESS : LOG_IFERR(MDBX_EBADSIGN);

  if (unlikely(mc->backup)) /* Cursor from parent transaction */
    /* TODO: implement when switching to a doubly-linked list of cursors */
    return LOG_IFERR(MDBX_EINVAL);

  int rc = check_txn(mc->txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD);
  if (unlikely(rc != MDBX_SUCCESS)) {
    /* walk down the chain of nested transactions while they are usable */
    for (const MDBX_txn *txn = mc->txn; rc == MDBX_BAD_TXN && check_txn(txn, MDBX_TXN_FINISHED) == MDBX_SUCCESS;
         txn = txn->nested)
      if (dbi_state(txn, cursor_dbi(mc)) == 0)
        /* special case: the cursor is attached to the parent transaction, but the corresponding dbi handle has
         * not been used in the nested transaction yet, i.e. the cursor has not been imported into the child
         * transaction and has no associated saved state (that is why mc->backup is nullptr). */
        rc = MDBX_EINVAL;
    return LOG_IFERR(rc);
  }

  if (unlikely(!mc->txn || mc->txn->signature != txn_signature)) {
    ERROR("Wrong cursor's transaction %p 0x%x", __Wpedantic_format_voidptr(mc->txn), mc->txn ? mc->txn->signature : 0);
    return LOG_IFERR(MDBX_PROBLEM);
  }

  /* a cursor linked to itself is not a member of any transaction list */
  if (mc->next != mc) {
    const size_t dbi = cursor_dbi(mc);
    cASSERT(mc, dbi < mc->txn->n_dbi);
    cASSERT(mc, &mc->txn->env->kvs[dbi].clc == mc->clc);
    if (dbi < mc->txn->n_dbi) {
      /* find the link pointing to this cursor in the singly-linked list
       * and unlink the cursor */
      MDBX_cursor **prev = &mc->txn->cursors[dbi];
      while (/* *prev && */ *prev != mc) {
        ENSURE(mc->txn->env, (*prev)->signature == cur_signature_live || (*prev)->signature == cur_signature_wait4eot);
        prev = &(*prev)->next;
      }
      cASSERT(mc, *prev == mc);
      *prev = mc->next;
    }
    mc->next = mc;
  }
  cursor_drown((cursor_couple_t *)mc);
  mc->signature = cur_signature_ready4dispose;
  return MDBX_SUCCESS;
}

/* Creates a cursor and binds it to `txn` and `dbi` in one step.
 * On success the new cursor is stored into `*ret`; on failure `*ret` is
 * nullptr and the freshly created cursor is closed.
 * Returns MDBX_EINVAL for a null `ret`, MDBX_ENOMEM when the cursor cannot
 * be allocated, or the error of mdbx_cursor_bind(). */
int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) {
  if (unlikely(ret == nullptr))
    return LOG_IFERR(MDBX_EINVAL);
  *ret = nullptr;

  MDBX_cursor *const fresh = mdbx_cursor_create(nullptr);
  if (unlikely(fresh == nullptr))
    return LOG_IFERR(MDBX_ENOMEM);

  const int err = mdbx_cursor_bind(txn, fresh, dbi);
  if (likely(err == MDBX_SUCCESS)) {
    *ret = fresh;
    return MDBX_SUCCESS;
  }

  /* binding failed: do not leak the cursor */
  mdbx_cursor_close(fresh);
  return LOG_IFERR(err);
}

/* Closes the cursor; a null pointer is silently ignored.
 * Since this variant cannot report errors, any failure of
 * mdbx_cursor_close2() ends up in mdbx_panic(). */
void mdbx_cursor_close(MDBX_cursor *cursor) {
  if (unlikely(!cursor))
    return;

  const int err = mdbx_cursor_close2(cursor);
  if (unlikely(err != MDBX_SUCCESS))
    mdbx_panic("%s:%d error %d (%s) while closing cursor", __func__, __LINE__, err, mdbx_liberr2str(err));
}

/* Closes the cursor `mc` and reports errors instead of panicking.
 *
 * - An unbound ("ready for dispose") cursor is freed right away.
 * - A live cursor that carries a backup (it is used inside a nested
 *   transaction) is not freed: it is reset and marked as waiting for the
 *   end of the transaction.
 * - Any other live cursor is unlinked from its transaction's per-dbi list
 *   and freed.
 *
 * Returns MDBX_EINVAL for a null cursor, MDBX_PANIC for an unbound cursor
 * that still references a transaction or a backup, MDBX_EBADSIGN for an
 * unexpected signature, the check_txn() error for an unusable transaction,
 * otherwise MDBX_SUCCESS. */
int mdbx_cursor_close2(MDBX_cursor *mc) {
  if (unlikely(!mc))
    return LOG_IFERR(MDBX_EINVAL);

  if (mc->signature == cur_signature_ready4dispose) {
    if (unlikely(mc->txn || mc->backup))
      return LOG_IFERR(MDBX_PANIC);
    cursor_drown((cursor_couple_t *)mc);
    /* wipe the signature so a stale pointer is not taken for a valid cursor */
    mc->signature = 0;
    osal_free(mc);
    return MDBX_SUCCESS;
  }

  if (unlikely(mc->signature != cur_signature_live))
    return LOG_IFERR(MDBX_EBADSIGN);

  MDBX_txn *const txn = mc->txn;
  int rc = check_txn(txn, MDBX_TXN_FINISHED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (mc->backup) {
    /* Cursor closed before nested txn ends */
    cursor_reset((cursor_couple_t *)mc);
    mc->signature = cur_signature_wait4eot;
    return MDBX_SUCCESS;
  }

  /* a cursor linked to itself is not a member of any transaction list */
  if (mc->next != mc) {
    const size_t dbi = cursor_dbi(mc);
    cASSERT(mc, dbi < mc->txn->n_dbi);
    cASSERT(mc, &mc->txn->env->kvs[dbi].clc == mc->clc);
    if (likely(dbi < txn->n_dbi)) {
      /* find the link pointing to this cursor in the singly-linked list
       * and unlink the cursor */
      MDBX_cursor **prev = &txn->cursors[dbi];
      while (/* *prev && */ *prev != mc) {
        ENSURE(txn->env, (*prev)->signature == cur_signature_live || (*prev)->signature == cur_signature_wait4eot);
        prev = &(*prev)->next;
      }
      tASSERT(txn, *prev == mc);
      *prev = mc->next;
    }
    mc->next = mc;
  }
  cursor_drown((cursor_couple_t *)mc);
  mc->signature = 0;
  osal_free(mc);
  return MDBX_SUCCESS;
}

/* Makes `dest` a copy of `src`: binds `dest` to the same transaction and
 * table, then copies the page stack and position. When `src` has a nested
 * cursor, the nested tree descriptor and the nested cursor position are
 * copied as well.
 * Returns MDBX_SUCCESS, or the error from validating `src` or from binding
 * `dest`. */
int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) {
  int err = cursor_check(src, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  err = mdbx_cursor_bind(src->txn, dest, cursor_dbi(src));
  if (unlikely(err != MDBX_SUCCESS))
    return err;

  assert(dest->tree == src->tree);
  assert(cursor_dbi(dest) == cursor_dbi(src));

  /* first pass handles the outer cursors, further passes the nested ones */
  for (;;) {
    assert(dest->clc == src->clc);
    assert(dest->txn == src->txn);
    dest->top_and_flags = src->top_and_flags;
    for (intptr_t level = 0; level <= src->top; ++level) {
      dest->ki[level] = src->ki[level];
      dest->pg[level] = src->pg[level];
    }

    if (!src->subcur)
      break;

    dest->subcur->nested_tree = src->subcur->nested_tree;
    src = &src->subcur->cursor;
    dest = &dest->subcur->cursor;
  }

  return MDBX_SUCCESS;
}

/* Releases all cursors tracked by `txn` and by every parent transaction up
 * the chain.
 *
 * For each tracked cursor:
 *  - a live cursor is drowned and marked as waiting for end-of-transaction;
 *  - if the cursor carries a backup, the `next` and `backup` links saved in
 *    that backup are restored into the cursor and the backup record is freed;
 *  - otherwise the cursor becomes unbound ("ready for dispose"), is counted,
 *    and is also freed unless `unbind` is true.
 *
 * `count`, when not null, receives the number of cursors that became
 * unbound. Returns MDBX_SUCCESS or the check_txn() error. */
int mdbx_txn_release_all_cursors_ex(const MDBX_txn *txn, bool unbind, size_t *count) {
  int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  size_t n = 0;
  do {
    TXN_FOREACH_DBI_FROM(txn, i, MAIN_DBI) {
      MDBX_cursor *mc = txn->cursors[i], *next = nullptr;
      if (mc) {
        /* detach the whole list first, then process its members one by one */
        txn->cursors[i] = nullptr;
        do {
          /* remember the successor before the links are rewritten below */
          next = mc->next;
          if (mc->signature == cur_signature_live) {
            mc->signature = cur_signature_wait4eot;
            cursor_drown((cursor_couple_t *)mc);
          } else
            ENSURE(nullptr, mc->signature == cur_signature_wait4eot);
          if (mc->backup) {
            /* pop one level of the saved state and dispose of its record */
            MDBX_cursor *bk = mc->backup;
            mc->next = bk->next;
            mc->backup = bk->backup;
            bk->backup = nullptr;
            bk->signature = 0;
            osal_free(bk);
          } else {
            mc->signature = cur_signature_ready4dispose;
            mc->next = mc;
            ++n;
            if (!unbind) {
              mc->signature = 0;
              osal_free(mc);
            }
          }
        } while ((mc = next) != nullptr);
      }
    }
    txn = txn->parent;
  } while (txn);

  if (count)
    *count = n;
  return MDBX_SUCCESS;
}

/* Compares the positions of two cursors.
 *
 * Returns zero when the cursors are considered equal, a negative value when
 * `l` is before `r`, and a positive value otherwise. For comparable cursors
 * the result is the difference of the first differing key index on the page
 * stack. When the cursors cannot be compared position-wise, a multiple of
 * `incomparable` (INT16_MAX + 1) is returned, where the multiplier tells
 * the reason:
 *   9 - exactly one of the cursors is null;
 *   8 - exactly one of the cursors fails cursor_check_pure();
 *   7, 6, 5 - the cursors have a different `clc` and differ by environment,
 *       by transaction id, or only by `clc`, respectively;
 *   4 - only one of the cursors is pointed;
 *   3 - the page stacks have a different depth;
 *   2 - only one cursor has a nested cursor, or only one nested cursor is
 *       pointed;
 *   1 - the nested page stacks have a different depth.
 *
 * With `ignore_multival` set the nested (multi-value) position is not taken
 * into account. */
int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, bool ignore_multival) {
  const int incomparable = INT16_MAX + 1;

  if (unlikely(!l))
    return r ? -incomparable * 9 : 0;
  else if (unlikely(!r))
    return incomparable * 9;

  if (unlikely(cursor_check_pure(l) != MDBX_SUCCESS))
    return (cursor_check_pure(r) == MDBX_SUCCESS) ? -incomparable * 8 : 0;
  if (unlikely(cursor_check_pure(r) != MDBX_SUCCESS))
    return (cursor_check_pure(l) == MDBX_SUCCESS) ? incomparable * 8 : 0;

  if (unlikely(l->clc != r->clc)) {
    if (l->txn->env != r->txn->env)
      return (l->txn->env > r->txn->env) ? incomparable * 7 : -incomparable * 7;
    if (l->txn->txnid != r->txn->txnid)
      return (l->txn->txnid > r->txn->txnid) ? incomparable * 6 : -incomparable * 6;
    return (l->clc > r->clc) ? incomparable * 5 : -incomparable * 5;
  }
  assert(cursor_dbi(l) == cursor_dbi(r));

  int diff = is_pointed(l) - is_pointed(r);
  if (unlikely(diff))
    return (diff > 0) ? incomparable * 4 : -incomparable * 4;
  /* both cursors are not pointed, so they are treated as equal */
  if (unlikely(!is_pointed(l)))
    return 0;

  /* compare key indexes level by level over the common part of the stacks */
  intptr_t detent = (l->top <= r->top) ? l->top : r->top;
  for (intptr_t i = 0; i <= detent; ++i) {
    diff = l->ki[i] - r->ki[i];
    if (diff)
      return diff;
  }
  if (unlikely(l->top != r->top))
    return (l->top > r->top) ? incomparable * 3 : -incomparable * 3;

  assert((l->subcur != nullptr) == (r->subcur != nullptr));
  if (unlikely((l->subcur != nullptr) != (r->subcur != nullptr)))
    return l->subcur ? incomparable * 2 : -incomparable * 2;
  if (ignore_multival || !l->subcur)
    return 0;

#if MDBX_DEBUG
  /* a pointed nested cursor implies the current node holds duplicates */
  if (is_pointed(&l->subcur->cursor)) {
    const page_t *mp = l->pg[l->top];
    const node_t *node = page_node(mp, l->ki[l->top]);
    assert(node_flags(node) & N_DUP);
  }
  if (is_pointed(&r->subcur->cursor)) {
    const page_t *mp = r->pg[r->top];
    const node_t *node = page_node(mp, r->ki[r->top]);
    assert(node_flags(node) & N_DUP);
  }
#endif /* MDBX_DEBUG */

  /* repeat the same comparison for the nested cursors */
  l = &l->subcur->cursor;
  r = &r->subcur->cursor;
  diff = is_pointed(l) - is_pointed(r);
  if (unlikely(diff))
    return (diff > 0) ? incomparable * 2 : -incomparable * 2;
  if (unlikely(!is_pointed(l)))
    return 0;

  detent = (l->top <= r->top) ? l->top : r->top;
  for (intptr_t i = 0; i <= detent; ++i) {
    diff = l->ki[i] - r->ki[i];
    if (diff)
      return diff;
  }
  if (unlikely(l->top != r->top))
    return (l->top > r->top) ? incomparable : -incomparable;

  /* equal positions: the z_eof_hard flag is the final tie-breaker */
  return (l->flags & z_eof_hard) - (r->flags & z_eof_hard);
}

/* Reports the number of values stored for the key at the current cursor
 * position and, optionally, statistics of the nested tree of values.
 *
 * `count` (optional) receives 0 when the cursor is not filled, 1 for a
 * single-value item, or the number of items of the nested tree (saturated
 * to PTRDIFF_MAX).
 * `ns` (optional) receives the statistics; `bytes` is the size of the
 * caller's MDBX_stat and must be either sizeof(MDBX_stat) or the legacy
 * size that ends right before the `ms_mod_txnid` field.
 *
 * Returns MDBX_SUCCESS, the cursor validation error, or MDBX_EINVAL for an
 * unsupported `bytes` value. */
int mdbx_cursor_count_ex(const MDBX_cursor *mc, size_t *count, MDBX_stat *ns, size_t bytes) {
  int rc = cursor_check_ro(mc);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (ns) {
    const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
    if (unlikely(bytes != sizeof(MDBX_stat) && bytes != size_before_modtxnid))
      return LOG_IFERR(MDBX_EINVAL);
    /* Clear only the part the caller actually provided: with the legacy
     * (shorter) structure, zeroing sizeof(*ns) bytes would write past the
     * end of the caller's buffer. */
    memset(ns, 0, bytes);
  }

  size_t nvals = 0;
  if (is_filled(mc)) {
    nvals = 1;
    if (!inner_hollow(mc)) {
      const page_t *mp = mc->pg[mc->top];
      const node_t *node = page_node(mp, mc->ki[mc->top]);
      cASSERT(mc, node_flags(node) & N_DUP);
      const tree_t *nt = &mc->subcur->nested_tree;
      nvals = unlikely(nt->items > PTRDIFF_MAX) ? PTRDIFF_MAX : (size_t)nt->items;
      if (ns) {
        /* by default the "page size" is the size of the node's data; it is
         * replaced by the environment page size for a real nested tree */
        ns->ms_psize = (unsigned)node_ds(node);
        if (node_flags(node) & N_TREE) {
          ns->ms_psize = mc->txn->env->ps;
          ns->ms_depth = nt->height;
          ns->ms_branch_pages = nt->branch_pages;
        }
        cASSERT(mc, nt->large_pages == 0);
        ns->ms_leaf_pages = nt->leaf_pages;
        ns->ms_entries = nt->items;
        /* ms_mod_txnid is written only if the caller's structure has it */
        if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(ns->ms_mod_txnid)))
          ns->ms_mod_txnid = nt->mod_txnid;
      }
    }
  }

  if (likely(count))
    *count = nvals;

  return MDBX_SUCCESS;
}

/* Strict flavour of mdbx_cursor_count_ex(): the `count` output is mandatory
 * (MDBX_EINVAL otherwise) and no statistics are requested. */
int mdbx_cursor_count(const MDBX_cursor *mc, size_t *count) {
  return unlikely(!count) ? LOG_IFERR(MDBX_EINVAL) : mdbx_cursor_count_ex(mc, count, nullptr, 0);
}

/* Tells whether every level of the cursor stack has a zero key index.
 * Returns MDBX_RESULT_TRUE / MDBX_RESULT_FALSE, or the error reported by
 * cursor_check_pure(). */
int mdbx_cursor_on_first(const MDBX_cursor *mc) {
  const int err = cursor_check_pure(mc);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  /* Descend from the top of the stack while the indices are zero; running
   * below level 0 means no level had a non-zero index. */
  intptr_t level = mc->top;
  while (level >= 0 && mc->ki[level] == 0)
    --level;

  return (level < 0) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
}

/* Same check as mdbx_cursor_on_first(), but applied to the nested cursor.
 * Yields MDBX_RESULT_TRUE when the outer cursor is not filled or has no
 * nested cursor at all. */
int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) {
  const int err = cursor_check_pure(mc);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (!is_filled(mc) || !mc->subcur)
    return MDBX_RESULT_TRUE;

  const MDBX_cursor *const inner = &mc->subcur->cursor;
  intptr_t level = inner->top;
  while (level >= 0 && inner->ki[level] == 0)
    --level;

  return (level < 0) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
}

/* Tells whether every level of the cursor stack points at (or beyond) the
 * last key of its page. Returns MDBX_RESULT_TRUE / MDBX_RESULT_FALSE, or the
 * error reported by cursor_check_pure(). */
int mdbx_cursor_on_last(const MDBX_cursor *mc) {
  const int err = cursor_check_pure(mc);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  intptr_t level = 0;
  while (level <= mc->top) {
    const size_t entries = page_numkeys(mc->pg[level]);
    /* Unsigned arithmetic on purpose: it mirrors the comparison against
     * `entries - 1` exactly, including the wrap-around for an empty page. */
    if (mc->ki[level] < entries - 1)
      return MDBX_RESULT_FALSE;
    ++level;
  }

  return MDBX_RESULT_TRUE;
}

/* Same check as mdbx_cursor_on_last(), but applied to the nested cursor.
 * Yields MDBX_RESULT_TRUE when the outer cursor is not filled or has no
 * nested cursor at all. */
int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) {
  const int err = cursor_check_pure(mc);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (!is_filled(mc) || !mc->subcur)
    return MDBX_RESULT_TRUE;

  const MDBX_cursor *const inner = &mc->subcur->cursor;
  intptr_t level = 0;
  while (level <= inner->top) {
    const size_t entries = page_numkeys(inner->pg[level]);
    if (inner->ki[level] < entries - 1)
      return MDBX_RESULT_FALSE;
    ++level;
  }

  return MDBX_RESULT_TRUE;
}

/* Maps is_eof() of the cursor onto MDBX_RESULT_TRUE / MDBX_RESULT_FALSE;
 * a cursor rejected by cursor_check_pure() yields that error instead. */
int mdbx_cursor_eof(const MDBX_cursor *mc) {
  const int err = cursor_check_pure(mc);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (is_eof(mc))
    return MDBX_RESULT_TRUE;
  return MDBX_RESULT_FALSE;
}

/* Public entry point for cursor reads: validates the cursor with
 * cursor_check_ro() and then delegates to cursor_ops(). */
int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) {
  const int err = cursor_check_ro(mc);
  const int rc = unlikely(err != MDBX_SUCCESS) ? err : cursor_ops(mc, key, data, op);
  return LOG_IFERR(rc);
}

/* Scanning loop shared by mdbx_cursor_scan() and mdbx_cursor_scan_from().
 *
 * Starting with the pair already held in `key`/`value`, calls `predicate`
 * and, while it answers MDBX_RESULT_FALSE, moves the cursor by `turn_op`.
 * Any other predicate result is returned as is. When the move fails,
 * MDBX_NOTFOUND is reported as MDBX_RESULT_FALSE and any other error is
 * returned unchanged. For MDBX_NEXT_DUP / MDBX_PREV_DUP without a nested
 * cursor MDBX_NOTFOUND is returned without calling the predicate. */
__hot static int scan_confinue(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, void *arg, MDBX_val *key,
                               MDBX_val *value, MDBX_cursor_op turn_op) {
  int rc;
  switch (turn_op) {
  case MDBX_NEXT:
  case MDBX_NEXT_NODUP:
    while ((rc = predicate(context, key, value, arg)) == MDBX_RESULT_FALSE) {
      rc = outer_next(mc, key, value, turn_op);
      if (unlikely(rc != MDBX_SUCCESS))
        return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc;
    }
    return rc;

  case MDBX_PREV:
  case MDBX_PREV_NODUP:
    while ((rc = predicate(context, key, value, arg)) == MDBX_RESULT_FALSE) {
      rc = outer_prev(mc, key, value, turn_op);
      if (unlikely(rc != MDBX_SUCCESS))
        return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc;
    }
    return rc;

  case MDBX_NEXT_DUP:
    if (!mc->subcur)
      return MDBX_NOTFOUND;
    while ((rc = predicate(context, key, value, arg)) == MDBX_RESULT_FALSE) {
      rc = inner_next(&mc->subcur->cursor, value);
      if (unlikely(rc != MDBX_SUCCESS))
        return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc;
    }
    return rc;

  case MDBX_PREV_DUP:
    if (!mc->subcur)
      return MDBX_NOTFOUND;
    while ((rc = predicate(context, key, value, arg)) == MDBX_RESULT_FALSE) {
      rc = inner_prev(&mc->subcur->cursor, value);
      if (unlikely(rc != MDBX_SUCCESS))
        return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc;
    }
    return rc;

  default:
    /* Every other turn operation goes through the generic dispatcher. */
    while ((rc = predicate(context, key, value, arg)) == MDBX_RESULT_FALSE) {
      rc = cursor_ops(mc, key, value, turn_op);
      if (unlikely(rc != MDBX_SUCCESS))
        return (rc == MDBX_NOTFOUND) ? MDBX_RESULT_FALSE : rc;
    }
    return rc;
  }
}

/* Positions the cursor with `start_op` and then scans by `turn_op`, feeding
 * each pair to `predicate` (see scan_confinue() for the result mapping).
 *
 * MDBX_EINVAL is returned for a null predicate, or when `start_op` /
 * `turn_op` are outside the sets of operations accepted below. */
int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op start_op,
                     MDBX_cursor_op turn_op, void *arg) {
  if (unlikely(!predicate))
    return LOG_IFERR(MDBX_EINVAL);

  /* One bit per accepted initial-positioning operation. */
  const unsigned start_ops_allowed = (1 << MDBX_FIRST) | (1 << MDBX_FIRST_DUP) | (1 << MDBX_LAST) |
                                     (1 << MDBX_LAST_DUP) | (1 << MDBX_GET_CURRENT) | (1 << MDBX_GET_MULTIPLE);
  if (unlikely(start_op > 30 || ((1 << start_op) & start_ops_allowed) == 0))
    return LOG_IFERR(MDBX_EINVAL);

  /* One bit per accepted stepping operation. */
  const unsigned turn_ops_allowed = (1 << MDBX_NEXT) | (1 << MDBX_NEXT_DUP) | (1 << MDBX_NEXT_NODUP) |
                                    (1 << MDBX_PREV) | (1 << MDBX_PREV_DUP) | (1 << MDBX_PREV_NODUP) |
                                    (1 << MDBX_NEXT_MULTIPLE) | (1 << MDBX_PREV_MULTIPLE);
  if (unlikely(turn_op > 30 || ((1 << turn_op) & turn_ops_allowed) == 0))
    return LOG_IFERR(MDBX_EINVAL);

  MDBX_val key = {nullptr, 0};
  MDBX_val value = {nullptr, 0};
  const int err = mdbx_cursor_get(mc, &key, &value, start_op);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  return LOG_IFERR(scan_confinue(mc, predicate, context, arg, &key, &value, turn_op));
}

/* Positions the cursor with `from_op` using the caller's `key` (and optional
 * `value`), then scans by `turn_op` feeding each pair to `predicate`
 * (see scan_confinue() for the result mapping).
 *
 * MDBX_EINVAL is returned for a null predicate or key, for a `from_op` below
 * MDBX_TO_KEY_LESSER_THAN that is not in the accepted set, and for an
 * unacceptable `turn_op`. Operations from MDBX_TO_KEY_LESSER_THAN upward are
 * passed to mdbx_cursor_get() without being filtered here. */
int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, void *context, MDBX_cursor_op from_op,
                          MDBX_val *key, MDBX_val *value, MDBX_cursor_op turn_op, void *arg) {
  if (unlikely(!predicate || !key))
    return LOG_IFERR(MDBX_EINVAL);

  const unsigned from_ops_allowed = (1 << MDBX_GET_BOTH) | (1 << MDBX_GET_BOTH_RANGE) | (1 << MDBX_SET_KEY) |
                                    (1 << MDBX_GET_MULTIPLE) | (1 << MDBX_SET_LOWERBOUND) |
                                    (1 << MDBX_SET_UPPERBOUND);
  if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && ((1 << from_op) & from_ops_allowed) == 0))
    return LOG_IFERR(MDBX_EINVAL);

  const unsigned turn_ops_allowed = (1 << MDBX_NEXT) | (1 << MDBX_NEXT_DUP) | (1 << MDBX_NEXT_NODUP) |
                                    (1 << MDBX_PREV) | (1 << MDBX_PREV_DUP) | (1 << MDBX_PREV_NODUP) |
                                    (1 << MDBX_NEXT_MULTIPLE) | (1 << MDBX_PREV_MULTIPLE);
  if (unlikely(turn_op > 30 || ((1 << turn_op) & turn_ops_allowed) == 0))
    return LOG_IFERR(MDBX_EINVAL);

  /* Only genuine errors abort here; MDBX_IS_ERROR() decides which results
   * of the positioning still allow the scan to proceed. */
  int rc = mdbx_cursor_get(mc, key, value, from_op);
  if (unlikely(MDBX_IS_ERROR(rc)))
    return LOG_IFERR(rc);

  cASSERT(mc, key != nullptr);
  MDBX_val placeholder;
  MDBX_val *const data = value ? value : &placeholder;
  if (!value) {
    /* The caller gave no value buffer: fetch the current pair into a local
     * one so that the predicate always receives a value. */
    rc = cursor_ops(mc, key, data, MDBX_GET_CURRENT);
    if (unlikely(rc != MDBX_SUCCESS))
      return LOG_IFERR(rc);
  }

  return LOG_IFERR(scan_confinue(mc, predicate, context, arg, key, data, turn_op));
}

/* Reads a run of consecutive key/value pairs into `pairs`, two MDBX_val
 * slots per item (key first, then value).
 *
 * `count` receives the number of MDBX_val slots filled (always even); it is
 *         reset to 0 before any other validation.
 * `pairs` output array with room for at least `limit` elements.
 * `limit` capacity of `pairs`; must be in [4, INTPTR_MAX - 2].
 * `op`    MDBX_NEXT to continue from the current position, or MDBX_FIRST,
 *         which positions an unfilled cursor via outer_first() first.
 *
 * Returns MDBX_SUCCESS when the buffer was filled up to the limit,
 * MDBX_RESULT_TRUE when cursor_sibling_right() reported MDBX_NOTFOUND,
 * MDBX_EINVAL for bad arguments or an unsupported `op`, MDBX_INCOMPATIBLE
 * when the cursor has a nested cursor, or another error code. */
int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, size_t limit, MDBX_cursor_op op) {
  if (unlikely(!count))
    return LOG_IFERR(MDBX_EINVAL);

  *count = 0;
  /* `pairs` is written unconditionally below, so a null buffer is rejected
   * together with an out-of-range limit. */
  if (unlikely(!pairs || limit < 4 || limit > INTPTR_MAX - 2))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = cursor_check_ro(mc);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(mc->subcur))
    return LOG_IFERR(MDBX_INCOMPATIBLE) /* must be a non-dupsort table */;

  switch (op) {
  case MDBX_NEXT:
    if (unlikely(is_eof(mc)))
      return LOG_IFERR(is_pointed(mc) ? MDBX_NOTFOUND : MDBX_ENODATA);
    break;

  case MDBX_FIRST:
    if (!is_filled(mc)) {
      rc = outer_first(mc, nullptr, nullptr);
      if (unlikely(rc != MDBX_SUCCESS))
        return LOG_IFERR(rc);
    }
    break;

  default:
    DEBUG("unhandled/unimplemented cursor operation %u", op);
    return LOG_IFERR(MDBX_EINVAL);
  }

  const page_t *mp = mc->pg[mc->top];
  size_t nkeys = page_numkeys(mp);
  size_t ki = mc->ki[mc->top];
  size_t n = 0;
  while (n + 2 <= limit) {
    cASSERT(mc, ki < nkeys);
    if (unlikely(ki >= nkeys))
      goto sibling;

    const node_t *leaf = page_node(mp, ki);
    pairs[n] = get_key(leaf);
    rc = node_read(mc, leaf, &pairs[n + 1], mp);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    n += 2;
    if (++ki == nkeys) {
    sibling:
      /* The current page is exhausted: step to the right sibling. */
      rc = cursor_sibling_right(mc);
      if (rc != MDBX_SUCCESS) {
        if (rc == MDBX_NOTFOUND)
          rc = MDBX_RESULT_TRUE;
        goto bailout;
      }

      mp = mc->pg[mc->top];
      DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, mc->ki[mc->top]);
      if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) {
        ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags);
        rc = MDBX_CORRUPTED;
        goto bailout;
      }
      nkeys = page_numkeys(mp);
      ki = 0;
    }
  }
  /* Buffer is full: remember where the next batch has to resume. */
  mc->ki[mc->top] = (indx_t)ki;

bailout:
  *count = n;
  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

/* Stores the opaque user pointer `ctx` in the cursor_couple_t which embeds
 * the given cursor as its `outer` member. */
int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) {
  const int err = cursor_check(mc, 0);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_couple_t *const owner = container_of(mc, cursor_couple_t, outer);
  owner->userctx = ctx;
  return MDBX_SUCCESS;
}

/* Returns the user pointer kept in the enclosing cursor_couple_t, or nullptr
 * when `mc` is null or its signature is neither "live" nor
 * "ready for dispose". */
void *mdbx_cursor_get_userctx(const MDBX_cursor *mc) {
  if (unlikely(!mc))
    return nullptr;

  if (unlikely(mc->signature != cur_signature_live && mc->signature != cur_signature_ready4dispose))
    return nullptr;

  cursor_couple_t *const owner = container_of(mc, cursor_couple_t, outer);
  return owner->userctx;
}

/* Returns the transaction of a live cursor, or nullptr when the cursor or
 * its transaction is missing, has a wrong signature, or the transaction is
 * finished. For a transaction flagged MDBX_TXN_HAS_CHILD the environment's
 * current transaction (env->txn) is returned instead. */
MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) {
  if (unlikely(!mc || mc->signature != cur_signature_live))
    return nullptr;

  MDBX_txn *const owner = mc->txn;
  if (unlikely(!owner || owner->signature != txn_signature || (owner->flags & MDBX_TXN_FINISHED)))
    return nullptr;

  if (owner->flags & MDBX_TXN_HAS_CHILD)
    return owner->env->txn;
  return owner;
}

/* Returns the DBI handle of a live cursor as given by cursor_dbi(), or
 * UINT_MAX for a null or non-live cursor. */
MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) {
  if (unlikely(!mc))
    return UINT_MAX;
  if (unlikely(mc->signature != cur_signature_live))
    return UINT_MAX;

  return cursor_dbi(mc);
}

/*----------------------------------------------------------------------------*/

/* Public entry point for storing a pair through a cursor.
 *
 * Rejects null `key`/`data` with MDBX_EINVAL, validates the cursor for
 * writing, runs the extra MDBX_MULTIPLE checks when that flag is given and
 * refuses MDBX_RESERVE on tables with any of the dupsort-related flags
 * (MDBX_INCOMPATIBLE). The actual work is done by cursor_put_checklen(). */
int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) {
  if (unlikely(!key || !data))
    return LOG_IFERR(MDBX_EINVAL);

  int err = cursor_check_rw(mc);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(flags & MDBX_MULTIPLE)) {
    err = cursor_check_multiple(mc, key, data, flags);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
  }

  if (flags & MDBX_RESERVE) {
    const unsigned dup_related = MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_INTEGERDUP | MDBX_DUPFIXED;
    if (unlikely(mc->tree->flags & dup_related))
      return LOG_IFERR(MDBX_INCOMPATIBLE);
    /* With MDBX_RESERVE the incoming data pointer is dropped before the put. */
    data->iov_base = nullptr;
  }

  return LOG_IFERR(cursor_put_checklen(mc, key, data, flags));
}

/* Public entry point for deletion through a cursor: validates the cursor
 * with cursor_check_rw() and then delegates to cursor_del(). */
int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
  const int err = cursor_check_rw(mc);
  const int rc = unlikely(err != MDBX_SUCCESS) ? err : cursor_del(mc, flags);
  return LOG_IFERR(rc);
}

/* Sets the z_ignord bit in the `checking` field of the cursor and, when
 * present, of its nested cursor. */
__cold int mdbx_cursor_ignord(MDBX_cursor *mc) {
  const int err = cursor_check(mc, 0);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (mc->subcur)
    mc->subcur->cursor.checking |= z_ignord;
  mc->checking |= z_ignord;

  return MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Opens a table by an MDBX_val name; no custom comparators are passed. */
int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) {
  const int rc = dbi_open(txn, name, flags, dbi, nullptr, nullptr);
  return LOG_IFERR(rc);
}

/* Opens a table by an MDBX_val name, forwarding the caller's key and data
 * comparators to dbi_open(). */
int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
                      MDBX_cmp_func *datacmp) {
  const int rc = dbi_open(txn, name, flags, dbi, keycmp, datacmp);
  return LOG_IFERR(rc);
}

/* Adapter from a C-string table name to dbi_open().
 *
 * The special MDBX_CHK_MAIN / MDBX_CHK_GC / MDBX_CHK_META values are
 * recognised by pointer identity and forwarded untouched; any other name is
 * wrapped into a temporary MDBX_val covering strlen(name_cstr) bytes. */
static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi,
                         MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) {
  if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META)
    return dbi_open(txn, (void *)name_cstr, flags, dbi, keycmp, datacmp);

  MDBX_val thunk;
  thunk.iov_base = (void *)name_cstr;
  thunk.iov_len = strlen(name_cstr);
  return dbi_open(txn, &thunk, flags, dbi, keycmp, datacmp);
}

/* Opens a table by a C-string name; no custom comparators are passed. */
int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) {
  const int rc = dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr);
  return LOG_IFERR(rc);
}

/* Opens a table by a C-string name, forwarding the caller's key and data
 * comparators. */
int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
                     MDBX_cmp_func *datacmp) {
  const int rc = dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp);
  return LOG_IFERR(rc);
}

/* Empties a table and optionally deletes it.
 *
 * The tree is dropped via tree_drop() when it has a non-zero height, and all
 * cursors tracked for the DBI are made "poor". When `del` is false or the
 * handle is one of the core DBIs, the table record is merely reset and
 * marked dirty. Otherwise the table's record is removed from MAIN_DBI and
 * the handle is closed; a failure on this path flags the transaction with
 * MDBX_TXN_ERROR. */
__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) {
  int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  cursor_couple_t cx;
  rc = cursor_init(&cx.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (txn->dbs[dbi].height) {
    /* Keep the temporary cursor in the tracking list while the tree is
     * being dropped, and unlink it right afterwards. */
    cx.outer.next = txn->cursors[dbi];
    txn->cursors[dbi] = &cx.outer;
    rc = tree_drop(&cx.outer, dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT));
    txn->cursors[dbi] = cx.outer.next;
    if (unlikely(rc != MDBX_SUCCESS))
      return LOG_IFERR(rc);
  }

  /* Invalidate the dropped DB's cursors */
  for (MDBX_cursor *tracked = txn->cursors[dbi]; tracked; tracked = tracked->next)
    be_poor(tracked);

  if (!del || dbi < CORE_DBS) {
    /* reset the DB record, mark it dirty */
    txn->dbi_state[dbi] |= DBI_DIRTY;
    txn->dbs[dbi].root = P_INVALID;
    txn->dbs[dbi].height = 0;
    txn->dbs[dbi].branch_pages = 0;
    txn->dbs[dbi].leaf_pages = 0;
    txn->dbs[dbi].large_pages = 0;
    txn->dbs[dbi].items = 0;
    txn->dbs[dbi].sequence = 0;
    /* txn->dbs[dbi].mod_txnid = txn->txnid; */
    txn->flags |= MDBX_TXN_DIRTY;
    return MDBX_SUCCESS;
  }

  /* Full deletion: remove the table's record from the main table. */
  MDBX_env *const env = txn->env;
  MDBX_val name = env->kvs[dbi].name;
  rc = cursor_init(&cx.outer, txn, MAIN_DBI);
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;

  rc = cursor_seek(&cx.outer, &name, nullptr, MDBX_SET).err;
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;

  cx.outer.next = txn->cursors[MAIN_DBI];
  txn->cursors[MAIN_DBI] = &cx.outer;
  rc = cursor_del(&cx.outer, N_TREE);
  txn->cursors[MAIN_DBI] = cx.outer.next;
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;

  tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY);
  tASSERT(txn, txn->flags & MDBX_TXN_DIRTY);
  txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN;
  rc = osal_fastmutex_acquire(&env->dbi_lock);
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;

  return LOG_IFERR(dbi_close_release(env, dbi));

fail:
  txn->flags |= MDBX_TXN_ERROR;
  return LOG_IFERR(rc);
}

/* C-string front-end of mdbx_dbi_rename2().
 *
 * The special MDBX_CHK_MAIN / MDBX_CHK_GC / MDBX_CHK_META values are
 * recognised by pointer identity and forwarded untouched; any other name is
 * wrapped into a temporary MDBX_val covering strlen(name_cstr) bytes. */
__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) {
  if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META)
    return mdbx_dbi_rename2(txn, dbi, (void *)name_cstr);

  MDBX_val thunk;
  thunk.iov_base = (void *)name_cstr;
  thunk.iov_len = strlen(name_cstr);
  return mdbx_dbi_rename2(txn, dbi, &thunk);
}

/* Renames the table behind `dbi` to `new_name` within a write transaction.
 *
 * MDBX_EINVAL is returned when `new_name` itself, or its iov_base, is one of
 * the special MDBX_CHK_MAIN / MDBX_CHK_GC / MDBX_CHK_META values, and when
 * `dbi` is one of the core handles. The rename is done by
 * dbi_rename_locked() after env->dbi_lock has been acquired; its deferred
 * item is passed on to dbi_defer_release(). */
__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) {
  int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  /* The special values are sentinels rather than addresses of real MDBX_val
   * objects (mdbx_dbi_rename() forwards them as is), hence ALL of them must
   * be ruled out by identity before `new_name` is dereferenced. */
  if (unlikely(new_name == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || new_name == MDBX_CHK_META))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(new_name->iov_base == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_GC ||
               new_name->iov_base == MDBX_CHK_META))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(dbi < CORE_DBS))
    return LOG_IFERR(MDBX_EINVAL);
  rc = dbi_check(txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  rc = osal_fastmutex_acquire(&txn->env->dbi_lock);
  if (likely(rc == MDBX_SUCCESS)) {
    struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name);
    /* Detach the deferred item from any chain before handing it over. */
    if (pair.defer)
      pair.defer->next = nullptr;
    dbi_defer_release(txn->env, pair.defer);
    rc = pair.err;
  }
  return LOG_IFERR(rc);
}

/* Closes a table handle at the environment level.
 *
 * Core handles are never closed: MAIN_DBI yields MDBX_SUCCESS and the other
 * core handle yields MDBX_BAD_DBI. Handles at or beyond env->max_dbi or
 * env->n_dbi yield MDBX_BAD_DBI. MDBX_DANGLING_DBI is returned when the
 * handle's state within the basal or the current write transaction has any
 * of DBI_DIRTY / DBI_CREAT set together with DBI_LINDO. Otherwise the result
 * of dbi_close_release() is returned. */
int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) {
  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(dbi < CORE_DBS))
    return (dbi == MAIN_DBI) ? MDBX_SUCCESS : LOG_IFERR(MDBX_BAD_DBI);

  if (unlikely(dbi >= env->max_dbi))
    return LOG_IFERR(MDBX_BAD_DBI);

  rc = osal_fastmutex_acquire(&env->dbi_lock);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(dbi >= env->n_dbi)) {
    rc = MDBX_BAD_DBI;
  bailout:
    /* Shared failure exit: drop dbi_lock taken above and report the error. */
    osal_fastmutex_release(&env->dbi_lock);
    return LOG_IFERR(rc);
  }

  while (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) {
    /* LY: This is dangerous code, since env->txn may be changed by another
     * thread. Unfortunately there is no reliable solution here, and a crash
     * is possible when the API is misused (i.e. mdbx_dbi_close is called
     * concurrently with the completion of a write transaction).
     *
     * To minimize the probability of a crash, the dbi-flags are checked in
     * basal_txn first and only afterwards in env->txn. Thus a crash is
     * possible only on a collision with the completion of a nested
     * transaction.
     *
     * Alternatively, one could try to update/put the record of the
     * corresponding table in mainDb for the handle being closed.
     * Semantically that is the right way, but the problem lies in the current
     * API, where historically a dbi-handle lives and gets closed outside of
     * a transaction. Moreover, the issue is not only the absence of a
     * pointer to the current write transaction, but also that the user
     * certainly does not expect that closing a handle causes hidden/opaque
     * activity inside a transaction which is potentially running in another
     * thread. In other words, the problem can only arise when the API is
     * misused, and a user who allows that will definitely not expect hidden
     * actions inside a transaction, so that way is potentially even more
     * dangerous. */
    const MDBX_txn *const hazard = env->txn;
    osal_compiler_barrier();
    if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) {
      rc = MDBX_DANGLING_DBI;
      goto bailout;
    }
    osal_memory_barrier();
    /* env->txn has changed since the snapshot was taken: start over. */
    if (unlikely(hazard != env->txn))
      continue;
    if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 &&
        hazard->signature == txn_signature &&
        (dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) {
      rc = MDBX_DANGLING_DBI;
      goto bailout;
    }
    osal_compiler_barrier();
    /* The snapshot is still current after both checks: the loop is done. */
    if (likely(hazard == env->txn))
      break;
  }
  rc = dbi_close_release(env, dbi);
  return LOG_IFERR(rc);
}

/* Reports the persistent flags of a table and the transaction-local state
 * bits of its handle (DBI_FRESH, DBI_CREAT, DBI_DIRTY, DBI_STALE).
 * Both output pointers are mandatory (MDBX_EINVAL otherwise). */
int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) {
  int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  err = dbi_check(txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(!flags || !state))
    return LOG_IFERR(MDBX_EINVAL);

  const unsigned reported_state = DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE;
  *flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS;
  *state = txn->dbi_state[dbi] & reported_state;
  return MDBX_SUCCESS;
}

/* Copies the tree counters from `db` into `st`. The ms_psize field is not
 * touched here. ms_mod_txnid is written only when `bytes` is large enough
 * to include that field. */
static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) {
  const size_t modtxnid_end = offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid);

  st->ms_entries = db->items;
  st->ms_depth = db->height;
  st->ms_branch_pages = db->branch_pages;
  st->ms_leaf_pages = db->leaf_pages;
  st->ms_overflow_pages = db->large_pages;
  if (likely(bytes >= modtxnid_end))
    st->ms_mod_txnid = db->mod_txnid;
}

/* Fills `dest` with the statistics of the table behind `dbi`.
 *
 * `bytes` must be either sizeof(MDBX_stat) or the legacy size which ends
 * just before ms_mod_txnid (MDBX_EINVAL otherwise, as well as for a null
 * `dest`). A handle marked DBI_STALE is refreshed through tbl_fetch() before
 * its counters are read. */
__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) {
  int err = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  err = dbi_check(txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(txn->flags & MDBX_TXN_BLOCKED))
    return LOG_IFERR(MDBX_BAD_TXN);

  if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
    err = tbl_fetch((MDBX_txn *)txn, dbi);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
  }

  if (unlikely(!dest))
    return LOG_IFERR(MDBX_EINVAL);

  const int current_layout = (bytes == sizeof(MDBX_stat));
  const int legacy_layout = (bytes == offsetof(MDBX_stat, ms_mod_txnid));
  if (unlikely(!current_layout && !legacy_layout))
    return LOG_IFERR(MDBX_EINVAL);

  stat_get(&txn->dbs[dbi], dest, bytes);
  dest->ms_psize = txn->env->ps;
  return MDBX_SUCCESS;
}

/* Walks the main table and calls `func` for every record whose node flags
 * are exactly N_TREE.
 *
 * For each such record the callback gets the record's key as the table
 * name, the tree flags, the statistics and the DBI handle of the table when
 * it is currently opened (0 otherwise). A non-MDBX_SUCCESS result of the
 * callback stops the enumeration and is returned as is.
 *
 * Returns MDBX_SUCCESS after a complete walk, MDBX_EINVAL for a null `func`,
 * MDBX_CORRUPTED when a record's data size differs from sizeof(tree_t), or
 * another error code. */
__cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) {
  if (unlikely(!func))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  cursor_couple_t cx;
  rc = cursor_init(&cx.outer, txn, MAIN_DBI);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  /* Keep the temporary cursor in the tracking list for the whole walk;
   * it is unlinked at the `bailout` label. */
  cx.outer.next = txn->cursors[MAIN_DBI];
  txn->cursors[MAIN_DBI] = &cx.outer;
  for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS;
       rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) {
    node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
    if (node_flags(node) != N_TREE)
      continue;
    if (unlikely(node_ds(node) != sizeof(tree_t))) {
      ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size",
            (unsigned)node_ds(node));
      rc = MDBX_CORRUPTED;
      break;
    }

    /* Work on a local copy of the stored tree descriptor. */
    tree_t reside;
    const tree_t *tree = memcpy(&reside, node_data(node), sizeof(reside));
    const MDBX_val name = {node_key(node), node_ks(node)};
    const MDBX_env *const env = txn->env;
    MDBX_dbi dbi = 0;
    /* Look for an opened handle whose name matches the record's key. */
    for (size_t i = CORE_DBS; i < env->n_dbi; ++i) {
      if (i >= txn->n_dbi || !(env->dbs_flags[i] & DB_VALID))
        continue;
      if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[i].name))
        continue;

      tree = dbi_dig(txn, i, &reside);
      dbi = (MDBX_dbi)i;
      break;
    }

    MDBX_stat stat;
    /* stat_get() does not write ms_psize, so set it explicitly rather than
     * hand an indeterminate value to the callback. */
    stat.ms_psize = env->ps;
    stat_get(tree, &stat, sizeof(stat));
    rc = func(ctx, txn, &name, tree->flags, &stat, dbi);
    if (rc != MDBX_SUCCESS)
      goto bailout;
  }
  /* Running off the end of the main table is the normal way to finish. */
  rc = (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc;

bailout:
  txn->cursors[MAIN_DBI] = cx.outer.next;
  return LOG_IFERR(rc);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Suggests an upper bound for the database size derived from the amount of
 * RAM reported by mdbx_get_sysraminfo(). The value is computed on the first
 * call and then served from a function-local static cache. */
__cold static intptr_t reasonable_db_maxsize(void) {
  static intptr_t cached_result;
  if (cached_result != 0)
    return cached_result;

  intptr_t pagesize, total_ram_pages;
  if (unlikely(mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr) != MDBX_SUCCESS))
    /* the 32-bit limit is good enough for fallback */
    return cached_result = MAX_MAPSIZE32;

  /* Under ASAN and/or Valgrind only 1/16 of the RAM is taken into account
   * (applied once for each of them). */
#if defined(__SANITIZE_ADDRESS__)
  total_ram_pages >>= 4;
#endif /* __SANITIZE_ADDRESS__ */
  if (RUNNING_ON_VALGRIND)
    total_ram_pages >>= 4;

  if (unlikely((size_t)total_ram_pages * 2 > MAX_MAPSIZE / (size_t)pagesize))
    return cached_result = MAX_MAPSIZE;
  assert(MAX_MAPSIZE >= (size_t)(total_ram_pages * pagesize * 2));

  /* Suggesting should not be more than golden ratio of the size of RAM. */
  cached_result = (intptr_t)((size_t)total_ram_pages * 207 >> 7) * pagesize;

  /* Round to the nearest human-readable granulation: try ever coarser units
   * while the adjustment stays within 1/16 of the current value. */
  for (size_t unit = MEGABYTE; unit; unit <<= 5) {
    const size_t lower = floor_powerof2(cached_result, unit);
    const size_t upper = ceil_powerof2(cached_result, unit);
    const size_t threshold = (size_t)cached_result >> 4;
    const bool down = cached_result - lower < upper - cached_result || upper > MAX_MAPSIZE;
    const size_t distance = down ? cached_result - lower : upper - cached_result;
    if (threshold < distance)
      break;
    cached_result = down ? lower : upper;
  }

  return cached_result;
}

/* Verifies that no file exists at `lck_pathname`.
 * Returns MDBX_RESULT_FALSE when osal_fileexists() reports so; otherwise an
 * error is logged and returned, with an existing file (MDBX_RESULT_TRUE)
 * reported as MDBX_DUPLICATED_CLK. */
__cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) {
  const int probe = osal_fileexists(lck_pathname);
  if (unlikely(probe != MDBX_RESULT_FALSE)) {
    const int err = (probe == MDBX_RESULT_TRUE) ? MDBX_DUPLICATED_CLK : probe;
    ERROR("Alternative/Duplicate LCK-file '%" MDBX_PRIsPATH "' error %d", lck_pathname, err);
    return err;
  }
  return probe;
}

/* Derives the set of pathnames kept in env->pathname from the user-supplied
 * `pathname`.
 *
 * Steps, as performed below:
 *  1. Probe `pathname`. When it does not exist and creation is allowed
 *     (`mode` is non-zero and MDBX_RDONLY is not set), the directory is
 *     created unless MDBX_NOSUBDIR was requested. When it does exist, the
 *     passed MDBX_NOSUBDIR flag is overridden: cleared for a directory and
 *     set for anything else.
 *  2. With MDBX_NOSUBDIR in effect, a `pathname` that ends with the data-file
 *     name is treated as "directory + data file": the flag is cleared and
 *     the base length is shortened accordingly.
 *  3. A single buffer is allocated and three strings are laid out in it:
 *     `specified` (a copy of `pathname`), `dxb` and `lck`.
 *  4. The buffer is first used as scratch space to compose the name of the
 *     alternative LCK-file (the one belonging to the opposite naming scheme)
 *     and to check that such a file is absent.
 *
 * Returns MDBX_EINVAL for a null/empty pathname, an OS error code from the
 * probing/creation step, MDBX_ENOMEM when the buffer cannot be allocated,
 * otherwise the last value assigned to `rc`: MDBX_SUCCESS or the result of
 * check_alternative_lck_absent(). Note that in the latter case the paths
 * are still filled in before returning. */
__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, const mdbx_mode_t mode) {
  memset(&env->pathname, 0, sizeof(env->pathname));
  if (unlikely(!pathname || !*pathname))
    return MDBX_EINVAL;

  int rc;
#if defined(_WIN32) || defined(_WIN64)
  const DWORD dwAttrib = GetFileAttributesW(pathname);
  if (dwAttrib == INVALID_FILE_ATTRIBUTES) {
    rc = GetLastError();
    if (rc != MDBX_ENOFILE)
      return rc;
    if (mode == 0 || (env->flags & MDBX_RDONLY) != 0)
      /* can't open non-existing */
      return rc;

    /* auto-create directory if requested */
    if ((env->flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathname, nullptr)) {
      rc = GetLastError();
      if (rc != ERROR_ALREADY_EXISTS)
        return rc;
    }
  } else {
    /* ignore passed MDBX_NOSUBDIR flag and set it automatically */
    env->flags |= MDBX_NOSUBDIR;
    if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)
      env->flags -= MDBX_NOSUBDIR;
  }
#else
  struct stat st;
  if (stat(pathname, &st) != 0) {
    rc = errno;
    if (rc != MDBX_ENOFILE)
      return rc;
    if (mode == 0 || (env->flags & MDBX_RDONLY) != 0)
      /* can't open non-existing */
      return rc /* MDBX_ENOFILE */;

    /* auto-create directory if requested */
    const mdbx_mode_t dir_mode =
        (/* inherit read/write permissions for group and others */ mode & (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) |
        /* always add read/write/search for owner */ S_IRWXU |
        ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) |
        ((mode & S_IROTH) ? /* +search if readable by others */ S_IXOTH : 0);
    if ((env->flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) {
      rc = errno;
      if (rc != EEXIST)
        return rc;
    }
  } else {
    /* ignore passed MDBX_NOSUBDIR flag and set it automatically */
    env->flags |= MDBX_NOSUBDIR;
    if (S_ISDIR(st.st_mode))
      env->flags -= MDBX_NOSUBDIR;
  }
#endif

  static const pathchar_t dxb_name[] = MDBX_DATANAME;
  static const pathchar_t lck_name[] = MDBX_LOCKNAME;
  static const pathchar_t lock_suffix[] = MDBX_LOCK_SUFFIX;

#if defined(_WIN32) || defined(_WIN64)
  assert(dxb_name[0] == '\\' && lck_name[0] == '\\');
  const size_t pathname_len = wcslen(pathname);
#else
  assert(dxb_name[0] == '/' && lck_name[0] == '/');
  const size_t pathname_len = strlen(pathname);
#endif
  assert(!osal_isdirsep(lock_suffix[0]));
  size_t base_len = pathname_len;
  static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1;
  if (env->flags & MDBX_NOSUBDIR) {
    if (base_len > dxb_name_len && osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, dxb_name_len)) {
      /* `pathname` ends with the separator-prefixed data-file name:
       * treat the preceding part as the directory. */
      env->flags -= MDBX_NOSUBDIR;
      base_len -= dxb_name_len;
    } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && osal_isdirsep(lck_name[0]) &&
               osal_pathequal(pathname + base_len - dxb_name_len + 1, dxb_name + 1, dxb_name_len - 1)) {
      /* `pathname` is exactly the data-file name without the leading
       * separator: the base becomes empty. */
      env->flags -= MDBX_NOSUBDIR;
      base_len -= dxb_name_len - 1;
    }
  }

  /* Size the buffer for `specified` + `dxb` + `lck`, reserving enough room
   * for the longer of the two possible suffix combinations. */
  const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t);
  const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name);
  const size_t enough4any =
      (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) ? suflen_with_NOSUBDIR : suflen_without_NOSUBDIR;
  const size_t bytes_needed = sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any;
  env->pathname.buffer = osal_malloc(bytes_needed);
  if (!env->pathname.buffer)
    return MDBX_ENOMEM;

  /* Layout inside the buffer: [specified][dxb][lck]. */
  env->pathname.specified = env->pathname.buffer;
  env->pathname.dxb = env->pathname.specified + pathname_len + 1;
  env->pathname.lck = env->pathname.dxb + base_len + dxb_name_len + 1;
  rc = MDBX_SUCCESS;
  pathchar_t *const buf = env->pathname.buffer;
  if (base_len) {
    /* Scratch use of the buffer: compose the LCK-file name of the opposite
     * naming scheme and make sure that no such file exists. */
    memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len);
    if (env->flags & MDBX_NOSUBDIR) {
      const pathchar_t *const lck_ext = osal_fileext(lck_name, ARRAY_LENGTH(lck_name));
      if (lck_ext) {
        pathchar_t *pathname_ext = osal_fileext(buf, pathname_len);
        memcpy(pathname_ext ? pathname_ext : buf + pathname_len, lck_ext,
               sizeof(pathchar_t) * (ARRAY_END(lck_name) - lck_ext));
        rc = check_alternative_lck_absent(buf);
      }
    } else {
      memcpy(buf + base_len, dxb_name, sizeof(dxb_name));
      memcpy(buf + base_len + dxb_name_len, lock_suffix, sizeof(lock_suffix));
      rc = check_alternative_lck_absent(buf);
    }

    /* Now compose the effective data-file and LCK-file pathnames. */
    memcpy(env->pathname.dxb, pathname, sizeof(pathchar_t) * (base_len + 1));
    memcpy(env->pathname.lck, pathname, sizeof(pathchar_t) * base_len);
    if (env->flags & MDBX_NOSUBDIR) {
      memcpy(env->pathname.lck + base_len, lock_suffix, sizeof(lock_suffix));
    } else {
      memcpy(env->pathname.dxb + base_len, dxb_name, sizeof(dxb_name));
      memcpy(env->pathname.lck + base_len, lck_name, sizeof(lck_name));
    }
  } else {
    /* Empty base: the file names are used without the leading separator. */
    assert(!(env->flags & MDBX_NOSUBDIR));
    memcpy(buf, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t));
    memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix));
    rc = check_alternative_lck_absent(buf);

    memcpy(env->pathname.dxb, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t));
    memcpy(env->pathname.lck, lck_name + 1, sizeof(lck_name) - sizeof(pathchar_t));
  }

  /* The scratch area is finally overwritten by the copy of `pathname`. */
  memcpy(env->pathname.specified, pathname, sizeof(pathchar_t) * (pathname_len + 1));
  return rc;
}

/*----------------------------------------------------------------------------*/

/* Allocates and initializes a new environment object.
 *
 * `*penv` is reset to nullptr first and receives the new object only on
 * success. Before allocating, the runtime is checked: lock-free C11 atomics
 * (when they are in use), a suitable system page size and, on Linux,
 * a sufficiently recent kernel; a failed check yields MDBX_INCOMPATIBLE.
 *
 * Returns MDBX_SUCCESS, MDBX_EINVAL for a null `penv`, MDBX_ENOMEM when the
 * allocation fails, MDBX_INCOMPATIBLE, or the error code of a failed
 * lock/mutex initialization. */
__cold int mdbx_env_create(MDBX_env **penv) {
  if (unlikely(!penv))
    return LOG_IFERR(MDBX_EINVAL);
  *penv = nullptr;

#ifdef MDBX_HAVE_C11ATOMICS
  if (unlikely(!atomic_is_lock_free((const volatile uint32_t *)penv))) {
    ERROR("lock-free atomic ops for %u-bit types is required", 32);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
#if MDBX_64BIT_ATOMIC
  if (unlikely(!atomic_is_lock_free((const volatile uint64_t *)penv))) {
    ERROR("lock-free atomic ops for %u-bit types is required", 64);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
#endif /* MDBX_64BIT_ATOMIC */
#endif /* MDBX_HAVE_C11ATOMICS */

  if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) {
    ERROR("unsuitable system pagesize %u", globals.sys_pagesize);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }

#if defined(__linux__) || defined(__gnu_linux__)
  if (unlikely(globals.linux_kernel_version < 0x03100000)) {
    /* 2025-08-05: Kernel 3.16 was released 11 years ago and was the longest-supported one of the 3.x series,
     * until July 2020. Three years ago (in 2022) running on kernels older than 4.x was blocked here, since those
     * are obsolete and it is extremely difficult to provide any testing for them. Now I have decided to revise
     * this and allow running on old kernels starting from 3.16, the reasoning is as follows:
     *  - the behavior of old kernels will definitely not change anymore,
     *    and the current libmdbx code has everything needed to work starting from 3.16;
     *  - there are widely-used projects (Isar) which require support for old kernels;
     *  - nowadays testing on 4.x is just as difficult as on 3.16, so it no longer makes testing any easier
     *    and I have to rely on the API compatibility guarantees of the kernel and glibc/musl;
     *  - using features of newer kernels requires checks/branching anyway;
     *  = therefore there is currently no reason to refuse working on 3.16 while supporting 4.0 kernels. */
    ERROR("too old linux kernel %u.%u.%u.%u, the >= 3.16 is required", globals.linux_kernel_version >> 24,
          (globals.linux_kernel_version >> 16) & 255, (globals.linux_kernel_version >> 8) & 255,
          globals.linux_kernel_version & 255);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
#endif /* Linux */

  MDBX_env *env = osal_calloc(1, sizeof(MDBX_env));
  if (unlikely(!env))
    return LOG_IFERR(MDBX_ENOMEM);

  /* Non-zero defaults on top of the zero-filled allocation. */
  env->max_readers = DEFAULT_READERS;
  env->max_dbi = env->n_dbi = CORE_DBS;
  env->lazy_fd = env->dsync_fd = env->fd4meta = env->lck_mmap.fd = INVALID_HANDLE_VALUE;
  env->stuck_meta = -1;

  env_options_init(env);
  env_setup_pagesize(env, (globals.sys_pagesize < MDBX_MAX_PAGESIZE) ? globals.sys_pagesize : MDBX_MAX_PAGESIZE);

  int rc = osal_fastmutex_init(&env->dbi_lock);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

#if defined(_WIN32) || defined(_WIN64)
  imports.srwl_Init(&env->remap_guard);
  InitializeCriticalSection(&env->windowsbug_lock);
#else
  rc = osal_fastmutex_init(&env->remap_guard);
  if (unlikely(rc != MDBX_SUCCESS)) {
    /* Undo what has been initialized so far before bailing out. */
    osal_fastmutex_destroy(&env->dbi_lock);
    goto bailout;
  }

#if MDBX_LOCKING > MDBX_LOCKING_SYSV
  lck_t *const stub = lckless_stub(env);
  rc = lck_ipclock_stubinit(&stub->wrt_lock);
#endif /* MDBX_LOCKING */
  /* When the stub-lock step above is compiled out, `rc` still holds the
   * successful result of the previous initialization. */
  if (unlikely(rc != MDBX_SUCCESS)) {
    osal_fastmutex_destroy(&env->remap_guard);
    osal_fastmutex_destroy(&env->dbi_lock);
    goto bailout;
  }
#endif /* Windows */

  VALGRIND_CREATE_MEMPOOL(env, 0, 0);
  /* The signature is set last, once the object is fully initialized. */
  env->signature.weak = env_signature;
  *penv = env;
  return MDBX_SUCCESS;

bailout:
  osal_free(env);
  return LOG_IFERR(rc);
}

/* Promotes the meta page with index `target` for recovery purposes.
 *
 * The environment must pass check_env(env, true) and be opened with
 * MDBX_EXCLUSIVE but without MDBX_RDONLY. Every other meta page is inspected:
 * one that fails meta_validate() is handed to meta_override() with a zero
 * txnid, while a valid one pushes the new txnid past its own. Finally the
 * target meta is rewritten via meta_override() with the computed txnid.
 *
 * Returns MDBX_EINVAL for an out-of-range `target`, MDBX_EPERM for an
 * unsuitable open mode, MDBX_TXN_FULL when the computed txnid exceeds
 * MAX_TXNID, otherwise the result of check_env() / meta_override(). */
__cold int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target) {
  if (unlikely(target >= NUM_METAS))
    return LOG_IFERR(MDBX_EINVAL);

  const int check_rc = check_env(env, true);
  if (unlikely(check_rc != MDBX_SUCCESS))
    return LOG_IFERR(check_rc);

  /* exclusive mode is mandatory, read-only mode is forbidden */
  const uint32_t access_bits = env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY);
  if (unlikely(access_bits != MDBX_EXCLUSIVE))
    return LOG_IFERR(MDBX_EPERM);

  const meta_t *const target_meta = METAPAGE(env, target);
  const txnid_t target_txnid = constmeta_txnid(target_meta);
  txnid_t new_txnid = (target_txnid < MIN_TXNID) ? MIN_TXNID : target_txnid;

  for (unsigned idx = 0; idx < NUM_METAS; ++idx) {
    if (idx != target) {
      page_t *const page = pgno2page(env, idx);
      /* validate a private copy of the meta page */
      meta_t copy = *page_meta(page);
      if (meta_validate(env, &copy, page, idx, nullptr) == MDBX_SUCCESS) {
        /* a valid sibling: the target must end up strictly newer */
        const txnid_t sibling_txnid = constmeta_txnid(&copy);
        if (sibling_txnid >= new_txnid)
          new_txnid = safe64_txnid_next(sibling_txnid);
      } else {
        /* an invalid sibling is overridden with a zero txnid */
        const int err = meta_override(env, idx, 0, nullptr);
        if (unlikely(err != MDBX_SUCCESS))
          return LOG_IFERR(err);
      }
    }
  }

  if (unlikely(new_txnid > MAX_TXNID)) {
    ERROR("txnid overflow, raise %d", MDBX_TXN_FULL);
    return LOG_IFERR(MDBX_TXN_FULL);
  }

  const int override_rc = meta_override(env, target, new_txnid, target_meta);
  return LOG_IFERR(override_rc);
}

/* Opens the environment in exclusive mode with the meta page `target_meta`
 * recorded in env->stuck_meta before the open call.
 *
 * On Windows the multi-byte entry point converts `pathname` with osal_mb2w()
 * and forwards to mdbx_env_open_for_recoveryW(); on other platforms the body
 * after the #endif belongs to mdbx_env_open_for_recovery() itself.
 *
 * Returns MDBX_EINVAL for an out-of-range `target_meta`, MDBX_EPERM when the
 * data file is already mapped, otherwise the result of check_env() or of the
 * underlying open call. `writeable` selects MDBX_EXCLUSIVE alone, otherwise
 * MDBX_EXCLUSIVE | MDBX_RDONLY is used. */
__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname, unsigned target_meta, bool writeable) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *wide_path = nullptr;
  const int conv_rc = osal_mb2w(pathname, &wide_path);
  if (unlikely(conv_rc != MDBX_SUCCESS))
    return LOG_IFERR(conv_rc);

  const int open_rc = mdbx_env_open_for_recoveryW(env, wide_path, target_meta, writeable);
  osal_free(wide_path);
  return LOG_IFERR(open_rc);
}

__cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, unsigned target_meta, bool writeable) {
#endif /* Windows */

  if (unlikely(target_meta >= NUM_METAS))
    return LOG_IFERR(MDBX_EINVAL);

  const int check_rc = check_env(env, false);
  if (unlikely(check_rc != MDBX_SUCCESS))
    return LOG_IFERR(check_rc);

  /* an already mapped environment cannot be re-opened for recovery */
  if (unlikely(env->dxb_mmap.base != nullptr))
    return LOG_IFERR(MDBX_EPERM);

  env->stuck_meta = (int8_t)target_meta;
#if defined(_WIN32) || defined(_WIN64)
  return mdbx_env_openW(env, pathname, writeable ? MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, 0);
#else
  return mdbx_env_open(env, pathname, writeable ? MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY, 0);
#endif /* Windows */
}

/* Deletes the files of an environment located at `pathname`.
 *
 * On Windows the multi-byte entry point converts `pathname` with osal_mb2w()
 * and forwards to mdbx_env_deleteW(); on other platforms the body after the
 * #endif belongs to mdbx_env_delete() itself.
 *
 * For modes above MDBX_ENV_JUST_DELETE the data and lock files are first
 * opened with MDBX_OPEN_DELETE and locked via osal_lockfile() (blocking only
 * for MDBX_ENV_WAIT_FOR_UNUSED). Then the data file, the lock file and, unless
 * MDBX_NOSUBDIR is set or the path is "." / "..", the directory are removed.
 *
 * Returns MDBX_SUCCESS if at least one item was removed, MDBX_RESULT_TRUE if
 * nothing had to be removed, MDBX_EINVAL for an unknown `mode`, or the first
 * error other than MDBX_ENOFILE. */
__cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *wide_path = nullptr;
  const int conv_rc = osal_mb2w(pathname, &wide_path);
  if (unlikely(conv_rc != MDBX_SUCCESS))
    return LOG_IFERR(conv_rc);

  const int delete_rc = mdbx_env_deleteW(wide_path, mode);
  osal_free(wide_path);
  return LOG_IFERR(delete_rc);
}

__cold int mdbx_env_deleteW(const wchar_t *pathname, MDBX_env_delete_mode_t mode) {
#endif /* Windows */

  if (mode != MDBX_ENV_JUST_DELETE && mode != MDBX_ENV_ENSURE_UNUSED && mode != MDBX_ENV_WAIT_FOR_UNUSED)
    return LOG_IFERR(MDBX_EINVAL);

  /* a throw-away environment object, used only to resolve the file names */
#ifdef __e2k__ /* https://bugs.mcst.ru/bugzilla/show_bug.cgi?id=6011 */
  MDBX_env *const dummy_env = alloca(sizeof(MDBX_env));
#else
  MDBX_env dummy_env_silo, *const dummy_env = &dummy_env_silo;
#endif
  memset(dummy_env, 0, sizeof(*dummy_env));
  if (mode == MDBX_ENV_ENSURE_UNUSED)
    dummy_env->flags = MDBX_EXCLUSIVE;
  else
    dummy_env->flags = MDBX_ENV_DEFAULTS;
  dummy_env->ps = (unsigned)mdbx_default_pagesize();

  STATIC_ASSERT(sizeof(dummy_env->flags) == sizeof(MDBX_env_flags_t));
  int rc = MDBX_RESULT_TRUE; /* becomes MDBX_SUCCESS once something is removed */
  int err = env_handle_pathname(dummy_env, pathname, 0);
  if (unlikely(err != MDBX_SUCCESS)) {
    /* a missing path is not an error: there is simply nothing to delete */
    if (err == MDBX_ENOFILE)
      err = MDBX_SUCCESS;
  } else {
    mdbx_filehandle_t lck_handle = INVALID_HANDLE_VALUE;
    mdbx_filehandle_t dxb_handle = INVALID_HANDLE_VALUE;

    if (mode > MDBX_ENV_JUST_DELETE) {
      /* open both files (absence is tolerated), then lock the ones opened */
      err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.dxb, &dxb_handle, 0);
      if (err == MDBX_ENOFILE)
        err = MDBX_SUCCESS;
      if (err == MDBX_SUCCESS) {
        err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, dummy_env->pathname.lck, &lck_handle, 0);
        if (err == MDBX_ENOFILE)
          err = MDBX_SUCCESS;
      }
      if (err == MDBX_SUCCESS && lck_handle != INVALID_HANDLE_VALUE)
        err = osal_lockfile(lck_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED);
      if (err == MDBX_SUCCESS && dxb_handle != INVALID_HANDLE_VALUE)
        err = osal_lockfile(dxb_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED);
    }

    /* the data file */
    if (err == MDBX_SUCCESS) {
      err = osal_removefile(dummy_env->pathname.dxb);
      if (err == MDBX_ENOFILE)
        err = MDBX_SUCCESS;
      else if (err == MDBX_SUCCESS)
        rc = MDBX_SUCCESS;
    }

    /* the lock file */
    if (err == MDBX_SUCCESS) {
      err = osal_removefile(dummy_env->pathname.lck);
      if (err == MDBX_ENOFILE)
        err = MDBX_SUCCESS;
      else if (err == MDBX_SUCCESS)
        rc = MDBX_SUCCESS;
    }

    /* the directory itself, but never "." or ".." */
    if (err == MDBX_SUCCESS && !(dummy_env->flags & MDBX_NOSUBDIR)) {
      const bool is_dot = pathname[0] == '.' && pathname[1] == 0;
      const bool is_dotdot = pathname[0] == '.' && pathname[1] == '.' && pathname[2] == 0;
      if (!is_dot && !is_dotdot) {
        err = osal_removedirectory(pathname);
        if (err == MDBX_ENOFILE)
          err = MDBX_SUCCESS;
        else if (err == MDBX_SUCCESS)
          rc = MDBX_SUCCESS;
      }
    }

    if (dxb_handle != INVALID_HANDLE_VALUE)
      osal_closefile(dxb_handle);
    if (lck_handle != INVALID_HANDLE_VALUE)
      osal_closefile(lck_handle);
  }

  osal_free(dummy_env->pathname.buffer);
  if (err != MDBX_SUCCESS)
    return LOG_IFERR(err);
  return LOG_IFERR(rc);
}

/* Opens the environment `env` located at `pathname` using `flags` and the file `mode`.
 *
 * On Windows the multi-byte entry point converts `pathname` with osal_mb2w() and
 * forwards to mdbx_env_openW(); on other platforms the body after the #endif
 * belongs to mdbx_env_open() itself.
 *
 * Returns MDBX_SUCCESS on success. Failures visible here: the result of check_env(),
 * MDBX_EINVAL for flags outside ENV_USABLE_FLAGS, MDBX_EPERM when the environment
 * already has an open file / is active / is mapped, MDBX_ENOMEM on allocation failure,
 * errors from env_handle_pathname() and env_open(), and MDBX_PANIC when the cleanup
 * through env_close() fails. After a failed open env->flags is restored to the value
 * it had on entry (with ENV_FATAL_ERROR added in the MDBX_PANIC case). */
__cold int mdbx_env_open(MDBX_env *env, const char *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *pathnameW = nullptr;
  int rc = osal_mb2w(pathname, &pathnameW);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = mdbx_env_openW(env, pathnameW, flags, mode);
    osal_free(pathnameW);
    if (rc == MDBX_SUCCESS)
      /* force to make cache of the multi-byte pathname representation */
      mdbx_env_get_path(env, &pathname);
  }
  return LOG_IFERR(rc);
}

__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, MDBX_env_flags_t flags, mdbx_mode_t mode) {
#endif /* Windows */

  int rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(flags & ~ENV_USABLE_FLAGS))
    return LOG_IFERR(MDBX_EINVAL);

  /* refuse to open an environment that is already opened, active or mapped */
  if (unlikely(env->lazy_fd != INVALID_HANDLE_VALUE || (env->flags & ENV_ACTIVE) != 0 || env->dxb_mmap.base))
    return LOG_IFERR(MDBX_EPERM);

  /* Pick up the flags previously set via mdbx_env_set_flags(),
   * but avoid getting MDBX_UTTERLY_NOSYNC by a plain disjunction.
   * The original flags are remembered to be restored if the open fails. */
  const uint32_t saved_me_flags = env->flags;
  flags = combine_durability_flags(flags | DEPRECATED_COALESCE, env->flags);

  if (flags & MDBX_RDONLY) {
    /* Silently ignore irrelevant flags when we're only getting read access */
    flags &= ~(MDBX_WRITEMAP | DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | DEPRECATED_COALESCE |
               MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE);
    mode = 0;
  } else {
#if MDBX_MMAP_INCOHERENT_FILE_WRITE
    /* Temporary `workaround` for OpenBSD kernel's flaw.
     * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */
    if ((flags & MDBX_WRITEMAP) == 0) {
      if (flags & MDBX_ACCEDE)
        flags |= MDBX_WRITEMAP;
      else {
        debug_log(MDBX_LOG_ERROR, __func__, __LINE__,
                  "System (i.e. OpenBSD) requires MDBX_WRITEMAP because "
                  "of an internal flaw(s) in a file/buffer/page cache.\n");
        return LOG_IFERR(42 /* ENOPROTOOPT */);
      }
    }
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
  }

  /* from here on every failure must go through `bailout` to undo partial setup */
  env->flags = (flags & ~ENV_FATAL_ERROR);
  rc = env_handle_pathname(env, pathname, mode);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

  /* per-DBI arrays sized by max_dbi; all three are checked together */
  env->kvs = osal_calloc(env->max_dbi, sizeof(env->kvs[0]));
  env->dbs_flags = osal_calloc(env->max_dbi, sizeof(env->dbs_flags[0]));
  env->dbi_seqs = osal_calloc(env->max_dbi, sizeof(env->dbi_seqs[0]));
  if (unlikely(!(env->kvs && env->dbs_flags && env->dbi_seqs))) {
    rc = MDBX_ENOMEM;
    goto bailout;
  }

  if ((flags & MDBX_RDONLY) == 0) {
    /* Writable mode: pre-allocate env->basal_txn as a single zeroed chunk laid out as
     *   MDBX_txn | cursor_couple_t | dbs[] | cursors[] | dbi_seqs[] | [dbi_sparse bitmap] | dbi_state[]
     * where each array has max_dbi elements and dbi_state[] occupies the very tail. */
    MDBX_txn *txn = nullptr;
    const intptr_t bitmap_bytes =
#if MDBX_ENABLE_DBI_SPARSE
        ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT;
#else
        0;
#endif /* MDBX_ENABLE_DBI_SPARSE */
    const size_t base = sizeof(MDBX_txn) + sizeof(cursor_couple_t);
    const size_t size = base + bitmap_bytes +
                        env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_seqs[0]) +
                                        sizeof(txn->dbi_state[0]));

    txn = osal_calloc(1, size);
    if (unlikely(!txn)) {
      rc = MDBX_ENOMEM;
      goto bailout;
    }
    txn->dbs = ptr_disp(txn, base);
    txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0]));
    txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0]));
    txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0]));
#if MDBX_ENABLE_DBI_SPARSE
    /* the bitmap sits immediately before dbi_state[] */
    txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes);
#endif /* MDBX_ENABLE_DBI_SPARSE */
    txn->env = env;
    txn->flags = MDBX_TXN_FINISHED;
    /* published before the PNL allocations, so a failure below still reaches env_close()
     * with basal_txn set (NOTE(review): presumably env_close() releases it — confirm) */
    env->basal_txn = txn;
    txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL);
    txn->tw.repnl = pnl_alloc(MDBX_PNL_INITIAL);
    if (unlikely(!txn->tw.retired_pages || !txn->tw.repnl)) {
      rc = MDBX_ENOMEM;
      goto bailout;
    }
    env_options_adjust_defaults(env);
  }

  rc = env_open(env, mode);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

#if MDBX_DEBUG
  /* debug builds only: log the properties of the most recent meta page */
  const troika_t troika = meta_tap(env);
  const meta_ptr_t head = meta_recent(env, &troika);
  const tree_t *db = &head.ptr_c->trees.main;

  DEBUG("opened database version %u, pagesize %u", (uint8_t)unaligned_peek_u64(4, head.ptr_c->magic_and_version),
        env->ps);
  DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, data_page(head.ptr_c)->pgno, head.txnid);
  DEBUG("depth: %u", db->height);
  DEBUG("entries: %" PRIu64, db->items);
  DEBUG("branch pages: %" PRIaPGNO, db->branch_pages);
  DEBUG("leaf pages: %" PRIaPGNO, db->leaf_pages);
  DEBUG("large/overflow pages: %" PRIaPGNO, db->large_pages);
  DEBUG("root: %" PRIaPGNO, db->root);
  DEBUG("schema_altered: %" PRIaTXN, db->mod_txnid);
#endif /* MDBX_DEBUG */

  /* When reached sequentially rc is always MDBX_SUCCESS here; the else-branch
   * exists only to host the `bailout` label targeted by the gotos above. */
  if (likely(rc == MDBX_SUCCESS)) {
    dxb_sanitize_tail(env, nullptr);
  } else {
  bailout:
    /* undo the partial open and restore the flags the caller had before */
    if (likely(env_close(env, false) == MDBX_SUCCESS)) {
      env->flags = saved_me_flags;
    } else {
      rc = MDBX_PANIC;
      env->flags = saved_me_flags | ENV_FATAL_ERROR;
    }
  }
  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

#if !(defined(_WIN32) || defined(_WIN64))
/* Re-opens the environment when called from a process whose pid differs from
 * the one stored in env->pid (i.e. after a fork).
 *
 * Nothing is done (MDBX_SUCCESS) when the environment is not active or the
 * pid is unchanged. Otherwise the signature is temporarily inverted, a
 * pending transaction is aborted, the environment is closed with the
 * resurrect flag and then opened again via env_open(). An environment opened
 * with MDBX_EXCLUSIVE is not re-opened and yields MDBX_BUSY.
 *
 * Returns MDBX_EINVAL for a null `env`, MDBX_EBADSIGN for a wrong signature,
 * MDBX_PANIC for an environment in the fatal-error state or when the cleanup
 * after a failed re-open fails, otherwise the result of env_close() /
 * env_open(). */
__cold int mdbx_env_resurrect_after_fork(MDBX_env *env) {
  if (unlikely(env == nullptr))
    return LOG_IFERR(MDBX_EINVAL);
  if (unlikely(env->signature.weak != env_signature))
    return LOG_IFERR(MDBX_EBADSIGN);
  if (unlikely((env->flags & ENV_FATAL_ERROR) != 0))
    return LOG_IFERR(MDBX_PANIC);

  /* an inactive environment holds nothing to resurrect */
  if (unlikely(!(env->flags & ENV_ACTIVE)))
    return MDBX_SUCCESS;

  /* same process: no fork has happened */
  const uint32_t current_pid = osal_getpid();
  if (unlikely(current_pid == env->pid))
    return MDBX_SUCCESS;

  /* invert the signature for the duration of the close step */
  if (!atomic_cas32(&env->signature, env_signature, ~env_signature))
    return LOG_IFERR(MDBX_EBADSIGN);

  if (env->txn)
    txn_abort(env->basal_txn);
  env->registered_reader_pid = 0;

  int rc = env_close(env, true);
  env->signature.weak = env_signature;
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (env->flags & MDBX_EXCLUSIVE)
    rc = MDBX_BUSY;
  else
    rc = env_open(env, 0);

  if (unlikely(rc != MDBX_SUCCESS)) {
    /* roll back the failed re-open; a failing rollback is fatal */
    if (unlikely(env_close(env, false) != MDBX_SUCCESS)) {
      rc = MDBX_PANIC;
      env->flags |= ENV_FATAL_ERROR;
    }
  }
  return LOG_IFERR(rc);
}
#endif /* Windows */

/* Closes the environment and releases the `env` object itself.
 *
 * `dont_sync` skips the final env_sync(); it is also forced to true when the
 * environment is not mapped, is read-only, is in the fatal-error state or has
 * no basal transaction.
 *
 * Returns MDBX_EINVAL for a null `env`, MDBX_EBADSIGN for a wrong signature (or when
 * the signature changes concurrently), MDBX_BUSY when the basal transaction is owned
 * by another thread, MDBX_PANIC when env_close() fails, otherwise the status of the
 * optional sync step. Once the signature has been cleared the object is always freed,
 * even if an error code is returned. */
__cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
  page_t *dp;
  int rc = MDBX_SUCCESS;

  if (unlikely(!env))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(env->signature.weak != env_signature))
    return LOG_IFERR(MDBX_EBADSIGN);

#if MDBX_ENV_CHECKPID || !(defined(_WIN32) || defined(_WIN64))
  /* Check the PID even if MDBX_ENV_CHECKPID=0 on non-Windows
   * platforms (i.e. where fork() is available).
   * This is required to legitimize a call after fork()
   * from a child process, that should be allowed to free resources. */
  if (unlikely(env->pid != osal_getpid()))
    env->flags |= ENV_FATAL_ERROR;
#endif /* MDBX_ENV_CHECKPID */

  /* syncing makes sense only for a mapped, writable, healthy environment */
  if (env->dxb_mmap.base && (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0 && env->basal_txn) {
    if (env->basal_txn->owner && env->basal_txn->owner != osal_thread_self())
      return LOG_IFERR(MDBX_BUSY);
  } else
    dont_sync = true;

  /* clear the signature atomically; past this point the close cannot be refused */
  if (!atomic_cas32(&env->signature, env_signature, 0))
    return LOG_IFERR(MDBX_EBADSIGN);

  if (!dont_sync) {
#if defined(_WIN32) || defined(_WIN64)
    /* On windows, without blocking is impossible to determine whether another
     * process is running a writing transaction or not.
     * Because in the "owner died" condition kernel don't release
     * file lock immediately. */
    rc = env_sync(env, true, false);
    rc = (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
#else
    struct stat st;
    if (unlikely(fstat(env->lazy_fd, &st)))
      rc = errno;
    else if (st.st_nlink > 0 /* don't sync deleted files */) {
      rc = env_sync(env, true, true);
      /* the "busy / would block" family of results is not treated as a close failure */
      rc = (rc == MDBX_BUSY || rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
            rc == MDBX_RESULT_TRUE)
               ? MDBX_SUCCESS
               : rc;
    }
#endif /* Windows */
  }

  /* release the write-transaction lock if the basal transaction still has an owner
   * (with MDBX_TXN_CHECKOWNER: only when that owner is the current thread) */
  if (env->basal_txn && (MDBX_TXN_CHECKOWNER ? env->basal_txn->owner == osal_thread_self() : !!env->basal_txn->owner))
    lck_txn_unlock(env);

  eASSERT(env, env->signature.weak == 0);
  /* a failure of env_close() overrides the sync status with MDBX_PANIC */
  rc = env_close(env, false) ? MDBX_PANIC : rc;
  ENSURE(env, osal_fastmutex_destroy(&env->dbi_lock) == MDBX_SUCCESS);
#if defined(_WIN32) || defined(_WIN64)
  /* remap_guard don't have destructor (Slim Reader/Writer Lock) */
  DeleteCriticalSection(&env->windowsbug_lock);
#else
  ENSURE(env, osal_fastmutex_destroy(&env->remap_guard) == MDBX_SUCCESS);
#endif /* Windows */

#if MDBX_LOCKING > MDBX_LOCKING_SYSV
  lck_t *const stub = lckless_stub(env);
  /* may return an error in a child process after fork(), so the result is ignored */
  lck_ipclock_destroy(&stub->wrt_lock);
#endif /* MDBX_LOCKING */

  /* free the list of reserved shadow pages; each allocation starts
   * sizeof(size_t) bytes before the page pointer kept in the list */
  while ((dp = env->shadow_reserve) != nullptr) {
    MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->ps);
    VALGRIND_MAKE_MEM_DEFINED(&page_next(dp), sizeof(page_t *));
    env->shadow_reserve = page_next(dp);
    void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t));
    osal_free(ptr);
  }
  VALGRIND_DESTROY_MEMPOOL(env);
  osal_free(env);

  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

/* Takes a single (not necessarily consistent) snapshot of the environment info.
 *
 * `out` receives the data, `bytes` is the size of the MDBX_envinfo layout the caller
 * uses: fields located at or after mi_bootid, mi_pgop_stat and mi_dxbid are written
 * only when `bytes` exceeds the corresponding offset. `txn` is optional; `troika` is
 * an output that receives the meta-pages troika used for the snapshot (it is not
 * touched when the environment is not mapped).
 *
 * Returns MDBX_PANIC when ENV_FATAL_ERROR is set, otherwise MDBX_SUCCESS. */
static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, const size_t bytes,
                         troika_t *const troika) {
  const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
  const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
  const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
  if (unlikely(env->flags & ENV_FATAL_ERROR))
    return MDBX_PANIC;

  /* is the environment open?
   * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */
  if (unlikely(!env->dxb_mmap.base)) {
    /* environment not yet opened */
#if 1
    /* default behavior: returns the available info but zeroed the rest */
    memset(out, 0, bytes);
    out->mi_geo.lower = env->geo_in_bytes.lower;
    out->mi_geo.upper = env->geo_in_bytes.upper;
    out->mi_geo.shrink = env->geo_in_bytes.shrink;
    out->mi_geo.grow = env->geo_in_bytes.grow;
    out->mi_geo.current = env->geo_in_bytes.now;
    out->mi_maxreaders = env->max_readers;
    out->mi_dxb_pagesize = env->ps;
    out->mi_sys_pagesize = globals.sys_pagesize;
    if (likely(bytes > size_before_bootid)) {
      out->mi_bootid.current.x = globals.bootid.x;
      out->mi_bootid.current.y = globals.bootid.y;
    }
    return MDBX_SUCCESS;
#else
    /* some users may prefer this behavior: return appropriate error */
    return MDBX_EPERM;
#endif
  }

  /* a write transaction carries its own troika, otherwise tap the meta pages now */
  *troika = (txn && !(txn->flags & MDBX_TXN_RDONLY)) ? txn->tw.troika : meta_tap(env);
  const meta_ptr_t head = meta_recent(env, troika);
  const meta_t *const meta0 = METAPAGE(env, 0);
  const meta_t *const meta1 = METAPAGE(env, 1);
  const meta_t *const meta2 = METAPAGE(env, 2);
  out->mi_recent_txnid = head.txnid;
  out->mi_meta_txnid[0] = troika->txnid[0];
  out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->sign);
  out->mi_meta_txnid[1] = troika->txnid[1];
  out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->sign);
  out->mi_meta_txnid[2] = troika->txnid[2];
  out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->sign);
  if (likely(bytes > size_before_bootid)) {
    memcpy(&out->mi_bootid.meta[0], &meta0->bootid, 16);
    memcpy(&out->mi_bootid.meta[1], &meta1->bootid, 16);
    memcpy(&out->mi_bootid.meta[2], &meta2->bootid, 16);
    /* the database id is taken from meta page 0 only */
    if (likely(bytes > size_before_dxbid))
      memcpy(&out->mi_dxbid, &meta0->dxbid, 16);
  }

  /* geometry defaults to the most recent meta page ... */
  const volatile meta_t *txn_meta = head.ptr_v;
  out->mi_last_pgno = txn_meta->geometry.first_unallocated - 1;
  out->mi_geo.current = pgno2bytes(env, txn_meta->geometry.now);
  if (txn) {
    /* ... but a given transaction overrides it with its own view */
    out->mi_last_pgno = txn->geo.first_unallocated - 1;
    out->mi_geo.current = pgno2bytes(env, txn->geo.end_pgno);

    /* pick the meta page whose txnid matches the transaction: its own txnid when
     * read-only, one xMDBX_TXNID_STEP earlier otherwise; the later checks win, and
     * the recent meta is kept when nothing matches */
    const txnid_t wanna_meta_txnid = (txn->flags & MDBX_TXN_RDONLY) ? txn->txnid : txn->txnid - xMDBX_TXNID_STEP;
    txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta;
    txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta;
    txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta;
  }
  out->mi_geo.lower = pgno2bytes(env, txn_meta->geometry.lower);
  out->mi_geo.upper = pgno2bytes(env, txn_meta->geometry.upper);
  out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->geometry.shrink_pv));
  out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->geometry.grow_pv));
  out->mi_mapsize = env->dxb_mmap.limit;

  /* NOTE(review): `lck` is read below even when env->lck_mmap.lck is null;
   * presumably env->lck then refers to a process-local stub — confirm. */
  const lck_t *const lck = env->lck;
  out->mi_maxreaders = env->max_readers;
  /* without a mapped lock file the number of readers is reported as INT32_MAX */
  out->mi_numreaders = env->lck_mmap.lck ? atomic_load32(&lck->rdt_length, mo_Relaxed) : INT32_MAX;
  out->mi_dxb_pagesize = env->ps;
  out->mi_sys_pagesize = globals.sys_pagesize;

  if (likely(bytes > size_before_bootid)) {
    /* one extra page is counted when the recent txnid differs from meta_sync_txnid */
    const uint64_t unsynced_pages =
        atomic_load64(&lck->unsynced_pages, mo_Relaxed) +
        ((uint32_t)out->mi_recent_txnid != atomic_load32(&lck->meta_sync_txnid, mo_Relaxed));
    out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages);
    /* elapsed times are reported as zero while the corresponding timestamp is unset */
    const uint64_t monotime_now = osal_monotime();
    uint64_t ts = atomic_load64(&lck->eoos_timestamp, mo_Relaxed);
    out->mi_since_sync_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
    ts = atomic_load64(&lck->readers_check_timestamp, mo_Relaxed);
    out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
    out->mi_autosync_threshold = pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed));
    out->mi_autosync_period_seconds16dot16 =
        osal_monotime_to_16dot16_noUnderflow(atomic_load64(&lck->autosync_period, mo_Relaxed));
    out->mi_bootid.current.x = globals.bootid.x;
    out->mi_bootid.current.y = globals.bootid.y;
    out->mi_mode = env->lck_mmap.lck ? lck->envmode.weak : env->flags;
  }

  if (likely(bytes > size_before_pgop_stat)) {
#if MDBX_ENABLE_PGOP_STAT
    out->mi_pgop_stat.newly = atomic_load64(&lck->pgops.newly, mo_Relaxed);
    out->mi_pgop_stat.cow = atomic_load64(&lck->pgops.cow, mo_Relaxed);
    out->mi_pgop_stat.clone = atomic_load64(&lck->pgops.clone, mo_Relaxed);
    out->mi_pgop_stat.split = atomic_load64(&lck->pgops.split, mo_Relaxed);
    out->mi_pgop_stat.merge = atomic_load64(&lck->pgops.merge, mo_Relaxed);
    out->mi_pgop_stat.spill = atomic_load64(&lck->pgops.spill, mo_Relaxed);
    out->mi_pgop_stat.unspill = atomic_load64(&lck->pgops.unspill, mo_Relaxed);
    out->mi_pgop_stat.wops = atomic_load64(&lck->pgops.wops, mo_Relaxed);
    out->mi_pgop_stat.prefault = atomic_load64(&lck->pgops.prefault, mo_Relaxed);
    out->mi_pgop_stat.mincore = atomic_load64(&lck->pgops.mincore, mo_Relaxed);
    out->mi_pgop_stat.msync = atomic_load64(&lck->pgops.msync, mo_Relaxed);
    out->mi_pgop_stat.fsync = atomic_load64(&lck->pgops.fsync, mo_Relaxed);
#else
    /* statistics are compiled out: report zeros */
    memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat));
#endif /* MDBX_ENABLE_PGOP_STAT*/
  }

  /* find the smallest txnid among the occupied reader slots, overall and for
   * this process; both start from the recent txnid */
  txnid_t overall_latter_reader_txnid = out->mi_recent_txnid;
  txnid_t self_latter_reader_txnid = overall_latter_reader_txnid;
  if (env->lck_mmap.lck) {
    for (size_t i = 0; i < out->mi_numreaders; ++i) {
      const uint32_t pid = atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease);
      if (pid) {
        const txnid_t txnid = safe64_read(&lck->rdt[i].txnid);
        if (overall_latter_reader_txnid > txnid)
          overall_latter_reader_txnid = txnid;
        if (pid == env->pid && self_latter_reader_txnid > txnid)
          self_latter_reader_txnid = txnid;
      }
    }
  }
  out->mi_self_latter_reader_txnid = self_latter_reader_txnid;
  out->mi_latter_reader_txnid = overall_latter_reader_txnid;

  osal_compiler_barrier();
  return MDBX_SUCCESS;
}

/* Fills `out` (its first `bytes` bytes) with a self-consistent environment info.
 *
 * env_info_snap() is repeated until two consecutive snapshots are identical
 * within the first `bytes` bytes, ignoring the two "seconds since ..." fields
 * which change continuously. `troika` receives the troika of the last
 * snapshot taken.
 *
 * Returns MDBX_SUCCESS or the error reported by env_info_snap(). */
__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika) {
  MDBX_envinfo snap;
  int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  eASSERT(env, sizeof(snap) >= bytes);
  /* env_info_snap() writes the elapsed-time fields only when `bytes` exceeds
   * the offset of mi_bootid. For a shorter caller-provided layout these
   * fields are outside the `bytes` region of `out`, so they must be neither
   * read here nor compared below. */
  const bool has_timing_fields = bytes > offsetof(MDBX_envinfo, mi_bootid);
  while (1) {
    rc = env_info_snap(env, txn, out, bytes, troika);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    if (has_timing_fields) {
      /* neutralize the continuously changing fields before comparing */
      snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16;
      snap.mi_since_reader_check_seconds16dot16 = out->mi_since_reader_check_seconds16dot16;
    }
    if (likely(memcmp(&snap, out, bytes) == 0))
      return MDBX_SUCCESS;
    /* something changed in between: remember the newer snapshot and retry */
    memcpy(&snap, out, bytes);
  }
}

/* Retrieves information about an environment into `arg`.
 *
 * At least one of `env` and `txn` must be given; when both are given the
 * transaction must belong to that environment. `bytes` must be either the
 * full size of MDBX_envinfo or the size of one of its three shorter prefixes
 * ending right before mi_bootid, mi_pgop_stat or mi_dxbid.
 *
 * Returns MDBX_EINVAL for invalid arguments, otherwise the result of
 * check_txn() / check_env() / env_info(). */
__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) {
  if (unlikely(arg == nullptr || (env == nullptr && txn == nullptr)))
    return LOG_IFERR(MDBX_EINVAL);

  const bool size_is_known = bytes == sizeof(MDBX_envinfo) || bytes == offsetof(MDBX_envinfo, mi_bootid) ||
                             bytes == offsetof(MDBX_envinfo, mi_pgop_stat) ||
                             bytes == offsetof(MDBX_envinfo, mi_dxbid);
  if (unlikely(!size_is_known))
    return LOG_IFERR(MDBX_EINVAL);

  if (txn != nullptr) {
    const int txn_rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR);
    if (unlikely(txn_rc != MDBX_SUCCESS))
      return LOG_IFERR(txn_rc);
  }

  if (env == nullptr) {
    /* only the transaction was given: use its environment */
    env = txn->env;
  } else {
    const int env_rc = check_env(env, false);
    if (unlikely(env_rc != MDBX_SUCCESS))
      return LOG_IFERR(env_rc);
    /* a transaction of a different environment is rejected */
    if (txn != nullptr && unlikely(txn->env != env))
      return LOG_IFERR(MDBX_EINVAL);
  }

  troika_t troika;
  const int rc = env_info(env, txn, arg, bytes, &troika);
  return LOG_IFERR(rc);
}

/* Reads basic information from the data file at `pathname` using a temporary
 * on-stack environment object, without a regular open of an environment.
 *
 * On Windows the multi-byte entry point converts `pathname` with osal_mb2w()
 * and forwards to mdbx_preopen_snapinfoW(); on other platforms the body after
 * the #endif belongs to mdbx_preopen_snapinfo() itself.
 *
 * `out` is zeroed for `bytes` bytes and then filled from the header obtained
 * by dxb_read_header(): page size, geometry, last page number, txnid, and the
 * sign / boot-id in slot 0 plus the database id when the layout selected by
 * `bytes` has room for them.
 *
 * Returns MDBX_EINVAL for a null `out` or an unsupported `bytes`,
 * MDBX_INCOMPATIBLE for an unsuitable system page size, otherwise the result
 * of env_handle_pathname() / osal_openfile() / dxb_read_header(). */
__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, size_t bytes) {
#if defined(_WIN32) || defined(_WIN64)
  wchar_t *wide_path = nullptr;
  const int conv_rc = osal_mb2w(pathname, &wide_path);
  if (unlikely(conv_rc != MDBX_SUCCESS))
    return LOG_IFERR(conv_rc);

  const int snap_rc = mdbx_preopen_snapinfoW(wide_path, out, bytes);
  osal_free(wide_path);
  return LOG_IFERR(snap_rc);
}

__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, size_t bytes) {
#endif /* Windows */
  if (unlikely(out == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
  const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
  const bool size_is_known = bytes == sizeof(MDBX_envinfo) || bytes == size_before_bootid ||
                             bytes == offsetof(MDBX_envinfo, mi_pgop_stat) || bytes == size_before_dxbid;
  if (unlikely(!size_is_known))
    return LOG_IFERR(MDBX_EINVAL);

  const bool has_bootid = bytes > size_before_bootid;
  memset(out, 0, bytes);
  if (likely(has_bootid)) {
    out->mi_bootid.current.x = globals.bootid.x;
    out->mi_bootid.current.y = globals.bootid.y;
  }

  if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) {
    ERROR("unsuitable system pagesize %u", globals.sys_pagesize);
    return LOG_IFERR(MDBX_INCOMPATIBLE);
  }
  out->mi_sys_pagesize = globals.sys_pagesize;

  /* a temporary read-only environment with every handle marked as invalid */
  MDBX_env env;
  memset(&env, 0, sizeof(env));
  env.pid = osal_getpid();
  env.flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION;
  env.stuck_meta = -1;
  env.lazy_fd = INVALID_HANDLE_VALUE;
  env.dsync_fd = INVALID_HANDLE_VALUE;
  env.fd4meta = INVALID_HANDLE_VALUE;
  env.lck_mmap.fd = INVALID_HANDLE_VALUE;
#if defined(_WIN32) || defined(_WIN64)
  env.dxb_lock_event = INVALID_HANDLE_VALUE;
  env.ioring.overlapped_fd = INVALID_HANDLE_VALUE;
#endif /* Windows */
  env_options_init(&env);

  /* each step runs only if all the previous ones succeeded */
  int rc = env_handle_pathname(&env, pathname, 0);
  if (likely(rc == MDBX_SUCCESS))
    rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.pathname.dxb, &env.lazy_fd, 0);

  meta_t header;
  if (likely(rc == MDBX_SUCCESS))
    rc = dxb_read_header(&env, &header, 0, 0);

  if (likely(rc == MDBX_SUCCESS)) {
    /* the page size must be set up first, the byte conversions below depend on env */
    out->mi_dxb_pagesize = env_setup_pagesize(&env, header.pagesize);
    out->mi_geo.lower = pgno2bytes(&env, header.geometry.lower);
    out->mi_geo.upper = pgno2bytes(&env, header.geometry.upper);
    out->mi_geo.shrink = pgno2bytes(&env, pv2pages(header.geometry.shrink_pv));
    out->mi_geo.grow = pgno2bytes(&env, pv2pages(header.geometry.grow_pv));
    out->mi_geo.current = pgno2bytes(&env, header.geometry.now);
    out->mi_last_pgno = header.geometry.first_unallocated - 1;

    /* the header is reported through slot 0 of the per-meta arrays */
    out->mi_recent_txnid = constmeta_txnid(&header);
    out->mi_meta_sign[0] = unaligned_peek_u64(4, &header.sign);
    if (likely(has_bootid)) {
      memcpy(&out->mi_bootid.meta[0], &header.bootid, 16);
      if (likely(bytes > size_before_dxbid))
        memcpy(&out->mi_dxbid, &header.dxbid, 16);
    }
  }

  /* release whatever the temporary environment has acquired */
  env_close(&env, false);
  return LOG_IFERR(rc);
}

/*----------------------------------------------------------------------------*/

/* Sets or changes the database geometry: the lower, current and upper size
 * bounds, the growth step, the shrink threshold and the page size. All size
 * arguments are in bytes.
 *
 * Two modes are handled, selected by whether `env->dxb_mmap.base` is set:
 *  - env is already mapped: negative size/step/threshold arguments are
 *    replaced by the values of the current geometry, `pagesize` must end up
 *    equal to `env->ps`, and the resulting geometry is applied immediately
 *    (stored into the current write transaction, or written via
 *    dxb_sync_locked() when there is no transaction);
 *  - env is not mapped yet: special `pagesize` values select the maximal
 *    (>= INT_MAX), minimal (0) or a default (< 0) page size, and the computed
 *    parameters are only saved into `env->geo_in_bytes` for a later open/create.
 *
 * Error codes produced directly by this function:
 *  - MDBX_EACCESS   - env is mapped and opened with MDBX_RDONLY;
 *  - MDBX_PANIC     - env is not mapped but `env->txn` is set;
 *  - MDBX_EINVAL    - invalid or mismatching page size, or inconsistent bounds;
 *  - MDBX_MAP_FULL  - requested upper bound is below the space already in use;
 *  - MDBX_TOO_LARGE - upper bound exceeds MAX_MAPSIZE or MAX_PAGENO + 1 pages;
 *  - MDBX_TXN_FULL  - the next txnid would exceed MAX_TXNID;
 *  - MDBX_EPERM     - (Windows only) shrinking cannot be enabled, see below.
 * Errors from check_env(), lck_txn_lock(), coherency_fetch_head(),
 * dxb_resize() and dxb_sync_locked() are passed through. */
__cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, intptr_t size_upper,
                                 intptr_t growth_step, intptr_t shrink_threshold, intptr_t pagesize) {
  int rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  MDBX_txn *const txn_owned = env_owned_wrtxn(env);
  /* Set once the write lock is taken here, so that `bailout` releases it. */
  bool should_unlock = false;

#if MDBX_DEBUG && 0 /* minimal steps for testing/debugging are no longer needed */
  if (growth_step < 0) {
    growth_step = 1;
    if (shrink_threshold < 0)
      shrink_threshold = 1;
  }
#endif /* MDBX_DEBUG */

  if (env->dxb_mmap.base) {
    /* env already mapped */
    if (unlikely(env->flags & MDBX_RDONLY))
      return LOG_IFERR(MDBX_EACCESS);

    if (!txn_owned) {
      /* No write transaction is owned by the caller: take the write lock and
       * refresh the basal transaction's view of the meta-pages. */
      int err = lck_txn_lock(env, false);
      if (unlikely(err != MDBX_SUCCESS))
        return LOG_IFERR(err);
      should_unlock = true;
      env->basal_txn->tw.troika = meta_tap(env);
      eASSERT(env, !env->txn && !env->basal_txn->nested);
      env->basal_txn->txnid = env->basal_txn->tw.troika.txnid[env->basal_txn->tw.troika.recent];
      txn_snapshot_oldest(env->basal_txn);
    }

    /* get untouched params from current TXN or DB */
    if (pagesize <= 0 || pagesize >= INT_MAX)
      pagesize = env->ps;
    const geo_t *const geo = env->txn ? &env->txn->geo : &meta_recent(env, &env->basal_txn->tw.troika).ptr_c->geometry;
    if (size_lower < 0)
      size_lower = pgno2bytes(env, geo->lower);
    if (size_now < 0)
      size_now = pgno2bytes(env, geo->now);
    if (size_upper < 0)
      size_upper = pgno2bytes(env, geo->upper);
    if (growth_step < 0)
      growth_step = pgno2bytes(env, pv2pages(geo->grow_pv));
    if (shrink_threshold < 0)
      shrink_threshold = pgno2bytes(env, pv2pages(geo->shrink_pv));

    /* The page size of an already mapped environment cannot be changed. */
    if (pagesize != (intptr_t)env->ps) {
      rc = MDBX_EINVAL;
      goto bailout;
    }
    const size_t usedbytes = pgno2bytes(env, mvcc_snapshot_largest(env, geo->first_unallocated));
    if ((size_t)size_upper < usedbytes) {
      rc = MDBX_MAP_FULL;
      goto bailout;
    }
    /* The current size is never set below the space already in use. */
    if ((size_t)size_now < usedbytes)
      size_now = usedbytes;
  } else {
    /* env NOT yet mapped */
    if (unlikely(env->txn))
      return LOG_IFERR(MDBX_PANIC);

    /* is requested some auto-value for pagesize ? */
    if (pagesize >= INT_MAX /* maximal */)
      pagesize = MDBX_MAX_PAGESIZE;
    else if (pagesize == 0 /* minimal */)
      pagesize = MDBX_MIN_PAGESIZE;
    else if (pagesize < 0 /* default */) {
      pagesize = globals.sys_pagesize;
      if ((uintptr_t)pagesize > MDBX_MAX_PAGESIZE)
        pagesize = MDBX_MAX_PAGESIZE;
      eASSERT(env, (uintptr_t)pagesize >= MDBX_MIN_PAGESIZE);

      /* choose pagesize */
      intptr_t top = (size_now > size_lower) ? size_now : size_lower;
      if (size_upper > top)
        top = size_upper;
      if (top < 0 /* default */)
        top = reasonable_db_maxsize();
      else if (top == 0 /* minimal */)
        top = MIN_MAPSIZE;
      else if (top >= (intptr_t)MAX_MAPSIZE /* maximal */)
        top = MAX_MAPSIZE;

      /* Double the page size until the largest requested size fits into
       * MAX_PAGENO + 1 pages, or the maximal page size is reached. */
      while (top > pagesize * (int64_t)(MAX_PAGENO + 1) && pagesize < MDBX_MAX_PAGESIZE)
        pagesize <<= 1;
    }
  }

  if (pagesize < (intptr_t)MDBX_MIN_PAGESIZE || pagesize > (intptr_t)MDBX_MAX_PAGESIZE || !is_powerof2(pagesize)) {
    rc = MDBX_EINVAL;
    goto bailout;
  }

  /* Resolve the special values of size_lower:
   * 0 selects MIN_MAPSIZE, negative selects a default derived from the page
   * size, and >= INTPTR_MAX selects the maximal reasonable size. */
  const bool size_lower_default = size_lower < 0;
  if (size_lower <= 0) {
    size_lower = (size_lower == 0) ? MIN_MAPSIZE : pagesize * MDBX_WORDBITS;
    if (size_lower / pagesize < MIN_PAGENO)
      size_lower = MIN_PAGENO * pagesize;
  }
  if (size_lower >= INTPTR_MAX) {
    size_lower = reasonable_db_maxsize();
    if ((size_t)size_lower / pagesize > MAX_PAGENO + 1)
      size_lower = pagesize * (MAX_PAGENO + 1);
  }

  if (size_now >= INTPTR_MAX) {
    size_now = reasonable_db_maxsize();
    if ((size_t)size_now / pagesize > MAX_PAGENO + 1)
      size_now = pagesize * (MAX_PAGENO + 1);
  }

  /* Resolve the special values of size_upper: for zero/negative values an
   * upper bound is chosen from size_now, otherwise >= INTPTR_MAX selects
   * the maximal reasonable size. */
  if (size_upper <= 0) {
    if ((growth_step == 0 || size_upper == 0) && size_now >= size_lower)
      size_upper = size_now;
    else if (size_now <= 0 || size_now >= reasonable_db_maxsize() / 2)
      size_upper = reasonable_db_maxsize();
    else if ((size_t)size_now >= MAX_MAPSIZE32 / 2 && (size_t)size_now <= MAX_MAPSIZE32 / 4 * 3)
      size_upper = MAX_MAPSIZE32;
    else {
      size_upper = ceil_powerof2(((size_t)size_now < MAX_MAPSIZE / 4) ? size_now + size_now : size_now + size_now / 2,
                                 MEGABYTE * MDBX_WORDBITS * MDBX_WORDBITS / 32);
      if ((size_t)size_upper > MAX_MAPSIZE)
        size_upper = MAX_MAPSIZE;
    }
    if ((size_t)size_upper / pagesize > (MAX_PAGENO + 1))
      size_upper = pagesize * (MAX_PAGENO + 1);
  } else if (size_upper >= INTPTR_MAX) {
    size_upper = reasonable_db_maxsize();
    if ((size_t)size_upper / pagesize > MAX_PAGENO + 1)
      size_upper = pagesize * (MAX_PAGENO + 1);
  }

  if (unlikely(size_lower < (intptr_t)MIN_MAPSIZE || size_lower > size_upper)) {
    /* paranoia in case of an overflow with improbable values */
    rc = MDBX_EINVAL;
    goto bailout;
  }

  if (size_now <= 0) {
    size_now = size_lower;
    if (size_upper >= size_lower && size_now > size_upper)
      size_now = size_upper;
  }

  if ((uint64_t)size_lower / pagesize < MIN_PAGENO) {
    size_lower = pagesize * MIN_PAGENO;
    if (unlikely(size_lower > size_upper)) {
      /* paranoia in case of an overflow with improbable values */
      rc = MDBX_EINVAL;
      goto bailout;
    }
    if (size_now < size_lower)
      size_now = size_lower;
  }

  if (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) {
    rc = MDBX_TOO_LARGE;
    goto bailout;
  }

  /* Round all bounds up to the larger of the system and the database page size. */
  const size_t unit = (globals.sys_pagesize > (size_t)pagesize) ? globals.sys_pagesize : (size_t)pagesize;
  size_lower = ceil_powerof2(size_lower, unit);
  size_upper = ceil_powerof2(size_upper, unit);
  size_now = ceil_powerof2(size_now, unit);

  /* LY: pick a size_upper value which is:
   *  - a multiple of the page size
   *  - within the MAX_MAPSIZE and MAX_PAGENO limits */
  while (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) {
    if ((size_t)size_upper < unit + MIN_MAPSIZE || (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) {
      /* paranoia in case of an overflow with improbable values */
      rc = MDBX_EINVAL;
      goto bailout;
    }
    size_upper -= unit;
    if ((size_t)size_upper < (size_t)size_lower)
      size_lower = size_upper;
  }
  eASSERT(env, (size_upper - size_lower) % globals.sys_pagesize == 0);

  /* Clamp the current size into the [size_lower, size_upper] range. */
  if (size_now < size_lower)
    size_now = size_lower;
  if (size_now > size_upper)
    size_now = size_upper;

  if (growth_step < 0) {
    /* Choose a default growth step from the size range and clamp it. */
    growth_step = ((size_t)(size_upper - size_lower)) / 42;
    if (!size_lower_default && growth_step > size_lower && size_lower < (intptr_t)MEGABYTE)
      growth_step = size_lower;
    else if (growth_step / size_lower > 64)
      growth_step = size_lower << 6;
    if (growth_step < 65536)
      growth_step = 65536;
    if ((size_upper - size_lower) / growth_step > 65536)
      growth_step = (size_upper - size_lower) >> 16;
    const intptr_t growth_step_limit = MEGABYTE * ((MDBX_WORDBITS > 32) ? 4096 : 256);
    if (growth_step > growth_step_limit)
      growth_step = growth_step_limit;
  }
  /* A positive shrink threshold is given a non-zero growth step,
   * which is then rounded up to `unit` below. */
  if (growth_step == 0 && shrink_threshold > 0)
    growth_step = 1;
  growth_step = ceil_powerof2(growth_step, unit);

  /* The default shrink threshold is twice the growth step. */
  if (shrink_threshold < 0)
    shrink_threshold = growth_step + growth_step;
  shrink_threshold = ceil_powerof2(shrink_threshold, unit);

  //----------------------------------------------------------------------------

  if (!env->dxb_mmap.base) {
    /* save user's geo-params for future open/create */
    if (pagesize != (intptr_t)env->ps)
      env_setup_pagesize(env, pagesize);
    env->geo_in_bytes.lower = size_lower;
    env->geo_in_bytes.now = size_now;
    env->geo_in_bytes.upper = size_upper;
    /* grow/shrink are passed through the pages2pv()/pv2pages() round-trip
     * before being stored. */
    env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, growth_step))));
    env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(pages2pv(bytes2pgno(env, shrink_threshold))));
    env_options_adjust_defaults(env);

    ENSURE(env, env->geo_in_bytes.lower >= MIN_MAPSIZE);
    ENSURE(env, env->geo_in_bytes.lower / (unsigned)pagesize >= MIN_PAGENO);
    ENSURE(env, env->geo_in_bytes.lower % (unsigned)pagesize == 0);
    ENSURE(env, env->geo_in_bytes.lower % globals.sys_pagesize == 0);

    ENSURE(env, env->geo_in_bytes.upper <= MAX_MAPSIZE);
    ENSURE(env, env->geo_in_bytes.upper / (unsigned)pagesize <= MAX_PAGENO + 1);
    ENSURE(env, env->geo_in_bytes.upper % (unsigned)pagesize == 0);
    ENSURE(env, env->geo_in_bytes.upper % globals.sys_pagesize == 0);

    ENSURE(env, env->geo_in_bytes.now >= env->geo_in_bytes.lower);
    ENSURE(env, env->geo_in_bytes.now <= env->geo_in_bytes.upper);
    ENSURE(env, env->geo_in_bytes.now % (unsigned)pagesize == 0);
    ENSURE(env, env->geo_in_bytes.now % globals.sys_pagesize == 0);

    ENSURE(env, env->geo_in_bytes.grow % (unsigned)pagesize == 0);
    ENSURE(env, env->geo_in_bytes.grow % globals.sys_pagesize == 0);
    ENSURE(env, env->geo_in_bytes.shrink % (unsigned)pagesize == 0);
    ENSURE(env, env->geo_in_bytes.shrink % globals.sys_pagesize == 0);

    rc = MDBX_SUCCESS;
  } else {
    /* apply new params to opened environment */
    ENSURE(env, pagesize == (intptr_t)env->ps);
    meta_t meta;
    memset(&meta, 0, sizeof(meta));
    if (!env->txn) {
      /* No transaction: prepare a copy of the most recent meta-page with the
       * next txnid, to be written by dxb_sync_locked() below. */
      const meta_ptr_t head = meta_recent(env, &env->basal_txn->tw.troika);

      uint64_t timestamp = 0;
      /* The string literal is always true, i.e. this loops until
       * coherency_fetch_head() either succeeds or fails with something other
       * than MDBX_RESULT_TRUE. */
      while ("workaround for "
             "https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
        rc = coherency_fetch_head(env->basal_txn, head, &timestamp);
        if (likely(rc == MDBX_SUCCESS))
          break;
        if (unlikely(rc != MDBX_RESULT_TRUE))
          goto bailout;
      }
      meta = *head.ptr_c;
      const txnid_t txnid = safe64_txnid_next(head.txnid);
      if (unlikely(txnid > MAX_TXNID)) {
        rc = MDBX_TXN_FULL;
        ERROR("txnid overflow, raise %d", rc);
        goto bailout;
      }
      meta_set_txnid(env, &meta, txnid);
    }

    const geo_t *const current_geo = &(env->txn ? env->txn : env->basal_txn)->geo;
    /* update env-geo to avoid influences */
    env->geo_in_bytes.now = pgno2bytes(env, current_geo->now);
    env->geo_in_bytes.lower = pgno2bytes(env, current_geo->lower);
    env->geo_in_bytes.upper = pgno2bytes(env, current_geo->upper);
    env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(current_geo->grow_pv));
    env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(current_geo->shrink_pv));

    /* Convert the requested byte values into the page-based geometry. */
    geo_t new_geo;
    new_geo.lower = bytes2pgno(env, size_lower);
    new_geo.now = bytes2pgno(env, size_now);
    new_geo.upper = bytes2pgno(env, size_upper);
    new_geo.grow_pv = pages2pv(bytes2pgno(env, growth_step));
    new_geo.shrink_pv = pages2pv(bytes2pgno(env, shrink_threshold));
    new_geo.first_unallocated = current_geo->first_unallocated;

    ENSURE(env, pgno_align2os_bytes(env, new_geo.lower) == (size_t)size_lower);
    ENSURE(env, pgno_align2os_bytes(env, new_geo.upper) == (size_t)size_upper);
    ENSURE(env, pgno_align2os_bytes(env, new_geo.now) == (size_t)size_now);
    ENSURE(env, new_geo.grow_pv == pages2pv(pv2pages(new_geo.grow_pv)));
    ENSURE(env, new_geo.shrink_pv == pages2pv(pv2pages(new_geo.shrink_pv)));

    ENSURE(env, (size_t)size_lower >= MIN_MAPSIZE);
    ENSURE(env, new_geo.lower >= MIN_PAGENO);
    ENSURE(env, (size_t)size_upper <= MAX_MAPSIZE);
    ENSURE(env, new_geo.upper <= MAX_PAGENO + 1);
    ENSURE(env, new_geo.now >= new_geo.first_unallocated);
    ENSURE(env, new_geo.upper >= new_geo.now);
    ENSURE(env, new_geo.now >= new_geo.lower);

    /* Nothing is resized or written when the geometry is unchanged. */
    if (memcmp(current_geo, &new_geo, sizeof(geo_t)) != 0) {
#if defined(_WIN32) || defined(_WIN64)
      /* Was DB shrinking disabled before and now it will be enabled? */
      if (new_geo.lower < new_geo.upper && new_geo.shrink_pv &&
          !(current_geo->lower < current_geo->upper && current_geo->shrink_pv)) {
        if (!env->lck_mmap.lck) {
          rc = MDBX_EPERM;
          goto bailout;
        }
        int err = lck_rdt_lock(env);
        if (unlikely(MDBX_IS_ERROR(err))) {
          rc = err;
          goto bailout;
        }

        /* Check if there are any reading threads that do not use the SRWL */
        const size_t CurrentTid = GetCurrentThreadId();
        const reader_slot_t *const begin = env->lck_mmap.lck->rdt;
        const reader_slot_t *const end = begin + atomic_load32(&env->lck_mmap.lck->rdt_length, mo_AcquireRelease);
        for (const reader_slot_t *reader = begin; reader < end; ++reader) {
          if (reader->pid.weak == env->pid && reader->tid.weak != CurrentTid) {
            /* At least one thread may don't use SRWL */
            rc = MDBX_EPERM;
            break;
          }
        }

        lck_rdt_unlock(env);
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      }
#endif /* Windows */

      if (new_geo.now != current_geo->now || new_geo.upper != current_geo->upper) {
        rc = dxb_resize(env, current_geo->first_unallocated, new_geo.now, new_geo.upper, explicit_resize);
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      }
      if (env->txn) {
        /* Inside a transaction: store the new geometry into it and mark it dirty. */
        env->txn->geo = new_geo;
        env->txn->flags |= MDBX_TXN_DIRTY;
      } else {
        /* No transaction: write the prepared meta-page now and pick up
         * the `now`/`upper` values left in it by dxb_sync_locked(). */
        meta.geometry = new_geo;
        rc = dxb_sync_locked(env, env->flags, &meta, &env->basal_txn->tw.troika);
        if (likely(rc == MDBX_SUCCESS)) {
          env->geo_in_bytes.now = pgno2bytes(env, new_geo.now = meta.geometry.now);
          env->geo_in_bytes.upper = pgno2bytes(env, new_geo.upper = meta.geometry.upper);
        }
      }
    }
    if (likely(rc == MDBX_SUCCESS)) {
      /* update env-geo to avoid influences */
      eASSERT(env, env->geo_in_bytes.now == pgno2bytes(env, new_geo.now));
      env->geo_in_bytes.lower = pgno2bytes(env, new_geo.lower);
      eASSERT(env, env->geo_in_bytes.upper == pgno2bytes(env, new_geo.upper));
      env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(new_geo.grow_pv));
      env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(new_geo.shrink_pv));
    }
  }

bailout:
  /* Release the write lock only if it was taken by this call. */
  if (should_unlock)
    lck_txn_unlock(env);
  return LOG_IFERR(rc);
}

/* Validates the environment handle and delegates to env_sync(), passing
 * `force` and `nonblock` through unchanged.
 * Returns the check_env() error when validation fails, otherwise the result
 * of env_sync(). */
__cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) {
  const int err = check_env(env, true);
  if (likely(err == MDBX_SUCCESS))
    return LOG_IFERR(env_sync(env, force, nonblock));

  return LOG_IFERR(err);
}

/*----------------------------------------------------------------------------*/

/* Adds the counters of the single tree `db` to the accumulated statistics `st`.
 * `bytes` is the size of the caller's MDBX_stat structure: `ms_mod_txnid` is
 * updated (to the maximum seen so far) only when the structure is large enough
 * to contain that field. */
static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) {
  st->ms_depth += db->height;
  st->ms_branch_pages += db->branch_pages;
  st->ms_leaf_pages += db->leaf_pages;
  st->ms_overflow_pages += db->large_pages;
  st->ms_entries += db->items;

  const size_t size_with_modtxnid = offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid);
  if (unlikely(bytes < size_with_modtxnid))
    return;

  if (!(st->ms_mod_txnid > db->mod_txnid))
    st->ms_mod_txnid = db->mod_txnid;
}

/* Collects summary statistics for the transaction `txn` into `st`.
 *
 * The first `bytes` bytes of `st` are zeroed, then the counters of every
 * valid and non-stale table handle starting from MAIN_DBI are summed up by
 * stat_add(). After that, unless the main table is MDBX_DUPSORT or empty,
 * the main table is scanned for N_TREE nodes (named tables) and those not
 * matching an already accounted handle are added too.
 *
 * Returns MDBX_SUCCESS, an error from check_txn()/cursor_init()/the tree
 * traversal, or MDBX_CORRUPTED when a table node has an unexpected size. */
static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) {
  memset(st, 0, bytes);

  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  cursor_couple_t couple;
  rc = cursor_init(&couple.outer, (MDBX_txn *)txn, MAIN_DBI);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  const MDBX_env *const env = txn->env;
  st->ms_psize = env->ps;

  /* assuming GC is internal and not subject for accounting */
  TXN_FOREACH_DBI_FROM(txn, dbi, MAIN_DBI) {
    const bool usable = (txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID;
    if (usable)
      stat_add(txn->dbs + dbi, st, bytes);
  }

  /* Named tables exist only as records of a non-dupsort, non-empty main table
   * (TODO: use `md_subs` field). */
  if ((txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) || !txn->dbs[MAIN_DBI].items)
    return MDBX_SUCCESS;

  /* scan and account not opened named tables */
  for (rc = tree_search(&couple.outer, nullptr, Z_FIRST); rc == MDBX_SUCCESS;
       rc = cursor_sibling_right(&couple.outer)) {
    const page_t *const leaf = couple.outer.pg[couple.outer.top];
    for (size_t n = 0; n < page_numkeys(leaf); n++) {
      const node_t *const node = page_node(leaf, n);
      if (node_flags(node) != N_TREE)
        continue;
      if (unlikely(node_ds(node) != sizeof(tree_t))) {
        ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node));
        return MDBX_CORRUPTED;
      }

      /* skip opened and already accounted */
      const MDBX_val name = {node_key(node), node_ks(node)};
      bool already_accounted = false;
      TXN_FOREACH_DBI_USER(txn, dbi) {
        if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID &&
            env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) {
          already_accounted = true;
          break;
        }
      }
      if (already_accounted)
        continue;

      /* copy out the tree descriptor stored as the node's data */
      tree_t tree;
      memcpy(&tree, node_data(node), sizeof(tree));
      stat_add(&tree, st, bytes);
    }
  }

  /* the traversal is expected to end with MDBX_NOTFOUND */
  return unlikely(rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS;
}

/* Retrieves statistics into `dest`, whose size `bytes` must be either
 * sizeof(MDBX_stat) or the size of the structure up to (not including) the
 * `ms_mod_txnid` field; MDBX_EINVAL is returned otherwise or when `dest` is null.
 *
 * When `txn` is given it is used directly (and must belong to `env` if `env`
 * is also given). Otherwise the statistics are taken within the write
 * transaction returned by env_owned_wrtxn(), if any, or within a temporary
 * read-only transaction which is aborted afterwards. */
__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) {
  if (unlikely(dest == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
  const bool size_is_known = (bytes == sizeof(MDBX_stat)) || (bytes == size_before_modtxnid);
  if (unlikely(!size_is_known))
    return LOG_IFERR(MDBX_EINVAL);

  if (likely(txn != nullptr)) {
    if (env != nullptr && unlikely(txn->env != env))
      return LOG_IFERR(MDBX_EINVAL);
    return LOG_IFERR(stat_acc(txn, dest, bytes));
  }

  int err = check_env(env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  MDBX_txn *work_txn = env_owned_wrtxn(env);
  if (work_txn != nullptr) {
    /* inside write-txn */
    return LOG_IFERR(stat_acc(work_txn, dest, bytes));
  }

  /* no transaction at hand: use a temporary read-only one */
  err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &work_txn);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  const int rc = stat_acc(work_txn, dest, bytes);
  err = mdbx_txn_abort(work_txn);
  if (unlikely(err != MDBX_SUCCESS)) {
    /* an abort failure takes precedence over the stat_acc() result */
    return LOG_IFERR(err);
  }
  return LOG_IFERR(rc);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/*------------------------------------------------------------------------------
 * Readers API */

/* Enumerates the occupied slots of the reader table and calls `func` for each.
 *
 * For every slot with a non-zero pid a consistent snapshot of the slot fields
 * is taken (re-reading until two consecutive reads agree), then the lag and the
 * used/retained byte counts are computed against the most recent meta-page
 * for slots holding a valid txnid.
 *
 * Returns MDBX_RESULT_TRUE when there is no lock-table or no occupied slot,
 * the check_env() error, MDBX_EINVAL for a null `func`, or otherwise the
 * result of the last `func` call (enumeration stops at the first result other
 * than MDBX_SUCCESS). */
__cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func, void *ctx) {
  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(func == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  rc = MDBX_RESULT_TRUE;
  lck_t *const lck = env->lck_mmap.lck;
  if (unlikely(lck == nullptr))
    return LOG_IFERR(rc);

  int serial = 0;
  const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
  for (size_t slot = 0; slot < snap_nreaders; slot++) {
    const reader_slot_t *const r = lck->rdt + slot;

    /* take a snapshot of the slot, repeating while it changes under us */
    uint32_t pid;
    txnid_t txnid = 0;
    uint64_t tid = 0;
    pgno_t pages_used = 0;
    uint64_t reader_pages_retired = 0;
    for (;;) {
      pid = atomic_load32(&r->pid, mo_AcquireRelease);
      if (!pid)
        break;
      txnid = safe64_read(&r->txnid);
      tid = atomic_load64(&r->tid, mo_Relaxed);
      pages_used = atomic_load32(&r->snapshot_pages_used, mo_Relaxed);
      reader_pages_retired = atomic_load64(&r->snapshot_pages_retired, mo_Relaxed);
      if (unlikely(txnid != safe64_read(&r->txnid) || pid != atomic_load32(&r->pid, mo_AcquireRelease) ||
                   tid != atomic_load64(&r->tid, mo_Relaxed) ||
                   pages_used != atomic_load32(&r->snapshot_pages_used, mo_Relaxed) ||
                   reader_pages_retired != atomic_load64(&r->snapshot_pages_retired, mo_Relaxed)))
        continue;
      break;
    }
    if (!pid)
      continue /* the slot is free */;

    eASSERT(env, txnid > 0);
    if (txnid >= SAFE64_INVALID_THRESHOLD)
      txnid = 0;

    size_t bytes_used = 0;
    size_t bytes_retained = 0;
    uint64_t lag = 0;
    if (txnid) {
      troika_t troika = meta_tap(env);
      for (;;) {
        const meta_ptr_t head = meta_recent(env, &troika);
        const uint64_t head_pages_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired);
        if (unlikely(meta_should_retry(env, &troika) ||
                     head_pages_retired != unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired)))
          continue /* the head has been changed, try again */;

        lag = (head.txnid - txnid) / xMDBX_TXNID_STEP;
        bytes_used = pgno2bytes(env, pages_used);
        if (head_pages_retired > reader_pages_retired)
          bytes_retained = pgno2bytes(env, (pgno_t)(head_pages_retired - reader_pages_retired));
        else
          bytes_retained = 0;
        break;
      }
    }

    rc = func(ctx, ++serial, (unsigned)slot, pid, (mdbx_tid_t)((intptr_t)tid), txnid, lag, bytes_used, bytes_retained);
    if (unlikely(rc != MDBX_SUCCESS))
      break;
  }

  return LOG_IFERR(rc);
}

/* Delegates to mvcc_cleanup_dead() for `env`. When `dead` is not null it is
 * zeroed first and then passed on so the callee can report a count through it.
 * Returns the result of mvcc_cleanup_dead(). */
__cold int mdbx_reader_check(MDBX_env *env, int *dead) {
  if (dead != nullptr)
    *dead = 0;

  const int rc = mvcc_cleanup_dead(env, false, dead);
  return LOG_IFERR(rc);
}

/* Binds a reader slot to the calling thread.
 *
 * Returns:
 *  - the check_env() error for an invalid environment;
 *  - MDBX_EINVAL (with MDBX_EXCLUSIVE) or MDBX_EPERM when there is no lock-table;
 *  - MDBX_EINVAL when the ENV_TXKEY flag is not set (MDBX_NOSTICKYTHREADS mode);
 *  - MDBX_RESULT_TRUE when the thread already has a slot, or MDBX_BAD_RSLOT
 *    if that slot belongs to another process;
 *  - otherwise the `err` field of the mvcc_bind_slot() result. */
__cold int mdbx_thread_register(const MDBX_env *env) {
  const int err = check_env(env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(env->lck_mmap.lck == nullptr)) {
    if (env->flags & MDBX_EXCLUSIVE)
      return LOG_IFERR(MDBX_EINVAL);
    return LOG_IFERR(MDBX_EPERM);
  }

  if (unlikely(!(env->flags & ENV_TXKEY))) {
    eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS);
    return LOG_IFERR(MDBX_EINVAL) /* MDBX_NOSTICKYTHREADS mode */;
  }

  eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY);
  reader_slot_t *const slot = thread_rthc_get(env->me_txkey);
  if (likely(slot == nullptr))
    return LOG_IFERR(mvcc_bind_slot((MDBX_env *)env).err);

  /* the thread already has a slot: verify it is really ours */
  eASSERT(env, slot->pid.weak == env->pid);
  eASSERT(env, slot->tid.weak == osal_thread_self());
  if (unlikely(slot->pid.weak != env->pid))
    return LOG_IFERR(MDBX_BAD_RSLOT);
  return MDBX_RESULT_TRUE /* already registered */;
}

/* Releases the reader slot bound to the calling thread.
 *
 * Returns:
 *  - the check_env() error for an invalid environment;
 *  - MDBX_RESULT_TRUE when there is nothing to release: no lock-table,
 *    the ENV_TXKEY flag is not set (MDBX_NOSTICKYTHREADS mode), or the thread
 *    has no slot;
 *  - MDBX_BAD_RSLOT when the slot's pid/tid do not match the caller;
 *  - MDBX_BUSY when the slot still holds a txnid below SAFE64_INVALID_THRESHOLD;
 *  - MDBX_SUCCESS after the slot has been released. */
__cold int mdbx_thread_unregister(const MDBX_env *env) {
  const int err = check_env(env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(env->lck_mmap.lck == nullptr))
    return MDBX_RESULT_TRUE;

  if (unlikely(!(env->flags & ENV_TXKEY))) {
    eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS);
    return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */;
  }

  eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY);
  reader_slot_t *const slot = thread_rthc_get(env->me_txkey);
  if (unlikely(slot == nullptr))
    return MDBX_RESULT_TRUE /* not registered */;

  eASSERT(env, slot->pid.weak == env->pid);
  if (unlikely(slot->pid.weak != env->pid || slot->tid.weak != osal_thread_self()))
    return LOG_IFERR(MDBX_BAD_RSLOT);

  eASSERT(env, slot->txnid.weak >= SAFE64_INVALID_THRESHOLD);
  if (unlikely(slot->txnid.weak < SAFE64_INVALID_THRESHOLD))
    return LOG_IFERR(MDBX_BUSY) /* transaction is still active */;

  /* clear the slot's pid, raise the table's refresh flag and drop the
   * thread's reference to the slot */
  atomic_store32(&slot->pid, 0, mo_Relaxed);
  atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease);
  thread_rthc_set(env->me_txkey, nullptr);
  return MDBX_SUCCESS;
}

/*------------------------------------------------------------------------------
 * Locking API */

/* Acquires the write-transaction lock of `env` via lck_txn_lock().
 *
 * Returns the check_env() error, MDBX_EACCESS for a read-only environment,
 * MDBX_BUSY when `dont_wait` is set and the basal transaction has an owner
 * or is not finished, otherwise the result of lck_txn_lock(). */
int mdbx_txn_lock(MDBX_env *env, bool dont_wait) {
  const int err = check_env(env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(env->flags & MDBX_RDONLY))
    return LOG_IFERR(MDBX_EACCESS);

  if (dont_wait) {
    /* quick rejection without touching the lock itself */
    const bool in_use = env->basal_txn->owner || !(env->basal_txn->flags & MDBX_TXN_FINISHED);
    if (unlikely(in_use))
      return LOG_IFERR(MDBX_BUSY);
  }

  return LOG_IFERR(lck_txn_lock(env, dont_wait));
}

/* Releases the write-transaction lock of `env` via lck_txn_unlock().
 *
 * Returns the check_env() error, MDBX_EACCESS for a read-only environment,
 * MDBX_THREAD_MISMATCH (only with MDBX_TXN_CHECKOWNER) when the calling thread
 * is not the owner of the basal transaction, MDBX_BUSY when the basal
 * transaction is not finished, otherwise MDBX_SUCCESS. */
int mdbx_txn_unlock(MDBX_env *env) {
  const int err = check_env(env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely((env->flags & MDBX_RDONLY) != 0))
    return LOG_IFERR(MDBX_EACCESS);

#if MDBX_TXN_CHECKOWNER
  if (unlikely(env->basal_txn->owner != osal_thread_self()))
    return LOG_IFERR(MDBX_THREAD_MISMATCH);
#endif /* MDBX_TXN_CHECKOWNER */

  if (unlikely(!(env->basal_txn->flags & MDBX_TXN_FINISHED)))
    return LOG_IFERR(MDBX_BUSY);

  lck_txn_unlock(env);
  return MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Converts a 64-bit key back into the `double` whose bit pattern it encodes.
 * A key with the top bit set (negative as int64_t) has that bit cleared by
 * adding 2^63 modulo 2^64; any other key has all of its bits inverted. */
static inline double key2double(const int64_t key) {
  const uint64_t top_bit = UINT64_C(0x8000000000000000);
  const uint64_t all_ones = UINT64_C(0xffffFFFFffffFFFF);

  union {
    uint64_t bits;
    double value;
  } pun;

  if (key < 0)
    pun.bits = (uint64_t)key + top_bit;
  else
    pun.bits = all_ones - (uint64_t)key;
  return pun.value;
}

/* Converts the `double` pointed to by `ptr` into a 64-bit key:
 * a bit pattern with the sign bit set is inverted, otherwise the top bit is
 * set (2^63 is added).
 *
 * The bit pattern is fetched with memcpy() rather than by dereferencing the
 * pointer as `const int64_t *`: accessing a `double` object through an
 * `int64_t` lvalue violates the effective-type (strict aliasing) rules, and
 * memcpy() also places no alignment requirement on the caller's pointer. */
static inline uint64_t double2key(const double *const ptr) {
  STATIC_ASSERT(sizeof(double) == sizeof(int64_t));
  int64_t i;
  memcpy(&i, ptr, sizeof(i));
  const uint64_t u = (i < 0) ? UINT64_C(0xffffFFFFffffFFFF) - i : i + UINT64_C(0x8000000000000000);
  if (ASSERT_ENABLED()) {
    /* self-check: the key must convert back to exactly the same bits */
    const double f = key2double(u);
    assert(memcmp(&f, ptr, sizeof(double)) == 0);
  }
  return u;
}

/* Converts a 32-bit key back into the `float` whose bit pattern it encodes.
 * A key with the top bit set (negative as int32_t) has that bit cleared by
 * adding 2^31 modulo 2^32; any other key has all of its bits inverted. */
static inline float key2float(const int32_t key) {
  const uint32_t top_bit = UINT32_C(0x80000000);
  const uint32_t all_ones = UINT32_C(0xffffFFFF);

  union {
    uint32_t bits;
    float value;
  } pun;

  if (key < 0)
    pun.bits = (uint32_t)key + top_bit;
  else
    pun.bits = all_ones - (uint32_t)key;
  return pun.value;
}

/* Converts the `float` pointed to by `ptr` into a 32-bit key:
 * a bit pattern with the sign bit set is inverted, otherwise the top bit is
 * set (2^31 is added).
 *
 * The bit pattern is fetched with memcpy() rather than by dereferencing the
 * pointer as `const int32_t *`: accessing a `float` object through an
 * `int32_t` lvalue violates the effective-type (strict aliasing) rules, and
 * memcpy() also places no alignment requirement on the caller's pointer. */
static inline uint32_t float2key(const float *const ptr) {
  STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
  int32_t i;
  memcpy(&i, ptr, sizeof(i));
  const uint32_t u = (i < 0) ? UINT32_C(0xffffFFFF) - i : i + UINT32_C(0x80000000);
  if (ASSERT_ENABLED()) {
    /* self-check: the key must convert back to exactly the same bits */
    const float f = key2float(u);
    assert(memcmp(&f, ptr, sizeof(float)) == 0);
  }
  return u;
}

/* Returns the 64-bit key produced by double2key() for the given value. */
uint64_t mdbx_key_from_double(const double ieee754_64bit) {
  const uint64_t key = double2key(&ieee754_64bit);
  return key;
}

/* Returns the 64-bit key produced by double2key() for the pointed-to value. */
uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) {
  const uint64_t key = double2key(ieee754_64bit);
  return key;
}

/* Returns the 32-bit key produced by float2key() for the given value. */
uint32_t mdbx_key_from_float(const float ieee754_32bit) {
  const uint32_t key = float2key(&ieee754_32bit);
  return key;
}

/* Returns the 32-bit key produced by float2key() for the pointed-to value. */
uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) {
  const uint32_t key = float2key(ieee754_32bit);
  return key;
}

#define IEEE754_DOUBLE_MANTISSA_SIZE 52
#define IEEE754_DOUBLE_EXPONENTA_BIAS 0x3FF
#define IEEE754_DOUBLE_EXPONENTA_MAX 0x7FF
#define IEEE754_DOUBLE_IMPLICIT_LEAD UINT64_C(0x0010000000000000)
#define IEEE754_DOUBLE_MANTISSA_MASK UINT64_C(0x000FFFFFFFFFFFFF)
#define IEEE754_DOUBLE_MANTISSA_AMAX UINT64_C(0x001FFFFFFFFFFFFF)

/* Counts the leading zero bits of a 64-bit value.
 * Uses a compiler builtin (GNU C compatible) or a bit-scan intrinsic (MSVC)
 * when available, otherwise a portable table-driven fallback.
 * NOTE(review): the builtins/intrinsics do not define a result for zero,
 * so presumably callers always pass a non-zero value - confirm. */
static inline int clz64(uint64_t value) {
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_clzl)
  /* pick the builtin whose operand width matches uint64_t */
  if (sizeof(value) == sizeof(int))
    return __builtin_clz(value);
  if (sizeof(value) == sizeof(long))
    return __builtin_clzl(value);
#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || __has_builtin(__builtin_clzll)
  return __builtin_clzll(value);
#endif /* have(long long) && long long == uint64_t */
#endif /* GNU C */

#if defined(_MSC_VER)
  unsigned long msb;
#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64)
  _BitScanReverse64(&msb, value);
  return 63 - msb;
#else
  /* 32-bit target: scan the high half first, then the low one */
  if (value > UINT32_MAX) {
    _BitScanReverse(&msb, (uint32_t)(value >> 32));
    return 31 - msb;
  }
  _BitScanReverse(&msb, (uint32_t)value);
  return 63 - msb;
#endif
#endif /* MSVC */

  /* portable fallback: smear the highest set bit over all lower bits,
   * then look the result up through a multiplicative hash */
  static const uint8_t debruijn_clz64[64] = {63, 16, 62, 7,  15, 36, 61, 3,  6,  14, 22, 26, 35, 47, 60, 2,
                                             9,  5,  28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1,
                                             17, 8,  37, 4,  23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18,
                                             38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0};
  for (unsigned step = 1; step <= 32; step <<= 1)
    value |= value >> step;
  const uint64_t hash = value * UINT64_C(0x03F79D71B4CB0A89);
  return debruijn_clz64[hash >> 58];
}

/* Shifts `u64` right by `-shift` bits (`shift` must be negative) with rounding
 * to nearest, where exact ties are resolved towards the even result. */
static inline uint64_t round_mantissa(const uint64_t u64, int shift) {
  assert(shift < 0 && u64 > 0);
  const int drop = -shift;
  /* half of the unit in the last kept place */
  const unsigned half = 1 << (drop - 1);
  /* lowest bit that survives the shift */
  const unsigned kept_lsb = (unsigned)(u64 >> drop) & 1;
  /* with an even kept part the bias is one less than half, so a tie rounds down */
  const unsigned tie2even = kept_lsb ^ 1;
  const uint64_t biased = u64 + half - tie2even;
  return biased >> drop;
}

/* Converts a non-zero magnitude into the `exponent:mantissa` bit-field
 * (i.e. without the sign) of an IEEE754 double, rounding via round_mantissa()
 * when the magnitude has more significant bits than the mantissa can hold. */
static inline uint64_t jsoninteger_magnitude2bits(const uint64_t magnitude) {
  assert(magnitude > 0);
  int shift = clz64(magnitude) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1);
  uint64_t mantissa;
  if (unlikely(shift < 0)) {
    /* too many significant bits: drop the excess with rounding, and once more
     * if the rounding itself carried out of the mantissa range */
    mantissa = round_mantissa(magnitude, shift);
    if (mantissa > IEEE754_DOUBLE_MANTISSA_AMAX)
      mantissa = round_mantissa(magnitude, --shift);
  } else {
    /* the left shift is done only for a non-negative count,
     * since shifting by a negative count is undefined behavior */
    mantissa = magnitude << shift;
  }

  assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD && mantissa <= IEEE754_DOUBLE_MANTISSA_AMAX);
  const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + IEEE754_DOUBLE_MANTISSA_SIZE - shift;
  assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX);
  return (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) + (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD);
}

/* Converts a 64-bit signed integer into the same 64-bit key which
 * mdbx_key_from_double() produces for `(double)json_integer` (this is
 * verified by an assertion), using integer arithmetic only.
 *
 * Zero maps to 2^63, positive values to keys above it and negative values to
 * keys below it. */
uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) {
  const uint64_t bias = UINT64_C(0x8000000000000000);
  if (json_integer == 0)
    return bias;

  uint64_t key;
  if (json_integer > 0)
    key = bias + jsoninteger_magnitude2bits((uint64_t)json_integer);
  else
    /* the magnitude is computed in unsigned arithmetic,
     * since `-json_integer` overflows for INT64_MIN */
    key = bias - 1 - jsoninteger_magnitude2bits(UINT64_C(0) - (uint64_t)json_integer);

  /* Workaround for MSVC error LNK2019: unresolved external
   * symbol __except1 referenced in function __ftol3_except */
#if !defined(_MSC_VER) || defined(_DEBUG)
  assert(key == mdbx_key_from_double((double)json_integer));
#endif /* Workaround for MSVC */
  return key;
}

/* Converts an 8-byte key back into a 64-bit signed integer.
 *
 * Keys at or above 2^63 yield non-negative results, keys below it yield
 * non-positive ones. Magnitudes too large for int64_t saturate to
 * INT64_MAX / INT64_MIN, and magnitudes below one yield 0.
 *
 * The key equal to 2^63 (the key of integer zero) is handled as the
 * non-negative case: treating it as negative would wrap `bias - key - 1`
 * to all-ones, which has the maximal exponent and so would be reported
 * as INT64_MAX instead of 0. */
int64_t mdbx_jsonInteger_from_key(const MDBX_val v) {
  assert(v.iov_len == 8);
  const uint64_t key = unaligned_peek_u64(2, v.iov_base);
  const uint64_t bias = UINT64_C(0x8000000000000000);
  /* sign-less `exponent:mantissa` bits of the encoded value */
  const uint64_t covalent = (key >= bias) ? key - bias : bias - key - 1;
  /* right-shift count that scales the 64-bit aligned mantissa to an integer */
  const int shift = IEEE754_DOUBLE_EXPONENTA_BIAS + 63 -
                    (IEEE754_DOUBLE_EXPONENTA_MAX & (int)(covalent >> IEEE754_DOUBLE_MANTISSA_SIZE));
  if (unlikely(shift < 1))
    return (key < bias) ? INT64_MIN : INT64_MAX;
  if (unlikely(shift > 63))
    return 0;

  /* mantissa moved to the top of the word, with the implicit lead bit added */
  const uint64_t unscaled = ((covalent & IEEE754_DOUBLE_MANTISSA_MASK) << (63 - IEEE754_DOUBLE_MANTISSA_SIZE)) + bias;
  const int64_t absolute = unscaled >> shift;
  const int64_t value = (key < bias) ? -absolute : absolute;
  assert(key == mdbx_key_from_jsonInteger(value) ||
         (mdbx_key_from_jsonInteger(value - 1) < key && key < mdbx_key_from_jsonInteger(value + 1)));
  return value;
}

/* Decodes an 8-byte key into a `double` via key2double(). */
double mdbx_double_from_key(const MDBX_val v) {
  assert(v.iov_len == 8);
  const uint64_t key = unaligned_peek_u64(2, v.iov_base);
  return key2double(key);
}

/* Decodes a 4-byte key into a `float` via key2float(). */
float mdbx_float_from_key(const MDBX_val v) {
  assert(v.iov_len == 4);
  const uint32_t key = unaligned_peek_u32(2, v.iov_base);
  return key2float(key);
}

/* Decodes a 4-byte key into a 32-bit signed integer
 * by subtracting 2^31 (modulo 2^32) from the key. */
int32_t mdbx_int32_from_key(const MDBX_val v) {
  assert(v.iov_len == 4);
  const uint32_t key = unaligned_peek_u32(2, v.iov_base);
  const uint32_t unbiased = key - UINT32_C(0x80000000);
  return (int32_t)unbiased;
}

/* Decodes an 8-byte key into a 64-bit signed integer
 * by subtracting 2^63 (modulo 2^64) from the key. */
int64_t mdbx_int64_from_key(const MDBX_val v) {
  assert(v.iov_len == 8);
  const uint64_t key = unaligned_peek_u64(2, v.iov_base);
  const uint64_t unbiased = key - UINT64_C(0x8000000000000000);
  return (int64_t)unbiased;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Estimates whether read-ahead is reasonable for `volume` bytes of data plus a
 * (possibly negative) `redundancy` amount of bytes, given the RAM figures
 * reported by mdbx_get_sysraminfo().
 *
 * Returns MDBX_RESULT_TRUE when the volume is at most 4 MiB or when the volume
 * with the redundancy fits below both the total and the available RAM,
 * MDBX_RESULT_FALSE otherwise, or the error from mdbx_get_sysraminfo(). */
__cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) {
  /* small volumes are always considered reasonable */
  if (volume <= 1024 * 1024 * 4ul)
    return MDBX_RESULT_TRUE;

  intptr_t pagesize, total_ram_pages;
  int err = mdbx_get_sysraminfo(&pagesize, &total_ram_pages, nullptr);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  /* convert both amounts into pages, rounding the magnitudes up */
  const int log2page = log2n_powerof2(pagesize);
  const intptr_t volume_pages = (volume + pagesize - 1) >> log2page;
  intptr_t redundancy_pages;
  if (redundancy < 0)
    redundancy_pages = -(intptr_t)((-redundancy + pagesize - 1) >> log2page);
  else
    redundancy_pages = (intptr_t)(redundancy + pagesize - 1) >> log2page;

  if (volume_pages >= total_ram_pages)
    return MDBX_RESULT_FALSE;
  if (volume_pages + redundancy_pages >= total_ram_pages)
    return MDBX_RESULT_FALSE;

  /* the amount of available RAM is requested by a separate call */
  intptr_t avail_ram_pages;
  err = mdbx_get_sysraminfo(nullptr, nullptr, &avail_ram_pages);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (volume_pages + redundancy_pages >= avail_ram_pages)
    return MDBX_RESULT_FALSE;
  return MDBX_RESULT_TRUE;
}

/* Reads and optionally advances the sequence counter of a table.
 *
 * txn        Transaction to operate in.
 * dbi        Table handle.
 * result     Optional; receives the sequence value as it was BEFORE the
 *            increment is applied.
 * increment  Amount to add; zero means "read only".
 *
 * Returns MDBX_SUCCESS, MDBX_RESULT_TRUE when the addition would overflow
 * (the sequence is left unchanged), MDBX_EACCESS for FREE_DBI or a read-only
 * transaction when increment is non-zero, or an error from the
 * transaction/handle checks and the helpers called below. */
int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t increment) {
  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (likely(rc == MDBX_SUCCESS))
    rc = dbi_check(txn, dbi);
  if (likely(rc == MDBX_SUCCESS) && unlikely(txn->dbi_state[dbi] & DBI_STALE))
    rc = tbl_fetch(txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  tree_t *const tree = &txn->dbs[dbi];
  if (likely(result != nullptr))
    *result = tree->sequence;

  if (unlikely(increment == 0))
    return MDBX_SUCCESS;

  if (unlikely(dbi == FREE_DBI || (txn->flags & MDBX_TXN_RDONLY) != 0))
    return MDBX_EACCESS;

  /* Unsigned wrap-around shows up as a sum smaller than the addend. */
  const uint64_t bumped = tree->sequence + increment;
  if (unlikely(bumped < increment))
    return MDBX_RESULT_TRUE;

  tASSERT(txn, bumped > tree->sequence);
  const bool first_touch = (txn->dbi_state[dbi] & DBI_DIRTY) == 0;
  if (first_touch) {
    txn->flags |= MDBX_TXN_DIRTY;
    txn->dbi_state[dbi] |= DBI_DIRTY;
    if (unlikely(dbi == MAIN_DBI) && txn->dbs[MAIN_DBI].root != P_INVALID) {
      /* LY: A temporary prop for coherency_check(), which eventually should
       * be replaced together with a rework of how mod_txnid is set.
       *
       * The essence of the problem:
       *  - coherency_check(), as one of the "coherency" criteria, checks
       *    the condition meta.maindb.mod_txnid == maindb.root->txnid;
       *  - updating maindb.sequence raises DBI_DIRTY, which leads to
       *    meta.maindb.mod_txnid = current_txnid;
       *  - however, if the maindb tree itself was not modified and it is
       *    not empty, then the root page keeps its previous txnid and
       *    because of that coherency_check() gives a false alarm.
       *
       * Temporary (current) solution: forcibly update the root page in the
       * situation described above. This eliminates the problem and does not
       * create regression risks.
       *
       * FIXME: The final solution which is yet to be implemented:
       *  - change the semantics of setting/updating mod_txnid, binding it
       *    strictly to changes of the b-tree, but not of the attributes;
       *  - update mod_txnid when committing nested transactions;
       *  - for dbi-handles of user tables (apparently) DBI_DIRTY could be
       *    kept as the sign that the table record in MainDB needs to be
       *    updated, raising DBI_DIRTY together with updating mod_txnid,
       *    including when the sequence is updated;
       *  - for MAIN_DBI, updating the sequence should not raise DBI_DIRTY
       *    and/or update mod_txnid, but only raise MDBX_TXN_DIRTY;
       *  - alternatively, the dbi_state flag bits could be redistributed
       *    to distinguish the dirty-tree and dirty-attributes states. */
      cursor_couple_t couple;
      rc = cursor_init(&couple.outer, txn, MAIN_DBI);
      if (unlikely(rc != MDBX_SUCCESS))
        return LOG_IFERR(rc);
      rc = tree_search(&couple.outer, nullptr, Z_MODIFY | Z_ROOTONLY);
      if (unlikely(rc != MDBX_SUCCESS))
        return LOG_IFERR(rc);
    }
  }

  tree->sequence = bumped;
  return MDBX_SUCCESS;
}

/* Compares two keys using the key comparator registered for the table `dbi`
 * in the environment of `txn`.
 *
 * The transaction signature and the handle validity are verified by
 * assertion macros only; no error code is reported for a bad handle. */
int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) {
  eASSERT(nullptr, txn->signature == txn_signature);
  tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi));
  tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0);
  const MDBX_env *const env = txn->env;
  return env->kvs[dbi].clc.k.cmp(a, b);
}

/* Compares two data items (values) using the value comparator registered for
 * the table `dbi` in the environment of `txn`.
 *
 * The transaction signature and the handle validity are verified by
 * assertion macros only; no error code is reported for a bad handle. */
int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) {
  eASSERT(nullptr, txn->signature == txn_signature);
  tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi));
  tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID));
  const MDBX_env *const env = txn->env;
  return env->kvs[dbi].clc.v.cmp(a, b);
}

/* Returns the built-in key comparator that builtin_keycmp() selects for the
 * given table flags. */
__cold MDBX_cmp_func *mdbx_get_keycmp(MDBX_db_flags_t flags) {
  MDBX_cmp_func *const comparator = builtin_keycmp(flags);
  return comparator;
}

/* Returns the built-in data (value) comparator that builtin_datacmp() selects
 * for the given table flags. */
__cold MDBX_cmp_func *mdbx_get_datacmp(MDBX_db_flags_t flags) {
  MDBX_cmp_func *const comparator = builtin_datacmp(flags);
  return comparator;
}

/*----------------------------------------------------------------------------*/

/* Maps an MDBX-specific error code to a static description string.
 *
 * Codes in the contiguous range MDBX_KEYEXIST..MDBX_BUSY are looked up in a
 * table indexed by (errnum - MDBX_KEYEXIST); the remaining MDBX codes are
 * handled by the switch below.
 *
 * Returns nullptr for codes that have no description here, including the
 * unused slot inside the table range, so callers should be prepared to fall
 * back to the system error text. */
__cold const char *mdbx_liberr2str(int errnum) {
  /* Table of descriptions for MDBX errors */
  static const char *const tbl[] = {
      "MDBX_KEYEXIST: Key/data pair already exists",
      "MDBX_NOTFOUND: No matching key/data pair found",
      "MDBX_PAGE_NOTFOUND: Requested page not found",
      "MDBX_CORRUPTED: Database is corrupted",
      "MDBX_PANIC: Environment had fatal error",
      "MDBX_VERSION_MISMATCH: DB version mismatch libmdbx",
      "MDBX_INVALID: File is not an MDBX file",
      "MDBX_MAP_FULL: Environment mapsize limit reached",
      "MDBX_DBS_FULL: Too many DBI-handles (maxdbs reached)",
      "MDBX_READERS_FULL: Too many readers (maxreaders reached)",
      nullptr /* MDBX_TLS_FULL (-30789): unused in MDBX */,
      "MDBX_TXN_FULL: Transaction has too many dirty pages,"
      " i.e transaction is too big",
      "MDBX_CURSOR_FULL: Cursor stack limit reached - this usually indicates"
      " corruption, i.e branch-pages loop",
      "MDBX_PAGE_FULL: Internal error - Page has no more space",
      "MDBX_UNABLE_EXTEND_MAPSIZE: Database engine was unable to extend"
      " mapping, e.g. since address space is unavailable or busy,"
      " or Operation system not supported such operations",
      "MDBX_INCOMPATIBLE: Environment or database is not compatible"
      " with the requested operation or the specified flags",
      "MDBX_BAD_RSLOT: Invalid reuse of reader locktable slot,"
      " e.g. read-transaction already run for current thread",
      "MDBX_BAD_TXN: Transaction is not valid for requested operation,"
      " e.g. had errored and be must aborted, has a child, or is invalid",
      "MDBX_BAD_VALSIZE: Invalid size or alignment of key or data"
      " for target database, either invalid table name",
      "MDBX_BAD_DBI: The specified DBI-handle is invalid"
      " or changed by another thread/transaction",
      "MDBX_PROBLEM: Unexpected internal error, transaction should be aborted",
      "MDBX_BUSY: Another write transaction is running,"
      " or environment is already used while opening with MDBX_EXCLUSIVE flag",
  };

  /* The table covers a contiguous run of codes starting at MDBX_KEYEXIST. */
  if (errnum >= MDBX_KEYEXIST && errnum <= MDBX_BUSY) {
    int i = errnum - MDBX_KEYEXIST;
    return tbl[i];
  }

  switch (errnum) {
  case MDBX_SUCCESS:
    return "MDBX_SUCCESS: Successful";
  case MDBX_EMULTIVAL:
    return "MDBX_EMULTIVAL: The specified key has"
           " more than one associated value";
  case MDBX_EBADSIGN:
    return "MDBX_EBADSIGN: Wrong signature of a runtime object(s),"
           " e.g. memory corruption or double-free";
  case MDBX_WANNA_RECOVERY:
    return "MDBX_WANNA_RECOVERY: Database should be recovered,"
           " but this could NOT be done automatically for now"
           " since it opened in read-only mode";
  case MDBX_EKEYMISMATCH:
    return "MDBX_EKEYMISMATCH: The given key value is mismatched to the"
           " current cursor position";
  case MDBX_TOO_LARGE:
    return "MDBX_TOO_LARGE: Database is too large for current system,"
           " e.g. could NOT be mapped into RAM";
  case MDBX_THREAD_MISMATCH:
    return "MDBX_THREAD_MISMATCH: A thread has attempted to use a not"
           " owned object, e.g. a transaction that started by another thread";
  case MDBX_TXN_OVERLAPPING:
    return "MDBX_TXN_OVERLAPPING: Overlapping read and write transactions for"
           " the current thread";
  case MDBX_DUPLICATED_CLK:
    return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists,"
           " please keep one and remove unused other";
  case MDBX_DANGLING_DBI:
    return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be"
           " closed before table or corresponding DBI-handle could be (re)used";
  case MDBX_OUSTED:
    return "MDBX_OUSTED: The parked read transaction was outed for the sake"
           " of recycling old MVCC snapshots";
  case MDBX_MVCC_RETARDED:
    return "MDBX_MVCC_RETARDED: MVCC snapshot used by parked transaction was bygone";
  default:
    return nullptr;
  }
}

/* Returns a description for an MDBX or system error code, using the
 * caller-provided buffer when a system message has to be fetched or formatted.
 *
 * errnum  Error code.
 * buf     Buffer for system/fallback messages.
 * buflen  Size of buf in bytes; the buffer is used only when
 *         0 < buflen < INT_MAX.
 *
 * Returns a static string for codes known to mdbx_liberr2str(); otherwise a
 * system-provided or "error N" message (either in buf or, for the GNU
 * strerror_r(), possibly a pointer it returns); or nullptr when the code is
 * unknown and the buffer is unusable. */
__cold const char *mdbx_strerror_r(int errnum, char *buf, size_t buflen) {
  const char *msg = mdbx_liberr2str(errnum);
  if (!msg && buflen > 0 && buflen < INT_MAX) {
#if defined(_WIN32) || defined(_WIN64)
    DWORD size = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, errnum,
                                MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, nullptr);
    /* Trim trailing whitespace/control characters. The comparison is done on
     * unsigned char: with a signed plain char, bytes >= 0x80 (non-ASCII text
     * of a localized message) are negative and would be stripped as well. */
    while (size && (unsigned char)buf[size - 1] <= ' ')
      --size;
    buf[size] = 0;
    return size ? buf : "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed";
#elif defined(_GNU_SOURCE) && defined(__GLIBC__)
    /* GNU-specific */
    if (errnum > 0)
      msg = strerror_r(errnum, buf, buflen);
#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600)
    /* XSI-compliant */
    if (errnum > 0 && strerror_r(errnum, buf, buflen) == 0)
      msg = buf;
#else
    if (errnum > 0) {
      msg = strerror(errnum);
      if (msg) {
        /* strncpy() may leave buf unterminated; it is terminated below. */
        strncpy(buf, msg, buflen);
        msg = buf;
      }
    }
#endif
    if (!msg) {
      (void)snprintf(buf, buflen, "error %d", errnum);
      msg = buf;
    }
    /* Unconditionally terminate the buffer, whichever path filled it. */
    buf[buflen - 1] = '\0';
  }
  return msg;
}

/* Returns a description for an MDBX or system error code.
 *
 * The result may point into a function-local static buffer (always on
 * Windows; elsewhere only for the "error N" fallback), so it can be
 * overwritten by a subsequent call. Off Windows, positive codes unknown to
 * mdbx_liberr2str() are passed to strerror(). */
__cold const char *mdbx_strerror(int errnum) {
#if defined(_WIN32) || defined(_WIN64)
  static char buf[1024];
  return mdbx_strerror_r(errnum, buf, sizeof(buf));
#else
  const char *text = mdbx_liberr2str(errnum);
  if (text)
    return text;

  if (errnum > 0) {
    text = strerror(errnum);
    if (text)
      return text;
  }

  /* Last resort: format the numeric code into a static buffer. */
  static char fallback[32];
  (void)snprintf(fallback, sizeof(fallback) - 1, "error %d", errnum);
  return fallback;
#endif
}

#if defined(_WIN32) || defined(_WIN64) /* Bit of madness for Windows */
/* Windows-only variant of mdbx_strerror_r() that converts the system message
 * in place with CharToOemBuffA().
 *
 * errnum  Error code.
 * buf     Buffer for the system message.
 * buflen  Size of buf in bytes; the buffer is used only when
 *         0 < buflen < INT_MAX.
 *
 * Returns a static string for codes known to mdbx_liberr2str(), the converted
 * system message in buf, a static diagnostic string when FormatMessageA() or
 * CharToOemBuffA() fails, or nullptr when the code is unknown and the buffer
 * is unusable. */
const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf, size_t buflen) {
  const char *msg = mdbx_liberr2str(errnum);
  if (!msg && buflen > 0 && buflen < INT_MAX) {
    DWORD size = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, errnum,
                                MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, (DWORD)buflen, nullptr);
    /* Trim trailing whitespace/control characters. The comparison is done on
     * unsigned char: with a signed plain char, bytes >= 0x80 (non-ASCII text
     * of a localized message) are negative and would be stripped as well. */
    while (size && (unsigned char)buf[size - 1] <= ' ')
      --size;
    buf[size] = 0;
    if (!size)
      msg = "FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM) failed";
    else if (!CharToOemBuffA(buf, buf, size))
      msg = "CharToOemBuffA() failed";
    else
      msg = buf;
  }
  return msg;
}

/* Windows-only counterpart of mdbx_strerror() yielding an OEM-converted
 * message. The text is stored in a function-local static buffer, so it can be
 * overwritten by a subsequent call. */
const char *mdbx_strerror_ANSI2OEM(int errnum) {
  static char oem_text[1024];
  const char *const text = mdbx_strerror_r_ANSI2OEM(errnum, oem_text, sizeof(oem_text));
  return text;
}
#endif /* Bit of madness for Windows */
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Returns the upper bound for page numbers of the environment.
 *
 * While the page size is not known yet (env->ps == 0) PAGELIST_LIMIT is
 * returned; otherwise the upper geometry limit (or MAX_MAPSIZE when that
 * limit is zero) is converted from bytes to pages. */
static pgno_t env_max_pgno(const MDBX_env *env) {
  if (!env->ps)
    return PAGELIST_LIMIT;

  const size_t upper_bytes = env->geo_in_bytes.upper ? env->geo_in_bytes.upper : MAX_MAPSIZE;
  return bytes2pgno(env, upper_bytes);
}

/* Computes the automatic default for the per-transaction dirty-pages limit.
 *
 * The estimate is (total + available RAM pages) / 42, rescaled from system
 * pages to database pages when the database page size is known. If the RAM
 * information cannot be obtained, the error is logged and 1024 is used.
 * The result is clamped to at most PAGELIST_LIMIT and (max_pgno - NUM_METAS),
 * and to at least CURSOR_STACK_SIZE * 4. */
__cold pgno_t default_dp_limit(const MDBX_env *env) {
  /* auto-setup dp_limit by "The42" ;-) */
  intptr_t total_ram_pages, avail_ram_pages;
  int err = mdbx_get_sysraminfo(nullptr, &total_ram_pages, &avail_ram_pages);
  pgno_t dp_limit = 1024;
  if (unlikely(err != MDBX_SUCCESS))
    ERROR("mdbx_get_sysraminfo(), rc %d", err);
  else {
    size_t estimate = (size_t)(total_ram_pages + avail_ram_pages) / 42;
    if (env->ps) {
      if (env->ps > globals.sys_pagesize)
        estimate /= env->ps / globals.sys_pagesize;
      else if (env->ps < globals.sys_pagesize) {
        /* Saturate instead of wrapping when scaling up to smaller pages. */
        const size_t ratio = globals.sys_pagesize / env->ps;
        estimate = (estimate <= SIZE_MAX / ratio) ? estimate * ratio : SIZE_MAX;
      }
    }
    /* Clamp while still in size_t: narrowing to pgno_t first would silently
     * truncate an oversized estimate to an arbitrary small value. */
    dp_limit = (estimate < PAGELIST_LIMIT) ? (pgno_t)estimate : PAGELIST_LIMIT;
  }

  dp_limit = (dp_limit < PAGELIST_LIMIT) ? dp_limit : PAGELIST_LIMIT;
  const pgno_t max_pgno = env_max_pgno(env);
  if (dp_limit > max_pgno - NUM_METAS)
    dp_limit = max_pgno - NUM_METAS;
  dp_limit = (dp_limit > CURSOR_STACK_SIZE * 4) ? dp_limit : CURSOR_STACK_SIZE * 4;
  return dp_limit;
}

/* Computes the automatic default for the rp_augment_limit option.
 *
 * The value starts from a minimum (the larger of env->maxgc_large1page * 2
 * and MDBX_PNL_INITIAL) and, when one third of the current database size in
 * pages exceeds that minimum, grows by the excess scaled with the part of
 * the time frame that is not covered by options.gc_time_limit. The result is
 * normalized through pnl_size2bytes()/pnl_bytes2size(). */
__cold static pgno_t default_rp_augment_limit(const MDBX_env *env) {
  const size_t timeframe = /* 16 seconds */ 16 << 16;

  size_t remain_1sec = 0;
  if (env->options.gc_time_limit < timeframe)
    remain_1sec = timeframe - (size_t)env->options.gc_time_limit;

  size_t minimum = MDBX_PNL_INITIAL;
  if (env->maxgc_large1page * 2 > MDBX_PNL_INITIAL)
    minimum = env->maxgc_large1page * 2;

  const size_t one_third = env->geo_in_bytes.now / 3 >> env->ps2ln;

  size_t augment_limit = minimum;
  if (one_third > minimum)
    augment_limit += (one_third - minimum) / timeframe * remain_1sec;

  eASSERT(env, augment_limit < PAGELIST_LIMIT);
  return pnl_bytes2size(pnl_size2bytes(augment_limit));
}

/* Default for the prefault-write option: enabled only when
 * MDBX_MMAP_INCOHERENT_FILE_WRITE is zero, the environment is not in-core,
 * and it is opened with MDBX_WRITEMAP but without MDBX_RDONLY. */
static bool default_prefault_write(const MDBX_env *env) {
  if (MDBX_MMAP_INCOHERENT_FILE_WRITE || env->incore)
    return false;
  return (env->flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == MDBX_WRITEMAP;
}

/* Default for the prefer_waf_insteadof_balance option: disabled.
 * The environment argument is not used. */
static bool default_prefer_waf_insteadof_balance(const MDBX_env *env) {
  const bool enabled_by_default = false;
  (void)env;
  return enabled_by_default;
}

/* Default for the subpage.limit option: 65535, i.e. 100%.
 * The environment argument is not used. */
static uint16_t default_subpage_limit(const MDBX_env *env) {
  const uint16_t whole = 65535; /* 100% */
  (void)env;
  return whole;
}

/* Default for the subpage.room_threshold option: 0, i.e. 0%.
 * The environment argument is not used. */
static uint16_t default_subpage_room_threshold(const MDBX_env *env) {
  const uint16_t none = 0; /* 0% */
  (void)env;
  return none;
}

/* Default for the subpage.reserve_prereq option: 27525, i.e. 42% of 65535.
 * The environment argument is not used. */
static uint16_t default_subpage_reserve_prereq(const MDBX_env *env) {
  const uint16_t prereq = 27525; /* 42% */
  (void)env;
  return prereq;
}

/* Default for the subpage.reserve_limit option: 2753, i.e. 4.2% of 65535.
 * The environment argument is not used. */
static uint16_t default_subpage_reserve_limit(const MDBX_env *env) {
  const uint16_t limit = 2753; /* 4.2% */
  (void)env;
  return limit;
}

/* Default for the merge threshold option: one quarter of 65536, i.e. 25%.
 * The environment argument is not used. */
static uint16_t default_merge_threshold_16dot16_percent(const MDBX_env *env) {
  const uint16_t quarter = 65536 / 4; /* 25% */
  (void)env;
  return quarter;
}

/* Default for the dp_reserve_limit option: MDBX_PNL_INITIAL.
 * The environment argument is not used. */
static pgno_t default_dp_reserve_limit(const MDBX_env *env) {
  const pgno_t reserve = MDBX_PNL_INITIAL;
  (void)env;
  return reserve;
}

/* Default for the dp_initial option: MDBX_PNL_INITIAL.
 * The environment argument is not used. */
static pgno_t default_dp_initial(const MDBX_env *env) {
  const pgno_t initial = MDBX_PNL_INITIAL;
  (void)env;
  return initial;
}

/* Default for the spill_max_denominator option: 8.
 * The environment argument is not used. */
static uint8_t default_spill_max_denominator(const MDBX_env *env) {
  const uint8_t denominator = 8;
  (void)env;
  return denominator;
}

/* Default for the spill_min_denominator option: 8.
 * The environment argument is not used. */
static uint8_t default_spill_min_denominator(const MDBX_env *env) {
  const uint8_t denominator = 8;
  (void)env;
  return denominator;
}

/* Default for the spill_parent4child_denominator option: 0.
 * The environment argument is not used. */
static uint8_t default_spill_parent4child_denominator(const MDBX_env *env) {
  const uint8_t denominator = 0;
  (void)env;
  return denominator;
}

/* Default for the dp_loose_limit option: 64.
 * The environment argument is not used. */
static uint8_t default_dp_loose_limit(const MDBX_env *env) {
  const uint8_t loose_pages = 64;
  (void)env;
  return loose_pages;
}

/* Fills env->options with the built-in defaults.
 *
 * prefer_waf_insteadof_balance is only ever switched on here, never off.
 * writethrough_threshold is set on non-Windows builds only; on Linux builds
 * it becomes MAX_PAGENO when globals.running_on_WSL1 is set. */
void env_options_init(MDBX_env *env) {
  /* page-list and dirty-page limits */
  env->options.rp_augment_limit = default_rp_augment_limit(env);
  env->options.dp_reserve_limit = default_dp_reserve_limit(env);
  env->options.dp_initial = default_dp_initial(env);
  env->options.dp_limit = default_dp_limit(env);

  /* spilling */
  env->options.spill_max_denominator = default_spill_max_denominator(env);
  env->options.spill_min_denominator = default_spill_min_denominator(env);
  env->options.spill_parent4child_denominator = default_spill_parent4child_denominator(env);

  env->options.dp_loose_limit = default_dp_loose_limit(env);
  env->options.merge_threshold_16dot16_percent = default_merge_threshold_16dot16_percent(env);
  if (default_prefer_waf_insteadof_balance(env))
    env->options.prefer_waf_insteadof_balance = true;

#if !(defined(_WIN32) || defined(_WIN64))
  env->options.writethrough_threshold = MDBX_WRITETHROUGH_THRESHOLD_DEFAULT;
#if defined(__linux__) || defined(__gnu_linux__)
  if (globals.running_on_WSL1)
    env->options.writethrough_threshold = MAX_PAGENO;
#endif /* Linux */
#endif /* Windows */

  /* sub-page tuning */
  env->options.subpage.limit = default_subpage_limit(env);
  env->options.subpage.room_threshold = default_subpage_room_threshold(env);
  env->options.subpage.reserve_prereq = default_subpage_reserve_prereq(env);
  env->options.subpage.reserve_limit = default_subpage_reserve_limit(env);
}

/* Brings options.dp_limit and options.dp_initial in line with the current
 * environment.
 *
 * An automatic dp_limit is recomputed from scratch; an explicitly set one is
 * clamped to the range [CURSOR_STACK_SIZE * 4, max_pgno - NUM_METAS], with the
 * lower bound applied last. dp_initial is lowered to dp_limit only when it
 * exceeds both dp_limit and its own default. Finally the pending-adjust flag
 * is cleared. */
void env_options_adjust_dp_limit(MDBX_env *env) {
  if (env->options.flags.non_auto.dp_limit) {
    const pgno_t ceiling = env_max_pgno(env) - NUM_METAS;
    if (env->options.dp_limit > ceiling)
      env->options.dp_limit = ceiling;
    if (env->options.dp_limit < CURSOR_STACK_SIZE * 4)
      env->options.dp_limit = CURSOR_STACK_SIZE * 4;
  } else {
    env->options.dp_limit = default_dp_limit(env);
  }

  const bool initial_above_limit = env->options.dp_initial > env->options.dp_limit;
  if (initial_above_limit && env->options.dp_initial > default_dp_initial(env))
    env->options.dp_initial = env->options.dp_limit;

  env->options.need_dp_limit_adjust = false;
}

/* Re-derives the automatic (not user-overridden) options and the madvise
 * threshold from the current environment state.
 *
 * The dp_limit adjustment is always marked as pending and is performed
 * immediately only when env->txn is not set. The madvise threshold is the
 * current size shifted right by 9 bits, bounded to [64 KiB, 4 MiB] and
 * additionally capped by a non-zero shrink step, then converted to pages. */
void env_options_adjust_defaults(MDBX_env *env) {
  if (!env->options.flags.non_auto.rp_augment_limit)
    env->options.rp_augment_limit = default_rp_augment_limit(env);
  if (!env->options.flags.non_auto.prefault_write)
    env->options.prefault_write = default_prefault_write(env);

  env->options.need_dp_limit_adjust = true;
  if (!env->txn)
    env_options_adjust_dp_limit(env);

  const size_t basis = env->geo_in_bytes.now;
  /* TODO: use options? */
  const unsigned factor = 9;

  size_t threshold = basis >> factor;
  if (basis < ((size_t)65536 << factor))
    threshold = 65536; /* minimal threshold */
  else if (basis > (MEGABYTE * 4 << factor))
    threshold = MEGABYTE * 4; /* maximal threshold */

  /* A non-zero shrink step caps the threshold. */
  if (env->geo_in_bytes.shrink && threshold >= env->geo_in_bytes.shrink)
    threshold = env->geo_in_bytes.shrink;

  env->madv_threshold = bytes2pgno(env, bytes_align2os_bytes(env, threshold));
}

//------------------------------------------------------------------------------

/* Sets a runtime option of the environment.
 *
 * env     Environment handle, validated by check_env().
 * option  Option to change.
 * value   New value; UINT64_MAX requests the option's default.
 *
 * Returns MDBX_SUCCESS or an error code, in particular:
 *  - MDBX_EINVAL  for an unknown option or an out-of-range value;
 *  - MDBX_EACCESS for options that are refused on a read-only environment;
 *  - MDBX_EPERM   when the option cannot be changed in the current state
 *                 (environment not active / already mapped / a transaction
 *                 is in progress, depending on the option);
 *  - errors from lck_txn_lock() or env_sync().
 *
 * For some options the write-transaction lock is taken around the change
 * when the environment is active and the caller does not already own the
 * write transaction; it is released before returning. */
__cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, uint64_t value) {
  int err = check_env(env, false);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  const bool lock_needed = ((env->flags & ENV_ACTIVE) && env->basal_txn && !env_owned_wrtxn(env));
  bool should_unlock = false;
  switch (option) {
  case MDBX_opt_sync_bytes:
    if (value == /* default */ UINT64_MAX)
      value = MAX_WRITE;
    if (unlikely(env->flags & MDBX_RDONLY))
      return LOG_IFERR(MDBX_EACCESS);
    if (unlikely(!(env->flags & ENV_ACTIVE)))
      return LOG_IFERR(MDBX_EPERM);
    if (unlikely(value > SIZE_MAX - 65536))
      return LOG_IFERR(MDBX_EINVAL);
    /* bytes -> pages, rounding up */
    value = bytes2pgno(env, (size_t)value + env->ps - 1);
    if ((uint32_t)value != atomic_load32(&env->lck->autosync_threshold, mo_AcquireRelease) &&
        atomic_store32(&env->lck->autosync_threshold, (uint32_t)value, mo_Relaxed)
        /* Kick sync(force=off) only if a new non-zero value has been set
         * and we are outside of a transaction */
        && lock_needed) {
      err = env_sync(env, false, false);
      if (err == /* nothing to flush to disk */ MDBX_RESULT_TRUE)
        err = MDBX_SUCCESS;
    }
    break;

  case MDBX_opt_sync_period:
    if (value == /* default */ UINT64_MAX)
      value = 2780315 /* 42.42424 seconds */;
    if (unlikely(env->flags & MDBX_RDONLY))
      return LOG_IFERR(MDBX_EACCESS);
    if (unlikely(!(env->flags & ENV_ACTIVE)))
      return LOG_IFERR(MDBX_EPERM);
    if (unlikely(value > UINT32_MAX))
      return LOG_IFERR(MDBX_EINVAL);
    value = osal_16dot16_to_monotime((uint32_t)value);
    if (value != atomic_load64(&env->lck->autosync_period, mo_AcquireRelease) &&
        atomic_store64(&env->lck->autosync_period, value, mo_Relaxed)
        /* Kick sync(force=off) only if a new non-zero value has been set
         * and we are outside of a transaction */
        && lock_needed) {
      err = env_sync(env, false, false);
      if (err == /* nothing to flush to disk */ MDBX_RESULT_TRUE)
        err = MDBX_SUCCESS;
    }
    break;

  case MDBX_opt_max_db:
    if (value == /* default */ UINT64_MAX)
      value = 42;
    if (unlikely(value > MDBX_MAX_DBI))
      return LOG_IFERR(MDBX_EINVAL);
    /* refused once the data file is mapped */
    if (unlikely(env->dxb_mmap.base))
      return LOG_IFERR(MDBX_EPERM);
    env->max_dbi = (unsigned)value + CORE_DBS;
    break;

  case MDBX_opt_max_readers:
    if (value == /* default */ UINT64_MAX)
      value = MDBX_READERS_LIMIT;
    if (unlikely(value < 1 || value > MDBX_READERS_LIMIT))
      return LOG_IFERR(MDBX_EINVAL);
    /* refused once the data file is mapped */
    if (unlikely(env->dxb_mmap.base))
      return LOG_IFERR(MDBX_EPERM);
    env->max_readers = (unsigned)value;
    break;

  case MDBX_opt_dp_reserve_limit:
    if (value == /* default */ UINT64_MAX)
      value = default_dp_reserve_limit(env);
    if (unlikely(value > INT_MAX))
      return LOG_IFERR(MDBX_EINVAL);
    if (env->options.dp_reserve_limit != (unsigned)value) {
      if (lock_needed) {
        err = lck_txn_lock(env, false);
        if (unlikely(err != MDBX_SUCCESS))
          return LOG_IFERR(err);
        should_unlock = true;
      }
      env->options.dp_reserve_limit = (unsigned)value;
      /* Release reserved shadow pages that exceed the new limit. */
      while (env->shadow_reserve_len > env->options.dp_reserve_limit) {
        eASSERT(env, env->shadow_reserve != nullptr);
        page_t *dp = env->shadow_reserve;
        MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, env->ps);
        VALGRIND_MAKE_MEM_DEFINED(&page_next(dp), sizeof(page_t *));
        env->shadow_reserve = page_next(dp);
        /* The pointer passed to osal_free() lies sizeof(size_t) bytes
         * before the page itself. */
        void *const ptr = ptr_disp(dp, -(ptrdiff_t)sizeof(size_t));
        osal_free(ptr);
        env->shadow_reserve_len -= 1;
      }
    }
    break;

  case MDBX_opt_rp_augment_limit:
    if (value == /* default */ UINT64_MAX) {
      env->options.flags.non_auto.rp_augment_limit = 0;
      env->options.rp_augment_limit = default_rp_augment_limit(env);
    } else if (unlikely(value > PAGELIST_LIMIT))
      return LOG_IFERR(MDBX_EINVAL);
    else {
      env->options.flags.non_auto.rp_augment_limit = 1;
      env->options.rp_augment_limit = (unsigned)value;
    }
    break;

  case MDBX_opt_gc_time_limit:
    if (value == /* default */ UINT64_MAX)
      value = 0;
    if (unlikely(value > UINT32_MAX))
      return LOG_IFERR(MDBX_EINVAL);
    if (unlikely(env->flags & MDBX_RDONLY))
      return LOG_IFERR(MDBX_EACCESS);
    value = osal_16dot16_to_monotime((uint32_t)value);
    if (value != env->options.gc_time_limit) {
      if (env->txn && lock_needed)
        return LOG_IFERR(MDBX_EPERM);
      env->options.gc_time_limit = value;
      /* An automatic rp_augment_limit depends on gc_time_limit. */
      if (!env->options.flags.non_auto.rp_augment_limit)
        env->options.rp_augment_limit = default_rp_augment_limit(env);
    }
    break;

  case MDBX_opt_txn_dp_limit:
  case MDBX_opt_txn_dp_initial:
    if (value != /* default */ UINT64_MAX && unlikely(value > PAGELIST_LIMIT || value < CURSOR_STACK_SIZE * 4))
      return LOG_IFERR(MDBX_EINVAL);
    if (unlikely(env->flags & MDBX_RDONLY))
      return LOG_IFERR(MDBX_EACCESS);
    if (lock_needed) {
      err = lck_txn_lock(env, false);
      if (unlikely(err != MDBX_SUCCESS))
        return LOG_IFERR(err);
      should_unlock = true;
    }
    if (env->txn)
      err = MDBX_EPERM /* unable change during transaction */;
    else {
      const pgno_t max_pgno = env_max_pgno(env);
      if (option == MDBX_opt_txn_dp_initial) {
        if (value == /* default */ UINT64_MAX)
          env->options.dp_initial = default_dp_initial(env);
        else {
          env->options.dp_initial = (pgno_t)value;
          if (env->options.dp_initial > max_pgno)
            env->options.dp_initial = (max_pgno > CURSOR_STACK_SIZE * 4) ? max_pgno : CURSOR_STACK_SIZE * 4;
        }
      }
      if (option == MDBX_opt_txn_dp_limit) {
        if (value == /* default */ UINT64_MAX) {
          env->options.flags.non_auto.dp_limit = 0;
        } else {
          env->options.flags.non_auto.dp_limit = 1;
          env->options.dp_limit = (pgno_t)value;
        }
        env_options_adjust_dp_limit(env);
      }
    }
    break;

  case MDBX_opt_spill_max_denominator:
    if (value == /* default */ UINT64_MAX)
      value = default_spill_max_denominator(env);
    if (unlikely(value > 255))
      return LOG_IFERR(MDBX_EINVAL);
    env->options.spill_max_denominator = (uint8_t)value;
    break;
  case MDBX_opt_spill_min_denominator:
    if (value == /* default */ UINT64_MAX)
      value = default_spill_min_denominator(env);
    if (unlikely(value > 255))
      return LOG_IFERR(MDBX_EINVAL);
    env->options.spill_min_denominator = (uint8_t)value;
    break;
  case MDBX_opt_spill_parent4child_denominator:
    if (value == /* default */ UINT64_MAX)
      value = default_spill_parent4child_denominator(env);
    if (unlikely(value > 255))
      return LOG_IFERR(MDBX_EINVAL);
    env->options.spill_parent4child_denominator = (uint8_t)value;
    break;

  case MDBX_opt_loose_limit:
    if (value == /* default */ UINT64_MAX)
      value = default_dp_loose_limit(env);
    if (unlikely(value > 255))
      return LOG_IFERR(MDBX_EINVAL);
    env->options.dp_loose_limit = (uint8_t)value;
    break;

  case MDBX_opt_merge_threshold_16dot16_percent:
    if (value == /* default */ UINT64_MAX)
      value = default_merge_threshold_16dot16_percent(env);
    if (unlikely(value < 8192 || value > 32768))
      return LOG_IFERR(MDBX_EINVAL);
    env->options.merge_threshold_16dot16_percent = (unsigned)value;
    recalculate_merge_thresholds(env);
    break;

  case MDBX_opt_writethrough_threshold:
#if defined(_WIN32) || defined(_WIN64)
    /* Allow "setting" only the default value or the value that matches the
     * behaviour corresponding to the current MDBX_NOMETASYNC setting;
     * nothing is stored, any other value is rejected.
     *
     * The check used to read `value == UINT64_MAX && ...`, which rejected
     * the default and silently accepted everything else.
     *
     * NOTE(review): mdbx_env_get_option() reports INT_MAX (not UINT_MAX) for
     * the non-NOMETASYNC case; confirm which constant is intended so that
     * a get->set round trip is accepted. */
    if (value != /* default */ UINT64_MAX && value != ((env->flags & MDBX_NOMETASYNC) ? 0 : UINT_MAX))
      err = MDBX_EINVAL;
#else
    if (value == /* default */ UINT64_MAX)
      value = MDBX_WRITETHROUGH_THRESHOLD_DEFAULT;
    if (value != (unsigned)value)
      err = MDBX_EINVAL;
    else
      env->options.writethrough_threshold = (unsigned)value;
#endif
    break;

  case MDBX_opt_prefault_write_enable:
    if (value == /* default */ UINT64_MAX) {
      env->options.prefault_write = default_prefault_write(env);
      env->options.flags.non_auto.prefault_write = false;
    } else if (value > 1)
      err = MDBX_EINVAL;
    else {
      env->options.prefault_write = value != 0;
      env->options.flags.non_auto.prefault_write = true;
    }
    break;

  case MDBX_opt_prefer_waf_insteadof_balance:
    if (value == /* default */ UINT64_MAX)
      env->options.prefer_waf_insteadof_balance = default_prefer_waf_insteadof_balance(env);
    else if (value > 1)
      err = MDBX_EINVAL;
    else
      env->options.prefer_waf_insteadof_balance = value != 0;
    break;

  case MDBX_opt_subpage_limit:
    if (value == /* default */ UINT64_MAX) {
      env->options.subpage.limit = default_subpage_limit(env);
      recalculate_subpage_thresholds(env);
    } else if (value > 65535)
      err = MDBX_EINVAL;
    else {
      env->options.subpage.limit = (uint16_t)value;
      recalculate_subpage_thresholds(env);
    }
    break;

  case MDBX_opt_subpage_room_threshold:
    if (value == /* default */ UINT64_MAX) {
      env->options.subpage.room_threshold = default_subpage_room_threshold(env);
      recalculate_subpage_thresholds(env);
    } else if (value > 65535)
      err = MDBX_EINVAL;
    else {
      env->options.subpage.room_threshold = (uint16_t)value;
      recalculate_subpage_thresholds(env);
    }
    break;

  case MDBX_opt_subpage_reserve_prereq:
    if (value == /* default */ UINT64_MAX) {
      env->options.subpage.reserve_prereq = default_subpage_reserve_prereq(env);
      recalculate_subpage_thresholds(env);
    } else if (value > 65535)
      err = MDBX_EINVAL;
    else {
      env->options.subpage.reserve_prereq = (uint16_t)value;
      recalculate_subpage_thresholds(env);
    }
    break;

  case MDBX_opt_subpage_reserve_limit:
    if (value == /* default */ UINT64_MAX) {
      env->options.subpage.reserve_limit = default_subpage_reserve_limit(env);
      recalculate_subpage_thresholds(env);
    } else if (value > 65535)
      err = MDBX_EINVAL;
    else {
      env->options.subpage.reserve_limit = (uint16_t)value;
      recalculate_subpage_thresholds(env);
    }
    break;

  default:
    return LOG_IFERR(MDBX_EINVAL);
  }

  /* Every early return above happens before the lock is acquired, so this is
   * the only place where it has to be released. */
  if (should_unlock)
    lck_txn_unlock(env);
  return LOG_IFERR(err);
}

__cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, uint64_t *pvalue) {
  int err = check_env(env, false);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);
  if (unlikely(!pvalue))
    return LOG_IFERR(MDBX_EINVAL);

  switch (option) {
  case MDBX_opt_sync_bytes:
    if (unlikely(!(env->flags & ENV_ACTIVE)))
      return LOG_IFERR(MDBX_EPERM);
    *pvalue = pgno2bytes(env, atomic_load32(&env->lck->autosync_threshold, mo_Relaxed));
    break;

  case MDBX_opt_sync_period:
    if (unlikely(!(env->flags & ENV_ACTIVE)))
      return LOG_IFERR(MDBX_EPERM);
    *pvalue = osal_monotime_to_16dot16(atomic_load64(&env->lck->autosync_period, mo_Relaxed));
    break;

  case MDBX_opt_max_db:
    *pvalue = env->max_dbi - CORE_DBS;
    break;

  case MDBX_opt_max_readers:
    *pvalue = env->max_readers;
    break;

  case MDBX_opt_dp_reserve_limit:
    *pvalue = env->options.dp_reserve_limit;
    break;

  case MDBX_opt_rp_augment_limit:
    *pvalue = env->options.rp_augment_limit;
    break;

  case MDBX_opt_gc_time_limit:
    *pvalue = osal_monotime_to_16dot16(env->options.gc_time_limit);
    break;

  case MDBX_opt_txn_dp_limit:
    *pvalue = env->options.dp_limit;
    break;
  case MDBX_opt_txn_dp_initial:
    *pvalue = env->options.dp_initial;
    break;

  case MDBX_opt_spill_max_denominator:
    *pvalue = env->options.spill_max_denominator;
    break;
  case MDBX_opt_spill_min_denominator:
    *pvalue = env->options.spill_min_denominator;
    break;
  case MDBX_opt_spill_parent4child_denominator:
    *pvalue = env->options.spill_parent4child_denominator;
    break;

  case MDBX_opt_loose_limit:
    *pvalue = env->options.dp_loose_limit;
    break;

  case MDBX_opt_merge_threshold_16dot16_percent:
    *pvalue = env->options.merge_threshold_16dot16_percent;
    break;

  case MDBX_opt_writethrough_threshold:
#if defined(_WIN32) || defined(_WIN64)
    *pvalue = (env->flags & MDBX_NOMETASYNC) ? 0 : INT_MAX;
#else
    *pvalue = env->options.writethrough_threshold;
#endif
    break;

  case MDBX_opt_prefault_write_enable:
    *pvalue = env->options.prefault_write;
    break;

  case MDBX_opt_prefer_waf_insteadof_balance:
    *pvalue = env->options.prefer_waf_insteadof_balance;
    break;

  case MDBX_opt_subpage_limit:
    *pvalue = env->options.subpage.limit;
    break;

  case MDBX_opt_subpage_room_threshold:
    *pvalue = env->options.subpage.room_threshold;
    break;

  case MDBX_opt_subpage_reserve_prereq:
    *pvalue = env->options.subpage.reserve_prereq;
    break;

  case MDBX_opt_subpage_reserve_limit:
    *pvalue = env->options.subpage.reserve_limit;
    break;

  default:
    return LOG_IFERR(MDBX_EINVAL);
  }

  return MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Result of comparing the positions of two cursors.
 * Filled in by cursor_diff() and consumed by estimate(). */
typedef struct diff_result {
  /* signed difference between the cursors, measured at `level` */
  ptrdiff_t diff;
  /* depth in the cursors' page stacks at which `diff` was computed */
  intptr_t level;
  /* number of keys on the page at stack level 0 of the first cursor */
  ptrdiff_t root_nkeys;
} diff_t;

/* Calculates the positional difference of two cursors: r = x - y.
 *
 * Both cursors must belong to the same transaction and share the same
 * dbi_state, otherwise MDBX_BAD_TXN or MDBX_EINVAL is returned respectively.
 * MDBX_ENODATA is returned when the shallower of the two page stacks is
 * empty (top < 0), and MDBX_PROBLEM when the cursors refer to different
 * pages at a level where their key indexes have not diverged yet.
 *
 * On success:
 *  - r->root_nkeys is the number of keys on x's page at stack level 0;
 *  - r->level is the stack level at which the difference was measured;
 *  - r->diff is the signed difference of key indexes at that level, or,
 *    when the cursors sit on adjacent entries of a parent page, the
 *    distance measured across the boundary of the two child pages.
 *    If the stacks are identical down to the common depth, r->diff is the
 *    comparison of the cursors' z_eof_hard flags. */
__hot static int cursor_diff(const MDBX_cursor *const __restrict x, const MDBX_cursor *const __restrict y,
                             diff_t *const __restrict r) {
  r->diff = 0;
  r->level = 0;
  r->root_nkeys = 0;

  int rc = check_txn(x->txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  if (unlikely(x->txn != y->txn))
    return MDBX_BAD_TXN;

  if (unlikely(y->dbi_state != x->dbi_state))
    return MDBX_EINVAL;

  /* only the levels present in both stacks can be compared */
  const intptr_t depth = (x->top < y->top) ? x->top : y->top;
  if (unlikely(depth < 0))
    return MDBX_ENODATA;

  r->root_nkeys = page_numkeys(x->pg[0]);
  intptr_t nkeys = r->root_nkeys;
  /* walk down from the root until the key indexes diverge */
  for (;;) {
    if (unlikely(y->pg[r->level] != x->pg[r->level])) {
      /* r->level is non-negative here; cast so the argument type matches %zu */
      ERROR("Mismatch cursors's pages at %zu level", (size_t)r->level);
      return MDBX_PROBLEM;
    }
    r->diff = x->ki[r->level] - y->ki[r->level];
    if (r->diff)
      break;
    r->level += 1;
    if (r->level > depth) {
      /* identical positions: order the cursors by their hard-EOF state */
      r->diff = CMP2INT(x->flags & z_eof_hard, y->flags & z_eof_hard);
      return MDBX_SUCCESS;
    }
    nkeys = page_numkeys(x->pg[r->level]);
  }

  /* x is exactly one entry after y: refine the distance one level deeper */
  while (unlikely(r->diff == 1) && likely(r->level < depth)) {
    r->level += 1;
    /*   DB'PAGEs: 0------------------>MAX
     *
     *    CURSORs:       y < x
     *  STACK[i ]:         |
     *  STACK[+1]:  ...y++N|0++x...
     */
    nkeys = page_numkeys(y->pg[r->level]);
    r->diff = (nkeys - y->ki[r->level]) + x->ki[r->level];
    assert(r->diff > 0);
  }

  /* x is exactly one entry before y: the mirrored refinement */
  while (unlikely(r->diff == -1) && likely(r->level < depth)) {
    r->level += 1;
    /*   DB'PAGEs: 0------------------>MAX
     *
     *    CURSORs:       x < y
     *  STACK[i ]:         |
     *  STACK[+1]:  ...x--N|0--y...
     */
    nkeys = page_numkeys(x->pg[r->level]);
    r->diff = -(nkeys - x->ki[r->level]) - y->ki[r->level];
    assert(r->diff < 0);
  }

  return MDBX_SUCCESS;
}

/* Converts a cursor difference measured at some tree level into an estimated
 * number of items.
 *
 * `dr->diff` counts entries at stack level `dr->level`; each entry above the
 * leaf level stands for a whole subtree, so the difference is scaled by the
 * average number of items per leaf page and then by the average branch-page
 * fill factor once per remaining branch level. The final result is clamped
 * to the range [-tree->items, +tree->items]. */
__hot static ptrdiff_t estimate(const tree_t *tree, diff_t *const __restrict dr) {
  /*        root: branch-page    => scale = leaf-factor * branch-factor^(N-1)
   *     level-1: branch-page(s) => scale = leaf-factor * branch-factor^2
   *     level-2: branch-page(s) => scale = leaf-factor * branch-factor
   *     level-N: branch-page(s) => scale = leaf-factor
   *  leaf-level: leaf-page(s)   => scale = 1
   */
  /* number of times the branch factor still has to be applied */
  ptrdiff_t btree_power = (ptrdiff_t)tree->height - 2 - (ptrdiff_t)dr->level;
  if (btree_power < 0)
    /* no scaling needed: the difference is returned as-is */
    return dr->diff;

  /* scale by the average number of items per leaf page */
  ptrdiff_t estimated = (ptrdiff_t)tree->items * dr->diff / (ptrdiff_t)tree->leaf_pages;
  if (btree_power == 0)
    return estimated;

  if (tree->height < 4) {
    /* shallow tree: the difference was measured on the root page, so the
     * share of root entries gives the estimation directly */
    assert(dr->level == 0 && btree_power == 1);
    return (ptrdiff_t)tree->items * dr->diff / (ptrdiff_t)dr->root_nkeys;
  }

  /* average_branchpage_fillfactor = total(branch_entries) / branch_pages
     total(branch_entries) = leaf_pages + branch_pages - 1 (root page) */
  /* fixed-point arithmetic: the fraction width in bits is sizeof(size_t) - 1,
   * and `half` is added before each shift to round to nearest */
  const size_t log2_fixedpoint = sizeof(size_t) - 1;
  const size_t half = UINT64_C(1) << (log2_fixedpoint - 1);
  const size_t factor = ((tree->leaf_pages + tree->branch_pages - 1) << log2_fixedpoint) / tree->branch_pages;
  while (1) {
    switch ((size_t)btree_power) {
    default: {
      /* btree_power >= 4: apply factor^4 per step, then re-dispatch on the
       * remainder via `continue` */
      const size_t square = (factor * factor + half) >> log2_fixedpoint;
      const size_t quad = (square * square + half) >> log2_fixedpoint;
      do {
        estimated = estimated * quad + half;
        estimated >>= log2_fixedpoint;
        btree_power -= 4;
      } while (btree_power >= 4);
      continue;
    }
    case 3:
      estimated = estimated * factor + half;
      estimated >>= log2_fixedpoint;
      __fallthrough /* fall through */;
    case 2:
      estimated = estimated * factor + half;
      estimated >>= log2_fixedpoint;
      __fallthrough /* fall through */;
    case 1:
      estimated = estimated * factor + half;
      estimated >>= log2_fixedpoint;
      __fallthrough /* fall through */;
    case 0:
      /* clamp: an estimation can never exceed the total number of items */
      if (unlikely(estimated > (ptrdiff_t)tree->items))
        return (ptrdiff_t)tree->items;
      if (unlikely(estimated < -(ptrdiff_t)tree->items))
        return -(ptrdiff_t)tree->items;
      return estimated;
    }
  }
}

/*------------------------------------------------------------------------------
 * Range-Estimation API */

/* Estimates the signed distance, in items, from `first` to `last`.
 *
 * Both cursors must pass cursor_check_pure(). The result stored in
 * *distance_items is computed as (last - first): it is positive when `last`
 * is positioned after `first`, negative when before, and zero when both
 * cursors refer to the same position.
 *
 * When the outer cursors are at the same position and `first` has a pointed
 * nested cursor, the nested (dupsort) cursors are compared instead, so the
 * estimation is made in terms of duplicates of that key.
 *
 * Returns MDBX_SUCCESS, MDBX_EINVAL when distance_items is NULL, or the
 * error reported by the cursor checks / cursor_diff(). */
__hot int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last, ptrdiff_t *distance_items) {
  if (unlikely(!distance_items))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = cursor_check_pure(first);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  rc = cursor_check_pure(last);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  *distance_items = 0;
  diff_t dr;
  rc = cursor_diff(last, first, &dr);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  cASSERT(first, dr.diff || inner_pointed(first) == inner_pointed(last));
  if (unlikely(dr.diff == 0) && inner_pointed(first)) {
    first = &first->subcur->cursor;
    last = &last->subcur->cursor;
    /* Keep the same operand order as for the outer cursors (last - first),
     * otherwise the sign of the estimation would be inverted for positions
     * that differ only within the duplicates of a single key. */
    rc = cursor_diff(last, first, &dr);
    if (unlikely(rc != MDBX_SUCCESS))
      return LOG_IFERR(rc);
  }

  if (likely(dr.diff != 0))
    *distance_items = estimate(first->tree, &dr);

  return MDBX_SUCCESS;
}

/* Estimates how far `cursor` would travel if `move_op` were applied to it.
 *
 * The given cursor is not moved: its page stack is copied into a temporary
 * on-stack cursor couple, the operation is performed on that copy, and the
 * distance between the original and the copy is estimated by
 * mdbx_estimate_distance().
 *
 * `key` and/or `data` may be NULL for operations that do not take them as
 * input; MDBX_EINVAL is returned for the operations listed in the masks
 * below, as well as for MDBX_GET_CURRENT and MDBX_GET_MULTIPLE or when
 * distance_items is NULL. MDBX_ENODATA is returned for an unpositioned
 * cursor. */
__hot int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op move_op,
                             ptrdiff_t *distance_items) {
  if (unlikely(distance_items == nullptr || move_op == MDBX_GET_CURRENT || move_op == MDBX_GET_MULTIPLE))
    return LOG_IFERR(MDBX_EINVAL);

  int err = cursor_check_ro(cursor);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(!is_pointed(cursor)))
    return LOG_IFERR(MDBX_ENODATA);

  /* temporary twin of the given cursor, it will be moved instead */
  cursor_couple_t probe;
  err = cursor_init(&probe.outer, cursor->txn, cursor_dbi(cursor));
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_cpstk(cursor, &probe.outer);
  if (cursor->tree->flags & MDBX_DUPSORT) {
    /* the nested cursor lives in the same couple as the outer one */
    subcur_t *origin_inner = &container_of(cursor, cursor_couple_t, outer)->inner;
    cursor_cpstk(&origin_inner->cursor, &probe.inner.cursor);
  }

  /* NOTE(review): `1 << move_op` below presumes move_op is small enough for
   * an int shift — confirm against the range of MDBX_cursor_op. */
  MDBX_val stub_data;
  if (data == nullptr) {
    /* operations which need the data as an input */
    const unsigned data_required = 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY;
    if (unlikely(data_required & (1 << move_op)))
      return LOG_IFERR(MDBX_EINVAL);
    stub_data.iov_len = 0;
    stub_data.iov_base = nullptr;
    data = &stub_data;
  }

  MDBX_val stub_key;
  if (key == nullptr) {
    /* operations which need the key as an input */
    const unsigned key_required =
        1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | 1 << MDBX_SET | 1 << MDBX_SET_RANGE;
    if (unlikely(key_required & (1 << move_op)))
      return LOG_IFERR(MDBX_EINVAL);
    stub_key.iov_len = 0;
    stub_key.iov_base = nullptr;
    key = &stub_key;
  }

  probe.outer.signature = cur_signature_live;
  err = cursor_ops(&probe.outer, key, data, move_op);
  if (unlikely(err != MDBX_SUCCESS)) {
    /* a "not found" outcome is tolerated while the twin remains positioned */
    if (err != MDBX_NOTFOUND || !is_pointed(&probe.outer))
      return LOG_IFERR(err);
  }

  if (move_op == MDBX_LAST) {
    probe.outer.flags |= z_eof_hard;
    probe.inner.cursor.flags |= z_eof_hard;
  }
  return mdbx_estimate_distance(cursor, &probe.outer, distance_items);
}

/* Estimates the number of items between two positions of a table.
 *
 * The range is given by (begin_key, begin_data) and (end_key, end_data).
 * A NULL key denotes the first (for begin) or the last (for end) item, and
 * MDBX_EPSILON denotes "the same key as the other bound". A data pointer is
 * accepted only together with a regular (non-NULL, non-epsilon) key, and
 * both keys being MDBX_EPSILON is rejected; such calls, as well as a NULL
 * size_items, yield MDBX_EINVAL.
 *
 * Two temporary on-stack cursors are positioned at the bounds and the
 * result is obtained from mdbx_estimate_distance(), except for the shortcut
 * cases handled below (empty table, whole table, single key). */
__hot int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *begin_key, const MDBX_val *begin_data,
                              const MDBX_val *end_key, const MDBX_val *end_data, ptrdiff_t *size_items) {
  if (unlikely(!size_items))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(begin_data && (begin_key == nullptr || begin_key == MDBX_EPSILON)))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(end_data && (end_key == nullptr || end_key == MDBX_EPSILON)))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  cursor_couple_t begin;
  /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */
  rc = cursor_init(&begin.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  /* an empty table: nothing to estimate */
  if (unlikely(begin.outer.tree->items == 0)) {
    *size_items = 0;
    return MDBX_SUCCESS;
  }

  if (!begin_key) {
    if (unlikely(!end_key)) {
      /* LY: FIRST..LAST case */
      *size_items = (ptrdiff_t)begin.outer.tree->items;
      return MDBX_SUCCESS;
    }
    rc = outer_first(&begin.outer, nullptr, nullptr);
    if (unlikely(end_key == MDBX_EPSILON)) {
      /* LY: FIRST..+epsilon case */
      return LOG_IFERR((rc == MDBX_SUCCESS) ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) : rc);
    }
  } else {
    if (unlikely(begin_key == MDBX_EPSILON)) {
      if (end_key == nullptr) {
        /* LY: -epsilon..LAST case */
        rc = outer_last(&begin.outer, nullptr, nullptr);
        return LOG_IFERR((rc == MDBX_SUCCESS) ? mdbx_cursor_count(&begin.outer, (size_t *)size_items) : rc);
      }
      /* LY: -epsilon..value case */
      assert(end_key != MDBX_EPSILON);
      begin_key = end_key;
    } else if (unlikely(end_key == MDBX_EPSILON)) {
      /* LY: value..+epsilon case */
      assert(begin_key != MDBX_EPSILON);
      end_key = begin_key;
    }
    if (end_key && !begin_data && !end_data &&
        (begin_key == end_key || begin.outer.clc->k.cmp(begin_key, end_key) == 0)) {
      /* LY: single key case */
      rc = cursor_seek(&begin.outer, (MDBX_val *)begin_key, nullptr, MDBX_SET).err;
      if (unlikely(rc != MDBX_SUCCESS)) {
        /* an absent key gives an empty range rather than an error */
        *size_items = 0;
        return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc);
      }
      *size_items = 1;
      /* for a multi-value key report the number of its values,
       * saturated to PTRDIFF_MAX when the counter type is wider */
      if (inner_pointed(&begin.outer))
        *size_items = (sizeof(*size_items) >= sizeof(begin.inner.nested_tree.items) ||
                       begin.inner.nested_tree.items <= PTRDIFF_MAX)
                          ? (size_t)begin.inner.nested_tree.items
                          : PTRDIFF_MAX;

      return MDBX_SUCCESS;
    } else {
      /* the seek works on local copies of the caller's key/data */
      MDBX_val proxy_key = *begin_key;
      MDBX_val proxy_data = {nullptr, 0};
      if (begin_data)
        proxy_data = *begin_data;
      rc = LOG_IFERR(cursor_seek(&begin.outer, &proxy_key, &proxy_data, MDBX_SET_LOWERBOUND).err);
    }
  }

  /* MDBX_NOTFOUND is tolerated while the begin-cursor remains positioned */
  if (unlikely(rc != MDBX_SUCCESS)) {
    if (rc != MDBX_NOTFOUND || !is_pointed(&begin.outer))
      return LOG_IFERR(rc);
  }

  cursor_couple_t end;
  rc = cursor_init(&end.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);
  if (!end_key) {
    /* open upper bound: go to the last item and mark the hard EOF */
    rc = outer_last(&end.outer, nullptr, nullptr);
    end.outer.flags |= z_eof_hard;
    end.inner.cursor.flags |= z_eof_hard;
  } else {
    MDBX_val proxy_key = *end_key;
    MDBX_val proxy_data = {nullptr, 0};
    if (end_data)
      proxy_data = *end_data;
    rc = cursor_seek(&end.outer, &proxy_key, &proxy_data, MDBX_SET_LOWERBOUND).err;
  }
  /* the same tolerance to MDBX_NOTFOUND for the end-cursor */
  if (unlikely(rc != MDBX_SUCCESS)) {
    if (rc != MDBX_NOTFOUND || !is_pointed(&end.outer))
      return LOG_IFERR(rc);
  }

  rc = mdbx_estimate_distance(&begin.outer, &end.outer, size_items);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);
  assert(*size_items >= -(ptrdiff_t)begin.outer.tree->items && *size_items <= (ptrdiff_t)begin.outer.tree->items);

  /* LY: It was decided to return the estimation results as-is
   * (i.e. negative) for inverted ranges, so the code below is disabled. */
#if 0

  /* Commit 8ddfd1f34ad7cf7a3c4aa75d2e248ca7e639ed63
     Change-Id: If59eccf7311123ab6384c4b93f9b1fed5a0a10d1 */

  if (*size_items < 0) {
    /* LY: inverted range case */
    *size_items += (ptrdiff_t)begin.outer.tree->items;
  } else if (*size_items == 0 && begin_key && end_key) {
    int cmp = begin.outer.kvx->cmp(&origin_begin_key, &origin_end_key);
    if (cmp == 0 && cursor_pointed(begin.inner.cursor.flags) &&
        begin_data && end_data)
      cmp = begin.outer.kvx->v.cmp(&origin_begin_data, &origin_end_data);
    if (cmp > 0) {
      /* LY: inverted range case with empty scope */
      *size_items = (ptrdiff_t)begin.outer.tree->items;
    }
  }
  assert(*size_items >= 0 &&
         *size_items <= (ptrdiff_t)begin.outer.tree->items);
#endif

  return MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Collects a bitmask of nested-tree depths found in a dupsort table.
 *
 * Walks over all distinct keys and sets in *mask:
 *  - bit 0 for a key with a single value;
 *  - bit 1 for a key whose values are held in a sub-page;
 *  - bit N for a key whose values are held in a sub-tree of height N.
 *
 * Returns MDBX_RESULT_TRUE when the table is not MDBX_DUPSORT (with *mask
 * zeroed), MDBX_CORRUPTED on an unexpected combination of node flags, and
 * MDBX_SUCCESS once the whole table has been traversed. */
__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) {
  if (unlikely(mask == nullptr))
    return LOG_IFERR(MDBX_EINVAL);
  *mask = 0;

  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  cursor_couple_t couple;
  rc = cursor_init(&couple.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (!(couple.outer.tree->flags & MDBX_DUPSORT))
    return MDBX_RESULT_TRUE;

  MDBX_val key, data;
  for (rc = outer_first(&couple.outer, &key, &data); rc == MDBX_SUCCESS;
       rc = outer_next(&couple.outer, &key, &data, MDBX_NEXT_NODUP)) {
    const node_t *node = page_node(couple.outer.pg[couple.outer.top], couple.outer.ki[couple.outer.top]);
    const tree_t *db = node_data(node);
    const unsigned flags = node_flags(node);
    if (flags == 0 || flags == N_BIG) {
      /* single-value entry, deep = 0 */
      *mask |= 1 << 0;
    } else if (flags == N_DUP) {
      /* single sub-page, deep = 1 */
      *mask |= 1 << 1;
    } else if (flags == (N_DUP | N_TREE)) {
      /* sub-tree, deep = its height */
      *mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height);
    } else {
      ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node-size", flags);
      return LOG_IFERR(MDBX_CORRUPTED);
    }
  }

  /* running off the end of the table is the normal way to finish */
  return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc);
}

/* Copies the canary of the transaction into *canary.
 *
 * Returns MDBX_EINVAL when canary is NULL. When the transaction check
 * fails, *canary is zero-filled and the check's error is returned. */
int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) {
  if (unlikely(!canary))
    return LOG_IFERR(MDBX_EINVAL);

  const int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED);
  if (likely(err == MDBX_SUCCESS)) {
    *canary = txn->canary;
    return MDBX_SUCCESS;
  }

  /* don't leave garbage in the caller's buffer on failure */
  memset(canary, 0, sizeof(*canary));
  return LOG_IFERR(err);
}

/* Looks up `key` in the table `dbi` and returns its value via `data`.
 *
 * A temporary on-stack cursor is positioned with MDBX_SET; the outcome of
 * that seek is the return value. MDBX_EINVAL is returned when key or data
 * is NULL. */
int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) {
  DKBUF_DEBUG;
  DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key));

  if (unlikely(key == nullptr || data == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  int err = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_couple_t couple;
  err = cursor_init(&couple.outer, txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  err = cursor_seek(&couple.outer, (MDBX_val *)key, data, MDBX_SET).err;
  return LOG_IFERR(err);
}

/* Positions a temporary on-stack cursor with MDBX_SET_LOWERBOUND using the
 * given key/data pair and returns the outcome of that operation; `key` and
 * `data` are passed to the cursor operation as in-out arguments.
 * MDBX_EINVAL is returned when key or data is NULL. */
int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) {
  if (unlikely(key == nullptr || data == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  int err = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_couple_t couple;
  err = cursor_init(&couple.outer, txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  err = cursor_ops(&couple.outer, key, data, MDBX_SET_LOWERBOUND);
  return LOG_IFERR(err);
}

/* Looks up `key` with MDBX_SET_KEY, returning the value via `data` and,
 * optionally, the number of values of that key via `values_count`.
 *
 * On a failed lookup *values_count (if given) is set to 0 and the seek's
 * error is returned. On success the count is 1, or the number of items of
 * the nested tree when the key has a pointed nested cursor; that number is
 * saturated to PTRDIFF_MAX if it does not fit. */
int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count) {
  DKBUF_DEBUG;
  DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key));

  if (unlikely(key == nullptr || data == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  int err = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_couple_t couple;
  err = cursor_init(&couple.outer, txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  err = cursor_seek(&couple.outer, key, data, MDBX_SET_KEY).err;
  if (unlikely(err != MDBX_SUCCESS)) {
    if (values_count)
      *values_count = 0;
    return LOG_IFERR(err);
  }

  if (!values_count)
    return MDBX_SUCCESS;

  size_t count = 1;
  if (inner_pointed(&couple.outer)) {
    if (sizeof(count) >= sizeof(couple.inner.nested_tree.items) || couple.inner.nested_tree.items <= PTRDIFF_MAX)
      count = (size_t)couple.inner.nested_tree.items;
    else
      count = PTRDIFF_MAX;
  }
  *values_count = count;
  return MDBX_SUCCESS;
}

/*----------------------------------------------------------------------------*/

/* Updates the canary of a write transaction.
 *
 * With a non-NULL `canary` the x/y/z fields are copied from it; if they
 * already match the current ones nothing is changed at all. With a NULL
 * `canary` only the `v` field is refreshed. Whenever an update happens,
 * `v` is set to the transaction id and the transaction is marked dirty. */
int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) {
  const int err = check_txn_rw(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (likely(canary != nullptr)) {
    const bool unchanged =
        txn->canary.x == canary->x && txn->canary.y == canary->y && txn->canary.z == canary->z;
    if (unchanged)
      return MDBX_SUCCESS;

    txn->canary.x = canary->x;
    txn->canary.y = canary->y;
    txn->canary.z = canary->z;
  }

  txn->canary.v = txn->txnid;
  txn->flags |= MDBX_TXN_DIRTY;
  return MDBX_SUCCESS;
}

/* Reports whether the given address lies within a "dirty" page of the given
 * write transaction. Ultimately this allows avoiding unnecessary copying of
 * data from NON-dirty pages.
 *
 * "Dirty" pages are those that have already been modified during the write
 * transaction. Accordingly, any further changes may overwrite such pages.
 * Therefore none of the modifying functions should receive pointers to data
 * inside such pages as arguments. In turn, "NON-dirty" pages will be copied
 * before modification.
 *
 * In other words, data from "dirty" pages must either be copied before being
 * passed as arguments for further modifications, or be rejected while
 * validating the arguments.
 *
 * Thus the function allows both avoiding unnecessary copying and performing
 * a more thorough validation of arguments.
 *
 * IMPORTANT: The passed pointer must point to the beginning of the data.
 * Only then is it guaranteed that the actual page header is physically
 * located in the same memory page, including the case of multi-page P_LARGE
 * pages holding long data. */
int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
  const int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  const MDBX_env *env = txn->env;
  const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base);
  if (offset >= 0) {
    const pgno_t pgno = bytes2pgno(env, offset);
    if (likely(pgno < txn->geo.first_unallocated)) {
      const page_t *page = pgno2page(env, pgno);
      if (unlikely(page->pgno != pgno || (page->flags & P_ILL_BITS) != 0)) {
        /* The ptr pointed into middle of a large page,
         * not to the beginning of a data. */
        return LOG_IFERR(MDBX_EINVAL);
      }
      /* nothing can be dirty inside a read-only transaction */
      if (txn->flags & MDBX_TXN_RDONLY)
        return MDBX_RESULT_FALSE;
      return is_modifable(txn, page) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
    }
    if ((size_t)offset < env->dxb_mmap.limit) {
      /* The pointer addresses something within the mmap, but beyond the
       * allocated pages. This may happen if mdbx_is_dirty() is called after
       * an operation during which a dirty page was returned to the
       * unallocated space. */
      if (txn->flags & MDBX_TXN_RDONLY)
        return LOG_IFERR(MDBX_EINVAL);
      return MDBX_RESULT_TRUE;
    }
  }

  /* The page is outside of the used mmap range, i.e. either an invalid
   * address was passed to the function, or an address inside a shadow page
   * that was allocated by malloc().
   *
   * In the MDBX_WRITE_MAP mode such a page is definitely "not dirty",
   * while in the modes without MDBX_WRITE_MAP it is definitely "not clean". */
  if (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY))
    return LOG_IFERR(MDBX_EINVAL);
  return MDBX_RESULT_TRUE;
}

/* Deletes a record from the table `dbi`.
 *
 * Without `data` the key is located with MDBX_SET and removed together with
 * all of its values (MDBX_ALLDUPS). With `data` the exact key/value pair is
 * located with MDBX_GET_BOTH and only that pair is removed.
 *
 * Returns MDBX_EINVAL for a NULL key, MDBX_BAD_DBI for dbi <= FREE_DBI,
 * otherwise the outcome of the checks, the seek or the deletion. */
int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data) {
  if (unlikely(key == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(dbi <= FREE_DBI))
    return LOG_IFERR(MDBX_BAD_DBI);

  int err = check_txn_rw(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_couple_t couple;
  err = cursor_init(&couple.outer, txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  /* the seek gets a private copy of the value instead of the caller's one */
  MDBX_val data_copy;
  MDBX_val *seek_data = nullptr;
  MDBX_cursor_op seek_op = MDBX_SET;
  unsigned del_flags = MDBX_ALLDUPS;
  if (data) {
    data_copy = *data;
    seek_data = &data_copy;
    seek_op = MDBX_GET_BOTH;
    del_flags = 0;
  }

  err = cursor_seek(&couple.outer, (MDBX_val *)key, seek_data, seek_op).err;
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  /* link the temporary cursor into the transaction's list of cursors for
   * the duration of the deletion, then unlink it again */
  couple.outer.next = txn->cursors[dbi];
  txn->cursors[dbi] = &couple.outer;
  err = cursor_del(&couple.outer, del_flags);
  txn->cursors[dbi] = couple.outer.next;
  return LOG_IFERR(err);
}

/* Stores a key/value pair into the table `dbi`.
 *
 * Argument checks: key and data must not be NULL (MDBX_EINVAL), dbi must be
 * greater than FREE_DBI (MDBX_BAD_DBI), and only the flags listed in the
 * mask below are accepted (MDBX_EINVAL). MDBX_RESERVE is refused with
 * MDBX_INCOMPATIBLE for tables having any of the dupsort-related flags.
 *
 * With MDBX_CURRENT the key is looked up first. If the table is
 * MDBX_DUPSORT, MDBX_ALLDUPS is not given and the key holds multiple
 * values, then either MDBX_EMULTIVAL is returned (when MDBX_NOOVERWRITE is
 * given) or all the values are deleted and the put proceeds without
 * MDBX_CURRENT. */
int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) {
  if (unlikely(key == nullptr || data == nullptr))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(dbi <= FREE_DBI))
    return LOG_IFERR(MDBX_BAD_DBI);

  if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND |
                         MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE)))
    return LOG_IFERR(MDBX_EINVAL);

  int err = check_txn_rw(txn, MDBX_TXN_BLOCKED);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  cursor_couple_t couple;
  err = cursor_init(&couple.outer, txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(flags & MDBX_MULTIPLE)) {
    err = cursor_check_multiple(&couple.outer, key, data, flags);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);
  }

  if (flags & MDBX_RESERVE) {
    const unsigned dup_related = MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_INTEGERDUP | MDBX_DUPFIXED;
    if (unlikely(couple.outer.tree->flags & dup_related))
      return LOG_IFERR(MDBX_INCOMPATIBLE);
    data->iov_base = nullptr;
  }

  /* from here on the temporary cursor is linked into the transaction's
   * list of cursors; it is unlinked right before returning */
  couple.outer.next = txn->cursors[dbi];
  txn->cursors[dbi] = &couple.outer;

  /* LY: support for update (explicit overwrite) */
  if (flags & MDBX_CURRENT) {
    err = cursor_seek(&couple.outer, (MDBX_val *)key, nullptr, MDBX_SET).err;
    if (likely(err == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && !(flags & MDBX_ALLDUPS)) {
      /* LY: allows update (explicit overwrite) only for unique keys */
      node_t *node = page_node(couple.outer.pg[couple.outer.top], couple.outer.ki[couple.outer.top]);
      if (node_flags(node) & N_DUP) {
        tASSERT(txn, inner_pointed(&couple.outer) && couple.outer.subcur->nested_tree.items > 1);
        if (flags & MDBX_NOOVERWRITE) {
          err = MDBX_EMULTIVAL;
        } else {
          /* replace the whole set of values by the single new one */
          flags -= MDBX_CURRENT;
          err = cursor_del(&couple.outer, MDBX_ALLDUPS);
        }
      }
    }
  }

  if (likely(err == MDBX_SUCCESS))
    err = cursor_put_checklen(&couple.outer, key, data, flags);
  txn->cursors[dbi] = couple.outer.next;

  return LOG_IFERR(err);
}

//------------------------------------------------------------------------------

/* Updates or deletes an existing record while returning the previous data
 * value via old_data. If new_data is NULL a deletion is performed,
 * otherwise an update/insertion.
 *
 * The current value may reside in an already modified (dirty) page. In that
 * case the page will be overwritten by the update and the old value itself
 * will be lost. Therefore old_data must initially provide an additional
 * buffer for copying the old value. If the provided buffer is too small,
 * the function returns -1 after setting old_data->iov_len to the
 * appropriate value.
 *
 * For non-unique keys a second usage scenario is also possible, where
 * old_data selects a particular record, among those with the same key, to
 * be deleted/updated. To choose this scenario both MDBX_CURRENT and
 * MDBX_NOOVERWRITE must be given in flags. Exactly this combination was
 * chosen because it is otherwise meaningless, which allows identifying a
 * request for this scenario.
 *
 * The function could be replaced by the corresponding cursor operations
 * after two improvements (TODO):
 *  - external allocation of cursors, including on the stack (without malloc).
 *  - obtaining the dirty-status of a page by address (to know about
 *    MUTABLE/WRITEABLE).
 *
 * The old value is preserved by calling `preserver` (if not NULL) with
 * `preserver_context`, but only when the value resides in a page that is
 * modifiable by this transaction; otherwise old_data just refers to the
 * present value.
 */

int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data,
                    MDBX_put_flags_t flags, MDBX_preserve_func preserver, void *preserver_context) {
  if (unlikely(!key || !old_data || old_data == new_data))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(old_data->iov_base == nullptr && old_data->iov_len))
    return LOG_IFERR(MDBX_EINVAL);

  /* a deletion is requested by MDBX_CURRENT without MDBX_RESERVE */
  if (unlikely(new_data == nullptr && (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(dbi <= FREE_DBI))
    return LOG_IFERR(MDBX_BAD_DBI);

  if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND |
                         MDBX_APPENDDUP | MDBX_CURRENT)))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  cursor_couple_t cx;
  rc = cursor_init(&cx.outer, txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);
  /* The on-stack cursor is now linked into the transaction's list of
   * cursors, so every exit below MUST go through `bailout` to unlink it. */
  cx.outer.next = txn->cursors[dbi];
  txn->cursors[dbi] = &cx.outer;

  MDBX_val present_key = *key;
  if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) {
    /* old_data holds the value that selects a particular duplicate */
    if (unlikely(!(txn->dbs[dbi].flags & MDBX_DUPSORT))) {
      rc = MDBX_EINVAL;
      goto bailout;
    }

    /* drop the extra bit, it only signalled the requested mode */
    flags -= MDBX_NOOVERWRITE;

    rc = cursor_seek(&cx.outer, &present_key, old_data, MDBX_GET_BOTH).err;
    if (rc != MDBX_SUCCESS)
      goto bailout;
  } else {
    /* old_data is a buffer for saving the previous value */
    if (unlikely(new_data && old_data->iov_base == new_data->iov_base)) {
      /* unlink the cursor rather than returning directly, otherwise
       * txn->cursors[dbi] would keep pointing at this stack frame */
      rc = MDBX_EINVAL;
      goto bailout;
    }
    MDBX_val present_data;
    rc = cursor_seek(&cx.outer, &present_key, &present_data, MDBX_SET_KEY).err;
    if (unlikely(rc != MDBX_SUCCESS)) {
      old_data->iov_base = nullptr;
      old_data->iov_len = 0;
      /* an absent key is acceptable only for an insertion */
      if (rc != MDBX_NOTFOUND || (flags & MDBX_CURRENT))
        goto bailout;
    } else if (flags & MDBX_NOOVERWRITE) {
      rc = MDBX_KEYEXIST;
      *old_data = present_data;
      goto bailout;
    } else {
      page_t *page = cx.outer.pg[cx.outer.top];
      if (txn->dbs[dbi].flags & MDBX_DUPSORT) {
        if (flags & MDBX_CURRENT) {
          /* disallow update/delete for multi-values */
          node_t *node = page_node(page, cx.outer.ki[cx.outer.top]);
          if (node_flags(node) & N_DUP) {
            tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1);
            if (cx.outer.subcur->nested_tree.items > 1) {
              rc = MDBX_EMULTIVAL;
              goto bailout;
            }
          }
          /* In LMDB the MDBX_CURRENT flag here would lead to replacing
           * the data without regard to the MDBX_DUPSORT ordering, but here
           * it is acceptable in any case, since we have verified that
           * there is only a single value for the key. */
        }
      }

      if (is_modifable(txn, page)) {
        if (new_data && cmp_lenfast(&present_data, new_data) == 0) {
          /* if the data is identical, there is nothing to do */
          *old_data = *new_data;
          goto bailout;
        }
        rc = preserver ? preserver(preserver_context, old_data, present_data.iov_base, present_data.iov_len)
                       : MDBX_SUCCESS;
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      } else {
        *old_data = present_data;
      }
      flags |= MDBX_CURRENT;
    }
  }

  if (likely(new_data))
    rc = cursor_put_checklen(&cx.outer, key, new_data, flags);
  else
    rc = cursor_del(&cx.outer, flags & MDBX_ALLDUPS);

bailout:
  txn->cursors[dbi] = cx.outer.next;
  return LOG_IFERR(rc);
}

/* Default preserver for mdbx_replace(): copies `bytes` bytes from `src`
 * into the buffer described by `target`.
 *
 * If the buffer is too small, nothing is copied: target->iov_base is set
 * to NULL, target->iov_len to the required size, and MDBX_RESULT_TRUE is
 * returned. Otherwise target->iov_len is set to `bytes` and MDBX_SUCCESS is
 * returned. `context` is unused. */
static int default_value_preserver(void *context, MDBX_val *target, const void *src, size_t bytes) {
  (void)context;
  if (unlikely(target->iov_len < bytes)) {
    target->iov_base = nullptr;
    target->iov_len = bytes;
    return MDBX_RESULT_TRUE;
  }
  target->iov_len = bytes;
  /* An empty value may come with a NULL buffer; passing a null pointer to
   * memcpy() is undefined even for a zero length, so skip the call. */
  if (bytes)
    memcpy(target->iov_base, src, bytes);
  return MDBX_SUCCESS;
}

/* Convenience form of mdbx_replace_ex() that uses default_value_preserver()
 * (with no context) to save the previous value into the caller's buffer
 * given by old_data. All other arguments are passed through unchanged. */
int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data,
                 MDBX_put_flags_t flags) {
  return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, default_value_preserver, nullptr);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

#ifdef __SANITIZE_THREAD__
/* LY: avoid tsan-trap by txn, mm_last_pg and geo.first_unallocated */
__attribute__((__no_sanitize_thread__, __noinline__))
#endif
/* Reports how far a transaction lags behind the most recent one.
 *
 * Returns a non-negative lag (saturated to INT_MAX), or a negative value
 * when the transaction check fails (a positive error code is negated).
 * For a write transaction the lag is always 0.
 *
 * If `percent` is not NULL it receives the rounded percentage of used
 * pages: taken from the transaction's own geometry for a write transaction,
 * or from the most recent meta-page otherwise. */
int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) {
  const int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED);
  if (unlikely(err != MDBX_SUCCESS)) {
    /* errors are reported as negative values to keep them apart from a lag */
    return LOG_IFERR((err > 0) ? -err : err);
  }

  MDBX_env *env = txn->env;
  if (unlikely(!(txn->flags & MDBX_TXN_RDONLY))) {
    if (percent) {
      const uint64_t scaled = txn->geo.first_unallocated * UINT64_C(100) + txn->geo.end_pgno / 2;
      *percent = (int)(scaled / txn->geo.end_pgno);
    }
    return 0;
  }

  troika_t troika = meta_tap(env);
  txnid_t lag;
  /* re-read until a consistent snapshot of the recent meta is obtained */
  do {
    const meta_ptr_t head = meta_recent(env, &troika);
    if (percent) {
      const pgno_t maxpg = head.ptr_v->geometry.now;
      const uint64_t scaled = head.ptr_v->geometry.first_unallocated * UINT64_C(100) + maxpg / 2;
      *percent = (int)(scaled / maxpg);
    }
    lag = (head.txnid - txn->txnid) / xMDBX_TXNID_STEP;
  } while (unlikely(meta_should_retry(env, &troika)));

  if (lag > INT_MAX)
    return INT_MAX;
  return (int)lag;
}

/* Returns the environment of the transaction, or NULL when `txn` is NULL or
 * either the transaction's or its environment's signature doesn't match. */
MDBX_env *mdbx_txn_env(const MDBX_txn *txn) {
  if (likely(txn && txn->signature == txn_signature && txn->env->signature.weak == env_signature))
    return txn->env;
  return nullptr;
}

/* Returns the id of the transaction, or 0 when `txn` is NULL or its
 * signature doesn't match. */
uint64_t mdbx_txn_id(const MDBX_txn *txn) {
  if (likely(txn && txn->signature == txn_signature))
    return txn->txnid;
  return 0;
}

MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) {
  STATIC_ASSERT(
      (MDBX_TXN_INVALID & (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD |
                           txn_gc_drained | txn_shrink_allowed | txn_rw_begin_flags | txn_ro_begin_flags)) == 0);
  if (unlikely(!txn || txn->signature != txn_signature))
    return MDBX_TXN_INVALID;
  assert(0 == (int)(txn->flags & MDBX_TXN_INVALID));

  MDBX_txn_flags_t flags = txn->flags;
  if (F_ISSET(flags, MDBX_TXN_PARKED | MDBX_TXN_RDONLY) && txn->to.reader &&
      safe64_read(&txn->to.reader->tid) == MDBX_TID_TXN_OUSTED)
    flags |= MDBX_TXN_OUSTED;
  return flags;
}

/* Resets a read-only transaction via txn_end() with
 * TXN_END_RESET | TXN_END_UPDATE.
 * Returns MDBX_EINVAL for a transaction that is not read-only. */
int mdbx_txn_reset(MDBX_txn *txn) {
  int err = check_txn(txn, 0);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  /* This call is only valid for read-only txns */
  if (unlikely(!(txn->flags & MDBX_TXN_RDONLY)))
    return LOG_IFERR(MDBX_EINVAL);

  /* LY: don't close DBI-handles */
  err = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE);
  if (err == MDBX_SUCCESS) {
    /* the handle must stay valid and ownerless after the reset */
    tASSERT(txn, txn->signature == txn_signature);
    tASSERT(txn, txn->owner == 0);
  }
  return LOG_IFERR(err);
}

/* Marks the transaction as erroneous by setting MDBX_TXN_ERROR.
 *
 * For a write transaction the mark is also propagated along the chain of
 * `nested` transactions; each of them is validated by check_txn() first,
 * and the first failed check aborts the walk with its error. */
int mdbx_txn_break(MDBX_txn *txn) {
  for (;;) {
    const int err = check_txn(txn, 0);
    if (unlikely(err != MDBX_SUCCESS))
      return LOG_IFERR(err);

    txn->flags |= MDBX_TXN_ERROR;
    if (txn->flags & MDBX_TXN_RDONLY)
      return MDBX_SUCCESS;

    txn = txn->nested;
    if (!txn)
      return MDBX_SUCCESS;
  }
}

/* Aborts the transaction via txn_abort() after validating both the
 * transaction and its environment.
 *
 * When built with MDBX_TXN_CHECKOWNER, a transaction that has
 * MDBX_NOSTICKYTHREADS set without MDBX_TXN_RDONLY and is owned by another
 * thread is marked as broken and MDBX_THREAD_MISMATCH is returned. */
int mdbx_txn_abort(MDBX_txn *txn) {
  int err = check_txn(txn, 0);
  if (likely(err == MDBX_SUCCESS))
    err = check_env(txn->env, true);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

#if MDBX_TXN_CHECKOWNER
  if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == MDBX_NOSTICKYTHREADS) {
    if (unlikely(txn->owner != osal_thread_self())) {
      mdbx_txn_break(txn);
      return LOG_IFERR(MDBX_THREAD_MISMATCH);
    }
  }
#endif /* MDBX_TXN_CHECKOWNER */

  err = txn_abort(txn);
  return LOG_IFERR(err);
}

/* Parks a read-only transaction via txn_park().
 *
 * txn        - transaction handle; validated with every blocking flag except
 *              MDBX_TXN_ERROR treated as a reason to reject.
 * autounpark - forwarded unchanged to txn_park().
 *
 * A transaction already carrying MDBX_TXN_ERROR is not parked: it is ended
 * with TXN_END_RESET | TXN_END_UPDATE and MDBX_OUSTED is returned (or the
 * txn_end() error, if any).
 *
 * NOTE(review): for a non-read-only transaction the function returns
 * MDBX_TXN_INVALID, which is a flag constant rather than an error code;
 * kept as-is here, confirm whether that is intended. */
int mdbx_txn_park(MDBX_txn *txn, bool autounpark) {
  STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_ERROR);
  const int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  if (unlikely(!(txn->flags & MDBX_TXN_RDONLY)))
    return LOG_IFERR(MDBX_TXN_INVALID);

  if (likely(!(txn->flags & MDBX_TXN_ERROR)))
    return LOG_IFERR(txn_park(txn, autounpark));

  /* Broken transaction: finish it instead of parking */
  const int end_rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE);
  return LOG_IFERR(end_rc ? end_rc : MDBX_OUSTED);
}

/* Resumes a parked read-only transaction via txn_unpark().
 *
 * txn               - transaction handle; MDBX_TXN_PARKED and MDBX_TXN_ERROR
 *                     are tolerated by the validation.
 * restart_if_ousted - when true and txn_unpark() reports MDBX_OUSTED, the
 *                     transaction is restarted with txn_renew().
 *
 * Returns MDBX_SUCCESS right away when the transaction is not both read-only
 * and parked; MDBX_RESULT_TRUE when an ousted transaction was restarted
 * successfully; otherwise the result of txn_unpark() or the txn_renew()
 * error. */
int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted) {
  STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED + MDBX_TXN_ERROR);
  const int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED - MDBX_TXN_ERROR);
  if (unlikely(err != MDBX_SUCCESS))
    return LOG_IFERR(err);

  const bool parked_reader = F_ISSET(txn->flags, MDBX_TXN_RDONLY | MDBX_TXN_PARKED);
  if (unlikely(!parked_reader))
    return MDBX_SUCCESS;

  const int unpark_rc = txn_unpark(txn);
  if (!restart_if_ousted || likely(unpark_rc != MDBX_OUSTED))
    return LOG_IFERR(unpark_rc);

  /* Ousted and restart requested: the txn is expected to be finished now */
  tASSERT(txn, txn->flags & MDBX_TXN_FINISHED);
  const int renew_rc = txn_renew(txn, MDBX_TXN_RDONLY);
  if (renew_rc != MDBX_SUCCESS)
    return LOG_IFERR(renew_rc);
  return MDBX_RESULT_TRUE;
}

/* Renews a read-only transaction so the same handle can be used again.
 *
 * Returns MDBX_EINVAL for a null handle or a non-read-only transaction,
 * MDBX_EBADSIGN for a handle with a wrong signature. If the transaction still
 * has an owner or is not finished yet, it is reset first via mdbx_txn_reset()
 * and any error from that call is returned as-is. Otherwise returns the
 * result of txn_renew(). */
int mdbx_txn_renew(MDBX_txn *txn) {
  if (unlikely(!txn))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(txn->signature != txn_signature))
    return LOG_IFERR(MDBX_EBADSIGN);

  if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0))
    return LOG_IFERR(MDBX_EINVAL);

  if (unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) {
    /* mdbx_txn_reset() has already logged the error, so return it untouched */
    int rc = mdbx_txn_reset(txn);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
  }

  int rc = txn_renew(txn, MDBX_TXN_RDONLY);
  if (rc == MDBX_SUCCESS) {
    /* The conditional must be parenthesized: `==` binds tighter than `?:`,
     * so without the parentheses the comparison result would become the
     * condition of the ternary and the owner would not actually be checked. */
    tASSERT(txn, txn->owner == ((txn->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self()));
    DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid,
          (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)txn->env, txn->dbs[MAIN_DBI].root,
          txn->dbs[FREE_DBI].root);
  }
  return LOG_IFERR(rc);
}

/* Attaches an application-defined pointer to the transaction.
 *
 * Returns MDBX_SUCCESS after storing `ctx` into txn->userctx, or the
 * check_txn() error when the handle is rejected (a finished transaction is
 * rejected as well). */
int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) {
  const int err = check_txn(txn, MDBX_TXN_FINISHED);
  if (likely(err == MDBX_SUCCESS)) {
    txn->userctx = ctx;
    return MDBX_SUCCESS;
  }
  return LOG_IFERR(err);
}

void *mdbx_txn_get_userctx(const MDBX_txn *txn) { return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; }

/* Begins a transaction in `env` and hands it out through `*ret`.
 *
 * env     - environment; validated with check_env().
 * parent  - optional parent transaction; when non-null a nested (child)
 *           write transaction is created on top of it.
 * flags   - begin flags. Bits outside txn_rw_begin_flags are rejected with
 *           MDBX_EINVAL when a parent is given, or when they also fall
 *           outside txn_ro_begin_flags.
 * ret     - out-parameter; set to nullptr up front and to the new handle
 *           only on success. Must not be null (MDBX_EINVAL otherwise).
 * context - stored into txn->userctx on success.
 *
 * Three paths are taken depending on the arguments:
 *  - with a parent: a child txn is allocated and seeded from the parent;
 *  - without a parent and without MDBX_TXN_RDONLY: the preallocated
 *    env->basal_txn is reused and started through txn_renew();
 *  - otherwise: a read-only txn is allocated and started through txn_renew().
 *
 * Returns MDBX_SUCCESS, or an error such as MDBX_EINVAL, MDBX_EACCESS,
 * MDBX_BAD_TXN, MDBX_INCOMPATIBLE, MDBX_ENOMEM, or whatever the called
 * helpers (check_env, check_txn, txn_spill, dpl_alloc, pnl_insert_span,
 * cursor_shadow, txn_renew) report. */
int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret, void *context) {
  if (unlikely(!ret))
    return LOG_IFERR(MDBX_EINVAL);
  *ret = nullptr;

  if (unlikely((flags & ~txn_rw_begin_flags) && (parent || (flags & ~txn_ro_begin_flags))))
    return LOG_IFERR(MDBX_EINVAL);

  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(env->flags & MDBX_RDONLY & ~flags)) /* write txn in RDONLY env */
    return LOG_IFERR(MDBX_EACCESS);

  MDBX_txn *txn = nullptr;
  if (parent) {
    /* Nested transactions: Max 1 child, write txns only, no writemap */
    rc = check_txn(parent, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED);
    if (unlikely(rc != MDBX_SUCCESS))
      return LOG_IFERR(rc);

    if (unlikely(parent->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP))) {
      /* A read-only parent yields MDBX_BAD_TXN; a writemap parent yields
       * MDBX_INCOMPATIBLE */
      rc = MDBX_BAD_TXN;
      if ((parent->flags & MDBX_TXN_RDONLY) == 0) {
        ERROR("%s mode is incompatible with nested transactions", "MDBX_WRITEMAP");
        rc = MDBX_INCOMPATIBLE;
      }
      return LOG_IFERR(rc);
    }

    if (env->options.spill_parent4child_denominator) {
      /* Spill dirty-pages of parent to provide dirtyroom for child txn */
      rc = txn_spill(parent, nullptr, parent->tw.dirtylist->length / env->options.spill_parent4child_denominator);
      if (unlikely(rc != MDBX_SUCCESS))
        return LOG_IFERR(rc);
    }
    tASSERT(parent, audit_ex(parent, 0, false) == 0);

    /* The child inherits these flags from its parent */
    flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP);
  } else if ((flags & MDBX_TXN_RDONLY) == 0) {
    /* Reuse preallocated write txn. However, do not touch it until
     * txn_renew() succeeds, since it currently may be active. */
    txn = env->basal_txn;
    goto renew;
  }

  /* Compute the size of a single allocation that holds the txn header
   * followed by its per-DBI arrays. A read-only txn uses the smaller `to`
   * part instead of `tw`, and additionally carries its own dbi_seqs array and
   * (when MDBX_ENABLE_DBI_SPARSE) the sparse bitmap; a nested txn borrows
   * both of those from its parent further below. */
  const intptr_t bitmap_bytes =
#if MDBX_ENABLE_DBI_SPARSE
      ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT;
#else
      0;
#endif /* MDBX_ENABLE_DBI_SPARSE */
  STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to));
  const size_t base =
      (flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn);
  const size_t size = base +
                      ((flags & MDBX_TXN_RDONLY) ? (size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) : 0) +
                      env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_state[0]));
  txn = osal_malloc(size);
  if (unlikely(txn == nullptr))
    return LOG_IFERR(MDBX_ENOMEM);
#if MDBX_DEBUG
  /* Debug builds fill the block with a pattern and mark it undefined for
   * Valgrind before the header is zeroed below */
  memset(txn, 0xCD, size);
  VALGRIND_MAKE_MEM_UNDEFINED(txn, size);
#endif /* MDBX_DEBUG */
  MDBX_ANALYSIS_ASSUME(size > base);
  /* Only the header part (`base` bytes) is zeroed; the trailing arrays are
   * not cleared here */
  memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base);
  /* Layout after the header: dbs[], cursors[], ..., dbi_state[] at the very
   * end of the allocation */
  txn->dbs = ptr_disp(txn, base);
  txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0]));
#if MDBX_DEBUG
  txn->cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */
#endif
  txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0]));
  txn->flags = flags;
  txn->env = env;

  if (parent) {
    tASSERT(parent, dpl_check(parent));
    /* A nested txn shares the sparse bitmap and the dbi_seqs array with its
     * parent instead of owning copies */
#if MDBX_ENABLE_DBI_SPARSE
    txn->dbi_sparse = parent->dbi_sparse;
#endif /* MDBX_ENABLE_DBI_SPARSE */
    txn->dbi_seqs = parent->dbi_seqs;
    txn->geo = parent->geo;
    rc = dpl_alloc(txn);
    if (likely(rc == MDBX_SUCCESS)) {
      /* Room for the parent's reclaimed list plus its loose pages, which are
       * moved into that list below */
      const size_t len = MDBX_PNL_GETSIZE(parent->tw.repnl) + parent->tw.loose_count;
      txn->tw.repnl = pnl_alloc((len > MDBX_PNL_INITIAL) ? len : MDBX_PNL_INITIAL);
      if (unlikely(!txn->tw.repnl))
        rc = MDBX_ENOMEM;
    }
    if (unlikely(rc != MDBX_SUCCESS)) {
    nested_failed:
      /* Shared failure exit for the nested path: release what the child owns
       * so far. Also reached via goto from the loose-pages loop below. */
      pnl_free(txn->tw.repnl);
      dpl_free(txn);
      osal_free(txn);
      return LOG_IFERR(rc);
    }

    /* Move loose pages to reclaimed list */
    if (parent->tw.loose_count) {
      do {
        page_t *lp = parent->tw.loose_pages;
        tASSERT(parent, lp->flags == P_LOOSE);
        rc = pnl_insert_span(&parent->tw.repnl, lp->pgno, 1);
        if (unlikely(rc != MDBX_SUCCESS))
          goto nested_failed;
        MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
        VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
        parent->tw.loose_pages = page_next(lp);
        /* Remove from dirty list */
        page_wash(parent, dpl_exist(parent, lp->pgno), lp, 1);
      } while (parent->tw.loose_pages);
      parent->tw.loose_count = 0;
#if MDBX_ENABLE_REFUND
      parent->tw.loose_refund_wl = 0;
#endif /* MDBX_ENABLE_REFUND */
      tASSERT(parent, dpl_check(parent));
    }
    txn->tw.dirtyroom = parent->tw.dirtyroom;
    txn->tw.dirtylru = parent->tw.dirtylru;

    dpl_sort(parent);
    if (parent->tw.spilled.list)
      spill_purge(parent);

    /* The child starts with a copy of the parent's reclaimed page list */
    tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.repnl) >= MDBX_PNL_GETSIZE(parent->tw.repnl));
    memcpy(txn->tw.repnl, parent->tw.repnl, MDBX_PNL_SIZEOF(parent->tw.repnl));
    eASSERT(env, pnl_check_allocated(txn->tw.repnl, (txn->geo.first_unallocated /* LY: intentional assignment
                                                                               here, only for assertion */
                                                     = parent->geo.first_unallocated) -
                                                        MDBX_ENABLE_REFUND));

    txn->tw.gc.time_acc = parent->tw.gc.time_acc;
    txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed;
    /* The retxl and retired_pages lists are handed over to the child; while
     * the child is alive the parent's pointer fields hold the list length at
     * hand-over time (cast to a pointer) instead of a real pointer. */
    if (parent->tw.gc.retxl) {
      txn->tw.gc.retxl = parent->tw.gc.retxl;
      parent->tw.gc.retxl = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.retxl);
    }

    txn->tw.retired_pages = parent->tw.retired_pages;
    parent->tw.retired_pages = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages);

    txn->txnid = parent->txnid;
    txn->front_txnid = parent->front_txnid + 1;
#if MDBX_ENABLE_REFUND
    txn->tw.loose_refund_wl = 0;
#endif /* MDBX_ENABLE_REFUND */
    txn->canary = parent->canary;
    /* Link parent and child to each other */
    parent->flags |= MDBX_TXN_HAS_CHILD;
    parent->nested = txn;
    txn->parent = parent;
    txn->owner = parent->owner;
    txn->tw.troika = parent->tw.troika;

    txn->cursors[FREE_DBI] = nullptr;
    txn->cursors[MAIN_DBI] = nullptr;
    /* Core DBIs inherit the parent's state minus the FRESH/CREAT/DIRTY bits;
     * the state of all other DBIs is cleared */
    txn->dbi_state[FREE_DBI] = parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY);
    txn->dbi_state[MAIN_DBI] = parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY);
    memset(txn->dbi_state + CORE_DBS, 0, (txn->n_dbi = parent->n_dbi) - CORE_DBS);
    memcpy(txn->dbs, parent->dbs, sizeof(txn->dbs[0]) * CORE_DBS);

    tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length ==
                        (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit));
    tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                     (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));
    env->txn = txn;
    tASSERT(parent, parent->cursors[FREE_DBI] == nullptr);
    rc = parent->cursors[MAIN_DBI] ? cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) : MDBX_SUCCESS;
    if (AUDIT_ENABLED() && ASSERT_ENABLED()) {
      /* The signature is set early here only so the audit can run on the
       * child; it is set again on the common success path below */
      txn->signature = txn_signature;
      tASSERT(txn, audit_ex(txn, 0, false) == 0);
    }
    if (unlikely(rc != MDBX_SUCCESS))
      txn_end(txn, TXN_END_FAIL_BEGINCHILD);
  } else { /* MDBX_TXN_RDONLY */
    /* A read-only txn owns its dbi_seqs array (placed right after cursors[])
     * and its sparse bitmap (placed right before dbi_state[]) */
    txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0]));
#if MDBX_ENABLE_DBI_SPARSE
    txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes);
#endif /* MDBX_ENABLE_DBI_SPARSE */
  renew:
    /* Also the entry point for the basal write txn (see the goto above) */
    rc = txn_renew(txn, flags);
  }

  if (unlikely(rc != MDBX_SUCCESS)) {
    /* The basal txn is preallocated by the environment and is never freed
     * here */
    if (txn != env->basal_txn)
      osal_free(txn);
  } else {
    if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY))
      eASSERT(env, txn->flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED));
    else if (flags & MDBX_TXN_RDONLY)
      eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP |
                                   /* Win32: SRWL flag */ txn_shrink_allowed)) == 0);
    else {
      eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | txn_shrink_allowed | MDBX_NOMETASYNC |
                                   MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0);
      assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed);
    }
    /* Publish the handle: from now on it passes signature checks */
    txn->signature = txn_signature;
    txn->userctx = context;
    *ret = txn;
    DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid,
          (flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root,
          txn->dbs[FREE_DBI].root);
  }

  return LOG_IFERR(rc);
}

/* Commits the transaction `txn`.
 *
 * txn     - transaction to commit; validated with check_txn().
 * latency - optional out-parameter; when non-null it receives per-stage
 *           timings (preparation, gc, audit, write, sync, ending, whole).
 *           On the `bailout` exit it is zero-filled instead.
 *
 * Overview of the paths below:
 *  - read-only txn: simply ended via txn_end() (label `done`);
 *  - nested txn: its state is merged into the parent, the child handle is
 *    freed, and nothing is written to disk;
 *  - top-level write txn without changes: ended via txn_end(), optionally
 *    reporting MDBX_RESULT_TRUE when MDBX_NOSUCCESS_EMPTY_COMMIT is enabled;
 *  - top-level write txn with changes: table roots are updated, GC is
 *    updated, dirty pages are written and a new meta is synced.
 *
 * On the `fail` exit the transaction is marked with MDBX_TXN_ERROR and
 * aborted via txn_abort(); the error that led there is returned.
 * MDBX_RESULT_TRUE is returned when the txn was already in the error state
 * (it is aborted rather than committed). */
int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
  STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR - MDBX_TXN_PARKED);
  /* Timestamps of the stage boundaries; they stay zero when `latency` is
   * null or the corresponding stage was not reached */
  const uint64_t ts_0 = latency ? osal_monotime() : 0;
  uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0;

  /* txn_end() mode for a commit which writes nothing */
  unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE;

  int rc = check_txn(txn, MDBX_TXN_FINISHED);
  if (unlikely(rc != MDBX_SUCCESS)) {
    /* A finished read-only txn is still aborted (and thereby released) via
     * the `fail` path, reporting MDBX_RESULT_TRUE */
    if (rc == MDBX_BAD_TXN && (txn->flags & MDBX_TXN_RDONLY)) {
      rc = MDBX_RESULT_TRUE;
      goto fail;
    }
  bailout:
    /* Early exit which does not touch the txn; also used by the pid check
     * below */
    if (latency)
      memset(latency, 0, sizeof(*latency));
    return LOG_IFERR(rc);
  }

  MDBX_env *const env = txn->env;
  if (MDBX_ENV_CHECKPID && unlikely(env->pid != osal_getpid())) {
    /* The environment is being used from a different process than the one
     * recorded in env->pid */
    env->flags |= ENV_FATAL_ERROR;
    rc = MDBX_PANIC;
    goto bailout;
  }

  if (unlikely(txn->flags & MDBX_TXN_RDONLY)) {
    if (txn->flags & MDBX_TXN_ERROR) {
      rc = MDBX_RESULT_TRUE;
      goto fail;
    }
    goto done;
  }

#if MDBX_TXN_CHECKOWNER
  if ((txn->flags & MDBX_NOSTICKYTHREADS) && txn == env->basal_txn && unlikely(txn->owner != osal_thread_self())) {
    /* NOTE(review): unlike the `bailout` exit, this return leaves *latency
     * untouched - confirm whether that is intended. */
    txn->flags |= MDBX_TXN_ERROR;
    rc = MDBX_THREAD_MISMATCH;
    return LOG_IFERR(rc);
  }
#endif /* MDBX_TXN_CHECKOWNER */

  if (unlikely(txn->flags & MDBX_TXN_ERROR)) {
    rc = MDBX_RESULT_TRUE;
    goto fail;
  }

  if (txn->nested) {
    /* Commit the child first (recursively, without latency reporting) */
    rc = mdbx_txn_commit_ex(txn->nested, nullptr);
    tASSERT(txn, txn->nested == nullptr);
    if (unlikely(rc != MDBX_SUCCESS))
      goto fail;
  }

  if (unlikely(txn != env->txn)) {
    DEBUG("%s", "attempt to commit unknown transaction");
    rc = MDBX_EINVAL;
    goto fail;
  }

  if (txn->parent) {
    /* ---- Nested txn: merge into the parent, no disk writes ---- */
    tASSERT(txn, audit_ex(txn, 0, false) == 0);
    eASSERT(env, txn != env->basal_txn);
    MDBX_txn *const parent = txn->parent;
    eASSERT(env, parent->signature == txn_signature);
    eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0);
    eASSERT(env, dpl_check(txn));

    if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) {
      /* fast completion of pure nested transaction */
      VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid);

      tASSERT(txn, memcmp(&parent->geo, &txn->geo, sizeof(parent->geo)) == 0);
      tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0);
      tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
      tASSERT(txn, txn->tw.loose_count == 0);

      /* Update parent's DBs array */
      eASSERT(env, parent->n_dbi == txn->n_dbi);
      TXN_FOREACH_DBI_ALL(txn, dbi) {
        tASSERT(txn, (txn->dbi_state[dbi] & (DBI_CREAT | DBI_DIRTY)) == 0);
        if (txn->dbi_state[dbi] & DBI_FRESH) {
          parent->dbs[dbi] = txn->dbs[dbi];
          /* preserve parent's status */
          const uint8_t state = txn->dbi_state[dbi] | DBI_FRESH;
          DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still",
                parent->dbi_state[dbi], state);
          parent->dbi_state[dbi] = state;
        }
      }
      txn_done_cursors(txn, true);
      end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE | TXN_END_EOTDONE;
      goto done;
    }

    /* Preserve space for spill list to avoid parent's state corruption
     * if allocation fails. */
    /* While a child exists, parent->tw.retired_pages holds the list length
     * at child-begin time (stored as a pointer-sized integer), not a pointer */
    const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages;
    tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages));
    const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len;
    if (retired_delta) {
      rc = pnl_need(&txn->tw.repnl, retired_delta);
      if (unlikely(rc != MDBX_SUCCESS))
        goto fail;
    }

    if (txn->tw.spilled.list) {
      if (parent->tw.spilled.list) {
        rc = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list));
        if (unlikely(rc != MDBX_SUCCESS))
          goto fail;
      }
      spill_purge(txn);
    }

    if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent &&
                 !dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) {
      rc = MDBX_ENOMEM;
      goto fail;
    }

    //-------------------------------------------------------------------------
    /* All reservations above are done before this point; from here on the
     * child's lists and state are moved into the parent */

    parent->tw.gc.retxl = txn->tw.gc.retxl;
    txn->tw.gc.retxl = nullptr;

    parent->tw.retired_pages = txn->tw.retired_pages;
    txn->tw.retired_pages = nullptr;

    pnl_free(parent->tw.repnl);
    parent->tw.repnl = txn->tw.repnl;
    txn->tw.repnl = nullptr;
    parent->tw.gc.time_acc = txn->tw.gc.time_acc;
    parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed;

    parent->geo = txn->geo;
    parent->canary = txn->canary;
    parent->flags |= txn->flags & MDBX_TXN_DIRTY;

    /* Move loose pages to parent */
#if MDBX_ENABLE_REFUND
    parent->tw.loose_refund_wl = txn->tw.loose_refund_wl;
#endif /* MDBX_ENABLE_REFUND */
    parent->tw.loose_count = txn->tw.loose_count;
    parent->tw.loose_pages = txn->tw.loose_pages;

    /* Merge our cursors into parent's and close them */
    txn_done_cursors(txn, true);
    end_mode |= TXN_END_EOTDONE;

    /* Update parent's DBs array */
    eASSERT(env, parent->n_dbi == txn->n_dbi);
    TXN_FOREACH_DBI_ALL(txn, dbi) {
      if (txn->dbi_state[dbi] != (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))) {
        eASSERT(env, (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) != 0 ||
                         (txn->dbi_state[dbi] | DBI_STALE) ==
                             (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY)));
        parent->dbs[dbi] = txn->dbs[dbi];
        /* preserve parent's status */
        const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY));
        DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still",
              parent->dbi_state[dbi], state);
        parent->dbi_state[dbi] = state;
      }
    }

    if (latency) {
      /* A nested commit has no gc/audit/write/sync stages, so all of those
       * boundaries collapse onto ts_1 */
      ts_1 = osal_monotime();
      ts_2 = /* no gc-update */ ts_1;
      ts_3 = /* no audit */ ts_2;
      ts_4 = /* no write */ ts_3;
      ts_5 = /* no sync */ ts_4;
    }
    txn_merge(parent, txn, parent_retired_len);
    env->txn = parent;
    parent->nested = nullptr;
    tASSERT(parent, dpl_check(parent));

#if MDBX_ENABLE_REFUND
    txn_refund(parent);
    if (ASSERT_ENABLED()) {
      /* Check parent's loose pages not suitable for refund */
      for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) {
        tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated);
        MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
        VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
      }
      /* Check parent's reclaimed pages not suitable for refund */
      if (MDBX_PNL_GETSIZE(parent->tw.repnl))
        tASSERT(parent, MDBX_PNL_MOST(parent->tw.repnl) + 1 < parent->geo.first_unallocated);
    }
#endif /* MDBX_ENABLE_REFUND */

    /* The child handle is invalidated and released here rather than through
     * txn_end() */
    txn->signature = 0;
    osal_free(txn);
    tASSERT(parent, audit_ex(parent, 0, false) == 0);
    rc = MDBX_SUCCESS;
    goto provide_latency;
  }

  /* ---- Top-level write txn ---- */
  if (!txn->tw.dirtylist) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
  } else {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
    tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                     (txn->parent ? txn->parent->tw.dirtyroom : env->options.dp_limit));
  }
  txn_done_cursors(txn, false);
  end_mode |= TXN_END_EOTDONE;

  if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) &&
      (txn->flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) {
    /* Nothing was changed: finish without writing anything */
    TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY)); }
#if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT
    rc = txn_end(txn, end_mode);
    if (unlikely(rc != MDBX_SUCCESS))
      goto fail;
    rc = MDBX_RESULT_TRUE;
    goto provide_latency;
#else
    goto done;
#endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */
  }

  DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, (void *)txn,
        (void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root);

  if (txn->n_dbi > CORE_DBS) {
    /* Update table root pointers */
    cursor_couple_t cx;
    rc = cursor_init(&cx.outer, txn, MAIN_DBI);
    if (unlikely(rc != MDBX_SUCCESS))
      goto fail;
    /* Temporarily link the on-stack cursor into the MAIN_DBI cursor list; it
     * is unlinked again on every exit from this block */
    cx.outer.next = txn->cursors[MAIN_DBI];
    txn->cursors[MAIN_DBI] = &cx.outer;
    TXN_FOREACH_DBI_USER(txn, i) {
      if ((txn->dbi_state[i] & DBI_DIRTY) == 0)
        continue;
      tree_t *const db = &txn->dbs[i];
      DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN " -> %" PRIaTXN, i, db->mod_txnid, txn->txnid);
      /* mod_txnid may be greater than front after committing nested
       * transactions */
      db->mod_txnid = txn->txnid;
      MDBX_val data = {db, sizeof(tree_t)};
      rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_TREE);
      if (unlikely(rc != MDBX_SUCCESS)) {
        txn->cursors[MAIN_DBI] = cx.outer.next;
        goto fail;
      }
    }
    txn->cursors[MAIN_DBI] = cx.outer.next;
  }

  ts_1 = latency ? osal_monotime() : 0;

  /* GC update stage; its CPU time is measured only when latency is wanted */
  gcu_t gcu_ctx;
  gc_cputime = latency ? osal_cputime(nullptr) : 0;
  rc = gc_update_init(txn, &gcu_ctx);
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;
  rc = gc_update(txn, &gcu_ctx);
  gc_cputime = latency ? osal_cputime(nullptr) - gc_cputime : 0;
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;

  tASSERT(txn, txn->tw.loose_count == 0);
  /* Stamp the core trees with this txnid only if they were modified */
  txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[FREE_DBI].mod_txnid;

  txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[MAIN_DBI].mod_txnid;

  ts_2 = latency ? osal_monotime() : 0;
  ts_3 = ts_2;
  if (AUDIT_ENABLED()) {
    rc = audit_ex(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages), true);
    ts_3 = osal_monotime();
    if (unlikely(rc != MDBX_SUCCESS))
      goto fail;
  }

  bool need_flush_for_nometasync = false;
  const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
  const uint32_t meta_sync_txnid = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed);
  /* sync prev meta */
  if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) {
    /* Fix for a flaw inherited from LMDB:
     *
     * Everything is fine if none of the processes working with the DB use
     * WRITEMAP. Then the meta-page (updated, but not flushed to disk) will
     * be saved as a result of fdatasync() when this transaction's data is
     * written.
     *
     * Everything is fine if all processes working with the DB use WRITEMAP
     * without MDBX_AVOID_MSYNC.
     * Then the meta-page (updated, but not flushed to disk) will be saved
     * as a result of msync() when this transaction's data is written.
     *
     * But if the processes working with the DB use both methods, i.e. sync()
     * in MDBX_WRITEMAP mode as well as writes through a file descriptor,
     * then it becomes impossible to ensure that both the meta-page of the
     * previous transaction and the data of the current transaction reach
     * the disk by means of a single sync operation performed after writing
     * the current transaction's data.
     * Consequently, the meta-page has to be updated explicitly, which
     * completely destroys the benefit of NOMETASYNC. */
    const uint32_t txnid_dist = ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? MDBX_NOMETASYNC_LAZY_FD
                                                                                        : MDBX_NOMETASYNC_LAZY_WRITEMAP;
    /* The point of this "magic" is to avoid a separate fdatasync() or
     * msync() call for guaranteed on-disk fixation of the meta-page, which
     * was "lazily" submitted for writing in the previous transaction but was
     * not flushed to disk because of the active MDBX_NOMETASYNC mode. */
    if (
#if defined(_WIN32) || defined(_WIN64)
        !env->ioring.overlapped_fd &&
#endif
        meta_sync_txnid == (uint32_t)head.txnid - txnid_dist)
      need_flush_for_nometasync = true;
    else {
      rc = meta_sync(env, head);
      if (unlikely(rc != MDBX_SUCCESS)) {
        ERROR("txn-%s: error %d", "presync-meta", rc);
        goto fail;
      }
    }
  }

  if (txn->tw.dirtylist) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
    tASSERT(txn, txn->tw.loose_count == 0);

    /* Choose the file handle for writing dirty pages. On non-Windows the
     * lazy handle is used when a later flush is needed anyway, when there is
     * no dsync handle, when the txn exceeds the write-through threshold, or
     * when unsynced pages already exist; otherwise the dsync handle is used. */
    mdbx_filehandle_t fd =
#if defined(_WIN32) || defined(_WIN64)
        env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
    (void)need_flush_for_nometasync;
#else
        (need_flush_for_nometasync || env->dsync_fd == INVALID_HANDLE_VALUE ||
         txn->tw.dirtylist->length > env->options.writethrough_threshold ||
         atomic_load64(&env->lck->unsynced_pages, mo_Relaxed))
            ? env->lazy_fd
            : env->dsync_fd;
#endif /* Windows */

    iov_ctx_t write_ctx;
    rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, txn->tw.dirtylist->pages_including_loose, fd, false);
    if (unlikely(rc != MDBX_SUCCESS)) {
      ERROR("txn-%s: error %d", "iov-init", rc);
      goto fail;
    }

    rc = txn_write(txn, &write_ctx);
    if (unlikely(rc != MDBX_SUCCESS)) {
      ERROR("txn-%s: error %d", "write", rc);
      goto fail;
    }
  } else {
    /* No dirty list (writemap mode): only account for the dirtied pages and
     * record when the unsynced state began */
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
    env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages;
    if (!env->lck->eoos_timestamp.weak)
      env->lck->eoos_timestamp.weak = osal_monotime();
  }

  /* TODO: use ctx.flush_begin & ctx.flush_end for range-sync */
  ts_4 = latency ? osal_monotime() : 0;

  /* Build the new meta: invariant fields are copied from the current head,
   * the rest is taken from this transaction */
  meta_t meta;
  memcpy(meta.magic_and_version, head.ptr_c->magic_and_version, 8);
  meta.reserve16 = head.ptr_c->reserve16;
  meta.validator_id = head.ptr_c->validator_id;
  meta.extra_pagehdr = head.ptr_c->extra_pagehdr;
  unaligned_poke_u64(4, meta.pages_retired,
                     unaligned_peek_u64(4, head.ptr_c->pages_retired) + MDBX_PNL_GETSIZE(txn->tw.retired_pages));
  meta.geometry = txn->geo;
  meta.trees.gc = txn->dbs[FREE_DBI];
  meta.trees.main = txn->dbs[MAIN_DBI];
  meta.canary = txn->canary;
  memcpy(&meta.dxbid, &head.ptr_c->dxbid, sizeof(meta.dxbid));

  txnid_t commit_txnid = txn->txnid;
#if MDBX_ENABLE_BIGFOOT
  if (gcu_ctx.bigfoot > txn->txnid) {
    commit_txnid = gcu_ctx.bigfoot;
    TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid));
  }
#endif
  meta.unsafe_sign = DATASIGN_NONE;
  meta_set_txnid(env, &meta, commit_txnid);

  rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta, &txn->tw.troika);

  ts_5 = latency ? osal_monotime() : 0;
  if (unlikely(rc != MDBX_SUCCESS)) {
    /* A failed sync of the new meta is treated as fatal for the environment */
    env->flags |= ENV_FATAL_ERROR;
    ERROR("txn-%s: error %d", "sync", rc);
    goto fail;
  }

  end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE;

done:
  /* Common successful completion: collect GC profiling (if requested) and
   * end the txn with the mode selected above */
  if (latency)
    txn_take_gcprof(txn, latency);
  rc = txn_end(txn, end_mode);

provide_latency:
  /* Convert the collected timestamps into per-stage durations; a stage that
   * was not reached reports zero */
  if (latency) {
    latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0;
    latency->gc_wallclock = (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0;
    latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0;
    latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0;
    latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0;
    latency->sync = (ts_5 > ts_4) ? osal_monotime_to_16dot16(ts_5 - ts_4) : 0;
    const uint64_t ts_6 = osal_monotime();
    latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0;
    latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0);
  }
  return LOG_IFERR(rc);

fail:
  /* Failure exit: mark the txn as broken and abort it. The result of
   * txn_abort() is ignored, so the original error in `rc` is returned. */
  txn->flags |= MDBX_TXN_ERROR;
  if (latency)
    txn_take_gcprof(txn, latency);
  txn_abort(txn);
  goto provide_latency;
}

/* Fills `info` with statistics about the transaction `txn`.
 *
 * txn      - transaction handle; validated with check_txn(), a finished
 *            transaction is rejected.
 * info     - out-parameter; must not be null (MDBX_EINVAL otherwise).
 * scan_rlt - when true, the reader table (lck->rdt) is scanned as well to
 *            refine txn_space_dirty (read-only txn) or to compute
 *            txn_reader_lag (write txn).
 *
 * Returns MDBX_SUCCESS, the check_txn() error, MDBX_EINVAL, or MDBX_PANIC
 * when MDBX_ENV_CHECKPID is enabled and the calling process differs from the
 * one recorded in env->pid. */
int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
  int rc = check_txn(txn, MDBX_TXN_FINISHED);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);

  if (unlikely(!info))
    return LOG_IFERR(MDBX_EINVAL);

  MDBX_env *const env = txn->env;
#if MDBX_ENV_CHECKPID
  if (unlikely(env->pid != osal_getpid())) {
    env->flags |= ENV_FATAL_ERROR;
    return LOG_IFERR(MDBX_PANIC);
  }
#endif /* MDBX_ENV_CHECKPID */

  info->txn_id = txn->txnid;
  info->txn_space_used = pgno2bytes(env, txn->geo.first_unallocated);

  if (txn->flags & MDBX_TXN_RDONLY) {
    /* ---- Read-only txn: compare its snapshot against the recent meta ---- */
    meta_ptr_t head;
    uint64_t head_retired;
    troika_t troika = meta_tap(env);
    do {
      /* fetch info from volatile head */
      head = meta_recent(env, &troika);
      head_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired);
      info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->geometry.now);
      info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->geometry.upper);
      info->txn_space_leftover = pgno2bytes(env, head.ptr_v->geometry.now - head.ptr_v->geometry.first_unallocated);
    } while (unlikely(meta_should_retry(env, &troika)));

    /* How many transactions the recent head is ahead of this reader */
    info->txn_reader_lag = head.txnid - info->txn_id;
    info->txn_space_dirty = info->txn_space_retired = 0;
    uint64_t reader_snapshot_pages_retired = 0;
    /* The block below runs only for a reader that has a slot, is not ousted
     * while parked, and whose snapshot retired-counter is behind the head's */
    if (txn->to.reader &&
        ((txn->flags & MDBX_TXN_PARKED) == 0 || safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) &&
        head_retired >
            (reader_snapshot_pages_retired = atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))) {
      info->txn_space_dirty = info->txn_space_retired =
          pgno2bytes(env, (pgno_t)(head_retired - reader_snapshot_pages_retired));

      size_t retired_next_reader = 0;
      lck_t *const lck = env->lck_mmap.lck;
      if (scan_rlt && info->txn_reader_lag > 1 && lck) {
        /* find next more recent reader */
        txnid_t next_reader = head.txnid;
        const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
        for (size_t i = 0; i < snap_nreaders; ++i) {
        retry:
          if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) {
            jitter4testing(true);
            /* Take a snapshot of the slot and re-read it; if any field
             * changed in between, the slot is read again */
            const uint64_t snap_tid = safe64_read(&lck->rdt[i].tid);
            const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid);
            const uint64_t snap_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease);
            if (unlikely(snap_retired != atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed)) ||
                snap_txnid != safe64_read(&lck->rdt[i].txnid) || snap_tid != safe64_read(&lck->rdt[i].tid))
              goto retry;
            /* A reader at or behind this txn's snapshot ends the scan with a
             * zero result */
            if (snap_txnid <= txn->txnid) {
              retired_next_reader = 0;
              break;
            }
            if (snap_txnid < next_reader && snap_tid >= MDBX_TID_TXN_OUSTED) {
              next_reader = snap_txnid;
              retired_next_reader = pgno2bytes(
                  env, (pgno_t)(snap_retired - atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed)));
            }
          }
        }
      }
      /* Note: this overwrites the txn_space_dirty value assigned at the top
       * of this block; it becomes zero when the scan was skipped or found
       * nothing */
      info->txn_space_dirty = retired_next_reader;
    }
  } else {
    /* ---- Write txn: report from the txn's own state ---- */
    info->txn_space_limit_soft = pgno2bytes(env, txn->geo.now);
    info->txn_space_limit_hard = pgno2bytes(env, txn->geo.upper);
    /* While a nested txn exists, tw.retired_pages holds a length stored as a
     * pointer-sized value rather than a list pointer */
    info->txn_space_retired =
        pgno2bytes(env, txn->nested ? (size_t)txn->tw.retired_pages : MDBX_PNL_GETSIZE(txn->tw.retired_pages));
    info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
    info->txn_space_dirty =
        pgno2bytes(env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
                                          : (txn->tw.writemap_dirty_npages + txn->tw.writemap_spilled_npages));
    /* INT64_MAX is reported when the lag is not computed (no scan requested
     * or no lock table) */
    info->txn_reader_lag = INT64_MAX;
    lck_t *const lck = env->lck_mmap.lck;
    if (scan_rlt && lck) {
      txnid_t oldest_snapshot = txn->txnid;
      const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
      if (snap_nreaders) {
        oldest_snapshot = txn_snapshot_oldest(txn);
        if (oldest_snapshot == txn->txnid - 1) {
          /* check if there is at least one reader */
          bool exists = false;
          for (size_t i = 0; i < snap_nreaders; ++i) {
            if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->txnid > safe64_read(&lck->rdt[i].txnid)) {
              exists = true;
              break;
            }
          }
          /* Without any such reader the lag is reported as zero */
          oldest_snapshot += !exists;
        }
      }
      info->txn_reader_lag = txn->txnid - oldest_snapshot;
    }
  }

  return MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Accumulator handed to audit_dbi() through its opaque `ctx` argument.
 * NOTE: audit_ex_locked() initialises it positionally, so the field order
 * must not be changed. */
struct audit_ctx {
  /* running total of pages attributed to trees */
  size_t used;
  /* bitmap with one bit per DBI handle; a bit is set by audit_dbi()
   * once that handle has been accounted */
  uint8_t *const done_bitmap;
};

/* Table-enumeration callback used by the audit.
 *
 * Adds the branch, leaf and overflow page counts taken from `stat` to the
 * running total in the `struct audit_ctx` passed via `ctx`, and for a non-zero
 * `dbi` sets the corresponding bit in the done-bitmap.
 * The `txn`, `name` and `flags` arguments are not used.
 * Always returns MDBX_SUCCESS. */
static int audit_dbi(void *ctx, const MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags,
                     const struct MDBX_stat *stat, MDBX_dbi dbi) {
  (void)txn;
  (void)name;
  (void)flags;

  struct audit_ctx *const acc = ctx;
  const size_t pages = (size_t)stat->ms_branch_pages + (size_t)stat->ms_leaf_pages + (size_t)stat->ms_overflow_pages;
  acc->used += pages;

  if (dbi != 0) {
    uint8_t *const cell = &acc->done_bitmap[dbi / CHAR_BIT];
    *cell |= 1 << dbi % CHAR_BIT;
  }
  return MDBX_SUCCESS;
}

/* Returns the sum of branch, leaf and large page counters of the given tree,
 * or 0 when `db` is a null pointer. */
static size_t audit_db_used(const tree_t *db) {
  if (!db)
    return 0;
  return (size_t)db->branch_pages + (size_t)db->leaf_pages + (size_t)db->large_pages;
}

/* Page-accounting audit of a transaction.
 *
 * Checks that `pending + gc + used` equals `txn->geo.first_unallocated`, where
 *  - pending: for a non-read-only transaction, the loose pages plus the
 *    reclaimed list plus the retired pages beyond `retired_stored`;
 *  - gc: the sum of the leading pgno_t of every record of the FREE_DBI table
 *    (presumably the length of each stored page list - TODO confirm), skipping
 *    records already reclaimed by this transaction unless `dont_filter_gc`;
 *  - used: NUM_METAS plus the page counters of every tree that can be reached.
 *
 * Returns MDBX_SUCCESS when the totals match, MDBX_PROBLEM (after logging the
 * breakdown) when they do not, MDBX_CORRUPTED on a GC key of unexpected size,
 * or the error from cursor_init().
 *
 * The caller audit_ex() invokes this while holding env->dbi_lock. */
__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc) {
  const MDBX_env *const env = txn->env;
  size_t pending = 0;
  if ((txn->flags & MDBX_TXN_RDONLY) == 0)
    pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.repnl) +
              (MDBX_PNL_GETSIZE(txn->tw.retired_pages) - retired_stored);

  cursor_couple_t cx;
  int rc = cursor_init(&cx.outer, txn, FREE_DBI);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  /* walk all GC records and sum up the pages they hold */
  size_t gc = 0;
  MDBX_val key, data;
  rc = outer_first(&cx.outer, &key, &data);
  while (rc == MDBX_SUCCESS) {
    if (!dont_filter_gc) {
      if (unlikely(key.iov_len != sizeof(txnid_t))) {
        ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len);
        return MDBX_CORRUPTED;
      }
      /* skip the records which were already reclaimed by this transaction,
       * since their pages are counted within `pending` */
      txnid_t id = unaligned_peek_u64(4, key.iov_base);
      if (txn->tw.gc.retxl ? txl_contain(txn->tw.gc.retxl, id) : (id <= txn->tw.gc.last_reclaimed))
        goto skip;
    }
    gc += *(pgno_t *)data.iov_base;
  skip:
    rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT);
  }
  tASSERT(txn, rc == MDBX_NOTFOUND);

  const size_t done_bitmap_size = (txn->n_dbi + CHAR_BIT - 1) / CHAR_BIT;
  if (txn->parent) {
    tASSERT(txn, txn->n_dbi == txn->parent->n_dbi && txn->n_dbi == txn->env->txn->n_dbi);
#if MDBX_ENABLE_DBI_SPARSE
    tASSERT(txn, txn->dbi_sparse == txn->parent->dbi_sparse && txn->dbi_sparse == txn->env->txn->dbi_sparse);
#endif /* MDBX_ENABLE_DBI_SPARSE */
  }

  /* the bitmap of already accounted DBI handles lives on the stack */
  struct audit_ctx ctx = {0, alloca(done_bitmap_size)};
  memset(ctx.done_bitmap, 0, done_bitmap_size);
  ctx.used =
      NUM_METAS + audit_db_used(dbi_dig(txn, FREE_DBI, nullptr)) + audit_db_used(dbi_dig(txn, MAIN_DBI, nullptr));

  rc = mdbx_enumerate_tables(txn, audit_dbi, &ctx);
  tASSERT(txn, rc == MDBX_SUCCESS);

  /* account the handles which were not reported by the enumeration */
  for (size_t dbi = CORE_DBS; dbi < txn->n_dbi; ++dbi) {
    if (ctx.done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT))
      continue;
    const tree_t *db = dbi_dig(txn, dbi, nullptr);
    if (db)
      ctx.used += audit_db_used(db);
    else if (dbi_state(txn, dbi))
      /* `dbi` is a size_t, hence the %zu conversion */
      WARNING("audit %s@%" PRIaTXN ": unable account dbi %zu / \"%.*s\", state 0x%02x", txn->parent ? "nested-" : "",
              txn->txnid, dbi, (int)env->kvs[dbi].name.iov_len, (const char *)env->kvs[dbi].name.iov_base,
              dbi_state(txn, dbi));
  }

  if (pending + gc + ctx.used == txn->geo.first_unallocated)
    return MDBX_SUCCESS;

  /* mismatch: log the breakdown of each term */
  if ((txn->flags & MDBX_TXN_RDONLY) == 0)
    ERROR("audit @%" PRIaTXN ": %zu(pending) = %zu(loose) + "
          "%zu(reclaimed) + %zu(retired-pending) - %zu(retired-stored)",
          txn->txnid, pending, txn->tw.loose_count, MDBX_PNL_GETSIZE(txn->tw.repnl),
          txn->tw.retired_pages ? MDBX_PNL_GETSIZE(txn->tw.retired_pages) : 0, retired_stored);
  ERROR("audit @%" PRIaTXN ": %zu(pending) + %zu"
        "(gc) + %zu(count) = %zu(total) <> %zu"
        "(allocated)",
        txn->txnid, pending, gc, ctx.used, pending + gc + ctx.used, (size_t)txn->geo.first_unallocated);
  return MDBX_PROBLEM;
}

/* Runs audit_ex_locked() while holding env->dbi_lock.
 *
 * Returns the error from acquiring the mutex if that fails, otherwise
 * the result of audit_ex_locked(). Releasing the mutex is enforced
 * by ENSURE(). */
__cold int audit_ex(MDBX_txn *txn, size_t retired_stored, bool dont_filter_gc) {
  MDBX_env *const env = txn->env;
  const int lock_err = osal_fastmutex_acquire(&env->dbi_lock);
  if (unlikely(lock_err != MDBX_SUCCESS))
    return lock_err;

  const int audit_rc = audit_ex_locked(txn, retired_stored, dont_filter_gc);
  ENSURE(txn->env, osal_fastmutex_release(&env->dbi_lock) == MDBX_SUCCESS);
  return audit_rc;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Private state of an environment check, reachable from every scope
 * via `scope->internal` and from the user context via `usr->internal`. */
typedef struct MDBX_chk_internal {
  /* the user-visible check context which is passed to every callback */
  MDBX_chk_context_t *usr;
  /* the set of user callbacks; most of them are optional
   * and tested for null before a call */
  const struct MDBX_chk_callbacks *cb;
  /* NOTE(review): not used within this part of the file,
   * presumably a deadline in monotonic-clock units - confirm */
  uint64_t monotime_timeout;

  /* the counter which currently accumulates problems: either a counter
   * supplied to chk_scope_begin_args() or usr->result.total_problems */
  size_t *problem_counter;
  /* NOTE(review): not used within this part of the file */
  uint8_t flags;
  /* latched result of the check_break callback, see chk_check_break() */
  bool got_break;
  /* NOTE(review): not used within this part of the file */
  bool write_locked;
  /* index of the current (innermost) scope within scope_stack */
  uint8_t scope_depth;

  /* embedded descriptors which chk_dispose() expects to be
   * table[FREE_DBI] and table[MAIN_DBI] respectively */
  MDBX_chk_table_t table_gc, table_main;
  /* per-page owner map: 0 for a page not yet visited,
   * otherwise the id of the owning table plus one */
  int16_t *pagemap;
  /* the single-entry cache of chk_get_tbl() */
  MDBX_chk_table_t *last_lookup;
  /* the nested tree most recently seen by chk_pgvisitor() */
  const void *last_nested;
  /* the stack of scopes, indexed by scope_depth */
  MDBX_chk_scope_t scope_stack[12];
  /* table descriptors, filled on demand by chk_get_tbl() */
  MDBX_chk_table_t *table[MDBX_MAX_DBI + CORE_DBS];

  /* environment info and meta-pages troika as read by chk_verbose_meta() */
  MDBX_envinfo envinfo;
  troika_t troika;
  /* scratch buffer owned by chk_v2a(), released by chk_dispose() */
  MDBX_val v2a_buf;
} MDBX_chk_internal_t;

/* Tells whether the check should be interrupted.
 *
 * Once a break has been observed it is latched in chk->got_break and the
 * check_break callback is not consulted anymore.
 * Returns MDBX_RESULT_TRUE to break, MDBX_RESULT_FALSE to go on. */
__cold static int chk_check_break(MDBX_chk_scope_t *const scope) {
  MDBX_chk_internal_t *const chk = scope->internal;
  if (!chk->got_break && chk->cb->check_break)
    chk->got_break = chk->cb->check_break(chk->usr);
  return chk->got_break ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
}

/* Completes an output line by calling the print_done callback, if one is set.
 * A null `line` is silently ignored. */
__cold static void chk_line_end(MDBX_chk_line_t *line) {
  if (unlikely(!line))
    return;

  MDBX_chk_internal_t *const chk = line->ctx->internal;
  assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
  if (likely(chk->cb->print_done))
    chk->cb->print_done(line);
}

/* Starts a new output line of the given severity.
 *
 * A severity below MDBX_chk_warning is first registered as a problem.
 * Returns the line obtained from the print_begin callback with its `ctx`
 * bound to the user context, or nullptr if the callback is absent
 * or has returned nothing. */
__cold __must_check_result static MDBX_chk_line_t *chk_line_begin(MDBX_chk_scope_t *const scope,
                                                                  enum MDBX_chk_severity severity) {
  MDBX_chk_internal_t *const chk = scope->internal;
  if (severity < MDBX_chk_warning)
    mdbx_env_chk_encount_problem(chk->usr);

  if (unlikely(!chk->cb->print_begin))
    return nullptr;

  MDBX_chk_line_t *const fresh = chk->cb->print_begin(chk->usr, severity);
  if (likely(fresh)) {
    assert(fresh->ctx == nullptr || (fresh->ctx == chk->usr && fresh->empty));
    assert(fresh->begin <= fresh->end && fresh->begin <= fresh->out && fresh->out <= fresh->end);
    fresh->ctx = chk->usr;
  }
  return fresh;
}

/* Finishes the given line and starts a new one of the same severity
 * within the current scope of the user context.
 * Returns the new line (may be nullptr); a null `line` is returned as is. */
__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) {
  if (unlikely(!line))
    return line;

  MDBX_chk_internal_t *const chk = line->ctx->internal;
  const enum MDBX_chk_severity keep = line->severity;
  chk_line_end(line);
  return chk_line_begin(chk->usr->scope, keep);
}

/* Hands the accumulated content of the line to the print_flush callback and
 * then rewinds the write position to the beginning of the buffer.
 * Nothing happens if the callback is absent or `line` is null.
 * Returns `line`. */
__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) {
  if (unlikely(!line))
    return line;

  MDBX_chk_internal_t *const chk = line->ctx->internal;
  assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
  if (likely(chk->cb->print_flush)) {
    chk->cb->print_flush(line);
    assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
    line->out = line->begin;
  }
  return line;
}

/* Makes room for `need` bytes in the line buffer.
 *
 * If the free space is insufficient the line is flushed first.
 * Returns the number of bytes that may be written at line->out, i.e.
 * the lesser of `need` and the free space, or 0 for a null line or zero need. */
__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) {
  if (unlikely(!line || !need))
    return 0;

  size_t room = line->end - line->out;
  assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
  if (room < need) {
    line = chk_flush(line);
    room = line->end - line->out;
  }
  return (need < room) ? need : room;
}

/* Appends a C-string to the line.
 *
 * The print_chars callback is used when available, otherwise the string is
 * copied into the line buffer chunk by chunk, and the rest is dropped
 * as soon as no room can be obtained.
 * A null line, a null or an empty string leave everything untouched.
 * Returns `line`. */
__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, const char *str) {
  if (unlikely(!line || !str || !*str))
    return line;

  MDBX_chk_internal_t *const chk = line->ctx->internal;
  size_t remain = strlen(str);
  assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
  if (chk->cb->print_chars) {
    chk->cb->print_chars(line, str, remain);
    assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
  } else {
    /* the string is not empty here, so at least one pass is made */
    while (remain) {
      const size_t portion = chk_print_wanna(line, remain);
      assert(portion <= remain);
      if (unlikely(portion == 0))
        break;
      memcpy(line->out, str, portion);
      line->out += portion;
      assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
      str += portion;
      remain -= portion;
    }
  }
  line->empty = false;
  return line;
}

/* Appends a printf-style formatted text to the line.
 *
 * The print_format callback is used when available. Otherwise the text is
 * rendered by vsnprintf() straight into the line buffer: the required length
 * is measured first, then room for the text plus the terminating NUL is
 * requested, and the write position is advanced only by the number of
 * characters actually stored, so it never goes beyond the end of the buffer
 * and the NUL is overwritten by subsequent output.
 * If the buffer is too small the text is truncated.
 *
 * A null `line` is ignored. Returns `line`. */
__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, const char *fmt, va_list args) {
  if (likely(line)) {
    MDBX_chk_internal_t *chk = line->ctx->internal;
    assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
    if (chk->cb->print_format) {
      chk->cb->print_format(line, fmt, args);
      assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
    } else {
      /* measure on a copy, since `args` is needed again for the real output */
      va_list ones;
      va_copy(ones, args);
      const int needed = vsnprintf(nullptr, 0, fmt, ones);
      va_end(ones);
      if (likely(needed > 0)) {
        /* one more byte is required, since vsnprintf() always stores
         * the terminating NUL within the given size */
        const size_t have = chk_print_wanna(line, (size_t)needed + 1);
        if (likely(have > 0)) {
          const int written = vsnprintf(line->out, have, fmt, args);
          if (likely(written > 0)) {
            /* on truncation vsnprintf() returns the untruncated length,
             * while only have-1 characters were stored */
            const size_t stored = ((size_t)written < have) ? (size_t)written : have - 1;
            line->out += stored;
          }
          assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end);
        }
      }
    }
    line->empty = false;
  }
  return line;
}

/* Variadic front-end of chk_print_va(): appends a printf-style formatted text
 * to the line and marks it as non-empty. A null `line` is ignored.
 * Returns `line`. */
__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) chk_print(MDBX_chk_line_t *line, const char *fmt, ...) {
  if (unlikely(!line))
    return line;

  va_list ap;
  va_start(ap, fmt);
  line = chk_print_va(line, fmt, ap);
  va_end(ap);
  line->empty = false;
  return line;
}

/* Appends a byte size both as an exact number and in a binary-prefixed form
 * with two fractional digits, e.g. "<prefix>N (I.FF KiB)<suffix>".
 *
 * The print_size callback is used when available. Otherwise the smallest
 * unit for which the rounded integer part does not exceed 1000 is chosen.
 * Null `prefix`/`suffix` are treated as empty strings, a null `line`
 * is ignored. Returns `line`. */
__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, const char *prefix, const uint64_t value,
                                              const char *suffix) {
  static const char sf[] = "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */
  if (unlikely(!line))
    return line;

  MDBX_chk_internal_t *const chk = line->ctx->internal;
  if (!prefix)
    prefix = "";
  if (!suffix)
    suffix = "";

  if (chk->cb->print_size) {
    chk->cb->print_size(line, prefix, value, suffix);
    line->empty = false;
    return line;
  }

  unsigned i = 0;
  for (;;) {
    /* 10 bits per each step of the units ladder */
    const unsigned scale = 10 + i * 10;
    const uint64_t rounded = value + (UINT64_C(5) << (scale - 10));
    const uint64_t integer = rounded >> scale;
    if (integer <= 1000) {
      const uint64_t fractional = (rounded - (integer << scale)) * 100u >> scale;
      return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, value, (unsigned)integer,
                       (unsigned)fractional, sf[i], suffix);
    }
    ++i;
  }
}

/* Reports a failure of the operation named by `subj` with the error code `err`.
 *
 * The message goes to an error-severity line which is flushed and finished
 * immediately, or to the debug log when no line could be started.
 * Returns `err` unchanged, so the call may be used in a return statement. */
__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, const char *subj) {
  MDBX_chk_line_t *const line = chk_line_begin(scope, MDBX_chk_error);
  if (!line) {
    debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", subj, mdbx_strerror(err), err);
    return err;
  }

  MDBX_chk_line_t *const filled = chk_print(line, "%s() failed, error %s (%d)", subj, mdbx_strerror(err), err);
  chk_line_end(chk_flush(filled));
  return err;
}

/* Registers and reports an issue concerning the object `object`.
 *
 * Issues are tallied per caption in the list of the current scope. Captions
 * are matched by pointer identity, not by content. A caption seen for the
 * first time gets a new list entry; failing to allocate one is reported
 * but does not prevent the issue itself from being reported.
 *
 * The report is delegated to the `issue` callback when it is set (after
 * registering a problem), otherwise an error-severity line is printed as
 * "object #entry_number: caption" or, if `entry_number` is UINT64_MAX,
 * "object: caption", followed by the optional formatted extra in parentheses.
 * The line is flushed only for a first-seen caption. */
__cold static void MDBX_PRINTF_ARGS(5, 6)
    chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, uint64_t entry_number, const char *caption,
                     const char *extra_fmt, ...) {
  MDBX_chk_internal_t *const chk = scope->internal;

  /* look for an already known caption and bump its counter */
  MDBX_chk_issue_t *issue;
  for (issue = chk->usr->scope->issues; issue; issue = issue->next)
    if (issue->caption == caption) {
      issue->count += 1;
      break;
    }

  const bool fresh = (issue == nullptr);
  if (fresh) {
    issue = osal_malloc(sizeof(*issue));
    if (unlikely(!issue))
      chk_error_rc(scope, MDBX_ENOMEM, "adding issue");
    else {
      issue->caption = caption;
      issue->count = 1;
      issue->next = chk->usr->scope->issues;
      chk->usr->scope->issues = issue;
    }
  }

  va_list args;
  va_start(args, extra_fmt);
  if (!chk->cb->issue) {
    MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error);
    if (entry_number == UINT64_MAX)
      chk_print(line, "%s: %s", object, caption);
    else
      chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption);
    if (extra_fmt) {
      chk_puts(line, " (");
      chk_print_va(line, extra_fmt, args);
      chk_puts(line, ")");
    }
    if (fresh)
      chk_flush(line);
    chk_line_end(line);
  } else {
    mdbx_env_chk_encount_problem(chk->usr);
    chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args);
  }
  va_end(args);
}

/* Reports an issue which is not bound to a particular object.
 *
 * The `issue` callback gets it (after registering a problem) with null object
 * and caption, or, in the absence of the callback, the formatted text
 * is printed as a complete error-severity line. */
__cold static void MDBX_PRINTF_ARGS(2, 3) chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) {
  MDBX_chk_internal_t *const chk = scope->internal;
  va_list ap;
  va_start(ap, fmt);
  if (unlikely(!chk->cb->issue)) {
    MDBX_chk_line_t *const line = chk_line_begin(scope, MDBX_chk_error);
    chk_line_end(chk_print_va(line, fmt, ap));
  } else {
    mdbx_env_chk_encount_problem(chk->usr);
    chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, ap);
  }
  va_end(ap);
}

/* Closes the innermost scope and makes its parent the current one.
 *
 * `err` is the result of the work done within the scope; it may be replaced
 * along the way (see below) and the final value is returned.
 *
 * The order of the steps matters since the callbacks observe
 * the intermediate state. */
__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) {
  assert(chk->scope_depth > 0);
  MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth;
  MDBX_chk_scope_t *const outer = chk->scope_depth ? inner - 1 : nullptr;
  /* leaving the scope also ends the stage if the parent belongs to another one */
  if (!outer || outer->stage != inner->stage) {
    /* keep the result and the problem counter consistent with each other:
     * a success with problems becomes MDBX_PROBLEM,
     * an error without problems counts as a single problem */
    if (err == MDBX_SUCCESS && *chk->problem_counter)
      err = MDBX_PROBLEM;
    else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err))
      *chk->problem_counter = 1;
    /* fold a dedicated counter into the grand total and switch back to the latter */
    if (chk->problem_counter != &chk->usr->result.total_problems) {
      chk->usr->result.total_problems += *chk->problem_counter;
      chk->problem_counter = &chk->usr->result.total_problems;
    }
    /* the callback may override the result */
    if (chk->cb->stage_end)
      err = chk->cb->stage_end(chk->usr, inner->stage, err);
  }
  /* the callback may override the result as well */
  if (chk->cb->scope_conclude)
    err = chk->cb->scope_conclude(chk->usr, outer, inner, err);
  chk->usr->scope = outer;
  chk->usr->scope_nesting = chk->scope_depth -= 1;
  if (outer)
    outer->subtotal_issues += inner->subtotal_issues;
  if (chk->cb->scope_pop)
    chk->cb->scope_pop(chk->usr, outer, inner);

  /* release the per-caption tallies collected by chk_object_issue() */
  while (inner->issues) {
    MDBX_chk_issue_t *next = inner->issues->next;
    osal_free(inner->issues);
    inner->issues = next;
  }
  /* poison the vacated slot with 0xFF bytes */
  memset(inner, -1, sizeof(*inner));
  return err;
}

/* Opens a new scope nested into the current one.
 *
 * verbosity_adjustment - shifts the verbosity relative to the parent scope:
 *                        1 keeps it, each further unit changes it by one
 *                        priority step; the result is never below
 *                        MDBX_chk_warning.
 * stage                - the stage of the new scope, zero to inherit
 *                        the stage of the parent.
 * object               - stored into the scope as is.
 * problems             - an optional dedicated problem counter; without it
 *                        the grand total is used when no counter is set yet
 *                        or when the stage changes.
 * fmt, args            - passed to the scope_push callback.
 *
 * Returns MDBX_SUCCESS, MDBX_BACKLOG_DEPLETED if the scope stack is full,
 * or an error from the scope_push/stage_begin callbacks. */
__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, int verbosity_adjustment, enum MDBX_chk_stage stage,
                                       const void *object, size_t *problems, const char *fmt, va_list args) {
  if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack)))
    return MDBX_BACKLOG_DEPLETED;

  MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth;
  const int verbosity = outer->verbosity + (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift);
  MDBX_chk_scope_t *const inner = outer + 1;
  memset(inner, 0, sizeof(*inner));
  inner->internal = outer->internal;
  /* note: `stage` itself is updated too, it is compared with the parent's below */
  inner->stage = stage ? stage : (stage = outer->stage);
  inner->object = object;
  inner->verbosity = (verbosity < MDBX_chk_warning) ? MDBX_chk_warning : (enum MDBX_chk_severity)verbosity;
  if (problems)
    chk->problem_counter = problems;
  else if (!chk->problem_counter || outer->stage != stage)
    chk->problem_counter = &chk->usr->result.total_problems;

  /* on a failure of the callback the new scope does not become the current one */
  if (chk->cb->scope_push) {
    const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
  }
  chk->usr->scope = inner;
  chk->usr->scope_nesting = chk->scope_depth += 1;

  if (stage != outer->stage && chk->cb->stage_begin) {
    int err = chk->cb->stage_begin(chk->usr, stage);
    if (unlikely(err != MDBX_SUCCESS)) {
      /* roll back the scope just opened; chk_scope_end() may alter the error,
       * but a failure must be reported anyway */
      err = chk_scope_end(chk, err);
      assert(err != MDBX_SUCCESS);
      return err ? err : MDBX_RESULT_TRUE;
    }
  }
  return MDBX_SUCCESS;
}

/* Variadic front-end of chk_scope_begin_args(): collects the trailing
 * arguments into a va_list and forwards everything as is.
 * Returns the result of chk_scope_begin_args(). */
__cold static int MDBX_PRINTF_ARGS(6, 7)
    chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, enum MDBX_chk_stage stage, const void *object,
                    size_t *problems, const char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  const int err = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, problems, fmt, ap);
  va_end(ap);
  return err;
}

/* Closes nested scopes one by one until `target` becomes the current scope.
 * The result is threaded through each chk_scope_end() call
 * and the final value is returned. */
__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) {
  MDBX_chk_internal_t *const chk = target->internal;
  assert(target <= chk->usr->scope);
  int rc = err;
  while (target < chk->usr->scope)
    rc = chk_scope_end(chk, rc);
  return rc;
}

/* Closes the given scope together with everything nested into it, making its
 * parent the current scope. Does nothing for a null pointer
 * or for the bottom element of the scope stack. */
__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) {
  if (!inner)
    return;
  if (inner <= inner->internal->scope_stack)
    return;
  chk_scope_restore(inner - 1, MDBX_SUCCESS);
}

/* Opens a child scope of `scope` within the same stage.
 *
 * Any scopes still open below `scope` are closed first.
 * Returns the new scope, or nullptr if it could not be opened. */
__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4)
    chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, const char *fmt, ...) {
  chk_scope_restore(scope, MDBX_SUCCESS);

  va_list ap;
  va_start(ap, fmt);
  const int rc = chk_scope_begin_args(scope->internal, verbosity_adjustment, scope->stage, nullptr, nullptr, fmt, ap);
  va_end(ap);

  if (rc)
    return nullptr;
  return scope + 1;
}

/* Converts a value (usually a table name) into a printable C-string.
 *
 * Returns either a string literal for the special cases, or a pointer to
 * chk->v2a_buf, which is reused and may be reallocated by the next call, so
 * the result must be consumed before calling the function again.
 *
 * The forms of the result:
 *  - "@MAIN", "@GC", "@META" for the corresponding sentinels, which are
 *    recognized both as the `val` pointer itself and as its iov_base;
 *  - "<zero-length>", "<nullptr>", "<too-long.N>" (for more than 65536 bytes),
 *    "<out-of-memory>";
 *  - the bytes as is, if all of them are alphanumeric or '_';
 *  - a single-quoted string with escapes, if it is printable enough;
 *  - "\x" followed by the hex dump of all bytes otherwise. */
__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, const MDBX_val *val) {
  if (val == MDBX_CHK_MAIN)
    return "@MAIN";
  if (val == MDBX_CHK_GC)
    return "@GC";
  if (val == MDBX_CHK_META)
    return "@META";

  const unsigned char *const data = val->iov_base;
  const size_t len = val->iov_len;
  if (data == MDBX_CHK_MAIN)
    return "@MAIN";
  if (data == MDBX_CHK_GC)
    return "@GC";
  if (data == MDBX_CHK_META)
    return "@META";

  if (!len)
    return "<zero-length>";
  if (!data)
    return "<nullptr>";
  if (len > 65536) {
    /* enough for the fixed text, the decimal length and the terminator */
    const size_t enough = 42;
    if (chk->v2a_buf.iov_len < enough) {
      void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough);
      if (unlikely(!ptr))
        return "<out-of-memory>";
      chk->v2a_buf.iov_base = ptr;
      chk->v2a_buf.iov_len = enough;
    }
    snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, "<too-long.%" PRIuSIZE ">", len);
    return chk->v2a_buf.iov_base;
  }

  /* classify the content:
   *  - quoting is required if there is anything but alphanumerics and '_';
   *  - the value stays printable while every byte is either printable or
   *    a control character, provided there are fewer than 4 control characters
   *    and they are few relative to the length (len > xchars * 4).
   * The scan stops at the first byte which breaks printability. */
  bool printable = true;
  bool quoting = false;
  size_t xchars = 0;
  for (size_t i = 0; i < len && printable; ++i) {
    quoting = quoting || !(data[i] == '_' || isalnum(data[i]));
    printable = isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4);
  }

  /* the plain form needs len bytes plus the terminator; the other forms need
   * up to two bytes per source byte, plus the quotes (or the "\x" prefix),
   * plus two extra bytes for each of the "\xHH" escapes */
  size_t need = len + 1;
  if (quoting || !printable)
    need += len + /* quotes */ 2 + 2 * /* max xchars */ 4;
  if (need > chk->v2a_buf.iov_len) {
    void *ptr = osal_realloc(chk->v2a_buf.iov_base, need);
    if (unlikely(!ptr))
      return "<out-of-memory>";
    chk->v2a_buf.iov_base = ptr;
    chk->v2a_buf.iov_len = need;
  }

  static const char hex[] = "0123456789abcdef";
  char *w = chk->v2a_buf.iov_base;
  if (!quoting) {
    /* plain identifier-like value */
    memcpy(w, data, len);
    w += len;
  } else if (printable) {
    /* quoted form: control characters as \xHH,
     * quotes, backtick and backslash are prefixed with a backslash */
    *w++ = '\'';
    for (size_t i = 0; i < len; ++i) {
      if (data[i] < ' ') {
        assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4);
        w[0] = '\\';
        w[1] = 'x';
        w[2] = hex[data[i] >> 4];
        w[3] = hex[data[i] & 15];
        w += 4;
      } else if (strchr("\"'`\\", data[i])) {
        assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2);
        w[0] = '\\';
        w[1] = data[i];
        w += 2;
      } else {
        assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1);
        *w++ = data[i];
      }
    }
    *w++ = '\'';
  } else {
    /* binary value: a single "\x" prefix followed by two hex digits per byte */
    *w++ = '\\';
    *w++ = 'x';
    for (size_t i = 0; i < len; ++i) {
      assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2);
      w[0] = hex[data[i] >> 4];
      w[1] = hex[data[i] & 15];
      w += 2;
    }
  }
  assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w);
  *w = 0;
  return chk->v2a_buf.iov_base;
}

/* Destroys the internal state of a check.
 *
 * For every registered table the table_dispose callback is invoked if the
 * table has a cookie, then the descriptor is freed unless it is one of the
 * two embedded ones. After that the scratch buffer and the page map are
 * freed, the user context is detached, and the state itself is poisoned
 * and freed. */
__cold static void chk_dispose(MDBX_chk_internal_t *chk) {
  assert(chk->table[FREE_DBI] == &chk->table_gc);
  assert(chk->table[MAIN_DBI] == &chk->table_main);

  for (size_t slot = 0; slot < ARRAY_LENGTH(chk->table); ++slot) {
    MDBX_chk_table_t *const entry = chk->table[slot];
    if (!entry)
      continue;

    chk->table[slot] = nullptr;
    if (entry->cookie && chk->cb->table_dispose) {
      chk->cb->table_dispose(chk->usr, entry);
      entry->cookie = nullptr;
    }
    /* the GC and MAIN descriptors are parts of the state itself */
    const bool embedded = (entry == &chk->table_gc) || (entry == &chk->table_main);
    if (!embedded)
      osal_free(entry);
  }

  osal_free(chk->v2a_buf.iov_base);
  osal_free(chk->pagemap);
  chk->usr->internal = nullptr;
  chk->usr->scope = nullptr;
  chk->pagemap = nullptr;
  memset(chk, 0xDD, sizeof(*chk));
  osal_free(chk);
}

/* Fixed-point division: returns numerator/divider scaled by 256, i.e.
 * with 8 fractional bits, truncated. The numerator must fit into the type
 * after scaling, and the divider must not be zero. */
static size_t div_8s(size_t numerator, size_t divider) {
  assert(numerator <= (SIZE_MAX >> 8));
  const size_t scaled = numerator * 256;
  return scaled / divider;
}

/* Fixed-point multiplication: multiplies `multiplier` by `quotient`, where
 * the latter has 8 fractional bits, and returns an integer result.
 * The integer and fractional parts are multiplied separately, and
 * the fractional product is rounded to nearest. */
static size_t mul_8s(size_t quotient, size_t multiplier) {
  const size_t whole = quotient / 256;
  const size_t frac = quotient % 256;
  return multiplier * whole + (multiplier * frac + 128) / 256;
}

/* Frees one slot of a completely filled histogram by merging
 * a pair of adjacent ranges.
 *
 * The pair is chosen to minimise the distortion of the histogram, which is
 * estimated as the area changed by the merge: the difference of heights
 * (amount per unit of width, in 8-bit fixed point) over both original ranges,
 * plus the area added over the gap between them. On a tie the last of the
 * equal candidates wins.
 *
 * The merged range takes the place of the first range of the pair, the ranges
 * after the second one are shifted left by one slot, and the last slot
 * is marked as free by zeroing its count.
 *
 * Expects all the slots to be in use, sorted and non-overlapping. */
static void histogram_reduce(struct MDBX_chk_histogram *p) {
  const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1;
  /* look for the pair which could be merged with the minimal error */
  size_t min_err = SIZE_MAX, min_i = last - 1;
  for (size_t i = 0; i < last; ++i) {
    const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, s1 = p->ranges[i].amount;
    const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, s2 = p->ranges[i + 1].amount;
    const size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2;
    assert(s1 > 0 && b1 > 0 && b1 < e1);
    assert(s2 > 0 && b2 > 0 && b2 < e2);
    assert(e1 <= b2);
    /* the error is the area of the histogram changed by the merge */
    const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx);
    const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1);
    const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2);
    const size_t dx = mul_8s(hx, b2 - e1);
    const size_t err = d1 + d2 + dx;
    if (min_err >= err) {
      min_i = i;
      min_err = err;
    }
  }
  /* merge the second range of the pair into the first one */
  const size_t absorbed = min_i + 1;
  p->ranges[min_i].end = p->ranges[absorbed].end;
  p->ranges[min_i].amount += p->ranges[absorbed].amount;
  p->ranges[min_i].count += p->ranges[absorbed].count;
  /* shift the tail over the absorbed slot only, so that the merged range
   * at min_i is kept; starting the move at min_i would overwrite it and
   * lose the amount and count of the first range */
  if (absorbed < last)
    memmove(p->ranges + absorbed, p->ranges + absorbed + 1, (last - absorbed) * sizeof(p->ranges[0]));
  /* mark the last slot as free */
  p->ranges[last].count = 0;
}

/* Accounts the value `n` in the histogram.
 *
 * The totals are always updated. Values below 2 are tallied separately
 * (their sum in `ones`, their number in `pad`). Any other value is added to
 * the range which covers it, or a new single-value range is inserted keeping
 * the ranges sorted. If no free slot is left, a pair of ranges is merged
 * by histogram_reduce() and the attempt is repeated. */
static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) {
  STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2);
  p->amount += n;
  p->count += 1;
  if (likely(n < 2)) {
    p->ones += n;
    p->pad += 1;
    return;
  }

  const size_t slots = ARRAY_LENGTH(p->ranges), tail = slots - 1;
  for (;;) {
    /* skip the used ranges which begin at or below the value */
    size_t pos;
    for (pos = 0; pos < slots && p->ranges[pos].count && n >= p->ranges[pos].begin; ++pos) {
      if (n < p->ranges[pos].end) {
        /* the value falls into an existing range */
        p->ranges[pos].amount += n;
        p->ranges[pos].count += 1;
        return;
      }
    }

    if (p->ranges[tail].count != 0) {
      /* all the slots are in use, free one and retry */
      histogram_reduce(p);
      continue;
    }

    /* there is a free slot, so a new range can be added */
    assert(pos < slots);
    if (p->ranges[pos].count) {
      /* make a gap by shifting the following ranges */
      assert(pos < tail);
#ifdef __COVERITY__
      if (pos < tail) /* avoid Coverity false-positive issue */
#endif                /* __COVERITY__ */
        memmove(p->ranges + pos + 1, p->ranges + pos, (tail - pos) * sizeof(p->ranges[0]));
    }
    p->ranges[pos].begin = n;
    p->ranges[pos].end = n + 1;
    p->ranges[pos].amount = n;
    p->ranges[pos].count = 1;
    return;
  }
}

/* Prints the distribution held by the histogram as "prefix: first=N, A-B=N, ...".
 *
 * With `amount` set the sums of values are printed, otherwise the numbers
 * of samples. The leading "first" item stands for the values below 2 and
 * is omitted when zero; a range of a single value is printed without
 * the upper bound. Returns `line`. */
__cold static MDBX_chk_line_t *histogram_dist(MDBX_chk_line_t *line, const struct MDBX_chk_histogram *histogram,
                                              const char *prefix, const char *first, bool amount) {
  line = chk_print(line, "%s:", prefix);

  const char *sep = "";
  const size_t head = amount ? histogram->ones : histogram->pad;
  if (head) {
    chk_print(line, " %s=%" PRIuSIZE, first, head);
    sep = ",";
  }

  for (size_t k = 0; k < ARRAY_LENGTH(histogram->ranges); ++k) {
    if (!histogram->ranges[k].count)
      continue;
    chk_print(line, "%s %" PRIuSIZE, sep, histogram->ranges[k].begin);
    if (histogram->ranges[k].begin != histogram->ranges[k].end - 1)
      chk_print(line, "-%" PRIuSIZE, histogram->ranges[k].end - 1);
    line = chk_print(line, "=%" PRIuSIZE, amount ? histogram->ranges[k].amount : histogram->ranges[k].count);
    sep = ",";
  }
  return line;
}

/* Prints the total of a non-empty histogram (the sum of values if `amount`
 * is set, the number of samples otherwise) after `prefix`, followed by
 * the parenthesised distribution when the verbosity of the scope is
 * above MDBX_chk_info. Nothing is printed for an empty histogram.
 * Returns `line`. */
__cold static MDBX_chk_line_t *histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line,
                                               const struct MDBX_chk_histogram *histogram, const char *prefix,
                                               const char *first, bool amount) {
  if (!histogram->count)
    return line;

  line = chk_print(line, "%s %" PRIuSIZE, prefix, amount ? histogram->amount : histogram->count);
  if (scope->verbosity > MDBX_chk_info) {
    line = histogram_dist(line, histogram, " (distribution", first, amount);
    line = chk_puts(line, ")");
  }
  return line;
}

//-----------------------------------------------------------------------------

/* Finds, or creates on the first encounter, the descriptor of the table
 * described by `in`. Tables are identified by the address of the name data,
 * not by its content.
 *
 * The most recently found descriptor is cached. While scanning, a descriptor
 * is allocated for the first vacant slot and takes the name and flags
 * from `in`; on the first match the descriptor gets its id (the slot index)
 * and a cookie from the table_filter callback, or an all-ones pointer
 * in the absence of the callback.
 *
 * On success stores the descriptor into `*out` and returns MDBX_SUCCESS.
 * Otherwise stores nullptr and returns MDBX_ENOMEM on an allocation failure,
 * or MDBX_PROBLEM when all the slots are occupied by other tables. */
__cold static int chk_get_tbl(MDBX_chk_scope_t *const scope, const walk_tbl_t *in, MDBX_chk_table_t **out) {
  MDBX_chk_internal_t *const chk = scope->internal;
  const void *const wanted = in->name.iov_base;

  MDBX_chk_table_t *const cached = chk->last_lookup;
  if (cached && cached->name.iov_base == wanted) {
    *out = cached;
    return MDBX_SUCCESS;
  }

  for (size_t slot = 0; slot < ARRAY_LENGTH(chk->table); ++slot) {
    MDBX_chk_table_t *tbl = chk->table[slot];
    if (!tbl) {
      /* a vacant slot, hence the table was not seen yet */
      tbl = osal_calloc(1, sizeof(MDBX_chk_table_t));
      if (unlikely(!tbl)) {
        *out = nullptr;
        return chk_error_rc(scope, MDBX_ENOMEM, "alloc_table");
      }
      chk->table[slot] = tbl;
      tbl->flags = in->internal->flags;
      tbl->id = -1;
      tbl->name = in->name;
    }

    if (tbl->name.iov_base != wanted)
      continue;

    if (tbl->id < 0) {
      tbl->id = (int)slot;
      tbl->cookie =
          chk->cb->table_filter ? chk->cb->table_filter(chk->usr, &tbl->name, tbl->flags) : (void *)(intptr_t)-1;
    }
    chk->last_lookup = tbl;
    *out = tbl;
    return MDBX_SUCCESS;
  }

  chk_scope_issue(scope, "too many tables > %u", (unsigned)ARRAY_LENGTH(chk->table) - CORE_DBS - /* meta */ 1);
  *out = nullptr;
  return MDBX_PROBLEM;
}

//------------------------------------------------------------------------------

/* Prints a one-line summary of the meta-page `num` at the verbose level:
 * its role within the troika (head/tail/stay), the kind of its signature,
 * the transaction id, the boot-id with the result of matching it against
 * the current one, and a note if the meta-page is forced for checking or
 * if commits were rolled back. Nothing is done if the line is not wanted. */
__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, const unsigned num) {
  MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose);
  if (!line)
    return;

  MDBX_chk_internal_t *const chk = scope->internal;
  MDBX_env *const env = chk->usr->env;

  /* a match is possible only if the current boot-id is known */
  const bool have_bootid = (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) != 0;
  bool bootid_match = false;
  if (have_bootid)
    bootid_match = memcmp(&chk->envinfo.mi_bootid.meta[num], &chk->envinfo.mi_bootid.current,
                          sizeof(chk->envinfo.mi_bootid.current)) == 0;

  const char *role;
  if (num == chk->troika.recent)
    role = "head";
  else if (num == TROIKA_TAIL(&chk->troika))
    role = "tail";
  else
    role = "stay";
  line = chk_print(line, "meta-%u: %s, ", num, role);

  switch (chk->envinfo.mi_meta_sign[num]) {
  case DATASIGN_NONE:
    line = chk_puts(line, "no-sync/legacy");
    break;
  case DATASIGN_WEAK: {
    const char *weakness;
    if (!have_bootid)
      weakness = "unknown (no boot-id)";
    else if (bootid_match)
      weakness = "intact (same boot-id)";
    else
      weakness = "dead";
    line = chk_print(line, "weak-%s", weakness);
    break;
  }
  default:
    line = chk_puts(line, "steady");
    break;
  }

  const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num];
  line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid);

  if ((chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y) == 0)
    line = chk_puts(line, "no boot-id");
  else
    line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", chk->envinfo.mi_bootid.meta[num].x,
                     chk->envinfo.mi_bootid.meta[num].y, bootid_match ? "live" : "not match");

  if (env->stuck_meta >= 0) {
    if (num == (unsigned)env->stuck_meta)
      line = chk_print(line, ", %s", "forced for checking");
  } else if (meta_txnid > chk->envinfo.mi_recent_txnid &&
             (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) {
    /* the meta-page is ahead of the recent transaction
     * while the environment is opened exclusively for writing */
    line = chk_print(line, ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 " >>> %" PRIu64 ")",
                     meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, chk->envinfo.mi_recent_txnid);
  }
  chk_line_end(line);
}

__cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, const int deep,
                                const walk_tbl_t *tbl_info, const size_t page_size, const page_type_t pagetype,
                                const MDBX_error_t page_err, const size_t nentries, const size_t payload_bytes,
                                const size_t header_bytes, const size_t unused_bytes) {
  MDBX_chk_scope_t *const scope = ctx;
  MDBX_chk_internal_t *const chk = scope->internal;
  MDBX_chk_context_t *const usr = chk->usr;
  MDBX_env *const env = usr->env;

  MDBX_chk_table_t *tbl;
  int err = chk_get_tbl(scope, tbl_info, &tbl);
  if (unlikely(err))
    return err;

  if (deep > 42) {
    chk_scope_issue(scope, "too deeply %u", deep);
    return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
  }
  histogram_acc(deep, &tbl->histogram.deep);
  usr->result.processed_pages += npages;
  const size_t page_bytes = payload_bytes + header_bytes + unused_bytes;

  int height = deep + 1;
  if (tbl->id >= CORE_DBS)
    height -= usr->txn->dbs[MAIN_DBI].height;
  const tree_t *nested = tbl_info->nested;
  if (nested) {
    if (tbl->flags & MDBX_DUPSORT)
      height -= tbl_info->internal->height;
    else {
      chk_object_issue(scope, "nested tree", pgno, "unexpected", "table %s flags 0x%x, deep %i",
                       chk_v2a(chk, &tbl->name), tbl->flags, deep);
      nested = nullptr;
    }
  } else
    chk->last_nested = nullptr;

  const char *pagetype_caption;
  bool branch = false;
  switch (pagetype) {
  default:
    chk_object_issue(scope, "page", pgno, "unknown page-type", "type %u, deep %i", (unsigned)pagetype, deep);
    pagetype_caption = "unknown";
    tbl->pages.other += npages;
    break;
  case page_broken:
    assert(page_err != MDBX_SUCCESS);
    pagetype_caption = "broken";
    tbl->pages.other += npages;
    break;
  case page_sub_broken:
    assert(page_err != MDBX_SUCCESS);
    pagetype_caption = "broken-subpage";
    tbl->pages.other += npages;
    break;
  case page_large:
    pagetype_caption = "large";
    histogram_acc(npages, &tbl->histogram.large_pages);
    if (tbl->flags & MDBX_DUPSORT)
      chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype,
                       chk_v2a(chk, &tbl->name), tbl->flags, deep);
    break;
  case page_branch:
    branch = true;
    if (!nested) {
      pagetype_caption = "branch";
      tbl->pages.branch += 1;
    } else {
      pagetype_caption = "nested-branch";
      tbl->pages.nested_branch += 1;
    }
    break;
  case page_dupfix_leaf:
    if (!nested)
      chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype,
                       chk_v2a(chk, &tbl->name), tbl->flags, deep);
    /* fall through */
    __fallthrough;
  case page_leaf:
    if (!nested) {
      pagetype_caption = "leaf";
      tbl->pages.leaf += 1;
      if (height != tbl_info->internal->height)
        chk_object_issue(scope, "page", pgno, "wrong tree height", "actual %i != %i table %s", height,
                         tbl_info->internal->height, chk_v2a(chk, &tbl->name));
    } else {
      pagetype_caption = (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix";
      tbl->pages.nested_leaf += 1;
      if (chk->last_nested != nested) {
        histogram_acc(height, &tbl->histogram.nested_tree);
        chk->last_nested = nested;
      }
      if (height != nested->height)
        chk_object_issue(scope, "page", pgno, "wrong nested-tree height", "actual %i != %i dupsort-node %s", height,
                         nested->height, chk_v2a(chk, &tbl->name));
    }
    break;
  case page_sub_dupfix_leaf:
  case page_sub_leaf:
    pagetype_caption = (pagetype == page_sub_leaf) ? "subleaf-dupsort" : "subleaf-dupfix";
    tbl->pages.nested_subleaf += 1;
    if ((tbl->flags & MDBX_DUPSORT) == 0 || nested)
      chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype,
                       chk_v2a(chk, &tbl->name), tbl->flags, deep);
    break;
  }

  if (npages) {
    if (tbl->cookie) {
      MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra);
      if (npages == 1)
        chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno);
      else
        chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, npages);
      chk_line_end(chk_print(
          line, " of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i",
          chk_v2a(chk, &tbl->name), header_bytes, (pagetype == page_branch) ? "keys" : "entries", nentries,
          payload_bytes, unused_bytes, deep));
    }

    bool already_used = false;
    for (unsigned n = 0; n < npages; ++n) {
      const size_t spanpgno = pgno + n;
      if (spanpgno >= usr->result.alloc_pages) {
        chk_object_issue(scope, "page", spanpgno, "wrong page-no", "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i",
                         pagetype_caption, spanpgno, usr->result.alloc_pages, deep);
        tbl->pages.all += 1;
      } else if (chk->pagemap[spanpgno]) {
        const MDBX_chk_table_t *const rival = chk->table[chk->pagemap[spanpgno] - 1];
        chk_object_issue(scope, "page", spanpgno, (branch && rival == tbl) ? "loop" : "already used",
                         "%s-page: by %s, deep %i", pagetype_caption, chk_v2a(chk, &rival->name), deep);
        already_used = true;
      } else {
        chk->pagemap[spanpgno] = (int16_t)tbl->id + 1;
        tbl->pages.all += 1;
      }
    }

    if (already_used)
      return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */
                    : MDBX_SUCCESS;
  }

  if (MDBX_IS_ERROR(page_err)) {
    chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", pagetype_caption);
  } else {
    if (unused_bytes > page_size)
      chk_object_issue(scope, "page", pgno, "illegal unused-bytes", "%s-page: %u < %" PRIuSIZE " < %u",
                       pagetype_caption, 0, unused_bytes, env->ps);

    if (header_bytes < (int)sizeof(long) || (size_t)header_bytes >= env->ps - sizeof(long)) {
      chk_object_issue(scope, "page", pgno, "illegal header-length",
                       "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, pagetype_caption, sizeof(long),
                       header_bytes, env->ps - sizeof(long));
    }
    if (nentries < 1 || (pagetype == page_branch && nentries < 2)) {
      chk_object_issue(scope, "page", pgno, nentries ? "half-empty" : "empty",
                       "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE " entries, deep %i", pagetype_caption,
                       payload_bytes, nentries, deep);
      tbl->pages.empty += 1;
    }

    if (npages) {
      if (page_bytes != page_size) {
        chk_object_issue(scope, "page", pgno, "misused",
                         "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR "h + %" PRIuPTR "p + %" PRIuPTR
                         "u), deep %i",
                         pagetype_caption, page_size, page_bytes, header_bytes, payload_bytes, unused_bytes, deep);
        if (page_size > page_bytes)
          tbl->lost_bytes += page_size - page_bytes;
      } else {
        tbl->payload_bytes += payload_bytes + header_bytes;
        usr->result.total_payload_bytes += payload_bytes + header_bytes;
      }
    }
  }
  return chk_check_break(scope);
}

__cold static int chk_tree(MDBX_chk_scope_t *const scope) {
  MDBX_chk_internal_t *const chk = scope->internal;
  MDBX_chk_context_t *const usr = chk->usr;
  MDBX_env *const env = usr->env;
  MDBX_txn *const txn = usr->txn;

#if defined(_WIN32) || defined(_WIN64)
  SetLastError(ERROR_SUCCESS);
#else
  errno = 0;
#endif /* Windows */
  chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap));
  if (!chk->pagemap) {
    int err = osal_get_errno();
    return chk_error_rc(scope, err ? err : MDBX_ENOMEM, "calloc");
  }

  if (scope->verbosity > MDBX_chk_info)
    chk_scope_push(scope, 0, "Walking pages...");
  /* always skip key ordering checking
   * to avoid MDBX_CORRUPTED in case custom comparators were used */
  usr->result.processed_pages = NUM_METAS;
  int err = walk_pages(txn, chk_pgvisitor, scope, dont_check_keys_ordering);
  if (MDBX_IS_ERROR(err) && err != MDBX_EINTR)
    chk_error_rc(scope, err, "walk_pages");

  for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n)
    if (!chk->pagemap[n])
      usr->result.unused_pages += 1;

  MDBX_chk_table_t total;
  memset(&total, 0, sizeof(total));
  total.pages.all = NUM_METAS;
  for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) {
    MDBX_chk_table_t *const tbl = chk->table[i];
    total.payload_bytes += tbl->payload_bytes;
    total.lost_bytes += tbl->lost_bytes;
    total.pages.all += tbl->pages.all;
    total.pages.empty += tbl->pages.empty;
    total.pages.other += tbl->pages.other;
    total.pages.branch += tbl->pages.branch;
    total.pages.leaf += tbl->pages.leaf;
    total.pages.nested_branch += tbl->pages.nested_branch;
    total.pages.nested_leaf += tbl->pages.nested_leaf;
    total.pages.nested_subleaf += tbl->pages.nested_subleaf;
  }
  assert(total.pages.all == usr->result.processed_pages);

  const size_t total_page_bytes = pgno2bytes(env, total.pages.all);
  if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose)
    chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution),
                           "walked %zu pages, left/unused %zu"
                           ", %" PRIuSIZE " problem(s)",
                           usr->result.processed_pages, usr->result.unused_pages, usr->scope->subtotal_issues));

  err = chk_scope_restore(scope, err);
  if (scope->verbosity > MDBX_chk_info) {
    for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) {
      MDBX_chk_table_t *const tbl = chk->table[i];
      MDBX_chk_scope_t *inner = chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &tbl->name));
      if (tbl->pages.all == 0)
        chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty"));
      else {
        MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info);
        if (line) {
          line = chk_print(line, "page usage: subtotal %" PRIuSIZE, tbl->pages.all);
          const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch;
          const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf + tbl->pages.nested_subleaf;
          if (tbl->pages.other)
            line = chk_print(line, ", other %" PRIuSIZE, tbl->pages.other);
          if (tbl->pages.other == 0 || (branch_pages | leaf_pages | tbl->histogram.large_pages.count) != 0) {
            line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, branch_pages, leaf_pages);
            if (tbl->histogram.large_pages.count || (tbl->flags & MDBX_DUPSORT) == 0) {
              line = chk_print(line, ", large %" PRIuSIZE, tbl->histogram.large_pages.count);
              if (tbl->histogram.large_pages.amount | tbl->histogram.large_pages.count)
                line = histogram_print(inner, line, &tbl->histogram.large_pages, " amount", "single", true);
            }
          }
          line = histogram_dist(chk_line_feed(line), &tbl->histogram.deep, "tree deep density", "1", false);
          if (tbl != &chk->table_gc && tbl->histogram.nested_tree.count) {
            line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, tbl->histogram.nested_tree.count);
            line = histogram_dist(line, &tbl->histogram.nested_tree, " density", "1", false);
            line = chk_print(chk_line_feed(line),
                             "nested tree(s) pages %" PRIuSIZE ": branch %" PRIuSIZE ", leaf %" PRIuSIZE
                             ", subleaf %" PRIuSIZE,
                             tbl->pages.nested_branch + tbl->pages.nested_leaf, tbl->pages.nested_branch,
                             tbl->pages.nested_leaf, tbl->pages.nested_subleaf);
          }

          const size_t bytes = pgno2bytes(env, tbl->pages.all);
          line =
              chk_print(chk_line_feed(line),
                        "page filling: subtotal %" PRIuSIZE " bytes (%.1f%%), payload %" PRIuSIZE
                        " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)",
                        bytes, bytes * 100.0 / total_page_bytes, tbl->payload_bytes, tbl->payload_bytes * 100.0 / bytes,
                        bytes - tbl->payload_bytes, (bytes - tbl->payload_bytes) * 100.0 / bytes);
          if (tbl->pages.empty)
            line = chk_print(line, ", %" PRIuSIZE " empty pages", tbl->pages.empty);
          if (tbl->lost_bytes)
            line = chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes);
          chk_line_end(line);
        }
      }
      chk_scope_restore(scope, 0);
    }
  }

  MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
  line = chk_print(line,
                   "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE " (%.1f%%), unused %" PRIuSIZE " (%.1f%%),"
                   " average fill %.1f%%",
                   total_page_bytes, usr->result.total_payload_bytes,
                   usr->result.total_payload_bytes * 100.0 / total_page_bytes,
                   total_page_bytes - usr->result.total_payload_bytes,
                   (total_page_bytes - usr->result.total_payload_bytes) * 100.0 / total_page_bytes,
                   usr->result.total_payload_bytes * 100.0 / total_page_bytes);
  if (total.pages.empty)
    line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty);
  if (total.lost_bytes)
    line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes);
  chk_line_end(line);
  return err;
}

/* Signature of the per-record callback accepted by chk_db().
 *
 * chk_db() calls it as handler(scope, tbl, record_number, &key, &data) for each
 * record whose node is not a nested table (N_TREE); a non-zero return value
 * makes chk_db() stop iterating and bail out with that value. */
typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number,
                            const MDBX_val *key, const MDBX_val *data);

/* Key-value visitor for user tables.
 *
 * Hands the record over to the optional `table_handle_kv` user callback and
 * returns its result when that is non-zero; otherwise (no callback, or the
 * callback returned zero) returns whatever chk_check_break() reports. */
__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number,
                                const MDBX_val *key, const MDBX_val *data) {
  MDBX_chk_internal_t *const chk = scope->internal;
  assert(tbl->cookie);

  if (chk->cb->table_handle_kv) {
    const int rc = chk->cb->table_handle_kv(chk->usr, tbl, record_number, key, data);
    if (rc)
      return rc;
  }
  return chk_check_break(scope);
}

/* Checks one table (key-value tree) record by record.
 *
 * scope   - the current checking scope, used for all output and issue counting.
 * dbi     - handle of the table; a value that is negative when cast to int
 *           (the recursive call below passes (MDBX_dbi)-1) means "open the
 *           table by tbl->name first".
 * tbl     - checker-side descriptor of the table (flags, name, counters).
 * handler - optional per-record callback; when it is null the descriptive
 *           header, the page-count cross-checks and the final summary are all
 *           skipped and only the per-record validation is performed.
 *
 * For each record the function validates the key/data lengths against the
 * table flags, compares the record with the previous one to detect duplicates
 * and wrong ordering, updates the length histograms, and either descends into
 * a nested table (N_TREE node) or calls `handler`.
 *
 * Returns MDBX_SUCCESS, or the error/break code which stopped the iteration
 * (possibly replaced by the result of the `table_conclude` user callback). */
__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, MDBX_chk_table_t *tbl, chk_kv_visitor *handler) {
  MDBX_chk_internal_t *const chk = scope->internal;
  MDBX_chk_context_t *const usr = chk->usr;
  MDBX_env *const env = usr->env;
  MDBX_txn *const txn = usr->txn;
  MDBX_cursor *cursor = nullptr;
  size_t record_count = 0, dups = 0, sub_databases = 0;
  int err;

  /* Nothing can be read through a transaction which is finished or broken. */
  if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->flags) {
    chk_line_end(chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error),
                                     "abort processing %s due to a previous error", chk_v2a(chk, &tbl->name))));
    err = MDBX_BAD_TXN;
    goto bailout;
  }

  /* No handle was given: open the table by its name. With MDBX_CHK_IGNORE_ORDER
   * the cmp_equal_or_greater comparator is installed for both keys and values. */
  if (0 > (int)dbi) {
    err = dbi_open(txn, &tbl->name, MDBX_DB_ACCEDE, &dbi,
                   (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr,
                   (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr);
    if (unlikely(err)) {
      tASSERT(txn, dbi >= txn->env->n_dbi || (txn->env->dbs_flags[dbi] & DB_VALID) == 0);
      chk_error_rc(scope, err, "mdbx_dbi_open");
      goto bailout;
    }
    tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0);
  }

  const tree_t *const db = txn->dbs + dbi;
  if (handler) {
    /* Human-readable caption of the key kind. */
    const char *key_mode = nullptr;
    switch (tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) {
    case 0:
      key_mode = "usual";
      break;
    case MDBX_REVERSEKEY:
      key_mode = "reverse";
      break;
    case MDBX_INTEGERKEY:
      key_mode = "ordinal";
      break;
    case MDBX_REVERSEKEY | MDBX_INTEGERKEY:
      key_mode = "msgpack";
      break;
    default:
      key_mode = "inconsistent";
      chk_scope_issue(scope, "wrong key-mode (0x%x)", tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY));
    }

    /* Human-readable caption of the value kind; flag combinations which are
     * not listed here are reported as an issue. */
    const char *value_mode = nullptr;
    switch (tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)) {
    case 0:
      value_mode = "single";
      break;
    case MDBX_DUPSORT:
      value_mode = "multi";
      break;
    case MDBX_DUPSORT | MDBX_REVERSEDUP:
      value_mode = "multi-reverse";
      break;
    case MDBX_DUPSORT | MDBX_DUPFIXED:
      value_mode = "multi-samelength";
      break;
    case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP:
      value_mode = "multi-reverse-samelength";
      break;
    case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP:
      value_mode = "multi-ordinal";
      break;
    case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
      value_mode = "multi-msgpack";
      break;
    case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
      value_mode = "reserved";
      break;
    default:
      value_mode = "inconsistent";
      chk_scope_issue(scope, "wrong value-mode (0x%x)",
                      tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP));
    }

    MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info);
    line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, value_mode);
    line = chk_print(line, ", flags:");
    if (!tbl->flags)
      line = chk_print(line, " none");
    else {
      /* Parallel arrays: f[i] is the flag bit and t[i] is its caption; the zero entry terminates f[]. */
      const uint8_t f[] = {
          MDBX_DUPSORT, MDBX_INTEGERKEY, MDBX_REVERSEKEY, MDBX_DUPFIXED, MDBX_REVERSEDUP, MDBX_INTEGERDUP, 0};
      const char *const t[] = {"dupsort", "integerkey", "reversekey", "dupfix", "reversedup", "integerdup"};
      for (size_t i = 0; f[i]; i++)
        if (tbl->flags & f[i])
          line = chk_print(line, " %s", t[i]);
    }
    chk_line_end(chk_print(line, " (0x%02X)", tbl->flags));

    line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), "entries %" PRIu64 ", sequence %" PRIu64, db->items,
                     db->sequence);
    if (db->mod_txnid)
      line = chk_print(line, ", last modification txn#%" PRIaTXN, db->mod_txnid);
    if (db->root != P_INVALID)
      line = chk_print(line, ", root #%" PRIaPGNO, db->root);
    chk_line_end(line);
    chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose),
                           "b-tree depth %u, pages: branch %" PRIaPGNO ", leaf %" PRIaPGNO ", large %" PRIaPGNO,
                           db->height, db->branch_pages, db->leaf_pages, db->large_pages));

    /* Cross-check the page counters stored in the tree descriptor against the
     * counters collected in tbl->pages / tbl->histogram, unless the b-tree
     * traversal was skipped. */
    if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) {
      const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch;
      const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf;
      const size_t subtotal_pages = db->branch_pages + db->leaf_pages + db->large_pages;
      if (subtotal_pages != tbl->pages.all)
        chk_scope_issue(scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", "subtotal", subtotal_pages,
                        tbl->pages.all);
      if (db->branch_pages != branch_pages)
        chk_scope_issue(scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "branch", db->branch_pages,
                        branch_pages);
      if (db->leaf_pages != leaf_pages)
        chk_scope_issue(scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "all-leaf", db->leaf_pages,
                        leaf_pages);
      if (db->large_pages != tbl->histogram.large_pages.amount)
        chk_scope_issue(scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "large/overflow",
                        db->large_pages, tbl->histogram.large_pages.amount);
    }
  }

  err = mdbx_cursor_open(txn, dbi, &cursor);
  if (unlikely(err)) {
    chk_error_rc(scope, err, "mdbx_cursor_open");
    goto bailout;
  }
  if (chk->flags & MDBX_CHK_IGNORE_ORDER) {
    cursor->checking |= z_ignord | z_pagecheck;
    if (cursor->subcur)
      cursor->subcur->cursor.checking |= z_ignord | z_pagecheck;
  }

  const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, tbl->flags);
  /* The previous well-formed key and data; iov_base stays null until the first one is seen. */
  MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0};
  MDBX_val key, data;
  err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST);
  while (err == MDBX_SUCCESS) {
    err = chk_check_break(scope);
    if (unlikely(err))
      goto bailout;

    bool bad_key = false;
    if (key.iov_len > maxkeysize) {
      chk_object_issue(scope, "entry", record_count, "key length exceeds max-key-size", "%" PRIuPTR " > %" PRIuPTR,
                       key.iov_len, maxkeysize);
      bad_key = true;
    } else if ((tbl->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && key.iov_len != 4) {
      chk_object_issue(scope, "entry", record_count, "wrong key length", "%" PRIuPTR " != 4or8", key.iov_len);
      bad_key = true;
    }

    bool bad_data = false;
    if ((tbl->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && data.iov_len != 4) {
      chk_object_issue(scope, "entry", record_count, "wrong data length", "%" PRIuPTR " != 4or8", data.iov_len);
      bad_data = true;
    }

    if (prev_key.iov_base) {
      if (prev_data.iov_base && !bad_data && (tbl->flags & MDBX_DUPFIXED) && prev_data.iov_len != data.iov_len) {
        chk_object_issue(scope, "entry", record_count, "different data length", "%" PRIuPTR " != %" PRIuPTR,
                         prev_data.iov_len, data.iov_len);
        bad_data = true;
      }

      /* Compare with the previous record only when the current key is well-formed. */
      if (!bad_key) {
        int cmp = mdbx_cmp(txn, dbi, &key, &prev_key);
        if (cmp == 0) {
          ++dups;
          if ((tbl->flags & MDBX_DUPSORT) == 0) {
            chk_object_issue(scope, "entry", record_count, "duplicated entries", nullptr);
            if (prev_data.iov_base && data.iov_len == prev_data.iov_len &&
                memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0)
              chk_object_issue(scope, "entry", record_count, "complete duplicate", nullptr);
          } else if (!bad_data && prev_data.iov_base) {
            cmp = mdbx_dcmp(txn, dbi, &data, &prev_data);
            if (cmp == 0)
              chk_object_issue(scope, "entry", record_count, "complete duplicate", nullptr);
            else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER))
              chk_object_issue(scope, "entry", record_count, "wrong order of multi-values", nullptr);
          }
        } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER))
          chk_object_issue(scope, "entry", record_count, "wrong order of entries", nullptr);
      }
    }

    /* Remember the record for the next iteration; the fixed sizes are
     * announced once, on the first well-formed key/data. */
    if (!bad_key) {
      if (!prev_key.iov_base && (tbl->flags & MDBX_INTEGERKEY))
        chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed key-size %" PRIuSIZE, key.iov_len));
      prev_key = key;
    }
    if (!bad_data) {
      if (!prev_data.iov_base && (tbl->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)))
        chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed data-size %" PRIuSIZE, data.iov_len));
      prev_data = data;
    }

    record_count++;
    histogram_acc(key.iov_len, &tbl->histogram.key_len);
    histogram_acc(data.iov_len, &tbl->histogram.val_len);

    const node_t *const node = page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]);
    if (node_flags(node) == N_TREE) {
      /* A nested table is accepted only inside the main table and only when
       * the main table has none of the dupsort-related flags. */
      if (dbi != MAIN_DBI || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)))
        chk_object_issue(scope, "entry", record_count, "unexpected table", "node-flags 0x%x", node_flags(node));
      else if (data.iov_len != sizeof(tree_t))
        chk_object_issue(scope, "entry", record_count, "wrong table node size", "node-size %" PRIuSIZE " != %" PRIuSIZE,
                         data.iov_len, sizeof(tree_t));
      else if (scope->stage == MDBX_chk_maindb)
        /* count the tables during the first pass */
        sub_databases += 1;
      else {
        /* process the table during the second pass */
        tree_t aligned_db;
        /* Copy the descriptor out of the record into a properly typed local object. */
        memcpy(&aligned_db, data.iov_base, sizeof(aligned_db));
        walk_tbl_t tbl_info = {.name = key};
        tbl_info.internal = &aligned_db;
        MDBX_chk_table_t *table;
        err = chk_get_tbl(scope, &tbl_info, &table);
        if (unlikely(err))
          goto bailout;
        if (table->cookie) {
          err = chk_scope_begin(chk, 0, MDBX_chk_tables, table, &usr->result.problems_kv, "Processing table %s...",
                                chk_v2a(chk, &table->name));
          if (likely(!err)) {
            /* Recurse with an invalid handle so that the nested table gets opened by name. */
            err = chk_db(usr->scope, (MDBX_dbi)-1, table, chk_handle_kv);
            if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE)
              usr->result.table_processed += 1;
          }
          err = chk_scope_restore(scope, err);
          if (unlikely(err))
            goto bailout;
        } else
          chk_line_end(chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s...",
                                           chk_v2a(chk, &table->name))));
      }
    } else if (handler) {
      err = handler(scope, tbl, record_count, &key, &data);
      if (unlikely(err))
        goto bailout;
    }

    err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT);
  }

  /* MDBX_NOTFOUND is the regular end of iteration; anything else is a cursor failure. */
  err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get") : MDBX_SUCCESS;
  if (err == MDBX_SUCCESS && record_count != db->items)
    chk_scope_issue(scope, "different number of entries %" PRIuSIZE " != %" PRIu64, record_count, db->items);
bailout:
  /* The summary is printed only if the cursor was opened, i.e. the table was actually reached. */
  if (cursor) {
    if (handler) {
      if (tbl->histogram.key_len.count) {
        MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info);
        line = histogram_dist(line, &tbl->histogram.key_len, "key length density", "0/1", false);
        /* Continue with the line returned by chk_line_feed(), the same way chk_tree() does. */
        line = histogram_dist(chk_line_feed(line), &tbl->histogram.val_len, "value length density", "0/1", false);
        chk_line_end(line);
      }
      if (scope->stage == MDBX_chk_maindb)
        usr->result.table_total = sub_databases;
      /* The user callback gets the current status and its result replaces it. */
      if (chk->cb->table_conclude)
        err = chk->cb->table_conclude(usr, tbl, cursor, err);
      MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
      line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count);
      if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)))
        line = chk_print(line, " %" PRIuSIZE " dups,", dups);
      if (sub_databases || dbi == MAIN_DBI)
        line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases);
      line = chk_print(line,
                       " %" PRIuSIZE " key's bytes,"
                       " %" PRIuSIZE " data's bytes,"
                       " %" PRIuSIZE " problem(s)",
                       tbl->histogram.key_len.amount, tbl->histogram.val_len.amount, scope->subtotal_issues);
      chk_line_end(chk_flush(line));
    }

    mdbx_cursor_close(cursor);
    /* Close the handle again if it is flagged DBI_FRESH and no cursor is left on it. */
    if (!txn->cursors[dbi] && (txn->dbi_state[dbi] & DBI_FRESH))
      mdbx_dbi_close(env, dbi);
  }
  return err;
}

/* Key-value visitor for the GC table: validates a single GC record.
 *
 * The key is expected to be a txnid_t; the data is read as an array of pgno_t
 * whose first element is the number of page numbers that follow.
 * The function:
 *  - checks the key size and that the txn-id lies in 1..current txn-id;
 *  - checks the list size/length against the record size;
 *  - adds the list length to gc_pages (and to reclaimable_pages when the
 *    record is older than the latter reader);
 *  - checks the range and ordering of each page number and marks it in the
 *    page map (-1), reporting pages which are already owned by a table or
 *    already listed in GC;
 *  - when tbl->cookie is set, prints the list folded into spans.
 *
 * The number of processed entries is always clamped to what the record really
 * holds, so a malformed length prefix can never cause reads past the record.
 *
 * Returns the result of chk_check_break(). */
__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number,
                                const MDBX_val *key, const MDBX_val *data) {
  MDBX_chk_internal_t *const chk = scope->internal;
  MDBX_chk_context_t *const usr = chk->usr;
  assert(tbl == &chk->table_gc);
  (void)tbl;
  const char *bad = "";
  pgno_t *iptr = data->iov_base;

  if (key->iov_len != sizeof(txnid_t))
    chk_object_issue(scope, "entry", record_number, "wrong txn-id size", "key-size %" PRIuSIZE, key->iov_len);
  else {
    txnid_t txnid;
    memcpy(&txnid, key->iov_base, sizeof(txnid));
    if (txnid < 1 || txnid > usr->txn->txnid)
      chk_object_issue(scope, "entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid);
    else {
      if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t))
        chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len);

      /* How many page numbers fit into the record after the length prefix;
       * zero when the record is too short to hold even the prefix. */
      const size_t capacity = (data->iov_len >= sizeof(pgno_t)) ? data->iov_len / sizeof(pgno_t) - 1 : 0;
      size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0;
      if (number > PAGELIST_LIMIT) {
        chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, number);
        /* Do not trust the bogus length: look only at the entries really present. */
        if (number > capacity)
          number = capacity;
      } else if ((number + 1) * sizeof(pgno_t) > data->iov_len) {
        chk_object_issue(scope, "entry", txnid, "trimmed idl", "%" PRIuSIZE " > %" PRIuSIZE " (corruption)",
                         (number + 1) * sizeof(pgno_t), data->iov_len);
        /* `capacity` (not `iov_len / sizeof(pgno_t) - 1`) avoids the wrap-around
         * to SIZE_MAX for a record shorter than one pgno_t. */
        number = capacity;
      } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >=
                 /* LY: allow gap up to one page. it is ok
                  * and better than shink-and-retry inside gc_update() */
                 usr->env->ps)
        chk_object_issue(scope, "entry", txnid, "extra idl space",
                         "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", (number + 1) * sizeof(pgno_t),
                         data->iov_len);

      usr->result.gc_pages += number;
      if (chk->envinfo.mi_latter_reader_txnid > txnid)
        usr->result.reclaimable_pages += number;

      /* `prev` starts just outside the valid range on the side the list is expected to begin at. */
      size_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : usr->txn->geo.first_unallocated;
      /* `span` is never reset inside the loop, so it ends up as the longest run of adjacent pages. */
      size_t span = 1;
      for (size_t i = 0; i < number; ++i) {
        const size_t pgno = iptr[i];
        if (pgno < NUM_METAS)
          chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " < meta-pages %u", pgno,
                           NUM_METAS);
        else if (pgno >= usr->result.backed_pages)
          chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE,
                           pgno, usr->result.backed_pages);
        else if (pgno >= usr->result.alloc_pages)
          chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE,
                           pgno, usr->result.alloc_pages - 1);
        else {
          if (MDBX_PNL_DISORDERED(prev, pgno)) {
            bad = " [bad sequence]";
            chk_object_issue(scope, "entry", txnid, "bad sequence", "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev,
                             (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, pgno);
          }
          if (chk->pagemap) {
            /* Page map slot: 0 = not seen yet, positive = table index + 1, negative = listed in GC. */
            const intptr_t id = chk->pagemap[pgno];
            if (id == 0)
              chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */;
            else if (id > 0) {
              assert(id - 1 < (intptr_t)ARRAY_LENGTH(chk->table));
              chk_object_issue(scope, "page", pgno, "already used", "by %s", chk_v2a(chk, &chk->table[id - 1]->name));
            } else
              chk_object_issue(scope, "page", pgno, "already listed in GC", nullptr);
          }
        }
        prev = pgno;
        while (i + span < number &&
               iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) : pgno_sub(pgno, span)))
          ++span;
      }
      if (tbl->cookie) {
        chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details),
                               "transaction %" PRIaTXN ", %" PRIuSIZE " pages, maxspan %" PRIuSIZE "%s", txnid, number,
                               span, bad));
        /* Second pass: fold the list into runs of adjacent pages, one output line per run. */
        for (size_t i = 0; i < number; i += span) {
          const size_t pgno = iptr[i];
          for (span = 1; i + span < number &&
                         iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) : pgno_sub(pgno, span));
               ++span)
            ;
          histogram_acc(span, &tbl->histogram.nested_tree);
          MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra);
          if (line) {
            if (span > 1)
              line = chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span);
            else
              line = chk_print(line, "%9" PRIuSIZE, pgno);
            chk_line_end(line);
            int err = chk_check_break(scope);
            if (err)
              return err;
          }
        }
      }
    }
  }
  return chk_check_break(scope);
}

__cold static int env_chk(MDBX_chk_scope_t *const scope) {
  MDBX_chk_internal_t *const chk = scope->internal;
  MDBX_chk_context_t *const usr = chk->usr;
  MDBX_env *const env = usr->env;
  MDBX_txn *const txn = usr->txn;
  int err = env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika);
  if (unlikely(err))
    return chk_error_rc(scope, err, "env_info");

  MDBX_chk_line_t *line =
      chk_puts(chk_line_begin(scope, MDBX_chk_info - (1 << MDBX_chk_severity_prio_shift)), "dxb-id ");
  if (chk->envinfo.mi_dxbid.x | chk->envinfo.mi_dxbid.y)
    line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_dxbid.x, chk->envinfo.mi_dxbid.y);
  else
    line = chk_puts(line, "is absent");
  chk_line_end(line);

  line = chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id ");
  if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y)
    line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_bootid.current.x,
                     chk->envinfo.mi_bootid.current.y);
  else
    line = chk_puts(line, "is unavailable");
  chk_line_end(line);

  err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize);
  if (unlikely(err))
    return chk_error_rc(scope, err, "osal_filesize");

  //--------------------------------------------------------------------------

  err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, &usr->result.problems_meta, "Peek the meta-pages...");
  if (likely(!err)) {
    MDBX_chk_scope_t *const inner = usr->scope;
    const uint64_t dxbfile_pages = env->dxb_mmap.filesize >> env->ps2ln;
    usr->result.alloc_pages = txn->geo.first_unallocated;
    usr->result.backed_pages = bytes2pgno(env, env->dxb_mmap.current);
    if (unlikely(usr->result.backed_pages > dxbfile_pages))
      chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, usr->result.backed_pages, dxbfile_pages);
    if (unlikely(dxbfile_pages < NUM_METAS))
      chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, NUM_METAS);
    if (unlikely(usr->result.backed_pages < NUM_METAS))
      chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, NUM_METAS);
    if (unlikely(usr->result.backed_pages < NUM_METAS)) {
      chk_scope_issue(inner, "backed-pages %zu < num-metas %u", usr->result.backed_pages, NUM_METAS);
      return MDBX_CORRUPTED;
    }
    if (unlikely(dxbfile_pages < NUM_METAS)) {
      chk_scope_issue(inner, "backed-pages %zu < num-metas %u", usr->result.backed_pages, NUM_METAS);
      return MDBX_CORRUPTED;
    }
    if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) {
      chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", usr->result.backed_pages, (size_t)MAX_PAGENO + 1);
      usr->result.backed_pages = MAX_PAGENO + 1;
    }

    if ((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) {
      if (unlikely(usr->result.backed_pages > dxbfile_pages)) {
        chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, usr->result.backed_pages, dxbfile_pages);
        usr->result.backed_pages = (size_t)dxbfile_pages;
      }
      if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) {
        chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", usr->result.alloc_pages, usr->result.backed_pages);
        usr->result.alloc_pages = usr->result.backed_pages;
      }
    } else {
      /* DB may be shrunk by writer down to the allocated (but unused) pages. */
      if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) {
        chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", usr->result.alloc_pages, usr->result.backed_pages);
        usr->result.alloc_pages = usr->result.backed_pages;
      }
      if (unlikely(usr->result.alloc_pages > dxbfile_pages)) {
        chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, usr->result.alloc_pages, dxbfile_pages);
        usr->result.alloc_pages = (size_t)dxbfile_pages;
      }
      if (unlikely(usr->result.backed_pages > dxbfile_pages))
        usr->result.backed_pages = (size_t)dxbfile_pages;
    }

    line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_info),
                                   "pagesize %u (%u system), max keysize %u..%u"
                                   ", max readers %u",
                                   env->ps, globals.sys_pagesize, mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT),
                                   mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers));
    line = chk_line_feed(chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr));
    if (txn->geo.lower == txn->geo.upper)
      line = chk_print_size(line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr);
    else {
      line = chk_print_size(line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr);
      line = chk_print_size(line, " .. ", chk->envinfo.mi_geo.upper, ", ");
      line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", ");

      line = chk_line_feed(chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr));
      line = chk_print_size(line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr);
    }
    tASSERT(txn, txn->geo.now == chk->envinfo.mi_geo.current / chk->envinfo.mi_dxb_pagesize);
    chk_line_end(chk_print(line, ", %u pages", txn->geo.now));
#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG
    if (txn->geo.shrink_pv && txn->geo.now != txn->geo.upper && scope->verbosity >= MDBX_chk_verbose) {
      line = chk_line_begin(inner, MDBX_chk_notice);
      chk_line_feed(chk_print(line, " > WARNING: Due Windows system limitations a file couldn't"));
      chk_line_feed(chk_print(line, " > be truncated while the database is opened. So, the size"));
      chk_line_feed(chk_print(line, " > database file of may by large than the database itself,"));
      chk_line_end(chk_print(line, " > until it will be closed or reopened in read-write mode."));
    }
#endif /* Windows || Debug */
    chk_verbose_meta(inner, 0);
    chk_verbose_meta(inner, 1);
    chk_verbose_meta(inner, 2);

    if (env->stuck_meta >= 0) {
      chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing),
                             "skip checking meta-pages since the %u"
                             " is selected for verification",
                             env->stuck_meta));
      line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_resolution),
                                     "transactions: recent %" PRIu64 ", "
                                     "selected for verification %" PRIu64 ", lag %" PRIi64,
                                     chk->envinfo.mi_recent_txnid, chk->envinfo.mi_meta_txnid[env->stuck_meta],
                                     chk->envinfo.mi_recent_txnid - chk->envinfo.mi_meta_txnid[env->stuck_meta]));
      chk_line_end(line);
    } else {
      chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs check for meta-pages clashes"));
      const unsigned meta_clash_mask = meta_eq_mask(&chk->troika);
      if (meta_clash_mask & 1)
        chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 0, 1);
      if (meta_clash_mask & 2)
        chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2);
      if (meta_clash_mask & 4)
        chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0);

      const unsigned prefer_steady_metanum = chk->troika.prefer_steady;
      const uint64_t prefer_steady_txnid = chk->troika.txnid[prefer_steady_metanum];
      const unsigned recent_metanum = chk->troika.recent;
      const uint64_t recent_txnid = chk->troika.txnid[recent_metanum];
      if (env->flags & MDBX_EXCLUSIVE) {
        chk_line_end(
            chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs full check recent-txn-id with meta-pages"));
        eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid);
        if (prefer_steady_txnid != recent_txnid) {
          if ((chk->flags & MDBX_CHK_READWRITE) != 0 && (env->flags & MDBX_RDONLY) == 0 &&
              recent_txnid > prefer_steady_txnid &&
              (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) != 0 &&
              chk->envinfo.mi_bootid.current.x == chk->envinfo.mi_bootid.meta[recent_metanum].x &&
              chk->envinfo.mi_bootid.current.y == chk->envinfo.mi_bootid.meta[recent_metanum].y) {
            chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_verbose),
                                   "recent meta-%u is weak, but boot-id match current"
                                   " (will synced upon successful check)",
                                   recent_metanum));
          } else
            chk_scope_issue(inner, "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64 ")",
                            prefer_steady_metanum, prefer_steady_txnid, recent_txnid);
        }
      } else if (chk->write_locked) {
        chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose),
                              "performs lite check recent-txn-id with meta-pages (not a "
                              "monopolistic mode)"));
        if (recent_txnid != chk->envinfo.mi_recent_txnid) {
          chk_scope_issue(inner, "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64 ")",
                          recent_metanum, recent_txnid, chk->envinfo.mi_recent_txnid);
        }
      } else {
        chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose),
                              "skip check recent-txn-id with meta-pages (monopolistic or "
                              "read-write mode only)"));
      }

      chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_resolution),
                             "transactions: recent %" PRIu64 ", latter reader %" PRIu64 ", lag %" PRIi64,
                             chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid,
                             chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid));
    }
  }
  err = chk_scope_restore(scope, err);

  //--------------------------------------------------------------------------

  const char *const subj_tree = "B-Trees";
  if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL)
    chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skipping %s traversal...", subj_tree));
  else {
    err = chk_scope_begin(chk, -1, MDBX_chk_tree, nullptr, &usr->result.tree_problems,
                          "Traversal %s by txn#%" PRIaTXN "...", subj_tree, txn->txnid);
    if (likely(!err))
      err = chk_tree(usr->scope);
    if (usr->result.tree_problems && usr->result.gc_tree_problems == 0)
      usr->result.gc_tree_problems = usr->result.tree_problems;
    if (usr->result.tree_problems && usr->result.kv_tree_problems == 0)
      usr->result.kv_tree_problems = usr->result.tree_problems;
    chk_scope_restore(scope, err);
  }

  const char *const subj_gc = chk_v2a(chk, MDBX_CHK_GC);
  if (usr->result.gc_tree_problems > 0)
    chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing),
                           "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", subj_gc, subj_tree,
                           usr->result.problems_gc = usr->result.gc_tree_problems));
  else {
    err = chk_scope_begin(chk, -1, MDBX_chk_gc, &chk->table_gc, &usr->result.problems_gc,
                          "Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid);
    if (likely(!err))
      err = chk_db(usr->scope, FREE_DBI, &chk->table_gc, chk_handle_gc);
    line = chk_line_begin(scope, MDBX_chk_info);
    if (line) {
      histogram_print(scope, line, &chk->table_gc.histogram.nested_tree, "span(s)", "single", false);
      chk_line_end(line);
    }
    if (usr->result.problems_gc == 0 && (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) {
      const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages;
      if (usr->result.processed_pages != used_pages)
        chk_scope_issue(usr->scope, "used pages mismatch (%" PRIuSIZE "(walked) != %" PRIuSIZE "(allocated - GC))",
                        usr->result.processed_pages, used_pages);
      if (usr->result.unused_pages != usr->result.gc_pages)
        chk_scope_issue(usr->scope, "GC pages mismatch (%" PRIuSIZE "(expected) != %" PRIuSIZE "(GC))",
                        usr->result.unused_pages, usr->result.gc_pages);
    }
  }
  chk_scope_restore(scope, err);

  //--------------------------------------------------------------------------

  err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, "Page allocation:");
  const double percent_boundary_reciprocal = 100.0 / txn->geo.upper;
  const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages;
  const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages;
  const size_t available2boundary = txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages;
  const size_t available2backed = usr->result.backed_pages - usr->result.alloc_pages + usr->result.reclaimable_pages;
  const size_t remained2boundary = txn->geo.upper - usr->result.alloc_pages;
  const size_t remained2backed = usr->result.backed_pages - usr->result.alloc_pages;

  const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) ? usr->result.alloc_pages - usr->result.gc_pages
                                                                   : usr->result.processed_pages;

  line = chk_line_begin(usr->scope, MDBX_chk_info);
  line = chk_print(line,
                   "backed by file: %" PRIuSIZE " pages (%.1f%%)"
                   ", %" PRIuSIZE " left to boundary (%.1f%%)",
                   usr->result.backed_pages, usr->result.backed_pages * percent_boundary_reciprocal,
                   txn->geo.upper - usr->result.backed_pages,
                   (txn->geo.upper - usr->result.backed_pages) * percent_boundary_reciprocal);
  line = chk_line_feed(line);

  line = chk_print(line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "used", used,
                   used * percent_backed_reciprocal, used * percent_boundary_reciprocal);
  line = chk_line_feed(line);

  line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)",
                   "remained", remained2backed, remained2backed * percent_backed_reciprocal, remained2boundary,
                   remained2boundary * percent_boundary_reciprocal);
  line = chk_line_feed(line);

  line =
      chk_print(line,
                "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)"
                ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)",
                usr->result.reclaimable_pages, usr->result.reclaimable_pages * percent_backed_reciprocal,
                usr->result.reclaimable_pages * percent_boundary_reciprocal, usr->result.gc_pages,
                usr->result.gc_pages * percent_backed_reciprocal, usr->result.gc_pages * percent_boundary_reciprocal);
  line = chk_line_feed(line);

  line = chk_print(line,
                   "detained by reader(s): %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)"
                   ", %u reader(s), lag %" PRIi64,
                   detained, detained * percent_backed_reciprocal, detained * percent_boundary_reciprocal,
                   chk->envinfo.mi_numreaders, chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid);
  line = chk_line_feed(line);

  line = chk_print(line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "allocated",
                   usr->result.alloc_pages, usr->result.alloc_pages * percent_backed_reciprocal,
                   usr->result.alloc_pages * percent_boundary_reciprocal);
  line = chk_line_feed(line);

  line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)",
                   "available", available2backed, available2backed * percent_backed_reciprocal, available2boundary,
                   available2boundary * percent_boundary_reciprocal);
  chk_line_end(line);

  line = chk_line_begin(usr->scope, MDBX_chk_resolution);
  line = chk_print(line, "%s %" PRIaPGNO " pages", (txn->geo.upper == txn->geo.now) ? "total" : "upto", txn->geo.upper);
  line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", usr->result.backed_pages,
                   usr->result.backed_pages * percent_boundary_reciprocal);
  line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", usr->result.alloc_pages,
                   usr->result.alloc_pages * percent_boundary_reciprocal);
  line = chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary,
                   available2boundary * percent_boundary_reciprocal);
  chk_line_end(line);
  chk_scope_restore(scope, err);

  //--------------------------------------------------------------------------

  const char *const subj_main = chk_v2a(chk, MDBX_CHK_MAIN);
  if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL)
    chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s...", subj_main));
  else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0)
    chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing),
                           "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", subj_main, subj_tree,
                           usr->result.problems_kv = usr->result.kv_tree_problems));
  else {
    err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->table_main, &usr->result.problems_kv, "Processing %s...",
                          subj_main);
    if (likely(!err))
      err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, chk_handle_kv);
    chk_scope_restore(scope, err);

    const char *const subj_tables = "table(s)";
    if (usr->result.problems_kv && usr->result.table_total)
      chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s", subj_tables));
    else if (usr->result.problems_kv == 0 && usr->result.table_total == 0)
      chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", subj_tables));
    else if (usr->result.problems_kv == 0 && usr->result.table_total) {
      err = chk_scope_begin(chk, 1, MDBX_chk_tables, nullptr, &usr->result.problems_kv,
                            "Processing %s by txn#%" PRIaTXN "...", subj_tables, txn->txnid);
      if (!err)
        err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, nullptr);
      if (usr->scope->subtotal_issues)
        chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution),
                               "processed %" PRIuSIZE " of %" PRIuSIZE " %s, %" PRIuSIZE " problems(s)",
                               usr->result.table_processed, usr->result.table_total, subj_tables,
                               usr->scope->subtotal_issues));
    }
    chk_scope_restore(scope, err);
  }

  return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, nullptr, nullptr));
}

/* Registers one more problem reported on behalf of the given check context.
 *
 * Increments both the counter currently referenced by the internal state and
 * the issue subtotal of the context's current scope.
 *
 * Returns MDBX_SUCCESS, or MDBX_EINVAL when the context is null, has no
 * internal state, is not the context that internal state is bound to, or
 * when there is no active problem counter or scope. */
__cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) {
  /* the context must be bound to a checker instance, and vice versa */
  if (unlikely(!ctx || !ctx->internal || ctx->internal->usr != ctx))
    return MDBX_EINVAL;

  /* there must be somewhere to account the problem */
  if (unlikely(!ctx->internal->problem_counter || !ctx->scope))
    return MDBX_EINVAL;

  *ctx->internal->problem_counter += 1;
  ctx->scope->subtotal_issues += 1;
  return MDBX_SUCCESS;
}

/* Entry point of the environment checker.
 *
 * Validates the arguments, builds the internal checker state and then runs
 * the stages init -> lock -> check -> unlock -> finalize, each of them
 * bracketed by chk_scope_begin()/chk_scope_end().
 *
 * env        - environment to check; validated by check_env().
 * cb         - callbacks table; must be non-null, stored in the internal state.
 * ctx        - user-visible context; must be non-null and must have
 *              ctx->internal unset. It gets bound to the internal state.
 * flags      - checker flags; MDBX_CHK_READWRITE requests taking the writer
 *              lock when env is neither MDBX_RDONLY nor MDBX_EXCLUSIVE.
 * verbosity  - becomes the verbosity of the top-level scope.
 * timeout_seconds_16dot16 - zero leaves the deadline at 0 (presumably meaning
 *              "no deadline" - TODO confirm where monotime_timeout is tested);
 *              otherwise it is converted by osal_16dot16_to_monotime() and
 *              added to the current monotonic time.
 *
 * Returns (through LOG_IFERR) the check_env() error, MDBX_EINVAL for bad
 * arguments, MDBX_ENOMEM when the internal state cannot be allocated, or
 * otherwise the code accumulated by the stages below. */
__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, MDBX_chk_context_t *ctx,
                        const MDBX_chk_flags_t flags, MDBX_chk_severity_t verbosity, unsigned timeout_seconds_16dot16) {
  int err, rc = check_env(env, false);
  if (unlikely(rc != MDBX_SUCCESS))
    return LOG_IFERR(rc);
  /* a context with `internal` already set is rejected */
  if (unlikely(!cb || !ctx || ctx->internal))
    return LOG_IFERR(MDBX_EINVAL);

  /* zero-filled internal state; handed to chk_dispose() at the end */
  MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t));
  if (unlikely(!chk))
    return LOG_IFERR(MDBX_ENOMEM);

  /* bind the user context and the internal state to each other */
  chk->cb = cb;
  chk->usr = ctx;
  chk->usr->internal = chk;
  chk->usr->env = env;
  chk->flags = flags;

  /* the GC and main tables are tracked by dedicated descriptors,
   * both start with id = -1 */
  chk->table_gc.id = -1;
  chk->table_gc.name.iov_base = MDBX_CHK_GC;
  chk->table[FREE_DBI] = &chk->table_gc;

  chk->table_main.id = -1;
  chk->table_main.name.iov_base = MDBX_CHK_MAIN;
  chk->table[MAIN_DBI] = &chk->table_main;

  /* absolute deadline on the monotonic clock, or 0 when no timeout was given */
  chk->monotime_timeout =
      timeout_seconds_16dot16 ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() : 0;
  chk->usr->scope_nesting = 0;
  chk->usr->result.tables = (const void *)&chk->table;

  /* the first element of the scope stack serves as the top-level scope */
  MDBX_chk_scope_t *const top = chk->scope_stack;
  top->verbosity = verbosity;
  top->internal = chk;

  // init: an empty scope of the MDBX_chk_init stage
  rc = chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr));

  // lock: optionally take the writer lock, then start a read-only transaction
  if (likely(!rc))
    rc = chk_scope_begin(chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...",
                         (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read ");
  if (likely(!rc) && (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && (flags & MDBX_CHK_READWRITE)) {
    rc = mdbx_txn_lock(env, false);
    if (unlikely(rc))
      chk_error_rc(ctx->scope, rc, "mdbx_txn_lock");
    else
      chk->write_locked = true; /* remembered so the unlock stage releases it */
  }
  if (likely(!rc)) {
    rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn);
    if (unlikely(rc))
      chk_error_rc(ctx->scope, rc, "mdbx_txn_begin");
  }
  /* NOTE(review): called even when the init stage failed and the lock scope
   * was never begun - confirm chk_scope_end() tolerates that */
  chk_scope_end(chk, rc);

  // doit: pick up the table flags from the transaction and run the check itself
  if (likely(!rc)) {
    chk->table_gc.flags = ctx->txn->dbs[FREE_DBI].flags;
    chk->table_main.flags = ctx->txn->dbs[MAIN_DBI].flags;
    rc = env_chk(top);
  }

  // unlock: abort the transaction and release the writer lock, if either is held
  if (ctx->txn || chk->write_locked) {
    chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr);
    if (ctx->txn) {
      err = mdbx_txn_abort(ctx->txn);
      /* an abort failure is reported only if nothing failed earlier */
      if (err && !rc)
        rc = err;
      ctx->txn = nullptr;
    }
    if (chk->write_locked)
      mdbx_txn_unlock(env);
    rc = chk_scope_end(chk, rc);
  }

  // finalize: a failure to begin this scope takes precedence over the accumulated code
  err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr);
  rc = chk_scope_end(chk, err ? err : rc);
  chk_dispose(chk);
  return LOG_IFERR(rc);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/*------------------------------------------------------------------------------
 * Pack/Unpack 16-bit values for Grow step & Shrink threshold */

/* Expands a mantissa/exponent pair into a value:
 *   32768 + (m + 1) * 2^(e + 8)
 * The mantissa must be below 2048 and the exponent below 8. */
MDBX_NOTHROW_CONST_FUNCTION static inline pgno_t me2v(size_t m, size_t e) {
  assert(m < 2048 && e < 8);
  const size_t shift = e + 8;
  const size_t scaled = (m + 1) << shift;
  return (pgno_t)(scaled + 32768);
}

/* Packs a value into the 16-bit mantissa/exponent form for the given exponent.
 *
 * The caller must pick `e` so that `v` lies above the range covered by the
 * previous exponent (or above 32768 for e == 0) and not above me2v(2047, e).
 * The distance from 32768 is rounded up to a whole number of 2^(e + 8) steps. */
MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t v2me(size_t v, size_t e) {
  assert(v > (e ? me2v(2047, e - 1) : 32768));
  assert(v <= me2v(2047, e));
  const size_t shift = e + 8;
  const size_t step = (size_t)1 << shift;
  /* number of steps, rounded up */
  size_t m = (v - 32768 + step - 1) >> shift;
  /* me2v() adds one to the mantissa, so store one less (but never below zero) */
  if (m > 0)
    m -= 1;
  assert(m < 2048 && e < 8);
  // f e d c b a 9 8 7 6 5 4 3 2 1 0
  // 1 e e e m m m m m m m m m m m 1
  const size_t exponent_bits = e << 12;
  const size_t mantissa_bits = m << 1;
  const uint16_t pv = (uint16_t)(0x8001 + exponent_bits + mantissa_bits);
  assert(pv != 65535);
  return pv;
}

/* Convert a 16-bit packed (exponentially quantized) value to a number of pages.
 *
 * A value without both the highest and the lowest bit set is a plain page
 * count; 65535 stands for 65536; anything else carries a mantissa/exponent
 * pair that is expanded by me2v(). */
pgno_t pv2pages(uint16_t pv) {
  const bool is_packed = (pv & 0x8001) == 0x8001;
  if (!is_packed)
    return pv;
  if (pv == 65535)
    return 65536;
  // f e d c b a 9 8 7 6 5 4 3 2 1 0
  // 1 e e e m m m m m m m m m m m 1
  const size_t mantissa = (pv >> 1) & 2047;
  const size_t exponent = (pv >> 12) & 7;
  return me2v(mantissa, exponent);
}

/* Convert a number of pages to the 16-bit packed (exponentially quantized) value.
 *
 * Counts below 32769, and even counts below 65536, are stored as-is.
 * Otherwise the smallest exponent whose range covers `pages` is used;
 * counts at or above me2v(2046, 7) are all mapped to 65533. */
uint16_t pages2pv(size_t pages) {
  const bool stored_verbatim = pages < 32769 || (pages < 65536 && (pages & 1) == 0);
  if (stored_verbatim)
    return (uint16_t)pages;

  /* exponents 0..6: take the first one whose upper bound is not exceeded */
  for (size_t e = 0; e < 7; ++e) {
    if (pages <= me2v(2047, e))
      return v2me(pages, e);
  }

  /* exponent 7: everything from me2v(2046, 7) upwards shares a single code */
  if (pages >= me2v(2046, 7))
    return 65533;
  return v2me(pages, 7);
}

/* Self-check of the packed-value codec.
 *
 * For every 16-bit code: unpack it, pack the result again and unpack once
 * more; both unpacked page counts must match. Every mismatch is logged via
 * ERROR(). Returns true when no mismatch has been found. */
__cold bool pv2pages_verify(void) {
  const bool dump_translation = false; /* flip to log the non-verbatim translations */
  bool ok = true;
  for (size_t i = 0; i < 65536; ++i) {
    const size_t pages = pv2pages(i);
    const size_t x = pages2pv(pages);
    const size_t xp = pv2pages(x);
    if (pages == xp) {
      /* round-trip is fine; optionally show codes that were not reproduced as-is */
      const bool verbatim = x == i || (x % 2 == 0 && x < 65536);
      if (dump_translation && !verbatim) {
        DEBUG("%zu => %zu => %zu => %zu\n", i, pages, x, xp);
      }
      continue;
    }
    ERROR("%zu => %zu => %zu => %zu\n", i, pages, x, xp);
    ok = false;
  }
  return ok;
}

/*----------------------------------------------------------------------------*/

/* Rounds a byte count up with ceil_powerof2(), using the larger of the
 * environment's page size and the system page size as the granularity. */
MDBX_NOTHROW_PURE_FUNCTION size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes) {
  if (env->ps > globals.sys_pagesize)
    return ceil_powerof2(bytes, env->ps);
  return ceil_powerof2(bytes, globals.sys_pagesize);
}

/* Converts a page number to bytes with pgno2bytes() and rounds the result up
 * to the system page size with ceil_powerof2(). */
MDBX_NOTHROW_PURE_FUNCTION size_t pgno_align2os_bytes(const MDBX_env *env, size_t pgno) {
  const size_t bytes = pgno2bytes(env, pgno);
  return ceil_powerof2(bytes, globals.sys_pagesize);
}

/* Same as pgno_align2os_bytes(), but the aligned size is converted back to a
 * page number with bytes2pgno(). */
MDBX_NOTHROW_PURE_FUNCTION pgno_t pgno_align2os_pgno(const MDBX_env *env, size_t pgno) {
  const size_t aligned_bytes = pgno_align2os_bytes(env, pgno);
  return bytes2pgno(env, aligned_bytes);
}

/*----------------------------------------------------------------------------*/

/* Common body of the integer comparators.
 *
 * Both items must have the same length of either 4 or 8 bytes; they are read
 * with the unaligned_peek helpers using `expected_alignment` and compared as
 * unsigned integers via CMP2INT(). The 8-byte case is probed first when
 * size_t is 8 bytes wide and last otherwise.
 *
 * Any other combination of lengths is logged with ERROR() and yields 0. */
MDBX_NOTHROW_PURE_FUNCTION static __always_inline int cmp_int_inline(const size_t expected_alignment, const MDBX_val *a,
                                                                     const MDBX_val *b) {
  const bool same_length = a->iov_len == b->iov_len;
  if (likely(same_length)) {
    const size_t length = a->iov_len;
    if (sizeof(size_t) >= 8 && likely(length == 8)) {
      const uint64_t lhs = unaligned_peek_u64(expected_alignment, a->iov_base);
      const uint64_t rhs = unaligned_peek_u64(expected_alignment, b->iov_base);
      return CMP2INT(lhs, rhs);
    }
    if (likely(length == 4)) {
      const uint32_t lhs = unaligned_peek_u32(expected_alignment, a->iov_base);
      const uint32_t rhs = unaligned_peek_u32(expected_alignment, b->iov_base);
      return CMP2INT(lhs, rhs);
    }
    if (sizeof(size_t) < 8 && likely(length == 8)) {
      const uint64_t lhs = unaligned_peek_u64(expected_alignment, a->iov_base);
      const uint64_t rhs = unaligned_peek_u64(expected_alignment, b->iov_base);
      return CMP2INT(lhs, rhs);
    }
  }
  ERROR("mismatch and/or invalid size %p.%zu/%p.%zu for INTEGERKEY/INTEGERDUP", a->iov_base, a->iov_len, b->iov_base,
        b->iov_len);
  return 0;
}

/* Compare two items holding unsigned integers for which only 1-byte
 * alignment is expected. */
MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b) {
  const size_t expected_alignment = 1;
  return cmp_int_inline(expected_alignment, a, b);
}

#ifndef cmp_int_align2
/* Compare two items holding unsigned integers that are expected to be
 * 2-byte aligned. Skipped when cmp_int_align2 is already defined as a macro. */
MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align2(const MDBX_val *a, const MDBX_val *b) {
  const size_t expected_alignment = 2;
  return cmp_int_inline(expected_alignment, a, b);
}
#endif /* cmp_int_align2 */

#ifndef cmp_int_align4
/* Compare two items holding unsigned integers that are expected to be
 * 4-byte aligned. Skipped when cmp_int_align4 is already defined as a macro. */
MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_int_align4(const MDBX_val *a, const MDBX_val *b) {
  const size_t expected_alignment = 4;
  return cmp_int_inline(expected_alignment, a, b);
}
#endif /* cmp_int_align4 */

/* Compare two items lexically: the common prefix is compared with memcmp(),
 * and if it is equal the shorter item is ordered first.
 * Returns the memcmp() result when the prefixes differ, otherwise 0 for equal
 * lengths, -1 when `a` is shorter and 1 when `a` is longer. */
MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lexical(const MDBX_val *a, const MDBX_val *b) {
  const size_t len_a = a->iov_len;
  const size_t len_b = b->iov_len;
  const size_t common = (len_a < len_b) ? len_a : len_b;

  /* memcmp() is not called for an empty prefix */
  const int by_content = common ? memcmp(a->iov_base, b->iov_base, common) : 0;
  if (by_content || len_a == len_b)
    return by_content;

  return (len_a < len_b) ? -1 : 1;
}

/* Folds 1..3 bytes into a single unsigned value, with the first byte in the
 * lowest bits and the last byte in bits 16..23.
 * `l` must be in 1..3; for shorter inputs some bytes are taken twice. */
MDBX_NOTHROW_PURE_FUNCTION static __always_inline unsigned tail3le(const uint8_t *p, size_t l) {
  STATIC_ASSERT(sizeof(unsigned) > 2);
  /* indices of the bytes picked for each length:
   * 1: 0 0 0
   * 2: 0 1 1
   * 3: 0 1 2 */
  const unsigned first = p[0];
  const unsigned middle = p[l >> 1];
  const unsigned last = p[l - 1];
  return first | (middle << 8) | (last << 16);
}

/* Compare two items in reverse byte order, i.e. starting from the last byte
 * of each item and moving towards the first one.
 *
 * Only the trailing min(a->iov_len, b->iov_len) bytes are compared by
 * content; the first difference found decides the result (-1 or 1).
 * When these tails are equal the result is CMP2INT() of the lengths. */
MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_reverse(const MDBX_val *a, const MDBX_val *b) {
  /* number of bytes still to be compared */
  size_t left = (a->iov_len < b->iov_len) ? a->iov_len : b->iov_len;
  if (likely(left)) {
    /* both cursors start just past the end of their items and move backwards */
    const uint8_t *pa = ptr_disp(a->iov_base, a->iov_len);
    const uint8_t *pb = ptr_disp(b->iov_base, b->iov_len);
    /* main loop: one size_t-sized word per step */
    while (left >= sizeof(size_t)) {
      pa -= sizeof(size_t);
      pb -= sizeof(size_t);
      left -= sizeof(size_t);
      STATIC_ASSERT(sizeof(size_t) == 4 || sizeof(size_t) == 8);
      if (sizeof(size_t) == 4) {
        uint32_t xa = unaligned_peek_u32(1, pa);
        uint32_t xb = unaligned_peek_u32(1, pb);
        /* the byte at the highest address must be the most significant one,
         * which a native load provides only on little-endian targets */
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
        xa = osal_bswap32(xa);
        xb = osal_bswap32(xb);
#endif /* __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ */
        if (xa != xb)
          return (xa < xb) ? -1 : 1;
      } else {
        uint64_t xa = unaligned_peek_u64(1, pa);
        uint64_t xb = unaligned_peek_u64(1, pb);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
        xa = osal_bswap64(xa);
        xb = osal_bswap64(xb);
#endif /* __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ */
        if (xa != xb)
          return (xa < xb) ? -1 : 1;
      }
    }
    /* with 8-byte words up to 7 bytes may remain: take 4 of them at once */
    if (sizeof(size_t) == 8 && left >= 4) {
      pa -= 4;
      pb -= 4;
      left -= 4;
      uint32_t xa = unaligned_peek_u32(1, pa);
      uint32_t xb = unaligned_peek_u32(1, pb);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
      xa = osal_bswap32(xa);
      xb = osal_bswap32(xb);
#endif /* __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ */
      if (xa != xb)
        return (xa < xb) ? -1 : 1;
    }
    /* the remaining 1..3 bytes are folded by tail3le(), which places the
     * last byte into the most significant position */
    if (left) {
      unsigned xa = tail3le(pa - left, left);
      unsigned xb = tail3le(pb - left, left);
      if (xa != xb)
        return (xa < xb) ? -1 : 1;
    }
  }
  /* the compared tails are equal (or empty): the lengths decide */
  return CMP2INT(a->iov_len, b->iov_len);
}

/* Fast non-lexical comparator: items are ordered by length first, and only
 * items of the same non-zero length are compared by content with memcmp(). */
MDBX_NOTHROW_PURE_FUNCTION __hot int cmp_lenfast(const MDBX_val *a, const MDBX_val *b) {
  const int by_length = CMP2INT(a->iov_len, b->iov_len);
  if (likely(by_length) || a->iov_len == 0)
    return by_length;
  return memcmp(a->iov_base, b->iov_base, a->iov_len);
}

/* Checks two byte sequences of the same length `l` for equality.
 *
 * - zero length: always equal;
 * - 1..3 bytes: compared through tail3le();
 * - 4..8 bytes (when MDBX_UNALIGNED_OK >= 4): two possibly overlapping 32-bit
 *   loads, from the beginning and from the end;
 * - up to 16 bytes (when MDBX_UNALIGNED_OK >= 8 and size_t is 8 bytes wide):
 *   the same with 64-bit loads;
 * - otherwise memcmp(). */
MDBX_NOTHROW_PURE_FUNCTION __hot bool eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l) {
  if (unlikely(l < 4)) {
    if (likely(l))
      return tail3le(a, l) == tail3le(b, l);
    return true;
  }

  if (MDBX_UNALIGNED_OK >= 4 && likely(l < 9)) {
    /* a non-zero difference in either half means inequality */
    const uint32_t head = unaligned_peek_u32(1, a) - unaligned_peek_u32(1, b);
    const uint32_t tail = unaligned_peek_u32(1, a + l - 4) - unaligned_peek_u32(1, b + l - 4);
    return (head | tail) == 0;
  }

  if (MDBX_UNALIGNED_OK >= 8 && sizeof(size_t) > 7 && likely(l < 17)) {
    const uint64_t head = unaligned_peek_u64(1, a) - unaligned_peek_u64(1, b);
    const uint64_t tail = unaligned_peek_u64(1, a + l - 8) - unaligned_peek_u64(1, b + l - 8);
    return (head | tail) == 0;
  }

  return memcmp(a, b, l) == 0;
}

/* Equality-only comparator: 0 when eq_fast() reports the items equal,
 * otherwise always 1 ("greater"), whatever the actual order is. */
int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) {
  if (eq_fast(a, b))
    return 0;
  return 1;
}

/* Equality-only comparator: 0 when eq_fast() reports the items equal,
 * otherwise always -1, whatever the actual order is. */
int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b) {
  if (eq_fast(a, b))
    return 0;
  return -1;
}

/*----------------------------------------------------------------------------*/

/* Publishes a new value of env->mlocked_pgno and keeps the pair of counters
 * in env->lck->mlcnt in step with it.
 *
 * new_aligned_mlocked_pgno - the value to store; like the current value it is
 *     asserted to be unchanged by pgno_align2os_pgno().
 * lock_not_release - true to only raise the stored value, false to only
 *     lower it; nothing is done when the stored value is already at or
 *     beyond the requested one in that direction.
 *
 * mlcnt[0] is incremented when the stored value leaves zero and mlcnt[1] when
 * it returns to zero; their difference is what the log message calls
 * "mlocked-process(es)". */
__cold void update_mlcnt(const MDBX_env *env, const pgno_t new_aligned_mlocked_pgno, const bool lock_not_release) {
  /* outer loop: retried until the CAS on mlocked_pgno succeeds or there is nothing to change */
  for (;;) {
    const pgno_t mlock_pgno_before = atomic_load32(&env->mlocked_pgno, mo_AcquireRelease);
    eASSERT(env, pgno_align2os_pgno(env, mlock_pgno_before) == mlock_pgno_before);
    eASSERT(env, pgno_align2os_pgno(env, new_aligned_mlocked_pgno) == new_aligned_mlocked_pgno);
    if (lock_not_release ? (mlock_pgno_before >= new_aligned_mlocked_pgno)
                         : (mlock_pgno_before <= new_aligned_mlocked_pgno))
      break;
    /* `env` is const for the callers, but this field has to be updated: the const is cast away */
    if (likely(atomic_cas32(&((MDBX_env *)env)->mlocked_pgno, mlock_pgno_before, new_aligned_mlocked_pgno)))
      /* inner loop: retried until the needed counter has been bumped */
      for (;;) {
        mdbx_atomic_uint32_t *const mlcnt = env->lck->mlcnt;
        const int32_t snap_locked = atomic_load32(mlcnt + 0, mo_Relaxed);
        const int32_t snap_unlocked = atomic_load32(mlcnt + 1, mo_Relaxed);
        /* transition 0 -> non-zero: count one more "locked", unless the difference is already at INT_MAX */
        if (mlock_pgno_before == 0 && (snap_locked - snap_unlocked) < INT_MAX) {
          eASSERT(env, lock_not_release);
          if (unlikely(!atomic_cas32(mlcnt + 0, snap_locked, snap_locked + 1)))
            continue;
        }
        /* transition non-zero -> 0: count one more "unlocked", unless the difference is already zero */
        if (new_aligned_mlocked_pgno == 0 && (snap_locked - snap_unlocked) > 0) {
          eASSERT(env, !lock_not_release);
          if (unlikely(!atomic_cas32(mlcnt + 1, snap_unlocked, snap_unlocked + 1)))
            continue;
        }
        /* logs the affected page range and the counter difference before -> after */
        NOTICE("%s-pages %u..%u, mlocked-process(es) %u -> %u", lock_not_release ? "lock" : "unlock",
               lock_not_release ? mlock_pgno_before : new_aligned_mlocked_pgno,
               lock_not_release ? new_aligned_mlocked_pgno : mlock_pgno_before, snap_locked - snap_unlocked,
               atomic_load32(mlcnt + 0, mo_Relaxed) - atomic_load32(mlcnt + 1, mo_Relaxed));
        return;
      }
  }
}

/* Unlocks the part of the data-file mapping that starts at page `aligned_pgno`
 * and ends at byte offset `end_bytes`.
 *
 * Nothing is done unless env->mlocked_pgno is above `aligned_pgno`.
 * On success the bookkeeping is updated through update_mlcnt(); a failure is
 * only logged with WARNING(). Where neither VirtualUnlock() nor munlock() is
 * available the operation fails with MDBX_ENOSYS. */
__cold void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, const size_t end_bytes) {
  if (atomic_load32(&env->mlocked_pgno, mo_AcquireRelease) <= aligned_pgno)
    return;

  const size_t munlock_begin = pgno2bytes(env, aligned_pgno);
  const size_t munlock_size = end_bytes - munlock_begin;
  eASSERT(env, end_bytes % globals.sys_pagesize == 0 && munlock_begin % globals.sys_pagesize == 0 &&
                   munlock_size % globals.sys_pagesize == 0);

  int err;
#if defined(_WIN32) || defined(_WIN64)
  err = VirtualUnlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) ? MDBX_SUCCESS : (int)GetLastError();
  /* a range that was not locked is not treated as an error */
  if (err == ERROR_NOT_LOCKED)
    err = MDBX_SUCCESS;
#elif defined(_POSIX_MEMLOCK_RANGE)
  err = munlock(ptr_disp(env->dxb_mmap.base, munlock_begin), munlock_size) ? errno : MDBX_SUCCESS;
#else
  err = MDBX_ENOSYS;
#endif

  if (unlikely(err != MDBX_SUCCESS)) {
#if defined(_WIN32) || defined(_WIN64)
    WARNING("VirtualUnlock(%zu, %zu) error %d", munlock_begin, munlock_size, err);
#else
    WARNING("munlock(%zu, %zu) error %d", munlock_begin, munlock_size, err);
#endif
    return;
  }
  update_mlcnt(env, aligned_pgno, false);
}

/* Unlocks the mapping from page 0 up to the current mapping size rounded up
 * by bytes_align2os_bytes(). */
__cold void munlock_all(const MDBX_env *env) {
  const size_t end_bytes = bytes_align2os_bytes(env, env->dxb_mmap.current);
  munlock_after(env, 0, end_bytes);
}

/*----------------------------------------------------------------------------*/

/* Merges two sets of durability-related flags.
 *
 * Starts from the bitwise OR of both sets and then normalizes it:
 *  1. MDBX_UTTERLY_NOSYNC that appeared only as a by-product of the OR (it is
 *     fully set in neither input) is replaced by MDBX_SAFE_NOSYNC;
 *  2. MDBX_WRITEMAP together with DEPRECATED_MAPASYNC, without
 *     MDBX_UTTERLY_NOSYNC, is converted to MDBX_SAFE_NOSYNC;
 *  3. any of the NOSYNC modes forces MDBX_NOMETASYNC. */
uint32_t combine_durability_flags(const uint32_t a, const uint32_t b) {
  const bool utterly_in_a = F_ISSET(a, MDBX_UTTERLY_NOSYNC);
  const bool utterly_in_b = F_ISSET(b, MDBX_UTTERLY_NOSYNC);
  uint32_t r = a | b;

  /* avoid false MDBX_UTTERLY_NOSYNC */
  if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !(utterly_in_a || utterly_in_b)) {
    r -= MDBX_UTTERLY_NOSYNC;
    r |= MDBX_SAFE_NOSYNC;
  }

  /* convert DEPRECATED_MAPASYNC to MDBX_SAFE_NOSYNC */
  const uint32_t writemap_async = MDBX_WRITEMAP | DEPRECATED_MAPASYNC;
  if ((r & writemap_async) == writemap_async && !F_ISSET(r, MDBX_UTTERLY_NOSYNC)) {
    r -= DEPRECATED_MAPASYNC;
    r |= MDBX_SAFE_NOSYNC;
  }

  /* force MDBX_NOMETASYNC if NOSYNC enabled */
  const uint32_t any_nosync = MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC;
  if (r & any_nosync)
    r |= MDBX_NOMETASYNC;

  assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && !F_ISSET(b, MDBX_UTTERLY_NOSYNC)));
  return r;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
static bool coherency_check(const MDBX_env *env, const txnid_t txnid, const volatile tree_t *trees,
                            const volatile meta_t *meta, bool report) {
  const txnid_t freedb_mod_txnid = trees[FREE_DBI].mod_txnid;
  const txnid_t maindb_mod_txnid = trees[MAIN_DBI].mod_txnid;
  const pgno_t last_pgno = meta->geometry.now;

  const pgno_t freedb_root_pgno = trees[FREE_DBI].root;
  const page_t *freedb_root =
      (env->dxb_mmap.base && freedb_root_pgno < last_pgno) ? pgno2page(env, freedb_root_pgno) : nullptr;

  const pgno_t maindb_root_pgno = trees[MAIN_DBI].root;
  const page_t *maindb_root =
      (env->dxb_mmap.base && maindb_root_pgno < last_pgno) ? pgno2page(env, maindb_root_pgno) : nullptr;
  const uint64_t magic_and_version = unaligned_peek_u64_volatile(4, &meta->magic_and_version);

  bool ok = true;
  if (freedb_root_pgno != P_INVALID && unlikely(freedb_root_pgno >= last_pgno)) {
    if (report)
      WARNING("catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "free", freedb_root_pgno, txnid,
              (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)"
                                    : "(wagering meta)");
    ok = false;
  }
  if (maindb_root_pgno != P_INVALID && unlikely(maindb_root_pgno >= last_pgno)) {
    if (report)
      WARNING("catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "main", maindb_root_pgno, txnid,
              (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)"
                                    : "(wagering meta)");
    ok = false;
  }
  if (unlikely(txnid < freedb_mod_txnid ||
               (!freedb_mod_txnid && freedb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) {
    if (report)
      WARNING(
          "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "free", freedb_mod_txnid, txnid,
          (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" : "(wagering meta)");
    ok = false;
  }
  if (unlikely(txnid < maindb_mod_txnid ||
               (!maindb_mod_txnid && maindb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) {
    if (report)
      WARNING(
          "catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "main", maindb_mod_txnid, txnid,
          (env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" : "(wagering meta)");
    ok = false;
  }

  /* Проверяем отметки внутри корневых страниц только если сами страницы
   * в пределах текущего отображения. Иначе возможны SIGSEGV до переноса
   * вызова coherency_check_head() после dxb_resize() внутри txn_renew(). */
  if (likely(freedb_root && freedb_mod_txnid &&
             (size_t)ptr_dist(env->dxb_mmap.base, freedb_root) < env->dxb_mmap.limit)) {
    VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->txnid));
    MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, sizeof(freedb_root->txnid));
    const txnid_t root_txnid = freedb_root->txnid;
    if (unlikely(root_txnid != freedb_mod_txnid)) {
      if (report)
        WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN " for %s-db.mod_txnid %" PRIaTXN " %s",
                freedb_root_pgno, root_txnid, "free", freedb_mod_txnid,
                (env->stuck_meta < 0) ? "(workaround for incoherent flaw of "
                                        "unified page/buffer cache)"
                                      : "(wagering meta)");
      ok = false;
    }
  }
  if (likely(maindb_root && maindb_mod_txnid &&
             (size_t)ptr_dist(env->dxb_mmap.base, maindb_root) < env->dxb_mmap.limit)) {
    VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->txnid));
    MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, sizeof(maindb_root->txnid));
    const txnid_t root_txnid = maindb_root->txnid;
    if (unlikely(root_txnid != maindb_mod_txnid)) {
      if (report)
        WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN " for %s-db.mod_txnid %" PRIaTXN " %s",
                maindb_root_pgno, root_txnid, "main", maindb_mod_txnid,
                (env->stuck_meta < 0) ? "(workaround for incoherent flaw of "
                                        "unified page/buffer cache)"
                                      : "(wagering meta)");
      ok = false;
    }
  }
  if (unlikely(!ok) && report)
    env->lck->pgops.incoherence.weak =
        (env->lck->pgops.incoherence.weak >= INT32_MAX) ? INT32_MAX : env->lck->pgops.incoherence.weak + 1;
  return ok;
}

/* Decides whether a caller waiting for a coherent snapshot should retry.
 *
 * `timestamp` is the caller's wait-start mark: a zero value means the wait
 * has just begun and is replaced by the current monotonic time. A null
 * `timestamp`, or a wait longer than osal_16dot16_to_monotime(65536 / 10)
 * (presumably 1/10 of a second in 16.16 fixed-point), ends the wait.
 * `pgno` is the awaited page number, or a negative value when no particular
 * page is awaited.
 *
 * Returns MDBX_RESULT_TRUE (after a memory fence and a yield) when the caller
 * should retry, or MDBX_PROBLEM when the wait has been abandoned. */
__cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, const MDBX_env *env) {
  const bool first_attempt = timestamp && *timestamp == 0;
  if (likely(first_attempt)) {
    *timestamp = osal_monotime();
  } else {
    const bool give_up = !timestamp || osal_monotime() - *timestamp > osal_16dot16_to_monotime(65536 / 10);
    if (unlikely(give_up)) {
      if (pgno >= 0 && pgno != env->stuck_meta) {
        ERROR("bailout waiting for %" PRIuSIZE " page arrival %s", pgno,
              "(workaround for incoherent flaw of unified page/buffer cache)");
      } else if (env->stuck_meta < 0) {
        ERROR("bailout waiting for valid snapshot (%s)", "workaround for incoherent flaw of unified page/buffer cache");
      }
      return MDBX_PROBLEM;
    }
  }

  osal_memory_fence(mo_AcquireRelease, true);
  osal_yield();
  return MDBX_RESULT_TRUE;
}

/* check with timeout as the workaround
 * for https://libmdbx.dqdkfa.ru/dead-github/issues/269
 *
 * Copies the snapshot described by `head` (txnid, geometry, core tree
 * descriptors and canary) into `txn`, then verifies it with
 * coherency_check() and re-reads the meta txnid to make sure it has not
 * changed meanwhile.
 *
 * Returns MDBX_SUCCESS when the snapshot is coherent, the result of
 * coherency_timeout() when it is not, or MDBX_INCOMPATIBLE when the GC tree
 * carries unexpected flags. */
__hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp) {
  /* Take a copy of the snapshot description */
  txn->txnid = head.txnid;
  txn->geo = head.ptr_c->geometry;
  memcpy(txn->dbs, &head.ptr_c->trees, sizeof(head.ptr_c->trees));
  STATIC_ASSERT(sizeof(head.ptr_c->trees) == CORE_DBS * sizeof(tree_t));
  VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS, txn->env->max_dbi - CORE_DBS);
  txn->canary = head.ptr_c->canary;

  /* Mismatches are reported only on the first attempt, i.e. while the
   * caller's timestamp is still zero */
  const bool first_look = (*timestamp == 0);
  if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v, first_look)))
    return coherency_timeout(timestamp, -1, txn->env);
  if (unlikely(txn->txnid != meta_txnid(head.ptr_v)))
    return coherency_timeout(timestamp, -1, txn->env);

  tree_t *const gc = &txn->dbs[FREE_DBI];
  if (unlikely(gc->flags != MDBX_INTEGERKEY)) {
    const bool alien_flags = (gc->flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY;
    if (alien_flags || unaligned_peek_u64(4, &head.ptr_c->magic_and_version) == MDBX_DATA_MAGIC) {
      ERROR("unexpected/invalid db-flags 0x%x for %s", gc->flags, "GC/FreeDB");
      return MDBX_INCOMPATIBLE;
    }
    /* Drop the non-persistent bits when the meta does not carry
     * MDBX_DATA_MAGIC */
    gc->flags &= DB_PERSISTENT_FLAGS;
  }
  tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
  tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
  return MDBX_SUCCESS;
}

/* Verifies that the meta-page `meta` holds a transaction id that is valid
 * (not below MIN_TXNID) and not older than `txnid`, and that its tree
 * descriptors pass coherency_check().
 *
 * Problems are reported when `timestamp` is null or still zero. `pgno` and
 * `timestamp` are forwarded to coherency_timeout() on failure.
 *
 * Returns MDBX_SUCCESS when the meta is coherent, otherwise the result of
 * coherency_timeout(). */
int coherency_check_written(const MDBX_env *env, const txnid_t txnid, const volatile meta_t *meta, const intptr_t pgno,
                            uint64_t *timestamp) {
  const bool report = !timestamp || *timestamp == 0;
  const txnid_t head_txnid = meta_txnid(meta);
  const bool plausible = head_txnid >= MIN_TXNID && head_txnid >= txnid;

  if (likely(plausible)) {
    if (likely(coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) {
      eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY);
      eASSERT(env, check_table_flags(meta->trees.main.flags));
      return MDBX_SUCCESS;
    }
    return coherency_timeout(timestamp, pgno, env);
  }

  if (report) {
    /* Saturating counter of detected incoherence events */
    env->lck->pgops.incoherence.weak =
        (env->lck->pgops.incoherence.weak >= INT32_MAX) ? INT32_MAX : env->lck->pgops.incoherence.weak + 1;
    WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s",
            (head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid,
            bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)),
            "(workaround for incoherent flaw of unified page/buffer cache)");
  }
  return coherency_timeout(timestamp, pgno, env);
}

/* One-shot coherency check of a meta-page: runs coherency_check_written()
 * with a zero minimal txnid and no awaited page. A local zero timestamp is
 * passed when `report` is true, a null timestamp otherwise.
 *
 * Returns true only if coherency_check_written() yields MDBX_SUCCESS. */
bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report) {
  uint64_t timestamp = 0;
  uint64_t *const stamp = report ? &timestamp : nullptr;
  const int rc = coherency_check_written(env, 0, meta, -1, stamp);
  return rc == MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \note Please refer to the COPYRIGHT file for explanations license change,
/// credits and acknowledgments.
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

__cold int cursor_validate(const MDBX_cursor *mc) {
  if (!mc->txn->tw.dirtylist) {
    cASSERT(mc, (mc->txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
  } else {
    cASSERT(mc, (mc->txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
    cASSERT(mc, mc->txn->tw.dirtyroom + mc->txn->tw.dirtylist->length ==
                    (mc->txn->parent ? mc->txn->parent->tw.dirtyroom : mc->txn->env->options.dp_limit));
  }

  cASSERT(mc, (mc->checking & z_updating) ? mc->top + 1 <= mc->tree->height : mc->top + 1 == mc->tree->height);
  if (unlikely((mc->checking & z_updating) ? mc->top + 1 > mc->tree->height : mc->top + 1 != mc->tree->height))
    return MDBX_CURSOR_FULL;

  if (is_pointed(mc) && (mc->checking & z_updating) == 0) {
    const page_t *mp = mc->pg[mc->top];
    const size_t nkeys = page_numkeys(mp);
    if (!is_hollow(mc)) {
      cASSERT(mc, mc->ki[mc->top] < nkeys);
      if (mc->ki[mc->top] >= nkeys)
        return MDBX_CURSOR_FULL;
    }
    if (inner_pointed(mc)) {
      cASSERT(mc, is_filled(mc));
      if (!is_filled(mc))
        return MDBX_CURSOR_FULL;
    }
  }

  for (intptr_t n = 0; n <= mc->top; ++n) {
    page_t *mp = mc->pg[n];
    const size_t nkeys = page_numkeys(mp);
    const bool expect_branch = (n < mc->tree->height - 1) ? true : false;
    const bool expect_nested_leaf = (n + 1 == mc->tree->height - 1) ? true : false;
    const bool branch = is_branch(mp) ? true : false;
    cASSERT(mc, branch == expect_branch);
    if (unlikely(branch != expect_branch))
      return MDBX_CURSOR_FULL;
    if ((mc->checking & z_updating) == 0) {
      cASSERT(mc, nkeys > mc->ki[n] || (!branch && nkeys == mc->ki[n] && (mc->flags & z_hollow) != 0));
      if (unlikely(nkeys <= mc->ki[n] && !(!branch && nkeys == mc->ki[n] && (mc->flags & z_hollow) != 0)))
        return MDBX_CURSOR_FULL;
    } else {
      cASSERT(mc, nkeys + 1 >= mc->ki[n]);
      if (unlikely(nkeys + 1 < mc->ki[n]))
        return MDBX_CURSOR_FULL;
    }

    int err = page_check(mc, mp);
    if (unlikely(err != MDBX_SUCCESS))
      return err;

    for (size_t i = 0; i < nkeys; ++i) {
      if (branch) {
        node_t *node = page_node(mp, i);
        cASSERT(mc, node_flags(node) == 0);
        if (unlikely(node_flags(node) != 0))
          return MDBX_CURSOR_FULL;
        pgno_t pgno = node_pgno(node);
        page_t *np;
        err = page_get(mc, pgno, &np, mp->txnid);
        cASSERT(mc, err == MDBX_SUCCESS);
        if (unlikely(err != MDBX_SUCCESS))
          return err;
        const bool nested_leaf = is_leaf(np) ? true : false;
        cASSERT(mc, nested_leaf == expect_nested_leaf);
        if (unlikely(nested_leaf != expect_nested_leaf))
          return MDBX_CURSOR_FULL;
        err = page_check(mc, np);
        if (unlikely(err != MDBX_SUCCESS))
          return err;
      }
    }
  }
  return MDBX_SUCCESS;
}

/* Runs cursor_validate() with the z_updating mark temporarily set in
 * mc->checking, then restores the previous value.
 * Returns whatever cursor_validate() returned. */
__cold int cursor_validate_updating(MDBX_cursor *mc) {
  const uint8_t saved = mc->checking;
  mc->checking = saved | z_updating;
  const int result = cursor_validate(mc);
  mc->checking = saved;
  return result;
}

/* Tells whether the cursor is present in its transaction's per-table list of
 * cursors. For a nested (z_inner) cursor the comparison is made against the
 * sub-cursor embedded into each listed cursor. */
bool cursor_is_tracked(const MDBX_cursor *mc) {
  const bool nested = (mc->flags & z_inner) != 0;
  MDBX_cursor *scan = mc->txn->cursors[cursor_dbi(mc)];
  while (scan) {
    const MDBX_cursor *const candidate = nested ? &scan->subcur->cursor : scan;
    if (candidate == mc)
      return true;
    scan = scan->next;
  }
  return false;
}

/*----------------------------------------------------------------------------*/

/* Marks the cursor's table as dirty within the transaction.
 *
 * Sets DBI_DIRTY for the table and MDBX_TXN_DIRTY for the transaction. For a
 * non-core (named) table it additionally marks MAIN_DBI dirty and searches
 * the main tree for the table's name with Z_MODIFY.
 *
 * Returns MDBX_SUCCESS or the first error from dbi_check(), cursor_init() or
 * tree_search(). */
static int touch_dbi(MDBX_cursor *mc) {
  cASSERT(mc, (mc->flags & z_inner) == 0);
  cASSERT(mc, (*cursor_dbi_state(mc) & DBI_DIRTY) == 0);
  *cursor_dbi_state(mc) |= DBI_DIRTY;
  mc->txn->flags |= MDBX_TXN_DIRTY;

  if (cursor_is_core(mc))
    return MDBX_SUCCESS;

  /* Touch DB record of named DB */
  MDBX_txn *const txn = mc->txn;
  int rc = dbi_check(txn, MAIN_DBI);
  if (likely(rc == MDBX_SUCCESS)) {
    cursor_couple_t cx;
    rc = cursor_init(&cx.outer, txn, MAIN_DBI);
    if (likely(rc == MDBX_SUCCESS)) {
      txn->dbi_state[MAIN_DBI] |= DBI_DIRTY;
      rc = tree_search(&cx.outer, &container_of(mc->clc, kvx_t, clc)->name, Z_MODIFY);
    }
  }
  return rc;
}

/* Prepares the cursor for a modification.
 *
 * For an outer (non-z_inner) cursor: turns the dirty-list LRU, marks the
 * table dirty via touch_dbi() if it is not yet, estimates the number of
 * pages the operation may dirty and calls txn_spill() with that estimate.
 * Then, for a pointed cursor, if the transaction has MDBX_TXN_SPILLS set or
 * the top page is not modifiable, calls page_touch() for every level of the
 * page stack from the root down to the top.
 *
 * `key` and `data` are used only for the size estimation.
 * Returns MDBX_SUCCESS or the first error from touch_dbi(), txn_spill() or
 * page_touch(). */
__hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, const MDBX_val *data) {
  cASSERT(mc, (mc->txn->flags & MDBX_TXN_RDONLY) == 0);
  cASSERT(mc, is_pointed(mc) || mc->tree->height == 0);
  cASSERT(mc, cursor_is_tracked(mc));

  cASSERT(mc, F_ISSET(dbi_state(mc->txn, FREE_DBI), DBI_LINDO | DBI_VALID));
  cASSERT(mc, F_ISSET(dbi_state(mc->txn, MAIN_DBI), DBI_LINDO | DBI_VALID));
  if ((mc->flags & z_inner) == 0) {
    MDBX_txn *const txn = mc->txn;
    dpl_lru_turn(txn);

    /* The first modification of this table within the transaction */
    if (unlikely((*cursor_dbi_state(mc) & DBI_DIRTY) == 0)) {
      int err = touch_dbi(mc);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
    }

    /* Estimate how much space this operation will take: */
    /* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
    size_t need = CURSOR_STACK_SIZE + 3;
    /* 2) GC/FreeDB for any payload */
    if (!cursor_is_gc(mc)) {
      need += txn->dbs[FREE_DBI].height + (size_t)3;
      /* 3) Named DBs also dirty the main DB */
      if (!cursor_is_main(mc))
        need += txn->dbs[MAIN_DBI].height + (size_t)3;
    }
#if xMDBX_DEBUG_SPILLING != 2
    /* production mode */
    /* 4) Double the page chain estimation
     * for extensively splitting, rebalance and merging */
    need += need;
    /* 5) Factor the key+data which to be put in */
    need += bytes2pgno(txn->env, node_size(key, data)) + (size_t)1;
#else
    /* debug mode */
    (void)key;
    (void)data;
    txn->env->debug_dirtied_est = ++need;
    txn->env->debug_dirtied_act = 0;
#endif /* xMDBX_DEBUG_SPILLING == 2 */

    int err = txn_spill(txn, mc, need);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
  }

  if (likely(is_pointed(mc)) && ((mc->txn->flags & MDBX_TXN_SPILLS) || !is_modifable(mc->txn, mc->pg[mc->top]))) {
    /* Touch every page of the stack: mc->top is temporarily used as the
     * level being touched and is restored once all levels are done.
     * NOTE: on a page_touch() failure the function returns with mc->top left
     * at the failed level. */
    const int8_t top = mc->top;
    mc->top = 0;
    do {
      int err = page_touch(mc);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      mc->top += 1;
    } while (mc->top <= top);
    mc->top = top;
  }
  return MDBX_SUCCESS;
}

/*----------------------------------------------------------------------------*/

/* Shadows a chain of cursors for a nested transaction.
 *
 * Walks the chain starting at `mc`. Every live cursor gets a heap-allocated
 * backup copy (followed by a copy of its sub-cursor, if any), is re-bound to
 * the `nested` transaction and pushed onto nested->cursors[dbi]. Cursors
 * which are not live must be in the wait4eot state and are skipped.
 *
 * Returns MDBX_SUCCESS, or MDBX_ENOMEM if a backup could not be allocated
 * (cursors processed before the failure stay shadowed). */
int cursor_shadow(MDBX_cursor *mc, MDBX_txn *nested, const size_t dbi) {
  tASSERT(nested, dbi > FREE_DBI && dbi < nested->n_dbi);
  /* The backup size is derived from the first cursor of the chain */
  const size_t size = mc->subcur ? sizeof(MDBX_cursor) + sizeof(subcur_t) : sizeof(MDBX_cursor);
  /* `bk` doubles as the iteration link: for a shadowed cursor it is the
   * backup, which still holds the original `next`; for a skipped cursor it
   * is the cursor itself. */
  for (MDBX_cursor *bk; mc; mc = bk->next) {
    cASSERT(mc, mc != mc->next);
    if (mc->signature != cur_signature_live) {
      ENSURE(nested->env, mc->signature == cur_signature_wait4eot);
      bk = mc;
      continue;
    }
    bk = osal_malloc(size);
    if (unlikely(!bk))
      return MDBX_ENOMEM;
#if MDBX_DEBUG
    memset(bk, 0xCD, size);
    VALGRIND_MAKE_MEM_UNDEFINED(bk, size);
#endif /* MDBX_DEBUG */
    /* Save the current state, then re-bind the cursor to the nested txn */
    *bk = *mc;
    mc->backup = bk;
    mc->txn = nested;
    mc->tree = &nested->dbs[dbi];
    mc->dbi_state = &nested->dbi_state[dbi];
    subcur_t *mx = mc->subcur;
    if (mx) {
      /* The sub-cursor backup is stored right after the cursor backup */
      *(subcur_t *)(bk + 1) = *mx;
      mx->cursor.txn = nested;
      mx->cursor.dbi_state = &nested->dbi_state[dbi];
    }
    /* Push the cursor onto the nested transaction's list for this table */
    mc->next = nested->cursors[dbi];
    nested->cursors[dbi] = mc;
  }
  return MDBX_SUCCESS;
}

/* Handles a cursor at the end of transaction `txn`.
 *
 * If the cursor has a backup (i.e. it was shadowed for a nested transaction):
 * with `merge` the cursor keeps its current position and only its links
 * (next, backup, txn, tree, dbi_state) are switched to the ones saved in the
 * backup; without `merge` the cursor and its sub-cursor are restored from
 * the backup entirely, keeping only the current signature. The backup is
 * freed in both cases.
 *
 * If there is no backup, the cursor is marked ready4dispose, unlinked
 * (next points to itself) and passed to cursor_drown().
 *
 * Returns the value `mc->next` had on entry, so the caller can continue
 * walking the list. */
MDBX_cursor *cursor_eot(MDBX_cursor *mc, MDBX_txn *txn, const bool merge) {
  /* Capture everything needed before the cursor is altered below */
  MDBX_cursor *const next = mc->next;
  const unsigned stage = mc->signature;
  MDBX_cursor *const bk = mc->backup;
  ENSURE(txn->env, stage == cur_signature_live || (stage == cur_signature_wait4eot && bk));
  tASSERT(txn, mc->txn == txn);
  if (bk) {
    subcur_t *mx = mc->subcur;
    tASSERT(txn, mc->txn->parent != nullptr);
    tASSERT(txn, bk->txn == txn->parent);
    /* Zap: Using uninitialized memory '*mc->backup'. */
    MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001);
    ENSURE(txn->env, bk->signature == cur_signature_live);
    tASSERT(txn, mx == bk->subcur);
    if (merge) {
      /* Update pointers to parent txn */
      mc->next = bk->next;
      mc->backup = bk->backup;
      mc->txn = bk->txn;
      mc->tree = bk->tree;
      mc->dbi_state = bk->dbi_state;
      if (mx) {
        mx->cursor.txn = bk->txn;
        mx->cursor.dbi_state = bk->dbi_state;
      }
    } else {
      /* Restore from backup, i.e. rollback/abort nested txn */
      *mc = *bk;
      mc->signature = stage /* Promote (cur_signature_wait4eot) state to parent txn */;
      if (mx)
        *mx = *(subcur_t *)(bk + 1);
    }
    /* Invalidate the backup's signature before releasing it */
    bk->signature = 0;
    osal_free(bk);
  } else {
    ENSURE(mc->txn->env, stage == cur_signature_live);
    mc->signature = cur_signature_ready4dispose /* Cursor may be reused */;
    mc->next = mc;
    cursor_drown((cursor_couple_t *)mc);
  }
  return next;
}

/*----------------------------------------------------------------------------*/

/* Initializes a cursor couple (outer cursor plus embedded nested cursor).
 *
 * The outer cursor is bound to `txn`, `tree`, the comparators/limits of `kvx`
 * and `dbi_state`, and starts in the fresh state. The nested cursor is set
 * up only when the tree has MDBX_DUPSORT; otherwise outer.subcur is null.
 *
 * Returns the result of tbl_fetch() when the table state is DBI_STALE,
 * otherwise the result of tbl_setup_ifneed(). */
static __always_inline int couple_init(cursor_couple_t *couple, const MDBX_txn *const txn, tree_t *const tree,
                                       kvx_t *const kvx, uint8_t *const dbi_state) {

  VALGRIND_MAKE_MEM_UNDEFINED(couple, sizeof(cursor_couple_t));
  tASSERT(txn, F_ISSET(*dbi_state, DBI_VALID | DBI_LINDO));

  couple->outer.signature = cur_signature_live;
  /* self-linked `next`: the cursor is not in any list yet */
  couple->outer.next = &couple->outer;
  couple->outer.backup = nullptr;
  couple->outer.txn = (MDBX_txn *)txn;
  couple->outer.tree = tree;
  couple->outer.clc = &kvx->clc;
  couple->outer.dbi_state = dbi_state;
  couple->outer.top_and_flags = z_fresh_mark;
  STATIC_ASSERT((int)z_branch == P_BRANCH && (int)z_leaf == P_LEAF && (int)z_largepage == P_LARGE &&
                (int)z_dupfix == P_DUPFIX);
  /* Page checking is requested when audit is enabled or the environment has
   * the MDBX_VALIDATION flag */
  couple->outer.checking = (AUDIT_ENABLED() || (txn->env->flags & MDBX_VALIDATION)) ? z_pagecheck | z_leaf : z_leaf;
  couple->outer.subcur = nullptr;

  if (tree->flags & MDBX_DUPSORT) {
    couple->inner.cursor.signature = cur_signature_live;
    subcur_t *const mx = couple->outer.subcur = &couple->inner;
    mx->cursor.subcur = nullptr;
    mx->cursor.next = &mx->cursor;
    mx->cursor.txn = (MDBX_txn *)txn;
    mx->cursor.tree = &mx->nested_tree;
    /* The nested cursor's key comparator is the outer value comparator: it
     * is addressed as the clc_t right after the outer one */
    mx->cursor.clc = ptr_disp(couple->outer.clc, sizeof(clc_t));
    tASSERT(txn, &mx->cursor.clc->k == &kvx->clc.v);
    mx->cursor.dbi_state = dbi_state;
    mx->cursor.top_and_flags = z_fresh_mark | z_inner;
    STATIC_ASSERT(MDBX_DUPFIXED * 2 == P_DUPFIX);
    /* MDBX_DUPFIXED shifted left by one gives the dupfix checking bit */
    mx->cursor.checking = couple->outer.checking + ((tree->flags & MDBX_DUPFIXED) << 1);
  }

  if (unlikely(*dbi_state & DBI_STALE))
    return tbl_fetch(couple->outer.txn, cursor_dbi(&couple->outer));

  return tbl_setup_ifneed(txn->env, kvx, tree);
}

/* Initializes a cursor couple over an explicitly given tree and kvx for
 * walking; the beginning of the transaction's dbi_state array is used as the
 * table state. Returns the result of couple_init(). */
__cold int cursor_init4walk(cursor_couple_t *couple, const MDBX_txn *const txn, tree_t *const tree, kvx_t *const kvx) {
  uint8_t *const state = txn->dbi_state;
  return couple_init(couple, txn, tree, kvx, state);
}

/* Initializes the cursor `mc` for table `dbi` of transaction `txn`.
 *
 * `mc` must be the `outer` member of a cursor_couple_t, since the whole
 * couple is initialized. Returns the error from dbi_check() when the handle
 * is not usable, otherwise the result of couple_init(). */
int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) {
  STATIC_ASSERT(offsetof(cursor_couple_t, outer) == 0);
  const int rc = dbi_check(txn, dbi);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  cursor_couple_t *const couple = container_of(mc, cursor_couple_t, outer);
  return couple_init(couple, txn, &txn->dbs[dbi], &txn->env->kvs[dbi], &txn->dbi_state[dbi]);
}

/* Error path for a dupsort node/page met by a cursor that has no nested
 * cursor: logs the problem, flags the transaction with MDBX_TXN_ERROR, puts
 * the cursor into the poor state and returns MDBX_CORRUPTED. */
__cold static int unexpected_dupsort(MDBX_cursor *mc) {
  ERROR("unexpected dupsort-page/node for non-dupsort db/cursor (dbi %zu)", cursor_dbi(mc));
  mc->txn->flags |= MDBX_TXN_ERROR;
  be_poor(mc);
  return MDBX_CORRUPTED;
}

/* Prepares the nested cursor of `mc` for the duplicates held by `node`,
 * which resides on page `mp`.
 *
 * Two node kinds are accepted:
 *  - N_DUP | N_TREE: the node data is a tree_t record of a nested tree; it
 *    is copied into the nested cursor, which is left in the fresh state;
 *  - N_DUP: the node data is a sub-page; a single-leaf nested tree is
 *    synthesized and the nested cursor is pointed at the sub-page's first
 *    item.
 * Afterwards the dupfix_size of the nested tree is reconciled with the one
 * of the outer tree.
 *
 * Unless MDBX_DISABLE_VALIDATION is set, malformed input is detected.
 * Returns MDBX_SUCCESS, or MDBX_CORRUPTED (the nested cursor is then put
 * into the poor state, except for the unexpected_dupsort() path). */
int cursor_dupsort_setup(MDBX_cursor *mc, const node_t *node, const page_t *mp) {
  cASSERT(mc, is_pointed(mc));
  subcur_t *mx = mc->subcur;
  if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr))
    return unexpected_dupsort(mc);

  const uint8_t flags = node_flags(node);
  switch (flags) {
  default:
    ERROR("invalid node flags %u", flags);
    goto bailout;
  case N_DUP | N_TREE:
    /* Nested tree: the node data must be exactly one tree_t record */
    if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != sizeof(tree_t))) {
      ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), sizeof(tree_t));
      goto bailout;
    }
    memcpy(&mx->nested_tree, node_data(node), sizeof(tree_t));
    const txnid_t pp_txnid = mp->txnid;
    /* The nested tree cannot be newer than the page that holds its record */
    if (!MDBX_DISABLE_VALIDATION && unlikely(mx->nested_tree.mod_txnid > pp_txnid)) {
      ERROR("nested-db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", mx->nested_tree.mod_txnid, pp_txnid);
      goto bailout;
    }
    mx->cursor.top_and_flags = z_fresh_mark | z_inner;
    break;
  case N_DUP:
    /* Sub-page: the node data must be larger than a bare page header */
    if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) <= PAGEHDRSZ)) {
      ERROR("invalid nested-page size %zu", node_ds(node));
      goto bailout;
    }
    page_t *sp = node_data(node);
    /* Describe the sub-page as a tree consisting of a single leaf */
    mx->nested_tree.height = 1;
    mx->nested_tree.branch_pages = 0;
    mx->nested_tree.leaf_pages = 1;
    mx->nested_tree.large_pages = 0;
    mx->nested_tree.items = page_numkeys(sp);
    mx->nested_tree.root = 0;
    mx->nested_tree.mod_txnid = mp->txnid;
    mx->cursor.top_and_flags = z_inner;
    mx->cursor.pg[0] = sp;
    mx->cursor.ki[0] = 0;
    mx->nested_tree.flags = flags_db2sub(mc->tree->flags);
    mx->nested_tree.dupfix_size = (mc->tree->flags & MDBX_DUPFIXED) ? sp->dupfix_ksize : 0;
    break;
  }

  if (unlikely(mx->nested_tree.dupfix_size != mc->tree->dupfix_size)) {
    /* A difference is acceptable only when the outer tree has no
     * dupfix_size yet, is MDBX_DUPFIXED, and the nested size fits the
     * value-length limits; then the nested size is adopted. */
    if (!MDBX_DISABLE_VALIDATION && unlikely(mc->tree->dupfix_size != 0)) {
      ERROR("cursor mismatched nested-db dupfix_size %u", mc->tree->dupfix_size);
      goto bailout;
    }
    if (!MDBX_DISABLE_VALIDATION && unlikely((mc->tree->flags & MDBX_DUPFIXED) == 0)) {
      ERROR("mismatched nested-db flags %u", mc->tree->flags);
      goto bailout;
    }
    if (!MDBX_DISABLE_VALIDATION &&
        unlikely(mx->nested_tree.dupfix_size < mc->clc->v.lmin || mx->nested_tree.dupfix_size > mc->clc->v.lmax)) {
      ERROR("mismatched nested-db.dupfix_size (%u) <> min/max value-length "
            "(%zu/%zu)",
            mx->nested_tree.dupfix_size, mc->clc->v.lmin, mc->clc->v.lmax);
      goto bailout;
    }
    mc->tree->dupfix_size = mx->nested_tree.dupfix_size;
    mc->clc->v.lmin = mc->clc->v.lmax = mx->nested_tree.dupfix_size;
    cASSERT(mc, mc->clc->v.lmax >= mc->clc->v.lmin);
  }

  DEBUG("Sub-db dbi -%zu root page %" PRIaPGNO, cursor_dbi(&mx->cursor), mx->nested_tree.root);
  return MDBX_SUCCESS;

bailout:
  mx->cursor.top_and_flags = z_poor_mark | z_inner;
  return MDBX_CORRUPTED;
}

/*----------------------------------------------------------------------------*/

/* Copies the position of `csrc` into `cdst`: the combined top/flags field and
 * the page/index pairs of every stack level up to csrc->top. Both cursors
 * must already share the transaction, tree, clc and dbi_state.
 * Returns `cdst`. */
MDBX_cursor *cursor_cpstk(const MDBX_cursor *csrc, MDBX_cursor *cdst) {
  cASSERT(cdst, cdst->txn == csrc->txn);
  cASSERT(cdst, cdst->tree == csrc->tree);
  cASSERT(cdst, cdst->clc == csrc->clc);
  cASSERT(cdst, cdst->dbi_state == csrc->dbi_state);
  cdst->top_and_flags = csrc->top_and_flags;

  intptr_t level = 0;
  while (level <= csrc->top) {
    cdst->pg[level] = csrc->pg[level];
    cdst->ki[level] = csrc->ki[level];
    ++level;
  }
  return cdst;
}

/* Moves the cursor to the adjacent (right or left) sibling of its top page.
 *
 * Pops to the parent, steps the parent's index in the requested direction
 * (recursing to the parent's own sibling when the index is already at the
 * edge), then fetches the referenced child and pushes it positioned at its
 * first key (moving right) or last key (moving left).
 *
 * Returns MDBX_SUCCESS, MDBX_NOTFOUND when there is no sibling in that
 * direction (the stack depth is then left as it was), or an error from
 * page_get()/cursor_push() after putting the cursor into the poor state. */
static __always_inline int sibling(MDBX_cursor *mc, bool right) {
  /* root has no siblings */
  if (mc->top < 1)
    return MDBX_NOTFOUND;

  cursor_pop(mc);
  DEBUG("parent page is page %" PRIaPGNO ", index %u", mc->pg[mc->top]->pgno, mc->ki[mc->top]);

  const bool at_edge =
      right ? (mc->ki[mc->top] + (size_t)1 >= page_numkeys(mc->pg[mc->top])) : (mc->ki[mc->top] == 0);
  int err;
  if (at_edge) {
    DEBUG("no more keys aside, moving to next %s sibling", right ? "right" : "left");
    err = right ? cursor_sibling_right(mc) : cursor_sibling_left(mc);
    if (err != MDBX_SUCCESS) {
      /* undo cursor_pop before returning */
      if (likely(err == MDBX_NOTFOUND))
        mc->top += 1;
      return err;
    }
  } else {
    if (right)
      mc->ki[mc->top] += 1;
    else
      mc->ki[mc->top] -= 1;
    DEBUG("just moving to %s index key %u", right ? "right" : "left", mc->ki[mc->top]);
  }
  cASSERT(mc, is_branch(mc->pg[mc->top]));

  /* Descend into the child referenced by the parent's current node */
  page_t *mp = mc->pg[mc->top];
  const node_t *node = page_node(mp, mc->ki[mc->top]);
  err = page_get(mc, node_pgno(node), &mp, mp->txnid);
  if (unlikely(err != MDBX_SUCCESS)) {
    be_poor(mc);
    return err;
  }

  err = cursor_push(mc, mp, right ? 0 : (indx_t)page_numkeys(mp) - 1);
  if (unlikely(err != MDBX_SUCCESS))
    be_poor(mc);
  return err;
}

/* Moves the cursor to the left sibling page. When there is none
 * (MDBX_NOTFOUND), the index on the current top page is set to its first
 * key. Returns the result of sibling(). */
__hot int cursor_sibling_left(MDBX_cursor *mc) {
  const int err = sibling(mc, false);
  if (unlikely(err == MDBX_NOTFOUND)) {
    cASSERT(mc, mc->top >= 0);
    size_t nkeys = page_numkeys(mc->pg[mc->top]);
    cASSERT(mc, nkeys > 0);
    mc->ki[mc->top] = 0;
  }
  return err;
}

/* Moves the cursor to the right sibling page. When there is none
 * (MDBX_NOTFOUND), the index on the current top page is set to its last key,
 * both EOF flags are raised and inner_gone() is called for the nested
 * cursor. Returns the result of sibling(). */
__hot int cursor_sibling_right(MDBX_cursor *mc) {
  const int err = sibling(mc, true);
  if (unlikely(err == MDBX_NOTFOUND)) {
    cASSERT(mc, mc->top >= 0);
    size_t nkeys = page_numkeys(mc->pg[mc->top]);
    cASSERT(mc, nkeys > 0);
    mc->ki[mc->top] = (indx_t)nkeys - 1;
    mc->flags = z_eof_soft | z_eof_hard | (mc->flags & z_clear_mask);
    inner_gone(mc);
  }
  return err;
}

/*----------------------------------------------------------------------------*/

/* Template function: lands the cursor on the data at its current position.
 * This includes loading the data into the nested cursor, if there is one.
 *
 * `inner` selects the nested-cursor flavour (then `data` must be null and
 * the item is returned through `key`); `tend2first` selects whether a nested
 * cursor is positioned at its first or last item; `eof` tells that the
 * position is the last key of the page and makes z_eof_soft be raised.
 * `key` and `data` are optional outputs.
 *
 * Returns MDBX_SUCCESS, MDBX_CORRUPTED for an unexpected leaf-page type, or
 * an error from cursor_dupsort_setup(), inner_first()/inner_last() or
 * node_read(). */
static __always_inline int cursor_bring(const bool inner, const bool tend2first, MDBX_cursor *__restrict mc,
                                        MDBX_val *__restrict key, MDBX_val *__restrict data, bool eof) {
  if (inner) {
    cASSERT(mc, !data && !mc->subcur && (mc->flags & z_inner) != 0);
  } else {
    cASSERT(mc, (mc->flags & z_inner) == 0);
  }

  const page_t *mp = mc->pg[mc->top];
  if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) {
    ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags);
    return MDBX_CORRUPTED;
  }

  const size_t nkeys = page_numkeys(mp);
  cASSERT(mc, nkeys > 0);
  const size_t ki = mc->ki[mc->top];
  cASSERT(mc, nkeys > ki);
  cASSERT(mc, !eof || ki == nkeys - 1);

  /* A dupfix leaf has no nodes: items are addressed by index and size */
  if (inner && is_dupfix_leaf(mp)) {
    be_filled(mc);
    if (eof)
      mc->flags |= z_eof_soft;
    if (likely(key))
      *key = page_dupfix_key(mp, ki, mc->tree->dupfix_size);
    return MDBX_SUCCESS;
  }

  const node_t *__restrict node = page_node(mp, ki);
  if (!inner && (node_flags(node) & N_DUP)) {
    /* The node holds duplicates: engage the nested cursor */
    int err = cursor_dupsort_setup(mc, node, mp);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
    MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr);
    if (node_flags(node) & N_TREE) {
      /* Duplicates are stored in a nested tree */
      err = tend2first ? inner_first(&mc->subcur->cursor, data) : inner_last(&mc->subcur->cursor, data);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
    } else {
      /* Duplicates are stored in a sub-page; cursor_dupsort_setup() has
       * pointed the nested cursor at its first item */
      if (!tend2first) {
        mc->subcur->cursor.ki[0] = (indx_t)mc->subcur->nested_tree.items - 1;
        mc->subcur->cursor.flags |= z_eof_soft;
      }
      if (data) {
        const page_t *inner_mp = mc->subcur->cursor.pg[0];
        cASSERT(mc, is_subpage(inner_mp) && is_leaf(inner_mp));
        const size_t inner_ki = mc->subcur->cursor.ki[0];
        if (is_dupfix_leaf(inner_mp))
          *data = page_dupfix_key(inner_mp, inner_ki, mc->tree->dupfix_size);
        else
          *data = get_key(page_node(inner_mp, inner_ki));
      }
    }
    be_filled(mc);
  } else {
    /* Plain node without duplicates */
    if (!inner)
      inner_gone(mc);
    if (data) {
      int err = node_read(mc, node, data, mp);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
    }
    be_filled(mc);
    if (eof)
      mc->flags |= z_eof_soft;
  }

  get_key_optional(node, key);
  return MDBX_SUCCESS;
}

/* Template function: positions the cursor at the beginning or at the end.
 *
 * Unless the cursor's stack depth index is already zero, the tree is
 * searched with Z_FIRST or Z_LAST. Then the index on the top page is set to
 * the first key (`tend2first`) or to the last one, and cursor_bring()
 * completes the positioning.
 * Returns an error from tree_search(), otherwise the result of
 * cursor_bring(). */
static __always_inline int cursor_brim(const bool inner, const bool tend2first, MDBX_cursor *__restrict mc,
                                       MDBX_val *__restrict key, MDBX_val *__restrict data) {
  if (mc->top != 0) {
    const int err = tree_search(mc, nullptr, tend2first ? Z_FIRST : Z_LAST);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
  }

  const size_t nkeys = page_numkeys(mc->pg[mc->top]);
  cASSERT(mc, nkeys > 0);
  if (tend2first)
    mc->ki[mc->top] = 0;
  else
    mc->ki[mc->top] = nkeys - 1;

  /* landing on the last key means the soft EOF has been reached */
  const bool eof = !tend2first;
  return cursor_bring(inner, tend2first, mc, key, data, eof);
}

/* Positions a nested cursor at its first item: cursor_brim() in the inner
 * flavour, with `data` passed in the key slot and no data output. */
__hot int inner_first(MDBX_cursor *mc, MDBX_val *data) { return cursor_brim(true, true, mc, data, nullptr); }

/* Positions a nested cursor at its last item: cursor_brim() in the inner
 * flavour, with `data` passed in the key slot and no data output. */
__hot int inner_last(MDBX_cursor *mc, MDBX_val *data) { return cursor_brim(true, false, mc, data, nullptr); }

/* Positions an outer cursor at the first key: cursor_brim() in the outer
 * flavour tending to the first item. */
__hot int outer_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
  return cursor_brim(false, true, mc, key, data);
}

/* Positions an outer cursor at the last key: cursor_brim() in the outer
 * flavour tending to the last item. */
__hot int outer_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
  return cursor_brim(false, false, mc, key, data);
}

/*----------------------------------------------------------------------------*/

/* Template function: moves the cursor by one position.
 * Manages the nested cursor when needed.
 *
 * inner   - true when `mc` is a nested cursor: it is asserted to carry z_inner,
 *           to have no subcursor of its own and to be called with data == nullptr;
 *           false when `mc` is an outer cursor (asserted not to carry z_inner).
 * forward - true to step towards the next item, false towards the previous one.
 * key     - output slot passed to get_key_optional() / cursor_bring().
 * data    - output slot passed to the nested-cursor step and to cursor_bring().
 * op      - requested operation; asserted to be MDBX_NEXT/MDBX_PREV for a nested
 *           cursor, or one of the plain, *_DUP or *_NODUP variants matching
 *           `forward` for an outer cursor.
 *
 * Returns MDBX_SUCCESS when a new position was brought, MDBX_NOTFOUND or
 * MDBX_ENODATA when there is nothing to step to (see the individual branches),
 * or an error propagated from the helpers called below. */
static __always_inline int cursor_step(const bool inner, const bool forward, MDBX_cursor *__restrict mc,
                                       MDBX_val *__restrict key, MDBX_val *__restrict data, MDBX_cursor_op op) {
  /* Sanity checks: `op` must agree with the direction and the cursor kind. */
  if (forward) {
    if (inner)
      cASSERT(mc, op == MDBX_NEXT);
    else
      cASSERT(mc, op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP);
  } else {
    if (inner)
      cASSERT(mc, op == MDBX_PREV);
    else
      cASSERT(mc, op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP);
  }
  if (inner) {
    cASSERT(mc, !data && !mc->subcur && (mc->flags & z_inner) != 0);
  } else {
    cASSERT(mc, (mc->flags & z_inner) == 0);
  }

  /* Cursor reported as is_poor(): a z_fresh one is positioned on the first
   * (forward) or last (backward) item instead of stepping; any other one gets
   * its flags reset to the poor mark (keeping z_inner for a nested cursor) and
   * reports MDBX_NOTFOUND if z_after_delete was set, MDBX_ENODATA otherwise. */
  if (unlikely(is_poor(mc))) {
    int state = mc->flags;
    if (state & z_fresh) {
      if (forward)
        return inner ? inner_first(mc, key) : outer_first(mc, key, data);
      else
        return inner ? inner_last(mc, key) : outer_last(mc, key, data);
    }
    mc->flags = inner ? z_inner | z_poor_mark : z_poor_mark;
    return (state & z_after_delete) ? MDBX_NOTFOUND : MDBX_ENODATA;
  }

  const page_t *mp = mc->pg[mc->top];
  const intptr_t nkeys = page_numkeys(mp);
  cASSERT(mc, nkeys > 0);

  /* Signed copies of the index and the item count, so that the `--ki >= 0`
   * test below can detect stepping before the first slot. */
  intptr_t ki = mc->ki[mc->top];
  /* Flags that require special handling; none set is the expected case. */
  const uint8_t state = mc->flags & (z_after_delete | z_hollow | z_eof_hard | z_eof_soft);
  if (likely(state == 0)) {
    cASSERT(mc, ki < nkeys);
    /* Outer cursor and not a *_NODUP request: try to step the nested cursor
     * first, leaving the outer position untouched. */
    if (!inner && op != (forward ? MDBX_NEXT_NODUP : MDBX_PREV_NODUP)) {
      int err = MDBX_NOTFOUND;
      if (inner_pointed(mc)) {
        err = forward ? inner_next(&mc->subcur->cursor, data) : inner_prev(&mc->subcur->cursor, data);
        if (likely(err == MDBX_SUCCESS)) {
          /* The nested cursor moved: the key is taken from the current node. */
          get_key_optional(page_node(mp, ki), key);
          return MDBX_SUCCESS;
        }
        /* Anything other than "no more items" is a real failure. */
        if (unlikely(err != MDBX_NOTFOUND && err != MDBX_ENODATA)) {
          cASSERT(mc, !inner_pointed(mc));
          return err;
        }
        cASSERT(mc, !forward || (mc->subcur->cursor.flags & z_eof_soft));
      }
      /* *_DUP requests stop here instead of moving the outer cursor; `err` is
       * MDBX_NOTFOUND when the nested cursor was not pointed, otherwise the
       * nested cursor's MDBX_NOTFOUND / MDBX_ENODATA result. */
      if (op == (forward ? MDBX_NEXT_DUP : MDBX_PREV_DUP))
        return err;
    }
    /* The outer position is about to change.
     * NOTE(review): inner_gone() presumably invalidates the nested cursor —
     * confirm against its definition. */
    if (!inner)
      inner_gone(mc);
  } else {
    if (mc->flags & z_hollow) {
      cASSERT(mc, !inner_pointed(mc));
      return MDBX_ENODATA;
    }

    /* In any of the remaining special states a *_DUP request on an outer
     * cursor yields MDBX_NOTFOUND. */
    if (!inner && op == (forward ? MDBX_NEXT_DUP : MDBX_PREV_DUP))
      return MDBX_NOTFOUND;

    if (forward) {
      if (state & z_after_delete) {
        /* The current index still addresses an item, so it is brought as-is
         * without advancing (presumably the deletion left the successor in
         * this slot — confirm). Otherwise fall through to the regular step,
         * which moves to the right sibling page. */
        if (ki < nkeys)
          goto bring;
      } else {
        /* Already at the end: nothing further in the forward direction. */
        cASSERT(mc, state & (z_eof_soft | z_eof_hard));
        return MDBX_NOTFOUND;
      }
    } else if (state & z_eof_hard) {
      /* Stepping back from the hard-EOF state selects the last item of the
       * current page. */
      mc->ki[mc->top] = (indx_t)nkeys - 1;
      goto bring;
    }
    /* Backward with any other special state: fall through to the regular step. */
  }

  DEBUG("turn-%s: top page was %" PRIaPGNO " in cursor %p, ki %zi of %zi", forward ? "next" : "prev", mp->pgno,
        __Wpedantic_format_voidptr(mc), ki, nkeys);
  /* Regular step: move within the page, or switch to the sibling page when the
   * index leaves the page. After a sibling switch only `mp` is refreshed here
   * (it is used for logging below); the index is not assigned by this function
   * on that path. */
  if (forward) {
    if (likely(++ki < nkeys))
      mc->ki[mc->top] = (indx_t)ki;
    else {
      DEBUG("%s", "=====> move to next sibling page");
      int err = cursor_sibling_right(mc);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      mp = mc->pg[mc->top];
      DEBUG("next page is %" PRIaPGNO ", key index %u", mp->pgno, mc->ki[mc->top]);
    }
  } else {
    if (likely(--ki >= 0))
      mc->ki[mc->top] = (indx_t)ki;
    else {
      DEBUG("%s", "=====> move to prev sibling page");
      int err = cursor_sibling_left(mc);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      mp = mc->pg[mc->top];
      DEBUG("prev page is %" PRIaPGNO ", key index %u", mp->pgno, mc->ki[mc->top]);
    }
  }
  DEBUG("==> cursor points to page %" PRIaPGNO " with %zu keys, key index %u", mp->pgno, page_numkeys(mp),
        mc->ki[mc->top]);

  /* Common exit: hand the selected position to cursor_bring(), with its last
   * argument false. */
bring:
  return cursor_bring(inner, forward, mc, key, data, false);
}

/* Steps a nested cursor forward by one item (MDBX_NEXT).
 * The caller's `data` is handed to cursor_step() in the key position and no
 * separate data output is requested (nullptr). */
__hot int inner_next(MDBX_cursor *mc, MDBX_val *data) {
  const bool nested = true, ahead = true;
  return cursor_step(nested, ahead, mc, data, nullptr, MDBX_NEXT);
}

/* Steps a nested cursor backward by one item (MDBX_PREV).
 * The caller's `data` is handed to cursor_step() in the key position and no
 * separate data output is requested (nullptr). */
__hot int inner_prev(MDBX_cursor *mc, MDBX_val *data) {
  const bool nested = true, ahead = false;
  return cursor_step(nested, ahead, mc, data, nullptr, MDBX_PREV);
}

/* Steps an outer cursor forward by delegating to cursor_step(); `op` is passed
 * through and selects among the forward operation variants. */
__hot int outer_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) {
  const bool nested = false, ahead = true;
  return cursor_step(nested, ahead, mc, key, data, op);
}

/* Steps an outer cursor backward by delegating to cursor_step(); `op` is passed
 * through and selects among the backward operation variants. */
__hot int outer_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) {
  const bool nested = false, ahead = false;
  return cursor_step(nested, ahead, mc, key, data, op);
}

/*----------------------------------------------------------------------------*/

__hot int cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) {
  int err;
  DKBUF_DEBUG;
  MDBX_env *const env = mc->txn->env;
  if (LOG_ENABLED(MDBX_LOG_DEBUG) && (flags & MDBX_RESERVE))
    data->iov_base = nullptr;
  DEBUG("==> put db %d key [%s], size %" PRIuPTR ", data [%s] size %" PRIuPTR, cursor_dbi_dbg(mc), DKEY_DEBUG(key),
        key->iov_len, DVAL_DEBUG(data), data->iov_len);

  if ((flags & MDBX_CURRENT) != 0 && (mc->flags & z_inner) == 0) {
    if (unlikely(flags & (MDBX_APPEND | MDBX_NOOVERWRITE)))
      return MDBX_EINVAL;
    /* Запрошено обновление текущей записи, на которой сейчас стоит курсор.
     * Проверяем что переданный ключ совпадает со значением в текущей позиции
     * курсора. Здесь проще вызвать cursor_ops(), так как для обслуживания
     * таблиц с MDBX_DUPSORT также требуется текущий размер данных. */
    MDBX_val current_key, current_data;
    err = cursor_ops(mc, &current_key, &current_data, MDBX_GET_CURRENT);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
    if (mc->clc->k.cmp(key, &current_key) != 0)
      return MDBX_EKEYMISMATCH;

    if (unlikely((flags & MDBX_MULTIPLE))) {
      if (unlikely(!mc->subcur))
        return MDBX_EINVAL;
      err = cursor_del(mc, flags & MDBX_ALLDUPS);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      if (unlikely(data[1].iov_len == 0))
        return MDBX_SUCCESS;
      flags -= MDBX_CURRENT;
      goto skip_check_samedata;
    }

    if (mc->subcur) {
      node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]);
      if (node_flags(node) & N_DUP) {
        cASSERT(mc, inner_pointed(mc));
        /* Если за ключом более одного значения, либо если размер данных
         * отличается, то вместо обновления требуется удаление и
         * последующая вставка. */
        if (mc->subcur->nested_tree.items > 1 || current_data.iov_len != data->iov_len) {
          err = cursor_del(mc, flags & MDBX_ALLDUPS);
          if (unlikely(err != MDBX_SUCCESS))
            return err;
          flags -= MDBX_CURRENT;
          goto skip_check_samedata;
        }
      } else if (unlikely(node_size(key, data) > env->leaf_nodemax)) {
        /* Уже есть пара key-value хранящаяся в обычном узле. Новые данные
         * слишком большие для размещения в обычном узле вместе с ключом, но
         * могут быть размещены в вложенном дереве. Удаляем узел со старыми
         * данными, чтобы при помещении новых создать вложенное дерево. */
        err = cursor_del(mc, 0);
        if (unlikely(err != MDBX_SUCCESS))
          return err;
        flags -= MDBX_CURRENT;
        goto skip_check_samedata;
      }
    }
    if (!(flags & MDBX_RESERVE) && unlikely(cmp_lenfast(&current_data, data) == 0))
      return MDBX_SUCCESS /* the same data, nothing to update */;
  skip_check_samedata:;
  }

  int rc = MDBX_SUCCESS;
  if (mc->tree->height == 0) {
    /* new database, cursor has nothing to point to */
    cASSERT(mc, is_poor(mc));
    rc = MDBX_NO_ROOT;
  } else if ((flags & MDBX_CURRENT) == 0) {
    bool exact = false;
    MDBX_val last_key, old_data;
    if ((flags & MDBX_APPEND) && mc->tree->items > 0) {
      old_data.iov_base = nullptr;
      old_data.iov_len = 0;
      rc = (mc->flags & z_inner) ? inner_last(mc, &last_key) : outer_last(mc, &last_key, &old_data);
      if (likely(rc == MDBX_SUCCESS)) {
        const int cmp = mc->clc->k.cmp(key, &last_key);
        if (likely(cmp > 0)) {
          mc->ki[mc->top]++; /* step forward for appending */
          rc = MDBX_NOTFOUND;
        } else if (unlikely(cmp != 0)) {
          /* new-key < last-key */
          return MDBX_EKEYMISMATCH;
        } else {
          rc = MDBX_SUCCESS;
          exact = true;
        }
      }
    } else {
      csr_t csr =
          /* olddata may not be updated in case DUPFIX-page of dupfix-table */
          cursor_seek(mc, (MDBX_val *)key, &old_data, MDBX_SET);
      rc = csr.err;
      exact = csr.exact;
    }
    if (likely(rc == MDBX_SUCCESS)) {
      if (exact) {
        if (unlikely(flags & MDBX_NOOVERWRITE)) {
          DEBUG("duplicate key [%s]", DKEY_DEBUG(key));
          *data = old_data;
          return MDBX_KEYEXIST;
        }
        if (unlikely(mc->flags & z_inner)) {
          /* nested subtree of DUPSORT-database with the same key,
           * nothing to update */
          eASSERT(env, data->iov_len == 0 && (old_data.iov_len == 0 ||
                                              /* olddata may not be updated in case
                                                 DUPFIX-page of dupfix-table */
                                              (mc->tree->flags & MDBX_DUPFIXED)));
          return MDBX_SUCCESS;
        }
        if (unlikely(flags & MDBX_ALLDUPS) && inner_pointed(mc)) {
          err = cursor_del(mc, MDBX_ALLDUPS);
          if (unlikely(err != MDBX_SUCCESS))
            return err;
          flags -= MDBX_ALLDUPS;
          cASSERT(mc, mc->top + 1 == mc->tree->height);
          rc = (mc->top >= 0) ? MDBX_NOTFOUND : MDBX_NO_ROOT;
          exact = false;
        } else if (!(flags & (MDBX_RESERVE | MDBX_MULTIPLE))) {
          /* checking for early exit without dirtying pages */
          if (unlikely(eq_fast(data, &old_data))) {
            cASSERT(mc, mc->clc->v.cmp(data, &old_data) == 0);
            if (mc->subcur) {
              if (flags & MDBX_NODUPDATA)
                return MDBX_KEYEXIST;
              if (flags & MDBX_APPENDDUP)
                return MDBX_EKEYMISMATCH;
            }
            /* the same data, nothing to update */
            return MDBX_SUCCESS;
          }
          cASSERT(mc, mc->clc->v.cmp(data, &old_data) != 0);
        }
      }
    } else if (unlikely(rc != MDBX_NOTFOUND))
      return rc;
  }

  mc->flags &= ~z_after_delete;
  MDBX_val xdata, *ref_data = data;
  size_t *batch_dupfix_done = nullptr, batch_dupfix_given = 0;
  if (unlikely(flags & MDBX_MULTIPLE)) {
    batch_dupfix_given = data[1].iov_len;
    if (unlikely(data[1].iov_len == 0))
      return /* nothing todo */ MDBX_SUCCESS;
    batch_dupfix_done = &data[1].iov_len;
    *batch_dupfix_done = 0;
  }

  /* Cursor is positioned, check for room in the dirty list */
  err = cursor_touch(mc, key, ref_data);
  if (unlikely(err))
    return err;

  if (unlikely(rc == MDBX_NO_ROOT)) {
    /* new database, write a root leaf page */
    DEBUG("%s", "allocating new root leaf page");
    pgr_t npr = page_new(mc, P_LEAF);
    if (unlikely(npr.err != MDBX_SUCCESS))
      return npr.err;
    npr.err = cursor_push(mc, npr.page, 0);
    if (unlikely(npr.err != MDBX_SUCCESS))
      return npr.err;
    mc->tree->root = npr.page->pgno;
    mc->tree->height++;
    if (mc->tree->flags & MDBX_INTEGERKEY) {
      assert(key->iov_len >= mc->clc->k.lmin && key->iov_len <= mc->clc->k.lmax);
      mc->clc->k.lmin = mc->clc->k.lmax = key->iov_len;
    }
    if (mc->tree->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) {
      assert(data->iov_len >= mc->clc->v.lmin && data->iov_len <= mc->clc->v.lmax);
      assert(mc->subcur != nullptr);
      mc->tree->dupfix_size = /* mc->subcur->nested_tree.dupfix_size = */
          (unsigned)(mc->clc->v.lmin = mc->clc->v.lmax = data->iov_len);
      cASSERT(mc, mc->clc->v.lmin == mc->subcur->cursor.clc->k.lmin);
      cASSERT(mc, mc->clc->v.lmax == mc->subcur->cursor.clc->k.lmax);
      if (mc->flags & z_inner)
        npr.page->flags |= P_DUPFIX;
    }
  }

  MDBX_val old_singledup, old_data;
  tree_t nested_dupdb;
  page_t *sub_root = nullptr;
  bool insert_key, insert_data;
  uint16_t fp_flags = P_LEAF;
  page_t *fp = env->page_auxbuf;
  fp->txnid = mc->txn->front_txnid;
  insert_key = insert_data = (rc != MDBX_SUCCESS);
  old_singledup.iov_base = nullptr;
  old_singledup.iov_len = 0;
  if (insert_key) {
    /* The key does not exist */
    DEBUG("inserting key at index %i", mc->ki[mc->top]);
    if (mc->tree->flags & MDBX_DUPSORT) {
      inner_gone(mc);
      if (node_size(key, data) > env->leaf_nodemax) {
        /* Too big for a node, insert in sub-DB.  Set up an empty
         * "old sub-page" for convert_to_subtree to expand to a full page. */
        fp->dupfix_ksize = (mc->tree->flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0;
        fp->lower = fp->upper = 0;
        old_data.iov_len = PAGEHDRSZ;
        goto convert_to_subtree;
      }
    }
  } else {
    /* there's only a key anyway, so this is a no-op */
    if (is_dupfix_leaf(mc->pg[mc->top])) {
      size_t ksize = mc->tree->dupfix_size;
      if (unlikely(key->iov_len != ksize))
        return MDBX_BAD_VALSIZE;
      void *ptr = page_dupfix_ptr(mc->pg[mc->top], mc->ki[mc->top], ksize);
      memcpy(ptr, key->iov_base, ksize);
    fix_parent:
      /* if overwriting slot 0 of leaf, need to
       * update branch key if there is a parent page */
      if (mc->top && !mc->ki[mc->top]) {
        size_t dtop = 1;
        mc->top--;
        /* slot 0 is always an empty key, find real slot */
        while (mc->top && !mc->ki[mc->top]) {
          mc->top--;
          dtop++;
        }
        err = MDBX_SUCCESS;
        if (mc->ki[mc->top])
          err = tree_propagate_key(mc, key);
        cASSERT(mc, mc->top + dtop < UINT16_MAX);
        mc->top += (uint8_t)dtop;
        if (unlikely(err != MDBX_SUCCESS))
          return err;
      }

      if (AUDIT_ENABLED()) {
        err = cursor_validate(mc);
        if (unlikely(err != MDBX_SUCCESS))
          return err;
      }
      return MDBX_SUCCESS;
    }

  more:
    if (AUDIT_ENABLED()) {
      err = cursor_validate(mc);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
    }
    node_t *const node = page_node(mc->pg[mc->top], mc->ki[mc->top]);

    /* Large/Overflow page overwrites need special handling */
    if (unlikely(node_flags(node) & N_BIG)) {
      const size_t dpages = (node_size(key, data) > env->leaf_nodemax) ? largechunk_npages(env, data->iov_len) : 0;

      const pgno_t pgno = node_largedata_pgno(node);
      pgr_t lp = page_get_large(mc, pgno, mc->pg[mc->top]->txnid);
      if (unlikely(lp.err != MDBX_SUCCESS))
        return lp.err;
      cASSERT(mc, page_type(lp.page) == P_LARGE);

      /* Is the ov page from this txn (or a parent) and big enough? */
      const size_t ovpages = lp.page->pages;
      const size_t extra_threshold =
          (mc->tree == &mc->txn->dbs[FREE_DBI]) ? 1 : /* LY: add configurable threshold to keep reserve space */ 0;
      if (!is_frozen(mc->txn, lp.page) && ovpages >= dpages && ovpages <= dpages + extra_threshold) {
        /* yes, overwrite it. */
        if (!is_modifable(mc->txn, lp.page)) {
          if (is_spilled(mc->txn, lp.page)) {
            lp = /* TODO: avoid search and get txn & spill-index from
                     page_result */
                page_unspill(mc->txn, lp.page);
            if (unlikely(lp.err))
              return lp.err;
          } else {
            if (unlikely(!mc->txn->parent)) {
              ERROR("Unexpected not frozen/modifiable/spilled but shadowed %s "
                    "page %" PRIaPGNO " mod-txnid %" PRIaTXN ","
                    " without parent transaction, current txn %" PRIaTXN " front %" PRIaTXN,
                    "large/overflow", pgno, lp.page->txnid, mc->txn->txnid, mc->txn->front_txnid);
              return MDBX_PROBLEM;
            }

            /* It is writable only in a parent txn */
            page_t *np = page_shadow_alloc(mc->txn, ovpages);
            if (unlikely(!np))
              return MDBX_ENOMEM;

            memcpy(np, lp.page, PAGEHDRSZ); /* Copy header of page */
            err = page_dirty(mc->txn, lp.page = np, ovpages);
            if (unlikely(err != MDBX_SUCCESS))
              return err;

#if MDBX_ENABLE_PGOP_STAT
            mc->txn->env->lck->pgops.clone.weak += ovpages;
#endif /* MDBX_ENABLE_PGOP_STAT */
            cASSERT(mc, dpl_check(mc->txn));
          }
        }
        node_set_ds(node, data->iov_len);
        if (flags & MDBX_RESERVE)
          data->iov_base = page_data(lp.page);
        else
          memcpy(page_data(lp.page), data->iov_base, data->iov_len);

        if (AUDIT_ENABLED()) {
          err = cursor_validate(mc);
          if (unlikely(err != MDBX_SUCCESS))
            return err;
        }
        return MDBX_SUCCESS;
      }

      if ((err = page_retire(mc, lp.page)) != MDBX_SUCCESS)
        return err;
    } else {
      old_data.iov_len = node_ds(node);
      old_data.iov_base = node_data(node);
      cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= ptr_disp(mc->pg[mc->top], env->ps));

      /* DB has dups? */
      if (mc->tree->flags & MDBX_DUPSORT) {
        /* Prepare (sub-)page/sub-DB to accept the new item, if needed.
         * fp: old sub-page or a header faking it.
         * mp: new (sub-)page.
         * xdata: node data with new sub-page or sub-DB. */
        size_t growth = 0; /* growth in page size.*/
        page_t *mp = fp = xdata.iov_base = env->page_auxbuf;
        mp->pgno = mc->pg[mc->top]->pgno;

        /* Was a single item before, must convert now */
        if (!(node_flags(node) & N_DUP)) {
          /* does data match? */
          if (flags & MDBX_APPENDDUP) {
            const int cmp = mc->clc->v.cmp(data, &old_data);
            cASSERT(mc, cmp != 0 || eq_fast(data, &old_data));
            if (unlikely(cmp <= 0))
              return MDBX_EKEYMISMATCH;
          } else if (eq_fast(data, &old_data)) {
            cASSERT(mc, mc->clc->v.cmp(data, &old_data) == 0);
            if (flags & MDBX_NODUPDATA)
              return MDBX_KEYEXIST;
            /* data is match exactly byte-to-byte, nothing to update */
            rc = MDBX_SUCCESS;
            if (unlikely(batch_dupfix_done))
              goto batch_dupfix_continue;
            return rc;
          }

          /* Just overwrite the current item */
          if (flags & MDBX_CURRENT) {
            cASSERT(mc, node_size(key, data) <= env->leaf_nodemax);
            goto current;
          }

          /* Back up original data item */
          memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, old_singledup.iov_len = old_data.iov_len);

          /* Make sub-page header for the dup items, with dummy body */
          fp->flags = P_LEAF | P_SUBP;
          fp->lower = 0;
          xdata.iov_len = PAGEHDRSZ + old_data.iov_len + data->iov_len;
          if (mc->tree->flags & MDBX_DUPFIXED) {
            fp->flags |= P_DUPFIX;
            fp->dupfix_ksize = (uint16_t)data->iov_len;
            /* Будем создавать DUPFIX-страницу, как минимум с двумя элементами.
             * При коротких значениях и наличии свободного места можно сделать
             * некоторое резервирование места, чтобы при последующих добавлениях
             * не сразу расширять созданную под-страницу.
             * Резервирование в целом сомнительно (см ниже), но может сработать
             * в плюс (а если в минус то несущественный) при коротких ключах. */
            xdata.iov_len +=
                page_subleaf2_reserve(env, page_room(mc->pg[mc->top]) + old_data.iov_len, xdata.iov_len, data->iov_len);
            cASSERT(mc, (xdata.iov_len & 1) == 0);
          } else {
            xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + (old_data.iov_len & 1) + (data->iov_len & 1);
          }
          cASSERT(mc, (xdata.iov_len & 1) == 0);
          fp->upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ);
          old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */
        } else if (node_flags(node) & N_TREE) {
          /* Data is on sub-DB, just store it */
          flags |= N_DUP | N_TREE;
          goto dupsort_put;
        } else {
          /* Data is on sub-page */
          fp = old_data.iov_base;
          switch (flags) {
          default:
            growth = is_dupfix_leaf(fp) ? fp->dupfix_ksize : (node_size(data, nullptr) + sizeof(indx_t));
            if (page_room(fp) >= growth) {
              /* На текущей под-странице есть место для добавления элемента.
               * Оптимальнее продолжить использовать эту страницу, ибо
               * добавление вложенного дерева увеличит WAF на одну страницу. */
              goto continue_subpage;
            }
            /* На текущей под-странице нет места для еще одного элемента.
             * Можно либо увеличить эту под-страницу, либо вынести куст
             * значений во вложенное дерево.
             *
             * Продолжать использовать текущую под-страницу возможно
             * только пока и если размер после добавления элемента будет
             * меньше leaf_nodemax. Соответственно, при превышении
             * просто сразу переходим на вложенное дерево. */
            xdata.iov_len = old_data.iov_len + (growth += growth & 1);
            if (xdata.iov_len > env->subpage_limit)
              goto convert_to_subtree;

            /* Можно либо увеличить под-страницу, в том числе с некоторым
             * запасом, либо перейти на вложенное поддерево.
             *
             * Резервирование места на под-странице представляется сомнительным:
             *  - Резервирование увеличит рыхлость страниц, в том числе
             *    вероятность разделения основной/гнездовой страницы;
             *  - Сложно предсказать полезный размер резервирования,
             *    особенно для не-MDBX_DUPFIXED;
             *  - Наличие резерва позволяет съекономить только на перемещении
             *    части элементов основной/гнездовой страницы при последующих
             *    добавлениях в нее элементов. Причем после первого изменения
             *    размера под-страницы, её тело будет примыкать
             *    к неиспользуемому месту на основной/гнездовой странице,
             *    поэтому последующие последовательные добавления потребуют
             *    только передвижения в entries[].
             *
             * Соответственно, более важным/определяющим представляется
             * своевременный переход к вложеному дереву, но тут достаточно
             * сложный конфликт интересов:
             *  - При склонности к переходу к вложенным деревьям, суммарно
             *    в БД будет большее кол-во более рыхлых страниц. Это увеличит
             *    WAF, а также RAF при последовательных чтениях большой БД.
             *    Однако, при коротких ключах и большом кол-ве
             *    дубликатов/мультизначений, плотность ключей в листовых
             *    страницах основного дерева будет выше. Соответственно, будет
             *    пропорционально меньше branch-страниц. Поэтому будет выше
             *    вероятность оседания/не-вымывания страниц основного дерева из
             *    LRU-кэша, а также попадания в write-back кэш при записи.
             *  - Наоботот, при склонности к использованию под-страниц, будут
             *    наблюдаться обратные эффекты. Плюс некоторые накладные расходы
             *    на лишнее копирование данных под-страниц в сценариях
             *    нескольких обонвлений дубликатов одного куста в одной
             *    транзакции.
             *
             * Суммарно наиболее рациональным представляется такая тактика:
             *  - Вводим три порога subpage_limit, subpage_room_threshold
             *    и subpage_reserve_prereq, которые могут быть
             *    заданы/скорректированы пользователем в ‰ от leaf_nodemax;
             *  - Используем под-страницу пока её размер меньше subpage_limit
             *    и на основной/гнездовой странице не-менее
             *    subpage_room_threshold свободного места;
             *  - Резервируем место только для 1-3 коротких dupfix-элементов,
             *    расширяя размер под-страницы на размер кэш-линии ЦПУ, но
             *    только если на странице не менее subpage_reserve_prereq
             *    свободного места.
             *  - По-умолчанию устанавливаем:
             *     subpage_limit = leaf_nodemax (1000‰);
             *     subpage_room_threshold = 0;
             *     subpage_reserve_prereq = leaf_nodemax (1000‰).
             */
            if (is_dupfix_leaf(fp))
              growth += page_subleaf2_reserve(env, page_room(mc->pg[mc->top]) + old_data.iov_len, xdata.iov_len,
                                              data->iov_len);
            else {
              /* TODO: Если добавить возможность для пользователя задавать
               * min/max размеров ключей/данных, то здесь разумно реализовать
               * тактику резервирования подобную dupfixed. */
            }
            break;

          case MDBX_CURRENT | MDBX_NODUPDATA:
          case MDBX_CURRENT:
          continue_subpage:
            fp->txnid = mc->txn->front_txnid;
            fp->pgno = mp->pgno;
            mc->subcur->cursor.pg[0] = fp;
            flags |= N_DUP;
            goto dupsort_put;
          }
          xdata.iov_len = old_data.iov_len + growth;
          cASSERT(mc, (xdata.iov_len & 1) == 0);
        }

        fp_flags = fp->flags;
        if (xdata.iov_len > env->subpage_limit || node_size_len(node_ks(node), xdata.iov_len) > env->leaf_nodemax ||
            (env->subpage_room_threshold &&
             page_room(mc->pg[mc->top]) + node_size_len(node_ks(node), old_data.iov_len) <
                 env->subpage_room_threshold + node_size_len(node_ks(node), xdata.iov_len))) {
          /* Too big for a sub-page, convert to sub-DB */
        convert_to_subtree:
          fp_flags &= ~P_SUBP;
          nested_dupdb.dupfix_size = 0;
          nested_dupdb.flags = flags_db2sub(mc->tree->flags);
          if (mc->tree->flags & MDBX_DUPFIXED) {
            fp_flags |= P_DUPFIX;
            nested_dupdb.dupfix_size = fp->dupfix_ksize;
          }
          nested_dupdb.height = 1;
          nested_dupdb.branch_pages = 0;
          nested_dupdb.leaf_pages = 1;
          nested_dupdb.large_pages = 0;
          nested_dupdb.items = page_numkeys(fp);
          xdata.iov_len = sizeof(nested_dupdb);
          xdata.iov_base = &nested_dupdb;
          const pgr_t par = gc_alloc_single(mc);
          mp = par.page;
          if (unlikely(par.err != MDBX_SUCCESS))
            return par.err;
          mc->tree->leaf_pages += 1;
          cASSERT(mc, env->ps > old_data.iov_len);
          growth = env->ps - (unsigned)old_data.iov_len;
          cASSERT(mc, (growth & 1) == 0);
          flags |= N_DUP | N_TREE;
          nested_dupdb.root = mp->pgno;
          nested_dupdb.sequence = 0;
          nested_dupdb.mod_txnid = mc->txn->txnid;
          sub_root = mp;
        }
        if (mp != fp) {
          mp->flags = fp_flags;
          mp->txnid = mc->txn->front_txnid;
          mp->dupfix_ksize = fp->dupfix_ksize;
          mp->lower = fp->lower;
          cASSERT(mc, fp->upper + growth < UINT16_MAX);
          mp->upper = fp->upper + (indx_t)growth;
          if (unlikely(fp_flags & P_DUPFIX)) {
            memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->dupfix_ksize);
            cASSERT(mc, (((mp->dupfix_ksize & page_numkeys(mp)) ^ mp->upper) & 1) == 0);
          } else {
            cASSERT(mc, (mp->upper & 1) == 0);
            memcpy(ptr_disp(mp, mp->upper + PAGEHDRSZ), ptr_disp(fp, fp->upper + PAGEHDRSZ),
                   old_data.iov_len - fp->upper - PAGEHDRSZ);
            memcpy(mp->entries, fp->entries, page_numkeys(fp) * sizeof(mp->entries[0]));
            for (size_t i = 0; i < page_numkeys(fp); i++) {
              cASSERT(mc, mp->entries[i] + growth <= UINT16_MAX);
              mp->entries[i] += (indx_t)growth;
            }
          }
        }

        if (!insert_key)
          node_del(mc, 0);
        ref_data = &xdata;
        flags |= N_DUP;
        goto insert_node;
      }

      /* MDBX passes N_TREE in 'flags' to write a DB record */
      if (unlikely((node_flags(node) ^ flags) & N_TREE))
        return MDBX_INCOMPATIBLE;

    current:
      if (data->iov_len == old_data.iov_len) {
        cASSERT(mc, EVEN_CEIL(key->iov_len) == EVEN_CEIL(node_ks(node)));
        /* same size, just replace it. Note that we could
         * also reuse this node if the new data is smaller,
         * but instead we opt to shrink the node in that case. */
        if (flags & MDBX_RESERVE)
          data->iov_base = old_data.iov_base;
        else if (!(mc->flags & z_inner))
          memcpy(old_data.iov_base, data->iov_base, data->iov_len);
        else {
          cASSERT(mc, page_numkeys(mc->pg[mc->top]) == 1);
          cASSERT(mc, page_type_compat(mc->pg[mc->top]) == P_LEAF);
          cASSERT(mc, node_ds(node) == 0);
          cASSERT(mc, node_flags(node) == 0);
          cASSERT(mc, key->iov_len < UINT16_MAX);
          node_set_ks(node, key->iov_len);
          memcpy(node_key(node), key->iov_base, key->iov_len);
          cASSERT(mc, ptr_disp(node_key(node), node_ds(node)) < ptr_disp(mc->pg[mc->top], env->ps));
          goto fix_parent;
        }

        if (AUDIT_ENABLED()) {
          err = cursor_validate(mc);
          if (unlikely(err != MDBX_SUCCESS))
            return err;
        }
        return MDBX_SUCCESS;
      }
    }
    node_del(mc, 0);
  }

  ref_data = data;

insert_node:;
  const unsigned naf = flags & NODE_ADD_FLAGS;
  size_t nsize = is_dupfix_leaf(mc->pg[mc->top]) ? key->iov_len : leaf_size(env, key, ref_data);
  if (page_room(mc->pg[mc->top]) < nsize) {
    rc = page_split(mc, key, ref_data, P_INVALID, insert_key ? naf : naf | MDBX_SPLIT_REPLACE);
    if (rc == MDBX_SUCCESS && AUDIT_ENABLED())
      rc = insert_key ? cursor_validate(mc) : cursor_validate_updating(mc);
  } else {
    /* There is room already in this leaf page. */
    if (is_dupfix_leaf(mc->pg[mc->top])) {
      cASSERT(mc, !(naf & (N_BIG | N_TREE | N_DUP)) && ref_data->iov_len == 0);
      rc = node_add_dupfix(mc, mc->ki[mc->top], key);
    } else
      rc = node_add_leaf(mc, mc->ki[mc->top], key, ref_data, naf);
    if (likely(rc == 0)) {
      /* Adjust other cursors pointing to mp */
      page_t *const mp = mc->pg[mc->top];
      const size_t dbi = cursor_dbi(mc);
      for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) {
        MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2;
        if (!is_related(mc, m3) || m3->pg[mc->top] != mp)
          continue;
        if (m3->ki[mc->top] >= mc->ki[mc->top])
          m3->ki[mc->top] += insert_key;
        if (inner_pointed(m3))
          cursor_inner_refresh(m3, mp, m3->ki[mc->top]);
      }
    }
  }

  if (likely(rc == MDBX_SUCCESS)) {
    /* Now store the actual data in the child DB. Note that we're
     * storing the user data in the keys field, so there are strict
     * size limits on dupdata. The actual data fields of the child
     * DB are all zero size. */
    if (flags & N_DUP) {
      MDBX_val empty;
    dupsort_put:
      empty.iov_len = 0;
      empty.iov_base = nullptr;
      node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]);
#define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1
      STATIC_ASSERT((MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == MDBX_NOOVERWRITE);
      unsigned inner_flags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE);
      if ((flags & MDBX_CURRENT) == 0) {
        inner_flags -= MDBX_CURRENT;
        rc = cursor_dupsort_setup(mc, node, mc->pg[mc->top]);
        if (unlikely(rc != MDBX_SUCCESS))
          goto dupsort_error;
      }
      subcur_t *const mx = mc->subcur;
      if (sub_root) {
        cASSERT(mc, mx->nested_tree.height == 1 && mx->nested_tree.root == sub_root->pgno);
        mx->cursor.flags = z_inner;
        mx->cursor.top = 0;
        mx->cursor.pg[0] = sub_root;
        mx->cursor.ki[0] = 0;
      }
      if (old_singledup.iov_base) {
        /* converted, write the original data first */
        if (is_dupfix_leaf(mx->cursor.pg[0]))
          rc = node_add_dupfix(&mx->cursor, 0, &old_singledup);
        else
          rc = node_add_leaf(&mx->cursor, 0, &old_singledup, &empty, 0);
        if (unlikely(rc != MDBX_SUCCESS))
          goto dupsort_error;
        mx->cursor.tree->items = 1;
      }
      if (!(node_flags(node) & N_TREE) || sub_root) {
        page_t *const mp = mc->pg[mc->top];
        const intptr_t nkeys = page_numkeys(mp);
        const size_t dbi = cursor_dbi(mc);

        for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) {
          if (!is_related(mc, m2) || m2->pg[mc->top] != mp)
            continue;
          if (/* пропускаем незаполненные курсоры, иначе получится что у такого
                 курсора будет инициализирован вложенный,
                 что антилогично и бесполезно. */
              is_filled(m2) && m2->ki[mc->top] == mc->ki[mc->top]) {
            cASSERT(m2, m2->subcur->cursor.clc == mx->cursor.clc);
            m2->subcur->nested_tree = mx->nested_tree;
            m2->subcur->cursor.pg[0] = mx->cursor.pg[0];
            if (old_singledup.iov_base) {
              m2->subcur->cursor.top_and_flags = z_inner;
              m2->subcur->cursor.ki[0] = 0;
            }
            DEBUG("Sub-dbi -%zu root page %" PRIaPGNO, cursor_dbi(&m2->subcur->cursor), m2->subcur->nested_tree.root);
          } else if (!insert_key && m2->ki[mc->top] < nkeys)
            cursor_inner_refresh(m2, mp, m2->ki[mc->top]);
        }
      }
      cASSERT(mc, mc->subcur->nested_tree.items < PTRDIFF_MAX);
      const size_t probe = (size_t)mc->subcur->nested_tree.items;
#define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1
      STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == MDBX_APPEND);
      inner_flags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND;
      rc = cursor_put(&mc->subcur->cursor, data, &empty, inner_flags);
      if (flags & N_TREE) {
        void *db = node_data(node);
        mc->subcur->nested_tree.mod_txnid = mc->txn->txnid;
        memcpy(db, &mc->subcur->nested_tree, sizeof(tree_t));
      }
      insert_data = (probe != (size_t)mc->subcur->nested_tree.items);
    }
    /* Increment count unless we just replaced an existing item. */
    if (insert_data)
      mc->tree->items++;
    if (insert_key) {
      if (unlikely(rc != MDBX_SUCCESS))
        goto dupsort_error;
      /* If we succeeded and the key didn't exist before,
       * make sure the cursor is marked valid. */
      be_filled(mc);
    }
    if (likely(rc == MDBX_SUCCESS)) {
      cASSERT(mc, is_filled(mc));
      if (unlikely(batch_dupfix_done)) {
      batch_dupfix_continue:
        /* let caller know how many succeeded, if any */
        if ((*batch_dupfix_done += 1) < batch_dupfix_given) {
          data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len);
          insert_key = insert_data = false;
          old_singledup.iov_base = nullptr;
          sub_root = nullptr;
          goto more;
        }
      }
      if (AUDIT_ENABLED())
        rc = cursor_validate(mc);
    }
    return rc;

  dupsort_error:
    if (unlikely(rc == MDBX_KEYEXIST)) {
      /* should not happen, we deleted that item */
      ERROR("Unexpected %i error while put to nested dupsort's hive", rc);
      rc = MDBX_PROBLEM;
    }
  }
  mc->txn->flags |= MDBX_TXN_ERROR;
  return rc;
}

/* Validates the arguments of a put-request that carries MDBX_MULTIPLE.
 *
 * `data` is used as a two-element array: data[0].iov_len is taken as the size
 * of a single item and data[1].iov_len as the number of items.
 *
 * Returns:
 *  - MDBX_EINVAL when MDBX_RESERVE is requested together with the batch;
 *  - MDBX_INCOMPATIBLE when the table has no MDBX_DUPFIXED flag;
 *  - MDBX_TOO_LARGE when (number of items) * (item size) would exceed
 *    MAX_MAPSIZE / 2;
 *  - MDBX_SUCCESS otherwise.
 *
 * The `key` argument is not examined here. */
int cursor_check_multiple(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) {
  (void)key;
  if (unlikely(flags & MDBX_RESERVE))
    return MDBX_EINVAL;
  if (unlikely(!(mc->tree->flags & MDBX_DUPFIXED)))
    return MDBX_INCOMPATIBLE;
  const size_t number = data[1].iov_len;
  /* Cheap pre-check against the smallest count that could overflow with the
   * largest possible item; the precise check is needed only beyond it. */
  if (unlikely(number > MAX_MAPSIZE / 2 / (BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) - NODESIZE))) {
    /* checking for multiplication overflow */
    const size_t item_size = data[0].iov_len;
    /* A zero item size cannot overflow the product and must not be used as
     * a divisor (division by zero is undefined behavior). The item length
     * itself is not validated here; cursor_put_checklen() checks it against
     * the table's value-length limits. */
    if (unlikely(item_size != 0 && number > MAX_MAPSIZE / 2 / item_size))
      return MDBX_TOO_LARGE;
  }
  return MDBX_SUCCESS;
}

/* Validates key/data lengths and alignment, then forwards to cursor_put().
 *
 * Must be called for an outer (non-nested) cursor only.
 *
 * Checks performed before the actual put:
 *  - key and data lengths must fit the [lmin, lmax] limits of the cursor's
 *    comparators context (mc->clc), otherwise MDBX_BAD_VALSIZE is returned;
 *  - for MDBX_INTEGERKEY tables the key must be 4 or 8 bytes long, and for
 *    MDBX_INTEGERDUP tables the same applies to the data, otherwise
 *    MDBX_BAD_VALSIZE is returned;
 *  - a misaligned integer key/value is copied into an aligned local buffer
 *    instead of being rejected. The exception is misaligned data given with
 *    MDBX_MULTIPLE: it is not copied and is rejected with MDBX_BAD_VALSIZE,
 *    except that 8-byte items aligned to 4 bytes are accepted as-is on
 *    builds where MDBX_WORDBITS <= 32.
 *
 * Returns MDBX_BAD_VALSIZE as described above, otherwise the result of
 * cursor_put(). */
__hot int cursor_put_checklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) {
  cASSERT(mc, (mc->flags & z_inner) == 0);
  if (unlikely(key->iov_len > mc->clc->k.lmax || key->iov_len < mc->clc->k.lmin)) {
    cASSERT(mc, !"Invalid key-size");
    return MDBX_BAD_VALSIZE;
  }
  if (unlikely(data->iov_len > mc->clc->v.lmax || data->iov_len < mc->clc->v.lmin)) {
    cASSERT(mc, !"Invalid data-size");
    return MDBX_BAD_VALSIZE;
  }

  /* Aligned scratch storage; `key`/`data` are re-pointed to the local
   * MDBX_val wrappers below when a copy is made, so these must outlive
   * the cursor_put() call at the end of this function. */
  uint64_t aligned_keybytes, aligned_databytes;
  MDBX_val aligned_key, aligned_data;
  if (mc->tree->flags & MDBX_INTEGERKEY) {
    if (key->iov_len == 8) {
      if (unlikely(7 & (uintptr_t)key->iov_base)) {
        /* copy instead of return error to avoid break compatibility */
        aligned_key.iov_base = bcopy_8(&aligned_keybytes, key->iov_base);
        aligned_key.iov_len = key->iov_len;
        key = &aligned_key;
      }
    } else if (key->iov_len == 4) {
      if (unlikely(3 & (uintptr_t)key->iov_base)) {
        /* copy instead of return error to avoid break compatibility */
        aligned_key.iov_base = bcopy_4(&aligned_keybytes, key->iov_base);
        aligned_key.iov_len = key->iov_len;
        key = &aligned_key;
      }
    } else {
      cASSERT(mc, !"key-size is invalid for MDBX_INTEGERKEY");
      return MDBX_BAD_VALSIZE;
    }
  }
  if (mc->tree->flags & MDBX_INTEGERDUP) {
    if (data->iov_len == 8) {
      if (unlikely(7 & (uintptr_t)data->iov_base)) {
        if (unlikely(flags & MDBX_MULTIPLE)) {
          /* LY: alignof(uint64_t) is not suitable here because of bugs in
           * MSVC and some other compilers, which align array/vector elements
           * only to a 4-byte boundary while at the same time reporting
           * alignof(uint64_t) == 8. */
          if (MDBX_WORDBITS > 32 || (3 & (uintptr_t)data->iov_base) != 0)
            return MDBX_BAD_VALSIZE;
        } else {
          /* copy instead of return error to avoid break compatibility */
          aligned_data.iov_base = bcopy_8(&aligned_databytes, data->iov_base);
          aligned_data.iov_len = data->iov_len;
          data = &aligned_data;
        }
      }
    } else if (data->iov_len == 4) {
      if (unlikely(3 & (uintptr_t)data->iov_base)) {
        if (unlikely(flags & MDBX_MULTIPLE))
          return MDBX_BAD_VALSIZE;
        /* copy instead of return error to avoid break compatibility */
        aligned_data.iov_base = bcopy_4(&aligned_databytes, data->iov_base);
        aligned_data.iov_len = data->iov_len;
        data = &aligned_data;
      }
    } else {
      cASSERT(mc, !"data-size is invalid for MDBX_INTEGERDUP");
      return MDBX_BAD_VALSIZE;
    }
  }
  return cursor_put(mc, key, data, flags);
}

/* Deletes the item the cursor currently points to.
 *
 * Outline of what the code below does:
 *  1. Fails with MDBX_ENODATA if the cursor is not filled, then calls
 *     cursor_touch() and verifies the leaf page type (MDBX_CORRUPTED on
 *     mismatch unless validation is disabled).
 *  2. For a node with N_DUP: either accounts for dropping all duplicates
 *     (when `flags` has MDBX_ALLDUPS or MDBX_NODUPDATA), or deletes a single
 *     duplicate through the nested cursor and returns early if duplicates
 *     remain. When nothing remains the whole node is deleted, dropping the
 *     nested tree first if there is one.
 *  3. For a node without N_DUP: the N_TREE bit of `flags` must match the
 *     node's one, otherwise MDBX_INCOMPATIBLE is returned.
 *  4. Retires the large/overflow page of an N_BIG node, removes the node,
 *     fixes up the other cursors tracked for the same dbi, and rebalances
 *     the tree.
 *
 * On the `fail` path the transaction is flagged with MDBX_TXN_ERROR.
 * Note that the early returns before the `fail` label do not set that flag
 * themselves.
 *
 * Returns MDBX_SUCCESS or an error code as described above / propagated from
 * the called helpers. */
__hot int cursor_del(MDBX_cursor *mc, unsigned flags) {
  if (unlikely(!is_filled(mc)))
    return MDBX_ENODATA;

  int rc = cursor_touch(mc, nullptr, nullptr);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  page_t *mp = mc->pg[mc->top];
  cASSERT(mc, is_modifable(mc->txn, mp));
  if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) {
    ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags);
    return MDBX_CORRUPTED;
  }
  /* Dupfix leaf pages are not accessed via page_node() here,
   * so go straight to removing the key. */
  if (is_dupfix_leaf(mp))
    goto del_key;

  node_t *node = page_node(mp, mc->ki[mc->top]);
  if (node_flags(node) & N_DUP) {
    if (flags & (MDBX_ALLDUPS | /* for compatibility */ MDBX_NODUPDATA)) {
      /* will subtract the final entry later */
      mc->tree->items -= mc->subcur->nested_tree.items - 1;
    } else {
      if (!(node_flags(node) & N_TREE)) {
        /* Duplicates live on a sub-page embedded in the node: re-point the
         * nested cursor at it and stamp it with the host page's txnid. */
        page_t *sp = node_data(node);
        cASSERT(mc, is_subpage(sp));
        sp->txnid = mp->txnid;
        mc->subcur->cursor.pg[0] = sp;
      }
      /* Delete a single duplicate via the nested cursor. */
      rc = cursor_del(&mc->subcur->cursor, 0);
      if (unlikely(rc != MDBX_SUCCESS))
        return rc;
      /* If sub-DB still has entries, we're done */
      if (mc->subcur->nested_tree.items) {
        if (node_flags(node) & N_TREE) {
          /* update table info */
          mc->subcur->nested_tree.mod_txnid = mc->txn->txnid;
          memcpy(node_data(node), &mc->subcur->nested_tree, sizeof(tree_t));
        } else {
          /* shrink sub-page */
          node = node_shrink(mp, mc->ki[mc->top], node);
          mc->subcur->cursor.pg[0] = node_data(node);
          /* fix other sub-DB cursors pointed at sub-pages on this page */
          for (MDBX_cursor *m2 = mc->txn->cursors[cursor_dbi(mc)]; m2; m2 = m2->next) {
            if (!is_related(mc, m2) || m2->pg[mc->top] != mp)
              continue;
            const node_t *inner = node;
            if (unlikely(m2->ki[mc->top] >= page_numkeys(mp))) {
              /* The tracked cursor's index is beyond the page's keys:
               * reset both the cursor and its nested one. */
              m2->flags = z_poor_mark;
              m2->subcur->nested_tree.root = 0;
              m2->subcur->cursor.top_and_flags = z_inner | z_poor_mark;
              continue;
            }
            if (m2->ki[mc->top] != mc->ki[mc->top]) {
              inner = page_node(mp, m2->ki[mc->top]);
              if (node_flags(inner) & N_TREE)
                continue;
            }
            m2->subcur->cursor.pg[0] = node_data(inner);
          }
        }
        mc->tree->items -= 1;
        cASSERT(mc, mc->tree->items > 0 && mc->tree->height > 0 && mc->tree->root != P_INVALID);
        return rc;
      }
      /* otherwise fall thru and delete the sub-DB */
    }

    if ((node_flags(node) & N_TREE) && mc->subcur->cursor.tree->height) {
      /* add all the child DB's pages to the free list */
      rc = tree_drop(&mc->subcur->cursor, false);
      if (unlikely(rc != MDBX_SUCCESS))
        goto fail;
    }
    inner_gone(mc);
  } else {
    cASSERT(mc, !inner_pointed(mc));
    /* MDBX passes N_TREE in 'flags' to delete a DB record */
    if (unlikely((node_flags(node) ^ flags) & N_TREE))
      return MDBX_INCOMPATIBLE;
  }

  /* add large/overflow pages to free list */
  if (node_flags(node) & N_BIG) {
    pgr_t lp = page_get_large(mc, node_largedata_pgno(node), mp->txnid);
    if (unlikely((rc = lp.err) || (rc = page_retire(mc, lp.page))))
      goto fail;
  }

del_key:
  mc->tree->items -= 1;
  const MDBX_dbi dbi = cursor_dbi(mc);
  indx_t ki = mc->ki[mc->top];
  mp = mc->pg[mc->top];
  cASSERT(mc, is_leaf(mp));
  node_del(mc, mc->tree->dupfix_size);

  /* Adjust other cursors pointing to mp */
  for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) {
    /* When `mc` is a nested cursor, compare against the nested cursors
     * of the tracked ones. */
    MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2;
    if (!is_related(mc, m3) || m3->pg[mc->top] != mp)
      continue;
    if (m3->ki[mc->top] == ki) {
      /* This cursor stood on the item that has just been removed. */
      m3->flags |= z_after_delete;
      inner_gone(m3);
    } else {
      /* Indices after the removed slot shift down by one. */
      m3->ki[mc->top] -= m3->ki[mc->top] > ki;
      if (inner_pointed(m3))
        cursor_inner_refresh(m3, m3->pg[mc->top], m3->ki[mc->top]);
    }
  }

  rc = tree_rebalance(mc);
  if (unlikely(rc != MDBX_SUCCESS))
    goto fail;

  mc->flags |= z_after_delete;
  inner_gone(mc);
  if (unlikely(mc->top < 0)) {
    /* DB is totally empty now, just bail out.
     * Other cursors adjustments were already done
     * by rebalance and aren't needed here. */
    cASSERT(mc, mc->tree->items == 0 && (mc->tree->root == P_INVALID || (is_inner(mc) && !mc->tree->root)) &&
                    mc->flags < 0);
    return MDBX_SUCCESS;
  }

  /* Re-read the position: the rebalance above may have changed it. */
  ki = mc->ki[mc->top];
  mp = mc->pg[mc->top];
  cASSERT(mc, is_leaf(mc->pg[mc->top]));
  size_t nkeys = page_numkeys(mp);
  cASSERT(mc, (mc->tree->items > 0 && nkeys > 0) || ((mc->flags & z_inner) && mc->tree->items == 0 && nkeys == 0));

  /* Adjust this and other cursors pointing to mp */
  const intptr_t top = /* may get reset to -1 */ mc->top;
  for (MDBX_cursor *m2 = mc->txn->cursors[dbi]; m2; m2 = m2->next) {
    MDBX_cursor *m3 = (mc->flags & z_inner) ? &m2->subcur->cursor : m2;
    if (top > m3->top || m3->pg[top] != mp)
      continue;
    /* if m3 points past last node in page, find next sibling */
    if (m3->ki[top] >= nkeys) {
      rc = cursor_sibling_right(m3);
      if (rc == MDBX_NOTFOUND) {
        /* No right sibling: not an error, leave this cursor as it is. */
        rc = MDBX_SUCCESS;
        continue;
      }
      if (unlikely(rc != MDBX_SUCCESS))
        goto fail;
    }
    if (/* skip unfilled cursors, otherwise such a cursor would end up
           with an initialized nested cursor,
           which is illogical and useless. */
        is_filled(m3) && m3->subcur &&
        (m3->ki[top] >= ki ||
         /* already moved to the right */ m3->pg[top] != mp)) {
      node = page_node(m3->pg[m3->top], m3->ki[m3->top]);
      /* If this is a dupsort node, then there must be a valid nested cursor. */
      if (node_flags(node) & N_DUP) {
        /* There are three possible cases here:
         * 1) The nested cursor is already initialized and the node has the
         *    N_TREE flag, so the duplicates live in a separate tree rooted
         *    at a separate page = nothing needs to be adjusted.
         * 2) The nested cursor is already initialized and the node has no
         *    N_TREE flag, so the duplicates are placed on a nested sub-page.
         * 3) The cursor stood on the deleted item, which had a single value,
         *    and after the deletion it moved to the next item, which has
         *    duplicates. In this case the nested cursor is not initialized
         *    and now has to be positioned at the first duplicate. */
        if (is_pointed(&m3->subcur->cursor)) {
          if ((node_flags(node) & N_TREE) == 0) {
            cASSERT(m3, m3->subcur->cursor.top == 0 && m3->subcur->nested_tree.height == 1);
            m3->subcur->cursor.pg[0] = node_data(node);
          }
        } else {
          rc = cursor_dupsort_setup(m3, node, m3->pg[m3->top]);
          if (unlikely(rc != MDBX_SUCCESS))
            goto fail;
          if (node_flags(node) & N_TREE) {
            rc = inner_first(&m3->subcur->cursor, nullptr);
            if (unlikely(rc != MDBX_SUCCESS))
              goto fail;
          }
        }
      } else
        inner_gone(m3);
    }
  }

  cASSERT(mc, rc == MDBX_SUCCESS);
  if (AUDIT_ENABLED())
    rc = cursor_validate(mc);
  return rc;

fail:
  /* Mark the transaction as broken, since the tree may be left
   * partially modified. */
  mc->txn->flags |= MDBX_TXN_ERROR;
  return rc;
}

/*----------------------------------------------------------------------------*/

/* Positions the cursor according to `key` (and, for some operations, `data`).
 *
 * Returns a csr_t with:
 *  - `err`: MDBX_SUCCESS, MDBX_NOTFOUND, MDBX_BAD_VALSIZE, MDBX_CORRUPTED or
 *    an error propagated from the called helpers;
 *  - `exact`: whether an exact key match was found (for the paths that go
 *    through the nested cursor it is taken from the nested seek result).
 *
 * Outline of what the code below does:
 *  1. Validates the key length against mc->clc->k limits and, for
 *     MDBX_INTEGERKEY tables, copies a misaligned 4/8-byte key into an
 *     aligned local buffer.
 *  2. Fast path: if the cursor is already pointed at a leaf page, compares
 *     the key with the first, the last and possibly the current item of that
 *     page to avoid a full tree descent.
 *  3. Otherwise descends via tree_search() and looks up the item via
 *     node_search().
 *  4. At `got_node`, reads the data and/or sets up the nested cursor for
 *     N_DUP nodes, depending on `op`.
 *
 * The numeric ordering of MDBX_cursor_op values is relied upon by the
 * `op < MDBX_SET_RANGE`, `op >= MDBX_SET`, `op >= MDBX_SET_KEY` and
 * `op <= MDBX_GET_BOTH_RANGE` comparisons below; per the assertion at
 * `target_not_found`, operations below MDBX_SET_RANGE are MDBX_SET,
 * MDBX_SET_KEY, MDBX_GET_BOTH and MDBX_GET_BOTH_RANGE.
 *
 * `key` must not be null; `data` may be null on the paths that check it
 * before use. On success `*key` is updated from the found item when
 * `op >= MDBX_SET_KEY`. */
__hot csr_t cursor_seek(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) {
  DKBUF_DEBUG;

  csr_t ret;
  ret.exact = false;
  /* A too-short key is always rejected. A too-long key is rejected only for
   * fixed-length keys (lmin == lmax) or when debugging/assertions are on. */
  if (unlikely(key->iov_len < mc->clc->k.lmin ||
               (key->iov_len > mc->clc->k.lmax &&
                (mc->clc->k.lmin == mc->clc->k.lmax || MDBX_DEBUG || MDBX_FORCE_ASSERTIONS)))) {
    cASSERT(mc, !"Invalid key-size");
    ret.err = MDBX_BAD_VALSIZE;
    return ret;
  }

  /* Work on a local copy so the caller's `key` is untouched
   * unless explicitly updated at the end. */
  MDBX_val aligned_key = *key;
  uint64_t aligned_key_buf;
  if (mc->tree->flags & MDBX_INTEGERKEY) {
    if (aligned_key.iov_len == 8) {
      if (unlikely(7 & (uintptr_t)aligned_key.iov_base))
        /* copy instead of return error to avoid break compatibility */
        aligned_key.iov_base = bcopy_8(&aligned_key_buf, aligned_key.iov_base);
    } else if (aligned_key.iov_len == 4) {
      if (unlikely(3 & (uintptr_t)aligned_key.iov_base))
        /* copy instead of return error to avoid break compatibility */
        aligned_key.iov_base = bcopy_4(&aligned_key_buf, aligned_key.iov_base);
    } else {
      cASSERT(mc, !"key-size is invalid for MDBX_INTEGERKEY");
      ret.err = MDBX_BAD_VALSIZE;
      return ret;
    }
  }

  page_t *mp;
  node_t *node = nullptr;
  /* See if we're already on the right page */
  if (is_pointed(mc)) {
    mp = mc->pg[mc->top];
    cASSERT(mc, is_leaf(mp));
    const size_t nkeys = page_numkeys(mp);
    if (unlikely(nkeys == 0)) {
      /* happens while creating the first leaf page */
      cASSERT(mc, mc->top == 0 && mc->tree->height == 1 && mc->tree->branch_pages == 0 && mc->tree->leaf_pages == 1 &&
                      mc->ki[0] == 0);
      /* Logically correct, but pointless since this is a fleeting/temporary
       * situation that lasts only until an item is added further up the
       * call stack:
         mc->flags |= z_eof_soft | z_hollow; */
      ret.err = MDBX_NOTFOUND;
      return ret;
    }

    /* Compare with the first item on the page. */
    MDBX_val nodekey;
    if (is_dupfix_leaf(mp))
      nodekey = page_dupfix_key(mp, 0, mc->tree->dupfix_size);
    else {
      node = page_node(mp, 0);
      nodekey = get_key(node);
      inner_gone(mc);
    }
    int cmp = mc->clc->k.cmp(&aligned_key, &nodekey);
    if (unlikely(cmp == 0)) {
      /* Probably happens rarely, but first node on the page
       * was the one we wanted. */
      mc->ki[mc->top] = 0;
      ret.exact = true;
      goto got_node;
    }

    if (cmp > 0) {
      /* The sought key is greater than the first one on this page,
       * so the target position is either on this page or further to the
       * right (closer to the end). */
      if (likely(nkeys > 1)) {
        if (is_dupfix_leaf(mp)) {
          nodekey.iov_base = page_dupfix_ptr(mp, nkeys - 1, nodekey.iov_len);
        } else {
          node = page_node(mp, nkeys - 1);
          nodekey = get_key(node);
        }
        cmp = mc->clc->k.cmp(&aligned_key, &nodekey);
        if (cmp == 0) {
          /* last node was the one we wanted */
          mc->ki[mc->top] = (indx_t)(nkeys - 1);
          ret.exact = true;
          goto got_node;
        }
        if (cmp < 0) {
          /* The sought key lies between the first and the last ones on this
           * page, so skip the tree search and continue only within the
           * current page. */
          /* Compare with the current position since such a match is a
           * common special case, but skip the check if the current position
           * is the first/last one, as that comparison was already done
           * above. */
          if (mc->ki[mc->top] > 0 && mc->ki[mc->top] < nkeys - 1) {
            if (is_dupfix_leaf(mp)) {
              nodekey.iov_base = page_dupfix_ptr(mp, mc->ki[mc->top], nodekey.iov_len);
            } else {
              node = page_node(mp, mc->ki[mc->top]);
              nodekey = get_key(node);
            }
            cmp = mc->clc->k.cmp(&aligned_key, &nodekey);
            if (cmp == 0) {
              /* current node was the one we wanted */
              ret.exact = true;
              goto got_node;
            }
          }
          goto search_node;
        }
      }

      /* If the cursor stack has pages to the right, continue searching there. */
      cASSERT(mc, mc->tree->height > mc->top);
      for (intptr_t i = 0; i < mc->top; i++)
        if ((size_t)mc->ki[i] + 1 < page_numkeys(mc->pg[i]))
          goto continue_other_pages;

      /* The key is greater than the last one. */
      mc->ki[mc->top] = (indx_t)nkeys;
      if (op < MDBX_SET_RANGE) {
      target_not_found:
        cASSERT(mc, op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE);
        /* The operation implies looking up a specific key, which was not
         * found. So switch the cursor into the unset state, but without
         * resetting `top`, which allows the fastpath to work during
         * a subsequent search through the page tree. */
        mc->flags = z_hollow | (mc->flags & z_clear_mask);
        inner_gone(mc);
        ret.err = MDBX_NOTFOUND;
        return ret;
      }
      cASSERT(mc, op == MDBX_SET_RANGE);
      mc->flags = z_eof_soft | z_eof_hard | (mc->flags & z_clear_mask);
      ret.err = MDBX_NOTFOUND;
      return ret;
    }

    /* Here the sought key is less than the first one on the page. */
    if (mc->top == 0) {
      /* There are no other pages */
      mc->ki[mc->top] = 0;
      if (op >= MDBX_SET_RANGE)
        goto got_node;
      else
        goto target_not_found;
    }
  }
  cASSERT(mc, !inner_pointed(mc));

continue_other_pages:
  /* Full descent from the root down to the leaf page for the key. */
  ret.err = tree_search(mc, &aligned_key, 0);
  if (unlikely(ret.err != MDBX_SUCCESS))
    return ret;

  cASSERT(mc, is_pointed(mc) && !inner_pointed(mc));
  mp = mc->pg[mc->top];
  MDBX_ANALYSIS_ASSUME(mp != nullptr);
  cASSERT(mc, is_leaf(mp));

search_node:
  cASSERT(mc, is_pointed(mc) && !inner_pointed(mc));
  struct node_search_result nsr = node_search(mc, &aligned_key);
  node = nsr.node;
  ret.exact = nsr.exact;
  if (!ret.exact) {
    /* Operations below MDBX_SET_RANGE require an exact key match. */
    if (op < MDBX_SET_RANGE)
      goto target_not_found;

    if (node == nullptr) {
      DEBUG("%s", "===> inexact leaf not found, goto sibling");
      ret.err = cursor_sibling_right(mc);
      if (unlikely(ret.err != MDBX_SUCCESS))
        return ret; /* no entries matched */
      mp = mc->pg[mc->top];
      cASSERT(mc, is_leaf(mp));
      /* For a dupfix leaf `node` stays null; the dupfix branch below
       * returns before `node` is dereferenced. */
      if (!is_dupfix_leaf(mp))
        node = page_node(mp, 0);
    }
  }

got_node:
  cASSERT(mc, is_pointed(mc) && !inner_pointed(mc));
  cASSERT(mc, mc->ki[mc->top] < page_numkeys(mc->pg[mc->top]));
  if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(mc, mp))) {
    ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags);
    ret.err = MDBX_CORRUPTED;
    return ret;
  }

  if (is_dupfix_leaf(mp)) {
    /* Dupfix leaf: only the key is returned (when requested by `op`),
     * `data` is not touched on this path. */
    if (op >= MDBX_SET_KEY)
      *key = page_dupfix_key(mp, mc->ki[mc->top], mc->tree->dupfix_size);
    be_filled(mc);
    ret.err = MDBX_SUCCESS;
    return ret;
  }

  if (node_flags(node) & N_DUP) {
    ret.err = cursor_dupsort_setup(mc, node, mp);
    if (unlikely(ret.err != MDBX_SUCCESS))
      return ret;
    if (op >= MDBX_SET) {
      /* Key-only positioning: return the data at the nested cursor's
       * initial position. */
      MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr);
      if (node_flags(node) & N_TREE) {
        ret.err = inner_first(&mc->subcur->cursor, data);
        if (unlikely(ret.err != MDBX_SUCCESS))
          return ret;
      } else if (data) {
        const page_t *inner_mp = mc->subcur->cursor.pg[0];
        cASSERT(mc, is_subpage(inner_mp) && is_leaf(inner_mp));
        const size_t inner_ki = mc->subcur->cursor.ki[0];
        if (is_dupfix_leaf(inner_mp))
          *data = page_dupfix_key(inner_mp, inner_ki, mc->tree->dupfix_size);
        else
          *data = get_key(page_node(inner_mp, inner_ki));
      }
    } else {
      /* Operations below MDBX_SET (presumably MDBX_GET_BOTH and
       * MDBX_GET_BOTH_RANGE): look up `data` among the duplicates. `ret` is
       * overwritten by the nested seek, so `ret.exact` now refers to the
       * data match. */
      MDBX_ANALYSIS_ASSUME(mc->subcur != nullptr);
      ret = cursor_seek(&mc->subcur->cursor, data, nullptr, MDBX_SET_RANGE);
      if (unlikely(ret.err != MDBX_SUCCESS)) {
        if (ret.err == MDBX_NOTFOUND && op < MDBX_SET_RANGE)
          goto target_not_found;
        return ret;
      }
      if (op == MDBX_GET_BOTH && !ret.exact)
        goto target_not_found;
    }
  } else if (likely(data)) {
    if (op <= MDBX_GET_BOTH_RANGE) {
      /* Single-value node: compare the given data with the stored one. */
      if (unlikely(data->iov_len < mc->clc->v.lmin || data->iov_len > mc->clc->v.lmax)) {
        cASSERT(mc, !"Invalid data-size");
        ret.err = MDBX_BAD_VALSIZE;
        return ret;
      }
      MDBX_val aligned_data = *data;
      uint64_t aligned_databytes;
      if (mc->tree->flags & MDBX_INTEGERDUP) {
        if (aligned_data.iov_len == 8) {
          if (unlikely(7 & (uintptr_t)aligned_data.iov_base))
            /* copy instead of return error to avoid break compatibility */
            aligned_data.iov_base = bcopy_8(&aligned_databytes, aligned_data.iov_base);
        } else if (aligned_data.iov_len == 4) {
          if (unlikely(3 & (uintptr_t)aligned_data.iov_base))
            /* copy instead of return error to avoid break compatibility */
            aligned_data.iov_base = bcopy_4(&aligned_databytes, aligned_data.iov_base);
        } else {
          cASSERT(mc, !"data-size is invalid for MDBX_INTEGERDUP");
          ret.err = MDBX_BAD_VALSIZE;
          return ret;
        }
      }
      MDBX_val actual_data;
      ret.err = node_read(mc, node, &actual_data, mc->pg[mc->top]);
      if (unlikely(ret.err != MDBX_SUCCESS))
        return ret;
      const int cmp = mc->clc->v.cmp(&aligned_data, &actual_data);
      if (cmp) {
        if (op != MDBX_GET_BOTH_RANGE) {
          cASSERT(mc, op == MDBX_GET_BOTH);
          goto target_not_found;
        }
        /* MDBX_GET_BOTH_RANGE: the stored value must be not less than
         * the given one. */
        if (cmp > 0) {
          ret.err = MDBX_NOTFOUND;
          return ret;
        }
      }
      *data = actual_data;
    } else {
      ret.err = node_read(mc, node, data, mc->pg[mc->top]);
      if (unlikely(ret.err != MDBX_SUCCESS))
        return ret;
    }
  }

  /* The key already matches in all other cases */
  if (op >= MDBX_SET_KEY)
    get_key_optional(node, key);

  DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), DVAL_DEBUG(data));
  ret.err = MDBX_SUCCESS;
  be_filled(mc);
  return ret;
}

__hot int cursor_ops(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, const MDBX_cursor_op op) {
  if (op != MDBX_GET_CURRENT)
    DEBUG(">> cursor %p(0x%x), ops %u, key %p, value %p", __Wpedantic_format_voidptr(mc), mc->flags, op,
          __Wpedantic_format_voidptr(key), __Wpedantic_format_voidptr(data));
  int rc;

  switch (op) {
  case MDBX_GET_CURRENT:
    cASSERT(mc, (mc->flags & z_inner) == 0);
    if (unlikely(!is_filled(mc))) {
      if (is_hollow(mc))
        return MDBX_ENODATA;
      if (mc->ki[mc->top] >= page_numkeys(mc->pg[mc->top]))
        return MDBX_NOTFOUND;
    }
    if (mc->flags & z_after_delete)
      return outer_next(mc, key, data, MDBX_NEXT_NODUP);
    else if (inner_pointed(mc) && (mc->subcur->cursor.flags & z_after_delete))
      return outer_next(mc, key, data, MDBX_NEXT_DUP);
    else {
      const page_t *mp = mc->pg[mc->top];
      const node_t *node = page_node(mp, mc->ki[mc->top]);
      get_key_optional(node, key);
      if (!data)
        return MDBX_SUCCESS;
      if (node_flags(node) & N_DUP) {
        if (!MDBX_DISABLE_VALIDATION && unlikely(!mc->subcur))
          return unexpected_dupsort(mc);
        mc = &mc->subcur->cursor;
        if (unlikely(!is_filled(mc))) {
          if (is_hollow(mc))
            return MDBX_ENODATA;
          if (mc->ki[mc->top] >= page_numkeys(mc->pg[mc->top]))
            return MDBX_NOTFOUND;
        }
        mp = mc->pg[mc->top];
        if (is_dupfix_leaf(mp))
          *data = page_dupfix_key(mp, mc->ki[mc->top], mc->tree->dupfix_size);
        else
          *data = get_key(page_node(mp, mc->ki[mc->top]));
        return MDBX_SUCCESS;
      } else {
        cASSERT(mc, !inner_pointed(mc));
        return node_read(mc, node, data, mc->pg[mc->top]);
      }
    }

  case MDBX_GET_BOTH:
  case MDBX_GET_BOTH_RANGE:
    if (unlikely(data == nullptr))
      return MDBX_EINVAL;
    if (unlikely(mc->subcur == nullptr))
      return MDBX_INCOMPATIBLE;
    /* fall through */
    __fallthrough;
  case MDBX_SET:
  case MDBX_SET_KEY:
  case MDBX_SET_RANGE:
    if (unlikely(key == nullptr))
      return MDBX_EINVAL;
    rc = cursor_seek(mc, key, data, op).err;
    if (rc == MDBX_SUCCESS)
      cASSERT(mc, is_filled(mc));
    else if (rc == MDBX_NOTFOUND && mc->tree->items) {
      cASSERT(mc, is_pointed(mc));
      cASSERT(mc, op == MDBX_SET_RANGE || op == MDBX_GET_BOTH_RANGE || is_hollow(mc));
      cASSERT(mc, op == MDBX_GET_BOTH_RANGE || inner_hollow(mc));
    } else
      cASSERT(mc, is_poor(mc) && !is_filled(mc));
    return rc;

  case MDBX_SEEK_AND_GET_MULTIPLE:
    if (unlikely(!key))
      return MDBX_EINVAL;
    rc = cursor_seek(mc, key, data, MDBX_SET).err;
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    __fallthrough /* fall through */;
  case MDBX_GET_MULTIPLE:
    if (unlikely(!data))
      return MDBX_EINVAL;
    if (unlikely((mc->tree->flags & MDBX_DUPFIXED) == 0))
      return MDBX_INCOMPATIBLE;
    if (unlikely(!is_filled(mc)))
      return MDBX_ENODATA;
    if (key) {
      const page_t *mp = mc->pg[mc->top];
      const node_t *node = page_node(mp, mc->ki[mc->top]);
      *key = get_key(node);
    }
    cASSERT(mc, is_filled(mc));
    if (unlikely(!inner_filled(mc))) {
      if (inner_pointed(mc))
        return MDBX_ENODATA;
      const page_t *mp = mc->pg[mc->top];
      const node_t *node = page_node(mp, mc->ki[mc->top]);
      return node_read(mc, node, data, mp);
    }
    goto fetch_multiple;

  case MDBX_NEXT_MULTIPLE:
    if (unlikely(!data))
      return MDBX_EINVAL;
    if (unlikely(mc->subcur == nullptr))
      return MDBX_INCOMPATIBLE;
    rc = outer_next(mc, key, data, MDBX_NEXT_DUP);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    else {
    fetch_multiple:
      cASSERT(mc, is_filled(mc) && inner_filled(mc));
      MDBX_cursor *mx = &mc->subcur->cursor;
      data->iov_len = page_numkeys(mx->pg[mx->top]) * mx->tree->dupfix_size;
      data->iov_base = page_data(mx->pg[mx->top]);
      mx->ki[mx->top] = (indx_t)page_numkeys(mx->pg[mx->top]) - 1;
      return MDBX_SUCCESS;
    }

  case MDBX_PREV_MULTIPLE:
    if (unlikely(!data))
      return MDBX_EINVAL;
    if (unlikely(mc->subcur == nullptr))
      return MDBX_INCOMPATIBLE;
    if (unlikely(!is_filled(mc) || !inner_filled(mc)))
      return MDBX_ENODATA;
    rc = cursor_sibling_left(&mc->subcur->cursor);
    if (likely(rc == MDBX_SUCCESS))
      goto fetch_multiple;
    return rc;

  case MDBX_NEXT_DUP:
  case MDBX_NEXT:
  case MDBX_NEXT_NODUP:
    rc = outer_next(mc, key, data, op);
    mc->flags &= ~z_eof_hard;
    ((cursor_couple_t *)mc)->inner.cursor.flags &= ~z_eof_hard;
    return rc;

  case MDBX_PREV_DUP:
  case MDBX_PREV:
  case MDBX_PREV_NODUP:
    return outer_prev(mc, key, data, op);

  case MDBX_FIRST:
    return outer_first(mc, key, data);
  case MDBX_LAST:
    return outer_last(mc, key, data);

  case MDBX_LAST_DUP:
  case MDBX_FIRST_DUP:
    if (unlikely(data == nullptr))
      return MDBX_EINVAL;
    if (unlikely(!is_filled(mc)))
      return MDBX_ENODATA;
    else {
      node_t *node = page_node(mc->pg[mc->top], mc->ki[mc->top]);
      get_key_optional(node, key);
      if ((node_flags(node) & N_DUP) == 0)
        return node_read(mc, node, data, mc->pg[mc->top]);
      else if (MDBX_DISABLE_VALIDATION || likely(mc->subcur))
        return ((op == MDBX_FIRST_DUP) ? inner_first : inner_last)(&mc->subcur->cursor, data);
      else
        return unexpected_dupsort(mc);
    }
    break;

  case MDBX_SET_UPPERBOUND:
  case MDBX_SET_LOWERBOUND:
    if (unlikely(key == nullptr || data == nullptr))
      return MDBX_EINVAL;
    else {
      MDBX_val save_data = *data;
      csr_t csr = cursor_seek(mc, key, data, MDBX_SET_RANGE);
      rc = csr.err;
      if (rc == MDBX_SUCCESS && csr.exact && mc->subcur) {
        csr.exact = false;
        if (!save_data.iov_base) {
          /* Avoiding search nested dupfix hive if no data provided.
           * This is changes the semantic of MDBX_SET_LOWERBOUND but avoid
           * returning MDBX_BAD_VALSIZE. */
        } else if (is_pointed(&mc->subcur->cursor)) {
          *data = save_data;
          csr = cursor_seek(&mc->subcur->cursor, data, nullptr, MDBX_SET_RANGE);
          rc = csr.err;
          if (rc == MDBX_NOTFOUND) {
            cASSERT(mc, !csr.exact);
            rc = outer_next(mc, key, data, MDBX_NEXT_NODUP);
          }
        } else {
          int cmp = mc->clc->v.cmp(&save_data, data);
          csr.exact = (cmp == 0);
          if (cmp > 0)
            rc = outer_next(mc, key, data, MDBX_NEXT_NODUP);
        }
      }
      if (rc == MDBX_SUCCESS && !csr.exact)
        rc = MDBX_RESULT_TRUE;
      if (unlikely(op == MDBX_SET_UPPERBOUND)) {
        /* minor fixups for MDBX_SET_UPPERBOUND */
        if (rc == MDBX_RESULT_TRUE)
          /* already at great-than by MDBX_SET_LOWERBOUND */
          rc = MDBX_SUCCESS;
        else if (rc == MDBX_SUCCESS)
          /* exactly match, going next */
          rc = outer_next(mc, key, data, MDBX_NEXT);
      }
    }
    return rc;

  /* Doubtless API to positioning of the cursor at a specified key. */
  case MDBX_TO_KEY_LESSER_THAN:
  case MDBX_TO_KEY_LESSER_OR_EQUAL:
  case MDBX_TO_KEY_EQUAL:
  case MDBX_TO_KEY_GREATER_OR_EQUAL:
  case MDBX_TO_KEY_GREATER_THAN:
    if (unlikely(key == nullptr))
      return MDBX_EINVAL;
    else {
      csr_t csr = cursor_seek(mc, key, data, MDBX_SET_RANGE);
      rc = csr.err;
      if (csr.exact) {
        cASSERT(mc, csr.err == MDBX_SUCCESS);
        if (op == MDBX_TO_KEY_LESSER_THAN)
          rc = outer_prev(mc, key, data, MDBX_PREV_NODUP);
        else if (op == MDBX_TO_KEY_GREATER_THAN)
          rc = outer_next(mc, key, data, MDBX_NEXT_NODUP);
      } else if (op < MDBX_TO_KEY_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS))
        rc = outer_prev(mc, key, data, MDBX_PREV_NODUP);
      else if (op == MDBX_TO_KEY_EQUAL && rc == MDBX_SUCCESS)
        rc = MDBX_NOTFOUND;
    }
    return rc;

  /* Doubtless API to positioning of the cursor at a specified key-value pair
   * for multi-value hives. */
  case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN:
  case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL:
  case MDBX_TO_EXACT_KEY_VALUE_EQUAL:
  case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL:
  case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN:
    if (unlikely(key == nullptr || data == nullptr))
      return MDBX_EINVAL;
    else {
      MDBX_val save_data = *data;
      csr_t csr = cursor_seek(mc, key, data, MDBX_SET_KEY);
      rc = csr.err;
      if (rc == MDBX_SUCCESS) {
        cASSERT(mc, csr.exact);
        if (inner_pointed(mc)) {
          MDBX_cursor *const mx = &mc->subcur->cursor;
          csr = cursor_seek(mx, &save_data, nullptr, MDBX_SET_RANGE);
          rc = csr.err;
          if (csr.exact) {
            cASSERT(mc, csr.err == MDBX_SUCCESS);
            if (op == MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN)
              rc = inner_prev(mx, data);
            else if (op == MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN)
              rc = inner_next(mx, data);
          } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS))
            rc = inner_prev(mx, data);
          else if (op == MDBX_TO_EXACT_KEY_VALUE_EQUAL && rc == MDBX_SUCCESS)
            rc = MDBX_NOTFOUND;
        } else {
          int cmp = mc->clc->v.cmp(data, &save_data);
          switch (op) {
          default:
            __unreachable();
          case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN:
            rc = (cmp < 0) ? MDBX_SUCCESS : MDBX_NOTFOUND;
            break;
          case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL:
            rc = (cmp <= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND;
            break;
          case MDBX_TO_EXACT_KEY_VALUE_EQUAL:
            rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND;
            break;
          case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL:
            rc = (cmp >= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND;
            break;
          case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN:
            rc = (cmp > 0) ? MDBX_SUCCESS : MDBX_NOTFOUND;
            break;
          }
        }
      }
    }
    return rc;

  case MDBX_TO_PAIR_LESSER_THAN:
  case MDBX_TO_PAIR_LESSER_OR_EQUAL:
  case MDBX_TO_PAIR_EQUAL:
  case MDBX_TO_PAIR_GREATER_OR_EQUAL:
  case MDBX_TO_PAIR_GREATER_THAN:
    if (unlikely(key == nullptr || data == nullptr))
      return MDBX_EINVAL;
    else {
      MDBX_val save_data = *data;
      csr_t csr = cursor_seek(mc, key, data, MDBX_SET_RANGE);
      rc = csr.err;
      if (csr.exact) {
        cASSERT(mc, csr.err == MDBX_SUCCESS);
        if (inner_pointed(mc)) {
          MDBX_cursor *const mx = &mc->subcur->cursor;
          csr = cursor_seek(mx, &save_data, nullptr, MDBX_SET_RANGE);
          rc = csr.err;
          if (csr.exact) {
            cASSERT(mc, csr.err == MDBX_SUCCESS);
            if (op == MDBX_TO_PAIR_LESSER_THAN)
              rc = outer_prev(mc, key, data, MDBX_PREV);
            else if (op == MDBX_TO_PAIR_GREATER_THAN)
              rc = outer_next(mc, key, data, MDBX_NEXT);
          } else if (op < MDBX_TO_PAIR_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS))
            rc = outer_prev(mc, key, data, MDBX_PREV);
          else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS)
            rc = MDBX_NOTFOUND;
          else if (op > MDBX_TO_PAIR_EQUAL && rc == MDBX_NOTFOUND)
            rc = outer_next(mc, key, data, MDBX_NEXT);
        } else {
          int cmp = mc->clc->v.cmp(data, &save_data);
          switch (op) {
          default:
            __unreachable();
          case MDBX_TO_PAIR_LESSER_THAN:
            if (cmp >= 0)
              rc = outer_prev(mc, key, data, MDBX_PREV);
            break;
          case MDBX_TO_PAIR_LESSER_OR_EQUAL:
            if (cmp > 0)
              rc = outer_prev(mc, key, data, MDBX_PREV);
            break;
          case MDBX_TO_PAIR_EQUAL:
            rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND;
            break;
          case MDBX_TO_PAIR_GREATER_OR_EQUAL:
            if (cmp < 0)
              rc = outer_next(mc, key, data, MDBX_NEXT);
            break;
          case MDBX_TO_PAIR_GREATER_THAN:
            if (cmp <= 0)
              rc = outer_next(mc, key, data, MDBX_NEXT);
            break;
          }
        }
      } else if (op < MDBX_TO_PAIR_EQUAL && (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS))
        rc = outer_prev(mc, key, data, MDBX_PREV_NODUP);
      else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS)
        rc = MDBX_NOTFOUND;
    }
    return rc;

  default:
    DEBUG("unhandled/unimplemented cursor operation %u", op);
    return MDBX_EINVAL;
  }
}

/* Validates a cursor, and optionally its transaction, before it is used.
 *
 * mc            the cursor to check; nullptr yields MDBX_EINVAL.
 * txn_bad_bits  transaction flag bits that make the operation unacceptable;
 *               zero skips the transaction and DBI checks altogether.
 *
 * Returns MDBX_SUCCESS when the cursor may be used, MDBX_EBADSIGN for an
 * unknown signature, MDBX_BAD_DBI when cursor_dbi_changed() reports a change,
 * or whatever check_txn()/dbi_check() reported. */
int cursor_check(const MDBX_cursor *mc, int txn_bad_bits) {
  if (unlikely(!mc))
    return MDBX_EINVAL;

  if (unlikely(mc->signature != cur_signature_live)) {
    /* A cursor that is ready for disposal is tolerated only while
     * txn_bad_bits does not exceed MDBX_TXN_FINISHED. */
    if (mc->signature == cur_signature_ready4dispose)
      return (txn_bad_bits > MDBX_TXN_FINISHED) ? MDBX_EINVAL : MDBX_SUCCESS;
    return MDBX_EBADSIGN;
  }

  /* Check that the cursor is linked into the tracking list; an exception is
   * allowed only for read-only operations on service/temporary cursors that
   * live on the stack. */
  MDBX_MAYBE_UNUSED char stack_top[sizeof(void *)];
  cASSERT(mc, cursor_is_tracked(mc) || (!(txn_bad_bits & MDBX_TXN_RDONLY) && stack_top < (char *)mc &&
                                        (char *)mc - stack_top < (ptrdiff_t)globals.sys_pagesize * 4));

  if (!txn_bad_bits)
    return MDBX_SUCCESS;

  /* MDBX_TXN_HAS_CHILD is masked out here and handled separately below. */
  int rc = check_txn(mc->txn, txn_bad_bits & ~MDBX_TXN_HAS_CHILD);
  if (unlikely(rc != MDBX_SUCCESS)) {
    cASSERT(mc, rc != MDBX_RESULT_TRUE);
    return rc;
  }

  if (likely(!(mc->txn->flags & MDBX_TXN_HAS_CHILD))) {
    if (unlikely(cursor_dbi_changed(mc)))
      return MDBX_BAD_DBI;
    return MDBX_SUCCESS;
  }

  /* The cursor's transaction has a child: validate the DBI against the
   * environment's current transaction instead. */
  cASSERT(mc, (mc->txn->flags & MDBX_TXN_RDONLY) == 0 && mc->txn != mc->txn->env->txn && mc->txn->env->txn);
  rc = dbi_check(mc->txn->env->txn, cursor_dbi(mc));
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  cASSERT(mc, (mc->txn->flags & MDBX_TXN_RDONLY) == 0 && mc->txn == mc->txn->env->txn);
  return MDBX_SUCCESS;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

#if MDBX_ENABLE_DBI_SPARSE
/* Portable fallback that returns the index of the lowest set bit of a
 * non-zero bitmap item, using a multiply-and-lookup over the `debruijn_ctz*`
 * tables.
 *
 * txn  used only for the assertion and to pick the table matching the width
 *      of a `dbi_sparse` element.
 * bmi  the bitmap item; must be non-zero. */
size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi) {
  tASSERT(txn, bmi != 0);
  /* Isolate the lowest set bit in unsigned arithmetic: negating a signed
   * value is undefined when only the top bit is set (INTPTR_MIN), which the
   * assertion above does permit. */
  const uintptr_t lowest = (uintptr_t)bmi & (0 - (uintptr_t)bmi);
  if (sizeof(txn->dbi_sparse[0]) > 4) {
    static const uint8_t debruijn_ctz64[64] = {0,  1,  2,  53, 3,  7,  54, 27, 4,  38, 41, 8,  34, 55, 48, 28,
                                               62, 5,  39, 46, 44, 42, 22, 9,  24, 35, 59, 56, 49, 18, 29, 11,
                                               63, 52, 6,  26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
                                               51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12};
    return debruijn_ctz64[(UINT64_C(0x022FDD63CC95386D) * (uint64_t)lowest) >> 58];
  } else {
    static const uint8_t debruijn_ctz32[32] = {0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
                                               31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
    return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)lowest) >> 27];
  }
}
#endif /* MDBX_ENABLE_DBI_SPARSE */

struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) {
  eASSERT(env, dbi < env->n_dbi);
  struct dbi_snap_result r;
  uint32_t snap = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease);
  do {
    r.sequence = snap;
    r.flags = env->dbs_flags[dbi];
    snap = atomic_load32(&env->dbi_seqs[dbi], mo_AcquireRelease);
  } while (unlikely(snap != r.sequence));
  return r;
}

/* Brings the per-transaction state of a DBI slot up to date with the
 * environment.
 *
 * If the slot has not been touched by this transaction yet, it is initialised
 * (from the parent transaction for nested transactions, otherwise from a
 * fresh snapshot of the environment's flags and sequence). If the slot was
 * already used, the handle is marked as olden and an error is returned.
 *
 * Returns MDBX_SUCCESS, MDBX_BAD_DBI, MDBX_DANGLING_DBI, or an error from
 * dbi_check()/cursor_shadow() for nested transactions. */
__noinline int dbi_import(MDBX_txn *txn, const size_t dbi) {
  const MDBX_env *const env = txn->env;
  if (dbi >= env->n_dbi || !env->dbs_flags[dbi])
    return MDBX_BAD_DBI;

#if MDBX_ENABLE_DBI_SPARSE
  const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->dbi_sparse[0]);
  const size_t bitmap_indx = dbi / bitmap_chunk;
  const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk;
  if (dbi >= txn->n_dbi) {
    /* The slot lies beyond what this transaction has seen so far: zero the
     * bitmap words that become newly covered, then extend n_dbi along the
     * whole chain of parent transactions. */
    for (size_t i = (txn->n_dbi + bitmap_chunk - 1) / bitmap_chunk; bitmap_indx >= i; ++i)
      txn->dbi_sparse[i] = 0;
    eASSERT(env, (txn->dbi_sparse[bitmap_indx] & bitmap_mask) == 0);
    MDBX_txn *scan = txn;
    do {
      eASSERT(env, scan->dbi_sparse == txn->dbi_sparse);
      eASSERT(env, scan->n_dbi < dbi + 1);
      scan->n_dbi = (unsigned)dbi + 1;
      scan->dbi_state[dbi] = 0;
      scan = scan->parent;
    } while (scan /* && scan->dbi_sparse == txn->dbi_sparse */);
    txn->dbi_sparse[bitmap_indx] |= bitmap_mask;
    goto lindo;
  }
  if ((txn->dbi_sparse[bitmap_indx] & bitmap_mask) == 0) {
    /* The slot is within range but its bit is not set yet: reset the state
     * in this transaction and all its parents, then mark the bit. */
    MDBX_txn *scan = txn;
    do {
      eASSERT(env, scan->dbi_sparse == txn->dbi_sparse);
      eASSERT(env, scan->n_dbi == txn->n_dbi);
      scan->dbi_state[dbi] = 0;
      scan = scan->parent;
    } while (scan /* && scan->dbi_sparse == txn->dbi_sparse */);
    txn->dbi_sparse[bitmap_indx] |= bitmap_mask;
    goto lindo;
  }
#else
  if (dbi >= txn->n_dbi) {
    /* Zero the state of every slot from the old n_dbi up to and including
     * dbi, then extend n_dbi. */
    size_t i = txn->n_dbi;
    do
      txn->dbi_state[i] = 0;
    while (dbi >= ++i);
    txn->n_dbi = i;
    goto lindo;
  }
#endif /* MDBX_ENABLE_DBI_SPARSE */

  if (!txn->dbi_state[dbi]) {
  lindo:
    /* The dbi slot is not yet initialised in this transaction and the handle
     * has not been used. */
    txn->cursors[dbi] = nullptr;
    MDBX_txn *const parent = txn->parent;
    if (unlikely(parent)) {
      /* Nested write transaction. */
      int rc = dbi_check(parent, dbi);
      /* Copy the table state, clearing the "new" flags. */
      eASSERT(env, txn->dbi_seqs == parent->dbi_seqs);
      txn->dbi_state[dbi] = parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY);
      if (likely(rc == MDBX_SUCCESS)) {
        txn->dbs[dbi] = parent->dbs[dbi];
        if (parent->cursors[dbi]) {
          rc = cursor_shadow(parent->cursors[dbi], txn, dbi);
          if (unlikely(rc != MDBX_SUCCESS)) {
            /* Failed to back up (shadow) the parent's cursors. */
            txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO | DBI_STALE;
            txn->flags |= MDBX_TXN_ERROR;
          }
        }
      }
      return rc;
    }
    txn->dbi_seqs[dbi] = 0;
    txn->dbi_state[dbi] = DBI_LINDO;
  } else {
    eASSERT(env, txn->dbi_seqs[dbi] != env->dbi_seqs[dbi].weak);
    if (unlikely(txn->cursors[dbi])) {
      /* The handle was already used in this transaction and dangling cursors
       * remain. */
      txn->dbi_seqs[dbi] = env->dbi_seqs[dbi].weak;
      txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO;
      return MDBX_DANGLING_DBI;
    }
    if (unlikely(txn->dbi_state[dbi] & (DBI_OLDEN | DBI_VALID))) {
      /* The handle was already used in this transaction but has been closed
       * or reopened; there are no dangling cursors. */
      txn->dbi_seqs[dbi] = env->dbi_seqs[dbi].weak;
      txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO;
      return MDBX_BAD_DBI;
    }
  }

  /* The handle was not used in this transaction, or it is being explicitly
   * re-opened while there are no dangling cursors. */
  eASSERT(env, (txn->dbi_state[dbi] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO && !txn->cursors[dbi]);

  /* Read the current flags and sequence. */
  struct dbi_snap_result snap = dbi_snap(env, dbi);
  txn->dbi_seqs[dbi] = snap.sequence;
  if (snap.flags & DB_VALID) {
    txn->dbs[dbi].flags = snap.flags & DB_PERSISTENT_FLAGS;
    /* Slots at or above CORE_DBS are additionally marked DBI_STALE. */
    txn->dbi_state[dbi] = (dbi >= CORE_DBS) ? DBI_LINDO | DBI_VALID | DBI_STALE : DBI_LINDO | DBI_VALID;
    return MDBX_SUCCESS;
  }
  return MDBX_BAD_DBI;
}

/* Queues `chain` for deferred freeing, releases env->dbi_lock and frees the
 * items that are no longer deferred.
 *
 * With MDBX_ENABLE_DBI_LOCKFREE, items already queued in env->defer_free for
 * at least one second are unlinked and freed, while `chain` is stamped with
 * the current time and appended to the queue. Without it, `chain` is freed
 * right away.
 *
 * Releases env->dbi_lock itself (enforced via ENSURE) before freeing.
 * Returns MDBX_SUCCESS when `chain` is non-null, MDBX_BAD_DBI otherwise. */
int dbi_defer_release(MDBX_env *const env, defer_free_item_t *const chain) {
  size_t still_pending = 0;
  defer_free_item_t *expired = nullptr;
#if MDBX_ENABLE_DBI_LOCKFREE
  const uint64_t now = osal_monotime();
  defer_free_item_t **link = &env->defer_free;
  if (*link) {
    const uint64_t threshold_1second = osal_16dot16_to_monotime(1 * 65536);
    while (*link) {
      defer_free_item_t *const entry = *link;
      if (now - entry->timestamp >= threshold_1second) {
        /* Old enough: unlink from the queue and push onto the expired list. */
        *link = entry->next;
        entry->next = expired;
        expired = entry;
      } else {
        link = &entry->next;
        ++still_pending;
      }
    }
  }

  /* `link` now addresses the terminating pointer of the queue. */
  eASSERT(env, *link == nullptr);
  for (defer_free_item_t *entry = chain; entry; entry = entry->next)
    entry->timestamp = now;
  if (chain)
    *link = chain;
#else  /* MDBX_ENABLE_DBI_LOCKFREE */
  expired = chain;
#endif /* MDBX_ENABLE_DBI_LOCKFREE */

  ENSURE(env, osal_fastmutex_release(&env->dbi_lock) == MDBX_SUCCESS);
  if (still_pending > 42)
    osal_yield();
  for (defer_free_item_t *next; expired; expired = next) {
    next = expired->next;
    osal_free(expired);
  }
  return chain ? MDBX_SUCCESS : MDBX_BAD_DBI;
}

/* Export or close DBI handles opened in this txn.
 *
 * Only slots whose state carries DBI_CREAT are processed. With a non-zero
 * `keep` the slot's flags are published to the environment as valid;
 * otherwise the slot is invalidated and its name buffer is handed over to
 * dbi_defer_release(). env->dbi_lock is acquired lazily on the first slot
 * that needs processing and is released by dbi_defer_release().
 *
 * Returns MDBX_SUCCESS, or the error from acquiring env->dbi_lock. */
int dbi_update(MDBX_txn *txn, int keep) {
  MDBX_env *const env = txn->env;
  tASSERT(txn, !txn->parent && txn == env->basal_txn);
  bool holding_lock = false;
  defer_free_item_t *release_list = nullptr;
  TXN_FOREACH_DBI_USER(txn, dbi) {
    if (likely(!(txn->dbi_state[dbi] & DBI_CREAT)))
      continue;
    if (!holding_lock) {
      const int err = osal_fastmutex_acquire(&env->dbi_lock);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      holding_lock = true;
      if (dbi >= env->n_dbi)
        /* The handle was closed by another thread while the lock was being
         * acquired. */
        continue;
    }
    tASSERT(txn, dbi < env->n_dbi);
    if (keep) {
      env->dbs_flags[dbi] = txn->dbs[dbi].flags | DB_VALID;
      continue;
    }

    const uint32_t seq = dbi_seq_next(env, dbi);
    defer_free_item_t *const name_buf = env->kvs[dbi].name.iov_base;
    if (!name_buf) {
      eASSERT(env, env->kvs[dbi].name.iov_len == 0);
      eASSERT(env, env->dbs_flags[dbi] == 0);
      continue;
    }
    /* Invalidate the slot, bump its sequence, then queue the name buffer. */
    env->dbs_flags[dbi] = 0;
    env->kvs[dbi].name.iov_len = 0;
    env->kvs[dbi].name.iov_base = nullptr;
    atomic_store32(&env->dbi_seqs[dbi], seq, mo_AcquireRelease);
    osal_flush_incoherent_cpu_writeback();
    name_buf->next = release_list;
    release_list = name_buf;
  }

  if (!holding_lock)
    return MDBX_SUCCESS;

  /* Trim trailing slots that are no longer valid. */
  size_t count = env->n_dbi;
  while (!(env->dbs_flags[count - 1] & DB_VALID)) {
    --count;
    eASSERT(env, count >= CORE_DBS);
    eASSERT(env, !env->dbs_flags[count] && !env->kvs[count].name.iov_len && !env->kvs[count].name.iov_base);
  }
  env->n_dbi = (unsigned)count;
  dbi_defer_release(env, release_list);
  return MDBX_SUCCESS;
}

/* Binds user-supplied flags and comparators to an already imported DBI slot.
 *
 * txn         transaction in which the handle is being opened.
 * dbi         slot index; must be below both txn->n_dbi and env->n_dbi.
 * user_flags  flags requested by the caller (may include MDBX_CREATE, or be
 *             MDBX_DB_ACCEDE).
 * keycmp      key comparator, or nullptr to use the current/builtin one.
 * datacmp     data comparator, or nullptr to use the current/builtin one.
 *
 * Returns MDBX_SUCCESS; MDBX_INCOMPATIBLE when the flags differ and the table
 * cannot be re-created; MDBX_DANGLING_DBI when re-creation is blocked by
 * cursors; MDBX_PROBLEM when tbl_setup() fails; MDBX_EINVAL when a comparator
 * differs from the one of a valid table; or an error from tbl_fetch(). */
int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) {
  const MDBX_env *const env = txn->env;
  eASSERT(env, dbi < txn->n_dbi && dbi < env->n_dbi);
  eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO);
  eASSERT(env, env->dbs_flags[dbi] != DB_POISON);
  if ((env->dbs_flags[dbi] & DB_VALID) == 0) {
    eASSERT(env, !env->kvs[dbi].clc.k.cmp && !env->kvs[dbi].clc.v.cmp && !env->kvs[dbi].name.iov_len &&
                     !env->kvs[dbi].name.iov_base && !env->kvs[dbi].clc.k.lmax && !env->kvs[dbi].clc.k.lmin &&
                     !env->kvs[dbi].clc.v.lmax && !env->kvs[dbi].clc.v.lmin);
  } else {
    eASSERT(env, !(txn->dbi_state[dbi] & DBI_VALID) || (txn->dbs[dbi].flags | DB_VALID) == env->dbs_flags[dbi]);
    eASSERT(env, env->kvs[dbi].name.iov_base || dbi < CORE_DBS);
  }

  /* If the dbi has already been used, four cases are considered valid:
   * 1) user_flags equal MDBX_DB_ACCEDE
   *   = assume the user is opening an existing table; the checks below will
   *     not allow other comparators to be set.
   * 2) user_flags are zero and both comparators are null or equal to the
   *    current ones
   *   = assume the user is opening an existing table the old way, with zero
   *     default flags.
   * 3) user_flags match and the comparators are not given or are the same
   *   = assume the user is opening the table specifying all parameters.
   * 4) user_flags differ, but the table is empty and MDBX_CREATE is set
   *   = assume the user is re-creating the table.
   */
  if ((user_flags & ~MDBX_CREATE) != (unsigned)(env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS)) {
    /* flags differ, check the other conditions */
    if ((!user_flags && (!keycmp || keycmp == env->kvs[dbi].clc.k.cmp) &&
         (!datacmp || datacmp == env->kvs[dbi].clc.v.cmp)) ||
        user_flags == MDBX_DB_ACCEDE) {
      user_flags = env->dbs_flags[dbi] & DB_PERSISTENT_FLAGS;
    } else if ((user_flags & MDBX_CREATE) == 0)
      return /* FIXME: return extended info */ MDBX_INCOMPATIBLE;
    else {
      if (txn->dbi_state[dbi] & DBI_STALE) {
        eASSERT(env, env->dbs_flags[dbi] & DB_VALID);
        /* Refresh the stale table record; bail out only if the fetch FAILED.
         * On success we must continue to the emptiness check below. */
        int err = tbl_fetch(txn, dbi);
        if (unlikely(err != MDBX_SUCCESS))
          return err;
      }
      eASSERT(env, ((env->dbs_flags[dbi] ^ txn->dbs[dbi].flags) & DB_PERSISTENT_FLAGS) == 0);
      eASSERT(env, (txn->dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == (DBI_LINDO | DBI_VALID));
      if (unlikely(txn->dbs[dbi].leaf_pages))
        return /* FIXME: return extended info */ MDBX_INCOMPATIBLE;

      /* Re-create the table if it is empty. */
      if (unlikely(txn->cursors[dbi]))
        return MDBX_DANGLING_DBI;
      /* Poison the slot and bump its sequence while it is being rebuilt. */
      env->dbs_flags[dbi] = DB_POISON;
      atomic_store32(&env->dbi_seqs[dbi], dbi_seq_next(env, dbi), mo_AcquireRelease);

      const uint32_t seq = dbi_seq_next(env, dbi);
      const uint16_t db_flags = user_flags & DB_PERSISTENT_FLAGS;
      eASSERT(env, txn->dbs[dbi].height == 0 && txn->dbs[dbi].items == 0 && txn->dbs[dbi].root == P_INVALID);
      env->kvs[dbi].clc.k.cmp = keycmp ? keycmp : builtin_keycmp(user_flags);
      env->kvs[dbi].clc.v.cmp = datacmp ? datacmp : builtin_datacmp(user_flags);
      txn->dbs[dbi].flags = db_flags;
      txn->dbs[dbi].dupfix_size = 0;
      if (unlikely(tbl_setup(env, &env->kvs[dbi], &txn->dbs[dbi]))) {
        txn->dbi_state[dbi] = DBI_LINDO;
        txn->flags |= MDBX_TXN_ERROR;
        return MDBX_PROBLEM;
      }

      /* Publish the re-created table with the new flags and sequence. */
      env->dbs_flags[dbi] = db_flags | DB_VALID;
      atomic_store32(&env->dbi_seqs[dbi], seq, mo_AcquireRelease);
      txn->dbi_seqs[dbi] = seq;
      txn->dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_CREAT | DBI_DIRTY;
      txn->flags |= MDBX_TXN_DIRTY;
    }
  }

  /* A comparator may only be installed while the slot is not yet valid;
   * for a valid table any mismatch is rejected. */
  if (!keycmp)
    keycmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.k.cmp : builtin_keycmp(user_flags);
  if (env->kvs[dbi].clc.k.cmp != keycmp) {
    if (env->dbs_flags[dbi] & DB_VALID)
      return MDBX_EINVAL;
    env->kvs[dbi].clc.k.cmp = keycmp;
  }

  if (!datacmp)
    datacmp = (env->dbs_flags[dbi] & DB_VALID) ? env->kvs[dbi].clc.v.cmp : builtin_datacmp(user_flags);
  if (env->kvs[dbi].clc.v.cmp != datacmp) {
    if (env->dbs_flags[dbi] & DB_VALID)
      return MDBX_EINVAL;
    env->kvs[dbi].clc.v.cmp = datacmp;
  }

  return MDBX_SUCCESS;
}

/* Number of bytes to allocate for a copy of a table name: the name length,
 * but never less than sizeof(defer_free_item_t), because the same buffer is
 * later linked into a defer_free_item_t chain when the handle is released. */
static inline size_t dbi_namelen(const MDBX_val name) {
  const size_t min_bytes = sizeof(defer_free_item_t);
  if (name.iov_len > min_bytes)
    return name.iov_len;
  return min_bytes;
}

/* Opens (and optionally creates) a named table. Called from dbi_open() with
 * env->dbi_lock held.
 *
 * txn         transaction in which the table is opened.
 * user_flags  requested table flags, optionally with MDBX_CREATE.
 * dbi         receives the slot index on success.
 * keycmp      key comparator or nullptr.
 * datacmp     data comparator or nullptr.
 * name        table name; it is copied into a heap buffer owned by the
 *             environment slot on success.
 *
 * Returns MDBX_SUCCESS or an error such as MDBX_NOTFOUND, MDBX_INCOMPATIBLE,
 * MDBX_DANGLING_DBI, MDBX_DBS_FULL, MDBX_CORRUPTED, MDBX_ENOMEM,
 * MDBX_PROBLEM, or whatever the cursor/bind helpers report. */
static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
                           MDBX_cmp_func *datacmp, MDBX_val name) {
  MDBX_env *const env = txn->env;

  /* Cannot mix named table(s) with DUPSORT flags */
  tASSERT(txn, (txn->dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == (DBI_LINDO | DBI_VALID));
  if (unlikely(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT)) {
    if (unlikely((user_flags & MDBX_CREATE) == 0))
      return MDBX_NOTFOUND;
    if (unlikely(txn->dbs[MAIN_DBI].leaf_pages))
      /* MainDB contains records, or it has already been used. */
      return MDBX_INCOMPATIBLE;

    /* Re-create MainDB when it is empty. */
    tASSERT(txn,
            txn->dbs[MAIN_DBI].height == 0 && txn->dbs[MAIN_DBI].items == 0 && txn->dbs[MAIN_DBI].root == P_INVALID);
    if (unlikely(txn->cursors[MAIN_DBI]))
      return MDBX_DANGLING_DBI;
    /* Poison the slot and bump its sequence while it is being rebuilt. */
    env->dbs_flags[MAIN_DBI] = DB_POISON;
    atomic_store32(&env->dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), mo_AcquireRelease);

    const uint32_t seq = dbi_seq_next(env, MAIN_DBI);
    /* Only the key-related flags survive; MDBX_DUPSORT is dropped. */
    const uint16_t main_flags = txn->dbs[MAIN_DBI].flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY);
    env->kvs[MAIN_DBI].clc.k.cmp = builtin_keycmp(main_flags);
    env->kvs[MAIN_DBI].clc.v.cmp = builtin_datacmp(main_flags);
    txn->dbs[MAIN_DBI].flags = main_flags;
    txn->dbs[MAIN_DBI].dupfix_size = 0;
    int err = tbl_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]);
    if (unlikely(err != MDBX_SUCCESS)) {
      txn->dbi_state[MAIN_DBI] = DBI_LINDO;
      txn->flags |= MDBX_TXN_ERROR;
      env->flags |= ENV_FATAL_ERROR;
      return err;
    }
    env->dbs_flags[MAIN_DBI] = main_flags | DB_VALID;
    txn->dbi_seqs[MAIN_DBI] = atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease);
    txn->dbi_state[MAIN_DBI] |= DBI_DIRTY;
    txn->flags |= MDBX_TXN_DIRTY;
  }

  tASSERT(txn, env->kvs[MAIN_DBI].clc.k.cmp);

  /* Is the DB already open? */
  size_t slot = env->n_dbi;
  for (size_t scan = CORE_DBS; scan < env->n_dbi; ++scan) {
    if ((env->dbs_flags[scan] & DB_VALID) == 0) {
      /* Remember this free slot */
      slot = (slot < scan) ? slot : scan;
      continue;
    }
    /* Names are compared with MainDB's key comparator. */
    if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[scan].name) == 0) {
      slot = scan;
      int err = dbi_check(txn, slot);
      if (err == MDBX_BAD_DBI && txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) {
        /* The handle was used and became invalid, but is now being
         * explicitly re-opened in this transaction. */
        eASSERT(env, !txn->cursors[slot]);
        txn->dbi_state[slot] = DBI_LINDO;
        err = dbi_check(txn, slot);
      }
      if (err == MDBX_SUCCESS) {
        err = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
        if (likely(err == MDBX_SUCCESS)) {
          goto done;
        }
      }
      return err;
    }
  }

  /* Fail, if no free slot and max hit */
  if (unlikely(slot >= env->max_dbi))
    return MDBX_DBS_FULL;

  if (env->n_dbi == slot)
    eASSERT(env, !env->dbs_flags[slot] && !env->kvs[slot].name.iov_len && !env->kvs[slot].name.iov_base);

  /* Reserve the slot: poison it, bump its sequence and clear its descriptor. */
  env->dbs_flags[slot] = DB_POISON;
  atomic_store32(&env->dbi_seqs[slot], dbi_seq_next(env, slot), mo_AcquireRelease);
  memset(&env->kvs[slot], 0, sizeof(env->kvs[slot]));
  if (env->n_dbi == slot)
    env->n_dbi = (unsigned)slot + 1;
  eASSERT(env, slot < env->n_dbi);

  /* The poisoned slot is expected to be reported as MDBX_BAD_DBI here. */
  int err = dbi_check(txn, slot);
  eASSERT(env, err == MDBX_BAD_DBI);
  if (err != MDBX_BAD_DBI)
    return MDBX_PROBLEM;

  /* Find the DB info */
  MDBX_val body;
  cursor_couple_t cx;
  int rc = cursor_init(&cx.outer, txn, MAIN_DBI);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;
  rc = cursor_seek(&cx.outer, &name, &body, MDBX_SET).err;
  if (unlikely(rc != MDBX_SUCCESS)) {
    /* Only "not found together with MDBX_CREATE" is allowed to continue. */
    if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE))
      return rc;
  } else {
    /* make sure this is actually a table */
    node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
    if (unlikely((node_flags(node) & (N_DUP | N_TREE)) != N_TREE))
      return MDBX_INCOMPATIBLE;
    if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(tree_t))) {
      ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", body.iov_len);
      return MDBX_CORRUPTED;
    }
    memcpy(&txn->dbs[slot], body.iov_base, sizeof(tree_t));
  }

  /* Done here so we cannot fail after creating a new DB */
  /* The buffer is sized by dbi_namelen(), i.e. at least
   * sizeof(defer_free_item_t), since it is typed as a defer_free_item_t. */
  defer_free_item_t *const clone = osal_malloc(dbi_namelen(name));
  if (unlikely(!clone))
    return MDBX_ENOMEM;
  memcpy(clone, name.iov_base, name.iov_len);
  name.iov_base = clone;

  uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH;
  if (unlikely(rc)) {
    /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */
    tASSERT(txn, rc == MDBX_NOTFOUND);
    body.iov_base = memset(&txn->dbs[slot], 0, body.iov_len = sizeof(tree_t));
    txn->dbs[slot].root = P_INVALID;
    txn->dbs[slot].mod_txnid = txn->txnid;
    txn->dbs[slot].flags = user_flags & DB_PERSISTENT_FLAGS;
    /* Temporarily link the on-stack cursor into MainDB's cursor list for the
     * duration of the put, then unlink it again. */
    cx.outer.next = txn->cursors[MAIN_DBI];
    txn->cursors[MAIN_DBI] = &cx.outer;
    rc = cursor_put_checklen(&cx.outer, &name, &body, N_TREE | MDBX_NOOVERWRITE);
    txn->cursors[MAIN_DBI] = cx.outer.next;
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    dbi_state |= DBI_DIRTY | DBI_CREAT;
    txn->flags |= MDBX_TXN_DIRTY;
    tASSERT(txn, (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) != 0);
  }

  /* Got info, register DBI in this txn */
  const uint32_t seq = dbi_seq_next(env, slot);
  eASSERT(env, env->dbs_flags[slot] == DB_POISON && !txn->cursors[slot] &&
                   (txn->dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO);
  txn->dbi_state[slot] = dbi_state;
  memcpy(&txn->dbs[slot], body.iov_base, sizeof(txn->dbs[slot]));
  env->dbs_flags[slot] = txn->dbs[slot].flags;
  rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
  if (unlikely(rc != MDBX_SUCCESS))
    goto bailout;

  /* Publish the slot: name, valid flags and the new sequence. */
  env->kvs[slot].name = name;
  env->dbs_flags[slot] = txn->dbs[slot].flags | DB_VALID;
  txn->dbi_seqs[slot] = atomic_store32(&env->dbi_seqs[slot], seq, mo_AcquireRelease);

done:
  *dbi = (MDBX_dbi)slot;
  tASSERT(txn, slot < txn->n_dbi && (env->dbs_flags[slot] & DB_VALID) != 0);
  eASSERT(env, dbi_check(txn, slot) == MDBX_SUCCESS);
  return MDBX_SUCCESS;

bailout:
  /* Undo the slot reservation and drop the name copy. */
  eASSERT(env, !txn->cursors[slot] && !env->kvs[slot].name.iov_len && !env->kvs[slot].name.iov_base);
  txn->dbi_state[slot] &= DBI_LINDO | DBI_OLDEN;
  env->dbs_flags[slot] = 0;
  osal_free(clone);
  if (slot + 1 == env->n_dbi)
    txn->n_dbi = env->n_dbi = (unsigned)slot;
  return rc;
}

/* Opens a table handle by name within a transaction.
 *
 * txn         the transaction; validated with check_txn(MDBX_TXN_BLOCKED).
 * name        table name, or one of the MDBX_CHK_MAIN / MDBX_CHK_GC /
 *             MDBX_CHK_META markers (either as the pointer itself or as
 *             name->iov_base).
 * user_flags  requested table flags, optionally with MDBX_CREATE, or
 *             MDBX_ACCEDE.
 * dbi         receives the handle; set to 0 up front.
 * keycmp      key comparator or nullptr.
 * datacmp     data comparator or nullptr.
 *
 * Returns MDBX_SUCCESS; MDBX_EINVAL for a null `dbi`, rejected flags, the
 * META marker or an over-long name; MDBX_EACCESS for MDBX_CREATE in a
 * read-only transaction; MDBX_DBS_FULL when no slot is available; or an
 * error from the helpers called below. */
int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
             MDBX_cmp_func *datacmp) {
  if (unlikely(!dbi))
    return MDBX_EINVAL;
  *dbi = 0;

  if (user_flags != MDBX_ACCEDE && unlikely(!check_table_flags(user_flags & ~MDBX_CREATE)))
    return MDBX_EINVAL;

  int rc = check_txn(txn, MDBX_TXN_BLOCKED);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  if ((user_flags & MDBX_CREATE) && unlikely(txn->flags & MDBX_TXN_RDONLY))
    return MDBX_EACCESS;

  /* main table? */
  if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) {
    rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp);
    if (likely(rc == MDBX_SUCCESS))
      *dbi = MAIN_DBI;
    return rc;
  }
  /* GC table? */
  if (unlikely(name == MDBX_CHK_GC || name->iov_base == MDBX_CHK_GC)) {
    rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp);
    if (likely(rc == MDBX_SUCCESS))
      *dbi = FREE_DBI;
    return rc;
  }
  /* The META marker cannot be opened as a table. */
  if (unlikely(name == MDBX_CHK_META || name->iov_base == MDBX_CHK_META))
    return MDBX_EINVAL;
  /* The name together with a tree_t record and the node header must fit
   * into a leaf node. */
  if (unlikely(name->iov_len > txn->env->leaf_nodemax - NODESIZE - sizeof(tree_t)))
    return MDBX_EINVAL;

#if MDBX_ENABLE_DBI_LOCKFREE
  /* Is the DB already open? */
  /* This scan runs without env->dbi_lock; every observation is re-validated
   * against the sequence numbers, and any mismatch falls back to the locked
   * path. */
  const MDBX_env *const env = txn->env;
  bool have_free_slot = env->n_dbi < env->max_dbi;
  for (size_t i = CORE_DBS; i < env->n_dbi; ++i) {
    if ((env->dbs_flags[i] & DB_VALID) == 0) {
      have_free_slot = true;
      continue;
    }

    struct dbi_snap_result snap = dbi_snap(env, i);
    const MDBX_val snap_name = env->kvs[i].name;
    const uint32_t main_seq = atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease);
    MDBX_cmp_func *const snap_cmp = env->kvs[MAIN_DBI].clc.k.cmp;
    if (unlikely(!(snap.flags & DB_VALID) || !snap_name.iov_base || !snap_name.iov_len || !snap_cmp))
      /* Looks like a collision with a concurrently running update. */
      goto slowpath_locking;

    const bool name_match = snap_cmp(&snap_name, name) == 0;
    /* Re-check that nothing changed while the name was being compared. */
    if (unlikely(snap.sequence != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) ||
                 main_seq != atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) ||
                 snap.flags != env->dbs_flags[i] || snap_name.iov_base != env->kvs[i].name.iov_base ||
                 snap_name.iov_len != env->kvs[i].name.iov_len))
      /* Looks like a collision with a concurrently running update. */
      goto slowpath_locking;

    if (!name_match)
      continue;

    osal_flush_incoherent_cpu_writeback();
    if (user_flags != MDBX_ACCEDE &&
        (((user_flags ^ snap.flags) & DB_PERSISTENT_FLAGS) || (keycmp && keycmp != env->kvs[i].clc.k.cmp) ||
         (datacmp && datacmp != env->kvs[i].clc.v.cmp)))
      /* The user appears to be opening the table with different
       * flags/attributes or different comparators, so fall back to the safe
       * (locked) path. */
      goto slowpath_locking;

    rc = dbi_check(txn, i);
    if (rc == MDBX_BAD_DBI && txn->dbi_state[i] == (DBI_OLDEN | DBI_LINDO)) {
      /* The handle was used and became invalid, but is now being explicitly
       * re-opened in this transaction. */
      eASSERT(env, !txn->cursors[i]);
      txn->dbi_state[i] = DBI_LINDO;
      rc = dbi_check(txn, i);
    }
    if (likely(rc == MDBX_SUCCESS)) {
      /* Validate the snapshot once more before binding. */
      if (unlikely(snap.sequence != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) ||
                   main_seq != atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) ||
                   snap.flags != env->dbs_flags[i] || snap_name.iov_base != env->kvs[i].name.iov_base ||
                   snap_name.iov_len != env->kvs[i].name.iov_len))
        /* Looks like a collision with a concurrently running update. */
        goto slowpath_locking;
      rc = dbi_bind(txn, i, user_flags, keycmp, datacmp);
      if (likely(rc == MDBX_SUCCESS))
        *dbi = (MDBX_dbi)i;
    }
    return rc;
  }

  /* Fail, if no free slot and max hit */
  if (unlikely(!have_free_slot))
    return MDBX_DBS_FULL;

slowpath_locking:

#endif /* MDBX_ENABLE_DBI_LOCKFREE */

  /* Slow path: do the whole lookup/creation under env->dbi_lock. */
  rc = osal_fastmutex_acquire(&txn->env->dbi_lock);
  if (likely(rc == MDBX_SUCCESS)) {
    rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name);
    ENSURE(txn->env, osal_fastmutex_release(&txn->env->dbi_lock) == MDBX_SUCCESS);
  }
  return rc;
}

/* Renames the table behind `dbi` to `new_name` inside MainDB.
 *
 * A record for the new name is inserted, the record for the old name is
 * deleted, and the environment slot is switched to a heap copy of the new
 * name.
 *
 * In the result, `err` is MDBX_SUCCESS or an error code (MDBX_KEYEXIST when
 * the new name is already present, MDBX_ENOMEM on allocation failure, or an
 * error from the cursor helpers). `defer` is nullptr when nothing was
 * allocated, the previous name buffer after a successful rename, or the
 * newly allocated buffer when the rename failed after allocation. */
__cold struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) {
  struct dbi_rename_result res;
  res.defer = nullptr;
  res.err = dbi_check(txn, dbi);
  if (unlikely(res.err != MDBX_SUCCESS))
    return res;

  MDBX_env *const env = txn->env;
  MDBX_val old_name = env->kvs[dbi].name;
  /* Renaming to the same name is a no-op, except in debug builds where the
   * full path is still exercised. */
  const bool same_name = env->kvs[MAIN_DBI].clc.k.cmp(&new_name, &old_name) == 0;
  if (same_name && MDBX_DEBUG == 0)
    return res;

  cursor_couple_t cx;
  res.err = cursor_init(&cx.outer, txn, MAIN_DBI);
  if (unlikely(res.err != MDBX_SUCCESS))
    return res;

  /* The new name must not exist yet. */
  res.err = cursor_seek(&cx.outer, &new_name, nullptr, MDBX_SET).err;
  if (unlikely(res.err != MDBX_NOTFOUND)) {
    if (res.err == MDBX_SUCCESS)
      res.err = MDBX_KEYEXIST;
    return res;
  }

  res.defer = osal_malloc(dbi_namelen(new_name));
  if (unlikely(!res.defer)) {
    res.err = MDBX_ENOMEM;
    return res;
  }
  memcpy(res.defer, new_name.iov_base, new_name.iov_len);
  new_name.iov_base = res.defer;

  /* Link the on-stack cursor into MainDB's cursor list while modifying. */
  cx.outer.next = txn->cursors[MAIN_DBI];
  txn->cursors[MAIN_DBI] = &cx.outer;

  MDBX_val data;
  data.iov_base = &txn->dbs[dbi];
  data.iov_len = sizeof(tree_t);
  res.err = cursor_put_checklen(&cx.outer, &new_name, &data, N_TREE | MDBX_NOOVERWRITE);
  if (likely(res.err == MDBX_SUCCESS)) {
    res.err = cursor_seek(&cx.outer, &old_name, nullptr, MDBX_SET).err;
    if (likely(res.err == MDBX_SUCCESS))
      res.err = cursor_del(&cx.outer, N_TREE);
    if (unlikely(res.err != MDBX_SUCCESS)) {
      /* The new record is already in place, so the transaction is broken. */
      txn->flags |= MDBX_TXN_ERROR;
    } else {
      /* Hand the old name buffer back to the caller and install the new one. */
      res.defer = env->kvs[dbi].name.iov_base;
      env->kvs[dbi].name = new_name;
    }
  }

  txn->cursors[MAIN_DBI] = cx.outer.next;
  return res;
}

/* Detaches the name buffer of the user table handle `dbi` from `env`.
 *
 * Clears the handle's flags and name, publishes the next sequence value for
 * the slot and, when the closed handle was the last one in use, shrinks
 * env->n_dbi past any trailing unnamed slots (never below CORE_DBS).
 *
 * Returns the former name buffer as a single-item deferred-free list, or
 * nullptr when `dbi` is out of range or the slot has no name. */
static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) {
  eASSERT(env, dbi >= CORE_DBS);
  if (unlikely(dbi >= env->n_dbi))
    return nullptr;

  const uint32_t next_seq = dbi_seq_next(env, dbi);
  defer_free_item_t *const released = env->kvs[dbi].name.iov_base;
  if (unlikely(!released))
    return released;

  env->dbs_flags[dbi] = 0;
  env->kvs[dbi].name.iov_len = 0;
  env->kvs[dbi].name.iov_base = nullptr;
  atomic_store32(&env->dbi_seqs[dbi], next_seq, mo_AcquireRelease);
  osal_flush_incoherent_cpu_writeback();
  released->next = nullptr;

  if (env->n_dbi == dbi + 1) {
    /* The topmost slot was closed: drop it together with every unnamed slot
     * directly beneath it. */
    size_t top = env->n_dbi;
    do {
      top -= 1;
      eASSERT(env, top >= CORE_DBS);
      eASSERT(env, !env->dbs_flags[top] && !env->kvs[top].name.iov_len && !env->kvs[top].name.iov_base);
    } while (top > CORE_DBS && !env->kvs[top - 1].name.iov_base);
    env->n_dbi = (unsigned)top;
  }

  return released;
}

/* Walks from `txn` up through its parents looking for a usable tree_t record
 * of the table `dbi`.
 *
 * For each transaction whose state for `dbi` has DBI_LINDO set:
 *  - DBI_VALID or DBI_OLDEN alone -> that transaction's dbs[dbi] is returned;
 *  - none of VALID/STALE/OLDEN    -> `fallback` is returned;
 *  - a STALE combination          -> the search continues in the parent.
 * `fallback` is also returned when the chain of parents is exhausted. */
__cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback) {
  const MDBX_txn *level = txn;
  do {
    tASSERT(txn, txn->n_dbi == level->n_dbi);
    const uint8_t state = dbi_state(level, dbi);
    if (state & DBI_LINDO) {
      const int kind = state & (DBI_VALID | DBI_STALE | DBI_OLDEN);
      if (kind == DBI_VALID || kind == DBI_OLDEN)
        return level->dbs + dbi;
      if (kind == 0)
        return fallback;
      if (kind != (DBI_VALID | DBI_STALE) && kind != (DBI_OLDEN | DBI_STALE)) {
        tASSERT(txn, !!"unexpected dig->dbi_state[dbi]");
      }
    }
    level = level->parent;
  } while (level);
  return fallback;
}

/* Closes the handle `dbi` via dbi_close_locked() and passes whatever it
 * detached to dbi_defer_release(), returning that call's result. */
int dbi_close_release(MDBX_env *env, MDBX_dbi dbi) {
  defer_free_item_t *const detached = dbi_close_locked(env, dbi);
  return dbi_defer_release(env, detached);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Converts a dirty-page-list capacity (in items) into the number of bytes to
 * request from the allocator.
 *
 * The item count is doubled when MDBX_DPL_PREALLOC_FOR_RADIXSORT is enabled.
 * The total, including the dpl_t header and the assumed malloc overhead, is
 * rounded up with ceil_powerof2() to a granule of
 * MDBX_PNL_GRANULATE * sizeof(void *) * 2 bytes, and the assumed overhead is
 * subtracted again from the result. */
static inline size_t dpl_size2bytes(ptrdiff_t size) {
  assert(size > CURSOR_STACK_SIZE && (size_t)size <= PAGELIST_LIMIT);
  /* Compile-time guarantee that the computation below cannot overflow. */
  STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) +
                    (PAGELIST_LIMIT * (MDBX_DPL_PREALLOC_FOR_RADIXSORT + 1)) * sizeof(dp_t) +
                    MDBX_PNL_GRANULATE * sizeof(void *) * 2 <
                SIZE_MAX / 4 * 3);
#if MDBX_DPL_PREALLOC_FOR_RADIXSORT
  size += size;
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
  const size_t granule = MDBX_PNL_GRANULATE * sizeof(void *) * 2;
  const size_t payload = MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(dpl_t) + size * sizeof(dp_t);
  return ceil_powerof2(payload, granule) - MDBX_ASSUME_MALLOC_OVERHEAD;
}

/* Inverse of dpl_size2bytes(): tells how many items fit into an allocation of
 * `bytes` bytes after the dpl_t header, halved when
 * MDBX_DPL_PREALLOC_FOR_RADIXSORT is enabled. */
static inline size_t dpl_bytes2size(const ptrdiff_t bytes) {
  const size_t slots = (bytes - sizeof(dpl_t)) / sizeof(dp_t);
#if MDBX_DPL_PREALLOC_FOR_RADIXSORT
  const size_t size = slots >> 1;
#else
  const size_t size = slots;
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
  assert(size > CURSOR_STACK_SIZE && size <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE);
  return size;
}

/* Releases the transaction's dirty-page list, if there is one, and clears the
 * pointer so the list cannot be reused afterwards. */
void dpl_free(MDBX_txn *txn) {
  dpl_t *const list = txn->tw.dirtylist;
  if (unlikely(!list))
    return;

  osal_free(list);
  txn->tw.dirtylist = nullptr;
}

/* (Re)allocates the transaction's dirty-page list for at least `size` items,
 * capped at PAGELIST_LIMIT.
 *
 * On success the list's `detent` is set from the allocated byte count (the
 * allocator's usable size when osal_malloc_usable_size is available), the
 * list is stored in txn->tw.dirtylist and returned. On allocation failure
 * nullptr is returned and txn->tw.dirtylist is left untouched. */
dpl_t *dpl_reserve(MDBX_txn *txn, size_t size) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  const size_t capped = (size < PAGELIST_LIMIT) ? size : PAGELIST_LIMIT;
  size_t bytes = dpl_size2bytes(capped);
  dpl_t *const resized = osal_realloc(txn->tw.dirtylist, bytes);
  if (unlikely(!resized))
    return resized;

#ifdef osal_malloc_usable_size
  bytes = osal_malloc_usable_size(resized);
#endif /* osal_malloc_usable_size */
  resized->detent = dpl_bytes2size(bytes);
  tASSERT(txn, txn->tw.dirtylist == nullptr || resized->length <= resized->detent);
  txn->tw.dirtylist = resized;
  return resized;
}

/* Prepares an empty dirty-page list for the write transaction.
 *
 * The wanted capacity is the smaller of env->options.dp_initial and
 * txn->geo.upper. An existing list is kept when its `detent` lies within
 * [wanted, 2 * wanted]; otherwise it is (re)allocated via dpl_reserve().
 * The list is then reset with dpl_clear().
 *
 * Returns MDBX_SUCCESS, or MDBX_ENOMEM when the (re)allocation fails. */
int dpl_alloc(MDBX_txn *txn) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  size_t wanna = txn->env->options.dp_initial;
  if (!(wanna < txn->geo.upper))
    wanna = txn->geo.upper;

#if MDBX_FORCE_ASSERTIONS || MDBX_DEBUG
  if (txn->tw.dirtylist) {
    /* zero the counters so the assertion inside dpl_reserve() cannot fire */
    txn->tw.dirtylist->length = 0;
    txn->tw.dirtylist->sorted = 0;
  }
#endif /* assertions enabled */

  const dpl_t *const present = txn->tw.dirtylist;
  const bool misfit = !present || present->detent < wanna || present->detent > wanna + wanna;
  if (unlikely(misfit) && unlikely(!dpl_reserve(txn, wanna)))
    return MDBX_ENOMEM;

  dpl_clear(txn->tw.dirtylist);
  return MDBX_SUCCESS;
}

/* Key extractor for the radix sort below: dirty-page items are keyed by pgno. */
#define MDBX_DPL_EXTRACT_KEY(ptr) ((ptr)->pgno)
/* Instantiates dp_radixsort() for dp_t. dpl_sort_slowpath() invokes it as
 * dp_radixsort(first item, item count) and falls back to another sort when
 * it yields a false value.
 * NOTE(review): the meaning of the two trailing macro arguments is defined by
 * RADIXSORT_IMPL, which is not visible here -- confirm before changing. */
RADIXSORT_IMPL(dp, dp_t, MDBX_DPL_EXTRACT_KEY, MDBX_DPL_PREALLOC_FOR_RADIXSORT, 1)

/* Strict "less than" on pgno, i.e. dp_sort() orders items by ascending pgno. */
#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno)
/* Instantiates dp_sort() for dp_t; in this file it is invoked as
 * dp_sort(begin, end).
 * NOTE(review): the `false` argument is interpreted by SORT_IMPL -- confirm. */
SORT_IMPL(dp_sort, false, dp_t, DP_SORT_CMP)

/* Sorts the whole dirty-page list of `txn` by pgno and marks it as fully
 * sorted (dl->sorted = dl->length). Returns the list.
 *
 * Strategy selection:
 *  - when the unsorted tail is at least MDBX_RADIXSORT_THRESHOLD items long,
 *    dp_radixsort() is tried over all items first;
 *  - otherwise, or when dp_radixsort() yields a false value, either
 *      a) the unsorted tail is copied to scratch space at the end of the
 *         allocation, sorted there and merged backwards with the already
 *         sorted head, or
 *      b) all items are sorted in place by dp_sort().
 *    Path (a) is taken when the sorted head is longer than unsorted / 4 + 4
 *    and there is enough scratch space (always assumed with
 *    MDBX_DPL_PREALLOC_FOR_RADIXSORT). */
__hot __noinline dpl_t *dpl_sort_slowpath(const MDBX_txn *txn) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  dpl_t *dl = txn->tw.dirtylist;
  /* items[0] and items[length + 1] are sentinels with pgno 0 and P_INVALID. */
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  const size_t unsorted = dl->length - dl->sorted;
  if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) || unlikely(!dp_radixsort(dl->items + 1, dl->length))) {
    if (dl->sorted > unsorted / 4 + 4 &&
        (MDBX_DPL_PREALLOC_FOR_RADIXSORT || dl->length + unsorted < dl->detent + dpl_gap_mergesort)) {
      dp_t *const sorted_begin = dl->items + 1;
      dp_t *const sorted_end = sorted_begin + dl->sorted;
      /* `end` is the end of the scratch area, `tmp` the start of the copy. */
      dp_t *const end =
          dl->items + (MDBX_DPL_PREALLOC_FOR_RADIXSORT ? dl->length + dl->length + 1 : dl->detent + dpl_reserve_gap);
      dp_t *const tmp = end - unsorted;
      assert(dl->items + dl->length + 1 < tmp);
      /* copy unsorted to the end of allocated space and sort it */
      memcpy(tmp, sorted_end, unsorted * sizeof(dp_t));
      dp_sort(tmp, tmp + unsorted);
      /* merge two parts from end to begin */
      /* w: write position (last slot of the list), l: last item of the sorted
       * head, r: last item of the sorted scratch copy. */
      dp_t *__restrict w = dl->items + dl->length;
      dp_t *__restrict l = dl->items + dl->sorted;
      dp_t *__restrict r = end - 1;
      do {
        /* Take the larger of *l and *r; both preprocessor branches below do
         * the same thing, the second one spelled without a branch. */
        const bool cmp = expect_with_probability(l->pgno > r->pgno, 0, .5);
#if defined(__LCC__) || __CLANG_PREREQ(13, 0) || !MDBX_HAVE_CMOV
        *w = cmp ? *l-- : *r--;
#else
        *w = cmp ? *l : *r;
        l -= cmp;
        r += (ptrdiff_t)cmp - 1;
#endif
        /* Once w meets l, the remaining head items are already in place. */
      } while (likely(--w > l));
      assert(r == tmp - 1);
      assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
      if (ASSERT_ENABLED())
        for (size_t i = 0; i <= dl->length; ++i)
          assert(dl->items[i].pgno < dl->items[i + 1].pgno);
    } else {
      dp_sort(dl->items + 1, dl->items + dl->length + 1);
      assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
    }
  } else {
    /* dp_radixsort() did the job. */
    assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  }
  dl->sorted = dl->length;
  return dl;
}

/* Generates dp_bsearch(): a search over a pgno-ordered run of dp_t items for
 * the first dirty-page whose pgno member is greater than or equal to id.
 * Callers in this file pass (first item, item count, pgno) and subtract
 * dl->items from the result to obtain an index, so the result is a position
 * inside the array rather than a ready-made index.
 * DP_SEARCH_CMP is the "item precedes id" predicate handed to SEARCH_IMPL. */
#define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id))
SEARCH_IMPL(dp_bsearch, dp_t, pgno_t, DP_SEARCH_CMP)

/* Looks `pgno` up in the dirty-page list of `txn` and returns an index into
 * dl->items.
 *
 * The unsorted tail is handled according to its length:
 *  - 0 items: nothing to do;
 *  - 1..7 items: the tail is scanned linearly and the matching index is
 *    returned immediately on a hit;
 *  - more: the whole list is sorted first via dpl_sort_slowpath().
 * If the tail scan did not hit, the result is whatever position dp_bsearch()
 * reports within the sorted head; callers are expected to compare the pgno at
 * the returned index themselves (this function does not verify a match on
 * that path). */
__hot __noinline size_t dpl_search(const MDBX_txn *txn, pgno_t pgno) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  dpl_t *dl = txn->tw.dirtylist;
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  if (AUDIT_ENABLED()) {
    /* Audit: the sorted head must be strictly ascending and must not contain
     * page numbers below NUM_METAS. */
    for (const dp_t *ptr = dl->items + dl->sorted; --ptr > dl->items;) {
      assert(ptr[0].pgno < ptr[1].pgno);
      assert(ptr[0].pgno >= NUM_METAS);
    }
  }

  /* Dispatch on the length of the unsorted tail. */
  switch (dl->length - dl->sorted) {
  default:
    /* sort a whole */
    dpl_sort_slowpath(txn);
    break;
  case 0:
    /* whole sorted cases */
    break;

#define LINEAR_SEARCH_CASE(N)                                                                                          \
  case N:                                                                                                              \
    if (dl->items[dl->length - N + 1].pgno == pgno)                                                                    \
      return dl->length - N + 1;                                                                                       \
    __fallthrough

    /* use linear scan until the threshold */
    /* Each case checks the N-th item from the end and falls through to the
     * next smaller N. */
    LINEAR_SEARCH_CASE(7); /* fall through */
    LINEAR_SEARCH_CASE(6); /* fall through */
    LINEAR_SEARCH_CASE(5); /* fall through */
    LINEAR_SEARCH_CASE(4); /* fall through */
    LINEAR_SEARCH_CASE(3); /* fall through */
    LINEAR_SEARCH_CASE(2); /* fall through */
  case 1:
    if (dl->items[dl->length].pgno == pgno)
      return dl->length;
    /* continue bsearch on the sorted part */
    break;
  }
  return dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items;
}

/* Debug helper: finds the page registered for `pgno` in the dirty-page list
 * of `txn` without modifying the list.
 *
 * The unsorted tail is scanned backwards first, then the sorted head is
 * probed with dp_bsearch(). Returns the page pointer stored in the matching
 * item, or nullptr when there is no list or no match. */
const page_t *debug_dpl_find(const MDBX_txn *txn, const pgno_t pgno) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  const dpl_t *const dl = txn->tw.dirtylist;
  if (!dl) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
    return nullptr;
  }

  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);

  /* unsorted tail, newest first */
  size_t idx = dl->length;
  while (idx > dl->sorted) {
    if (dl->items[idx].pgno == pgno)
      return dl->items[idx].ptr;
    --idx;
  }

  /* sorted head */
  if (dl->sorted == 0)
    return nullptr;
  const size_t pos = dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items;
  if (dl->items[pos].pgno != pgno)
    return nullptr;
  return dl->items[pos].ptr;
}

void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  dpl_t *dl = txn->tw.dirtylist;
  assert((intptr_t)i > 0 && i <= dl->length);
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  dl->pages_including_loose -= npages;
  dl->sorted -= dl->sorted >= i;
  dl->length -= 1;
  memmove(dl->items + i, dl->items + i + 1, (dl->length - i + 2) * sizeof(dl->items[0]));
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
}

/* Registers the page `page` (number `pgno`, spanning `npages` pages) in the
 * dirty-page list of `txn`.
 *
 * Without MDBX_WRITEMAP the current txn->tw.dirtylru value is stored in the
 * size_t slot located immediately before the page. The list is grown through
 * dpl_reserve() when it is full. The new item is either inserted directly
 * into the sorted head (when that is cheap, see the long comment below) or
 * appended to the unsorted tail.
 *
 * Returns MDBX_SUCCESS, MDBX_TXN_FULL when the list already reached
 * PAGELIST_LIMIT, MDBX_ENOMEM when growing fails, or MDBX_PROBLEM when audit
 * is enabled and `pgno` is already present. */
int __must_check_result dpl_append(MDBX_txn *txn, pgno_t pgno, page_t *page, size_t npages) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  const dp_t dp = {page, pgno, (pgno_t)npages};
  if ((txn->flags & MDBX_WRITEMAP) == 0) {
    /* Stamp the page with the current LRU value, kept just before the page. */
    size_t *const ptr = ptr_disp(page, -(ptrdiff_t)sizeof(size_t));
    *ptr = txn->tw.dirtylru;
  }

  dpl_t *dl = txn->tw.dirtylist;
  tASSERT(txn, dl->length <= PAGELIST_LIMIT + MDBX_PNL_GRANULATE);
  tASSERT(txn, dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  if (AUDIT_ENABLED()) {
    /* Audit: the page must not be in the list yet. */
    for (size_t i = dl->length; i > 0; --i) {
      assert(dl->items[i].pgno != dp.pgno);
      if (unlikely(dl->items[i].pgno == dp.pgno)) {
        ERROR("Page %u already exist in the DPL at %zu", dp.pgno, i);
        return MDBX_PROBLEM;
      }
    }
  }

  if (unlikely(dl->length == dl->detent)) {
    if (unlikely(dl->detent >= PAGELIST_LIMIT)) {
      ERROR("DPL is full (PAGELIST_LIMIT %zu)", PAGELIST_LIMIT);
      return MDBX_TXN_FULL;
    }
    /* Growth policy: double while small, then grow by half. */
    const size_t size = (dl->detent < MDBX_PNL_INITIAL * 42) ? dl->detent + dl->detent : dl->detent + dl->detent / 2;
    dl = dpl_reserve(txn, size);
    if (unlikely(!dl))
      return MDBX_ENOMEM;
    tASSERT(txn, dl->length < dl->detent);
  }

  /* Sorting is needed for fast lookups, so several tactics are combined:
   *  1) Keep the list ordered when items naturally arrive in order.
   *  2) Append to an unsorted tail, which is sorted and merged with the
   *     sorted head on demand; while the tail is short it is searched by
   *     scanning, which avoids a large re-sort.
   *  3) If the unsorted tail is short and the new item belongs close to the
   *     end of the sorted head, it is cheaper to insert it in place at once.
   *
   * Algorithmically:
   *  - appending to the unsorted tail is right only when an insertion would
   *    be expensive, i.e. the item's target position is far from the end;
   *  - for a quick check it is enough to compare the new item with the one
   *    that is the maximum acceptable distance away from the end;
   *  - if the list is shorter than that, or the item at that position is
   *    less than the new one, then shift items and insert into the sorted
   *    head;
   *  - if the unsorted tail is longer than that, or the item at that
   *    position is greater, then append to the unsorted tail. */

  dl->pages_including_loose += npages;
  dp_t *i = dl->items + dl->length;

  const ptrdiff_t pivot = (ptrdiff_t)dl->length - dpl_insertion_threshold;
#if MDBX_HAVE_CMOV
  /* Reads the sentinel items[0] (pgno 0) when the list is shorter than the
   * threshold, so the comparison below needs no extra branch. */
  const pgno_t pivot_pgno =
      dl->items[(dl->length < dpl_insertion_threshold) ? 0 : dl->length - dpl_insertion_threshold].pgno;
#endif /* MDBX_HAVE_CMOV */

  /* copy the stub beyond the end */
  i[2] = i[1];
  dl->length += 1;

  if (likely(pivot <= (ptrdiff_t)dl->sorted) &&
#if MDBX_HAVE_CMOV
      pivot_pgno < dp.pgno) {
#else
      (pivot <= 0 || dl->items[pivot].pgno < dp.pgno)) {
#endif /* MDBX_HAVE_CMOV */
    dl->sorted += 1;

    /* shift the unsorted tail */
    while (i >= dl->items + dl->sorted) {
#if !defined(__GNUC__) /* trying to avoid a memmove() call */
      i[1] = *i;
#elif MDBX_WORDBITS == 64 && (defined(__SIZEOF_INT128__) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128))
      STATIC_ASSERT(sizeof(dp) == sizeof(__uint128_t));
      ((__uint128_t *)i)[1] = *(volatile __uint128_t *)i;
#else
    i[1].ptr = i->ptr;
    i[1].pgno = i->pgno;
    i[1].npages = i->npages;
#endif
      --i;
    }
    /* find the proper position while shifting the sorted items */
    while (i->pgno > pgno) {
      tASSERT(txn, i > dl->items);
      i[1] = *i;
      --i;
    }
    tASSERT(txn, i->pgno < dp.pgno);
  }

  /* Either the slot freed by the shifting above, or the end of the tail. */
  i[1] = dp;
  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  assert(dl->sorted <= dl->length);
  return MDBX_SUCCESS;
}

/* Consistency check of the dirty-page list of a write transaction.
 *
 * Returns true when there is no list, when audit is disabled, or when all
 * audited invariants hold; returns false on the first violated invariant.
 * Each invariant is checked twice on purpose: by an assertion (effective when
 * assertions are enabled) and by an explicit test that makes the function
 * return false.
 *
 * Audited per item: the stored pgno matches the page header; the page age is
 * sane (non-WRITEMAP only); the page is loose or modifiable; the page range
 * ends before txn->geo.first_unallocated; within the sorted head, the next
 * item starts at or after the end of this item's range; the page range does
 * not hit txn->tw.repnl.
 * Audited overall: loose and total page counters; no retired page is still
 * present in the list. */
__cold bool dpl_check(MDBX_txn *txn) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  const dpl_t *const dl = txn->tw.dirtylist;
  if (!dl) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
    return true;
  }
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
  tASSERT(txn,
          txn->tw.dirtyroom + dl->length == (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));

  if (!AUDIT_ENABLED())
    return true;

  size_t loose = 0, pages = 0;
  for (size_t i = dl->length; i > 0; --i) {
    const page_t *const dp = dl->items[i].ptr;
    /* Items without a page pointer are skipped. */
    if (!dp)
      continue;

    tASSERT(txn, dp->pgno == dl->items[i].pgno);
    if (unlikely(dp->pgno != dl->items[i].pgno))
      return false;

    if ((txn->flags & MDBX_WRITEMAP) == 0) {
      const uint32_t age = dpl_age(txn, i);
      /* NOTE(review): the assertion uses `<` while the runtime check uses `>`,
       * so age == UINT32_MAX / 3 trips only the assertion -- confirm whether
       * the boundary value is meant to be valid. */
      tASSERT(txn, age < UINT32_MAX / 3);
      if (unlikely(age > UINT32_MAX / 3))
        return false;
    }

    tASSERT(txn, dp->flags == P_LOOSE || is_modifable(txn, dp));
    if (dp->flags == P_LOOSE) {
      loose += 1;
    } else if (unlikely(!is_modifable(txn, dp)))
      return false;

    const unsigned num = dpl_npages(dl, i);
    pages += num;
    tASSERT(txn, txn->geo.first_unallocated >= dp->pgno + num);
    if (unlikely(txn->geo.first_unallocated < dp->pgno + num))
      return false;

    if (i < dl->sorted) {
      /* Page ranges inside the sorted head must not overlap. */
      tASSERT(txn, dl->items[i + 1].pgno >= dp->pgno + num);
      if (unlikely(dl->items[i + 1].pgno < dp->pgno + num))
        return false;
    }

    /* Neither the first nor the last page of the range may be listed in
     * txn->tw.repnl: the first page must not match, and both lookups must
     * land on the same position. */
    const size_t rpa = pnl_search(txn->tw.repnl, dp->pgno, txn->geo.first_unallocated);
    tASSERT(txn, rpa > MDBX_PNL_GETSIZE(txn->tw.repnl) || txn->tw.repnl[rpa] != dp->pgno);
    if (rpa <= MDBX_PNL_GETSIZE(txn->tw.repnl) && unlikely(txn->tw.repnl[rpa] == dp->pgno))
      return false;
    if (num > 1) {
      const size_t rpb = pnl_search(txn->tw.repnl, dp->pgno + num - 1, txn->geo.first_unallocated);
      tASSERT(txn, rpa == rpb);
      if (unlikely(rpa != rpb))
        return false;
    }
  }

  tASSERT(txn, loose == txn->tw.loose_count);
  if (unlikely(loose != txn->tw.loose_count))
    return false;

  tASSERT(txn, pages == dl->pages_including_loose);
  if (unlikely(pages != dl->pages_including_loose))
    return false;

  /* No retired page may still be registered as dirty. */
  for (size_t i = 1; i <= MDBX_PNL_GETSIZE(txn->tw.retired_pages); ++i) {
    const page_t *const dp = debug_dpl_find(txn, txn->tw.retired_pages[i]);
    tASSERT(txn, !dp);
    if (unlikely(dp))
      return false;
  }

  return true;
}

/*----------------------------------------------------------------------------*/

/* Halves the LRU counter of `txn` and of every parent transaction, and
 * halves the LRU stamp kept in the size_t slot right before each page
 * registered in the corresponding dirty-page lists. */
__noinline void dpl_lru_reduce(MDBX_txn *txn) {
  NOTICE("lru-reduce %u -> %u", txn->tw.dirtylru, txn->tw.dirtylru >> 1);
  tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
  do {
    txn->tw.dirtylru >>= 1;
    dpl_t *const list = txn->tw.dirtylist;
    size_t idx = 0;
    while (++idx <= list->length) {
      size_t *const stamp = ptr_disp(list->items[idx].ptr, -(ptrdiff_t)sizeof(size_t));
      *stamp >>= 1;
    }
    txn = txn->parent;
  } while (txn);
}

/* Removes from the dirty-page list of `txn` every item whose pgno is listed
 * in the page-number list `pl`.
 *
 * When `spilled` is true each entry of `pl` is shifted right by one bit
 * before being compared (its low bit is asserted to be clear). For every
 * removed item the page counter is reduced and, unless MDBX_AVOID_MSYNC is
 * combined with MDBX_WRITEMAP, the shadow page is released. The number of
 * removed items is given back to txn->tw.dirtyroom.
 *
 * The work is a single merge-like pass over both ordered sequences:
 *  - the "scan loop" advances until the first common pgno is found and
 *    returns without changes if there is none;
 *  - the "update loop" then compacts the list in place, with `r` as the read
 *    index and `w` as the write index. */
void dpl_sift(MDBX_txn *const txn, pnl_t pl, const bool spilled) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  if (MDBX_PNL_GETSIZE(pl) && txn->tw.dirtylist->length) {
    tASSERT(txn, pnl_check_allocated(pl, (size_t)txn->geo.first_unallocated << spilled));
    dpl_t *dl = dpl_sort(txn);

    /* Scanning in ascend order */
    /* `pl` is walked forwards or backwards depending on MDBX_PNL_ASCENDING so
     * that page numbers are always visited in ascending order. */
    const intptr_t step = MDBX_PNL_ASCENDING ? 1 : -1;
    const intptr_t begin = MDBX_PNL_ASCENDING ? 1 : MDBX_PNL_GETSIZE(pl);
    const intptr_t end = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(pl) + 1 : 0;
    tASSERT(txn, pl[begin] <= pl[end - step]);

    /* Start at the position dpl_search() reports for the smallest pgno. */
    size_t w, r = dpl_search(txn, pl[begin] >> spilled);
    tASSERT(txn, dl->sorted == dl->length);
    for (intptr_t i = begin; r <= dl->length;) { /* scan loop */
      assert(i != end);
      tASSERT(txn, !spilled || (pl[i] & 1) == 0);
      pgno_t pl_pgno = pl[i] >> spilled;
      pgno_t dp_pgno = dl->items[r].pgno;
      if (likely(dp_pgno != pl_pgno)) {
        /* Advance whichever side holds the smaller page number. */
        const bool cmp = dp_pgno < pl_pgno;
        r += cmp;
        i += cmp ? 0 : step;
        if (likely(i != end))
          continue;
        /* `pl` is exhausted without any match: nothing to remove. */
        return;
      }

      /* update loop */
      unsigned npages;
      w = r;
    remove_dl:
      /* Drop item `r`: account for its pages and release its shadow. */
      npages = dpl_npages(dl, r);
      dl->pages_including_loose -= npages;
      if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP))
        page_shadow_release(txn->env, dl->items[r].ptr, npages);
      ++r;
    next_i:
      i += step;
      if (unlikely(i == end)) {
        /* `pl` is exhausted: keep all remaining items. */
        while (r <= dl->length)
          dl->items[w++] = dl->items[r++];
      } else {
        while (r <= dl->length) {
          assert(i != end);
          tASSERT(txn, !spilled || (pl[i] & 1) == 0);
          pl_pgno = pl[i] >> spilled;
          dp_pgno = dl->items[r].pgno;
          if (dp_pgno < pl_pgno)
            dl->items[w++] = dl->items[r++];
          else if (dp_pgno > pl_pgno)
            goto next_i;
          else
            goto remove_dl;
        }
      }
      /* r - w items were removed; the compacted list stays fully sorted. */
      dl->sorted = dpl_setlen(dl, w - 1);
      txn->tw.dirtyroom += r - w;
      tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                       (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));
      return;
    }
  }
}

/* Releases the shadow page of every item in the dirty-page list of `txn`
 * and then resets the list with dpl_clear(). */
void dpl_release_shadows(MDBX_txn *txn) {
  tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
  dpl_t *const list = txn->tw.dirtylist;
  MDBX_env *const owner = txn->env;

  size_t idx = 0;
  while (++idx <= list->length)
    page_shadow_release(owner, list->items[idx].ptr, dpl_npages(list, idx));

  dpl_clear(list);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Reads the meta pages from env->lazy_fd and stores the chosen one in *dest.
 *
 * `dest` is zeroed and pre-signed with DATASIGN_WEAK, then every meta page is
 * visited at least twice (NUM_METAS * 2 passes, plus one extra pass each time
 * a non-steady meta is latched while `lck_exclusive` is zero). Within a pass
 * the page is read twice and the reads are repeated until both copies are
 * identical, with a budget of 42 retries. Candidates accepted by
 * meta_validate() compete through meta_choice_recent()/meta_choice_steady(),
 * or are selected by number when env->stuck_meta >= 0.
 *
 * Returns MDBX_SUCCESS when a usable meta was latched. Otherwise returns the
 * error from osal_filesize()/osal_pread(), the status of the last
 * meta_validate() call, or MDBX_CORRUPTED. With a non-zero `mode_bits`, an
 * MDBX_ENODATA result for an empty file on the very first read is returned
 * as-is after a notice instead of being logged as an error. */
__cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, const mdbx_mode_t mode_bits) {
  memset(dest, 0, sizeof(meta_t));
  int rc = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  unaligned_poke_u64(4, dest->sign, DATASIGN_WEAK);
  /* Default verdict until some meta page passes validation. */
  rc = MDBX_CORRUPTED;

  /* Read twice all meta pages so we can find the latest one. */
  unsigned loop_limit = NUM_METAS * 2;
  /* We don't know the page size on first time. So, just guess it. */
  unsigned guess_pagesize = 0;
  for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) {
    const unsigned meta_number = loop_count % NUM_METAS;
    /* Page size used for the offset: the guess maintained by meta_validate()
     * if any, else env->ps after the first round, else the system page size. */
    const unsigned offset = (guess_pagesize             ? guess_pagesize
                             : (loop_count > NUM_METAS) ? env->ps
                                                        : globals.sys_pagesize) *
                            meta_number;

    char buffer[MDBX_MIN_PAGESIZE];
    unsigned retryleft = 42;
    while (1) {
      TRACE("reading meta[%d]: offset %u, bytes %u, retry-left %u", meta_number, offset, MDBX_MIN_PAGESIZE, retryleft);
      int err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset);
      if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && env->dxb_mmap.filesize == 0 &&
          mode_bits /* non-zero for DB creation */ != 0) {
        NOTICE("read meta: empty file (%d, %s)", err, mdbx_strerror(err));
        return err;
      }
#if defined(_WIN32) || defined(_WIN64)
      /* On a lock violation: yield, retry once right away, and restart the
       * pass if the violation persists and retries are left. */
      if (err == ERROR_LOCK_VIOLATION) {
        SleepEx(0, true);
        err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset);
        if (err == ERROR_LOCK_VIOLATION && --retryleft) {
          WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
          continue;
        }
      }
#endif /* Windows */
      if (err != MDBX_SUCCESS) {
        ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
        return err;
      }

      /* Second read of the same range, used to detect a concurrent update. */
      char again[MDBX_MIN_PAGESIZE];
      err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset);
#if defined(_WIN32) || defined(_WIN64)
      if (err == ERROR_LOCK_VIOLATION) {
        SleepEx(0, true);
        err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset);
        if (err == ERROR_LOCK_VIOLATION && --retryleft) {
          WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
          continue;
        }
      }
#endif /* Windows */
      if (err != MDBX_SUCCESS) {
        ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
        return err;
      }

      /* Done when both copies agree, or when the retry budget is exhausted. */
      if (memcmp(buffer, again, MDBX_MIN_PAGESIZE) == 0 || --retryleft == 0)
        break;

      VERBOSE("meta[%u] was updated, re-read it", meta_number);
    }

    if (!retryleft) {
      ERROR("meta[%u] is too volatile, skip it", meta_number);
      continue;
    }

    page_t *const page = (page_t *)buffer;
    meta_t *const meta = page_meta(page);
    rc = meta_validate(env, meta, page, meta_number, &guess_pagesize);
    if (rc != MDBX_SUCCESS)
      continue;

    /* Decide whether this meta replaces the one latched so far. */
    bool latch;
    if (env->stuck_meta >= 0)
      latch = (meta_number == (unsigned)env->stuck_meta);
    else if (meta_bootid_match(meta))
      latch = meta_choice_recent(meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), dest->unsafe_txnid,
                                 SIGN_IS_STEADY(dest->unsafe_sign));
    else
      latch = meta_choice_steady(meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), dest->unsafe_txnid,
                                 SIGN_IS_STEADY(dest->unsafe_sign));
    if (latch) {
      *dest = *meta;
      if (!lck_exclusive && !meta_is_steady(dest))
        loop_limit += 1; /* LY: should re-read to hush race with update */
      VERBOSE("latch meta[%u]", meta_number);
    }
  }

  /* Nothing latched (pagesize still zero), or the latched meta is neither
   * steady nor an acceptable weak one while no meta is forced by stuck_meta. */
  if (dest->pagesize == 0 ||
      (env->stuck_meta < 0 && !(meta_is_steady(dest) || meta_weak_acceptable(env, dest, lck_exclusive)))) {
    ERROR("%s", "no usable meta-pages, database is corrupted");
    if (rc == MDBX_SUCCESS) {
      /* TODO: try to restore the database by fully checking b-tree structure
       * for the each meta page, if the corresponding option was given */
      return MDBX_CORRUPTED;
    }
    return rc;
  }

  return MDBX_SUCCESS;
}

__cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const pgno_t size_pgno, pgno_t limit_pgno,
                      const enum resize_mode mode) {
  /* Acquire guard to avoid collision between read and write txns
   * around geo_in_bytes and dxb_mmap */
#if defined(_WIN32) || defined(_WIN64)
  imports.srwl_AcquireExclusive(&env->remap_guard);
  int rc = MDBX_SUCCESS;
  mdbx_handle_array_t *suspended = nullptr;
  mdbx_handle_array_t array_onstack;
#else
  int rc = osal_fastmutex_acquire(&env->remap_guard);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;
#endif

  const size_t prev_size = env->dxb_mmap.current;
  const size_t prev_limit = env->dxb_mmap.limit;
  const pgno_t prev_limit_pgno = bytes2pgno(env, prev_limit);
  eASSERT(env, limit_pgno >= size_pgno);
  eASSERT(env, size_pgno >= used_pgno);
  if (mode < explicit_resize && size_pgno <= prev_limit_pgno) {
    /* The actual mapsize may be less since the geo.upper may be changed
     * by other process. Avoids remapping until it necessary. */
    limit_pgno = prev_limit_pgno;
  }
  const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
  const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
  const void *const prev_map = env->dxb_mmap.base;

  VERBOSE("resize(env-flags 0x%x, mode %d) datafile/mapping: "
          "present %" PRIuPTR " -> %" PRIuPTR ", "
          "limit %" PRIuPTR " -> %" PRIuPTR,
          env->flags, mode, prev_size, size_bytes, prev_limit, limit_bytes);

  eASSERT(env, limit_bytes >= size_bytes);
  eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno);
  eASSERT(env, bytes2pgno(env, limit_bytes) >= limit_pgno);

  unsigned mresize_flags = env->flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC);
  if (mode >= impilict_shrink)
    mresize_flags |= txn_shrink_allowed;

  if (limit_bytes == env->dxb_mmap.limit && size_bytes == env->dxb_mmap.current && size_bytes == env->dxb_mmap.filesize)
    goto bailout;

  /* При использовании MDBX_NOSTICKYTHREADS с транзакциями могут работать любые
   * потоки и у нас нет информации о том, какие именно. Поэтому нет возможности
   * выполнить remap-действия требующие приостановки работающих с БД потоков. */
  if ((env->flags & MDBX_NOSTICKYTHREADS) == 0) {
#if defined(_WIN32) || defined(_WIN64)
    if ((size_bytes < env->dxb_mmap.current && mode > implicit_grow) || limit_bytes != env->dxb_mmap.limit) {
      /* 1) Windows allows only extending a read-write section, but not a
       *    corresponding mapped view. Therefore in other cases we must suspend
       *    the local threads for safe remap.
       * 2) At least on Windows 10 1803 the entire mapped section is unavailable
       *    for short time during NtExtendSection() or VirtualAlloc() execution.
       * 3) Under Wine runtime environment on Linux a section extending is not
       *    supported.
       *
       * THEREFORE LOCAL THREADS SUSPENDING IS ALWAYS REQUIRED! */
      array_onstack.limit = ARRAY_LENGTH(array_onstack.handles);
      array_onstack.count = 0;
      suspended = &array_onstack;
      rc = osal_suspend_threads_before_remap(env, &suspended);
      if (rc != MDBX_SUCCESS) {
        ERROR("failed suspend-for-remap: errcode %d", rc);
        goto bailout;
      }
      mresize_flags |=
          (mode < explicit_resize) ? MDBX_MRESIZE_MAY_UNMAP : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
    }
#else  /* Windows */
    lck_t *const lck = env->lck_mmap.lck;
    if (mode == explicit_resize && limit_bytes != env->dxb_mmap.limit) {
      mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
      if (lck) {
        int err = lck_rdt_lock(env) /* lock readers table until remap done */;
        if (unlikely(MDBX_IS_ERROR(err))) {
          rc = err;
          goto bailout;
        }

        /* looking for readers from this process */
        const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
        eASSERT(env, mode == explicit_resize);
        for (size_t i = 0; i < snap_nreaders; ++i) {
          if (lck->rdt[i].pid.weak == env->pid && lck->rdt[i].tid.weak != osal_thread_self()) {
            /* the base address of the mapping can't be changed since
             * the other reader thread from this process exists. */
            lck_rdt_unlock(env);
            mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE);
            break;
          }
        }
      }
    }
#endif /* ! Windows */
  }

  const pgno_t aligned_munlock_pgno =
      (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) ? 0 : bytes2pgno(env, size_bytes);
  if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) {
    mincore_clean_cache(env);
    if ((env->flags & MDBX_WRITEMAP) && env->lck->unsynced_pages.weak) {
#if MDBX_ENABLE_PGOP_STAT
      env->lck->pgops.msync.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
      rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno), MDBX_SYNC_NONE);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
    }
  }
  munlock_after(env, aligned_munlock_pgno, size_bytes);

  if (size_bytes < prev_size && mode > implicit_grow) {
    NOTICE("resize-MADV_%s %u..%u", (env->flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno,
           bytes2pgno(env, prev_size));
    const uint32_t munlocks_before = atomic_load32(&env->lck->mlcnt[1], mo_Relaxed);
    rc = MDBX_RESULT_TRUE;
#if defined(MADV_REMOVE)
    if (env->flags & MDBX_WRITEMAP)
      rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, MADV_REMOVE)
               ? ignore_enosys_and_eagain(errno)
               : MDBX_SUCCESS;
#endif /* MADV_REMOVE */
#if defined(MADV_DONTNEED)
    if (rc == MDBX_RESULT_TRUE)
      rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, MADV_DONTNEED)
               ? ignore_enosys_and_eagain(errno)
               : MDBX_SUCCESS;
#elif defined(POSIX_MADV_DONTNEED)
    if (rc == MDBX_RESULT_TRUE)
      rc = ignore_enosys(
          posix_madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, POSIX_MADV_DONTNEED));
#elif defined(POSIX_FADV_DONTNEED)
    if (rc == MDBX_RESULT_TRUE)
      rc = ignore_enosys(posix_fadvise(env->lazy_fd, size_bytes, prev_size - size_bytes, POSIX_FADV_DONTNEED));
#endif /* MADV_DONTNEED */
    if (unlikely(MDBX_IS_ERROR(rc))) {
      const uint32_t mlocks_after = atomic_load32(&env->lck->mlcnt[0], mo_Relaxed);
      if (rc == MDBX_EINVAL) {
        const int severity = (mlocks_after - munlocks_before) ? MDBX_LOG_NOTICE : MDBX_LOG_WARN;
        if (LOG_ENABLED(severity))
          debug_log(severity, __func__, __LINE__,
                    "%s-madvise: ignore EINVAL (%d) since some pages maybe "
                    "locked (%u/%u mlcnt-processes)",
                    "resize", rc, mlocks_after, munlocks_before);
      } else {
        ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", "mresize", "DONTNEED", size_bytes,
              prev_size - size_bytes, mlocks_after, munlocks_before, rc);
        goto bailout;
      }
    } else
      env->lck->discarded_tail.weak = size_pgno;
  }

  rc = osal_mresize(mresize_flags, &env->dxb_mmap, size_bytes, limit_bytes);
  eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);

  if (rc == MDBX_SUCCESS) {
    eASSERT(env, limit_bytes == env->dxb_mmap.limit);
    eASSERT(env, size_bytes <= env->dxb_mmap.filesize);
    if (mode == explicit_resize)
      eASSERT(env, size_bytes == env->dxb_mmap.current);
    else
      eASSERT(env, size_bytes <= env->dxb_mmap.current);
    env->lck->discarded_tail.weak = size_pgno;
    const bool readahead =
        !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size);
    const bool force = limit_bytes != prev_limit || env->dxb_mmap.base != prev_map
#if defined(_WIN32) || defined(_WIN64)
                       || prev_size > size_bytes
#endif /* Windows */
        ;
    rc = dxb_set_readahead(env, size_pgno, readahead, force);
  }

bailout:
  if (rc == MDBX_SUCCESS) {
    eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
    eASSERT(env, limit_bytes == env->dxb_mmap.limit);
    eASSERT(env, size_bytes <= env->dxb_mmap.filesize);
    if (mode == explicit_resize)
      eASSERT(env, size_bytes == env->dxb_mmap.current);
    else
      eASSERT(env, size_bytes <= env->dxb_mmap.current);
    /* update env-geo to avoid influences */
    env->geo_in_bytes.now = env->dxb_mmap.current;
    env->geo_in_bytes.upper = env->dxb_mmap.limit;
    env_options_adjust_defaults(env);
#ifdef ENABLE_MEMCHECK
    if (prev_limit != env->dxb_mmap.limit || prev_map != env->dxb_mmap.base) {
      VALGRIND_DISCARD(env->valgrind_handle);
      env->valgrind_handle = 0;
      if (env->dxb_mmap.limit)
        env->valgrind_handle = VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx");
    }
#endif /* ENABLE_MEMCHECK */
  } else {
    if (rc != MDBX_UNABLE_EXTEND_MAPSIZE && rc != MDBX_EPERM) {
      ERROR("failed resize datafile/mapping: "
            "present %" PRIuPTR " -> %" PRIuPTR ", "
            "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
            prev_size, size_bytes, prev_limit, limit_bytes, rc);
    } else {
      WARNING("unable resize datafile/mapping: "
              "present %" PRIuPTR " -> %" PRIuPTR ", "
              "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
              prev_size, size_bytes, prev_limit, limit_bytes, rc);
      eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
    }
    if (!env->dxb_mmap.base) {
      env->flags |= ENV_FATAL_ERROR;
      if (env->txn)
        env->txn->flags |= MDBX_TXN_ERROR;
      rc = MDBX_PANIC;
    }
  }

#if defined(_WIN32) || defined(_WIN64)
  int err = MDBX_SUCCESS;
  imports.srwl_ReleaseExclusive(&env->remap_guard);
  if (suspended) {
    err = osal_resume_threads_after_remap(suspended);
    if (suspended != &array_onstack)
      osal_free(suspended);
  }
#else
  if (env->lck_mmap.lck && (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) != 0)
    lck_rdt_unlock(env);
  int err = osal_fastmutex_release(&env->remap_guard);
#endif /* Windows */
  if (err != MDBX_SUCCESS) {
    FATAL("failed resume-after-remap: errcode %d", err);
    return MDBX_PANIC;
  }
  return rc;
}
#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
/* Keeps the Valgrind/ASAN view of the mapped datafile in sync with page allocation.
 *
 * With a non-NULL `txn` (transaction start) the range from the beginning of the map up to
 * `txn->geo.first_unallocated` is marked as accessible, and `env->poison_edge` is raised to cover it.
 *
 * With `txn == NULL` (transaction end) the pages between the boundary reported by
 * mvcc_largest_this() and the current `env->poison_edge` are marked as inaccessible,
 * and the edge is lowered to that boundary. */
void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) {
#if !defined(__SANITIZE_ADDRESS__)
  /* Without ASAN the function is useful only when actually running under Valgrind. */
  if (!RUNNING_ON_VALGRIND)
    return;
#endif

  if (txn) {
    /* Transaction start: expose everything the transaction may legitimately touch. */
    const pgno_t allocated = txn->geo.first_unallocated;
    if (env->poison_edge < allocated)
      env->poison_edge = allocated;
    VALGRIND_MAKE_MEM_DEFINED(env->dxb_mmap.base, pgno2bytes(env, allocated));
    MDBX_ASAN_UNPOISON_MEMORY_REGION(env->dxb_mmap.base, pgno2bytes(env, allocated));
    /* The rest is left untouched, it should be poisoned already. */
    return;
  }

  /* Transaction end. */
  if (env->pid != osal_getpid())
    return; /* resurrect after fork */

  bool locked_here = false;
  pgno_t keep;
  if (env_owned_wrtxn(env)) {
    /* Inside a write transaction: start from the most recent meta-page. */
    keep = meta_recent(env, &env->basal_txn->tw.troika).ptr_v->geometry.first_unallocated;
  } else {
    if (!(env->flags & MDBX_RDONLY)) {
      /* Not read-only, so the write-transaction lock is taken (second argument `true`);
       * failing to get it means a write txn is running, therefore no memory range should be poisoned. */
      if (lck_txn_lock(env, true) != MDBX_SUCCESS)
        return;
      locked_here = true;
    }
    /* No write transaction (or read-only mode without the wlock mutex). */
    keep = NUM_METAS;
  }

  /* NOTE(review): mvcc_largest_this() presumably widens `keep` to cover pages
   * still visible to active readers - confirm against its definition. */
  keep = mvcc_largest_this(env, keep);
  const pgno_t old_edge = env->poison_edge;
  if (old_edge > keep) {
    eASSERT(env, keep >= NUM_METAS);
    env->poison_edge = keep;
    VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, keep)),
                               pgno2bytes(env, old_edge - keep));
    MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, keep)),
                                   pgno2bytes(env, old_edge - keep));
  }

  if (locked_here)
    lck_txn_unlock(env);
}
#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */

/* Turn on/off readahead. It's harmful when the DB is larger than RAM.
 *
 * env         - environment whose datafile mapping is hinted.
 * edge        - page number bounding the hinted range; stored into the anchor on success.
 * enable      - true to request normal (read-ahead) access, false to request random access.
 * force_whole - true to re-apply the hint starting from the beginning of the map.
 *
 * The previous state lives in env->lck->readahead_anchor: bit 0 holds the on/off flag
 * and the remaining bits hold the previous edge. Without a toggle only the span between
 * the previous and the new edge is hinted.
 *
 * Returns MDBX_SUCCESS, or the error code of a failed fcntl/madvise/fadvise call. */
__cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, const bool enable, const bool force_whole) {
  eASSERT(env, edge >= NUM_METAS && edge <= MAX_PAGENO + 1);
  eASSERT(env, (enable & 1) == (enable != 0));
  const bool toggle = force_whole || ((enable ^ env->lck->readahead_anchor) & 1) || !env->lck->readahead_anchor;
  const pgno_t prev_edge = env->lck->readahead_anchor >> 1;
  const size_t map_limit = env->dxb_mmap.limit;
  const pgno_t lower_pgno = (prev_edge < edge) ? prev_edge : edge;
  const pgno_t upper_pgno = (prev_edge < edge) ? edge : prev_edge;

  /* Start of the hinted range, clamped to the mapping limit. */
  size_t offset = 0;
  if (!toggle)
    offset = pgno_align2os_bytes(env, lower_pgno);
  if (offset > map_limit)
    offset = map_limit;

  /* End of the hinted range, clamped to the mapping limit. */
  size_t range_end = pgno_align2os_bytes(env, upper_pgno);
  if (range_end > map_limit)
    range_end = map_limit;
  const size_t length = range_end - offset;

  eASSERT(env, 0 <= (intptr_t)length);
  if (length == 0)
    return MDBX_SUCCESS;

  NOTICE("readahead %s %u..%u", enable ? "ON" : "OFF", bytes2pgno(env, offset), bytes2pgno(env, offset + length));

#if defined(F_RDAHEAD)
  if (toggle && unlikely(fcntl(env->lazy_fd, F_RDAHEAD, enable) == -1))
    return errno;
#endif /* F_RDAHEAD */

  /* `err` keeps a non-error value on the branches that issue no call,
   * so one check after each preprocessor ladder is sufficient. */
  int err = MDBX_SUCCESS;
  void *const ptr = ptr_disp(env->dxb_mmap.base, offset);
  if (!enable) {
    mincore_clean_cache(env);
#if defined(MADV_RANDOM)
    err = madvise(ptr, length, MADV_RANDOM) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
#elif defined(POSIX_MADV_RANDOM)
    err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_RANDOM));
#elif defined(POSIX_FADV_RANDOM)
    err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_RANDOM));
#elif defined(_WIN32) || defined(_WIN64)
    /* no madvise on Windows */
#else
#warning "FIXME"
#endif /* MADV_RANDOM */
    if (unlikely(MDBX_IS_ERROR(err)))
      return err;
  } else {
#if defined(MADV_NORMAL)
    err = madvise(ptr, length, MADV_NORMAL) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
#elif defined(POSIX_MADV_NORMAL)
    err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_NORMAL));
#elif defined(POSIX_FADV_NORMAL) && defined(POSIX_FADV_WILLNEED)
    err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_NORMAL));
#elif defined(_WIN32) || defined(_WIN64)
    /* no madvise on Windows */
#else
#warning "FIXME"
#endif /* MADV_NORMAL */
    if (unlikely(MDBX_IS_ERROR(err)))
      return err;

    if (toggle) {
      /* NOTE: Seems there is a bug in the Mach/Darwin/OSX kernel,
       * because MADV_WILLNEED with offset != 0 may cause SIGBUS
       * on following access to the hinted region.
       * 19.6.0 Darwin Kernel Version 19.6.0: Tue Jan 12 22:13:05 PST 2021;
       * root:xnu-6153.141.16~1/RELEASE_X86_64 x86_64 */
#if defined(F_RDADVISE)
      struct radvisory hint;
      hint.ra_offset = offset;
      hint.ra_count = unlikely(length > INT_MAX && sizeof(length) > sizeof(hint.ra_count)) ? INT_MAX : (int)length;
      (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl(env->lazy_fd, F_RDADVISE, &hint);
#elif defined(MADV_WILLNEED)
      err = madvise(ptr, length, MADV_WILLNEED) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
#elif defined(POSIX_MADV_WILLNEED)
      err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_WILLNEED));
#elif defined(_WIN32) || defined(_WIN64)
      if (imports.PrefetchVirtualMemory) {
        WIN32_MEMORY_RANGE_ENTRY hint;
        hint.VirtualAddress = ptr;
        hint.NumberOfBytes = length;
        (void)imports.PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0);
      }
#elif defined(POSIX_FADV_WILLNEED)
      err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_WILLNEED));
#else
#warning "FIXME"
#endif /* F_RDADVISE */
      if (unlikely(MDBX_IS_ERROR(err)))
        return err;
    }
  }

  /* Remember the applied state: on/off flag in bit 0, the edge in the remaining bits. */
  env->lck->readahead_anchor = (enable & 1) + (edge << 1);
  return MDBX_SUCCESS;
}

/* Prepares the main datafile (DXB) while an environment is being opened.
 *
 * The steps, in order:
 *  1. read the header via dxb_read_header(), or create a fresh meta-triplet when the file
 *     has no data and the lock is held exclusively in a writable, non-recovery mode;
 *  2. reconcile the geometry stored in the header with the one pre-configured in `env`;
 *  3. compare the actual file size with the expected one;
 *  4. map the file and apply the dump/memcheck annotations;
 *  5. validate the meta-pages and, in exclusive mode, roll back an unsteady head when needed;
 *  6. in exclusive mode, write updated geometry and upgrade obsolete meta-page signatures;
 *  7. discard the unused tail and configure readahead.
 *
 * env       - environment being opened; must not be active yet (asserted).
 * lck_rc    - result of acquiring the lock; MDBX_RESULT_TRUE is treated as exclusive access.
 * mode_bits - passed through to dxb_read_header().
 *
 * Returns MDBX_RESULT_TRUE when a new database was created, MDBX_RESULT_FALSE when an existing
 * one was opened, otherwise an error code (MDBX_INCOMPATIBLE, MDBX_CORRUPTED, MDBX_WANNA_RECOVERY,
 * MDBX_PROBLEM, MDBX_TXN_FULL, or whatever a failed helper returned). */
__cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits) {
  meta_t header;
  eASSERT(env, !(env->flags & ENV_ACTIVE));
  int rc = MDBX_RESULT_FALSE;
  int err = dxb_read_header(env, &header, lck_rc, mode_bits);
  if (unlikely(err != MDBX_SUCCESS)) {
    /* A new database may be created only for MDBX_ENODATA, with an exclusive lock,
     * in a writable and non-recovery mode; any other failure is returned as is. */
    if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA || (env->flags & MDBX_RDONLY) != 0 ||
        /* recovery mode */ env->stuck_meta >= 0)
      return err;

    DEBUG("%s", "create new database");
    rc = /* new database */ MDBX_RESULT_TRUE;

    if (!env->geo_in_bytes.now) {
      /* set defaults if not configured */
      err = mdbx_env_set_geometry(env, 0, -1, -1, -1, -1, -1);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
    }

    err = env_page_auxbuffer(env);
    if (unlikely(err != MDBX_SUCCESS))
      return err;

    /* Build the initial meta-pages in the aux buffer, write them at offset 0
     * and then set the file size to the configured 'now' size. */
    header = *meta_init_triplet(env, env->page_auxbuf);
    err = osal_pwrite(env->lazy_fd, env->page_auxbuf, env->ps * (size_t)NUM_METAS, 0);
    if (unlikely(err != MDBX_SUCCESS))
      return err;

    err = osal_fsetsize(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now);
    if (unlikely(err != MDBX_SUCCESS))
      return err;

#ifndef NDEBUG /* just for checking */
    err = dxb_read_header(env, &header, lck_rc, mode_bits);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
#endif
  }

  VERBOSE("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
          " +%u -%u, txn_id %" PRIaTXN ", %s",
          header.trees.main.root, header.trees.gc.root, header.geometry.lower, header.geometry.first_unallocated,
          header.geometry.now, header.geometry.upper, pv2pages(header.geometry.grow_pv),
          pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header));

  /* The GC/FreeDB table must carry exactly the MDBX_INTEGERKEY persistent flag. */
  if (unlikely((header.trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY)) {
    ERROR("unexpected/invalid db-flags 0x%x for %s", header.trees.gc.flags, "GC/FreeDB");
    return MDBX_INCOMPATIBLE;
  }
  env->dbs_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY;
  env->kvs[FREE_DBI].clc.k.cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */
  env->kvs[FREE_DBI].clc.k.lmax = env->kvs[FREE_DBI].clc.k.lmin = 8;
  env->kvs[FREE_DBI].clc.v.cmp = cmp_lenfast;
  env->kvs[FREE_DBI].clc.v.lmin = 4;
  env->kvs[FREE_DBI].clc.v.lmax = mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY);

  if (env->ps != header.pagesize)
    env_setup_pagesize(env, header.pagesize);
  if ((env->flags & MDBX_RDONLY) == 0) {
    err = env_page_auxbuffer(env);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
  }

  //------------------------------------------------------- reconcile geometry
  size_t expected_filesize = 0;
  const size_t used_bytes = pgno2bytes(env, header.geometry.first_unallocated);
  const size_t used_aligned2os_bytes = ceil_powerof2(used_bytes, globals.sys_pagesize);
  if ((env->flags & MDBX_RDONLY)    /* readonly */
      || lck_rc != MDBX_RESULT_TRUE /* not exclusive */
      || /* recovery mode */ env->stuck_meta >= 0) {
    /* use present params from db */
    const size_t pagesize = header.pagesize;
    err = mdbx_env_set_geometry(env, header.geometry.lower * pagesize, header.geometry.now * pagesize,
                                header.geometry.upper * pagesize, pv2pages(header.geometry.grow_pv) * pagesize,
                                pv2pages(header.geometry.shrink_pv) * pagesize, header.pagesize);
    if (unlikely(err != MDBX_SUCCESS)) {
      ERROR("%s: err %d", "could not apply geometry from db", err);
      return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err;
    }
  } else if (env->geo_in_bytes.now) {
    /* silently growth to last used page */
    if (env->geo_in_bytes.now < used_aligned2os_bytes)
      env->geo_in_bytes.now = used_aligned2os_bytes;
    if (env->geo_in_bytes.upper < used_aligned2os_bytes)
      env->geo_in_bytes.upper = used_aligned2os_bytes;

    /* apply preconfigured params, but only if substantial changes:
     *  - upper or lower limit changes
     *  - shrink threshold or growth step
     * But ignore change just a 'now/current' size. */
    if (bytes_align2os_bytes(env, env->geo_in_bytes.upper) != pgno2bytes(env, header.geometry.upper) ||
        bytes_align2os_bytes(env, env->geo_in_bytes.lower) != pgno2bytes(env, header.geometry.lower) ||
        bytes_align2os_bytes(env, env->geo_in_bytes.shrink) != pgno2bytes(env, pv2pages(header.geometry.shrink_pv)) ||
        bytes_align2os_bytes(env, env->geo_in_bytes.grow) != pgno2bytes(env, pv2pages(header.geometry.grow_pv))) {

      if (env->geo_in_bytes.shrink && env->geo_in_bytes.now > used_bytes)
        /* pre-shrink if enabled */
        env->geo_in_bytes.now = used_bytes + env->geo_in_bytes.shrink - used_bytes % env->geo_in_bytes.shrink;

      /* The DB is not opened yet, so this call will not change the geometry, but it will validate
       * and adjust the parameters taking the actual page size into account. */
      err = mdbx_env_set_geometry(env, env->geo_in_bytes.lower, env->geo_in_bytes.now, env->geo_in_bytes.upper,
                                  env->geo_in_bytes.grow, env->geo_in_bytes.shrink, header.pagesize);
      if (unlikely(err != MDBX_SUCCESS)) {
        ERROR("%s: err %d", "could not apply preconfigured db-geometry", err);
        return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err;
      }

      /* altering fields to match geometry given from user */
      expected_filesize = pgno_align2os_bytes(env, header.geometry.now);
      header.geometry.now = bytes2pgno(env, env->geo_in_bytes.now);
      header.geometry.lower = bytes2pgno(env, env->geo_in_bytes.lower);
      header.geometry.upper = bytes2pgno(env, env->geo_in_bytes.upper);
      header.geometry.grow_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.grow));
      header.geometry.shrink_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink));

      VERBOSE("amending: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
              " +%u -%u, txn_id %" PRIaTXN ", %s",
              header.trees.main.root, header.trees.gc.root, header.geometry.lower, header.geometry.first_unallocated,
              header.geometry.now, header.geometry.upper, pv2pages(header.geometry.grow_pv),
              pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header));
    } else {
      /* fetch back 'now/current' size, since it was ignored during comparison and may differ. */
      env->geo_in_bytes.now = pgno_align2os_bytes(env, header.geometry.now);
    }
    ENSURE(env, header.geometry.now >= header.geometry.first_unallocated);
  } else {
    /* geo-params are not pre-configured by user, get current values from the meta. */
    env->geo_in_bytes.now = pgno2bytes(env, header.geometry.now);
    env->geo_in_bytes.lower = pgno2bytes(env, header.geometry.lower);
    env->geo_in_bytes.upper = pgno2bytes(env, header.geometry.upper);
    env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(header.geometry.grow_pv));
    env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(header.geometry.shrink_pv));
  }

  //------------------------------------------------------ check the file size
  ENSURE(env, pgno_align2os_bytes(env, header.geometry.now) == env->geo_in_bytes.now);
  ENSURE(env, env->geo_in_bytes.now >= used_bytes);
  if (!expected_filesize)
    expected_filesize = env->geo_in_bytes.now;
  const uint64_t filesize_before = env->dxb_mmap.filesize;
  if (unlikely(filesize_before != env->geo_in_bytes.now)) {
    if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
      VERBOSE("filesize mismatch (expect %" PRIuPTR "b/%" PRIaPGNO "p, have %" PRIu64 "b/%" PRIu64
              "p), assume other process working",
              env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), filesize_before,
              filesize_before >> env->ps2ln);
    } else {
      if (filesize_before != expected_filesize)
        WARNING("filesize mismatch (expect %" PRIuSIZE "b/%" PRIaPGNO "p, have %" PRIu64 "b/%" PRIu64 "p)",
                expected_filesize, bytes2pgno(env, expected_filesize), filesize_before, filesize_before >> env->ps2ln);
      if (filesize_before < used_bytes) {
        ERROR("last-page beyond end-of-file (last %" PRIaPGNO ", have %" PRIaPGNO ")",
              header.geometry.first_unallocated, bytes2pgno(env, (size_t)filesize_before));
        return MDBX_CORRUPTED;
      }

      if (env->flags & MDBX_RDONLY) {
        if (filesize_before & (globals.sys_pagesize - 1)) {
          ERROR("filesize should be rounded-up to system page size %u", globals.sys_pagesize);
          return MDBX_WANNA_RECOVERY;
        }
        WARNING("%s", "ignore filesize mismatch in readonly-mode");
      } else {
        VERBOSE("will resize datafile to %" PRIuSIZE " bytes, %" PRIaPGNO " pages", env->geo_in_bytes.now,
                bytes2pgno(env, env->geo_in_bytes.now));
      }
    }
  }

  VERBOSE("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)", globals.bootid.x, globals.bootid.y,
          (globals.bootid.x | globals.bootid.y) ? "" : "not-");

  /* calculate readahead hint before mmap with zero redundant pages */
  const bool readahead =
      !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE;

  //------------------------------------------------------------ map the file
  /* MMAP_OPTION_SETLENGTH is requested only with a non-zero lck_rc and outside recovery mode. */
  err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, env->geo_in_bytes.upper,
                  (lck_rc && env->stuck_meta < 0) ? MMAP_OPTION_SETLENGTH : 0, env->pathname.dxb);
  if (unlikely(err != MDBX_SUCCESS))
    return err;

#if defined(MADV_DONTDUMP)
  err =
      madvise(env->dxb_mmap.base, env->dxb_mmap.limit, MADV_DONTDUMP) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
  if (unlikely(MDBX_IS_ERROR(err)))
    return err;
#endif /* MADV_DONTDUMP */
#if defined(MADV_DODUMP)
  if (globals.runtime_flags & MDBX_DBG_DUMP) {
    /* With MDBX_DBG_DUMP only the meta-pages are marked MADV_DODUMP. */
    const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS);
    err = madvise(env->dxb_mmap.base, meta_length_aligned2os, MADV_DODUMP) ? ignore_enosys_and_eagain(errno)
                                                                           : MDBX_SUCCESS;
    if (unlikely(MDBX_IS_ERROR(err)))
      return err;
  }
#endif /* MADV_DODUMP */

#ifdef ENABLE_MEMCHECK
  env->valgrind_handle = VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx");
#endif /* ENABLE_MEMCHECK */

  eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && used_bytes <= env->dxb_mmap.limit);
#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
  /* Poison the part of the file beyond the used pages and remember where the poisoned area ends. */
  if (env->dxb_mmap.filesize > used_bytes && env->dxb_mmap.filesize < env->dxb_mmap.limit) {
    VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, used_bytes), env->dxb_mmap.filesize - used_bytes);
    MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, used_bytes), env->dxb_mmap.filesize - used_bytes);
  }
  env->poison_edge =
      bytes2pgno(env, (env->dxb_mmap.filesize < env->dxb_mmap.limit) ? env->dxb_mmap.filesize : env->dxb_mmap.limit);
#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */

  troika_t troika = meta_tap(env);
#if MDBX_DEBUG
  meta_troika_dump(env, &troika);
#endif
  //-------------------------------- validate/rollback head & steady meta-pages
  if (unlikely(env->stuck_meta >= 0)) {
    /* recovery mode */
    meta_t clone;
    meta_t const *const target = METAPAGE(env, env->stuck_meta);
    err = meta_validate_copy(env, target, &clone);
    if (unlikely(err != MDBX_SUCCESS)) {
      ERROR("target meta[%u] is corrupted", bytes2pgno(env, ptr_dist(data_page(target), env->dxb_mmap.base)));
      meta_troika_dump(env, &troika);
      return MDBX_CORRUPTED;
    }
  } else /* not recovery mode */
    /* Loops until the meta-pages are acceptable; each rollback re-reads the troika and re-checks. */
    while (1) {
      const unsigned meta_clash_mask = meta_eq_mask(&troika);
      if (unlikely(meta_clash_mask)) {
        /* The value is an unsigned bit-mask, hence it is printed in hex to match the "0x" prefix. */
        ERROR("meta-pages are clashed: mask 0x%x", meta_clash_mask);
        meta_troika_dump(env, &troika);
        return MDBX_CORRUPTED;
      }

      if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
        /* non-exclusive mode,
         * meta-pages should be validated by a first process opened the DB */
        if (troika.recent == troika.prefer_steady)
          break;

        if (!env->lck_mmap.lck) {
          /* LY: without-lck (read-only) mode, so it is impossible that other
           * process made weak checkpoint. */
          ERROR("%s", "without-lck, unable recovery/rollback");
          meta_troika_dump(env, &troika);
          return MDBX_WANNA_RECOVERY;
        }

        /* LY: assume just have a collision with other running process,
         *     or someone make a weak checkpoint */
        VERBOSE("%s", "assume collision or online weak checkpoint");
        break;
      }
      eASSERT(env, lck_rc == MDBX_RESULT_TRUE);
      /* exclusive mode */

      const meta_ptr_t recent = meta_recent(env, &troika);
      const meta_ptr_t prefer_steady = meta_prefer_steady(env, &troika);
      meta_t clone;
      if (prefer_steady.is_steady) {
        err = meta_validate_copy(env, prefer_steady.ptr_c, &clone);
        if (unlikely(err != MDBX_SUCCESS)) {
          ERROR("meta[%u] with %s txnid %" PRIaTXN " is corrupted, %s needed",
                bytes2pgno(env, ptr_dist(prefer_steady.ptr_c, env->dxb_mmap.base)), "steady", prefer_steady.txnid,
                "manual recovery");
          meta_troika_dump(env, &troika);
          return MDBX_CORRUPTED;
        }
        /* The most recent meta-page is the steady one: nothing to roll back. */
        if (prefer_steady.ptr_c == recent.ptr_c)
          break;
      }

      const pgno_t pgno = bytes2pgno(env, ptr_dist(recent.ptr_c, env->dxb_mmap.base));
      const bool last_valid = meta_validate_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS;
      eASSERT(env, !prefer_steady.is_steady || recent.txnid != prefer_steady.txnid);
      if (unlikely(!last_valid)) {
        if (unlikely(!prefer_steady.is_steady)) {
          ERROR("%s for open or automatic rollback, %s", "there are no suitable meta-pages",
                "manual recovery is required");
          meta_troika_dump(env, &troika);
          return MDBX_CORRUPTED;
        }
        WARNING("meta[%u] with last txnid %" PRIaTXN " is corrupted, rollback needed", pgno, recent.txnid);
        meta_troika_dump(env, &troika);
        goto purge_meta_head;
      }

      /* A valid but unsteady head with a matching boot-id is kept; a steady sync is still pending. */
      if (meta_bootid_match(recent.ptr_c)) {
        if (env->flags & MDBX_RDONLY) {
          ERROR("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: "
                "rollback NOT needed, steady-sync NEEDED%s",
                "opening after an unclean shutdown", globals.bootid.x, globals.bootid.y,
                ", but unable in read-only mode");
          meta_troika_dump(env, &troika);
          return MDBX_WANNA_RECOVERY;
        }
        WARNING("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: "
                "rollback NOT needed, steady-sync NEEDED%s",
                "opening after an unclean shutdown", globals.bootid.x, globals.bootid.y, "");
        header = clone;
        env->lck->unsynced_pages.weak = header.geometry.first_unallocated;
        if (!env->lck->eoos_timestamp.weak)
          env->lck->eoos_timestamp.weak = osal_monotime();
        break;
      }
      if (unlikely(!prefer_steady.is_steady)) {
        ERROR("%s, but %s for automatic rollback: %s", "opening after an unclean shutdown",
              "there are no suitable meta-pages", "manual recovery is required");
        meta_troika_dump(env, &troika);
        return MDBX_CORRUPTED;
      }
      if (env->flags & MDBX_RDONLY) {
        ERROR("%s and rollback needed: (from head %" PRIaTXN " to steady %" PRIaTXN ")%s",
              "opening after an unclean shutdown", recent.txnid, prefer_steady.txnid, ", but unable in read-only mode");
        meta_troika_dump(env, &troika);
        return MDBX_WANNA_RECOVERY;
      }

    purge_meta_head:
      /* Override the head meta-page with txnid 0, then re-read the troika and loop again. */
      NOTICE("%s and doing automatic rollback: "
             "purge%s meta[%u] with%s txnid %" PRIaTXN,
             "opening after an unclean shutdown", last_valid ? "" : " invalid", pgno, last_valid ? " weak" : "",
             recent.txnid);
      meta_troika_dump(env, &troika);
      ENSURE(env, prefer_steady.is_steady);
      err = meta_override(env, pgno, 0, last_valid ? recent.ptr_c : prefer_steady.ptr_c);
      if (err) {
        ERROR("rollback: overwrite meta[%u] with txnid %" PRIaTXN ", error %d", pgno, recent.txnid, err);
        return err;
      }
      troika = meta_tap(env);
      ENSURE(env, 0 == meta_txnid(recent.ptr_v));
      ENSURE(env, 0 == meta_eq_mask(&troika));
    }

  if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
    //-------------------------------------------------- shrink DB & update geo
    /* re-check size after mmap */
    if ((env->dxb_mmap.current & (globals.sys_pagesize - 1)) != 0 || env->dxb_mmap.current < used_bytes) {
      ERROR("unacceptable/unexpected datafile size %" PRIuPTR, env->dxb_mmap.current);
      return MDBX_PROBLEM;
    }
    if (env->dxb_mmap.current != env->geo_in_bytes.now) {
      header.geometry.now = bytes2pgno(env, env->dxb_mmap.current);
      NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO " pages", env->dxb_mmap.current,
             header.geometry.now);
    }

    const meta_ptr_t recent = meta_recent(env, &troika);
    if (/* differences in geo.first_unallocated are not taken into account */
        header.geometry.grow_pv != recent.ptr_c->geometry.grow_pv ||
        header.geometry.shrink_pv != recent.ptr_c->geometry.shrink_pv ||
        header.geometry.lower != recent.ptr_c->geometry.lower ||
        header.geometry.upper != recent.ptr_c->geometry.upper || header.geometry.now != recent.ptr_c->geometry.now) {
      if ((env->flags & MDBX_RDONLY) != 0 ||
          /* recovery mode */ env->stuck_meta >= 0) {
        WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO
                "/s%u-g%u, to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u",
                (env->stuck_meta < 0) ? "read-only" : "recovery", recent.ptr_c->geometry.lower,
                recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper, pv2pages(recent.ptr_c->geometry.shrink_pv),
                pv2pages(recent.ptr_c->geometry.grow_pv), header.geometry.lower, header.geometry.now,
                header.geometry.upper, pv2pages(header.geometry.shrink_pv), pv2pages(header.geometry.grow_pv));
      } else {
        /* Persist the amended geometry as a new meta-page with the next txnid. */
        const txnid_t next_txnid = safe64_txnid_next(recent.txnid);
        if (unlikely(next_txnid > MAX_TXNID)) {
          ERROR("txnid overflow, raise %d", MDBX_TXN_FULL);
          return MDBX_TXN_FULL;
        }
        NOTICE("updating meta.geo: "
               "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN "), "
               "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN ")",
               recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper,
               pv2pages(recent.ptr_c->geometry.shrink_pv), pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid,
               header.geometry.lower, header.geometry.now, header.geometry.upper, pv2pages(header.geometry.shrink_pv),
               pv2pages(header.geometry.grow_pv), next_txnid);

        ENSURE(env, header.unsafe_txnid == recent.txnid);
        meta_set_txnid(env, &header, next_txnid);
        err = dxb_sync_locked(env, env->flags | txn_shrink_allowed, &header, &troika);
        if (err) {
          ERROR("error %d, while updating meta.geo: "
                "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN "), "
                "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN ")",
                err, recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper,
                pv2pages(recent.ptr_c->geometry.shrink_pv), pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid,
                header.geometry.lower, header.geometry.now, header.geometry.upper, pv2pages(header.geometry.shrink_pv),
                pv2pages(header.geometry.grow_pv), header.unsafe_txnid);
          return err;
        }
      }
    }

    atomic_store32(&env->lck->discarded_tail, bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed);

    /* Rewrite meta-pages that carry an outdated magic/version, an empty dxbid, or non-persistent GC flags. */
    if ((env->flags & MDBX_RDONLY) == 0 && env->stuck_meta < 0 &&
        (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) == 0) {
      for (unsigned n = 0; n < NUM_METAS; ++n) {
        meta_t *const meta = METAPAGE(env, n);
        if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != MDBX_DATA_MAGIC) ||
            (meta->dxbid.x | meta->dxbid.y) == 0 || (meta->gc_flags & ~DB_PERSISTENT_FLAGS)) {
          const txnid_t txnid = meta_is_used(&troika, n) ? constmeta_txnid(meta) : 0;
          NOTICE("%s %s"
                 "meta[%u], txnid %" PRIaTXN,
                 "updating db-format/guid signature for", meta_is_steady(meta) ? "stead-" : "weak-", n, txnid);
          err = meta_override(env, n, txnid, meta);
          if (unlikely(err != MDBX_SUCCESS) &&
              /* Just ignore the MDBX_PROBLEM error, since here it is
               * returned only in case of the attempt to upgrade an obsolete
               * meta-page that is invalid for current state of a DB,
               * e.g. after shrinking DB file */
              err != MDBX_PROBLEM) {
            ERROR("%s meta[%u], txnid %" PRIaTXN ", error %d", "updating db-format signature for", n, txnid, err);
            return err;
          }
          troika = meta_tap(env);
        }
      }
    }
  } /* lck exclusive, lck_rc == MDBX_RESULT_TRUE */

  //---------------------------------------------------- setup madvise/readahead
  /* Release the mapped tail beyond the used pages, when there is one. */
  if (used_aligned2os_bytes < env->dxb_mmap.current) {
#if defined(MADV_REMOVE)
    if (lck_rc && (env->flags & MDBX_WRITEMAP) != 0 &&
        /* not recovery mode */ env->stuck_meta < 0) {
      NOTICE("open-MADV_%s %u..%u", "REMOVE (deallocate file space)", env->lck->discarded_tail.weak,
             bytes2pgno(env, env->dxb_mmap.current));
      err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), env->dxb_mmap.current - used_aligned2os_bytes,
                    MADV_REMOVE)
                ? ignore_enosys_and_eagain(errno)
                : MDBX_SUCCESS;
      if (unlikely(MDBX_IS_ERROR(err)))
        return err;
    }
#endif /* MADV_REMOVE */
#if defined(MADV_DONTNEED)
    NOTICE("open-MADV_%s %u..%u", "DONTNEED", env->lck->discarded_tail.weak, bytes2pgno(env, env->dxb_mmap.current));
    err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), env->dxb_mmap.current - used_aligned2os_bytes,
                  MADV_DONTNEED)
              ? ignore_enosys_and_eagain(errno)
              : MDBX_SUCCESS;
    if (unlikely(MDBX_IS_ERROR(err)))
      return err;
#elif defined(POSIX_MADV_DONTNEED)
    err = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes),
                                      env->dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED));
    if (unlikely(MDBX_IS_ERROR(err)))
      return err;
#elif defined(POSIX_FADV_DONTNEED)
    err = ignore_enosys(posix_fadvise(env->lazy_fd, used_aligned2os_bytes,
                                      env->dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED));
    if (unlikely(MDBX_IS_ERROR(err)))
      return err;
#endif /* MADV_DONTNEED */
  }

  err = dxb_set_readahead(env, bytes2pgno(env, used_bytes), readahead, true);
  if (unlikely(err != MDBX_SUCCESS))
    return err;

  /* MDBX_RESULT_TRUE for a newly created database, MDBX_RESULT_FALSE otherwise. */
  return rc;
}

/* Publishes the `pending` meta into one of the meta-pages of the datafile.
 *
 * The work is done in the following order:
 *  - optionally (when `flags` has txn_shrink_allowed) discard the unused tail
 *    of the mapping via madvise and decide whether the datafile can be shrunk;
 *  - step#1: sync already written data pages (msync or fsync) unless running
 *    with MDBX_SAFE_NOSYNC;
 *  - sign `pending` as steady or weak depending on the outcome of step#1;
 *  - step#2: write `pending` into the chosen target meta-page, either directly
 *    through the mapping (MDBX_WRITEMAP) or via osal_pwrite();
 *  - re-read the troika and shrink the datafile if it was decided above.
 *
 * \param env      The environment; must be neither read-only nor in the fatal
 *                 error state (asserted below).
 * \param flags    Effective sync flags; must agree with env->flags regarding
 *                 MDBX_WRITEMAP (asserted below). May be narrowed locally to
 *                 force a steady sync.
 * \param pending  The meta to be written; must NOT point into the mapped
 *                 meta-pages (asserted below). It is modified here: the
 *                 signature is set, and geometry.now/txnid could be changed
 *                 when shrinking.
 * \param troika   The caller's meta-pages snapshot; refreshed by meta_tap()
 *                 after the meta was written.
 *
 * \returns MDBX_SUCCESS on success (including the case when the update is
 *          skipped since the head is already steady). An error from the
 *          tail-discarding madvise is returned as-is, any later failure
 *          goes through the `fail` label which also sets ENV_FATAL_ERROR
 *          in env->flags.
 *
 * NOTE(review): the name suggests the caller must hold the writer lock;
 * this function does not acquire it by itself - confirm against callers. */
int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, troika_t *const troika) {
  eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0);
  eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY);
  eASSERT(env, check_table_flags(pending->trees.main.flags));
  /* meta0..meta2 are used only for the debug output and assertions below */
  const meta_t *const meta0 = METAPAGE(env, 0);
  const meta_t *const meta1 = METAPAGE(env, 1);
  const meta_t *const meta2 = METAPAGE(env, 2);
  const meta_ptr_t head = meta_recent(env, troika);
  int rc;

  /* `pending` must be a private copy located outside of the mapped meta-pages */
  eASSERT(env, pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS));
  eASSERT(env, (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0);
  eASSERT(env, pending->geometry.first_unallocated <= pending->geometry.now);

  if (flags & MDBX_SAFE_NOSYNC) {
    /* Check auto-sync conditions */
    const pgno_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed);
    const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed);
    uint64_t eoos_timestamp;
    /* Keeping only MDBX_WRITEMAP and txn_shrink_allowed drops all other bits,
     * including MDBX_SAFE_NOSYNC, so the data will be synced at step#1. */
    if ((autosync_threshold && atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= autosync_threshold) ||
        (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) &&
         osal_monotime() - eoos_timestamp >= autosync_period))
      flags &= MDBX_WRITEMAP | txn_shrink_allowed; /* force steady */
  }

  /* The number of pages to cut off from the datafile at the end, zero if none */
  pgno_t shrink = 0;
  if (flags & txn_shrink_allowed) {
    const size_t prev_discarded_pgno = atomic_load32(&env->lck->discarded_tail, mo_Relaxed);
    if (prev_discarded_pgno < pending->geometry.first_unallocated)
      env->lck->discarded_tail.weak = pending->geometry.first_unallocated;
    else if (prev_discarded_pgno >= pending->geometry.first_unallocated + env->madv_threshold) {
      /* LY: check conditions to discard unused pages */
      const pgno_t largest_pgno =
          mvcc_snapshot_largest(env, (head.ptr_c->geometry.first_unallocated > pending->geometry.first_unallocated)
                                         ? head.ptr_c->geometry.first_unallocated
                                         : pending->geometry.first_unallocated);
      eASSERT(env, largest_pgno >= NUM_METAS);

#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
      /* Move the poison edge down and mark the released range as inaccessible
       * for Valgrind/ASAN. */
      const pgno_t edge = env->poison_edge;
      if (edge > largest_pgno) {
        env->poison_edge = largest_pgno;
        VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)),
                                   pgno2bytes(env, edge - largest_pgno));
        MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)),
                                       pgno2bytes(env, edge - largest_pgno));
      }
#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */

#if defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)
      const size_t discard_edge_pgno = pgno_align2os_pgno(env, largest_pgno);
      if (prev_discarded_pgno >= discard_edge_pgno + env->madv_threshold) {
        const size_t prev_discarded_bytes = pgno_align2os_bytes(env, prev_discarded_pgno);
        const size_t discard_edge_bytes = pgno2bytes(env, discard_edge_pgno);
        /* Because of the alignment, prev_discarded_bytes and
         * discard_edge_bytes may be equal. */
        if (prev_discarded_bytes > discard_edge_bytes) {
          NOTICE("shrink-MADV_%s %zu..%zu", "DONTNEED", discard_edge_pgno, prev_discarded_pgno);
          munlock_after(env, discard_edge_pgno, bytes_align2os_bytes(env, env->dxb_mmap.current));
          /* Sampled before madvise; used below only to choose the log severity
           * and for diagnostics when madvise fails. */
          const uint32_t munlocks_before = atomic_load32(&env->lck->mlcnt[1], mo_Relaxed);
#if defined(MADV_DONTNEED)
          int advise = MADV_DONTNEED;
#if defined(MADV_FREE) && 0 /* MADV_FREE works for only anonymous vma at the moment */
          if ((env->flags & MDBX_WRITEMAP) && global.linux_kernel_version > 0x04050000)
            advise = MADV_FREE;
#endif /* MADV_FREE */
          int err = madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes), prev_discarded_bytes - discard_edge_bytes,
                            advise)
                        ? ignore_enosys_and_eagain(errno)
                        : MDBX_SUCCESS;
#else
          int err = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes),
                                                prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED));
#endif
          if (unlikely(MDBX_IS_ERROR(err))) {
            const uint32_t mlocks_after = atomic_load32(&env->lck->mlcnt[0], mo_Relaxed);
            if (err == MDBX_EINVAL) {
              /* EINVAL is tolerated (only logged), and discarded_tail is left
               * unchanged in this case. */
              const int severity = (mlocks_after - munlocks_before) ? MDBX_LOG_NOTICE : MDBX_LOG_WARN;
              if (LOG_ENABLED(severity))
                debug_log(severity, __func__, __LINE__,
                          "%s-madvise: ignore EINVAL (%d) since some pages maybe "
                          "locked (%u/%u mlcnt-processes)",
                          "shrink", err, mlocks_after, munlocks_before);
            } else {
              ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", "shrink", "DONTNEED",
                    discard_edge_bytes, prev_discarded_bytes - discard_edge_bytes, mlocks_after, munlocks_before, err);
              /* NOTE: returns directly, i.e. without setting ENV_FATAL_ERROR */
              return err;
            }
          } else
            env->lck->discarded_tail.weak = discard_edge_pgno;
        }
      }
#endif /* MADV_DONTNEED || POSIX_MADV_DONTNEED */

      /* LY: check conditions to shrink datafile */
      const pgno_t backlog_gap = 3 + pending->trees.gc.height * 3;
      pgno_t shrink_step = 0;
      if (pending->geometry.shrink_pv && pending->geometry.now - pending->geometry.first_unallocated >
                                             (shrink_step = pv2pages(pending->geometry.shrink_pv)) + backlog_gap) {
        if (pending->geometry.now > largest_pgno && pending->geometry.now - largest_pgno > shrink_step + backlog_gap) {
          const pgno_t aligner =
              pending->geometry.grow_pv ? /* grow_step */ pv2pages(pending->geometry.grow_pv) : shrink_step;
          const pgno_t with_backlog_gap = largest_pgno + backlog_gap;
          /* Round up to the next multiple of `aligner`, then align to the OS page */
          const pgno_t aligned =
              pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - with_backlog_gap % aligner);
          /* Never go below the lower bound of the geometry */
          const pgno_t bottom = (aligned > pending->geometry.lower) ? aligned : pending->geometry.lower;
          if (pending->geometry.now > bottom) {
            if (TROIKA_HAVE_STEADY(troika))
              /* force steady, but only if steady-checkpoint is present */
              flags &= MDBX_WRITEMAP | txn_shrink_allowed;
            shrink = pending->geometry.now - bottom;
            pending->geometry.now = bottom;
            /* The geometry has been changed, so `pending` can no longer share
             * the txnid with the head: step it forward to the next txnid. */
            if (unlikely(head.txnid == pending->unsafe_txnid)) {
              const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid);
              NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, pending->unsafe_txnid, txnid);
              ENSURE(env, !env->basal_txn || !env->txn);
              if (unlikely(txnid > MAX_TXNID)) {
                rc = MDBX_TXN_FULL;
                ERROR("txnid overflow, raise %d", rc);
                goto fail;
              }
              meta_set_txnid(env, pending, txnid);
              eASSERT(env, coherency_check_meta(env, pending, true));
            }
          }
        }
      }
    }
  }

  /* LY: step#1 - sync previously written/updated data-pages */
  /* From here and up to the "Steady or Weak" choice below `rc` is used as
   * a marker: MDBX_RESULT_FALSE means steady, MDBX_RESULT_TRUE means weak. */
  rc = MDBX_RESULT_FALSE /* carry steady */;
  if (atomic_load64(&env->lck->unsynced_pages, mo_Relaxed)) {
    eASSERT(env, ((flags ^ env->flags) & MDBX_WRITEMAP) == 0);
    enum osal_syncmode_bits mode_bits = MDBX_SYNC_NONE;
    /* The increment for the msync/fsync statistics counter: 1 only for a real sync */
    unsigned sync_op = 0;
    if ((flags & MDBX_SAFE_NOSYNC) == 0) {
      sync_op = 1;
      mode_bits = MDBX_SYNC_DATA;
      if (pending->geometry.first_unallocated > meta_prefer_steady(env, troika).ptr_c->geometry.now)
        mode_bits |= MDBX_SYNC_SIZE;
      if (flags & MDBX_NOMETASYNC)
        mode_bits |= MDBX_SYNC_IODQ;
    } else if (unlikely(env->incore))
      /* MDBX_SAFE_NOSYNC with in-core mode: no msync/fsync call at all,
       * jump directly to signing `pending` as weak. */
      goto skip_incore_sync;
    if (flags & MDBX_WRITEMAP) {
#if MDBX_ENABLE_PGOP_STAT
      env->lck->pgops.msync.weak += sync_op;
#else
      (void)sync_op;
#endif /* MDBX_ENABLE_PGOP_STAT */
      rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, pending->geometry.first_unallocated), mode_bits);
    } else {
#if MDBX_ENABLE_PGOP_STAT
      env->lck->pgops.fsync.weak += sync_op;
#else
      (void)sync_op;
#endif /* MDBX_ENABLE_PGOP_STAT */
      rc = osal_fsync(env->lazy_fd, mode_bits);
    }
    if (unlikely(rc != MDBX_SUCCESS))
      goto fail;
    rc = (flags & MDBX_SAFE_NOSYNC) ? MDBX_RESULT_TRUE /* carry non-steady */
                                    : MDBX_RESULT_FALSE /* carry steady */;
  }
  eASSERT(env, coherency_check_meta(env, pending, true));

  /* Steady or Weak */
  if (rc == MDBX_RESULT_FALSE /* carry steady */) {
    meta_sign_as_steady(pending);
    atomic_store64(&env->lck->eoos_timestamp, 0, mo_Relaxed);
    atomic_store64(&env->lck->unsynced_pages, 0, mo_Relaxed);
  } else {
    assert(rc == MDBX_RESULT_TRUE /* carry non-steady */);
  skip_incore_sync:
    eASSERT(env, env->lck->unsynced_pages.weak > 0);
    /* The timestamp may be zero if unsynced_pages > 0 as a result of spilling.
     * eASSERT(env, env->lck->eoos_timestamp.weak != 0); */
    unaligned_poke_u64(4, pending->sign, DATASIGN_WEAK);
  }

  /* Overwriting the head meta in place is legal only if nothing except
   * the signature differs between the head and `pending`. */
  const bool legal4overwrite = head.txnid == pending->unsafe_txnid &&
                               !memcmp(&head.ptr_c->trees, &pending->trees, sizeof(pending->trees)) &&
                               !memcmp(&head.ptr_c->canary, &pending->canary, sizeof(pending->canary)) &&
                               !memcmp(&head.ptr_c->geometry, &pending->geometry, sizeof(pending->geometry));
  /* Choose the meta-page to write: the head itself (weak -> steady upgrade of
   * the same txn) or the tail one for a new txnid. */
  meta_t *target = nullptr;
  if (head.txnid == pending->unsafe_txnid) {
    ENSURE(env, legal4overwrite);
    if (!head.is_steady && meta_is_steady(pending))
      target = (meta_t *)head.ptr_c;
    else {
      NOTICE("skip update meta%" PRIaPGNO " for txn#%" PRIaTXN ", since it is already steady",
             data_page(head.ptr_c)->pgno, head.txnid);
      return MDBX_SUCCESS;
    }
  } else {
    const unsigned troika_tail = troika->tail_and_flags & 3;
    ENSURE(env, troika_tail < NUM_METAS && troika_tail != troika->recent && troika_tail != troika->prefer_steady);
    target = (meta_t *)meta_tail(env, troika).ptr_c;
  }

  /* LY: step#2 - update meta-page. */
  DEBUG("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO
        "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s",
        data_page(target)->pgno, pending->trees.main.root, pending->trees.gc.root, pending->geometry.lower,
        pending->geometry.first_unallocated, pending->geometry.now, pending->geometry.upper,
        pv2pages(pending->geometry.grow_pv), pv2pages(pending->geometry.shrink_pv), pending->unsafe_txnid,
        durable_caption(pending));

  DEBUG("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO,
        (meta0 == head.ptr_c) ? "head"
        : (meta0 == target)   ? "tail"
                              : "stay",
        durable_caption(meta0), constmeta_txnid(meta0), meta0->trees.main.root, meta0->trees.gc.root);
  DEBUG("meta1: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO,
        (meta1 == head.ptr_c) ? "head"
        : (meta1 == target)   ? "tail"
                              : "stay",
        durable_caption(meta1), constmeta_txnid(meta1), meta1->trees.main.root, meta1->trees.gc.root);
  DEBUG("meta2: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO,
        (meta2 == head.ptr_c) ? "head"
        : (meta2 == target)   ? "tail"
                              : "stay",
        durable_caption(meta2), constmeta_txnid(meta2), meta2->trees.main.root, meta2->trees.gc.root);

  /* A meta-page with the same txnid may exist only in the case of
   * a weak -> steady upgrade. */
  eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta0) || (meta_is_steady(pending) && !meta_is_steady(meta0)));
  eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta1) || (meta_is_steady(pending) && !meta_is_steady(meta1)));
  eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta2) || (meta_is_steady(pending) && !meta_is_steady(meta2)));

  eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0);
  ENSURE(env, target == head.ptr_c || constmeta_txnid(target) < pending->unsafe_txnid);
  if (flags & MDBX_WRITEMAP) {
    /* Writable mapping: update the target meta-page in place, field by field,
     * bracketed by meta_update_begin()/meta_update_end(). */
    jitter4testing(true);
    if (likely(target != head.ptr_c)) {
      /* LY: 'invalidate' the meta. */
      meta_update_begin(env, target, pending->unsafe_txnid);
      unaligned_poke_u64(4, target->sign, DATASIGN_WEAK);
#ifndef NDEBUG
      /* debug: provoke failure to catch a violators, but don't touch pagesize
       * to allow readers catch actual pagesize. */
      void *provoke_begin = &target->trees.gc.root;
      void *provoke_end = &target->sign;
      memset(provoke_begin, 0xCC, ptr_dist(provoke_end, provoke_begin));
      jitter4testing(false);
#endif

      /* LY: update info */
      target->geometry = pending->geometry;
      target->trees.gc = pending->trees.gc;
      target->trees.main = pending->trees.main;
      eASSERT(env, target->trees.gc.flags == MDBX_INTEGERKEY);
      eASSERT(env, check_table_flags(target->trees.main.flags));
      target->canary = pending->canary;
      memcpy(target->pages_retired, pending->pages_retired, 8);
      jitter4testing(true);

      /* LY: 'commit' the meta */
      meta_update_end(env, target, unaligned_peek_u64(4, pending->txnid_b));
      jitter4testing(true);
      eASSERT(env, coherency_check_meta(env, target, true));
    } else {
      /* dangerous case (target == head), only sign could
       * be updated, check assertions once again */
      eASSERT(env, legal4overwrite && !head.is_steady && meta_is_steady(pending));
    }
    /* The signature is stored last, after all other fields of the meta */
    memcpy(target->sign, pending->sign, 8);
    osal_flush_incoherent_cpu_writeback();
    jitter4testing(true);
    if (!env->incore) {
      if (!MDBX_AVOID_MSYNC) {
        /* sync meta-pages */
#if MDBX_ENABLE_PGOP_STAT
        env->lck->pgops.msync.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
        rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
                        (flags & MDBX_NOMETASYNC) ? MDBX_SYNC_NONE : MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
      } else {
        /* MDBX_AVOID_MSYNC: write the whole page holding the target meta
         * through the file descriptor instead of calling msync. */
#if MDBX_ENABLE_PGOP_STAT
        env->lck->pgops.wops.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
        const page_t *page = data_page(target);
        rc = osal_pwrite(env->fd4meta, page, env->ps, ptr_dist(page, env->dxb_mmap.base));
        if (likely(rc == MDBX_SUCCESS)) {
          osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize);
          if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd) {
#if MDBX_ENABLE_PGOP_STAT
            env->lck->pgops.fsync.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
            rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
          }
        }
      }
      if (unlikely(rc != MDBX_SUCCESS))
        goto fail;
    }
  } else {
    /* No writable mapping: write `pending` over the target meta via pwrite */
#if MDBX_ENABLE_PGOP_STAT
    env->lck->pgops.wops.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
    /* A copy of the target's current content to be written back on failure */
    const meta_t undo_meta = *target;
    eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY);
    eASSERT(env, check_table_flags(pending->trees.main.flags));
    rc = osal_pwrite(env->fd4meta, pending, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base));
    if (unlikely(rc != MDBX_SUCCESS)) {
    undo:
      DEBUG("%s", "write failed, disk error?");
      /* On a failure, the pagecache still contains the new data.
       * Try write some old data back, to prevent it from being used.
       * The result of this best-effort write is intentionally not checked,
       * `rc` keeps the original error. */
      osal_pwrite(env->fd4meta, &undo_meta, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base));
      goto fail;
    }
    osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize);
    /* sync meta-pages */
    if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd && !env->incore) {
#if MDBX_ENABLE_PGOP_STAT
      env->lck->pgops.fsync.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
      rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
      if (rc != MDBX_SUCCESS)
        goto undo;
    }
  }

  /* The string literal is just an always-true condition which documents
   * itself: repeat the check while it returns MDBX_RESULT_TRUE, stop on
   * MDBX_SUCCESS, and fail on any other result. */
  uint64_t timestamp = 0;
  while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
    rc = coherency_check_written(env, pending->unsafe_txnid, target,
                                 bytes2pgno(env, ptr_dist(target, env->dxb_mmap.base)), &timestamp);
    if (likely(rc == MDBX_SUCCESS))
      break;
    if (unlikely(rc != MDBX_RESULT_TRUE))
      goto fail;
  }

  /* Remember which txnid has its meta synced. With MDBX_NOMETASYNC the stored
   * value is offset by a mode-specific constant, so it will not be equal to
   * the 32-bit part of the txnid taken below.
   * The index into txnid_a[] is 0 on little-endian and 1 otherwise
   * (presumably this selects the least-significant 32-bit half - confirm). */
  const uint32_t sync_txnid_dist = ((flags & MDBX_NOMETASYNC) == 0)                     ? 0
                                   : ((flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? MDBX_NOMETASYNC_LAZY_FD
                                                                                        : MDBX_NOMETASYNC_LAZY_WRITEMAP;
  env->lck->meta_sync_txnid.weak = pending->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__].weak - sync_txnid_dist;

  /* Refresh the caller's troika and propagate it to every write transaction
   * in the basal -> nested chain (skipping the one which owns `troika`). */
  *troika = meta_tap(env);
  for (MDBX_txn *txn = env->basal_txn; txn; txn = txn->nested)
    if (troika != &txn->tw.troika)
      txn->tw.troika = *troika;

  /* LY: shrink datafile if needed */
  if (unlikely(shrink)) {
    VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", pending->geometry.now, shrink);
    rc = dxb_resize(env, pending->geometry.first_unallocated, pending->geometry.now, pending->geometry.upper,
                    impilict_shrink);
    /* MDBX_EPERM from the resize is tolerated here */
    if (rc != MDBX_SUCCESS && rc != MDBX_EPERM)
      goto fail;
    eASSERT(env, coherency_check_meta(env, target, true));
  }

  lck_t *const lck = env->lck_mmap.lck;
  if (likely(lck))
    /* toggle oldest refresh */
    atomic_store32(&lck->rdt_refresh_flag, false, mo_Relaxed);

  return MDBX_SUCCESS;

fail:
  /* Any failure which reaches this label marks the environment as broken */
  env->flags |= ENV_FATAL_ERROR;
  return rc;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

MDBX_txn *env_owned_wrtxn(const MDBX_env *env) {
  if (likely(env->basal_txn)) {
    const bool is_owned = (env->flags & MDBX_NOSTICKYTHREADS) ? (env->basal_txn->owner != 0)
                                                              : (env->basal_txn->owner == osal_thread_self());
    if (is_owned)
      return env->txn ? env->txn : env->basal_txn;
  }
  return nullptr;
}

/* Ensures that the auxiliary page buffer of the environment exists and
 * (re)initializes its content.
 *
 * The buffer of NUM_METAS pages, aligned to the system page size, is
 * allocated only if env->page_auxbuf is not set yet. On success, regardless
 * of whether the buffer was just allocated or already existed, the first two
 * pages are filled with 0xFF bytes and the third page is zeroed.
 *
 * Returns MDBX_SUCCESS or the error code from osal_memalign_alloc(). */
int env_page_auxbuffer(MDBX_env *env) {
  int err = MDBX_SUCCESS;
  if (!env->page_auxbuf)
    err = osal_memalign_alloc(globals.sys_pagesize, env->ps * (size_t)NUM_METAS, &env->page_auxbuf);

  if (likely(err == MDBX_SUCCESS)) {
    const size_t one_page = env->ps;
    const size_t two_pages = one_page * 2;
    /* pages #0 and #1: all bits set */
    memset(env->page_auxbuf, -1, two_pages);
    /* page #2: all bits clear */
    memset(ptr_disp(env->page_auxbuf, two_pages), 0, one_page);
  }
  return err;
}

/* Applies the given DB page size to the environment and derives all
 * page-size-dependent limits stored in `env`.
 *
 * \param env       The environment to configure. It must not have the
 *                  auxiliary page buffer allocated yet and must not already
 *                  use this page size (enforced via ENSURE below).
 * \param pagesize  The page size in bytes; must be a power of two within
 *                  MDBX_MIN_PAGESIZE..MDBX_MAX_PAGESIZE (enforced via ENSURE).
 *
 * \returns The page size now stored in env->ps.
 *
 * The order of statements matters: env->ps and env->ps2ln must be assigned
 * before the conversions and recalculations which follow them. */
__cold unsigned env_setup_pagesize(MDBX_env *env, const size_t pagesize) {
  STATIC_ASSERT(PTRDIFF_MAX > MAX_MAPSIZE);
  STATIC_ASSERT(MDBX_MIN_PAGESIZE > sizeof(page_t) + sizeof(meta_t));
  ENSURE(env, is_powerof2(pagesize));
  ENSURE(env, pagesize >= MDBX_MIN_PAGESIZE);
  ENSURE(env, pagesize <= MDBX_MAX_PAGESIZE);
  ENSURE(env, !env->page_auxbuf && env->ps != pagesize);
  env->ps = (unsigned)pagesize;

  /* The number of pgno_t items which fit in the payload of a single page,
   * minus one (presumably the slot for the list length - confirm). */
  STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MIN_PAGESIZE) > 4);
  STATIC_ASSERT(MAX_GC1OVPAGE(MDBX_MAX_PAGESIZE) < PAGELIST_LIMIT);
  const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
  ENSURE(env, maxgc_ov1page > 42 && maxgc_ov1page < (intptr_t)PAGELIST_LIMIT / 4);
  env->maxgc_large1page = (unsigned)maxgc_ov1page;
  /* How many (index slot + node header + txnid key) entries fit in the
   * payload of a single page. */
  env->maxgc_per_branch = (unsigned)((pagesize - PAGEHDRSZ) / (sizeof(indx_t) + sizeof(node_t) + sizeof(txnid_t)));

  /* Node size limits: both must be even, fit in uint16_t, and the leaf limit
   * must be not less than the branch limit. */
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) > sizeof(tree_t) + NODESIZE + 42);
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MAX_PAGESIZE) < UINT16_MAX);
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) >= BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE));
  STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) > NODESIZE + 42);
  STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MAX_PAGESIZE) < UINT16_MAX);
  const intptr_t branch_nodemax = BRANCH_NODE_MAX(pagesize);
  const intptr_t leaf_nodemax = LEAF_NODE_MAX(pagesize);
  ENSURE(env, branch_nodemax > (intptr_t)(NODESIZE + 42) && branch_nodemax % 2 == 0 &&
                  leaf_nodemax > (intptr_t)(sizeof(tree_t) + NODESIZE + 42) && leaf_nodemax >= branch_nodemax &&
                  leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0);
  env->leaf_nodemax = (uint16_t)leaf_nodemax;
  env->branch_nodemax = (uint16_t)branch_nodemax;
  /* log2 of the page size; the assertions right below verify the
   * bytes <-> page-number conversions against it. */
  env->ps2ln = (uint8_t)log2n_powerof2(pagesize);
  eASSERT(env, pgno2bytes(env, 1) == pagesize);
  eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2);
  /* Refresh the settings which depend on the page size */
  recalculate_merge_thresholds(env);
  recalculate_subpage_thresholds(env);
  env_options_adjust_dp_limit(env);
  return env->ps;
}

/* Flushes unsynced data and meta of the environment to the datafile.
 *
 * The function makes up to two passes over the code after the `retry` label:
 *  - the first pass is done without the writer lock (unless the caller
 *    already owns the write transaction); it could perform a "pre-sync"
 *    of the data and then acquires the lock via lck_txn_lock();
 *  - the second pass is done under the lock and performs the actual
 *    work via dxb_sync_locked() and/or meta_sync().
 *
 * \param env       The environment; must not be read-only.
 * \param force     When true the sync flags are cleared (except MDBX_WRITEMAP)
 *                  to request a full steady sync.
 * \param nonblock  Passed as-is to lck_txn_lock().
 *
 * \returns MDBX_EACCESS for a read-only environment,
 *          MDBX_PANIC if the environment is in the fatal error state,
 *          MDBX_EPERM if the environment is not active,
 *          MDBX_RESULT_TRUE which means "nothing to sync",
 *          MDBX_SUCCESS which means "some data was synced",
 *          or another error code from the underlying calls. */
__cold int env_sync(MDBX_env *env, bool force, bool nonblock) {
  if (unlikely(env->flags & MDBX_RDONLY))
    return MDBX_EACCESS;

  /* Non-null if the caller already owns the write transaction,
   * in which case the lock is not acquired here. */
  MDBX_txn *const txn_owned = env_owned_wrtxn(env);
  /* Becomes true once the writer lock was acquired by this function */
  bool should_unlock = false;
  int rc = MDBX_RESULT_TRUE /* means "nothing to sync" */;

retry:;
  /* MDBX_NOMETASYNC and txn_shrink_allowed are never inherited from
   * env->flags; the latter could be added below. */
  unsigned flags = env->flags & ~(MDBX_NOMETASYNC | txn_shrink_allowed);
  if (unlikely((flags & (ENV_FATAL_ERROR | ENV_ACTIVE)) != ENV_ACTIVE)) {
    rc = (flags & ENV_FATAL_ERROR) ? MDBX_PANIC : MDBX_EPERM;
    goto bailout;
  }

  /* Under the lock the troika kept in the basal transaction is used,
   * otherwise a fresh snapshot of the meta-pages is taken. */
  const troika_t troika = (txn_owned || should_unlock) ? env->basal_txn->tw.troika : meta_tap(env);
  const meta_ptr_t head = meta_recent(env, &troika);
  const uint64_t unsynced_pages = atomic_load64(&env->lck->unsynced_pages, mo_Relaxed);
  if (unsynced_pages == 0) {
    /* Nothing to do if there are no unsynced pages and the steady head meta
     * is already recorded as synced. */
    const uint32_t synched_meta_txnid_u32 = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed);
    if (synched_meta_txnid_u32 == (uint32_t)head.txnid && head.is_steady)
      goto bailout;
  }

  /* Under our own lock with MDBX_WRITEMAP: grow the mapping if the head meta
   * refers to pages beyond the currently mapped size. */
  if (should_unlock && (env->flags & MDBX_WRITEMAP) &&
      unlikely(head.ptr_c->geometry.first_unallocated > bytes2pgno(env, env->dxb_mmap.current))) {

    /* NOTE(review): "mispatch" in the message below looks like a typo for
     * "mismatch"; it is a runtime string and therefore left unchanged here. */
    if (unlikely(env->stuck_meta >= 0) && troika.recent != (uint8_t)env->stuck_meta) {
      NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent "
             "meta-page (%u)",
             "sync datafile", env->stuck_meta, troika.recent);
      rc = MDBX_RESULT_TRUE;
    } else {
      rc = dxb_resize(env, head.ptr_c->geometry.first_unallocated, head.ptr_c->geometry.now, head.ptr_c->geometry.upper,
                      implicit_grow);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
    }
  }

  const size_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed);
  const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed);
  uint64_t eoos_timestamp;
  if (force || (autosync_threshold && unsynced_pages >= autosync_threshold) ||
      (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) &&
       osal_monotime() - eoos_timestamp >= autosync_period))
    flags &= MDBX_WRITEMAP /* clear flags for full steady sync */;

  if (!txn_owned) {
    if (!should_unlock) {
      /* First pass: the lock is not held yet, so the plain `return err`
       * statements below do not need to release it. */
#if MDBX_ENABLE_PGOP_STAT
      unsigned wops = 0;
#endif /* MDBX_ENABLE_PGOP_STAT */

      int err;
      /* pre-sync to avoid latency for writer */
      if (unsynced_pages > /* FIXME: define threshold */ 42 && (flags & MDBX_SAFE_NOSYNC) == 0) {
        eASSERT(env, ((flags ^ env->flags) & MDBX_WRITEMAP) == 0);
        if (flags & MDBX_WRITEMAP) {
          /* Acquire guard to avoid collision with remap */
#if defined(_WIN32) || defined(_WIN64)
          imports.srwl_AcquireShared(&env->remap_guard);
#else
          err = osal_fastmutex_acquire(&env->remap_guard);
          if (unlikely(err != MDBX_SUCCESS))
            return err;
#endif
          const size_t usedbytes = pgno_align2os_bytes(env, head.ptr_c->geometry.first_unallocated);
          err = osal_msync(&env->dxb_mmap, 0, usedbytes, MDBX_SYNC_DATA);
#if defined(_WIN32) || defined(_WIN64)
          imports.srwl_ReleaseShared(&env->remap_guard);
#else
          /* An error of the msync has priority over an error of the unlock */
          int unlock_err = osal_fastmutex_release(&env->remap_guard);
          if (unlikely(unlock_err != MDBX_SUCCESS) && err == MDBX_SUCCESS)
            err = unlock_err;
#endif
        } else
          err = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA);

        if (unlikely(err != MDBX_SUCCESS))
          return err;

#if MDBX_ENABLE_PGOP_STAT
        wops = 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
        /* pre-sync done */
        rc = MDBX_SUCCESS /* means "some data was synced" */;
      }

      err = lck_txn_lock(env, nonblock);
      if (unlikely(err != MDBX_SUCCESS))
        return err;

      should_unlock = true;
#if MDBX_ENABLE_PGOP_STAT
      /* The statistics counter is updated only after the lock was acquired */
      env->lck->pgops.wops.weak += wops;
#endif /* MDBX_ENABLE_PGOP_STAT */
      /* Refresh the troika under the lock and re-evaluate everything */
      env->basal_txn->tw.troika = meta_tap(env);
      eASSERT(env, !env->txn && !env->basal_txn->nested);
      goto retry;
    }
    /* Second pass under our own lock: prepare the basal transaction and
     * allow dxb_sync_locked() to discard/shrink the unused tail. */
    eASSERT(env, head.txnid == recent_committed_txnid(env));
    env->basal_txn->txnid = head.txnid;
    txn_snapshot_oldest(env->basal_txn);
    flags |= txn_shrink_allowed;
  }

  eASSERT(env, txn_owned || should_unlock);
  eASSERT(env, !txn_owned || (flags & txn_shrink_allowed) == 0);

  if (!head.is_steady && unlikely(env->stuck_meta >= 0) && troika.recent != (uint8_t)env->stuck_meta) {
    NOTICE("skip %s since wagering meta-page (%u) is mispatch the recent "
           "meta-page (%u)",
           "sync datafile", env->stuck_meta, troika.recent);
    rc = MDBX_RESULT_TRUE;
    goto bailout;
  }
  if (!head.is_steady || ((flags & MDBX_SAFE_NOSYNC) == 0 && unsynced_pages)) {
    DEBUG("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIu64, data_page(head.ptr_c)->pgno,
          durable_caption(head.ptr_c), unsynced_pages);
    /* A private copy of the head meta is passed as the pending one */
    meta_t meta = *head.ptr_c;
    rc = dxb_sync_locked(env, flags, &meta, &env->basal_txn->tw.troika);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;
  }

  /* LY: sync meta-pages if MDBX_NOMETASYNC enabled
   *     and someone was not synced above. */
  if (atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed) != (uint32_t)head.txnid)
    rc = meta_sync(env, head);

bailout:
  /* Release the writer lock only if it was acquired by this function */
  if (should_unlock)
    lck_txn_unlock(env);
  return rc;
}

__cold int env_open(MDBX_env *env, mdbx_mode_t mode) {
  /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH:
   *
   *   0) Если размер страниц БД меньше системной страницы ОЗУ, то ядру ОС
   *      придется чаще обновлять страницы в unified page cache.
   *
   *      Однако, O_DSYNC не предполагает отключение unified page cache,
   *      поэтому подобные затруднения будем считать проблемой ОС и/или
   *      ожидаемым пенальти из-за использования мелких страниц БД.
   *
   *   1) В режиме MDBX_SYNC_DURABLE - O_DSYNC для записи как данных,
   *      так и мета-страниц. Однако, на Linux отказ от O_DSYNC с последующим
   *      fdatasync() может быть выгоднее при использовании HDD, так как
   *      позволяет io-scheduler переупорядочить запись с учетом актуального
   *      расположения файла БД на носителе.
   *
   *   2) В режиме MDBX_NOMETASYNC - O_DSYNC можно использовать для данных,
   *      но в этом может не быть смысла, так как fdatasync() всё равно
   *      требуется для гарантии фиксации мета после предыдущей транзакции.
   *
   *      В итоге на нормальных системах (не Windows) есть два варианта:
   *       - при возможности O_DIRECT и/или io_ring для данных, скорее всего,
   *         есть смысл вызвать fdatasync() перед записью данных, а затем
   *         использовать O_DSYNC;
   *       - не использовать O_DSYNC и вызывать fdatasync() после записи данных.
   *
   *      На Windows же следует минимизировать использование FlushFileBuffers()
   *      из-за проблем с производительностью. Поэтому на Windows в режиме
   *      MDBX_NOMETASYNC:
   *       - мета обновляется через дескриптор без FILE_FLAG_WRITE_THROUGH;
   *       - перед началом записи данных вызывается FlushFileBuffers(), если
   *         meta_sync_txnid не совпадает с последней записанной мета;
   *       - данные записываются через дескриптор с FILE_FLAG_WRITE_THROUGH.
   *
   *   3) В режиме MDBX_SAFE_NOSYNC - O_DSYNC нет смысла использовать, пока не
   *      будет реализована возможность полностью асинхронной "догоняющей"
   *      записи в выделенном процессе-сервере с io-ring очередями внутри.
   *
   * -----
   *
   * Использование O_DIRECT или FILE_FLAG_NO_BUFFERING:
   *
   *   Назначение этих флагов в отключении файлового дескриптора от
   *   unified page cache, т.е. от отображенных в память данных в случае
   *   libmdbx.
   *
   *   Поэтому, использование direct i/o в libmdbx без MDBX_WRITEMAP лишено
   *   смысла и контр-продуктивно, ибо так мы провоцируем ядро ОС на
   *   не-когерентность отображения в память с содержимым файла на носителе,
   *   либо требуем дополнительных проверок и действий направленных на
   *   фактическое отключение O_DIRECT для отображенных в память данных.
   *
   *   В режиме MDBX_WRITEMAP когерентность отображенных данных обеспечивается
   *   физически. Поэтому использование direct i/o может иметь смысл, если у
   *   ядра ОС есть какие-то проблемы с msync(), в том числе с
   *   производительностью:
   *    - использование io_ring или gather-write может быть дешевле, чем
   *      просмотр PTE ядром и запись измененных/грязных;
   *    - но проблема в том, что записываемые из user mode страницы либо не
   *      будут помечены чистыми (и соответственно будут записаны ядром
   *      еще раз), либо ядру необходимо искать и чистить PTE при получении
   *      запроса на запись.
   *
   *   Поэтому O_DIRECT или FILE_FLAG_NO_BUFFERING используется:
   *    - только в режиме MDBX_SYNC_DURABLE с MDBX_WRITEMAP;
   *    - когда ps >= me_os_psize;
   *    - опция сборки MDBX_AVOID_MSYNC != 0, которая по-умолчанию включена
   *      только на Windows (см ниже).
   *
   * -----
   *
   * Использование FILE_FLAG_OVERLAPPED на Windows:
   *
   * У Windows очень плохо с I/O (за исключением прямых постраничных
   * scatter/gather, которые работают в обход проблемного unified page
   * cache и поэтому почти бесполезны в libmdbx).
   *
   * При этом всё еще хуже при использовании FlushFileBuffers(), что также
   * требуется после FlushViewOfFile() в режиме MDBX_WRITEMAP. Поэтому
   * на Windows вместо FlushViewOfFile() и FlushFileBuffers() следует
   * использовать запись через дескриптор с FILE_FLAG_WRITE_THROUGH.
   *
   * В свою очередь, запись с FILE_FLAG_WRITE_THROUGH дешевле/быстрее
   * при использовании FILE_FLAG_OVERLAPPED. В результате, на Windows
   * в durable-режимах запись данных всегда в overlapped-режиме,
   * при этом для записи мета требуется отдельный не-overlapped дескриптор.
   */

  env->pid = osal_getpid();
  int rc = osal_openfile((env->flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ : MDBX_OPEN_DXB_LAZY, env, env->pathname.dxb,
                         &env->lazy_fd, mode);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

#if MDBX_LOCKING == MDBX_LOCKING_SYSV
  env->me_sysv_ipc.key = ftok(env->pathname.dxb, 42);
  if (unlikely(env->me_sysv_ipc.key == -1))
    return errno;
#endif /* MDBX_LOCKING */

  /* Set the position in files outside of the data to avoid corruption
   * due to erroneous use of file descriptors in the application code. */
  const uint64_t safe_parking_lot_offset = UINT64_C(0x7fffFFFF80000000);
  osal_fseek(env->lazy_fd, safe_parking_lot_offset);

  env->fd4meta = env->lazy_fd;
#if defined(_WIN32) || defined(_WIN64)
  eASSERT(env, env->ioring.overlapped_fd == 0);
  bool ior_direct = false;
  if (!(env->flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) {
    if (MDBX_AVOID_MSYNC && (env->flags & MDBX_WRITEMAP)) {
      /* Запрошен режим MDBX_SYNC_DURABLE | MDBX_WRITEMAP при активной опции
       * MDBX_AVOID_MSYNC.
       *
       * 1) В этой комбинации наиболее выгодно использовать WriteFileGather(),
       * но для этого необходимо открыть файл с флагом FILE_FLAG_NO_BUFFERING и
       * после обеспечивать выравнивание адресов и размера данных на границу
       * системной страницы, что в свою очередь возможно если размер страницы БД
       * не меньше размера системной страницы ОЗУ. Поэтому для открытия файла в
       * нужном режиме требуется знать размер страницы БД.
       *
       * 2) Кроме этого, в Windows запись в заблокированный регион файла
       * возможно только через тот-же дескриптор. Поэтому изначальный захват
       * блокировок посредством lck_seize(), захват/освобождение блокировок
       * во время пишущих транзакций и запись данных должны выполнятся через
       * один дескриптор.
       *
       * Таким образом, требуется прочитать волатильный заголовок БД, чтобы
       * узнать размер страницы, чтобы открыть дескриптор файла в режиме нужном
       * для записи данных, чтобы использовать именно этот дескриптор для
       * изначального захвата блокировок. */
      meta_t header;
      uint64_t dxb_filesize;
      int err = dxb_read_header(env, &header, MDBX_SUCCESS, true);
      if ((err == MDBX_SUCCESS && header.pagesize >= globals.sys_pagesize) ||
          (err == MDBX_ENODATA && mode && env->ps >= globals.sys_pagesize &&
           osal_filesize(env->lazy_fd, &dxb_filesize) == MDBX_SUCCESS && dxb_filesize == 0))
        /* Может быть коллизия, если два процесса пытаются одновременно создать
         * БД с разным размером страницы, который у одного меньше системной
         * страницы, а у другого НЕ меньше. Эта допустимая, но очень странная
         * ситуация. Поэтому считаем её ошибочной и не пытаемся разрешить. */
        ior_direct = true;
    }

    rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT : MDBX_OPEN_DXB_OVERLAPPED, env, env->pathname.dxb,
                       &env->ioring.overlapped_fd, 0);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
    env->dxb_lock_event = CreateEventW(nullptr, true, false, nullptr);
    if (unlikely(!env->dxb_lock_event))
      return (int)GetLastError();
    osal_fseek(env->ioring.overlapped_fd, safe_parking_lot_offset);
  }
#else
  if (mode == 0) {
    /* pickup mode for lck-file */
    struct stat st;
    if (unlikely(fstat(env->lazy_fd, &st)))
      return errno;
    mode = st.st_mode;
  }
  mode = (/* inherit read permissions for group and others */ mode & (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) |
         /* always add read/write for owner */ S_IRUSR | S_IWUSR |
         ((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) |
         ((mode & S_IROTH) ? /* +write if readable by others */ S_IWOTH : 0);
#endif /* !Windows */
  const int lck_rc = lck_setup(env, mode);
  if (unlikely(MDBX_IS_ERROR(lck_rc)))
    return lck_rc;
  if (env->lck_mmap.fd != INVALID_HANDLE_VALUE)
    osal_fseek(env->lck_mmap.fd, safe_parking_lot_offset);

  eASSERT(env, env->dsync_fd == INVALID_HANDLE_VALUE);
  if (!(env->flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | DEPRECATED_MAPASYNC
#if defined(_WIN32) || defined(_WIN64)
                      | MDBX_EXCLUSIVE
#endif /* !Windows */
                      ))) {
    rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->pathname.dxb, &env->dsync_fd, 0);
    if (unlikely(MDBX_IS_ERROR(rc)))
      return rc;
    if (env->dsync_fd != INVALID_HANDLE_VALUE) {
      if ((env->flags & MDBX_NOMETASYNC) == 0)
        env->fd4meta = env->dsync_fd;
      osal_fseek(env->dsync_fd, safe_parking_lot_offset);
    }
  }

  const MDBX_env_flags_t lazy_flags = MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_NOMETASYNC;
  const MDBX_env_flags_t mode_flags = lazy_flags | MDBX_LIFORECLAIM | MDBX_NORDAHEAD | MDBX_RDONLY | MDBX_WRITEMAP;

  lck_t *const lck = env->lck_mmap.lck;
  if (lck && lck_rc != MDBX_RESULT_TRUE && (env->flags & MDBX_RDONLY) == 0) {
    MDBX_env_flags_t snap_flags;
    while ((snap_flags = atomic_load32(&lck->envmode, mo_AcquireRelease)) == MDBX_RDONLY) {
      if (atomic_cas32(&lck->envmode, MDBX_RDONLY, (snap_flags = (env->flags & mode_flags)))) {
        /* The case:
         *  - let's assume that for some reason the DB file is smaller
         *    than it should be according to the geometry,
         *    but not smaller than the last page used;
         *  - the first process that opens the database (lck_rc == RESULT_TRUE)
         *    does this in readonly mode and therefore cannot bring
         *    the file size back to normal;
         *  - some next process (lck_rc != RESULT_TRUE) opens the DB in
         *    read-write mode and now is here.
         *
         * FIXME: Should we re-check and set the size of DB-file right here? */
        break;
      }
      atomic_yield();
    }

    if (env->flags & MDBX_ACCEDE) {
      /* Pickup current mode-flags (MDBX_LIFORECLAIM, MDBX_NORDAHEAD, etc). */
      const MDBX_env_flags_t diff =
          (snap_flags ^ env->flags) & ((snap_flags & lazy_flags) ? mode_flags : mode_flags & ~MDBX_WRITEMAP);
      env->flags ^= diff;
      NOTICE("accede mode-flags: 0x%X, 0x%X -> 0x%X", diff, env->flags ^ diff, env->flags);
    }

    /* Ранее упущенный не очевидный момент: При работе БД в режимах
     * не-синхронной/отложенной фиксации на диске, все процессы-писатели должны
     * иметь одинаковый режим MDBX_WRITEMAP.
     *
     * В противном случае, сброс на диск следует выполнять дважды: сначала
     * msync(), затем fdatasync(). При этом msync() не обязан отрабатывать
     * в процессах без MDBX_WRITEMAP, так как файл в память отображен только
     * для чтения. Поэтому, в общем случае, различия по MDBX_WRITEMAP не
     * позволяют выполнить фиксацию данных на диск, после их изменения в другом
     * процессе.
     *
     * В режиме MDBX_UTTERLY_NOSYNC позволять совместную работу с MDBX_WRITEMAP
     * также не следует, поскольку никакой процесс (в том числе последний) не
     * может гарантированно сбросить данные на диск, а следовательно не должен
     * помечать какую-либо транзакцию как steady.
     *
     * В результате, требуется либо запретить совместную работу процессам с
     * разным MDBX_WRITEMAP в режиме отложенной записи, либо отслеживать такое
     * смешивание и блокировать steady-пометки - что контрпродуктивно. */
    const MDBX_env_flags_t rigorous_flags = (snap_flags & lazy_flags)
                                                ? MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC | MDBX_WRITEMAP
                                                : MDBX_SAFE_NOSYNC | MDBX_UTTERLY_NOSYNC;
    const MDBX_env_flags_t rigorous_diff = (snap_flags ^ env->flags) & rigorous_flags;
    if (rigorous_diff) {
      ERROR("current mode/flags 0x%X incompatible with requested 0x%X, "
            "rigorous diff 0x%X",
            env->flags, snap_flags, rigorous_diff);
      return MDBX_INCOMPATIBLE;
    }
  }

  mincore_clean_cache(env);
  const int dxb_rc = dxb_setup(env, lck_rc, mode);
  if (MDBX_IS_ERROR(dxb_rc))
    return dxb_rc;

  rc = osal_check_fs_incore(env->lazy_fd);
  env->incore = false;
  if (rc == MDBX_RESULT_TRUE) {
    env->incore = true;
    NOTICE("%s", "in-core database");
    rc = MDBX_SUCCESS;
  } else if (unlikely(rc != MDBX_SUCCESS)) {
    ERROR("check_fs_incore(), err %d", rc);
    return rc;
  }

  if (unlikely(/* recovery mode */ env->stuck_meta >= 0) &&
      (lck_rc != /* exclusive */ MDBX_RESULT_TRUE || (env->flags & MDBX_EXCLUSIVE) == 0)) {
    ERROR("%s", "recovery requires exclusive mode");
    return MDBX_BUSY;
  }

  DEBUG("opened dbenv %p", (void *)env);
  env->flags |= ENV_ACTIVE;
  if (!lck || lck_rc == MDBX_RESULT_TRUE) {
    env->lck->envmode.weak = env->flags & mode_flags;
    env->lck->meta_sync_txnid.weak = (uint32_t)recent_committed_txnid(env);
    env->lck->readers_check_timestamp.weak = osal_monotime();
  }
  if (lck) {
    if (lck_rc == MDBX_RESULT_TRUE) {
      rc = lck_downgrade(env);
      DEBUG("lck-downgrade-%s: rc %i", (env->flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc);
      if (rc != MDBX_SUCCESS)
        return rc;
    } else {
      rc = mvcc_cleanup_dead(env, false, nullptr);
      if (MDBX_IS_ERROR(rc))
        return rc;
    }
  }

  rc = (env->flags & MDBX_RDONLY) ? MDBX_SUCCESS
                                  : osal_ioring_create(&env->ioring
#if defined(_WIN32) || defined(_WIN64)
                                                       ,
                                                       ior_direct, env->ioring.overlapped_fd
#endif /* Windows */
                                    );
  return rc;
}

/* Tears down the resources held by an environment.
 *
 * Clears the internal flag bits, deletes the thread-specific key (when
 * ENV_TXKEY was set), removes the environment from the rthc registry and
 * then releases the ioring, both memory mappings and every open file handle.
 * Unless `resurrect_after_fork` is true, the heap-allocated members (kvs
 * together with the table names, page_auxbuf, dbi_seqs, dbs_flags, the
 * pathname buffer and basal_txn with its lists) are freed as well.
 *
 * Returns the result of rthc_remove(); results of closing the file handles
 * are explicitly discarded. */
__cold int env_close(MDBX_env *env, bool resurrect_after_fork) {
  /* Keep a snapshot of the flags: the internal bits are cleared right away,
   * but ENV_TXKEY is still needed for the check below. */
  const unsigned flags = env->flags;
  env->flags &= ~ENV_INTERNAL_FLAGS;
  if (flags & ENV_TXKEY) {
    thread_key_delete(env->me_txkey);
    env->me_txkey = 0;
  }

  if (env->lck)
    munlock_all(env);

  /* The removal is performed under the rthc lock, its result becomes
   * the return value of this function. */
  rthc_lock();
  int rc = rthc_remove(env);
  rthc_unlock();

#if MDBX_ENABLE_DBI_LOCKFREE
  /* Release the whole chain of deferred-free items. */
  for (defer_free_item_t *next, *ptr = env->defer_free; ptr; ptr = next) {
    next = ptr->next;
    osal_free(ptr);
  }
  env->defer_free = nullptr;
#endif /* MDBX_ENABLE_DBI_LOCKFREE */

  /* The ioring is destroyed only for environments not flagged MDBX_RDONLY. */
  if (!(env->flags & MDBX_RDONLY))
    osal_ioring_destroy(&env->ioring);

  /* Drop the lck pointer before unmapping the lck-file. */
  env->lck = nullptr;
  if (env->lck_mmap.lck)
    osal_munmap(&env->lck_mmap);

  if (env->dxb_mmap.base) {
    osal_munmap(&env->dxb_mmap);
#ifdef ENABLE_MEMCHECK
    VALGRIND_DISCARD(env->valgrind_handle);
    env->valgrind_handle = -1;
#endif /* ENABLE_MEMCHECK */
  }

#if defined(_WIN32) || defined(_WIN64)
  /* At this point the overlapped handle is expected to be either zero or
   * INVALID_HANDLE_VALUE. */
  eASSERT(env, !env->ioring.overlapped_fd || env->ioring.overlapped_fd == INVALID_HANDLE_VALUE);
  if (env->dxb_lock_event != INVALID_HANDLE_VALUE) {
    CloseHandle(env->dxb_lock_event);
    env->dxb_lock_event = INVALID_HANDLE_VALUE;
  }
  /* Resurrecting after a fork is not expected on Windows. */
  eASSERT(env, !resurrect_after_fork);
  if (env->pathname_char) {
    osal_free(env->pathname_char);
    env->pathname_char = nullptr;
  }
#endif /* Windows */

  /* Close the file handles; each one is reset to INVALID_HANDLE_VALUE
   * so it is closed at most once. */
  if (env->dsync_fd != INVALID_HANDLE_VALUE) {
    (void)osal_closefile(env->dsync_fd);
    env->dsync_fd = INVALID_HANDLE_VALUE;
  }

  if (env->lazy_fd != INVALID_HANDLE_VALUE) {
    (void)osal_closefile(env->lazy_fd);
    env->lazy_fd = INVALID_HANDLE_VALUE;
  }

  if (env->lck_mmap.fd != INVALID_HANDLE_VALUE) {
    (void)osal_closefile(env->lck_mmap.fd);
    env->lck_mmap.fd = INVALID_HANDLE_VALUE;
  }

  /* The heap-allocated state is kept when the environment is going
   * to be resurrected after a fork. */
  if (!resurrect_after_fork) {
    if (env->kvs) {
      /* Only slots starting from CORE_DBS with a non-empty name have
       * their name buffer freed. */
      for (size_t i = CORE_DBS; i < env->n_dbi; ++i)
        if (env->kvs[i].name.iov_len)
          osal_free(env->kvs[i].name.iov_base);
      osal_free(env->kvs);
      env->n_dbi = CORE_DBS;
      env->kvs = nullptr;
    }
    if (env->page_auxbuf) {
      osal_memalign_free(env->page_auxbuf);
      env->page_auxbuf = nullptr;
    }
    if (env->dbi_seqs) {
      osal_free(env->dbi_seqs);
      env->dbi_seqs = nullptr;
    }
    if (env->dbs_flags) {
      osal_free(env->dbs_flags);
      env->dbs_flags = nullptr;
    }
    if (env->pathname.buffer) {
      osal_free(env->pathname.buffer);
      env->pathname.buffer = nullptr;
    }
    if (env->basal_txn) {
      /* Free the lists owned by the basal transaction before
       * the transaction object itself. */
      dpl_free(env->basal_txn);
      txl_free(env->basal_txn->tw.gc.retxl);
      pnl_free(env->basal_txn->tw.retired_pages);
      pnl_free(env->basal_txn->tw.spilled.list);
      pnl_free(env->basal_txn->tw.repnl);
      osal_free(env->basal_txn);
      env->basal_txn = nullptr;
    }
  }
  /* NOTE(review): a non-negative stuck_meta appears to denote the recovery
   * mode on open, so -1 presumably means "none" - confirm. */
  env->stuck_meta = -1;
  return rc;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

#if MDBX_USE_MINCORE
/*------------------------------------------------------------------------------
 * Проверка размещения/расположения отображенных страниц БД в ОЗУ (mem-in-core),
 * с кешированием этой информации. */

/* Test-and-set (non-atomic) of a single bit inside a 64-bit field.
 *
 * Sets bit number `bit` in `*field` and returns true when that bit
 * was already set before the call, false otherwise. */
static inline bool bit_tas(uint64_t *field, char bit) {
  const uint64_t selector = UINT64_C(1) << bit;
  const uint64_t before = *field;
  *field = before | selector;
  return (before & selector) != 0;
}

/* Slow path of the residency cache, used by mincore_probe() when the head
 * cache entry does not cover `unit_begin`.
 *
 * First the remaining cache entries are searched; on a hit the matching entry
 * is moved to the head (entries in front of it slide one slot down) and the
 * bit of the requested unit is tested-and-set. On a miss the kernel is asked
 * via mincore() about a window of up to 64 units starting at `unit_begin`,
 * the cached entries are shifted one slot down (the last one is dropped) and
 * the new window becomes the head entry.
 *
 * A unit is the larger of the DB page and the system page. A mask bit is set
 * when no system page of the unit was reported as non-resident.
 *
 * Returns the previous state of the unit's bit in the cache mask (the bit is
 * set as a side effect), or false when mincore() fails. */
static bool mincore_fetch(MDBX_env *const env, const size_t unit_begin) {
  lck_t *const lck = env->lck;
  /* Entry 0 is skipped here: it has been examined by mincore_probe(). */
  for (size_t i = 1; i < ARRAY_LENGTH(lck->mincore_cache.begin); ++i) {
    const ptrdiff_t dist = unit_begin - lck->mincore_cache.begin[i];
    if (likely(dist >= 0 && dist < 64)) {
      /* Hit: rotate the found entry to the head of the cache. */
      const pgno_t tmp_begin = lck->mincore_cache.begin[i];
      const uint64_t tmp_mask = lck->mincore_cache.mask[i];
      do {
        lck->mincore_cache.begin[i] = lck->mincore_cache.begin[i - 1];
        lck->mincore_cache.mask[i] = lck->mincore_cache.mask[i - 1];
      } while (--i);
      lck->mincore_cache.begin[0] = tmp_begin;
      lck->mincore_cache.mask[0] = tmp_mask;
      return bit_tas(lck->mincore_cache.mask, (char)dist);
    }
  }

  /* Miss: compute the window to query. When the DB page is larger than
   * the system page, each unit spans (1 << shift) system pages. */
  size_t pages = 64;
  unsigned unit_log = globals.sys_pagesize_ln2;
  unsigned shift = 0;
  if (env->ps > globals.sys_pagesize) {
    unit_log = env->ps2ln;
    shift = env->ps2ln - globals.sys_pagesize_ln2;
    pages <<= shift;
  }

  /* Clamp the window to the currently mapped size. */
  const size_t offset = unit_begin << unit_log;
  size_t length = pages << globals.sys_pagesize_ln2;
  if (offset + length > env->dxb_mmap.current) {
    length = env->dxb_mmap.current - offset;
    pages = length >> globals.sys_pagesize_ln2;
  }

#if MDBX_ENABLE_PGOP_STAT
  env->lck->pgops.mincore.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
  /* One status byte per system page of the window. */
  uint8_t *const vector = alloca(pages);
  if (unlikely(mincore(ptr_disp(env->dxb_mmap.base, offset), length, (void *)vector))) {
    NOTICE("mincore(+%zu, %zu), err %d", offset, length, errno);
    return false;
  }

  /* Make room for the new head entry: shift the cached entries one slot down,
   * walking from the tail towards the head so that each entry is copied
   * before it gets overwritten (an ascending walk would just replicate
   * the old head entry into every slot and lose the other cached windows). */
  for (size_t i = ARRAY_LENGTH(lck->mincore_cache.begin) - 1; i > 0; --i) {
    lck->mincore_cache.begin[i] = lck->mincore_cache.begin[i - 1];
    lck->mincore_cache.mask[i] = lck->mincore_cache.mask[i - 1];
  }
  lck->mincore_cache.begin[0] = unit_begin;

  /* Collect a bit per unit which has at least one non-resident system page;
   * only the least significant bit of each status byte is examined. */
  uint64_t mask = 0;
#ifdef MINCORE_INCORE
  STATIC_ASSERT(MINCORE_INCORE == 1);
#endif
  for (size_t i = 0; i < pages; ++i) {
    uint64_t bit = (vector[i] & 1) == 0;
    bit <<= i >> shift;
    mask |= bit;
  }

  /* Invert, so that a set bit means "nothing of the unit is missing". */
  lck->mincore_cache.mask[0] = ~mask;
  return bit_tas(lck->mincore_cache.mask, 0);
}
#endif /* MDBX_USE_MINCORE */

/* Fast path of the residency cache for the DB page `pgno`.
 *
 * The page offset is aligned down to the system page size and converted into
 * a unit index (a unit is the larger of the DB page and the system page).
 * When the head cache entry covers this unit, its bit is tested-and-set and
 * the previous bit state is returned; otherwise the request is passed to
 * mincore_fetch(). Without MDBX_USE_MINCORE the function always returns
 * false. */
MDBX_MAYBE_UNUSED static inline bool mincore_probe(MDBX_env *const env, const pgno_t pgno) {
#if MDBX_USE_MINCORE
  unsigned unit_log2 = globals.sys_pagesize_ln2;
  if (env->ps2ln > globals.sys_pagesize_ln2)
    unit_log2 = env->ps2ln;

  const size_t offset_aligned = floor_powerof2(pgno2bytes(env, pgno), globals.sys_pagesize);
  const size_t unit_begin = offset_aligned >> unit_log2;
  eASSERT(env, (unit_begin << unit_log2) == offset_aligned);

  /* Distance (in units) from the beginning of the head cache window. */
  const ptrdiff_t dist = unit_begin - env->lck->mincore_cache.begin[0];
  if (unlikely(dist < 0 || dist >= 64))
    return mincore_fetch(env, unit_begin);
  return bit_tas(env->lck->mincore_cache.mask, (char)dist);
#else
  (void)env;
  (void)pgno;
  return false;
#endif /* MDBX_USE_MINCORE */
}

/*----------------------------------------------------------------------------*/

/* Portable (non-SIMD) scan of a page-number list for a position whose item
 * lying `seq` slots further along the scan direction differs from it by
 * exactly `seq`.
 *
 * Preconditions (asserted): seq > 0 and len > seq; the slot in front of the
 * list items holds `len`. With MDBX_PNL_ASCENDING `range` points to the first
 * item and the scan goes upward, otherwise `range` points to the item with
 * index `len` and the scan goes downward.
 *
 * NOTE(review): presumably the list is sorted and holds unique page numbers,
 * so a match marks a run of seq+1 consecutive pages - confirm against callers.
 *
 * Returns a pointer to the matching item, or nullptr when there is none. */
MDBX_MAYBE_UNUSED __hot static pgno_t *scan4seq_fallback(pgno_t *range, const size_t len, const size_t seq) {
  assert(seq > 0 && len > seq);
#if MDBX_PNL_ASCENDING
  assert(range[-1] == len);
  /* First position for which the item at `offset` would be out of the list. */
  const pgno_t *const detent = range + len - seq;
  const ptrdiff_t offset = (ptrdiff_t)seq;
  const pgno_t target = (pgno_t)offset;
  if (likely(len > seq + 3)) {
    /* Unrolled by four: all differences are computed before any branch. */
    do {
      const pgno_t diff0 = range[offset + 0] - range[0];
      const pgno_t diff1 = range[offset + 1] - range[1];
      const pgno_t diff2 = range[offset + 2] - range[2];
      const pgno_t diff3 = range[offset + 3] - range[3];
      if (diff0 == target)
        return range + 0;
      if (diff1 == target)
        return range + 1;
      if (diff2 == target)
        return range + 2;
      if (diff3 == target)
        return range + 3;
      range += 4;
    } while (range + 3 < detent);
    /* Nothing is left for the one-by-one tail loop. */
    if (range == detent)
      return nullptr;
  }
  /* Tail: check the remaining positions one by one. */
  do
    if (range[offset] - *range == target)
      return range;
  while (++range < detent);
#else
  assert(range[-(ptrdiff_t)len] == len);
  /* Lowest position, the scan stops upon reaching it. */
  const pgno_t *const detent = range - len + seq;
  const ptrdiff_t offset = -(ptrdiff_t)seq;
  /* The negative offset converted to pgno_t, i.e. the wrapped-around value
   * of the unsigned difference that is being looked for. */
  const pgno_t target = (pgno_t)offset;
  if (likely(len > seq + 3)) {
    do {
      const pgno_t diff0 = range[-0] - range[offset - 0];
      const pgno_t diff1 = range[-1] - range[offset - 1];
      const pgno_t diff2 = range[-2] - range[offset - 2];
      const pgno_t diff3 = range[-3] - range[offset - 3];
      /* The point of computing everything before branching is to let
       * the compiler load and compute all the values in parallel. */
      if (diff0 == target)
        return range - 0;
      if (diff1 == target)
        return range - 1;
      if (diff2 == target)
        return range - 2;
      if (diff3 == target)
        return range - 3;
      range -= 4;
    } while (range > detent + 3);
    /* Nothing is left for the one-by-one tail loop. */
    if (range == detent)
      return nullptr;
  }
  /* Tail: check the remaining positions one by one. */
  do
    if (*range - range[offset] == target)
      return range;
  while (--range > detent);
#endif /* pnl_t sort-order */
  return nullptr;
}

MDBX_MAYBE_UNUSED static const pgno_t *scan4range_checker(const pnl_t pnl, const size_t seq) {
  size_t begin = MDBX_PNL_ASCENDING ? 1 : MDBX_PNL_GETSIZE(pnl);
#if MDBX_PNL_ASCENDING
  while (seq <= MDBX_PNL_GETSIZE(pnl) - begin) {
    if (pnl[begin + seq] - pnl[begin] == seq)
      return pnl + begin;
    ++begin;
  }
#else
  while (begin > seq) {
    if (pnl[begin - seq] - pnl[begin] == seq)
      return pnl + begin;
    --begin;
  }
#endif /* pnl_t sort-order */
  return nullptr;
}

#if defined(_MSC_VER) && !defined(__builtin_clz) && !__has_builtin(__builtin_clz)
/* MSVC substitute for the GCC/Clang builtin: the number of leading zero bits
 * of a 32-bit value, derived from the highest set bit position reported by
 * _BitScanReverse(). The intrinsic's return value is not checked, so
 * the result is meaningless for a zero argument. */
MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clz(uint32_t value) {
  unsigned long msb_pos;
  _BitScanReverse(&msb_pos, value);
  return 31 - msb_pos;
}
#endif /* _MSC_VER */

#if defined(_MSC_VER) && !defined(__builtin_clzl) && !__has_builtin(__builtin_clzl)
/* MSVC substitute for the GCC/Clang builtin: the number of leading zero bits
 * of a size_t value (64-bit with _WIN64, otherwise 32-bit), derived from
 * the highest set bit position reported by the _BitScanReverse family.
 * The intrinsic's return value is not checked, so the result is meaningless
 * for a zero argument. */
MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) {
  unsigned long msb_pos;
  unsigned long top_bit;
#ifdef _WIN64
  assert(sizeof(value) == 8);
  _BitScanReverse64(&msb_pos, value);
  top_bit = 63;
#else
  assert(sizeof(value) == 4);
  _BitScanReverse(&msb_pos, value);
  top_bit = 31;
#endif
  return top_bit - msb_pos;
}
#endif /* _MSC_VER */

#if !MDBX_PNL_ASCENDING

#if !defined(MDBX_ATTRIBUTE_TARGET) && (__has_attribute(__target__) || __GNUC_PREREQ(5, 0))
#define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
#endif /* MDBX_ATTRIBUTE_TARGET */

#ifndef MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND
/* Workaround for GCC's bug with `-m32 -march=i686 -Ofast`
 * gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1:
 *     error: inlining failed in call to 'always_inline' '_mm_movemask_ps':
 *            target specific option mismatch */
#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || defined(__e2k__) || defined(__clang__) ||       \
    defined(__amd64__) || defined(__SSE2__)
#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0
#else
#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1
#endif
#endif /* MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND */

#if defined(__SSE2__) && defined(__SSE__)
#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */
#elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__)
#define __SSE2__
#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND
#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2")
#endif /* __SSE2__ */

#if defined(__AVX2__)
#define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND
#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2")
#endif /* __AVX2__ */

#if defined(MDBX_ATTRIBUTE_TARGET_AVX2)
#if defined(__AVX512BW__)
#define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && !MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND &&                \
    (__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0))
#define MDBX_ATTRIBUTE_TARGET_AVX512BW MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw")
#endif /* __AVX512BW__ */
#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */

#ifdef MDBX_ATTRIBUTE_TARGET_SSE2
/* Loads four 32-bit items at `ptr` and four at `ptr + offset` (unaligned),
 * subtracts the second group from the first lane-wise and compares
 * the differences with `pattern`. Returns a 4-bit mask where bit N is set
 * when lane N matched. */
MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned
diffcmp2mask_sse2(const pgno_t *const ptr, const ptrdiff_t offset, const __m128i pattern) {
  const __m128i near_items = _mm_loadu_si128((const __m128i *)ptr);
  const __m128i far_items = _mm_loadu_si128((const __m128i *)(ptr + offset));
  const __m128i delta = _mm_sub_epi32(near_items, far_items);
  const __m128i hits = _mm_cmpeq_epi32(delta, pattern);
  return _mm_movemask_ps(*(const __m128 *)&hits);
}

/* SSE2 variant of the downward page-number-list scan (descending lists only):
 * looks for a position whose item lying `seq` slots below differs from it
 * by exactly `seq`, checking four positions per step.
 *
 * Preconditions (asserted): seq > 0, len > seq and the slot `len` items below
 * `range` holds `len`. Returns a pointer to the matching item or nullptr. */
MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_SSE2 static pgno_t *scan4seq_sse2(pgno_t *range, const size_t len,
                                                                                const size_t seq) {
  assert(seq > 0 && len > seq);
#if MDBX_PNL_ASCENDING
#error "FIXME: Not implemented"
#endif /* MDBX_PNL_ASCENDING */
  assert(range[-(ptrdiff_t)len] == len);
  /* Lowest position, the scan stops upon reaching it. */
  pgno_t *const detent = range - len + seq;
  const ptrdiff_t offset = -(ptrdiff_t)seq;
  /* The negative offset converted to pgno_t, i.e. the wrapped-around value
   * of the unsigned difference that is being looked for. */
  const pgno_t target = (pgno_t)offset;
  const __m128i pattern = _mm_set1_epi32(target);
  uint8_t mask;
  if (likely(len > seq + 3)) {
    do {
      /* Lanes 0..3 correspond to positions range-3 .. range. */
      mask = (uint8_t)diffcmp2mask_sse2(range - 3, offset, pattern);
      if (mask) {
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
      found:
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
        /* With P being the highest set bit of the mask, clz is 31 - P,
         * so the result is (range - 3 + P): the matched position
         * nearest to `range`. */
        return range + 28 - __builtin_clz(mask);
      }
      range -= 4;
    } while (range > detent + 3);
    /* Nothing is left for the tail handling. */
    if (range == detent)
      return nullptr;
  }

  /* What follows reads from 4 to 12 extra bytes, which may lie not only
   * outside the region allocated for the PNL, but may also cross a memory
   * page boundary. This can lead both to ASAN errors and to a crash.
   * Therefore the in-page offset is checked, and with ASAN we always
   * play it safe. */
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
  const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */;
  if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) {
    /* Number of low lanes which lie at or below the detent. */
    const unsigned extra = (unsigned)(detent + 4 - range);
    assert(extra > 0 && extra < 4);
    /* Discard the matches from those excess lanes. */
    mask = 0xF << extra;
    mask &= diffcmp2mask_sse2(range - 3, offset, pattern);
    if (mask)
      goto found;
    return nullptr;
  }
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
  /* Safe tail: check the remaining positions one by one. */
  do
    if (*range - range[offset] == target)
      return range;
  while (--range != detent);
  return nullptr;
}
#endif /* MDBX_ATTRIBUTE_TARGET_SSE2 */

#ifdef MDBX_ATTRIBUTE_TARGET_AVX2
/* Loads eight 32-bit items at `ptr` and eight at `ptr + offset` (unaligned),
 * subtracts the second group from the first lane-wise and compares
 * the differences with `pattern`. Returns an 8-bit mask where bit N is set
 * when lane N matched. */
MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned
diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset, const __m256i pattern) {
  const __m256i near_items = _mm256_loadu_si256((const __m256i *)ptr);
  const __m256i far_items = _mm256_loadu_si256((const __m256i *)(ptr + offset));
  const __m256i delta = _mm256_sub_epi32(near_items, far_items);
  const __m256i hits = _mm256_cmpeq_epi32(delta, pattern);
  return _mm256_movemask_ps(*(const __m256 *)&hits);
}

/* 128-bit helper compiled with the AVX2 target attribute: loads four 32-bit
 * items at `ptr` and four at `ptr + offset` (unaligned), subtracts the second
 * group from the first lane-wise and compares the differences with `pattern`.
 * Returns a 4-bit mask where bit N is set when lane N matched. */
MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned
diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset, const __m128i pattern) {
  const __m128i near_items = _mm_loadu_si128((const __m128i *)ptr);
  const __m128i far_items = _mm_loadu_si128((const __m128i *)(ptr + offset));
  const __m128i delta = _mm_sub_epi32(near_items, far_items);
  const __m128i hits = _mm_cmpeq_epi32(delta, pattern);
  return _mm_movemask_ps(*(const __m128 *)&hits);
}

/* AVX2 variant of the downward page-number-list scan (descending lists only):
 * looks for a position whose item lying `seq` slots below differs from it
 * by exactly `seq`, checking eight positions per step.
 *
 * Preconditions (asserted): seq > 0, len > seq and the slot `len` items below
 * `range` holds `len`. Returns a pointer to the matching item or nullptr. */
MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t *scan4seq_avx2(pgno_t *range, const size_t len,
                                                                                const size_t seq) {
  assert(seq > 0 && len > seq);
#if MDBX_PNL_ASCENDING
#error "FIXME: Not implemented"
#endif /* MDBX_PNL_ASCENDING */
  assert(range[-(ptrdiff_t)len] == len);
  /* Lowest position, the scan stops upon reaching it. */
  pgno_t *const detent = range - len + seq;
  const ptrdiff_t offset = -(ptrdiff_t)seq;
  /* The negative offset converted to pgno_t, i.e. the wrapped-around value
   * of the unsigned difference that is being looked for. */
  const pgno_t target = (pgno_t)offset;
  const __m256i pattern = _mm256_set1_epi32(target);
  uint8_t mask;
  if (likely(len > seq + 7)) {
    do {
      /* Lanes 0..7 correspond to positions range-7 .. range. */
      mask = (uint8_t)diffcmp2mask_avx2(range - 7, offset, pattern);
      if (mask) {
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
      found:
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
        /* With P being the highest set bit of the mask, clz is 31 - P,
         * so the result is (range - 7 + P): the matched position
         * nearest to `range`. */
        return range + 24 - __builtin_clz(mask);
      }
      range -= 8;
    } while (range > detent + 7);
    /* Nothing is left for the tail handling. */
    if (range == detent)
      return nullptr;
  }

  /* What follows reads from 4 to 28 extra bytes, which may lie not only
   * outside the region allocated for the PNL, but may also cross a memory
   * page boundary. This can lead both to ASAN errors and to a crash.
   * Therefore the in-page offset is checked, and with ASAN we always
   * play it safe. */
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
  const unsigned on_page_safe_mask = 0xfe0 /* enough for '-31' bytes offset */;
  if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) {
    /* Number of low lanes which lie at or below the detent. */
    const unsigned extra = (unsigned)(detent + 8 - range);
    assert(extra > 0 && extra < 8);
    /* Discard the matches from those excess lanes. */
    mask = 0xFF << extra;
    mask &= diffcmp2mask_avx2(range - 7, offset, pattern);
    if (mask)
      goto found;
    return nullptr;
  }
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
  /* Safe tail: one 128-bit step when at least four positions remain above
   * the detent, reusing the low half of the 256-bit pattern... */
  if (range - 3 > detent) {
    mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern);
    if (mask)
      return range + 28 - __builtin_clz(mask);
    range -= 4;
  }
  /* ...then the rest one by one. */
  while (range > detent) {
    if (*range - range[offset] == target)
      return range;
    --range;
  }
  return nullptr;
}
#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 */

#ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW
/* Loads sixteen 32-bit items at `ptr` and sixteen at `ptr + offset`
 * (unaligned), subtracts the second group from the first lane-wise and
 * compares the differences with `pattern`. Returns a 16-bit mask where bit N
 * is set when lane N matched. */
MDBX_ATTRIBUTE_TARGET_AVX512BW static __always_inline unsigned
diffcmp2mask_avx512bw(const pgno_t *const ptr, const ptrdiff_t offset, const __m512i pattern) {
  const __m512i near_items = _mm512_loadu_si512((const __m512i *)ptr);
  const __m512i far_items = _mm512_loadu_si512((const __m512i *)(ptr + offset));
  const __m512i delta = _mm512_sub_epi32(near_items, far_items);
  return _mm512_cmpeq_epi32_mask(delta, pattern);
}

/* AVX-512 variant of the downward page-number-list scan (descending lists
 * only): looks for a position whose item lying `seq` slots below differs from
 * it by exactly `seq`, checking sixteen positions per step.
 *
 * Preconditions (asserted): seq > 0, len > seq and the slot `len` items below
 * `range` holds `len`. Returns a pointer to the matching item or nullptr. */
MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX512BW static pgno_t *scan4seq_avx512bw(pgno_t *range, const size_t len,
                                                                                        const size_t seq) {
  assert(seq > 0 && len > seq);
#if MDBX_PNL_ASCENDING
#error "FIXME: Not implemented"
#endif /* MDBX_PNL_ASCENDING */
  assert(range[-(ptrdiff_t)len] == len);
  /* Lowest position, the scan stops upon reaching it. */
  pgno_t *const detent = range - len + seq;
  const ptrdiff_t offset = -(ptrdiff_t)seq;
  /* The negative offset converted to pgno_t, i.e. the wrapped-around value
   * of the unsigned difference that is being looked for. */
  const pgno_t target = (pgno_t)offset;
  const __m512i pattern = _mm512_set1_epi32(target);
  unsigned mask;
  if (likely(len > seq + 15)) {
    do {
      /* Lanes 0..15 correspond to positions range-15 .. range. */
      mask = diffcmp2mask_avx512bw(range - 15, offset, pattern);
      if (mask) {
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
      found:
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
        /* With P being the highest set bit of the mask, clz is 31 - P,
         * so the result is (range - 15 + P): the matched position
         * nearest to `range`. */
        return range + 16 - __builtin_clz(mask);
      }
      range -= 16;
    } while (range > detent + 15);
    /* Nothing is left for the tail handling. */
    if (range == detent)
      return nullptr;
  }

  /* What follows reads from 4 to 60 extra bytes, which may lie not only
   * outside the region allocated for the PNL, but may also cross a memory
   * page boundary. This can lead both to ASAN errors and to a crash.
   * Therefore the in-page offset is checked, and with ASAN we always
   * play it safe. */
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
  const unsigned on_page_safe_mask = 0xfc0 /* enough for '-63' bytes offset */;
  if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) {
    /* Number of low lanes which lie at or below the detent. */
    const unsigned extra = (unsigned)(detent + 16 - range);
    assert(extra > 0 && extra < 16);
    /* Discard the matches from those excess lanes. */
    mask = 0xFFFF << extra;
    mask &= diffcmp2mask_avx512bw(range - 15, offset, pattern);
    if (mask)
      goto found;
    return nullptr;
  }
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
  /* Safe tail: a 256-bit step when at least eight positions remain above
   * the detent, reusing the low part of the 512-bit pattern... */
  if (range - 7 > detent) {
    mask = diffcmp2mask_avx2(range - 7, offset, *(const __m256i *)&pattern);
    if (mask)
      return range + 24 - __builtin_clz(mask);
    range -= 8;
  }
  /* ...then a 128-bit step when at least four positions remain... */
  if (range - 3 > detent) {
    mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern);
    if (mask)
      return range + 28 - __builtin_clz(mask);
    range -= 4;
  }
  /* ...then the rest one by one. */
  while (range > detent) {
    if (*range - range[offset] == target)
      return range;
    --range;
  }
  return nullptr;
}
#endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */

#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* Loads four 32-bit items at `ptr` and four at `ptr + offset`, subtracts
 * the second group from the first lane-wise and compares the differences with
 * `pattern`. The per-lane comparison results are narrowed and packed into
 * a size_t: 16 bits per lane when size_t is 64-bit wide, otherwise 8 bits
 * per lane. A lane's bits are all ones on a match and all zeros otherwise. */
static __always_inline size_t diffcmp2mask_neon(const pgno_t *const ptr, const ptrdiff_t offset,
                                                const uint32x4_t pattern) {
  const uint32x4_t near_items = vld1q_u32(ptr);
  const uint32x4_t far_items = vld1q_u32(ptr + offset);
  const uint32x4_t hits = vceqq_u32(vsubq_u32(near_items, far_items), pattern);
  const uint16x4_t narrowed = vmovn_u32(hits);
  if (sizeof(size_t) > 7)
    return vget_lane_u64(vreinterpret_u64_u16(narrowed), 0);
  else
    return vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(narrowed, narrowed))), 0);
}

/* ARM NEON variant of the downward page-number-list scan (descending lists
 * only): looks for a position whose item lying `seq` slots below differs from
 * it by exactly `seq`, checking four positions per step.
 *
 * Preconditions (asserted): seq > 0, len > seq and the slot `len` items below
 * `range` holds `len`. Returns a pointer to the matching item or nullptr. */
__hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, const size_t seq) {
  assert(seq > 0 && len > seq);
#if MDBX_PNL_ASCENDING
#error "FIXME: Not implemented"
#endif /* MDBX_PNL_ASCENDING */
  assert(range[-(ptrdiff_t)len] == len);
  /* Lowest position, the scan stops upon reaching it. */
  pgno_t *const detent = range - len + seq;
  const ptrdiff_t offset = -(ptrdiff_t)seq;
  /* The negative offset converted to pgno_t, i.e. the wrapped-around value
   * of the unsigned difference that is being looked for. */
  const pgno_t target = (pgno_t)offset;
  const uint32x4_t pattern = vmovq_n_u32(target);
  size_t mask;
  if (likely(len > seq + 3)) {
    do {
      /* Lanes 0..3 correspond to positions range-3 .. range. */
      mask = diffcmp2mask_neon(range - 3, offset, pattern);
      if (mask) {
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
      found:
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
        /* Each lane takes sizeof(size_t) * 2 bits of the mask, so the count
         * of leading zero bits shifted right by sizeof(size_t) / 4 gives
         * 4 per skipped lane, i.e. the size of an item in bytes.
         * NOTE(review): ptr_disp() presumably applies a byte displacement -
         * confirm. */
        return ptr_disp(range, -(__builtin_clzl(mask) >> sizeof(size_t) / 4));
      }
      range -= 4;
    } while (range > detent + 3);
    /* Nothing is left for the tail handling. */
    if (range == detent)
      return nullptr;
  }

  /* What follows reads from 4 to 12 extra bytes, which may lie not only
   * outside the region allocated for the PNL, but may also cross a memory
   * page boundary. This can lead both to ASAN errors and to a crash.
   * Therefore the in-page offset is checked, and with ASAN we always
   * play it safe. */
#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__)
  const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */;
  if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) {
    /* Number of low lanes which lie at or below the detent. */
    const unsigned extra = (unsigned)(detent + 4 - range);
    assert(extra > 0 && extra < 4);
    /* Discard the matches from those excess lanes
     * (sizeof(size_t) * 2 mask bits per lane). */
    mask = (~(size_t)0) << (extra * sizeof(size_t) * 2);
    mask &= diffcmp2mask_neon(range - 3, offset, pattern);
    if (mask)
      goto found;
    return nullptr;
  }
#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */
  /* Safe tail: check the remaining positions one by one. */
  do
    if (*range - range[offset] == target)
      return range;
  while (--range != detent);
  return nullptr;
}
#endif /* __ARM_NEON || __ARM_NEON__ */

#if defined(__AVX512BW__) && defined(MDBX_ATTRIBUTE_TARGET_AVX512BW)
#define scan4seq_default scan4seq_avx512bw
#define scan4seq_impl scan4seq_default
#elif defined(__AVX2__) && defined(MDBX_ATTRIBUTE_TARGET_AVX2)
#define scan4seq_default scan4seq_avx2
#elif defined(__SSE2__) && defined(MDBX_ATTRIBUTE_TARGET_SSE2)
#define scan4seq_default scan4seq_sse2
#elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define scan4seq_default scan4seq_neon
/* Choosing of another variants should be added here. */
#endif /* scan4seq_default */

#endif /* MDBX_PNL_ASCENDING */

/* No SIMD variant was selected: use the portable fallback as the default. */
#ifndef scan4seq_default
#define scan4seq_default scan4seq_fallback
#endif /* scan4seq_default */

#ifdef scan4seq_impl
/* The scan4seq_impl() is the best or no alternatives */
#elif !MDBX_HAVE_BUILTIN_CPU_SUPPORTS
/* The scan4seq_default() will be used since no cpu-features detection support
 * from compiler. Please don't ask to implement cpuid-based detection and don't
 * make such PRs. */
#define scan4seq_impl scan4seq_default
#else
/* Selecting the most appropriate implementation at runtime,
 * depending on the available CPU features. */
static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, const size_t seq);
/* Initially points at the resolver, which rebinds this pointer to the chosen
 * implementation when it is called. */
static pgno_t *(*scan4seq_impl)(pgno_t *range, const size_t len, const size_t seq) = scan4seq_resolver;

/* Run-time selector for scan4seq: probes the CPU features supported by the
 * compiler builtins, stores the chosen implementation into scan4seq_impl and
 * forwards the current call to it.
 *
 * Candidates are probed in the order SSE2, AVX2, AVX512BW and each successful
 * probe overrides the previous one; when none matches, the compile-time
 * scan4seq_default is used. */
static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, const size_t seq) {
  typedef pgno_t *(*scan4seq_fn)(pgno_t *range, const size_t len, const size_t seq);
  scan4seq_fn best = nullptr;

#if __has_builtin(__builtin_cpu_init) || defined(__BUILTIN_CPU_INIT__) || __GNUC_PREREQ(4, 8)
  __builtin_cpu_init();
#endif /* __builtin_cpu_init() */

#ifdef MDBX_ATTRIBUTE_TARGET_SSE2
  if (__builtin_cpu_supports("sse2"))
    best = scan4seq_sse2;
#endif /* MDBX_ATTRIBUTE_TARGET_SSE2 */

#ifdef MDBX_ATTRIBUTE_TARGET_AVX2
  if (__builtin_cpu_supports("avx2"))
    best = scan4seq_avx2;
#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 */

#ifdef MDBX_ATTRIBUTE_TARGET_AVX512BW
  if (__builtin_cpu_supports("avx512bw"))
    best = scan4seq_avx512bw;
#endif /* MDBX_ATTRIBUTE_TARGET_AVX512BW */

  /* Choosing of another variants should be added here. */
  if (!best)
    best = scan4seq_default;

  /* Rebind the dispatch pointer so later calls bypass the resolver. */
  scan4seq_impl = best;
  return best(range, len, seq);
}
#endif /* scan4seq_impl */

/*----------------------------------------------------------------------------*/

/* Allocation flags that describe the allocator's own state: gc_alloc_ex()
 * asserts that none of them is set by the caller and toggles them itself. */
#define ALLOC_COALESCE 4    /* internal state */
#define ALLOC_SHOULD_SCAN 8 /* internal state */
#define ALLOC_LIFO 16       /* internal state */

/* Decides whether the GC may be consulted for the current allocation request.
 *
 * Returns false when the cursor works on the GC tree itself (unless the
 * request is a reserve or the cursor is flagged z_gcu_preparation), and also
 * when the GC tree holds no items; in the latter case the transaction is
 * additionally marked with txn_gc_drained. Returns true otherwise. */
static inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc, const uint8_t flags) {
  /* While the transaction is updating the GC, the retired-list must not chase
   * itself by growing during the attempt to save it. */
  if (mc->tree == &txn->dbs[FREE_DBI]) {
    const bool reserving = (flags & ALLOC_RESERVE) != 0;
    const bool preparing = (mc->flags & z_gcu_preparation) != 0;
    if (!reserving && !preparing)
      return false;
  }

  /* Do not search inside an empty tree nor while the tree is being updated,
     https://libmdbx.dqdkfa.ru/dead-github/issues/31 */
  if (likely(txn->dbs[FREE_DBI].items != 0))
    return true;

  txn->flags |= txn_gc_drained;
  return false;
}

/* Reports whether the GC record with the given transaction id is present in
 * the transaction's list of reclaimed ids (txn->tw.gc.retxl). */
static inline bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) {
  const bool listed = txl_contain(txn->tw.gc.retxl, id);
  return listed;
}

/* Takes a single page number out of the transaction's list of reclaimed pages
 * (txn->tw.repnl), shrinks the list by one element and returns that number.
 *
 * The list must not be empty (asserted). By default the element at the list
 * edge is taken; with MDBX_ENABLE_SAVING_SEQUENCES a standalone element is
 * preferred over breaking up a run of consecutive page numbers. */
__hot static pgno_t repnl_get_single(MDBX_txn *txn) {
  const size_t len = MDBX_PNL_GETSIZE(txn->tw.repnl);
  assert(len > 0);
  pgno_t *target = MDBX_PNL_EDGE(txn->tw.repnl);
  const ptrdiff_t dir = MDBX_PNL_ASCENDING ? 1 : -1;

  /* There are THREE potentially winning, but mutually opposing tactics:
   *
   * 1. Try to use pages with the lowest numbers. This makes the disk I/O more
   * clustered, and pages closer to the end of the DB get a better chance of
   * falling under auto-compactification. This tactic is partially implemented
   * already, but to be effective it requires explicit prioritisation of page
   * allocation:
   *   - maintain two repnl lists, for the near and for the far pages;
   *   - use pages from the far list if the first one is empty and
   *     the second one is too large, or when the GC is empty.
   *
   * 2. Try to allocate pages sequentially. Then the regions written to disk
   * are linear, which fundamentally speeds up writing to HDD.
   * At the same time, on average this does not affect reading; more precisely,
   * if the read order does not match the modification order (in other words,
   * if reading does not correlate with updates and/or inserts) it has no
   * effect, otherwise it may speed things up. However, sequences are rather
   * rare on average. So, to be effective, huge page lists would have to be
   * accumulated and maintained in RAM and then stored back into the DB.
   * The current DB format (without compressed bitmaps) is extremely
   * ill-suited for this. Therefore this tactic has no chance of success
   * without changing the DB format (Mithril).
   *
   * 3. Try to save page sequences. This avoids extra reading/searching in the
   * GC when data requiring more than one page is placed and/or updated more
   * or less steadily. The problem is that without information from the
   * application the library cannot know how much sequences will be in demand
   * in the near future, while saving sequences "just in case" is not only
   * costly in itself, but also does harm (adds chaos).
   *
   * Therefore:
   *  - splitting repnl into "near" and "far" pages is added to the TODO,
   *    with subsequent implementation of the first tactic;
   *  - preferential use of sequences is deferred to MithrilDB
   *    as a part of the "HDD frendly" feature;
   *  - the saving of sequences implemented in
   *    3757eb72f7c6b46862f8f17881ac88e8cecc1979 is disabled
   *    via MDBX_ENABLE_SAVING_SEQUENCES=0.
   *
   * As an alternative to the unconditional "saving" of sequences, future
   * versions of libmdbx will probably offer an API for interacting with
   * the GC:
   *  - getting the GC size, including histograms of sequence sizes
   *    and of proximity to the end of the DB;
   *  - enabling formation of a "linear reserve" for subsequent use
   *    within the current transaction;
   *  - deliberate loading of the GC into memory for coagulation
   *    and "straightening";
   *  - deliberate copying of data from pages at the end of the DB in order
   *    to free them afterwards, i.e. controlled compactification on request. */

#ifndef MDBX_ENABLE_SAVING_SEQUENCES
#define MDBX_ENABLE_SAVING_SEQUENCES 0
#endif
  /* The length is tested before target[dir] is read, so the neighbour of the
   * edge element is never touched when the list holds fewer than three
   * entries. */
  if (MDBX_ENABLE_SAVING_SEQUENCES && len > 2 && unlikely(target[dir] == *target + 1)) {
    /* Try to skip sequences when there are standalone elements.
     * TODO: the skipped sequences should be cached to avoid rescanning
     * the list from the start on every allocation. */
    pgno_t *scan = target + dir + dir;
    size_t left = len;
    do {
      if (likely(scan[-dir] != *scan - 1 && *scan + 1 != scan[dir])) {
#if MDBX_PNL_ASCENDING
        target = scan;
        break;
#else
        /* cut the element out, moving the tail */
        const pgno_t pgno = *scan;
        MDBX_PNL_SETSIZE(txn->tw.repnl, len - 1);
        while (++scan <= target)
          scan[-1] = *scan;
        return pgno;
#endif
      }
      scan += dir;
    } while (--left > 2);
  }

  const pgno_t pgno = *target;
#if MDBX_PNL_ASCENDING
  /* cut the element out, moving the tail */
  MDBX_PNL_SETSIZE(txn->tw.repnl, len - 1);
  for (const pgno_t *const end = txn->tw.repnl + len - 1; target <= end; ++target)
    *target = target[1];
#else
  /* no need to move the tail, just truncate the list */
  MDBX_PNL_SETSIZE(txn->tw.repnl, len - 1);
#endif
  return pgno;
}

/* Looks for a run of `num` consecutive page numbers in the transaction's list
 * of reclaimed pages (txn->tw.repnl) and, unless only probing, removes it.
 *
 * Requires len >= num and num > 1 (asserted).
 * Returns:
 *  - 0 when no suitable run is found;
 *  - P_INVALID when a run exists but ALLOC_RESERVE is set in `flags`
 *    (the list is left untouched);
 *  - otherwise the page number stored at the located position, after the
 *    `num` entries have been cut out of the list. */
__hot static pgno_t repnl_get_sequence(MDBX_txn *txn, const size_t num, uint8_t flags) {
  const size_t len = MDBX_PNL_GETSIZE(txn->tw.repnl);
  pgno_t *edge = MDBX_PNL_EDGE(txn->tw.repnl);
  assert(len >= num && num > 1);
  const size_t seq = num - 1;
  const bool probe_only = (flags & ALLOC_RESERVE) != 0;

#if !MDBX_PNL_ASCENDING
  /* Shortcut: the run sits right at the edge of the list. */
  if (edge[-(ptrdiff_t)seq] - *edge == seq) {
    if (unlikely(probe_only))
      return P_INVALID;
    assert(edge == scan4range_checker(txn->tw.repnl, seq));
    /* no need to move the tail, just truncate the list */
    MDBX_PNL_SETSIZE(txn->tw.repnl, len - num);
    return *edge;
  }
#endif

  pgno_t *found = scan4seq_impl(edge, len, seq);
  assert(found == scan4range_checker(txn->tw.repnl, seq));
  if (!found)
    return 0;
  if (unlikely(probe_only))
    return P_INVALID;

  /* Remember the result before the entries are overwritten by the shift. */
  const pgno_t first = *found;

  /* cut out the found sequence, moving the tail */
  MDBX_PNL_SETSIZE(txn->tw.repnl, len - num);
#if MDBX_PNL_ASCENDING
  const pgno_t *const last = txn->tw.repnl + len - num;
  while (found <= last) {
    *found = found[num];
    ++found;
  }
#else
  const pgno_t *const last = txn->tw.repnl + len;
  for (pgno_t *src = found + 1; src <= last; ++src)
    src[-(ptrdiff_t)num] = *src;
#endif
  return first;
}

/* Finishes the allocation of `num` pages starting at page number `pgno`:
 * obtains the in-memory page (directly from the map in MDBX_WRITEMAP mode,
 * otherwise a shadow buffer from page_shadow_alloc()), optionally fills it,
 * initialises the page header fields and registers the page as dirty.
 *
 * Returns a pgr_t whose `err` is the result of page_dirty(), or MDBX_ENOMEM
 * when the shadow allocation fails (in that case `page` is null).
 * `mc` is used only for profiling when MDBX_ENABLE_PROFGC is enabled. */
static inline pgr_t page_alloc_finalize(MDBX_env *const env, MDBX_txn *const txn, const MDBX_cursor *const mc,
                                        const pgno_t pgno, const size_t num) {
#if MDBX_ENABLE_PROFGC
  size_t majflt_before;
  const uint64_t cputime_before = osal_cputime(&majflt_before);
  gc_prof_stat_t *const prof =
      (cursor_dbi(mc) == FREE_DBI) ? &env->lck->pgops.gc_prof.self : &env->lck->pgops.gc_prof.work;
#else
  (void)mc;
#endif /* MDBX_ENABLE_PROFGC */
  ENSURE(env, pgno >= NUM_METAS);

  pgr_t ret;
  /* With MDBX_PAGEPERTURB the page is filled with 0xFF bytes below, unless
   * the prefault write has already covered every page of the range. */
  bool need_clean = (env->flags & MDBX_PAGEPERTURB) != 0;
  if (env->flags & MDBX_WRITEMAP) {
    ret.page = pgno2page(env, pgno);
    MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, pgno2bytes(env, num));
    VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num));

    /* The content of the allocated page is not needed, but if the page is
     * absent from RAM (which is quite likely), any access to it leads
     * to a page-fault:
     *  - a page-not-present interrupt;
     *  - a context switch into kernel mode with the process put to sleep;
     *  - reading the page from disk;
     *  - updating the PTE and waking the process up;
     *  - a context switch once a CPU becomes available.
     *
     * We try to minimise the overhead by writing the page, which with a
     * unified page cache makes the page appear in RAM without reading it
     * from disk. The write to disk should be deferred by a sane kernel,
     * since the page is mapped read-write and the CPU writes into it
     * right afterwards. */

    /* If the page is already in the process memory, the superfluous write may
     * be quite expensive. Besides the system call and the data copying, in
     * especially "gifted" OSes this may involve the file system, allocation of
     * a temporary page, feeding of asynchronous execution queues, a PTE update
     * followed by a page-fault and reading the data back from the dirty I/O
     * queue. Because of this the penalty for an extra write may be comparable
     * to the unnecessary read being avoided. */
    if (txn->tw.prefault_write_activated) {
      /* presumably page_auxbuf holds different fill patterns at offsets
       * env->ps and env->ps * 2 — verify against its initialisation */
      void *const pattern = ptr_disp(env->page_auxbuf, need_clean ? env->ps : env->ps * 2);
      size_t file_offset = pgno2bytes(env, pgno);
      if (likely(num == 1)) {
        if (!mincore_probe(env, pgno)) {
          osal_pwrite(env->lazy_fd, pattern, env->ps, file_offset);
#if MDBX_ENABLE_PGOP_STAT
          env->lck->pgops.prefault.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
          need_clean = false;
        }
      } else {
        /* Multi-page case: collect one iovec entry per probed-out page and
         * flush them in batches of at most MDBX_AUXILARY_IOV_MAX entries.
         * NOTE(review): file_offset advances only when a full batch is
         * flushed; pages skipped by the probe are not reflected in it, so each
         * batch is written contiguously from file_offset — confirm this is
         * intended. */
        struct iovec iov[MDBX_AUXILARY_IOV_MAX];
        size_t n = 0, cleared = 0;
        for (size_t i = 0; i < num; ++i) {
          if (!mincore_probe(env, pgno + (pgno_t)i)) {
            ++cleared;
            iov[n].iov_len = env->ps;
            iov[n].iov_base = pattern;
            if (unlikely(++n == MDBX_AUXILARY_IOV_MAX)) {
              osal_pwritev(env->lazy_fd, iov, MDBX_AUXILARY_IOV_MAX, file_offset);
#if MDBX_ENABLE_PGOP_STAT
              env->lck->pgops.prefault.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
              file_offset += pgno2bytes(env, MDBX_AUXILARY_IOV_MAX);
              n = 0;
            }
          }
        }
        if (likely(n > 0)) {
          osal_pwritev(env->lazy_fd, iov, n, file_offset);
#if MDBX_ENABLE_PGOP_STAT
          env->lck->pgops.prefault.weak += 1;
#endif /* MDBX_ENABLE_PGOP_STAT */
        }
        /* The explicit fill may be skipped only if every page was written. */
        if (cleared == num)
          need_clean = false;
      }
    }
  } else {
    ret.page = page_shadow_alloc(txn, num);
    if (unlikely(!ret.page)) {
      ret.err = MDBX_ENOMEM;
      goto bailout;
    }
  }

  if (unlikely(need_clean))
    memset(ret.page, -1, pgno2bytes(env, num));

  VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num));
  ret.page->pgno = pgno;
  ret.page->dupfix_ksize = 0;
  ret.page->flags = 0;
  /* Only in builds with assertions or audit enabled: tag a multi-page
   * allocation as a large page carrying its page count. */
  if ((ASSERT_ENABLED() || AUDIT_ENABLED()) && num > 1) {
    ret.page->pages = (pgno_t)num;
    ret.page->flags = P_LARGE;
  }

  ret.err = page_dirty(txn, ret.page, (pgno_t)num);
bailout:
  tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
#if MDBX_ENABLE_PROFGC
  size_t majflt_after;
  prof->xtime_cpu += osal_cputime(&majflt_after) - cputime_before;
  prof->majflt += (uint32_t)(majflt_after - majflt_before);
#endif /* MDBX_ENABLE_PROFGC */
  return ret;
}

/* Allocates `num` contiguous pages for the write transaction of cursor `mc`.
 *
 * Sources are tried in this order:
 *  1. for num > 1, a ready run inside the reclaimed list txn->tw.repnl;
 *  2. records of the GC, read one by one (backward from the oldest reader in
 *     LIFO mode, forward from the last reclaimed id otherwise) and merged
 *     into txn->tw.repnl;
 *  3. when the GC is depleted: wiping or making a steady checkpoint,
 *     refreshing the oldest-reader snapshot and kicking lagging readers,
 *     each followed by a retry;
 *  4. the unallocated tail of the datafile, growing the file if necessary.
 *
 * `num` may be 0 only together with ALLOC_RESERVE (asserted); in that mode the
 * function just reclaims and returns without a page. With ALLOC_RESERVE set
 * the returned `page` is always null and only `err` is meaningful.
 *
 * Returns a pgr_t with the page and MDBX_SUCCESS, or a null page and an error
 * code (e.g. MDBX_NOTFOUND, MDBX_MAP_FULL, MDBX_ENOMEM, MDBX_CORRUPTED,
 * MDBX_PROBLEM). On failure without ALLOC_RESERVE the transaction gets the
 * MDBX_TXN_ERROR flag. */
pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags) {
  pgr_t ret;
  MDBX_txn *const txn = mc->txn;
  MDBX_env *const env = txn->env;
#if MDBX_ENABLE_PROFGC
  gc_prof_stat_t *const prof =
      (cursor_dbi(mc) == FREE_DBI) ? &env->lck->pgops.gc_prof.self : &env->lck->pgops.gc_prof.work;
  prof->spe_counter += 1;
#endif /* MDBX_ENABLE_PROFGC */

  eASSERT(env, num > 0 || (flags & ALLOC_RESERVE));
  eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));

  size_t newnext;
  const uint64_t monotime_begin = (MDBX_ENABLE_PROFGC || (num > 1 && env->options.gc_time_limit)) ? osal_monotime() : 0;
  struct monotime_cache now_cache;
  now_cache.expire_countdown = 1 /* starting from 1 avoids both needless system calls when the
                                    time limit is zero or already exhausted, and time accounting
                                    while rp_augment_limit is not reached */
      ;
  now_cache.value = monotime_begin;
  pgno_t pgno = 0;
  if (num > 1) {
#if MDBX_ENABLE_PROFGC
    prof->xpages += 1;
#endif /* MDBX_ENABLE_PROFGC */
    /* First try to find a ready run in the already reclaimed list. */
    if (MDBX_PNL_GETSIZE(txn->tw.repnl) >= num) {
      eASSERT(env, MDBX_PNL_LAST(txn->tw.repnl) < txn->geo.first_unallocated &&
                       MDBX_PNL_FIRST(txn->tw.repnl) < txn->geo.first_unallocated);
      pgno = repnl_get_sequence(txn, num, flags);
      if (likely(pgno))
        goto done;
    }
  } else {
    eASSERT(env, num == 0 || MDBX_PNL_GETSIZE(txn->tw.repnl) == 0);
    eASSERT(env, !(flags & ALLOC_RESERVE) || num == 0);
  }

  //---------------------------------------------------------------------------

  if (unlikely(!is_gc_usable(txn, mc, flags)))
    goto no_gc;

  /* The internal state flags must not be passed in by the caller. */
  eASSERT(env, (flags & (ALLOC_COALESCE | ALLOC_LIFO | ALLOC_SHOULD_SCAN)) == 0);
  flags += (env->flags & MDBX_LIFORECLAIM) ? ALLOC_LIFO : 0;

  if (/* Do not coalesce records while preparing a reserve for the GC update.
       * Otherwise an attempt to enlarge the reserve may require an even
       * larger reserve because the list of reclaimed pages grows. */
      (flags & ALLOC_RESERVE) == 0) {
    if (txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->tw.repnl) < env->maxgc_large1page / 2)
      flags += ALLOC_COALESCE;
  }

  /* The GC cursor is located in memory right after the basal transaction. */
  MDBX_cursor *const gc = ptr_disp(env->basal_txn, sizeof(MDBX_txn));
  eASSERT(env, mc != gc && gc->next == gc);
  gc->txn = txn;
  gc->dbi_state = txn->dbi_state;
  gc->top_and_flags = z_fresh_mark;

  txn->tw.prefault_write_activated = env->options.prefault_write;
  if (txn->tw.prefault_write_activated) {
    /* Probing via mincore() substantially reduces the costs, but in the
     * simplest cases (a trivial benchmark) the overall performance gets
     * halved. On platforms without mincore() and with a troublesome virtual
     * memory subsystem the situation may be many times worse.
     * Therefore the costs are avoided in situations where prefault-write is
     * most likely unnecessary. */
    const bool readahead_enabled = env->lck->readahead_anchor & 1;
    const pgno_t readahead_edge = env->lck->readahead_anchor >> 1;
    /* NOTE(review): pgno is still 0 on every path that reaches this point
     * (a non-zero pgno jumps to `done` above), so the last test effectively
     * compares `num` with readahead_edge — confirm this is intended. */
    if (/* Don't bother if the GC is almost empty and the DB is small */
        (txn->dbs[FREE_DBI].branch_pages == 0 && txn->geo.now < 1234) ||
        /* Don't bother if the page is within the zone of enabled read-ahead */
        (readahead_enabled && pgno + num < readahead_edge))
      txn->tw.prefault_write_activated = false;
  }

retry_gc_refresh_oldest:;
  txnid_t oldest = txn_snapshot_oldest(txn);
retry_gc_have_oldest:
  if (unlikely(oldest >= txn->txnid)) {
    ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN " for current-txnid %" PRIaTXN, oldest, txn->txnid);
    ret.err = MDBX_PROBLEM;
    goto fail;
  }
  /* GC records with id >= detent are not reclaimed (see the checks below). */
  const txnid_t detent = oldest + 1;

  txnid_t id = 0;
  MDBX_cursor_op op = MDBX_FIRST;
  if (flags & ALLOC_LIFO) {
    if (!txn->tw.gc.retxl) {
      txn->tw.gc.retxl = txl_alloc();
      if (unlikely(!txn->tw.gc.retxl)) {
        ret.err = MDBX_ENOMEM;
        goto fail;
      }
    }
    /* Begin lookup backward from oldest reader */
    id = detent - 1;
    op = MDBX_SET_RANGE;
  } else if (txn->tw.gc.last_reclaimed) {
    /* Continue lookup forward from last-reclaimed */
    id = txn->tw.gc.last_reclaimed + 1;
    if (id >= detent)
      goto depleted_gc;
    op = MDBX_SET_RANGE;
  }

next_gc:;
  MDBX_val key;
  key.iov_base = &id;
  key.iov_len = sizeof(id);

#if MDBX_ENABLE_PROFGC
  prof->rsteps += 1;
#endif /* MDBX_ENABLE_PROFGC */

  /* Seek first/next GC record */
  ret.err = cursor_ops(gc, &key, nullptr, op);
  if (unlikely(ret.err != MDBX_SUCCESS)) {
    if (unlikely(ret.err != MDBX_NOTFOUND))
      goto fail;
    /* In LIFO mode a failed range lookup is retried by stepping backward. */
    if ((flags & ALLOC_LIFO) && op == MDBX_SET_RANGE) {
      op = MDBX_PREV;
      goto next_gc;
    }
    goto depleted_gc;
  }
  if (unlikely(key.iov_len != sizeof(txnid_t))) {
    ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC key-length");
    ret.err = MDBX_CORRUPTED;
    goto fail;
  }
  id = unaligned_peek_u64(4, key.iov_base);
  if (flags & ALLOC_LIFO) {
    op = MDBX_PREV;
    if (id >= detent || is_already_reclaimed(txn, id))
      goto next_gc;
  } else {
    op = MDBX_NEXT;
    if (unlikely(id >= detent))
      goto depleted_gc;
  }
  txn->flags &= ~txn_gc_drained;

  /* Reading next GC record */
  MDBX_val data;
  page_t *const mp = gc->pg[gc->top];
  if (unlikely((ret.err = node_read(gc, page_node(mp, gc->ki[gc->top]), &data, mp)) != MDBX_SUCCESS))
    goto fail;

  pgno_t *gc_pnl = (pgno_t *)data.iov_base;
  if (unlikely(data.iov_len % sizeof(pgno_t) || data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
               !pnl_check(gc_pnl, txn->geo.first_unallocated))) {
    ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC value-length");
    ret.err = MDBX_CORRUPTED;
    goto fail;
  }

  const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
  TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl));

  if (unlikely(gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl) >= env->maxgc_large1page)) {
    /* Don't try to coalesce too much. */
    if (flags & ALLOC_SHOULD_SCAN) {
      eASSERT(env, flags & ALLOC_COALESCE);
      eASSERT(env, !(flags & ALLOC_RESERVE));
      eASSERT(env, num > 0);
#if MDBX_ENABLE_PROFGC
      env->lck->pgops.gc_prof.coalescences += 1;
#endif /* MDBX_ENABLE_PROFGC */
      TRACE("clear %s %s", "ALLOC_COALESCE", "since got threshold");
      if (MDBX_PNL_GETSIZE(txn->tw.repnl) >= num) {
        eASSERT(env, MDBX_PNL_LAST(txn->tw.repnl) < txn->geo.first_unallocated &&
                         MDBX_PNL_FIRST(txn->tw.repnl) < txn->geo.first_unallocated);
        if (likely(num == 1)) {
          pgno = repnl_get_single(txn);
          goto done;
        }
        pgno = repnl_get_sequence(txn, num, flags);
        if (likely(pgno))
          goto done;
      }
      flags -= ALLOC_COALESCE | ALLOC_SHOULD_SCAN;
    }
    if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(txn->tw.repnl) >= env->options.rp_augment_limit) &&
        ((/* not a slot-request from gc-update */ num &&
          /* have enough unallocated space */ txn->geo.upper >= txn->geo.first_unallocated + num &&
          monotime_since_cached(monotime_begin, &now_cache) + txn->tw.gc.time_acc >= env->options.gc_time_limit) ||
         gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl) >= PAGELIST_LIMIT)) {
      /* Stop reclaiming to avoid large/overflow the page list. This is a rare
       * case while search for a continuously multi-page region in a
       * large database, see https://libmdbx.dqdkfa.ru/dead-github/issues/123 */
      NOTICE("stop reclaiming %s: %zu (current) + %zu "
             "(chunk) -> %zu, rp_augment_limit %u",
             likely(gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl) < PAGELIST_LIMIT) ? "since rp_augment_limit was reached"
                                                                               : "to avoid PNL overflow",
             MDBX_PNL_GETSIZE(txn->tw.repnl), gc_len, gc_len + MDBX_PNL_GETSIZE(txn->tw.repnl),
             env->options.rp_augment_limit);
      goto depleted_gc;
    }
  }

  /* Remember ID of readed GC record */
  txn->tw.gc.last_reclaimed = id;
  if (flags & ALLOC_LIFO) {
    ret.err = txl_append(&txn->tw.gc.retxl, id);
    if (unlikely(ret.err != MDBX_SUCCESS))
      goto fail;
  }

  /* Append PNL from GC record to tw.repnl */
  ret.err = pnl_need(&txn->tw.repnl, gc_len);
  if (unlikely(ret.err != MDBX_SUCCESS))
    goto fail;

  if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
    DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO " len %zu, PNL", id, txn->dbs[FREE_DBI].root, gc_len);
    for (size_t i = gc_len; i; i--)
      DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
    DEBUG_EXTRA_PRINT(", first_unallocated %u\n", txn->geo.first_unallocated);
  }

  /* Merge in descending sorted order */
#if MDBX_ENABLE_PROFGC
  const uint64_t merge_begin = osal_monotime();
#endif /* MDBX_ENABLE_PROFGC */
  pnl_merge(txn->tw.repnl, gc_pnl);
#if MDBX_ENABLE_PROFGC
  prof->pnl_merge.calls += 1;
  prof->pnl_merge.volume += MDBX_PNL_GETSIZE(txn->tw.repnl);
  prof->pnl_merge.time += osal_monotime() - merge_begin;
#endif /* MDBX_ENABLE_PROFGC */
  /* The list has changed, so it is worth scanning it again. */
  flags |= ALLOC_SHOULD_SCAN;
  if (AUDIT_ENABLED()) {
    if (unlikely(!pnl_check(txn->tw.repnl, txn->geo.first_unallocated))) {
      ERROR("%s/%d: %s", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid txn retired-list");
      ret.err = MDBX_CORRUPTED;
      goto fail;
    }
  } else {
    eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated));
  }
  eASSERT(env, dpl_check(txn));

  eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.repnl) == 0 || MDBX_PNL_MOST(txn->tw.repnl) < txn->geo.first_unallocated);
  if (MDBX_ENABLE_REFUND && MDBX_PNL_GETSIZE(txn->tw.repnl) &&
      unlikely(MDBX_PNL_MOST(txn->tw.repnl) == txn->geo.first_unallocated - 1)) {
    /* Refund suitable pages into "unallocated" space */
    txn_refund(txn);
  }
  eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));

  /* Done for a kick-reclaim mode, actually no page needed */
  if (unlikely(num == 0)) {
    eASSERT(env, ret.err == MDBX_SUCCESS);
    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id, MDBX_PNL_GETSIZE(txn->tw.repnl));
    goto early_exit;
  }

  /* TODO: delete reclaimed records */

  eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
  if (flags & ALLOC_COALESCE) {
    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id, MDBX_PNL_GETSIZE(txn->tw.repnl));
    goto next_gc;
  }

scan:
  eASSERT(env, flags & ALLOC_SHOULD_SCAN);
  eASSERT(env, num > 0);
  if (MDBX_PNL_GETSIZE(txn->tw.repnl) >= num) {
    eASSERT(env, MDBX_PNL_LAST(txn->tw.repnl) < txn->geo.first_unallocated &&
                     MDBX_PNL_FIRST(txn->tw.repnl) < txn->geo.first_unallocated);
    if (likely(num == 1)) {
      eASSERT(env, !(flags & ALLOC_RESERVE));
      pgno = repnl_get_single(txn);
      goto done;
    }
    pgno = repnl_get_sequence(txn, num, flags);
    if (likely(pgno))
      goto done;
  }
  flags -= ALLOC_SHOULD_SCAN;
  /* ret.err is MDBX_NOTFOUND here when this scan was entered from
   * depleted_gc; in that case fall through instead of reading more records. */
  if (ret.err == MDBX_SUCCESS) {
    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id, MDBX_PNL_GETSIZE(txn->tw.repnl));
    goto next_gc;
  }

depleted_gc:
  TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "gc-depleted", id, MDBX_PNL_GETSIZE(txn->tw.repnl));
  ret.err = MDBX_NOTFOUND;
  if (flags & ALLOC_SHOULD_SCAN)
    goto scan;
  txn->flags |= txn_gc_drained;

  //-------------------------------------------------------------------------

  /* There is no suitable pages in the GC and to be able to allocate
   * we should CHOICE one of:
   *  - make a new steady checkpoint if reclaiming was stopped by
   *    the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
   *  - kick lagging reader(s) if reclaiming was stopped by ones of it.
   *  - extend the database file. */

  /* Will use new pages from the map if nothing is suitable in the GC. */
  newnext = txn->geo.first_unallocated + num;

  /* Does reclaiming stopped at the last steady point? */
  const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
  const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
  if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady && detent == prefer_steady.txnid + 1) {
    DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN "-%s, detent %" PRIaTXN, recent.txnid,
          durable_caption(recent.ptr_c), prefer_steady.txnid, durable_caption(prefer_steady.ptr_c), detent);
    const pgno_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed);
    const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed);
    uint64_t eoos_timestamp;
    /* wipe the last steady-point if one of:
     *  - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
     *  - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
     * otherwise, make a new steady-point if one of:
     *  - auto-sync threshold is specified and reached;
     *  - upper limit of database size is reached;
     *  - database is full (with the current file size)
     *       AND auto-sync threshold it NOT specified */
    if (F_ISSET(env->flags, MDBX_UTTERLY_NOSYNC) &&
        ((autosync_threshold | autosync_period) == 0 || newnext >= prefer_steady.ptr_c->geometry.now)) {
      /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
       * without any auto-sync threshold(s). */
#if MDBX_ENABLE_PROFGC
      env->lck->pgops.gc_prof.wipes += 1;
#endif /* MDBX_ENABLE_PROFGC */
      ret.err = meta_wipe_steady(env, detent);
      DEBUG("gc-wipe-steady, rc %d", ret.err);
      if (unlikely(ret.err != MDBX_SUCCESS))
        goto fail;
      eASSERT(env, prefer_steady.ptr_c != meta_prefer_steady(env, &txn->tw.troika).ptr_c);
      goto retry_gc_refresh_oldest;
    }
    if ((autosync_threshold && atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= autosync_threshold) ||
        (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) &&
         osal_monotime() - eoos_timestamp >= autosync_period) ||
        newnext >= txn->geo.upper ||
        ((num == 0 || newnext >= txn->geo.end_pgno) && (autosync_threshold | autosync_period) == 0)) {
      /* make steady checkpoint. */
#if MDBX_ENABLE_PROFGC
      env->lck->pgops.gc_prof.flushes += 1;
#endif /* MDBX_ENABLE_PROFGC */
      meta_t meta = *recent.ptr_c;
      ret.err = dxb_sync_locked(env, env->flags & MDBX_WRITEMAP, &meta, &txn->tw.troika);
      DEBUG("gc-make-steady, rc %d", ret.err);
      eASSERT(env, ret.err != MDBX_RESULT_TRUE);
      if (unlikely(ret.err != MDBX_SUCCESS))
        goto fail;
      eASSERT(env, prefer_steady.ptr_c != meta_prefer_steady(env, &txn->tw.troika).ptr_c);
      goto retry_gc_refresh_oldest;
    }
  }

  /* Re-read the oldest reader if the reader table was flagged for refresh. */
  if (unlikely(true == atomic_load32(&env->lck->rdt_refresh_flag, mo_AcquireRelease))) {
    oldest = txn_snapshot_oldest(txn);
    if (oldest >= detent)
      goto retry_gc_have_oldest;
  }

  /* Avoid kick lagging reader(s) if is enough unallocated space
   * at the end of database file. */
  if (!(flags & ALLOC_RESERVE) && newnext <= txn->geo.end_pgno) {
    eASSERT(env, pgno == 0);
    goto done;
  }

  if (oldest < txn->txnid - xMDBX_TXNID_STEP) {
    oldest = mvcc_kick_laggards(env, oldest);
    if (oldest >= detent)
      goto retry_gc_have_oldest;
  }

  //---------------------------------------------------------------------------

no_gc:
  eASSERT(env, pgno == 0);
#ifndef MDBX_ENABLE_BACKLOG_DEPLETED
#define MDBX_ENABLE_BACKLOG_DEPLETED 0
#endif /* MDBX_ENABLE_BACKLOG_DEPLETED*/
  if (MDBX_ENABLE_BACKLOG_DEPLETED && unlikely(!(txn->flags & txn_gc_drained))) {
    ret.err = MDBX_BACKLOG_DEPLETED;
    goto fail;
  }
  /* A reserve request is never satisfied from the unallocated tail. */
  if (flags & ALLOC_RESERVE) {
    ret.err = MDBX_NOTFOUND;
    goto fail;
  }

  /* Will use new pages from the map if nothing is suitable in the GC. */
  newnext = txn->geo.first_unallocated + num;
  if (newnext <= txn->geo.end_pgno)
    goto done;

  if (newnext > txn->geo.upper || !txn->geo.grow_pv) {
    NOTICE("gc-alloc: next %zu > upper %" PRIaPGNO, newnext, txn->geo.upper);
    ret.err = MDBX_MAP_FULL;
    goto fail;
  }

  /* Grow the datafile: round the new end up to the growth step and to the
   * OS page granularity, but not beyond the upper geometry limit. */
  eASSERT(env, newnext > txn->geo.end_pgno);
  const size_t grow_step = pv2pages(txn->geo.grow_pv);
  size_t aligned = pgno_align2os_pgno(env, (pgno_t)(newnext + grow_step - newnext % grow_step));

  if (aligned > txn->geo.upper)
    aligned = txn->geo.upper;
  eASSERT(env, aligned >= newnext);

  VERBOSE("try growth datafile to %zu pages (+%zu)", aligned, aligned - txn->geo.end_pgno);
  ret.err = dxb_resize(env, txn->geo.first_unallocated, (pgno_t)aligned, txn->geo.upper, implicit_grow);
  if (ret.err != MDBX_SUCCESS) {
    ERROR("unable growth datafile to %zu pages (+%zu), errcode %d", aligned, aligned - txn->geo.end_pgno, ret.err);
    goto fail;
  }
  env->txn->geo.end_pgno = (pgno_t)aligned;
  eASSERT(env, pgno == 0);

  //---------------------------------------------------------------------------

done:
  ret.err = MDBX_SUCCESS;
  if (likely((flags & ALLOC_RESERVE) == 0)) {
    if (pgno) {
      /* The pages were taken from the reclaimed list. */
      eASSERT(env, pgno + num <= txn->geo.first_unallocated && pgno >= NUM_METAS);
      eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
    } else {
      /* Take the pages from the unallocated tail of the datafile. */
      pgno = txn->geo.first_unallocated;
      txn->geo.first_unallocated += (pgno_t)num;
      eASSERT(env, txn->geo.first_unallocated <= txn->geo.end_pgno);
      eASSERT(env, pgno >= NUM_METAS && pgno + num <= txn->geo.first_unallocated);
    }

    ret = page_alloc_finalize(env, txn, mc, pgno, num);
    if (unlikely(ret.err != MDBX_SUCCESS)) {
    /* Common failure exit: also the target of every `goto fail` above. */
    fail:
      eASSERT(env, ret.err != MDBX_SUCCESS);
      eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
      int level;
      const char *what;
      if (flags & ALLOC_RESERVE) {
        level = (flags & ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
        what = num ? "reserve-pages" : "fetch-slot";
      } else {
        /* A failed real allocation puts the transaction into the error state. */
        txn->flags |= MDBX_TXN_ERROR;
        level = MDBX_LOG_ERROR;
        what = "pages";
      }
      if (LOG_ENABLED(level))
        debug_log(level, __func__, __LINE__,
                  "unable alloc %zu %s, alloc-flags 0x%x, err %d, txn-flags "
                  "0x%x, re-list-len %zu, loose-count %zu, gc: height %u, "
                  "branch %zu, leaf %zu, large %zu, entries %zu\n",
                  num, what, flags, ret.err, txn->flags, MDBX_PNL_GETSIZE(txn->tw.repnl), txn->tw.loose_count,
                  txn->dbs[FREE_DBI].height, (size_t)txn->dbs[FREE_DBI].branch_pages,
                  (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages,
                  (size_t)txn->dbs[FREE_DBI].items);
      ret.page = nullptr;
    }
    /* Accumulate the time spent on multi-page requests; it is compared
     * against gc_time_limit above. */
    if (num > 1)
      txn->tw.gc.time_acc += monotime_since_cached(monotime_begin, &now_cache);
  } else {
  /* Reserve/slot requests return no page, only the status. */
  early_exit:
    DEBUG("return nullptr for %zu pages for ALLOC_%s, rc %d", num, num ? "RESERVE" : "SLOT", ret.err);
    ret.page = nullptr;
  }

#if MDBX_ENABLE_PROFGC
  prof->rtime_monotonic += osal_monotime() - monotime_begin;
#endif /* MDBX_ENABLE_PROFGC */
  return ret;
}

/* Allocates exactly one page for the write transaction of cursor `mc`.
 *
 * Sources, in order of preference:
 *  1. the head of the transaction's loose-pages list;
 *  2. the list of reclaimed pages (txn->tw.repnl);
 *  3. the general allocator gc_alloc_ex().
 *
 * Returns a pgr_t with the page and the status code. */
__hot pgr_t gc_alloc_single(const MDBX_cursor *const mc) {
  MDBX_txn *const txn = mc->txn;
  tASSERT(txn, mc->txn->flags & MDBX_TXN_DIRTY);
  tASSERT(txn, F_ISSET(*cursor_dbi_state(mc), DBI_LINDO | DBI_VALID | DBI_DIRTY));

  /* If there are any loose pages, just use them */
  if (likely(txn->tw.loose_pages)) {
#if MDBX_ENABLE_REFUND
    /* A refund may consume the loose pages, hence the re-check below. */
    if (unlikely(txn->tw.loose_refund_wl > txn->geo.first_unallocated))
      txn_refund(txn);
#endif /* MDBX_ENABLE_REFUND */

    page_t *loose = txn->tw.loose_pages;
    if (loose) {
      MDBX_ASAN_UNPOISON_MEMORY_REGION(loose, txn->env->ps);
      VALGRIND_MAKE_MEM_DEFINED(&page_next(loose), sizeof(page_t *));
      /* Unlink the head of the loose list. */
      txn->tw.loose_pages = page_next(loose);
      txn->tw.loose_count--;
      DEBUG_EXTRA("db %d use loose page %" PRIaPGNO, cursor_dbi_dbg(mc), loose->pgno);
      tASSERT(txn, loose->pgno < txn->geo.first_unallocated);
      tASSERT(txn, loose->pgno >= NUM_METAS);
      VALGRIND_MAKE_MEM_UNDEFINED(page_data(loose), page_space(txn->env));
      loose->txnid = txn->front_txnid;
      pgr_t result = {loose, MDBX_SUCCESS};
      return result;
    }
  }

  /* Nothing reclaimed yet: fall back to the general allocator. */
  if (unlikely(MDBX_PNL_GETSIZE(txn->tw.repnl) == 0))
    return gc_alloc_ex(mc, 1, ALLOC_DEFAULT);

  const pgno_t pgno = repnl_get_single(txn);
  return page_alloc_finalize(txn->env, txn, mc, pgno, 1);
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

MDBX_NOTHROW_PURE_FUNCTION static bool is_lifo(const MDBX_txn *txn) {
  return (txn->env->flags & MDBX_LIFORECLAIM) != 0;
}

/* Gives the prefix for trace/log messages,
 * which depends on whether the LIFO-reclaiming mode is on. */
MDBX_MAYBE_UNUSED static inline const char *dbg_prefix(const gcu_t *ctx) {
  if (is_lifo(ctx->cursor.txn))
    return "    lifo";
  return "    fifo";
}

/* Number of pages available without searching GC:
 * the length of the reclaimed list plus the count of loose pages. */
static inline size_t backlog_size(MDBX_txn *txn) {
  const size_t reclaimed = MDBX_PNL_GETSIZE(txn->tw.repnl);
  const size_t loose = txn->tw.loose_count;
  return reclaimed + loose;
}

/* Deletes from GC the record(s) under which the retired-list of the
 * transaction was stored, and zeroes ctx->retired_stored as soon as such
 * a record is found.
 *
 * Does nothing if ctx->retired_stored is zero. With MDBX_ENABLE_BIGFOOT the
 * probing starts from the ctx->bigfoot key and steps down (decrementing
 * ctx->bigfoot) while the key is not less than txn->txnid; otherwise only
 * the txn->txnid key is probed.
 *
 * Returns MDBX_SUCCESS (a missing key is not an error) or the error code
 * of the failed cursor_seek()/cursor_del(). */
static int clean_stored_retired(MDBX_txn *txn, gcu_t *ctx) {
  int err = MDBX_SUCCESS;
  if (ctx->retired_stored) {
    /* NOTE(review): presumably this is a cursor object residing right behind
     * the MDBX_txn structure of the basal transaction - confirm against
     * the code which allocates the transaction. */
    MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn));
    tASSERT(txn, txn == txn->env->basal_txn && gc->next == gc);
    gc->txn = txn;
    gc->dbi_state = txn->dbi_state;
    gc->top_and_flags = z_fresh_mark;
    /* Put the cursor at the head of the FREE_DBI cursors list
     * for the time of the operations below. */
    gc->next = txn->cursors[FREE_DBI];
    txn->cursors[FREE_DBI] = gc;
    do {
      MDBX_val key, val;
#if MDBX_ENABLE_BIGFOOT
      key.iov_base = &ctx->bigfoot;
#else
      key.iov_base = &txn->txnid;
#endif /* MDBX_ENABLE_BIGFOOT */
      key.iov_len = sizeof(txnid_t);
      const csr_t csr = cursor_seek(gc, &key, &val, MDBX_SET);
      if (csr.err == MDBX_SUCCESS && csr.exact) {
        ctx->retired_stored = 0;
        err = cursor_del(gc, 0);
        TRACE("== clear-4linear, backlog %zu, err %d", backlog_size(txn), err);
      } else
        /* The absence of the record is fine, anything else is a failure */
        err = (csr.err == MDBX_NOTFOUND) ? MDBX_SUCCESS : csr.err;
    }
#if MDBX_ENABLE_BIGFOOT
    while (!err && --ctx->bigfoot >= txn->txnid);
#else
    while (0);
#endif /* MDBX_ENABLE_BIGFOOT */
    /* Take the cursor out of the list and restore its self-link,
     * which is expected by the assertion above. */
    txn->cursors[FREE_DBI] = gc->next;
    gc->next = gc;
  }
  return err;
}

/* Runs cursor_touch() for the GC cursor of the context, passing a key of
 * sizeof(txnid_t) bytes and a value as large as the current retired-list.
 * The z_gcu_preparation flag is raised on the cursor for the time of the call.
 *
 * Returns the result of cursor_touch(). */
static int touch_gc(gcu_t *ctx) {
  tASSERT(ctx->cursor.txn, is_pointed(&ctx->cursor) || ctx->cursor.txn->dbs[FREE_DBI].leaf_pages == 0);
  MDBX_cursor *const gc = &ctx->cursor;

  /* Only the lengths are meaningful here, no actual data is provided */
  MDBX_val key, val;
  key.iov_len = sizeof(txnid_t);
  key.iov_base = nullptr;
  val.iov_len = MDBX_PNL_SIZEOF(gc->txn->tw.retired_pages);
  val.iov_base = nullptr;

  gc->flags |= z_gcu_preparation;
  const int rc = cursor_touch(gc, &key, &val);
  gc->flags -= z_gcu_preparation;
  return rc;
}

/* Prepare a backlog of pages to modify GC itself, while reclaiming is
 * prohibited. It should be enough to prevent search in gc_alloc_ex()
 * during a deleting, when GC tree is unbalanced.
 *
 * The demand is estimated from the height and the branch-pages count of the
 * GC tree and from the part of the retired-list that is not stored yet, and
 * then compared with backlog_size(). If the backlog is not sufficient, the GC
 * is touched by touch_gc() and pages and/or GC slots are requested from
 * gc_alloc_ex() with the ALLOC_RESERVE flag.
 *
 * Returns MDBX_SUCCESS or an error code; MDBX_NOTFOUND is reported
 * as MDBX_SUCCESS. */
static int prepare_backlog(MDBX_txn *txn, gcu_t *ctx) {
  /* The names suggest these are page counts needed for copy-on-write,
   * rebalancing and splitting of the GC tree - presumably upper estimates. */
  const size_t for_cow = txn->dbs[FREE_DBI].height;
  const size_t for_rebalance = for_cow + 1 + (txn->dbs[FREE_DBI].height + 1ul >= txn->dbs[FREE_DBI].branch_pages);
  size_t for_split = ctx->retired_stored == 0;
  tASSERT(txn, is_pointed(&ctx->cursor) || txn->dbs[FREE_DBI].leaf_pages == 0);

  /* NOTE(review): MDBX_PNL_SIZEOF() is used here, whereas elsewhere
   * ctx->retired_stored is compared with and assigned from MDBX_PNL_GETSIZE()
   * - confirm that mixing these is intentional (an overestimate?). */
  const intptr_t retired_left = MDBX_PNL_SIZEOF(txn->tw.retired_pages) - ctx->retired_stored;
  size_t for_repnl = 0;
  if (MDBX_ENABLE_BIGFOOT && retired_left > 0) {
    for_repnl = (retired_left + txn->env->maxgc_large1page - 1) / txn->env->maxgc_large1page;
    const size_t per_branch_page = txn->env->maxgc_per_branch;
    /* Add up the entries of each upper level until a single one remains */
    for (size_t entries = for_repnl; entries > 1; for_split += entries)
      entries = (entries + per_branch_page - 1) / per_branch_page;
  } else if (!MDBX_ENABLE_BIGFOOT && retired_left != 0) {
    for_repnl = largechunk_npages(txn->env, MDBX_PNL_SIZEOF(txn->tw.retired_pages));
  }

  /* After the touch the copy-on-write demand is no longer counted */
  const size_t for_tree_before_touch = for_cow + for_rebalance + for_split;
  const size_t for_tree_after_touch = for_rebalance + for_split;
  const size_t for_all_before_touch = for_repnl + for_tree_before_touch;
  const size_t for_all_after_touch = for_repnl + for_tree_after_touch;

  /* Fast path: the backlog exceeds the estimate, and the cursor either has
   * an empty stack or its top page is already modifiable. */
  if (likely(for_repnl < 2 && backlog_size(txn) > for_all_before_touch) &&
      (ctx->cursor.top < 0 || is_modifable(txn, ctx->cursor.pg[ctx->cursor.top])))
    return MDBX_SUCCESS;

  TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, "
        "4split %zu, "
        "4cow %zu, 4tree %zu)",
        ctx->retired_stored, retired_left, backlog_size(txn), for_all_before_touch, for_repnl, for_split, for_cow,
        for_tree_before_touch);

  int err = touch_gc(ctx);
  TRACE("== after-touch, backlog %zu, err %d", backlog_size(txn), err);

  /* Without BIGFOOT the retired-list is stored as a single record, so more
   * than one page for it is requested from gc_alloc_ex() by a single call. */
  if (!MDBX_ENABLE_BIGFOOT && unlikely(for_repnl > 1) &&
      MDBX_PNL_GETSIZE(txn->tw.retired_pages) != ctx->retired_stored && err == MDBX_SUCCESS) {
    if (unlikely(ctx->retired_stored)) {
      /* Drop the record stored earlier, and start over if it was removed */
      err = clean_stored_retired(txn, ctx);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      if (!ctx->retired_stored)
        return /* restart by tail-recursion */ prepare_backlog(txn, ctx);
    }
    err = gc_alloc_ex(&ctx->cursor, for_repnl, ALLOC_RESERVE).err;
    TRACE("== after-4linear, backlog %zu, err %d", backlog_size(txn), err);
    cASSERT(&ctx->cursor, backlog_size(txn) >= for_repnl || err != MDBX_SUCCESS);
  }

  /* Request GC slots one by one (zero pages per call) until the backlog
   * covers the estimate or gc_alloc_ex() reports anything but success. */
  while (backlog_size(txn) < for_all_after_touch && err == MDBX_SUCCESS)
    err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE | ALLOC_UNIMPORTANT).err;

  TRACE("<< backlog %zu, err %d, gc: height %u, branch %zu, leaf %zu, large "
        "%zu, entries %zu",
        backlog_size(txn), err, txn->dbs[FREE_DBI].height, (size_t)txn->dbs[FREE_DBI].branch_pages,
        (size_t)txn->dbs[FREE_DBI].leaf_pages, (size_t)txn->dbs[FREE_DBI].large_pages,
        (size_t)txn->dbs[FREE_DBI].items);
  tASSERT(txn, err != MDBX_NOTFOUND || (txn->flags & txn_gc_drained) != 0);
  /* MDBX_NOTFOUND is not treated as a failure of the preparation */
  return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
}

/* Initializes a just reserved GC record, so that it holds an empty PNL.
 *
 * The leading pgno_t (the length of PNL) is always zeroed. The whole record
 * is zeroed unless the environment has MDBX_WRITEMAP or MDBX_NOMEMINIT flag. */
static inline void zeroize_reserved(const MDBX_env *env, MDBX_val pnl) {
  void *const area = pnl.iov_base;
#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__))
  /* To prevent a Valgrind warning from mdbx_dump_val(), which is invoked via
   * the DVAL_DEBUG() macro on exit from cursor_seek(MDBX_SET_KEY), which in
   * turn is called below inside gc_update() in the cleanup loop and in the
   * loop filling the reserved items. */
  memset(area, 0xBB, pnl.iov_len);
#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */

  /* The PNL starts out empty, hence at least its length must be zero */
  memset(area, 0, sizeof(pgno_t));

  if ((env->flags & (MDBX_WRITEMAP | MDBX_NOMEMINIT)) != 0)
    return;

  /* Wipe everything to avoid leaking values from uninitialized malloc'ed
   * memory to the file in non-writemap mode, if the length of the page-list
   * being saved was changed during the space reservation. */
  memset(area, 0, pnl.iov_len);
}

/* Disposes of the loose-pages list of the transaction.
 *
 * The numbers of loose pages are appended to tw.retired_pages if there is
 * neither tw.gc.retxl nor a reclaimed GC id (tw.gc.last_reclaimed < 1),
 * otherwise they are sorted and merged into tw.repnl. Then the loose pages
 * are filtered out of the dirty-list (if the transaction has one), and the
 * loose-list with its counters is reset.
 *
 * The ctx is used only for the prefix of trace messages.
 *
 * Returns MDBX_SUCCESS or the error code from pnl_need(). */
static int gcu_loose(MDBX_txn *txn, gcu_t *ctx) {
  tASSERT(txn, txn->tw.loose_count > 0);
  /* Return loose page numbers to tw.repnl, though usually none are left at this point.
   * The pages themselves remain in dirtylist. */
  if (unlikely(!txn->tw.gc.retxl && txn->tw.gc.last_reclaimed < 1)) {
    /* Put loose page numbers in tw.retired_pages, since unable to return ones to tw.repnl. */
    TRACE("%s: merge %zu loose-pages into %s-pages", dbg_prefix(ctx), txn->tw.loose_count, "retired");
    /* Ensure the room beforehand, so the appending below cannot fail */
    int err = pnl_need(&txn->tw.retired_pages, txn->tw.loose_count);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
    for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) {
      pnl_append_prereserved(txn->tw.retired_pages, lp->pgno);
      MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
      VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
    }
  } else {
    /* Room for loose pages + temp PNL with same */
    TRACE("%s: merge %zu loose-pages into %s-pages", dbg_prefix(ctx), txn->tw.loose_count, "reclaimed");
    int err = pnl_need(&txn->tw.repnl, 2 * txn->tw.loose_count + 2);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
    /* The temporary PNL is placed at the very end of the space allocated
     * for tw.repnl: loose[0] is for the length, the items follow. */
    pnl_t loose = txn->tw.repnl + MDBX_PNL_ALLOCLEN(txn->tw.repnl) - txn->tw.loose_count - 1;
    size_t count = 0;
    for (page_t *lp = txn->tw.loose_pages; lp; lp = page_next(lp)) {
      tASSERT(txn, lp->flags == P_LOOSE);
      loose[++count] = lp->pgno;
      MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
      VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
    }
    tASSERT(txn, count == txn->tw.loose_count);
    MDBX_PNL_SETSIZE(loose, count);
    pnl_sort(loose, txn->geo.first_unallocated);
    pnl_merge(txn->tw.repnl, loose);
  }

  /* filter-out list of dirty-pages from loose-pages */
  dpl_t *const dl = txn->tw.dirtylist;
  if (dl) {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
    tASSERT(txn, dl->sorted <= dl->length);
    /* In-place compaction: `r` is the read index and `w` is the write index,
     * both are 1-based; `sorted_out` counts the removed items which were
     * within the sorted part of the list. */
    size_t w = 0, sorted_out = 0;
    for (size_t r = w; ++r <= dl->length;) {
      page_t *dp = dl->items[r].ptr;
      tASSERT(txn, dp->flags == P_LOOSE || is_modifable(txn, dp));
      tASSERT(txn, dpl_endpgno(dl, r) <= txn->geo.first_unallocated);
      if ((dp->flags & P_LOOSE) == 0) {
        /* Not a loose page: keep it, shifting down if there is a gap */
        if (++w != r)
          dl->items[w] = dl->items[r];
      } else {
        tASSERT(txn, dp->flags == P_LOOSE);
        sorted_out += dl->sorted >= r;
        if (!MDBX_AVOID_MSYNC || !(txn->flags & MDBX_WRITEMAP))
          page_shadow_release(txn->env, dp, 1);
      }
    }
    TRACE("%s: filtered-out loose-pages from %zu -> %zu dirty-pages", dbg_prefix(ctx), dl->length, w);
    tASSERT(txn, txn->tw.loose_count == dl->length - w);
    dl->sorted -= sorted_out;
    tASSERT(txn, dl->sorted <= w);
    dpl_setlen(dl, w);
    /* The removed pages no longer occupy the dirty-list */
    dl->pages_including_loose -= txn->tw.loose_count;
    txn->tw.dirtyroom += txn->tw.loose_count;
    tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                     (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));
  } else {
    tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
  }
  /* The loose-list is empty from now on */
  txn->tw.loose_pages = nullptr;
  txn->tw.loose_count = 0;
#if MDBX_ENABLE_REFUND
  txn->tw.loose_refund_wl = 0;
#endif /* MDBX_ENABLE_REFUND */
  return MDBX_SUCCESS;
}

/* Stores the retired-list of the transaction into GC.
 *
 * With MDBX_ENABLE_BIGFOOT the list is written by chunks under consecutive
 * keys starting from txn->txnid, the key of the last chunk is kept in
 * ctx->bigfoot. Otherwise the whole list is written as a single record
 * with the txn->txnid key.
 *
 * The space is obtained by cursor_put() with MDBX_RESERVE. Since the
 * retired-list may change during that, its length is re-checked and the
 * writing is repeated or left for the next call.
 *
 * Returns MDBX_SUCCESS or an error code. */
static int gcu_retired(MDBX_txn *txn, gcu_t *ctx) {
  int err;
  if (unlikely(!ctx->retired_stored)) {
    /* Make sure last page of GC is touched and on retired-list */
    err = outer_last(&ctx->cursor, nullptr, nullptr);
    if (likely(err == MDBX_SUCCESS))
      err = touch_gc(ctx);
    /* MDBX_NOTFOUND is tolerated here */
    if (unlikely(err != MDBX_SUCCESS) && err != MDBX_NOTFOUND)
      return err;
  }

  MDBX_val key, data;
#if MDBX_ENABLE_BIGFOOT
  size_t retired_pages_before;
  do {
    if (ctx->bigfoot > txn->txnid) {
      /* Remove the chunks stored by a previous attempt */
      err = clean_stored_retired(txn, ctx);
      if (unlikely(err != MDBX_SUCCESS))
        return err;
      tASSERT(txn, ctx->bigfoot <= txn->txnid);
    }

    retired_pages_before = MDBX_PNL_GETSIZE(txn->tw.retired_pages);
    err = prepare_backlog(txn, ctx);
    if (unlikely(err != MDBX_SUCCESS))
      return err;
    if (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
      TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), retired_pages_before,
            MDBX_PNL_GETSIZE(txn->tw.retired_pages));
      /* Leave with MDBX_SUCCESS and nothing stored */
      break;
    }

    pnl_sort(txn->tw.retired_pages, txn->geo.first_unallocated);
    ctx->retired_stored = 0;
    ctx->bigfoot = txn->txnid;
    do {
      if (ctx->retired_stored) {
        err = prepare_backlog(txn, ctx);
        if (unlikely(err != MDBX_SUCCESS))
          return err;
        if (ctx->retired_stored >= MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
          TRACE("%s: retired-list changed (%zu -> %zu), retry", dbg_prefix(ctx), retired_pages_before,
                MDBX_PNL_GETSIZE(txn->tw.retired_pages));
          break;
        }
      }
      key.iov_len = sizeof(txnid_t);
      key.iov_base = &ctx->bigfoot;
      const size_t left = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - ctx->retired_stored;
      /* A chunk is limited by env->maxgc_large1page items,
       * unless the key has already reached MAX_TXNID. */
      const size_t chunk =
          (left > txn->env->maxgc_large1page && ctx->bigfoot < MAX_TXNID) ? txn->env->maxgc_large1page : left;
      /* One more pgno_t is for the length of the chunk */
      data.iov_len = (chunk + 1) * sizeof(pgno_t);
      err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
      if (unlikely(err != MDBX_SUCCESS))
        return err;

#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__))
      /* To prevent a Valgrind warning from mdbx_dump_val(), which is invoked
       * via the DVAL_DEBUG() macro on exit from cursor_seek(MDBX_SET_KEY),
       * which in turn is called both above in the cleanup loop and below
       * in the loop filling the reserved items.
       */
      memset(data.iov_base, 0xBB, data.iov_len);
#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */

      /* The data is copied only if the retired-list has kept its length,
       * otherwise the outer loop will do another pass. */
      if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
        const size_t at = (is_lifo(txn) == MDBX_PNL_ASCENDING) ? left - chunk : ctx->retired_stored;
        pgno_t *const begin = txn->tw.retired_pages + at;
        /* MDBX_PNL_ASCENDING == false && LIFO == false:
         *  - the larger pgno is at the beginning of retired list
         *    and should be placed with the larger txnid.
         * MDBX_PNL_ASCENDING == true && LIFO == true:
         *  - the larger pgno is at the ending of retired list
         *    and should be placed with the smaller txnid. */
        /* The item preceding the chunk is temporarily replaced by the length
         * of the chunk, so that a single memcpy() produces a ready PNL. */
        const pgno_t save = *begin;
        *begin = (pgno_t)chunk;
        memcpy(data.iov_base, begin, data.iov_len);
        *begin = save;
        TRACE("%s: put-retired/bigfoot @ %" PRIaTXN " (slice #%u) #%zu [%zu..%zu] of %zu", dbg_prefix(ctx),
              ctx->bigfoot, (unsigned)(ctx->bigfoot - txn->txnid), chunk, at, at + chunk, retired_pages_before);
      }
      ctx->retired_stored += chunk;
      /* The key is advanced only when one more chunk is to be written */
    } while (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages) && (++ctx->bigfoot, true));
  } while (retired_pages_before != MDBX_PNL_GETSIZE(txn->tw.retired_pages));
#else
  /* Write to last page of GC */
  key.iov_len = sizeof(txnid_t);
  key.iov_base = &txn->txnid;
  do {
    /* NOTE(review): the result of prepare_backlog() is ignored here, whereas
     * it is checked in the MDBX_ENABLE_BIGFOOT branch above - confirm whether
     * this is a deliberate best-effort. */
    prepare_backlog(txn, ctx);
    data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
    err = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
    if (unlikely(err != MDBX_SUCCESS))
      return err;

#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__))
    /* To prevent a Valgrind warning from mdbx_dump_val(), which is invoked
     * via the DVAL_DEBUG() macro on exit from cursor_seek(MDBX_SET_KEY),
     * which in turn is called both above in the cleanup loop and below
     * in the loop filling the reserved items. */
    memset(data.iov_base, 0xBB, data.iov_len);
#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */

    /* Retry if tw.retired_pages[] grew during the Put() */
  } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages));

  ctx->retired_stored = MDBX_PNL_GETSIZE(txn->tw.retired_pages);
  pnl_sort(txn->tw.retired_pages, txn->geo.first_unallocated);
  tASSERT(txn, data.iov_len == MDBX_PNL_SIZEOF(txn->tw.retired_pages));
  memcpy(data.iov_base, txn->tw.retired_pages, data.iov_len);

  TRACE("%s: put-retired #%zu @ %" PRIaTXN, dbg_prefix(ctx), ctx->retired_stored, txn->txnid);
#endif /* MDBX_ENABLE_BIGFOOT */
  /* Dump the stored page numbers at the most verbose logging level */
  if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
    size_t i = ctx->retired_stored;
    DEBUG_EXTRA("txn %" PRIaTXN " root %" PRIaPGNO " num %zu, retired-PNL", txn->txnid, txn->dbs[FREE_DBI].root, i);
    for (; i; i--)
      DEBUG_EXTRA_PRINT(" %" PRIaPGNO, txn->tw.retired_pages[i]);
    DEBUG_EXTRA_PRINT("%s\n", ".");
  }
  return MDBX_SUCCESS;
}

/* The result of get_rid_for_reclaimed(). */
typedef struct gcu_rid_result {
  /* MDBX_SUCCESS, MDBX_RESULT_TRUE (a restart is requested) or an error code */
  int err;
  /* The chosen GC record id, or zero if no id was taken */
  txnid_t rid;
} rid_t;

/* Chooses the id of a GC record for storing the next portion of the
 * reclaimed pages list, `left` being the number of items still to be placed.
 *
 * In LIFO mode the ids are taken from tw.gc.retxl, which is replenished
 * beforehand by gc_alloc_ex() and, when GC has nothing more to give, by
 * unused ids below ctx->rid. In FIFO mode the ids are taken by counting
 * ctx->rid down.
 *
 * Returns:
 *  - { MDBX_SUCCESS, non-zero rid } - the id is taken and ctx->reused_slot
 *    is incremented;
 *  - { MDBX_SUCCESS, 0 } - no id is taken, the caller should continue its loop;
 *  - { MDBX_RESULT_TRUE, 0 } - the caller should restart;
 *  - { an error code, 0 } - on a failure. */
static rid_t get_rid_for_reclaimed(MDBX_txn *txn, gcu_t *ctx, const size_t left) {
  rid_t r;
  if (is_lifo(txn)) {
    if (txn->tw.gc.retxl == nullptr) {
      txn->tw.gc.retxl = txl_alloc();
      if (unlikely(!txn->tw.gc.retxl)) {
        r.err = MDBX_ENOMEM;
        goto return_error;
      }
    }
    /* More slots are wanted while `left` exceeds the capacity of the slots
     * not used yet, unless the dense mode is on. */
    if (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) < txl_max &&
        left > (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot) * txn->env->maxgc_large1page && !ctx->dense) {
      /* A free one (GC slot) is needed for saving the page list. */
      bool need_cleanup = false;
      txnid_t snap_oldest = 0;
    retry_rid:
      do {
        r.err = gc_alloc_ex(&ctx->cursor, 0, ALLOC_RESERVE).err;
        snap_oldest = txn->env->lck->cached_oldest.weak;
        if (likely(r.err == MDBX_SUCCESS)) {
          TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix(ctx), MDBX_PNL_LAST(txn->tw.gc.retxl));
          need_cleanup = true;
        }
      } while (r.err == MDBX_SUCCESS && MDBX_PNL_GETSIZE(txn->tw.gc.retxl) < txl_max &&
               left > (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot) * txn->env->maxgc_large1page);

      if (likely(r.err == MDBX_SUCCESS)) {
        TRACE("%s: got enough from GC.", dbg_prefix(ctx));
        goto return_continue;
      } else if (unlikely(r.err != MDBX_NOTFOUND))
        /* LY: some troubles... */
        goto return_error;

      /* Here r.err == MDBX_NOTFOUND, i.e. gc_alloc_ex() gave no more slots */
      if (MDBX_PNL_GETSIZE(txn->tw.gc.retxl)) {
        if (need_cleanup) {
          txl_sort(txn->tw.gc.retxl);
          ctx->cleaned_slot = 0;
        }
        ctx->rid = MDBX_PNL_LAST(txn->tw.gc.retxl);
      } else {
        tASSERT(txn, txn->tw.gc.last_reclaimed == 0);
        if (unlikely(txn_snapshot_oldest(txn) != snap_oldest))
          /* should retry gc_alloc_ex()
           * if the oldest reader changes since the last attempt */
          goto retry_rid;
        /* no reclaimable GC entries,
         * therefore no entries with ID < mdbx_find_oldest(txn) */
        txn->tw.gc.last_reclaimed = ctx->rid = snap_oldest;
        TRACE("%s: none recycled yet, set rid to @%" PRIaTXN, dbg_prefix(ctx), ctx->rid);
      }

      /* There are no GC records suitable for recycling,
       * so unused ids will be taken in reverse order. */
      while (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) < txl_max &&
             left > (MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot) * txn->env->maxgc_large1page) {
        if (unlikely(ctx->rid <= MIN_TXNID)) {
          /* No ids are left below, switch to the dense mode */
          ctx->dense = true;
          if (unlikely(MDBX_PNL_GETSIZE(txn->tw.gc.retxl) <= ctx->reused_slot)) {
            VERBOSE("** restart: reserve depleted (reused_gc_slot %zu >= "
                    "gc.reclaimed %zu)",
                    ctx->reused_slot, MDBX_PNL_GETSIZE(txn->tw.gc.retxl));
            goto return_restart;
          }
          break;
        }

        tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID);
        ctx->rid -= 1;
        MDBX_val key = {&ctx->rid, sizeof(ctx->rid)}, data;
        r.err = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err;
        if (unlikely(r.err == MDBX_SUCCESS)) {
          /* The id is occupied, so jump below the very first key of GC */
          DEBUG("%s: GC's id %" PRIaTXN " is present, going to first", dbg_prefix(ctx), ctx->rid);
          r.err = outer_first(&ctx->cursor, &key, nullptr);
          if (unlikely(r.err != MDBX_SUCCESS || key.iov_len != sizeof(txnid_t))) {
            ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len);
            r.err = MDBX_CORRUPTED;
            goto return_error;
          }
          const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
          if (unlikely(gc_first <= INITIAL_TXNID)) {
            NOTICE("%s: no free GC's id(s) less than %" PRIaTXN " (going dense-mode)", dbg_prefix(ctx), ctx->rid);
            ctx->dense = true;
            goto return_restart;
          }
          ctx->rid = gc_first - 1;
        }

        tASSERT(txn, !ctx->dense);
        r.err = txl_append(&txn->tw.gc.retxl, ctx->rid);
        if (unlikely(r.err != MDBX_SUCCESS))
          goto return_error;

        if (ctx->reused_slot)
          /* rare case, but it is better to clear and re-create GC entries
           * with less fragmentation. */
          need_cleanup = true;
        else
          ctx->cleaned_slot += 1 /* mark cleanup is not needed for added slot. */;

        TRACE("%s: append @%" PRIaTXN " to lifo-reclaimed, cleaned-gc-slot = %zu", dbg_prefix(ctx), ctx->rid,
              ctx->cleaned_slot);
      }

      if (need_cleanup) {
        if (ctx->cleaned_slot) {
          TRACE("%s: restart to clear and re-create GC entries", dbg_prefix(ctx));
          goto return_restart;
        }
        goto return_continue;
      }
    }

    /* Take the slots from the end of the list towards its beginning */
    const size_t i = MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot;
    tASSERT(txn, i > 0 && i <= MDBX_PNL_GETSIZE(txn->tw.gc.retxl));
    r.rid = txn->tw.gc.retxl[i];
    TRACE("%s: take @%" PRIaTXN " from lifo-reclaimed[%zu]", dbg_prefix(ctx), r.rid, i);
  } else {
    tASSERT(txn, txn->tw.gc.retxl == nullptr);
    if (unlikely(ctx->rid == 0)) {
      /* Nothing was reclaimed yet: start from the oldest snapshot id,
       * but below the first key of GC, if there is one. */
      ctx->rid = txn_snapshot_oldest(txn);
      MDBX_val key;
      r.err = outer_first(&ctx->cursor, &key, nullptr);
      if (likely(r.err == MDBX_SUCCESS)) {
        if (unlikely(key.iov_len != sizeof(txnid_t))) {
          ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len);
          r.err = MDBX_CORRUPTED;
          goto return_error;
        }
        const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
        if (ctx->rid >= gc_first && gc_first)
          ctx->rid = gc_first - 1;
        if (unlikely(ctx->rid <= MIN_TXNID)) {
          ERROR("%s", "** no GC tail-space to store (going dense-mode)");
          ctx->dense = true;
          goto return_restart;
        }
      } else if (r.err != MDBX_NOTFOUND) {
        /* Same as the return_error exit: r.err holds the error code */
        r.rid = 0;
        return r;
      }
      txn->tw.gc.last_reclaimed = ctx->rid;
      ctx->cleaned_id = ctx->rid + 1;
    }
    /* Take the current id and step down for the next call */
    r.rid = ctx->rid--;
    TRACE("%s: take @%" PRIaTXN " from GC", dbg_prefix(ctx), r.rid);
  }
  ++ctx->reused_slot;
  r.err = MDBX_SUCCESS;
  return r;

return_continue:
  /* No id is taken, but there is no error either */
  r.err = MDBX_SUCCESS;
  r.rid = 0;
  return r;

return_restart:
  /* MDBX_RESULT_TRUE serves as the request for a restart */
  r.err = MDBX_RESULT_TRUE;
  r.rid = 0;
  return r;

return_error:
  tASSERT(txn, r.err != MDBX_SUCCESS);
  r.rid = 0;
  return r;
}

/* Cleans up the retxl GC (aka freeDB) records, saves the retired-list (aka
 * freelist) of the current transaction to GC, and puts the leftover of the
 * retxl pages back into GC with chunking. This recursively changes the
 * retxl-list, loose-list and retired-list. Keep trying until it stabilizes.
 *
 * NOTE: This code is a consequence of many iterations of adding crutches (aka
 * "checks and balances") to partially bypass the fundamental design problems
 * inherited from LMDB. So do not try to understand it completely, in order
 * to avoid going mad. */
int gc_update(MDBX_txn *txn, gcu_t *ctx) {
  TRACE("\n>>> @%" PRIaTXN, txn->txnid);
  MDBX_env *const env = txn->env;
  ctx->cursor.next = txn->cursors[FREE_DBI];
  txn->cursors[FREE_DBI] = &ctx->cursor;
  int rc;

  /* txn->tw.repnl[] can grow and shrink during this call.
   * txn->tw.gc.last_reclaimed and txn->tw.retired_pages[] can only grow.
   * But page numbers cannot disappear from txn->tw.retired_pages[]. */
retry_clean_adj:
  ctx->reserve_adj = 0;
retry:
  ctx->loop += !(ctx->prev_first_unallocated > txn->geo.first_unallocated);
  TRACE(">> restart, loop %u", ctx->loop);

  tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
  tASSERT(txn, dpl_check(txn));
  if (unlikely(/* paranoia */ ctx->loop > ((MDBX_DEBUG > 0) ? 12 : 42))) {
    ERROR("txn #%" PRIaTXN " too more loops %u, bailout", txn->txnid, ctx->loop);
    rc = MDBX_PROBLEM;
    goto bailout;
  }

  if (unlikely(ctx->dense || ctx->prev_first_unallocated > txn->geo.first_unallocated)) {
    rc = clean_stored_retired(txn, ctx);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;
  }

  ctx->prev_first_unallocated = txn->geo.first_unallocated;
  rc = MDBX_SUCCESS;
  ctx->reserved = 0;
  ctx->cleaned_slot = 0;
  ctx->reused_slot = 0;
  ctx->amount = 0;
  ctx->fill_idx = ~0u;
  ctx->cleaned_id = 0;
  ctx->rid = txn->tw.gc.last_reclaimed;
  while (true) {
    /* Come back here after each Put() in case retired-list changed */
    TRACE("%s", " >> continue");

    tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
    MDBX_val key, data;
    if (is_lifo(txn)) {
      if (ctx->cleaned_slot < (txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0)) {
        ctx->reserved = 0;
        ctx->cleaned_slot = 0;
        ctx->reused_slot = 0;
        ctx->fill_idx = ~0u;
        /* LY: cleanup reclaimed records. */
        do {
          ctx->cleaned_id = txn->tw.gc.retxl[++ctx->cleaned_slot];
          tASSERT(txn, ctx->cleaned_slot > 0 && ctx->cleaned_id <= env->lck->cached_oldest.weak);
          key.iov_base = &ctx->cleaned_id;
          key.iov_len = sizeof(ctx->cleaned_id);
          rc = cursor_seek(&ctx->cursor, &key, nullptr, MDBX_SET).err;
          if (rc == MDBX_NOTFOUND)
            continue;
          if (unlikely(rc != MDBX_SUCCESS))
            goto bailout;
          rc = prepare_backlog(txn, ctx);
          if (unlikely(rc != MDBX_SUCCESS))
            goto bailout;
          tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak);
          TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix(ctx), ctx->cleaned_slot, ctx->cleaned_id);
          tASSERT(txn, *txn->cursors == &ctx->cursor);
          rc = cursor_del(&ctx->cursor, 0);
          if (unlikely(rc != MDBX_SUCCESS))
            goto bailout;
        } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.retxl));
        txl_sort(txn->tw.gc.retxl);
      }
    } else {
      /* Удаляем оставшиеся вынутые из GC записи. */
      while (txn->tw.gc.last_reclaimed && ctx->cleaned_id <= txn->tw.gc.last_reclaimed) {
        rc = outer_first(&ctx->cursor, &key, nullptr);
        if (rc == MDBX_NOTFOUND) {
          ctx->cleaned_id = txn->tw.gc.last_reclaimed + 1;
          ctx->rid = txn->tw.gc.last_reclaimed;
          ctx->reserved = 0;
          ctx->reused_slot = 0;
          break;
        }
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
        if (!MDBX_DISABLE_VALIDATION && unlikely(key.iov_len != sizeof(txnid_t))) {
          ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid GC-key size", (unsigned)key.iov_len);
          rc = MDBX_CORRUPTED;
          goto bailout;
        }
        if (ctx->rid != ctx->cleaned_id) {
          ctx->rid = ctx->cleaned_id;
          ctx->reserved = 0;
          ctx->reused_slot = 0;
        }
        ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base);
        if (ctx->cleaned_id > txn->tw.gc.last_reclaimed)
          break;
        rc = prepare_backlog(txn, ctx);
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
        tASSERT(txn, ctx->cleaned_id <= txn->tw.gc.last_reclaimed);
        tASSERT(txn, ctx->cleaned_id <= env->lck->cached_oldest.weak);
        TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix(ctx), ctx->cleaned_id);
        tASSERT(txn, *txn->cursors == &ctx->cursor);
        rc = cursor_del(&ctx->cursor, 0);
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      }
    }

    tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
    tASSERT(txn, dpl_check(txn));
    if (AUDIT_ENABLED()) {
      rc = audit_ex(txn, ctx->retired_stored, false);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
    }

    /* return suitable into unallocated space */
    if (txn_refund(txn)) {
      tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
      if (AUDIT_ENABLED()) {
        rc = audit_ex(txn, ctx->retired_stored, false);
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      }
    }

    if (txn->tw.loose_pages) {
      /* put loose pages into the reclaimed- or retired-list */
      rc = gcu_loose(txn, ctx);
      if (unlikely(rc != MDBX_SUCCESS)) {
        if (rc == MDBX_RESULT_TRUE)
          continue;
        goto bailout;
      }
      tASSERT(txn, txn->tw.loose_pages == 0);
    }

    if (unlikely(ctx->reserved > MDBX_PNL_GETSIZE(txn->tw.repnl)) &&
        (ctx->loop < 5 || ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.repnl) > env->maxgc_large1page / 2)) {
      TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix(ctx), ctx->amount,
            MDBX_PNL_GETSIZE(txn->tw.repnl));
      ctx->reserve_adj += ctx->reserved - MDBX_PNL_GETSIZE(txn->tw.repnl);
      goto retry;
    }
    ctx->amount = MDBX_PNL_GETSIZE(txn->tw.repnl);

    if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
      /* store retired-list into GC */
      rc = gcu_retired(txn, ctx);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
      continue;
    }

    tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
    tASSERT(txn, txn->tw.loose_count == 0);

    TRACE("%s", " >> reserving");
    if (AUDIT_ENABLED()) {
      rc = audit_ex(txn, ctx->retired_stored, false);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
    }
    const size_t left = ctx->amount - ctx->reserved - ctx->reserve_adj;
    TRACE("%s: amount %zu, reserved %zd, reserve_adj %zu, left %zd, "
          "lifo-reclaimed-slots %zu, "
          "reused-gc-slots %zu",
          dbg_prefix(ctx), ctx->amount, ctx->reserved, ctx->reserve_adj, left,
          txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0, ctx->reused_slot);
    if (0 >= (intptr_t)left)
      break;

    const rid_t rid_result = get_rid_for_reclaimed(txn, ctx, left);
    if (unlikely(!rid_result.rid)) {
      rc = rid_result.err;
      if (likely(rc == MDBX_SUCCESS))
        continue;
      if (likely(rc == MDBX_RESULT_TRUE))
        goto retry;
      goto bailout;
    }
    tASSERT(txn, rid_result.err == MDBX_SUCCESS);
    const txnid_t reservation_gc_id = rid_result.rid;

    size_t chunk = left;
    if (unlikely(left > env->maxgc_large1page)) {
      const size_t avail_gc_slots = txn->tw.gc.retxl         ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot + 1
                                    : (ctx->rid < INT16_MAX) ? (size_t)ctx->rid
                                                             : INT16_MAX;
      if (likely(avail_gc_slots > 1)) {
#if MDBX_ENABLE_BIGFOOT
        chunk = env->maxgc_large1page;
        if (avail_gc_slots < INT16_MAX && unlikely(left > env->maxgc_large1page * avail_gc_slots))
          /* TODO: Можно смотреть последовательности какой длины есть в repnl
           *       и пробовать нарезать куски соответствующего размера.
           *       Смысл в том, чтобы не дробить последовательности страниц,
           *       а использовать целиком. */
          chunk = env->maxgc_large1page + left / (env->maxgc_large1page * avail_gc_slots) * env->maxgc_large1page;
#else
        if (chunk < env->maxgc_large1page * 2)
          chunk /= 2;
        else {
          const size_t prefer_max_scatter = 257;
          const size_t threshold =
              env->maxgc_large1page * ((avail_gc_slots < prefer_max_scatter) ? avail_gc_slots : prefer_max_scatter);
          if (left < threshold)
            chunk = env->maxgc_large1page;
          else {
            const size_t tail = left - threshold + env->maxgc_large1page + 1;
            size_t span = 1;
            size_t avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) /* - 1 + span */;
            if (tail > avail) {
              for (size_t i = ctx->amount - span; i > 0; --i) {
                if (MDBX_PNL_ASCENDING ? (txn->tw.repnl[i] + span)
                                       : (txn->tw.repnl[i] - span) == txn->tw.repnl[i + span]) {
                  span += 1;
                  avail = ((pgno2bytes(env, span) - PAGEHDRSZ) / sizeof(pgno_t)) - 1 + span;
                  if (avail >= tail)
                    break;
                }
              }
            }

            chunk = (avail >= tail)                                                     ? tail - span
                    : (avail_gc_slots > 3 && ctx->reused_slot < prefer_max_scatter - 3) ? avail - span
                                                                                        : tail;
          }
        }
#endif /* MDBX_ENABLE_BIGFOOT */
      }
    }
    tASSERT(txn, chunk > 0);

    TRACE("%s: gc_rid %" PRIaTXN ", reused_gc_slot %zu, reservation-id "
          "%" PRIaTXN,
          dbg_prefix(ctx), ctx->rid, ctx->reused_slot, reservation_gc_id);

    TRACE("%s: chunk %zu, gc-per-ovpage %u", dbg_prefix(ctx), chunk, env->maxgc_large1page);

    tASSERT(txn, reservation_gc_id <= env->lck->cached_oldest.weak);
    if (unlikely(reservation_gc_id < MIN_TXNID ||
                 reservation_gc_id > atomic_load64(&env->lck->cached_oldest, mo_Relaxed))) {
      ERROR("** internal error (reservation_gc_id %" PRIaTXN ")", reservation_gc_id);
      rc = MDBX_PROBLEM;
      goto bailout;
    }

    tASSERT(txn, reservation_gc_id >= MIN_TXNID && reservation_gc_id <= MAX_TXNID);
    key.iov_len = sizeof(reservation_gc_id);
    key.iov_base = (void *)&reservation_gc_id;
    data.iov_len = (chunk + 1) * sizeof(pgno_t);
    TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix(ctx), chunk, ctx->reserved + 1,
          ctx->reserved + chunk + 1, reservation_gc_id);
    prepare_backlog(txn, ctx);
    rc = cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE);
    tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    zeroize_reserved(env, data);
    ctx->reserved += chunk;
    TRACE("%s: reserved %zu (+%zu), continue", dbg_prefix(ctx), ctx->reserved, chunk);

    continue;
  }

  tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0));

  TRACE("%s", " >> filling");
  /* Fill in the reserved records */
  size_t excess_slots = 0;
  ctx->fill_idx = txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) - ctx->reused_slot : ctx->reused_slot;
  rc = MDBX_SUCCESS;
  tASSERT(txn, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
  tASSERT(txn, dpl_check(txn));
  if (ctx->amount) {
    MDBX_val key, data;
    key.iov_len = data.iov_len = 0;
    key.iov_base = data.iov_base = nullptr;

    size_t left = ctx->amount, excess = 0;
    if (txn->tw.gc.retxl == nullptr) {
      tASSERT(txn, is_lifo(txn) == 0);
      rc = outer_first(&ctx->cursor, &key, &data);
      if (unlikely(rc != MDBX_SUCCESS)) {
        if (rc != MDBX_NOTFOUND)
          goto bailout;
      }
    } else {
      tASSERT(txn, is_lifo(txn) != 0);
    }

    while (true) {
      txnid_t fill_gc_id;
      TRACE("%s: left %zu of %zu", dbg_prefix(ctx), left, MDBX_PNL_GETSIZE(txn->tw.repnl));
      if (txn->tw.gc.retxl == nullptr) {
        tASSERT(txn, is_lifo(txn) == 0);
        fill_gc_id = key.iov_base ? unaligned_peek_u64(4, key.iov_base) : MIN_TXNID;
        if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.gc.last_reclaimed) {
          if (!left)
            break;
          VERBOSE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN " > last_reclaimed %" PRIaTXN
                  ", left %zu",
                  ctx->fill_idx, fill_gc_id, txn->tw.gc.last_reclaimed, left);
          ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0;
          goto retry;
        }
        ctx->fill_idx -= 1;
      } else {
        tASSERT(txn, is_lifo(txn) != 0);
        if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.gc.retxl)) {
          if (!left)
            break;
          VERBOSE("** restart: reserve depleted (fill_idx %zu >= "
                  "gc.reclaimed %zu, left %zu",
                  ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.gc.retxl), left);
          ctx->reserve_adj = (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0;
          goto retry;
        }
        ctx->fill_idx += 1;
        fill_gc_id = txn->tw.gc.retxl[ctx->fill_idx];
        TRACE("%s: seek-reservation @%" PRIaTXN " at gc.reclaimed[%zu]", dbg_prefix(ctx), fill_gc_id, ctx->fill_idx);
        key.iov_base = &fill_gc_id;
        key.iov_len = sizeof(fill_gc_id);
        rc = cursor_seek(&ctx->cursor, &key, &data, MDBX_SET_KEY).err;
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      }
      tASSERT(txn, ctx->cleaned_slot == (txn->tw.gc.retxl ? MDBX_PNL_GETSIZE(txn->tw.gc.retxl) : 0));
      tASSERT(txn, fill_gc_id > 0 && fill_gc_id <= env->lck->cached_oldest.weak);
      key.iov_base = &fill_gc_id;
      key.iov_len = sizeof(fill_gc_id);

      tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
      size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
      if (unlikely(chunk > left)) {
        const size_t delta = chunk - left;
        excess += delta;
        TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix(ctx), chunk, left, fill_gc_id);
        if (!left) {
          excess_slots += 1;
          goto next;
        }
        if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || delta > env->maxgc_large1page)
          data.iov_len = (left + 1) * sizeof(pgno_t);
        chunk = left;
      }
      rc = cursor_put(&ctx->cursor, &key, &data, MDBX_CURRENT | MDBX_RESERVE);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
      zeroize_reserved(env, data);

      if (unlikely(txn->tw.loose_count || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.repnl))) {
        NOTICE("** restart: reclaimed-list changed (%zu -> %zu, loose +%zu)", ctx->amount,
               MDBX_PNL_GETSIZE(txn->tw.repnl), txn->tw.loose_count);
        if (ctx->loop < 5 || (ctx->loop > 10 && (ctx->loop & 1)))
          goto retry_clean_adj;
        goto retry;
      }

      if (unlikely(txn->tw.gc.retxl ? ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.gc.retxl)
                                    : ctx->cleaned_id < txn->tw.gc.last_reclaimed)) {
        NOTICE("%s", "** restart: reclaimed-slots changed");
        goto retry;
      }
      if (unlikely(ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages))) {
        tASSERT(txn, ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages));
        NOTICE("** restart: retired-list growth (%zu -> %zu)", ctx->retired_stored,
               MDBX_PNL_GETSIZE(txn->tw.retired_pages));
        goto retry;
      }

      pgno_t *dst = data.iov_base;
      *dst++ = (pgno_t)chunk;
      pgno_t *src = MDBX_PNL_BEGIN(txn->tw.repnl) + left - chunk;
      memcpy(dst, src, chunk * sizeof(pgno_t));
      pgno_t *from = src, *to = src + chunk;
      TRACE("%s: fill %zu [ %zu:%" PRIaPGNO "...%zu:%" PRIaPGNO "] @%" PRIaTXN, dbg_prefix(ctx), chunk,
            from - txn->tw.repnl, from[0], to - txn->tw.repnl, to[-1], fill_gc_id);

      left -= chunk;
      if (AUDIT_ENABLED()) {
        rc = audit_ex(txn, ctx->retired_stored + ctx->amount - left, true);
        if (unlikely(rc != MDBX_SUCCESS))
          goto bailout;
      }

    next:

      if (txn->tw.gc.retxl == nullptr) {
        tASSERT(txn, is_lifo(txn) == 0);
        rc = outer_next(&ctx->cursor, &key, &data, MDBX_NEXT);
        if (unlikely(rc != MDBX_SUCCESS)) {
          if (rc == MDBX_NOTFOUND && !left) {
            rc = MDBX_SUCCESS;
            break;
          }
          goto bailout;
        }
      } else {
        tASSERT(txn, is_lifo(txn) != 0);
      }
    }

    if (excess) {
      size_t n = excess, adj = excess;
      while (n >= env->maxgc_large1page)
        adj -= n /= env->maxgc_large1page;
      ctx->reserve_adj += adj;
      TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix(ctx), excess, adj, ctx->reserve_adj);
    }
  }

  tASSERT(txn, rc == MDBX_SUCCESS);
  if (unlikely(txn->tw.loose_count != 0 || ctx->amount != MDBX_PNL_GETSIZE(txn->tw.repnl))) {
    NOTICE("** restart: got %zu loose pages (reclaimed-list %zu -> %zu)", txn->tw.loose_count, ctx->amount,
           MDBX_PNL_GETSIZE(txn->tw.repnl));
    goto retry;
  }

  if (unlikely(excess_slots)) {
    const bool will_retry = ctx->loop < 5 || excess_slots > 1;
    NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, "
           "loop %u)",
           will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, ctx->reserve_adj, ctx->loop);
    if (will_retry)
      goto retry;
  }

  tASSERT(txn, txn->tw.gc.retxl == nullptr || ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.gc.retxl));

bailout:
  txn->cursors[FREE_DBI] = ctx->cursor.next;

  MDBX_PNL_SETSIZE(txn->tw.repnl, 0);
#if MDBX_ENABLE_PROFGC
  env->lck->pgops.gc_prof.wloops += (uint32_t)ctx->loop;
#endif /* MDBX_ENABLE_PROFGC */
  TRACE("<<< %u loops, rc = %d", ctx->loop, rc);
  return rc;
}
/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

/* Forward declarations of the library-wide initialization and finalization
 * routines. They are called from the platform-specific load/unload hooks
 * below (DllMain or the TLS callback on Windows, constructor/destructor
 * functions elsewhere) and are defined after those hooks. */
static void mdbx_init(void);
static void mdbx_fini(void);

/*----------------------------------------------------------------------------*/
/* mdbx constructor/destructor */

#if defined(_WIN32) || defined(_WIN64)

#if MDBX_BUILD_SHARED_LIBRARY
#if MDBX_WITHOUT_MSVC_CRT && defined(NDEBUG)
/* DEBUG/CHECKED builds still require MSVC's CRT for runtime checks.
 *
 * The DLL entry point is overridden only for Release builds (NDEBUG defined)
 * with MDBX_WITHOUT_MSVC_CRT=ON. When the entry point is not overridden, MSVC
 * automatically uses DllMainCRTStartup() from the CRT library, which in turn
 * calls the DllMain() of our mdbx.dll. */
#pragma comment(linker, "/ENTRY:DllMain")
#endif /* MDBX_WITHOUT_MSVC_CRT */

/* Module notification handler.
 *
 * For a shared-library build this is DllMain() and always reports TRUE;
 * otherwise it is mdbx_module_handler(), which returns nothing and is
 * `static` unless MDBX_MANUAL_MODULE_HANDLER is set.
 *
 *  - process attach: resolves the Windows imports, then initializes the library;
 *  - process detach: finalizes the library;
 *  - thread detach:  runs the per-thread reader cleanup for `module`;
 *  - thread attach and any other reason: nothing to do. */
BOOL APIENTRY DllMain(HANDLE module, DWORD reason, LPVOID reserved)
#else
#if !MDBX_MANUAL_MODULE_HANDLER
static
#endif /* !MDBX_MANUAL_MODULE_HANDLER */
    void NTAPI mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved)
#endif /* MDBX_BUILD_SHARED_LIBRARY */
{
  (void)reserved;
  if (reason == DLL_PROCESS_ATTACH) {
    windows_import();
    mdbx_init();
  } else if (reason == DLL_PROCESS_DETACH) {
    mdbx_fini();
  } else if (reason == DLL_THREAD_DETACH) {
    rthc_thread_dtor(module);
  }
  /* DLL_THREAD_ATTACH and unknown reasons are intentionally ignored */
#if MDBX_BUILD_SHARED_LIBRARY
  return TRUE;
#endif
}

/* Static (non-DLL) build without a manually installed handler: place a pointer
 * to mdbx_module_handler(), typed as PIMAGE_TLS_CALLBACK, into the ".CRT$XLB"
 * section under the name mdbx_tls_anchor. The pragmas below differ between
 * 32-bit x86 (_M_IX86) and other targets only in symbol decoration (an extra
 * leading underscore) and in whether a const- or a data-segment is used. */
#if !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER
#if defined(_MSC_VER)
#  pragma const_seg(push)
#  pragma data_seg(push)

#  ifndef _M_IX86
     /* Nudge the linker to create the TLS directory if it is not created yet */
#    pragma comment(linker, "/INCLUDE:_tls_used")
     /* Force a reference to the anchor symbol */
#    pragma comment(linker, "/INCLUDE:mdbx_tls_anchor")
     /* Non-x86 targets (e.g. WIN64): the anchor lives in a const-segment */
#    pragma const_seg(".CRT$XLB")
     const
#  else
     /* Nudge the linker to create the TLS directory if it is not created yet */
#    pragma comment(linker, "/INCLUDE:__tls_used")
     /* Force a reference to the anchor symbol */
#    pragma comment(linker, "/INCLUDE:_mdbx_tls_anchor")
     /* 32-bit x86 (WIN32): the anchor lives in a data-segment */
#    pragma data_seg(".CRT$XLB")
#  endif

   __declspec(allocate(".CRT$XLB")) PIMAGE_TLS_CALLBACK mdbx_tls_anchor = mdbx_module_handler;
#  pragma data_seg(pop)
#  pragma const_seg(pop)

#elif defined(__GNUC__)
   /* GNU-compatible compilers: same anchor, placed via a section attribute;
    * `used` keeps the otherwise unreferenced object from being dropped. */
#  ifndef _M_IX86
     const
#  endif
   PIMAGE_TLS_CALLBACK mdbx_tls_anchor __attribute__((__section__(".CRT$XLB"), used)) = mdbx_module_handler;
#else
#  error FIXME
#endif
#endif /* !MDBX_BUILD_SHARED_LIBRARY && !MDBX_MANUAL_MODULE_HANDLER */

#else

#if defined(__linux__) || defined(__gnu_linux__)
#include <sys/utsname.h>

/* Inspects one uname() field for Windows Subsystem for Linux markers.
 *
 * Returns:
 *   2..9 - the digit that immediately follows a "WSL" (checked first) or
 *          "wsl" marker, when that digit is in the '2'..'9' range;
 *   1/2  - when a marker (or "Microsoft", case-insensitively) is present but
 *          carries no such digit: 1 if the already detected kernel version is
 *          below 4.19, otherwise 2;
 *   0    - when the tag has no WSL hint at all. */
MDBX_EXCLUDE_FOR_GPROF
__cold static uint8_t probe_for_WSL(const char *tag) {
  const char *const upper_mark = strstr(tag, "WSL");
  if (upper_mark) {
    const char digit = upper_mark[3];
    if (digit >= '2' && digit <= '9')
      return digit - '0';
  }

  const char *const lower_mark = strstr(tag, "wsl");
  if (lower_mark) {
    const char digit = lower_mark[3];
    if (digit >= '2' && digit <= '9')
      return digit - '0';
  }

  if (!upper_mark && !lower_mark && !strcasestr(tag, "Microsoft"))
    return 0;

  /* No new kernel is expected within WSL1; otherwise the environment is
   * expected to be explicitly marked by an appropriate WSL-version hint. */
  return (globals.linux_kernel_version < /* 4.19.x */ 0x04130000) ? 1 : 2;
}
#endif /* Linux */

/* Profiling-runtime symbols needed for ENABLE_GPROF builds. The global
 * constructor calls monstartup() with the addresses of _init and _fini, and
 * the global destructor calls _mcleanup(); both do so only when the weak
 * __gmon_start__ symbol resolves to a null address. */
#ifdef ENABLE_GPROF
extern void _mcleanup(void);
extern void monstartup(unsigned long, unsigned long);
extern void _init(void);
extern void _fini(void);
extern void __gmon_start__(void) __attribute__((__weak__));
#endif /* ENABLE_GPROF */

/* Load-time hook for non-Windows builds: optionally starts gprof, detects the
 * Linux kernel version and WSL1, then performs the common initialization. */
MDBX_EXCLUDE_FOR_GPROF
__cold static __attribute__((__constructor__)) void mdbx_global_constructor(void) {
#ifdef ENABLE_GPROF
  if (!&__gmon_start__)
    monstartup((uintptr_t)&_init, (uintptr_t)&_fini);
#endif /* ENABLE_GPROF */

#if defined(__linux__) || defined(__gnu_linux__)
  struct utsname uts;
  if (uname(&uts) == 0) {
    /* Pack up to four numeric components of the release string into
     * globals.linux_kernel_version, one byte per component starting from the
     * most significant one (so "4.19.x" yields 0x0413xxxx). Each component
     * is capped at 255, non-digit characters are just separators. */
    char *cursor = uts.release;
    for (int field = 0; field < 4 && *cursor;) {
      if (*cursor < '0' || *cursor > '9') {
        ++cursor;
        continue;
      }
      long component = strtol(cursor, &cursor, 10);
      if (component > 255)
        component = 255;
      if (component > 0)
        globals.linux_kernel_version += component << (24 - field * 8);
      ++field;
    }

    /* "Official" way of detecting WSL1 but not WSL2
     * https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364
     *
     * WARNING: False negative detection of WSL1 will result in DATA LOSS!
     * So, the REQUIREMENTS for this code:
     *  1. MUST detect WSL1 without false-negatives.
     *  2. DESIRABLE detect WSL2 but without the risk of violating the first. */
    globals.running_on_WSL1 = probe_for_WSL(uts.version) == 1 || probe_for_WSL(uts.sysname) == 1 ||
                              probe_for_WSL(uts.release) == 1;
  }
#endif /* Linux */

  mdbx_init();
}

/* Unload-time hook for non-Windows builds: performs the common finalization
 * and then, for ENABLE_GPROF builds where the weak __gmon_start__ symbol is
 * null, calls _mcleanup(). */
MDBX_EXCLUDE_FOR_GPROF
__cold static __attribute__((__destructor__)) void mdbx_global_destructor(void) {
  mdbx_fini();
#ifdef ENABLE_GPROF
  if (!&__gmon_start__)
    _mcleanup();
#endif /* ENABLE_GPROF */
}

#endif /* ! Windows */

/******************************************************************************/

/* Process-wide library state: runtime flags, log level, the debug lock, the
 * system page size and (on Linux) the detected kernel version and WSL1 flag
 * are among the members used in this part of the file. */
struct libmdbx_globals globals;

/* Common library initialization, invoked once from the platform load hook.
 * The order of the steps is significant and must be preserved. */
__cold static void mdbx_init(void) {
  /* MDBX_DEBUG > 0 enables assertions, MDBX_DEBUG > 1 additionally enables audit */
  globals.runtime_flags = ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT;
  globals.loglevel = MDBX_LOG_FATAL;
  ENSURE(nullptr, osal_fastmutex_init(&globals.debug_lock) == 0);
  osal_ctor();
  /* after osal_ctor() the system page size must be a non-zero power of two */
  assert(globals.sys_pagesize > 0 && (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0);
  rthc_ctor();
#if MDBX_DEBUG
  /* self-checks executed only in debug builds */
  ENSURE(nullptr, troika_verify_fsm());
  ENSURE(nullptr, pv2pages_verify());
#endif /* MDBX_DEBUG*/
}

/* Common library finalization, invoked once from the platform unload hook.
 * Runs the reader-thread and OS-abstraction teardown for the current pid,
 * then destroys the debug lock; the order of the steps must be preserved. */
MDBX_EXCLUDE_FOR_GPROF
__cold static void mdbx_fini(void) {
  const uint32_t current_pid = osal_getpid();
  TRACE(">> pid %d", current_pid);
  rthc_dtor(current_pid);
  osal_dtor();
  TRACE("<< pid %d\n", current_pid);
  /* the debug lock is destroyed last, after the final TRACE above */
  ENSURE(nullptr, osal_fastmutex_destroy(&globals.debug_lock) == 0);
}

/******************************************************************************/

/* Build information exported from the library as a constant object.
 *
 * The initializers are string literals assembled by the preprocessor, in this
 * order:
 *   1. build timestamp (MDBX_BUILD_TIMESTAMP or the compiler's date and time);
 *   2. build target (MDBX_BUILD_TARGET or "<OS>-<architecture>"), optionally
 *      followed by "-" MDBX_BUILD_TYPE;
 *   3. the list of effective build options as "NAME=value" pairs;
 *   4. compiler identification (MDBX_BUILD_COMPILER or an auto-detected one);
 *   5. build flags (MDBX_BUILD_FLAGS_CONFIG and/or MDBX_BUILD_FLAGS);
 *   6. MDBX_BUILD_METADATA.
 *
 * The `used`/`externally_visible` attributes are applied where available so
 * that the object is kept in the resulting binary. */
__dll_export
#ifdef __attribute_used__
    __attribute_used__
#elif defined(__GNUC__) || __has_attribute(__used__)
    __attribute__((__used__))
#endif
#ifdef __attribute_externally_visible__
        __attribute_externally_visible__
#elif (defined(__GNUC__) && !defined(__clang__)) ||                            \
    __has_attribute(__externally_visible__)
    __attribute__((__externally_visible__))
#endif
    const struct MDBX_build_info mdbx_build = {
    /* 1. build timestamp */
#ifdef MDBX_BUILD_TIMESTAMP
    MDBX_BUILD_TIMESTAMP
#else
    "\"" __DATE__ " " __TIME__ "\""
#endif /* MDBX_BUILD_TIMESTAMP */

    ,
    /* 2. build target: explicit, or auto-detected "<OS>-<architecture>" */
#ifdef MDBX_BUILD_TARGET
    MDBX_BUILD_TARGET
#else
  #if defined(__ANDROID_API__)
    "Android" MDBX_STRINGIFY(__ANDROID_API__)
  #elif defined(__OHOS__)
    "Harmony OS"
  #elif defined(__linux__) || defined(__gnu_linux__)
    "Linux"
  #elif defined(EMSCRIPTEN) || defined(__EMSCRIPTEN__)
    "webassembly"
  #elif defined(__CYGWIN__)
    "CYGWIN"
  #elif defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) \
      || defined(__WINDOWS__)
    "Windows"
  #elif defined(__APPLE__)
    #if (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) \
      || (defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR)
      "iOS"
    #else
      "MacOS"
    #endif
  #elif defined(__FreeBSD__)
    "FreeBSD"
  #elif defined(__DragonFly__)
    "DragonFlyBSD"
  #elif defined(__NetBSD__)
    "NetBSD"
  #elif defined(__OpenBSD__)
    "OpenBSD"
  #elif defined(__bsdi__)
    "UnixBSDI"
  #elif defined(__MACH__)
    "MACH"
  #elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC))
    "HPUX"
  #elif defined(_AIX)
    "AIX"
  #elif defined(__sun) && defined(__SVR4)
    "Solaris"
  #elif defined(__BSD__) || defined(BSD)
    "UnixBSD"
  #elif defined(__unix__) || defined(UNIX) || defined(__unix) \
      || defined(__UNIX) || defined(__UNIX__)
    "UNIX"
  #elif defined(_POSIX_VERSION)
    "POSIX" MDBX_STRINGIFY(_POSIX_VERSION)
  #else
    "UnknownOS"
  #endif /* Target OS */

    "-"

  /* Target architecture; every architecture is tested exactly once */
  #if defined(__e2k__) || defined(__elbrus__)
    "Elbrus"
  #elif defined(__amd64__)
    "AMD64"
  #elif defined(__ia32__)
    "IA32"
  #elif defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
    "Alpha"
  #elif defined(__aarch64__) || defined(_M_ARM64)
    "ARM64"
  #elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) \
      || defined(__TARGET_ARCH_THUMB) || defined(_ARM) || defined(_M_ARM) \
      || defined(_M_ARMT) || defined(__arm)
    "ARM"
  #elif defined(__mips64) || defined(__mips64__) || (defined(__mips) && (__mips >= 64))
    "MIPS64"
  #elif defined(__mips__) || defined(__mips) || defined(_R4000) || defined(__MIPS__)
    "MIPS"
  #elif defined(__hppa64__) || defined(__HPPA64__) || defined(__hppa64)
    "PARISC64"
  #elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
    "PARISC"
  #elif defined(__ia64__) || defined(__ia64) || defined(_IA64) \
      || defined(__IA64__) || defined(_M_IA64) || defined(__itanium__)
    "Itanium"
  #elif defined(__powerpc64__) || defined(__ppc64__) || defined(__ppc64) \
      || defined(__powerpc64) || defined(_ARCH_PPC64)
    "PowerPC64"
  #elif defined(__powerpc__) || defined(__ppc__) || defined(__powerpc) \
      || defined(__ppc) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__POWERPC__)
    "PowerPC"
  #elif defined(__sparc64__) || defined(__sparc64)
    "SPARC64"
  #elif defined(__sparc__) || defined(__sparc)
    "SPARC"
  #elif defined(__s390__) || defined(__s390) || defined(__zarch__) || defined(__zarch)
    "S390"
  #elif defined(__riscv) || defined(__riscv__) || defined(__RISCV) || defined(__RISCV__)
    "RISC-V (стеклянные бусы)"
  #else
    "UnknownARCH"
  #endif
#endif /* MDBX_BUILD_TARGET */

    /* optional "-<build type>" suffix of the target string */
#ifdef MDBX_BUILD_TYPE
# if defined(_MSC_VER)
#   pragma message("Configuration-depended MDBX_BUILD_TYPE: " MDBX_BUILD_TYPE)
# endif
    "-" MDBX_BUILD_TYPE
#endif /* MDBX_BUILD_TYPE */
    ,
    /* 3. effective build options */
    "MDBX_DEBUG=" MDBX_STRINGIFY(MDBX_DEBUG)
#ifdef ENABLE_GPROF
    " ENABLE_GPROF"
#endif /* ENABLE_GPROF */
    " MDBX_WORDBITS=" MDBX_STRINGIFY(MDBX_WORDBITS)
    " BYTE_ORDER="
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    "LITTLE_ENDIAN"
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    "BIG_ENDIAN"
#else
    #error "FIXME: Unsupported byte order"
#endif /* __BYTE_ORDER__ */
    " MDBX_ENABLE_BIGFOOT=" MDBX_STRINGIFY(MDBX_ENABLE_BIGFOOT)
    " MDBX_ENV_CHECKPID=" MDBX_ENV_CHECKPID_CONFIG
    " MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG
    " MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG
    " MDBX_64BIT_CAS=" MDBX_64BIT_CAS_CONFIG
    " MDBX_TRUST_RTC=" MDBX_TRUST_RTC_CONFIG
    " MDBX_AVOID_MSYNC=" MDBX_STRINGIFY(MDBX_AVOID_MSYNC)
    " MDBX_ENABLE_REFUND=" MDBX_STRINGIFY(MDBX_ENABLE_REFUND)
    " MDBX_USE_MINCORE=" MDBX_STRINGIFY(MDBX_USE_MINCORE)
    " MDBX_ENABLE_PGOP_STAT=" MDBX_STRINGIFY(MDBX_ENABLE_PGOP_STAT)
    " MDBX_ENABLE_PROFGC=" MDBX_STRINGIFY(MDBX_ENABLE_PROFGC)
#if MDBX_DISABLE_VALIDATION
    " MDBX_DISABLE_VALIDATION=YES"
#endif /* MDBX_DISABLE_VALIDATION */
#ifdef __SANITIZE_ADDRESS__
    " SANITIZE_ADDRESS=YES"
#endif /* __SANITIZE_ADDRESS__ */
#ifdef ENABLE_MEMCHECK
    " ENABLE_MEMCHECK=YES"
#endif /* ENABLE_MEMCHECK */
#if MDBX_FORCE_ASSERTIONS
    " MDBX_FORCE_ASSERTIONS=YES"
#endif /* MDBX_FORCE_ASSERTIONS */
#ifdef _GNU_SOURCE
    " _GNU_SOURCE=YES"
#else
    " _GNU_SOURCE=NO"
#endif /* _GNU_SOURCE */
#ifdef __APPLE__
    " MDBX_APPLE_SPEED_INSTEADOF_DURABILITY=" MDBX_STRINGIFY(MDBX_APPLE_SPEED_INSTEADOF_DURABILITY)
#endif /* MacOS */
#if defined(_WIN32) || defined(_WIN64)
    " MDBX_WITHOUT_MSVC_CRT=" MDBX_STRINGIFY(MDBX_WITHOUT_MSVC_CRT)
    " MDBX_BUILD_SHARED_LIBRARY=" MDBX_STRINGIFY(MDBX_BUILD_SHARED_LIBRARY)
#if !MDBX_BUILD_SHARED_LIBRARY
    " MDBX_MANUAL_MODULE_HANDLER=" MDBX_STRINGIFY(MDBX_MANUAL_MODULE_HANDLER)
#endif
    " WINVER=" MDBX_STRINGIFY(WINVER)
#else /* Windows */
    " MDBX_LOCKING=" MDBX_LOCKING_CONFIG
    " MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG
    " MDBX_USE_FALLOCATE=" MDBX_USE_FALLOCATE_CONFIG
#endif /* !Windows */
    " MDBX_CACHELINE_SIZE=" MDBX_STRINGIFY(MDBX_CACHELINE_SIZE)
    " MDBX_CPU_WRITEBACK_INCOHERENT=" MDBX_STRINGIFY(MDBX_CPU_WRITEBACK_INCOHERENT)
    " MDBX_MMAP_INCOHERENT_CPU_CACHE=" MDBX_STRINGIFY(MDBX_MMAP_INCOHERENT_CPU_CACHE)
    " MDBX_MMAP_INCOHERENT_FILE_WRITE=" MDBX_STRINGIFY(MDBX_MMAP_INCOHERENT_FILE_WRITE)
    " MDBX_UNALIGNED_OK=" MDBX_STRINGIFY(MDBX_UNALIGNED_OK)
    " MDBX_PNL_ASCENDING=" MDBX_STRINGIFY(MDBX_PNL_ASCENDING)
    ,
    /* 4. compiler identification: explicit or auto-detected */
#ifdef MDBX_BUILD_COMPILER
    MDBX_BUILD_COMPILER
#else
  #ifdef __INTEL_COMPILER
    "Intel C/C++ " MDBX_STRINGIFY(__INTEL_COMPILER)
  #elif defined(__apple_build_version__)
    "Apple clang " MDBX_STRINGIFY(__apple_build_version__)
  #elif defined(__ibmxl__)
    "IBM clang C " MDBX_STRINGIFY(__ibmxl_version__) "." MDBX_STRINGIFY(__ibmxl_release__)
    "." MDBX_STRINGIFY(__ibmxl_modification__) "." MDBX_STRINGIFY(__ibmxl_ptf_fix_level__)
  #elif defined(__clang__)
    "clang " MDBX_STRINGIFY(__clang_version__)
  #elif defined(__MINGW64__)
    "MINGW-64 " MDBX_STRINGIFY(__MINGW64_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW64_MINOR_VERSION)
  #elif defined(__MINGW32__)
    "MINGW-32 " MDBX_STRINGIFY(__MINGW32_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW32_MINOR_VERSION)
  #elif defined(__MINGW__)
    "MINGW " MDBX_STRINGIFY(__MINGW_MAJOR_VERSION) "." MDBX_STRINGIFY(__MINGW_MINOR_VERSION)
  #elif defined(__IBMC__)
    "IBM C " MDBX_STRINGIFY(__IBMC__)
  #elif defined(__GNUC__)
    "GNU C/C++ "
    #ifdef __VERSION__
      __VERSION__
    #else
      MDBX_STRINGIFY(__GNUC__) "." MDBX_STRINGIFY(__GNUC_MINOR__) "." MDBX_STRINGIFY(__GNUC_PATCHLEVEL__)
    #endif
  #elif defined(_MSC_VER)
    "MSVC " MDBX_STRINGIFY(_MSC_FULL_VER) "-" MDBX_STRINGIFY(_MSC_BUILD)
  #else
    "Unknown compiler"
  #endif
#endif /* MDBX_BUILD_COMPILER */
    ,
    /* 5. build flags: configuration-provided and/or explicit ones, joined by
     *    a space; a placeholder with a build-time warning if neither is set */
#ifdef MDBX_BUILD_FLAGS_CONFIG
    MDBX_BUILD_FLAGS_CONFIG
#endif /* MDBX_BUILD_FLAGS_CONFIG */
#if defined(MDBX_BUILD_FLAGS_CONFIG) && defined(MDBX_BUILD_FLAGS)
    " "
#endif
#ifdef MDBX_BUILD_FLAGS
    MDBX_BUILD_FLAGS
#endif /* MDBX_BUILD_FLAGS */
#if !(defined(MDBX_BUILD_FLAGS_CONFIG) || defined(MDBX_BUILD_FLAGS))
    "undefined (please use correct build script)"
#ifdef _MSC_VER
#pragma message("warning: Build flags undefined. Please use correct build script")
#else
#warning "Build flags undefined. Please use correct build script"
#endif // _MSC_VER
#endif
    /* 6. build metadata */
  , MDBX_BUILD_METADATA
};

#ifdef __SANITIZE_ADDRESS__
/* Supplies the default AddressSanitizer option string for builds with
 * __SANITIZE_ADDRESS__. The string is composed at compile time: verbose
 * debugging is added for MDBX_DEBUG builds, leak detection is omitted on
 * Apple targets, and initialization-order checking is omitted when
 * LTO_ENABLED is defined. Declared weak where the compiler supports it. */
#if !defined(_MSC_VER) || __has_attribute(weak)
LIBMDBX_API __attribute__((__weak__))
#endif
const char *__asan_default_options(void) {
  const char *const default_options = "symbolize=1:allow_addr2line=1:"
#if MDBX_DEBUG
                                      "debug=1:"
                                      "verbosity=2:"
#endif /* MDBX_DEBUG */
                                      "log_threads=1:"
                                      "report_globals=1:"
                                      "replace_str=1:replace_intrin=1:"
                                      "malloc_context_size=9:"
#if !defined(__APPLE__)
                                      "detect_leaks=1:"
#endif
                                      "check_printf=1:"
                                      "detect_deadlocks=1:"
#ifndef LTO_ENABLED
                                      "check_initialization_order=1:"
#endif
                                      "detect_stack_use_after_return=1:"
                                      "intercept_tls_get_addr=1:"
                                      "decorate_proc_maps=1:"
                                      "abort_on_error=1";
  return default_options;
}
#endif /* __SANITIZE_ADDRESS__ */

/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025

#if !(defined(_WIN32) || defined(_WIN64))
/*----------------------------------------------------------------------------*
 * POSIX/non-Windows LCK-implementation */

#if MDBX_LOCKING == MDBX_LOCKING_SYSV
#include <sys/sem.h>
#endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */

/* Description of the locking implementation for POSIX & Linux:
 *
 * The lck-file is mapped into memory; the reader table is organized inside it
 * and the shared posix-mutexes (futex) are placed there. By means of these
 * mutexes (see struct lck_t) the following are implemented:
 *  - Locking of the reader table for registration,
 *    i.e. the lck_rdt_lock() and lck_rdt_unlock() functions.
 *  - Locking of the DB for write transactions,
 *    i.e. the lck_txn_lock() and lck_txn_unlock() functions.
 *
 * The rest of the functionality is implemented separately via file locks:
 *  - The initial seizing of the DB in exclusive/shared mode and the subsequent
 *    switch to the operational mode, i.e. the lck_seize() and lck_downgrade()
 *    functions.
 *  - Checking for the presence of reader processes,
 *    i.e. the lck_rpid_set(), lck_rpid_clear() and lck_rpid_check() functions.
 *
 * fcntl(F_SETLK) is used for file locking, because:
 *  - lockf() operates only with an exclusive lock and requires
 *    the file to be opened in RW-mode.
 *  - flock() does not guarantee atomicity when changing locks
 *    and operates only on the whole file.
 *  - To track reader processes, single-byte range-locks of the lck-file
 *    are used via fcntl(F_SETLK). The pid of the reader process is used
 *    as the position of the locked byte.
 *  - For the initial seizing and for shared/exclusive, the main DB file
 *    is locked first and, on success, the lck-file.
 *
 * ----------------------------------------------------------------------------
 * LOCKS HELD DEPENDING ON THE MODE AND STATE
 *
 * Exclusive mode without an lck-file:
 *   = the whole dxb-file is locked via F_RDLCK or F_WRLCK,
 *     depending on MDBX_RDONLY.
 *
 * Non-operational mode during re-initialization and destruction of the
 * lck-file:
 *   = F_WRLCK lock on the first byte of the lck-file; other processes wait
 *     for its release while acquiring F_RDLCK via F_SETLKW.
 *   - locks on the dxb-file may change until the exclusive lock on the
 *     lck-file is released:
 *       + for NON-exclusive mode, a lock on the pid-byte in the dxb-file
 *         via F_RDLCK or F_WRLCK, depending on MDBX_RDONLY.
 *       + for EXCLUSIVE mode, a lock on the whole dxb-file
 *         via F_RDLCK or F_WRLCK, depending on MDBX_RDONLY.
 *
 * OPERATIONAL mode with an lck-file:
 *   = F_RDLCK lock on the first byte of the lck-file; other processes cannot
 *     acquire F_WRLCK and thus see that the DB is in use.
 *   + F_WRLCK lock on the pid-byte in the lck-file after the first read
 *     transaction.
 *   + for NON-exclusive mode, a lock on the pid-byte in the dxb-file
 *     via F_RDLCK or F_WRLCK, depending on MDBX_RDONLY.
 *   + for EXCLUSIVE mode, a lock on the pid-byte of the whole dxb-file
 *     via F_RDLCK or F_WRLCK, depending on MDBX_RDONLY.
 */

#if MDBX_USE_OFDLOCKS
/* fcntl() commands selected at runtime: either the OFD (open file description)
 * variants or the classic per-process ones. Zero means "not chosen yet". */
static int op_setlk, op_setlkw, op_getlk;

/* Chooses between OFD and classic fcntl() lock commands.
 *
 * OFD commands are chosen unless MDBX_DBG_LEGACY_MULTIOPEN is set in the
 * runtime flags or, on Linux, the detected kernel is older than 3.16.
 * Must be called only while all three commands are still unset. */
__cold static void choice_fcntl(void) {
  assert(!op_setlk && !op_setlkw && !op_getlk);
  bool prefer_ofd = (globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0;
#if defined(__linux__) || defined(__gnu_linux__)
  /* OFD locks are available since 3.15, but are engaged here only for 3.16
   * and later kernels (i.e. LTS) because of reliability reasons. The version
   * is packed one byte per component, so 3.16.0 is 0x03100000; comparing with
   * `> 0x030f0000` would wrongly let 3.15.1 and later 3.15.x through. */
  if (globals.linux_kernel_version < 0x03100000)
    prefer_ofd = false;
#endif /* linux */
  op_setlk = prefer_ofd ? MDBX_F_OFD_SETLK : MDBX_F_SETLK;
  op_setlkw = prefer_ofd ? MDBX_F_OFD_SETLKW : MDBX_F_SETLKW;
  op_getlk = prefer_ofd ? MDBX_F_OFD_GETLK : MDBX_F_GETLK;
}
#else
/* Without OFD-lock support the commands are compile-time constants */
#define op_setlk MDBX_F_SETLK
#define op_setlkw MDBX_F_SETLKW
#define op_getlk MDBX_F_GETLK
#endif /* MDBX_USE_OFDLOCKS */

/* Performs a single fcntl() byte-range lock operation on `fd`.
 *
 * fd      - file handle to operate on;
 * cmd     - one of the set/set-wait/get lock commands;
 * lck     - lock type (F_RDLCK, F_WRLCK or F_UNLCK);
 * offset  - start of the range, counted from the beginning of the file;
 * len     - length of the range, must be positive.
 *
 * Returns MDBX_SUCCESS for a completed set/unlock request. For the get-lock
 * command returns MDBX_RESULT_TRUE when a conflicting lock is held and
 * MDBX_RESULT_FALSE when the range could be locked. On failure returns the
 * errno value. An interrupted call (EINTR) is transparently repeated, except
 * for the waiting command where EINTR is reported to the caller. When OFD
 * commands are rejected by the system, the classic commands are used from
 * then on, both for this request and for subsequent ones. */
static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck, const off_t offset, off_t len) {
  STATIC_ASSERT(sizeof(off_t) >= sizeof(void *) && sizeof(off_t) >= sizeof(size_t));
#if defined(__ANDROID_API__) && __ANDROID_API__ < 24
  STATIC_ASSERT_MSG((sizeof(off_t) * CHAR_BIT == MDBX_WORDBITS),
                    "The bitness of system `off_t` type is mismatch. Please "
                    "fix build and/or NDK configuration.");
#endif /* Android && API < 24 */

  /* The range must be non-empty, non-negative and must not overflow off_t */
  assert(offset >= 0 && len > 0);
  assert((uint64_t)offset < (uint64_t)INT64_MAX && (uint64_t)len < (uint64_t)INT64_MAX &&
         (uint64_t)(offset + len) > (uint64_t)offset);
  assert((uint64_t)offset < (uint64_t)OFF_T_MAX && (uint64_t)len <= (uint64_t)OFF_T_MAX &&
         (uint64_t)(offset + len) <= (uint64_t)OFF_T_MAX);
  assert((uint64_t)((off_t)((uint64_t)offset + (uint64_t)len)) == ((uint64_t)offset + (uint64_t)len));

  jitter4testing(true);
  while (true) {
    MDBX_STRUCT_FLOCK request;
    STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(request.l_start) && sizeof(off_t) <= sizeof(request.l_len) &&
                          OFF_T_MAX == (off_t)OFF_T_MAX,
                      "Support for large/64-bit-sized files is misconfigured "
                      "for the target system and/or toolchain. "
                      "Please fix it or at least disable it completely.");
    memset(&request, 0, sizeof(request));
    request.l_whence = SEEK_SET;
    request.l_type = lck;
    request.l_start = offset;
    request.l_len = len;

    const int ret = MDBX_FCNTL(fd, cmd, &request);
    jitter4testing(true);
    if (ret != -1) {
      if (cmd != op_getlk)
        return MDBX_SUCCESS;
      /* Checks reader by pid. Returns:
       *   MDBX_RESULT_TRUE   - if pid is live (reader holds a lock).
       *   MDBX_RESULT_FALSE  - if pid is dead (a lock could be placed). */
      return (request.l_type != F_UNLCK) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE;
    }

    const int err = errno;
#if MDBX_USE_OFDLOCKS
    if (ignore_enosys_and_einval(err) == MDBX_RESULT_TRUE &&
        (cmd == MDBX_F_OFD_SETLK || cmd == MDBX_F_OFD_SETLKW || cmd == MDBX_F_OFD_GETLK)) {
      /* fallback to non-OFD locks: translate the current command and switch
       * the globally selected commands, then repeat the request */
      cmd = (cmd == MDBX_F_OFD_SETLK) ? MDBX_F_SETLK : (cmd == MDBX_F_OFD_SETLKW) ? MDBX_F_SETLKW : MDBX_F_GETLK;
      op_setlk = MDBX_F_SETLK;
      op_setlkw = MDBX_F_SETLKW;
      op_getlk = MDBX_F_GETLK;
      continue;
    }
#endif /* MDBX_USE_OFDLOCKS */

    /* only a non-waiting request interrupted by a signal is repeated */
    if (err == EINTR && cmd != op_setlkw)
      continue;
    assert(MDBX_IS_ERROR(err));
    return err;
  }
}

/* Non-blocking attempt to place a lock of type `lck` on the given range,
 * repeated a few times while the failure looks like contention.
 *
 * Up to 3 attempts are made (9 on Android). Between attempts the thread
 * yields; on Android two of the pauses are 21 ms sleeps instead. Returns the
 * result of the last lck_op() call; any result other than EAGAIN, EACCES,
 * EBUSY, EWOULDBLOCK or EDEADLK is returned immediately, and so is a
 * contention error when yielding fails. */
static int lck_setlk_with3retries(const mdbx_filehandle_t fd, const int lck, const off_t offset, off_t len) {
  assert(lck != F_UNLCK);
#if defined(__ANDROID_API__)
  int attempts_left = 3 * 3;
#else
  int attempts_left = 3;
#endif /* Android */
  for (;;) {
    const int err = lck_op(fd, op_setlk, lck, offset, len);
    const bool contended =
        err == EAGAIN || err == EACCES || err == EBUSY || err == EWOULDBLOCK || err == EDEADLK;
    if (!contended)
      return err;
    if (--attempts_left < 1)
      return err;
#if defined(__ANDROID_API__)
    if (attempts_left == 5 || attempts_left == 3) {
      usleep(1000 * 42 / 2);
      continue;
    }
#endif /* Android */
    if (osal_yield())
      return err;
  }
}

/* Places a write lock on the range [0, OFF_T_MAX) of `fd`.
 * With `wait` set the waiting command is used, otherwise the request fails
 * immediately if the lock cannot be placed. Returns the lck_op() result. */
int osal_lockfile(mdbx_filehandle_t fd, bool wait) {
#if MDBX_USE_OFDLOCKS
  /* the lock commands are selected lazily, on first use */
  if (unlikely(op_setlk == 0))
    choice_fcntl();
#endif /* MDBX_USE_OFDLOCKS */
  const int cmd = wait ? op_setlkw : op_setlk;
  return lck_op(fd, cmd, F_WRLCK, 0, OFF_T_MAX);
}

/* Marks the current process as a reader by write-locking the single byte of
 * the lck-file located at the offset equal to the environment's pid.
 * Returns MDBX_PANIC if the calling process is not the one recorded in the
 * environment, otherwise the lck_op() result. */
int lck_rpid_set(MDBX_env *env) {
  assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
  assert(env->pid > 0);
  const bool same_process = osal_getpid() == env->pid;
  if (likely(same_process))
    return lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, env->pid, 1);
  return MDBX_PANIC;
}

/* Removes the reader mark of the environment's process by unlocking the
 * single byte of the lck-file at the offset equal to the environment's pid.
 * Returns the lck_op() result. */
int lck_rpid_clear(MDBX_env *env) {
  assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
  assert(env->pid > 0);
  const mdbx_filehandle_t lck_fd = env->lck_mmap.fd;
  return lck_op(lck_fd, op_setlk, F_UNLCK, env->pid, 1);
}

/* Queries (via op_getlk) whether a write lock could be placed on the one byte
 * of the LCK file at the offset equal to `pid`.
 *
 * Returns the result of lck_op(); presumably MDBX_RESULT_TRUE when the byte is
 * locked by someone and MDBX_RESULT_FALSE when it is free - confirm against
 * lck_op(). */
int lck_rpid_check(MDBX_env *env, uint32_t pid) {
  assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
  assert(pid > 0);
  const int rc = lck_op(env->lck_mmap.fd, op_getlk, F_WRLCK, pid, 1);
  return rc;
}

/*---------------------------------------------------------------------------*/

#if MDBX_LOCKING > MDBX_LOCKING_SYSV
/* Initializes an IPC lock object in its stub form: a non-shared semaphore
 * with the initial value 1 for POSIX-1988 locking, or a mutex with default
 * attributes for POSIX-2001/2008 locking.
 * Returns 0 on success or an error code. */
int lck_ipclock_stubinit(osal_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
  if (sem_init(ipc, false, 1) != 0)
    return errno;
  return 0;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008
  const int err = pthread_mutex_init(ipc, nullptr);
  return err;
#else
#error "FIXME"
#endif
}

/* Destroys an IPC lock object: sem_destroy() for POSIX-1988 locking or
 * pthread_mutex_destroy() for POSIX-2001/2008 locking.
 * Returns 0 on success or an error code. */
int lck_ipclock_destroy(osal_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
  if (sem_destroy(ipc) != 0)
    return errno;
  return 0;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008
  const int err = pthread_mutex_destroy(ipc);
  return err;
#else
#error "FIXME"
#endif
}
#endif /* MDBX_LOCKING > MDBX_LOCKING_SYSV */

/* Inspects the DXB file (env->lazy_fd) and the LCK file (env->lck_mmap.fd)
 * with fstat().
 *
 * Returns:
 *  - MDBX_SUCCESS when both are regular, still-linked files of sufficient size;
 *  - MDBX_RESULT_TRUE when at least one of them is too short, i.e. the caller
 *    needs the exclusive lock;
 *  - the errno of a failed fstat(), or EBADFD (EPERM where EBADFD is not
 *    defined) when a file is not regular or has no links left. */
static int check_fstat(MDBX_env *env) {
#ifdef EBADFD
  const int unusable_file_err = EBADFD;
#else
  const int unusable_file_err = EPERM;
#endif
  int verdict = MDBX_SUCCESS;
  struct stat info;

  /* --- the data file --- */
  if (fstat(env->lazy_fd, &info) != 0) {
    const int err = errno;
    ERROR("fstat(%s), err %d", "DXB", err);
    return err;
  }

  if (info.st_nlink < 1 || !S_ISREG(info.st_mode)) {
    ERROR("%s %s, err %d", "DXB", (info.st_nlink < 1) ? "file was removed" : "not a regular file", unusable_file_err);
    return unusable_file_err;
  }

  if (info.st_size < (off_t)(MDBX_MIN_PAGESIZE * NUM_METAS)) {
    VERBOSE("dxb-file is too short (%u), exclusive-lock needed", (unsigned)info.st_size);
    verdict = MDBX_RESULT_TRUE;
  }

  /* --- the lock file --- */
  if (fstat(env->lck_mmap.fd, &info) != 0) {
    const int err = errno;
    ERROR("fstat(%s), err %d", "LCK", err);
    return err;
  }

  if (info.st_nlink < 1 || !S_ISREG(info.st_mode)) {
    ERROR("%s %s, err %d", "LCK", (info.st_nlink < 1) ? "file was removed" : "not a regular file", unusable_file_err);
    return unusable_file_err;
  }

  /* The size is checked to detect the case when the shared lock was obtained
   * immediately after lck_destroy(). */
  if (info.st_size < (off_t)(sizeof(lck_t) + sizeof(reader_slot_t))) {
    VERBOSE("lck-file is too short (%u), exclusive-lock needed", (unsigned)info.st_size);
    verdict = MDBX_RESULT_TRUE;
  }

  return verdict;
}

/* Acquires the initial file locks for the environment and reports which kind
 * of access was obtained.
 *
 * Returns:
 *  - MDBX_RESULT_TRUE when exclusive locking was obtained;
 *  - MDBX_RESULT_FALSE when shared locking was obtained;
 *  - MDBX_PANIC when called from a process other than the one in env->pid;
 *  - ENOLCK when running on WSL1 (a deliberately injected failure);
 *  - otherwise the error code of the failed lock or fstat() step. */
__cold int lck_seize(MDBX_env *env) {
  assert(env->lazy_fd != INVALID_HANDLE_VALUE);
  if (unlikely(osal_getpid() != env->pid))
    return MDBX_PANIC;

  int rc = MDBX_SUCCESS;
#if defined(__linux__) || defined(__gnu_linux__)
  if (unlikely(globals.running_on_WSL1)) {
    rc = ENOLCK /* No record locks available */;
    ERROR("%s, err %u",
          "WSL1 (Windows Subsystem for Linux) is mad and trouble-full, "
          "injecting failure to avoid data loss",
          rc);
    return rc;
  }
#endif /* Linux */

#if MDBX_USE_OFDLOCKS
  /* the lock commands have not been chosen yet */
  if (unlikely(op_setlk == 0))
    choice_fcntl();
#endif /* MDBX_USE_OFDLOCKS */

  if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) {
    /* LY: without-lck mode (e.g. exclusive or on read-only filesystem):
     * the whole DXB file is locked instead. */
    rc = lck_setlk_with3retries(env->lazy_fd, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
    if (rc != MDBX_SUCCESS) {
      ERROR("%s, err %u", "without-lck", rc);
      eASSERT(env, MDBX_IS_ERROR(rc));
      return rc;
    }
    return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
  }

retry:
  /* rc is MDBX_RESULT_TRUE only when arriving via `goto retry`, i.e. while
   * holding the shared lock on the first byte of the LCK file taken below:
   * release it before trying for the exclusive lock again. */
  if (rc == MDBX_RESULT_TRUE) {
    rc = lck_op(env->lck_mmap.fd, op_setlk, F_UNLCK, 0, 1);
    if (rc != MDBX_SUCCESS) {
      ERROR("%s, err %u", "unlock-before-retry", rc);
      eASSERT(env, MDBX_IS_ERROR(rc));
      return rc;
    }
  }

  /* First, try to get the exclusive lock. */
  rc = lck_setlk_with3retries(env->lck_mmap.fd, F_WRLCK, 0, 1);
  if (rc == MDBX_SUCCESS) {
    /* Only errors matter here; any non-error result is discarded by the
     * assignment that follows. */
    rc = check_fstat(env);
    if (MDBX_IS_ERROR(rc))
      return rc;

  continue_dxb_exclusive:
    /* Also reached by `goto` from below, after the shared LCK lock has been
     * converted into an exclusive one. */
    rc = lck_setlk_with3retries(env->lazy_fd, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
    if (rc == MDBX_SUCCESS)
      return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;

    /* Re-validate the files, keeping the lock failure code in rc. */
    int err = check_fstat(env);
    if (MDBX_IS_ERROR(err))
      return err;

    /* the cause may be a collision with POSIX's file-lock recovery. */
    if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || rc == EDEADLK)) {
      ERROR("%s, err %u", "dxb-exclusive", rc);
      eASSERT(env, MDBX_IS_ERROR(rc));
      return rc;
    }

    /* Fallback to lck-shared */
  } else if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || rc == EDEADLK)) {
    ERROR("%s, err %u", "try-exclusive", rc);
    eASSERT(env, MDBX_IS_ERROR(rc));
    return rc;
  }

  /* At this point one of two things could have happened:
   *  - lck_destroy() in another process was holding the lock
   *    during a destruction;
   *  - or lck_seize() in another process has got the exclusive
   *    lock and is doing initialization.
   * The size of the lck-file is used later to distinguish these cases. */

  /* Wait for lck-shared now. */
  /* This may block during transient states, for instance until another
   * competing process calls lck_downgrade(). */
  rc = lck_op(env->lck_mmap.fd, op_setlkw, F_RDLCK, 0, 1);
  if (rc != MDBX_SUCCESS) {
    ERROR("%s, err %u", "try-shared", rc);
    eASSERT(env, MDBX_IS_ERROR(rc));
    return rc;
  }

  /* A too-short file means the exclusive lock is needed: start over. */
  rc = check_fstat(env);
  if (rc == MDBX_RESULT_TRUE)
    goto retry;
  if (rc != MDBX_SUCCESS) {
    ERROR("%s, err %u", "lck_fstat", rc);
    return rc;
  }

  /* got shared, retry exclusive */
  rc = lck_setlk_with3retries(env->lck_mmap.fd, F_WRLCK, 0, 1);
  if (rc == MDBX_SUCCESS)
    goto continue_dxb_exclusive;

  if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK || rc == EDEADLK)) {
    ERROR("%s, err %u", "try-exclusive", rc);
    eASSERT(env, MDBX_IS_ERROR(rc));
    return rc;
  }

  /* Lock against another process operating in without-lck or exclusive mode:
   * one byte of the DXB file at the offset equal to env->pid. */
  rc = lck_setlk_with3retries(env->lazy_fd, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->pid, 1);
  if (rc != MDBX_SUCCESS) {
    ERROR("%s, err %u", "lock-against-without-lck", rc);
    eASSERT(env, MDBX_IS_ERROR(rc));
    return rc;
  }

  /* Done: return with shared locking. */
  return MDBX_RESULT_FALSE;
}

/* Downgrades the locks held by this process from exclusive to shared.
 *
 * Unless the environment is in MDBX_EXCLUSIVE mode, the DXB file ranges
 * [0, pid) and [pid + 1, OFF_T_MAX) are unlocked, leaving the byte at the
 * offset equal to env->pid untouched. Then the first byte of the LCK file is
 * re-locked as a read lock.
 *
 * Returns MDBX_SUCCESS, MDBX_PANIC when called from a process other than the
 * one in env->pid, or the error of the first failed step. */
int lck_downgrade(MDBX_env *env) {
  assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
  if (unlikely(osal_getpid() != env->pid))
    return MDBX_PANIC;

  int err = MDBX_SUCCESS;
  const bool exclusive_mode = (env->flags & MDBX_EXCLUSIVE) != 0;
  if (!exclusive_mode) {
    /* below the own byte ... */
    err = lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid);
    /* ... and above it */
    if (err == MDBX_SUCCESS)
      err = lck_op(env->lazy_fd, op_setlk, F_UNLCK, env->pid + 1, OFF_T_MAX - env->pid - 1);
  }

  if (err == MDBX_SUCCESS)
    err = lck_setlk_with3retries(env->lck_mmap.fd, F_RDLCK, 0, 1);

  if (unlikely(err != 0)) {
    ERROR("%s, err %u", "lck", err);
    assert(MDBX_IS_ERROR(err));
  }
  return err;
}

/* Upgrades the locks held by this process from shared to exclusive.
 *
 * The first byte of the LCK file is write-locked; then, unless the
 * environment is in MDBX_EXCLUSIVE mode, the DXB file is write-locked in two
 * ranges around the byte at the offset equal to env->pid: `pid - 1` bytes
 * from offset 0 (skipped when pid <= 1) and everything from `pid + 1`.
 * NOTE(review): the lower range has length `pid - 1`, whereas lck_downgrade()
 * unlocks `pid` bytes from offset 0 - confirm that this is intended.
 *
 * When `dont_wait` is true the non-waiting command is used, otherwise the
 * waiting one.
 *
 * On a failure of a DXB step, the locks taken so far are rolled back: the
 * lower DXB range is unlocked and the LCK byte is returned to a read lock.
 *
 * Returns MDBX_SUCCESS, the error code of the failed step, or MDBX_PANIC when
 * called from a process other than the one in env->pid or when the rollback
 * itself fails. */
int lck_upgrade(MDBX_env *env, bool dont_wait) {
  assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
  if (unlikely(osal_getpid() != env->pid))
    return MDBX_PANIC;

  const int cmd = dont_wait ? op_setlk : op_setlkw;
  int rc = lck_op(env->lck_mmap.fd, cmd, F_WRLCK, 0, 1);
  if (rc == MDBX_SUCCESS && (env->flags & MDBX_EXCLUSIVE) == 0) {
    rc = (env->pid > 1) ? lck_op(env->lazy_fd, cmd, F_WRLCK, 0, env->pid - 1) : MDBX_SUCCESS;
    if (rc == MDBX_SUCCESS) {
      rc = lck_op(env->lazy_fd, cmd, F_WRLCK, env->pid + 1, OFF_T_MAX - env->pid - 1);
      /* Roll back the lower range with a plain unlock: lck_setlk_with3retries()
       * asserts that it is never given F_UNLCK, so it must not be used here. */
      if (rc != MDBX_SUCCESS && env->pid > 1 && lck_op(env->lazy_fd, op_setlk, F_UNLCK, 0, env->pid - 1))
        rc = MDBX_PANIC;
    }
    /* Return the LCK byte to the shared state after any DXB failure. */
    if (rc != MDBX_SUCCESS && lck_setlk_with3retries(env->lck_mmap.fd, F_RDLCK, 0, 1))
      rc = MDBX_PANIC;
  }
  if (unlikely(rc != 0)) {
    ERROR("%s, err %u", "lck", rc);
    assert(MDBX_IS_ERROR(rc));
  }
  return rc;
}

/* Releases the locking resources of `env` and closes its file descriptors.
 *
 * If `env` has its own LCK mapping, there is no in-process neighbor, and
 * exclusive locks on both the LCK and the DXB files can be taken without
 * waiting while the LCK file is still linked, then the IPC locks are
 * destroyed, the LCK file is unmapped and, provided no unsynced pages were
 * recorded, truncated to zero length.
 *
 * Afterwards the dsync, lazy (DXB) and LCK descriptors are closed. When
 * classic POSIX locks are in use (op_setlk == F_SETLK) and an in-process
 * neighbor exists, the neighbor's locks are re-acquired after each close.
 *
 * \param env                 the environment being destroyed.
 * \param inprocess_neighbor  another environment inside the current process
 *                            whose locks must be restored, or nullptr.
 * \param current_pid         the pid of the calling process; if it differs
 *                            from env->pid (the after-fork case) the neighbor
 *                            is ignored.
 *
 * Returns MDBX_SUCCESS or the first error encountered; on an error the
 * neighbor (if any) is flagged with ENV_FATAL_ERROR. */
__cold int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t current_pid) {
  eASSERT(env, osal_getpid() == current_pid);
  int rc = MDBX_SUCCESS;
  struct stat lck_info;
  lck_t *lck = env->lck;
  if (lck && lck == env->lck_mmap.lck && !inprocess_neighbor &&
      /* try get exclusive access */
      lck_op(env->lck_mmap.fd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 &&
      /* if LCK was not removed */
      fstat(env->lck_mmap.fd, &lck_info) == 0 && lck_info.st_nlink > 0 &&
      lck_op(env->lazy_fd, op_setlk, (env->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX) == 0) {

    VERBOSE("%p got exclusive, drown ipc-locks", (void *)env);
    eASSERT(env, current_pid == env->pid);
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
    if (env->me_sysv_ipc.semid != -1)
      rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0;
#else
    rc = lck_ipclock_destroy(&lck->rdt_lock);
    if (rc == 0)
      rc = lck_ipclock_destroy(&lck->wrt_lock);
#endif /* MDBX_LOCKING */

    eASSERT(env, rc == 0);
    if (rc == 0) {
      /* The flag must be read before unmapping, since `lck` points into the
       * mapping. */
      const bool synced = lck->unsynced_pages.weak == 0;
      osal_munmap(&env->lck_mmap);
      if (synced && env->lck_mmap.fd != INVALID_HANDLE_VALUE)
        rc = ftruncate(env->lck_mmap.fd, 0) ? errno : 0;
    }

    jitter4testing(false);
  }

#if MDBX_LOCKING == MDBX_LOCKING_SYSV
  env->me_sysv_ipc.semid = -1;
#endif /* MDBX_LOCKING */

  /* After-fork case: the neighbor is dropped, so no locks are restored on
   * its behalf below. */
  if (current_pid != env->pid) {
    eASSERT(env, !inprocess_neighbor);
    NOTICE("drown env %p after-fork pid %d -> %d", __Wpedantic_format_voidptr(env), env->pid, current_pid);
    inprocess_neighbor = nullptr;
  }

  /* 1) POSIX's fcntl() locks (i.e. when op_setlk == F_SETLK) should be restored
   * after the file was closed.
   *
   * 2) File locks would be released (by the kernel) when the file descriptors
   * are closed. But to avoid false-positive EACCES and EDEADLK from the kernel,
   * locks should be released here explicitly and in the proper order. */

  /* close dxb and restore lock */
  if (env->dsync_fd != INVALID_HANDLE_VALUE) {
    if (unlikely(close(env->dsync_fd) != 0) && rc == MDBX_SUCCESS)
      rc = errno;
    env->dsync_fd = INVALID_HANDLE_VALUE;
  }
  if (env->lazy_fd != INVALID_HANDLE_VALUE) {
    if (unlikely(close(env->lazy_fd) != 0) && rc == MDBX_SUCCESS)
      rc = errno;
    env->lazy_fd = INVALID_HANDLE_VALUE;
    if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) {
      /* restore file-lock: the whole file for an exclusive neighbor,
       * otherwise the single byte at the offset equal to its pid */
      rc = lck_op(inprocess_neighbor->lazy_fd, F_SETLKW, (inprocess_neighbor->flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
                  (inprocess_neighbor->flags & MDBX_EXCLUSIVE) ? 0 : inprocess_neighbor->pid,
                  (inprocess_neighbor->flags & MDBX_EXCLUSIVE) ? OFF_T_MAX : 1);
    }
  }

  /* close lck and restore locks */
  if (env->lck_mmap.fd != INVALID_HANDLE_VALUE) {
    if (unlikely(close(env->lck_mmap.fd) != 0) && rc == MDBX_SUCCESS)
      rc = errno;
    env->lck_mmap.fd = INVALID_HANDLE_VALUE;
    if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) {
      /* restore file-locks: the shared lock on the first byte and, if the
       * neighbor has a registered reader pid, its per-pid byte */
      rc = lck_op(inprocess_neighbor->lck_mmap.fd, F_SETLKW, F_RDLCK, 0, 1);
      if (rc == MDBX_SUCCESS && inprocess_neighbor->registered_reader_pid)
        rc = lck_rpid_set(inprocess_neighbor);
    }
  }

  if (inprocess_neighbor && rc != MDBX_SUCCESS)
    inprocess_neighbor->flags |= ENV_FATAL_ERROR;
  return rc;
}

/*---------------------------------------------------------------------------*/

__cold int lck_init(MDBX_env *env, MDBX_env *inprocess_neighbor, int global_uniqueness_flag) {
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
  int semid = -1;
  /* don't initialize semaphores twice */
  (void)inprocess_neighbor;
  if (global_uniqueness_flag == MDBX_RESULT_TRUE) {
    struct stat st;
    if (fstat(env->lazy_fd, &st))
      return errno;
  sysv_retry_create:
    semid = semget(env->me_sysv_ipc.key, 2, IPC_CREAT | IPC_EXCL | (st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)));
    if (unlikely(semid == -1)) {
      int err = errno;
      if (err != EEXIST)
        return err;

      /* remove and re-create semaphore set */
      semid = semget(env->me_sysv_ipc.key, 2, 0);
      if (semid == -1) {
        err = errno;
        if (err != ENOENT)
          return err;
        goto sysv_retry_create;
      }
      if (semctl(semid, 2, IPC_RMID)) {
        err = errno;
        if (err != EIDRM)
          return err;
      }
      goto sysv_retry_create;
    }

    unsigned short val_array[2] = {1, 1};
    if (semctl(semid, 2, SETALL, val_array))
      return errno;
  } else {
    semid = semget(env->me_sysv_ipc.key, 2, 0);
    if (semid == -1)
      return errno;

    /* check read & write access */
    struct semid_ds data[2];
    if (semctl(semid, 2, IPC_STAT, data) || semctl(semid, 2, IPC_SET, data))
      return errno;
  }

  env->me_sysv_ipc.semid = semid;
  return MDBX_SUCCESS;

#elif MDBX_LOCKING == MDBX_LOCKING_FUTEX
  (void)inprocess_neighbor;
  if (global_uniqueness_flag != MDBX_RESULT_TRUE)
    return MDBX_SUCCESS;
#error "FIXME: Not implemented"
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988

  /* don't initialize semaphores twice */
  (void)inprocess_neighbor;
  if (global_uniqueness_flag == MDBX_RESULT_TRUE) {
    if (sem_init(&env->lck_mmap.lck->rdt_lock, true, 1))
      return errno;
    if (sem_init(&env->lck_mmap.lck->wrt_lock, true, 1))
      return errno;
  }
  return MDBX_SUCCESS;

#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || MDBX_LOCKING == MDBX_LOCKING_POSIX2008
  if (inprocess_neighbor)
    return MDBX_SUCCESS /* don't need any initialization for mutexes
      if LCK already opened/used inside current process */
        ;

  /* FIXME: Unfortunately, there is no other reliable way but to long testing
   * on each platform. On the other hand, behavior like FreeBSD is incorrect
   * and we can expect it to be rare. Moreover, even on FreeBSD without
   * additional in-process initialization, the probability of an problem
   * oc