🍑 nectarine
Audio synthesis tools for C23
Loading...
Searching...
No Matches
denormals.h
Go to the documentation of this file.
1/*
2 * 🍑 nectarine - denormals.h
3 * Copyright (c) 2026 Fawn <rubiefawn@gmail.com>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
20#pragma once
21
22#include <stdint.h>
23#if defined(__amd64__) || defined(_M_AMD64) || defined(__i386__) || defined(_M_IX86)
24 #include <immintrin.h>
25#elif defined(_M_ARM64) // MSVC arm64
26 #include <intrin.h>
27#elif defined(_M_ARM) // MSVC arm32
28 #include <float.h>
29#endif
30
/**
 * Disable IEEE 754 denormals on the current thread.
 *
 * Turns on flush-to-zero in the floating-point control register (and, on
 * x86, denormals-are-zero when the processor reports support for it).
 * Other bits of the control register are left as they were.
 *
 * On architectures without an implementation below, this function does
 * nothing and a compile-time warning is emitted.
 */
static inline void nec_disable_denormals(void) {
#if defined(__SSE4_1__)
	// This version of the function does not check if the processor
	// supports the "denormals-are-zero" (or "DAZ") flag, and using it
	// on a processor that does not support DAZ will raise a
	// general-protection exception. Intel's manual states that DAZ is
	// "...available in most of the Pentium 4 processors and in the
	// Intel Xeon processor, with the exception of some early
	// steppings", but does not specify what those exceptions are.
	// SSE 4.1 is a conservative estimate. SSE3 might be more accurate,
	// but the only way to know for sure is to test on actual hardware.
	_mm_setcsr(_mm_getcsr() | 0x8040u); // Set DAZ (bit 6) and FTZ (bit 15) both to 1
#elif defined(__amd64__) || defined(_M_AMD64) || defined(__i386__) || defined(_M_IX86)
	// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html
	// Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1: Basic Architecture
	// 11.6.3 Checking for the DAZ Flag in the MXCSR Register
	unsigned int flags = 0x8000u; // FTZ
	// FXSAVE needs a 512-byte, 16-byte-aligned save area. It is zeroed
	// first so that a processor which does not write MXCSR_MASK leaves
	// the DAZ bit reading as "unsupported".
	alignas(16) uint8_t buf[512] = {0};
	#if defined(__amd64__) || defined(_M_AMD64)
	_fxsave64(buf);
	#elif defined(__i386__) || defined(_M_IX86)
	_fxsave(buf);
	#endif
	// Byte 28 is the low byte of MXCSR_MASK; bit 6 is set only when
	// the DAZ flag may be written to MXCSR.
	flags |= buf[28] & 0x0040u; // DAZ if supported
	_mm_setcsr(_mm_getcsr() | flags);
#elif defined(_M_ARM64)
	// https://developer.arm.com/documentation/ddi0601/2026-03/AArch64-Registers/FPCR--Floating-point-Control-Register
	// https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
	// 0x5a20 == ARM64_SYSREG(0b11, 0b011, 0b0100, 0b0100, 0b000)
	_WriteStatusReg(0x5a20, _ReadStatusReg(0x5a20) | 0x1000000); // Set FPCR.FZ (bit 24)
#elif defined(_M_ARM)
	// WARN: This is untested. MSVC for ARM32 is deprecated.
	// https://learn.microsoft.com/en-us/windows/arm/arm32-to-arm64
	// https://learn.microsoft.com/en-us/windows/whats-new/deprecated-features
	// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2
	// _controlfp(new, mask): the new value comes first, the mask second.
	_controlfp(0x01000000, 0x03000000); // new = _DN_FLUSH, mask = _MCW_DN
#elif defined(__aarch64__)
	// https://developer.arm.com/documentation/ddi0601/2026-03/AArch64-Registers/FPCR--Floating-point-Control-Register
	uint64_t fpcr;
	__asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
	// MSR (register) only accepts a general-purpose register operand,
	// so the constraint must be "r" rather than "ri".
	__asm__ volatile ("msr fpcr, %0" :: "r" (fpcr | UINT64_C(0x1000000))); // Set FPCR.FZ (bit 24)
#elif defined(__arm__)
	// https://developer.arm.com/documentation/ddi0601/2026-03/AArch32-Registers/FPSCR--Floating-Point-Status-and-Control-Register
	uint32_t fpscr;
	__asm__ volatile ("vmrs %0, fpscr" : "=r" (fpscr));
	// VMSR only accepts a general-purpose register operand.
	__asm__ volatile ("vmsr fpscr, %0" :: "r" (fpscr | UINT32_C(0x1000000))); // Set FPSCR.FZ (bit 24)
#else
	#warning nec_disable_denormals() is not implemented on this architecture and will have no effect.
#endif
}
90
/**
 * Enable IEEE 754 denormals on the current thread.
 *
 * Clears the flush-to-zero control (and, on x86, denormals-are-zero) in
 * the floating-point control register, undoing nec_disable_denormals().
 * Other bits of the control register are left as they were.
 *
 * On architectures without an implementation below, this function does
 * nothing and a compile-time warning is emitted.
 */
static inline void nec_enable_denormals(void) {
#if defined(__amd64__) || defined(_M_AMD64) || defined(__i386__) || defined(_M_IX86)
	// Clearing DAZ is always permitted, so no capability check is needed here.
	_mm_setcsr(_mm_getcsr() & ~0x8040u); // Set DAZ (bit 6) and FTZ (bit 15) both to 0
#elif defined(_M_ARM64)
	// https://developer.arm.com/documentation/ddi0601/2026-03/AArch64-Registers/FPCR--Floating-point-Control-Register
	// https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
	// 0x5a20 == ARM64_SYSREG(0b11, 0b011, 0b0100, 0b0100, 0b000)
	_WriteStatusReg(0x5a20, _ReadStatusReg(0x5a20) & ~0x1000000); // Clear FPCR.FZ (bit 24)
#elif defined(_M_ARM)
	// WARN: This is untested. MSVC for ARM32 is deprecated.
	// https://learn.microsoft.com/en-us/windows/arm/arm32-to-arm64
	// https://learn.microsoft.com/en-us/windows/whats-new/deprecated-features
	// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2
	// _controlfp(new, mask): the new value comes first, the mask second.
	// A mask of 0 would only read the control word and change nothing,
	// so the mask must be _MCW_DN for the denormal mode to be updated.
	_controlfp(0x00000000, 0x03000000); // new = _DN_SAVE, mask = _MCW_DN
#elif defined(__aarch64__)
	// https://developer.arm.com/documentation/ddi0601/2026-03/AArch64-Registers/FPCR--Floating-point-Control-Register
	uint64_t fpcr;
	__asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
	// MSR (register) only accepts a general-purpose register operand,
	// so the constraint must be "r" rather than "ri".
	__asm__ volatile ("msr fpcr, %0" :: "r" (fpcr & ~UINT64_C(0x1000000))); // Clear FPCR.FZ (bit 24)
#elif defined(__arm__)
	// https://developer.arm.com/documentation/ddi0601/2026-03/AArch32-Registers/FPSCR--Floating-Point-Status-and-Control-Register
	uint32_t fpscr;
	__asm__ volatile ("vmrs %0, fpscr" : "=r" (fpscr));
	// VMSR only accepts a general-purpose register operand.
	__asm__ volatile ("vmsr fpscr, %0" :: "r" (fpscr & ~UINT32_C(0x1000000))); // Clear FPSCR.FZ (bit 24)
#else
	#warning nec_enable_denormals() is not implemented on this architecture and will have no effect.
#endif
}
static void nec_disable_denormals(void)
Disable IEEE 754 denormals on the current thread.
Definition denormals.h:40
static void nec_enable_denormals(void)
Enable IEEE 754 denormals on the current thread.
Definition denormals.h:97