stb/stb_hexwave.h
2021-04-01 01:53:09 -07:00

635 lines
24 KiB
C

// stb_hexwave - v0.5 - public domain, initial release 2021-04-01
//
// A flexible anti-aliased (bandlimited) digital audio oscillator.
//
// This library generates waveforms of a variety of shapes made of
// line segments. It does not do envelopes, LFO effects, etc.; it
// merely tries to solve the problem of generating an artifact-free
// morphable digital waveform with a variety of spectra, and leaves
// it to the user to rescale the waveform and mix multiple voices, etc.
//
// Compiling:
//
// In one C/CPP file that #includes this file, do
//
// #define STB_HEXWAVE_IMPLEMENTATION
// #include "stb_hexwave.h"
//
// Optionally, #define STB_HEXWAVE_STATIC before including
// the header to cause the definitions to be private to the
// implementation file (i.e. to be "static" instead of "extern").
//
// Notes:
//
// Optionally performs memory allocation during initialization,
// never allocates otherwise.
//
// Usage:
//
// Initialization:
//
// hexwave_init(32,16,NULL); // read "header section" for alternatives
//
// Create oscillator:
//
// HexWave *osc = malloc(sizeof(*osc)); // or "new HexWave", or declare globally or on stack
// hexwave_create(osc, reflect_flag, peak_time, half_height, zero_wait);
// see "Waveform shapes" below for the meaning of these parameters
//
// Generate audio:
//
// hexwave_generate_samples(output, number_of_samples, osc, oscillator_freq)
// where:
// output is a buffer where the library will store floating point audio samples
// number_of_samples is the number of audio samples to generate
// osc is a pointer to a Hexwave
// oscillator_freq is the frequency of the oscillator divided by the sample rate
//
// The output samples will continue from where the samples generated by the
// previous hexwave_generate_samples() on this oscillator ended.
//
// Change oscillator waveform:
//
// hexwave_change(osc, reflect_flag, peak_time, half_height, zero_wait);
// can call in between calls to hexwave_generate_samples
//
// Waveform shapes:
//
// All waveforms generated by hexwave are constructed from six line segments
// characterized by 3 parameters.
//
// See demonstration: https://www.youtube.com/watch?v=hsUCrAsDN-M
//
// reflect=0 reflect=1
//
// 0-----P---1 0-----P---1 peak_time = P
// . 1 . 1
// /\_ : /\_ :
// / \_ : / \_ :
// / \.H / \.H half_height = H
// / | : / | :
// _____/ |_:___ _____/ | : _____
// . : \ | . | : /
// . : \ | . | : /
// . : \ _/ . \_: /
// . : \ _/ . :_ /
// . -1 \/ . -1 \/
// 0 - Z - - - - 1 0 - Z - - - - 1 zero_wait = Z
//
// Classic waveforms:
// peak half zero
// reflect time height wait
// Sawtooth 1 0 0 0
// Square 1 0 1 0
// Triangle 1 0.5 0 0
//
// Some waveforms can be produced in multiple ways, which is useful when morphing
// into other waveforms, and there are a few more notable shapes:
//
// peak half zero
// reflect time height wait
// Sawtooth 1 1 any 0
// Sawtooth (8va) 1 0 -1 0
// Triangle 1 0.5 0 0
// Square 1 0 1 0
// Square 0 0 1 0
// Triangle 0 0.5 0 0
// Triangle 0 0 -1 0
// AlternatingSaw 0 0 0 0
// AlternatingSaw 0 1 any 0
// Stairs 0 0 1 0.5
//
// The "Sawtooth (8va)" waveform is identical to a sawtooth wave with 2x the
// frequency, but when morphed with other values, it becomes an overtone of
// the base frequency.
//
// Morphing waveforms:
//
// Sweeping peak_time morphs the waveform while producing various spectra.
// Sweeping half_height effectively crossfades between two waveforms; useful, but less exciting.
// Sweeping zero_wait produces a similar effect no matter the reset of the waveform,
// a sort of high-pass/PWM effect where the wave becomes silent at zero_wait=1.
//
// You can trivially morph between any two waveforms from the above table
// which only differ in one column.
//
// Crossfade between classic waveforms:
// peak half zero
// Start End reflect time height wait
// ----- --- ------- ---- ------ ----
// Triangle Square 0 0 -1..1 0
// Saw Square 1 0 0..1 0
// Triangle Saw 1 0.5 0..2 0
//
// The last morph uses uses half-height values larger than 1, which means it will
// be louder and the output should be scaled down by half to compensate, or better
// by dynamically tracking the morph: volume_scale = 1 - half_height/4
//
// Non-crossfade morph between classic waveforms, most require changing
// two parameters at the same time:
// peak half zero
// Start End reflect time height wait
// ----- --- ------- ---- ------ ----
// Square Triangle any 0..0.5 1..0 0
// Square Saw 1 0..1 1..any 0
// Triangle Saw 1 0.5..1 0..-1 0
//
// Other noteworthy morphs between simple shapes:
// peak half zero
// Start Halfway End reflect time height wait
// ----- --------- --- ------- ---- ------ ----
// Saw (8va,neg) Saw (pos) 1 0..1 -1 0
// Saw (neg) Saw (pos) 1 0..1 0 0
// Triangle AlternatingSaw 0 0..1 -1 0
// AlternatingSaw Triangle AlternatingSaw 0 0..1 0 0
// Square AlternatingSaw 0 0..1 1 0
// Triangle Triangle AlternatingSaw 0 0..1 -1..1 0
// Square AlternatingSaw 0 0..1 1..0 0
// Saw (8va) Triangle Saw 1 0..1 -1..1 0
// Saw (neg) Saw (pos) 1 0..1 0..1 0
// AlternatingSaw AlternatingSaw 0 0..1 0..any 0
//
// The last entry is noteworthy because the morph from the halfway point to either
// endpoint sounds very different. For example, an LFO sweeping back and forth over
// the whole range will morph between the middle timbre and the AlternatingSaw
// timbre in two different ways, alternating.
//
// Entries with "any" for half_height are whole families of morphs, as you can pick
// any value you want as the endpoint for half_height.
//
// You can always morph between any two waveforms with the same value of 'reflect'
// by just sweeping the parameters simultaneously. There will never be artifacts
// and the result will always be useful, if not necessarily what you want.
//
// You can vary the sound of two-parameter morphs by ramping them differently,
// e.g. if the morph goes from t=0..1, then square-to-triangle looks like:
// peak_time = lerp(t, 0, 0.5)
// half_height = lerp(t, 1, 0 )
// but you can also do things like:
// peak_time = lerp(smoothstep(t), 0, 0.5)
// half_height = cos(PI/2 * t)
//
// How it works:
//
// hexwave use BLEP to bandlimit discontinuities and BLAMP
// to bandlimit C1 discontinuities. This is not polyBLEP
// (polynomial BLEP), it is table-driven BLEP. It is
// also not minBLEP (minimum-phase BLEP), as that complicates
// things for little benefit once BLAMP is involved.
//
// The previous oscillator frequency is remembered, and when
// the frequency changes, a BLAMP is generated to remove the
// C1 discontinuity, which reduces artifacts for sweeps/LFO.
//
// Changes to an oscillator timbre using hexwave_change() actually
// wait until the oscillator finishes its current cycle. All
// waveforms with non-zero "zero_wait" settings pass through 0
// and have 0-slope at the start of a cycle, which means changing
// the settings is artifact free at that time. (If zero_wait is 0,
// the code still treats it as passing through 0 with 0-slope; it'll
// apply the necessary fixups to make it artifact free as if it does
// transition to 0 with 0-slope vs. the waveform at the end of
// the cycle, then adds the fixups for a non-0 and non-0 slope
// at the start of the cycle, which cancels out if zero_wait is 0,
// and still does the right thing if zero_wait is 0 when the
// settings are updated.)
//
// BLEP/BLAMP normally requires overlapping buffers, but this
// is hidden from the user by generating the waveform to a
// temporary buffer and saving the overlap regions internally
// between calls. (It is slightly more complicated; see code.)
//
// By design all shapes have 0 DC offset; this is one reason
// hexwave uses zero_wait instead of standard PWM.
//
// The internals of hexwave could support any arbitrary shape
// made of line segments, but I chose not to expose this
// generality in favor of a simple, easy-to-use API.
#ifndef STB_INCLUDE_STB_HEXWAVE_H
#define STB_INCLUDE_STB_HEXWAVE_H
#ifndef STB_HEXWAVE_MAX_BLEP_LENGTH
#define STB_HEXWAVE_MAX_BLEP_LENGTH 64 // good enough for anybody
#endif
#ifdef STB_HEXWAVE_STATIC
#define STB_HEXWAVE_DEF static
#else
#define STB_HEXWAVE_DEF extern
#endif
typedef struct HexWave HexWave;
STB_HEXWAVE_DEF void hexwave_init(int width, int oversample, float *user_buffer);
// width: size of BLEP, from 4..64, larger is slower & more memory but less aliasing
// oversample: 2+, number of subsample positions, larger uses more memory but less noise
// user_buffer: optional, if provided the library will perform no allocations.
// 16*width*(oversample+1) bytes, must stay allocated as long as library is used
// technically it only needs: 8*( width * (oversample + 1))
// + 8*((width * oversample) + 1) bytes
//
// width can be larger than 64 if you define STB_HEXWAVE_MAX_BLEP_LENGTH to a larger value
STB_HEXWAVE_DEF void hexwave_shutdown(float *user_buffer);
// user_buffer: pass in same parameter as passed to hexwave_init
STB_HEXWAVE_DEF void hexwave_create(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait);
// see docs above for description
//
// reflect is tested as 0 or non-zero
// peak_time is clamped to 0..1
// half_height is not clamped
// zero_wait is clamped to 0..1
STB_HEXWAVE_DEF void hexwave_change(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait);
// see docs
STB_HEXWAVE_DEF void hexwave_generate_samples(float *output, int num_samples, HexWave *hex, float freq);
// output: buffer where the library will store generated floating point audio samples
// number_of_samples: the number of audio samples to generate
// osc: pointer to a Hexwave initialized with 'hexwave_create'
// oscillator_freq: frequency of the oscillator divided by the sample rate
// private:
typedef struct
{
int reflect;
float peak_time;
float zero_wait;
float half_height;
} HexWaveParameters;
struct HexWave
{
float t, prev_dt;
HexWaveParameters current, pending;
int have_pending;
float buffer[STB_HEXWAVE_MAX_BLEP_LENGTH];
};
#endif
#ifdef STB_HEXWAVE_IMPLEMENTATION
#ifndef STB_HEXWAVE_NO_ALLOCATION
#include <stdlib.h> // malloc,free
#endif
#include <string.h> // memset,memcpy,memmove
#include <math.h> // sin,cos,fabs
#define hexwave_clamp(v,a,b) ((v) < (a) ? (a) : (v) > (b) ? (b) : (v))
STB_HEXWAVE_DEF void hexwave_change(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait)
{
hex->pending.reflect = reflect;
hex->pending.peak_time = hexwave_clamp(peak_time,0,1);
hex->pending.half_height = half_height;
hex->pending.zero_wait = hexwave_clamp(zero_wait,0,1);
// put a barrier here to allow changing from a different thread than the generator
hex->have_pending = 1;
}
STB_HEXWAVE_DEF void hexwave_create(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait)
{
memset(hex, 0, sizeof(*hex));
hexwave_change(hex, reflect, peak_time, half_height, zero_wait);
hex->current = hex->pending;
hex->have_pending = 0;
hex->t = 0;
hex->prev_dt = 0;
}
static struct
{
int width; // width of fixup in samples
int oversample; // number of oversampled versions (there's actually one more to allow lerpign)
float *blep;
float *blamp;
} hexblep;
static void hex_add_oversampled_bleplike(float *output, float time_since_transition, float scale, float *data)
{
float *d1,*d2;
float lerpweight;
int i, bw = hexblep.width;
int slot = (int) (time_since_transition * hexblep.oversample);
if (slot >= hexblep.oversample)
slot = hexblep.oversample-1; // clamp in case the floats overshoot
d1 = &data[ slot *bw];
d2 = &data[(slot+1)*bw];
lerpweight = time_since_transition * hexblep.oversample - slot;
for (i=0; i < bw; ++i)
output[i] += scale * (d1[i] + (d2[i]-d1[i])*lerpweight);
}
static void hex_blep (float *output, float time_since_transition, float scale)
{
hex_add_oversampled_bleplike(output, time_since_transition, scale, hexblep.blep);
}
static void hex_blamp(float *output, float time_since_transition, float scale)
{
hex_add_oversampled_bleplike(output, time_since_transition, scale, hexblep.blamp);
}
typedef struct
{
float t,v,s; // time, value, slope
} hexvert;
// each half of the waveform needs 4 vertices to represent 3 line
// segments, plus 1 more for wraparound
static void hexwave_generate_linesegs(hexvert vert[9], HexWave *hex, float dt)
{
int j;
float min_len = dt / 256.0f;
vert[0].t = 0;
vert[0].v = 0;
vert[1].t = hex->current.zero_wait*0.5f;
vert[1].v = 0;
vert[2].t = 0.5f*hex->current.peak_time + vert[1].t*(1-hex->current.peak_time);
vert[2].v = 1;
vert[3].t = 0.5f;
vert[3].v = hex->current.half_height;
if (hex->current.reflect) {
for (j=4; j <= 7; ++j) {
vert[j].t = 1 - vert[7-j].t;
vert[j].v = - vert[7-j].v;
}
} else {
for (j=4; j <= 7; ++j) {
vert[j].t = 0.5f + vert[j-4].t;
vert[j].v = - vert[j-4].v;
}
}
vert[8].t = 1;
vert[8].v = 0;
for (j=0; j < 8; ++j) {
if (vert[j+1].t <= vert[j].t + min_len) {
// if change takes place over less than a fraction of a sample treat as discontinuity
//
// otherwise the slope computation can blow up to arbitrarily large and we
// try to generate a huge BLAMP and the result is wrong.
//
// why does this happen if the math is right? i believe if done perfectly,
// the two BLAMPs on either side of the slope would cancel out, but our
// BLAMPs have only limited sub-sample precision and limited integration
// accuracy. or maybe it's just the math blowing up w/ floating point precision
// limits as we try to make x * (1/x) cancel out
//
// min_len verified artifact-free even near nyquist with only oversample=4
vert[j+1].t = vert[j].t;
}
}
if (vert[8].t != 1.0f) {
// if the above fixup moved the endpoint away from 1.0, move it back,
// along with any other vertices that got moved to the same time
float t = vert[8].t;
for (j=5; j <= 8; ++j)
if (vert[j].t == t)
vert[j].t = 1.0f;
}
// compute the exact slopes from the final fixed-up positions
for (j=0; j < 8; ++j)
if (vert[j+1].t == vert[j].t)
vert[j].s = 0;
else
vert[j].s = (vert[j+1].v - vert[j].v) / (vert[j+1].t - vert[j].t);
// wraparound at end
vert[8].t = 1;
vert[8].v = vert[0].v;
vert[8].s = vert[0].s;
}
STB_HEXWAVE_DEF void hexwave_generate_samples(float *output, int num_samples, HexWave *hex, float freq)
{
hexvert vert[9];
int pass,i,j;
float t = hex->t;
float temp_output[2*STB_HEXWAVE_MAX_BLEP_LENGTH];
int buffered_length = sizeof(float)*hexblep.width;
float dt = (float) fabs(freq);
float recip_dt = (dt == 0.0f) ? 0.0f : 1.0f / dt;
int halfw = hexblep.width/2;
// all sample times are biased by halfw to leave room for BLEP/BLAMP to go back in time
if (num_samples <= 0)
return;
// convert parameters to times and slopes
hexwave_generate_linesegs(vert, hex, dt);
if (hex->prev_dt != dt) {
// if frequency changes, add a fixup at the derivative discontinuity starting at now
float slope;
for (j=1; j < 6; ++j)
if (t < vert[j].t)
break;
slope = vert[j].s;
if (slope != 0)
hex_blamp(output, 0, (dt - hex->prev_dt)*slope);
hex->prev_dt = dt;
}
// copy the buffered data from last call and clear the rest of the output array
memset(output, 0, sizeof(float)*num_samples);
memset(temp_output, 0, 2*hexblep.width*sizeof(float));
if (num_samples >= hexblep.width) {
memcpy(output, hex->buffer, buffered_length);
} else {
// if the output is shorter than hexblep.width, we do all synthesis to temp_output
memcpy(temp_output, hex->buffer, buffered_length);
}
for (pass=0; pass < 2; ++pass) {
int i0,i1;
float *out;
// we want to simulate having one buffer that is num_output + hexblep.width
// samples long, without putting that requirement on the user, and without
// allocating a temp buffer that's as long as the whole thing. so we use two
// overlapping buffers, one the user's buffer and one a fixed-length temp
// buffer.
if (pass == 0) {
if (num_samples < hexblep.width)
continue;
// run as far as we can without overwriting the end of the user's buffer
out = output;
i0 = 0;
i1 = num_samples - hexblep.width;
} else {
// generate the rest into a temp buffer
out = temp_output;
i0 = 0;
if (num_samples >= hexblep.width)
i1 = hexblep.width;
else
i1 = num_samples;
}
// determine current segment
for (j=0; j < 8; ++j)
if (t < vert[j+1].t)
break;
i = i0;
for(;;) {
while (t < vert[j+1].t) {
if (i == i1)
goto done;
out[i+halfw] += vert[j].v + vert[j].s*(t - vert[j].t);
t += dt;
++i;
}
// transition from lineseg starting at j to lineseg starting at j+1
if (vert[j].t == vert[j+1].t)
hex_blep(out+i, recip_dt*(t-vert[j+1].t), (vert[j+1].v - vert[j].v));
hex_blamp(out+i, recip_dt*(t-vert[j+1].t), dt*(vert[j+1].s - vert[j].s));
++j;
if (j == 8) {
// change to different waveform if there's a change pending
j = 0;
t -= 1.0; // t was >= 1.f if j==8
if (hex->have_pending) {
float prev_s0 = vert[j].s;
float prev_v0 = vert[j].v;
hex->current = hex->pending;
hex->have_pending = 0;
hexwave_generate_linesegs(vert, hex, dt);
// the following never occurs with this oscillator, but it makes
// the code work in more general cases
if (vert[j].v != prev_v0)
hex_blep (out+i, recip_dt*t, (vert[j].v - prev_v0));
if (vert[j].s != prev_s0)
hex_blamp(out+i, recip_dt*t, dt*(vert[j].s - prev_s0));
}
}
}
done:
;
}
// at this point, we've written output[] and temp_output[]
if (num_samples >= hexblep.width) {
// the first half of temp[] overlaps the end of output, the second half will be the new start overlap
for (i=0; i < hexblep.width; ++i)
output[num_samples-hexblep.width + i] += temp_output[i];
memcpy(hex->buffer, temp_output+hexblep.width, buffered_length);
} else {
for (i=0; i < num_samples; ++i)
output[i] = temp_output[i];
memcpy(hex->buffer, temp_output+num_samples, buffered_length);
}
hex->t = t;
}
STB_HEXWAVE_DEF void hexwave_shutdown(float *user_buffer)
{
#ifndef STB_HEXWAVE_NO_ALLOCATION
if (user_buffer != 0) {
free(hexblep.blep);
free(hexblep.blamp);
}
#endif
}
// buffer should be NULL or must be 4*(width*(oversample+1)*2 +
STB_HEXWAVE_DEF void hexwave_init(int width, int oversample, float *user_buffer)
{
int halfwidth = width/2;
int half = halfwidth*oversample;
int blep_buffer_count = width*(oversample+1);
int n = 2*half+1;
#ifdef STB_HEXWAVE_NO_ALLOCATION
float *buffers = user_buffer;
#else
float *buffers = user_buffer ? user_buffer : (float *) malloc(sizeof(float) * n * 2);
#endif
float *step = buffers+0*n;
float *ramp = buffers+1*n;
float *blep_buffer, *blamp_buffer;
double integrate_impulse=0, integrate_step=0;
int i,j;
if (width > STB_HEXWAVE_MAX_BLEP_LENGTH)
width = STB_HEXWAVE_MAX_BLEP_LENGTH;
if (user_buffer == 0) {
#ifndef STB_HEXWAVE_NO_ALLOCATION
blep_buffer = (float *) malloc(sizeof(float)*blep_buffer_count);
blamp_buffer = (float *) malloc(sizeof(float)*blep_buffer_count);
#endif
} else {
blep_buffer = ramp+n;
blamp_buffer = blep_buffer + blep_buffer_count;
}
// compute BLEP and BLAMP by integerating windowed sinc
for (i=0; i < n; ++i) {
for (j=0; j < 16; ++j) {
float sinc_t = 3.141592f* (i-half) / oversample;
float sinc = (i==half) ? 1.0f : (float) sin(sinc_t) / (sinc_t);
float wt = 2.0f*3.1415926f * i / (n-1);
float window = (float) (0.355768 - 0.487396*cos(wt) + 0.144232*cos(2*wt) - 0.012604*cos(3*wt)); // Nuttall
double value = window * sinc;
integrate_impulse += value/16;
integrate_step += integrate_impulse/16;
}
step[i] = (float) integrate_impulse;
ramp[i] = (float) integrate_step;
}
// renormalize
for (i=0; i < n; ++i) {
step[i] = step[i] * (float) (1.0 / step[n-1]); // step needs to reach to 1.0
ramp[i] = ramp[i] * (float) (halfwidth / ramp[n-1]); // ramp needs to become a slope of 1.0 after oversampling
}
// deinterleave to allow efficient interpolation e.g. w/SIMD
for (j=0; j <= oversample; ++j) {
for (i=0; i < width; ++i) {
blep_buffer [j*width+i] = step[j+i*oversample];
blamp_buffer[j*width+i] = ramp[j+i*oversample];
}
}
// subtract out the naive waveform; note we can't do this to the raw data
// above, because we want the discontinuity to be in a different locations
// for j=0 and j=oversample (which exists to provide something to interpolate against)
for (j=0; j <= oversample; ++j) {
// subtract step
for (i=halfwidth; i < width; ++i)
blep_buffer [j*width+i] -= 1.0f;
// subtract ramp
for (i=halfwidth; i < width; ++i)
blamp_buffer[j*width+i] -= (j+i*oversample-half)*(1.0f/oversample);
}
hexblep.blep = blep_buffer;
hexblep.blamp = blamp_buffer;
hexblep.width = width;
hexblep.oversample = oversample;
#ifndef STB_HEXWAVE_NO_ALLOCATION
if (user_buffer == 0)
free(buffers);
#endif
}
#endif // STB_HEXWAVE_IMPLEMENTATION