fix macOS build (following Projucer changes made in Windows, which removed /Applications/JUCE/modules from its headers). move JUCE headers under source control, so that Windows and macOS can both build against same version of JUCE. remove AUv3 target (I think it's an iOS thing, so it will never work with this macOS fluidsynth dylib).

2018-06-17 13:34:53 +01:00
parent a2be47c887
commit dff4d13a1d
1563 changed files with 601601 additions and 3466 deletions
--- a/modules/juce_dsp/containers/juce_AudioBlock.h
+++ b/modules/juce_dsp/containers/juce_AudioBlock.h
@ -0,0 +1,696 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+// Internal helpers that map a sample type onto the type of its individual elements.
+namespace SampleTypeHelpers
+{
+    /** Generic case: the element type is whatever the container exposes as value_type. */
+    template <typename Container>
+    struct ElementType
+    {
+        using Type = typename Container::value_type;
+    };
+
+    /** float is its own element type. */
+    template <>
+    struct ElementType<float>
+    {
+        using Type = float;
+    };
+
+    /** double is its own element type. */
+    template <>
+    struct ElementType<double>
+    {
+        using Type = double;
+    };
+
+    /** long double is its own element type. */
+    template <>
+    struct ElementType<long double>
+    {
+        using Type = long double;
+    };
+}
+#endif
+
+//==============================================================================
+/**
+    Minimal and lightweight data-structure which contains a list of pointers to
+    channels containing some kind of sample data.
+
+    This class doesn't own any of the data which it points to, it's simply a view
+    into data that is owned elsewhere. You can construct one from some raw data
+    that you've allocated yourself, or give it a HeapBlock to use, or give it
+    an AudioBuffer which it can refer to, but in all cases the user is
+    responsible for making sure that the data doesn't get deleted while there's
+    still an AudioBlock using it.
+
+    @tags{DSP}
+*/
+template <typename SampleType>
+class AudioBlock
+{
+public:
+    //==============================================================================
+    /** The type of a single element of SampleType, as resolved by
+        SampleTypeHelpers::ElementType (float and double map to themselves).
+    */
+    using NumericType = typename SampleTypeHelpers::ElementType<SampleType>::Type;
+
+    //==============================================================================
+    /** Constructs an empty block that reports zero channels and zero samples. */
+    forcedinline AudioBlock() noexcept
+    {
+    }
+
+    /** Wraps an existing array of channel pointers.
+
+        Only the channelData pointer and the two counts are stored: no sample data is
+        copied and no ownership is taken. The caller has to keep the channel array and
+        the samples it refers to alive for as long as this AudioBlock is in use, and
+        release them afterwards.
+
+        @param channelData       array of per-channel sample pointers
+        @param numberOfChannels  number of channels the block exposes
+        @param numberOfSamples   number of samples the block exposes per channel
+    */
+    forcedinline AudioBlock (SampleType* const* channelData,
+                             size_t numberOfChannels,
+                             size_t numberOfSamples) noexcept
+        : channels    (channelData),
+          numChannels (static_cast<ChannelCountType> (numberOfChannels)),
+          numSamples  (numberOfSamples)
+    {
+    }
+
+    /** Wraps an existing array of channel pointers, starting at a sample offset.
+
+        Only the channelData pointer, the counts and the offset are stored: no sample
+        data is copied and no ownership is taken. The caller has to keep the channel
+        array and the samples it refers to alive for as long as this AudioBlock is in
+        use, and release them afterwards.
+
+        @param channelData       array of per-channel sample pointers
+        @param numberOfChannels  number of channels the block exposes
+        @param startSampleIndex  offset that is added to each channel pointer when the
+                                 block's samples are accessed
+        @param numberOfSamples   number of samples the block exposes per channel
+    */
+    forcedinline AudioBlock (SampleType* const* channelData,
+                             size_t numberOfChannels,
+                             size_t startSampleIndex,
+                             size_t numberOfSamples) noexcept
+        : channels    (channelData),
+          numChannels (static_cast<ChannelCountType> (numberOfChannels)),
+          startSample (startSampleIndex),
+          numSamples  (numberOfSamples)
+    {
+    }
+
+    /** Allocates storage from a HeapBlock and points this AudioBlock at it.
+
+        The allocation holds the table of channel pointers first, followed by the
+        sample data of each channel. The HeapBlock keeps ownership of the memory, so it
+        must not be freed or re-allocated while this AudioBlock is still in use.
+
+        @param heapBlockToUseForAllocation  the HeapBlock on which malloc() is called
+        @param numberOfChannels             number of channels to lay out
+        @param numberOfSamples              number of samples per channel
+        @param alignmentInBytes             alignment handed to snapPointerToAlignment for
+                                            the start of the sample data
+    */
+    AudioBlock (HeapBlock<char>& heapBlockToUseForAllocation,
+                size_t numberOfChannels, size_t numberOfSamples,
+                size_t alignmentInBytes = defaultAlignment) noexcept
+        : numChannels (static_cast<ChannelCountType> (numberOfChannels)),
+          numSamples (numberOfSamples)
+    {
+        // Per-channel sample count after rounding up with elementMask.
+        const auto samplesPerChannel = (numberOfSamples + elementMask) & ~elementMask;
+        const auto bytesPerChannel   = sizeof (SampleType) * samplesPerChannel;
+        const auto pointerTableBytes = sizeof (SampleType*) * numberOfChannels;
+
+        // Extra room so that the sample data can start at an aligned address.
+        const auto alignmentSlack = alignmentInBytes - 1;
+
+        heapBlockToUseForAllocation.malloc (pointerTableBytes + alignmentSlack + bytesPerChannel * numberOfChannels);
+
+        // The channel pointer table lives at the very start of the allocation...
+        auto* pointerTable = reinterpret_cast<SampleType**> (heapBlockToUseForAllocation.getData());
+        channels = pointerTable;
+
+        // ...and the sample data follows it, snapped to the requested alignment.
+        auto* channelStart = reinterpret_cast<SampleType*> (addBytesToPointer (pointerTable, pointerTableBytes));
+        channelStart = snapPointerToAlignment (channelStart, alignmentInBytes);
+
+        for (ChannelCountType channel = 0; channel < numChannels; ++channel)
+        {
+            pointerTable[channel] = channelStart;
+            channelStart += samplesPerChannel;
+        }
+    }
+
+    /** Creates an AudioBlock that refers to all the channels and samples of an AudioBuffer.
+
+        The block does not copy or own the buffer's memory, so it is the user's
+        responsibility to keep the buffer alive and unmodified for as long as the
+        AudioBlock is in use.
+
+        @param buffer   the buffer whose write pointers, channel count and sample count are used
+    */
+    AudioBlock (AudioBuffer<SampleType>& buffer) noexcept
+        : channels    (buffer.getArrayOfWritePointers()),
+          numChannels (static_cast<ChannelCountType> (buffer.getNumChannels())),
+          numSamples  (static_cast<size_t> (buffer.getNumSamples()))
+    {
+    }
+
+    /** Creates an AudioBlock that refers to the part of an AudioBuffer that begins at
+        startSampleIndex and runs to the end of the buffer.
+
+        The block does not copy or own the buffer's memory, so it is the user's
+        responsibility to keep the buffer alive and unmodified for as long as the
+        AudioBlock is in use.
+
+        @param buffer            the buffer whose channels are referenced
+        @param startSampleIndex  index of the first buffer sample covered by the block;
+                                 must be smaller than buffer.getNumSamples()
+    */
+    AudioBlock (AudioBuffer<SampleType>& buffer, size_t startSampleIndex) noexcept
+        : channels (buffer.getArrayOfWritePointers()),
+          numChannels (static_cast<ChannelCountType> (buffer.getNumChannels())),
+          startSample (startSampleIndex),
+          // Only the samples from the start offset onwards belong to the block. Using the
+          // full buffer length here would make the block extend past the end of the buffer.
+          numSamples (static_cast<size_t> (buffer.getNumSamples()) - startSampleIndex)
+    {
+        jassert (startSample < static_cast<size_t> (buffer.getNumSamples()));
+    }
+
+    /** Copying an AudioBlock copies only the view (channel pointer array, counts and
+        start offset); both blocks then refer to the same sample data.
+    */
+    AudioBlock (const AudioBlock& other) noexcept = default;
+    /** Copy assignment; compiler-generated like the copy constructor. */
+    AudioBlock& operator= (const AudioBlock& other) noexcept = default;
+
+    //==============================================================================
+    /** Returns the number of samples per channel in this block. */
+    forcedinline size_t getNumSamples() const noexcept
+    {
+        return numSamples;
+    }
+
+    /** Returns the number of channels in this block. */
+    forcedinline size_t getNumChannels() const noexcept
+    {
+        return static_cast<size_t> (numChannels);
+    }
+
+    /** Returns a read-only pointer to the first sample of a channel in this block,
+        i.e. the stored channel pointer advanced by the block's start offset.
+        Asserts that the channel index is in range and that the block is not empty.
+    */
+    forcedinline const SampleType* getChannelPointer (size_t channel) const noexcept
+    {
+        jassert (channel < numChannels);
+        jassert (numSamples > 0);
+
+        auto* channelBase = channels[channel];
+        return channelBase + startSample;
+    }
+
+    /** Returns a writable pointer to the first sample of a channel in this block,
+        i.e. the stored channel pointer advanced by the block's start offset.
+        Asserts that the channel index is in range and that the block is not empty.
+    */
+    forcedinline SampleType* getChannelPointer (size_t channel) noexcept
+    {
+        jassert (channel < numChannels);
+        jassert (numSamples > 0);
+
+        auto* channelBase = channels[channel];
+        return channelBase + startSample;
+    }
+
+    /** Returns a one-channel AudioBlock that refers to the given channel of this block.
+        The returned block keeps this block's start offset and sample count.
+    */
+    forcedinline AudioBlock getSingleChannelBlock (size_t channel) const noexcept
+    {
+        jassert (channel < numChannels);
+
+        auto* firstChannel = channels + channel;
+        return AudioBlock (firstChannel, 1, startSample, numSamples);
+    }
+
+    /** Returns an AudioBlock that refers to a contiguous subset of this block's channels.
+        The returned block keeps this block's start offset and sample count.
+
+        Like getSingleChannelBlock() and getSubBlock(), this only creates a new view and
+        does not modify the receiver, so it can be called on a const AudioBlock.
+
+        @param channelStart       First channel of the subset
+        @param numChannelsToUse   Count of channels in the subset
+    */
+    forcedinline AudioBlock getSubsetChannelBlock (size_t channelStart, size_t numChannelsToUse) const noexcept
+    {
+        jassert (channelStart < numChannels);
+        jassert ((channelStart + numChannelsToUse) <= numChannels);
+
+        return AudioBlock (channels + channelStart, numChannelsToUse, startSample, numSamples);
+    }
+
+    /** Reads one sample from the block.
+
+        Neither index is range-checked beyond an assertion: out-of-range values trigger
+        the assertion, and in a release build they lead to undefined behaviour.
+
+        @param channel       index of the channel to read from
+        @param sampleIndex   index of the sample, relative to the start of this block
+    */
+    SampleType getSample (int channel, int sampleIndex) const noexcept
+    {
+        jassert (isPositiveAndBelow (channel, numChannels));
+        jassert (isPositiveAndBelow (sampleIndex, numSamples));
+
+        // The block's own start offset is applied on top of the requested index.
+        const auto absoluteIndex = startSample + sampleIndex;
+        return channels[channel][absoluteIndex];
+    }
+
+    /** Overwrites one sample of the block.
+
+        Neither index is range-checked beyond an assertion: out-of-range values trigger
+        the assertion, and in a release build they lead to undefined behaviour.
+
+        @param destChannel   index of the channel to write to
+        @param destSample    index of the sample, relative to the start of this block
+        @param newValue      the value to store
+    */
+    void setSample (int destChannel, int destSample, SampleType newValue) noexcept
+    {
+        jassert (isPositiveAndBelow (destChannel, numChannels));
+        jassert (isPositiveAndBelow (destSample, numSamples));
+
+        // The block's own start offset is applied on top of the requested index.
+        const auto absoluteIndex = startSample + destSample;
+        channels[destChannel][absoluteIndex] = newValue;
+    }
+
+    /** Adds a value onto one sample of the block.
+
+        Neither index is range-checked beyond an assertion: out-of-range values trigger
+        the assertion, and in a release build they lead to undefined behaviour.
+
+        @param destChannel   index of the channel to modify
+        @param destSample    index of the sample, relative to the start of this block
+        @param valueToAdd    the value that is added to the existing sample
+    */
+    void addSample (int destChannel, int destSample, SampleType valueToAdd) noexcept
+    {
+        jassert (isPositiveAndBelow (destChannel, numChannels));
+        jassert (isPositiveAndBelow (destSample, numSamples));
+
+        // The block's own start offset is applied on top of the requested index.
+        const auto absoluteIndex = startSample + destSample;
+        channels[destChannel][absoluteIndex] += valueToAdd;
+    }
+
+    //==============================================================================
+    /** Clears the memory of every channel described by this AudioBlock.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& clear() noexcept
+    {
+        // Length of one channel, counted in NumericType elements.
+        const auto numValues = static_cast<int> (numSamples * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::clear (channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Fills every channel of this AudioBlock with the given value.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE fill (SampleType value) noexcept
+    {
+        // Length of one channel, counted in NumericType elements.
+        const auto numValues = static_cast<int> (numSamples * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::fill (channelPtr (channel), value, numValues);
+        }
+
+        return *this;
+    }
+
+    /** Copies the values of src into the receiver.
+        Only the channels and samples that exist in both blocks are copied.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& copy (AudioBlock src) noexcept
+    {
+        const auto channelsToCopy = jmin (src.numChannels, numChannels);
+        const auto numValues = static_cast<int> (jmin (src.numSamples, numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < channelsToCopy; ++channel)
+        {
+            FloatVectorOperations::copy (channelPtr (channel), src.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Copies values from a JUCE AudioBuffer into the receiver.
+
+        All indices and sizes are in the receiver's units, i.e. if SampleType is a
+        SIMDRegister then incrementing srcPos by one will increase the sample position
+        in the AudioBuffer's units by a factor of SIMDRegister<SampleType>::SIMDNumElements.
+
+        @param src           the buffer to read from
+        @param srcPos        first element to read from src
+        @param dstPos        first element of the receiver to write to
+        @param numElements   maximum number of elements to copy; the copy is also limited
+                             by what is left in src after srcPos and in the receiver after dstPos
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& copyFrom (const AudioBuffer<NumericType>& src, size_t srcPos = 0, size_t dstPos = 0,
+                                       size_t numElements = std::numeric_limits<size_t>::max())
+    {
+        auto srclen = static_cast<size_t> (src.getNumSamples()) / sizeFactor;
+
+        // Positions beyond the end would make the unsigned subtractions below wrap around.
+        jassert (srcPos <= srclen && dstPos <= numSamples);
+
+        auto n = static_cast<int> (jmin (srclen - srcPos, numSamples - dstPos, numElements) * sizeFactor);
+        auto maxChannels = jmin (static_cast<size_t> (src.getNumChannels()), static_cast<size_t> (numChannels));
+
+        // dstPos has to offset the destination pointer as well as limit the length,
+        // otherwise the data would always be written to the start of the block.
+        for (size_t ch = 0; ch < maxChannels; ++ch)
+            FloatVectorOperations::copy (channelPtr (ch) + (dstPos * sizeFactor),
+                                         src.getReadPointer (static_cast<int> (ch),
+                                                             static_cast<int> (srcPos * sizeFactor)),
+                                         n);
+
+        return *this;
+    }
+
+    /** Copies values from the receiver into a JUCE AudioBuffer.
+
+        All indices and sizes are in the receiver's units, i.e. if SampleType is a
+        SIMDRegister then incrementing dstPos by one will increase the sample position
+        in the AudioBuffer's units by a factor of SIMDRegister<SampleType>::SIMDNumElements.
+
+        @param dst           the buffer to write to
+        @param srcPos        first element of the receiver to read from
+        @param dstPos        first element of dst to write to
+        @param numElements   maximum number of elements to copy; the copy is also limited
+                             by what is left in the receiver after srcPos and in dst after dstPos
+        @returns a reference to this block
+    */
+    forcedinline const AudioBlock& copyTo (AudioBuffer<NumericType>& dst, size_t srcPos = 0, size_t dstPos = 0,
+                                           size_t numElements = std::numeric_limits<size_t>::max()) const
+    {
+        auto dstlen = static_cast<size_t> (dst.getNumSamples()) / sizeFactor;
+
+        // Positions beyond the end would make the unsigned subtractions below wrap around.
+        jassert (srcPos <= numSamples && dstPos <= dstlen);
+
+        auto n = static_cast<int> (jmin (numSamples - srcPos, dstlen - dstPos, numElements) * sizeFactor);
+        auto maxChannels = jmin (static_cast<size_t> (dst.getNumChannels()), static_cast<size_t> (numChannels));
+
+        // srcPos has to offset the source pointer as well as limit the length,
+        // otherwise the data would always be read from the start of the block.
+        for (size_t ch = 0; ch < maxChannels; ++ch)
+            FloatVectorOperations::copy (dst.getWritePointer (static_cast<int> (ch),
+                                                              static_cast<int> (dstPos * sizeFactor)),
+                                         channelPtr (ch) + (srcPos * sizeFactor), n);
+
+        return *this;
+    }
+
+    /** Moves samples within the receiver from position srcPos to position dstPos.
+
+        The move is done with memmove on each channel. If numElements is not specified,
+        as many samples as fit after both positions are moved.
+
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& move (size_t srcPos, size_t dstPos,
+                                   size_t numElements = std::numeric_limits<size_t>::max()) noexcept
+    {
+        jassert (srcPos <= numSamples && dstPos <= numSamples);
+
+        const auto samplesToMove = jmin (numSamples - srcPos, numSamples - dstPos, numElements);
+        const auto bytesToMove = samplesToMove * sizeof (SampleType);
+
+        if (bytesToMove == 0)
+            return *this;
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            auto* channelData = getChannelPointer (channel);
+            ::memmove (channelData + dstPos, channelData + srcPos, bytesToMove);
+        }
+
+        return *this;
+    }
+
+    //==============================================================================
+    /** Returns a new AudioBlock that refers to a sub-range of the receiver's samples.
+
+        No memory is copied, so the memory referred to by the receiver must remain
+        valid throughout the life-time of the returned sub-block.
+
+        @param newOffset   The index of the element inside the receiver which will
+                           become the first element of the returned block.
+        @param newLength   The number of elements in the returned block.
+    */
+    inline AudioBlock getSubBlock (size_t newOffset, size_t newLength) const noexcept
+    {
+        jassert (newOffset < numSamples);
+        jassert (newOffset + newLength <= numSamples);
+
+        const auto subBlockStart = startSample + newOffset;
+        return AudioBlock (channels, numChannels, subBlockStart, newLength);
+    }
+
+    /** Returns a new AudioBlock that refers to the receiver's samples from newOffset
+        up to the end of the receiver.
+
+        No memory is copied, so the memory referred to by the receiver must remain
+        valid throughout the life-time of the returned sub-block.
+
+        @param newOffset   The index of the element inside the receiver which will
+                           become the first element of the returned block. The
+                           returned block includes all subsequent elements of the
+                           receiver.
+    */
+    inline AudioBlock getSubBlock (size_t newOffset) const noexcept
+    {
+        const auto remainingSamples = getNumSamples() - newOffset;
+        return getSubBlock (newOffset, remainingSamples);
+    }
+
+    //==============================================================================
+    /** Adds a fixed value to every channel of the receiver.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE add (SampleType value) noexcept
+    {
+        // Length of one channel, counted in NumericType elements.
+        const auto numValues = static_cast<int> (numSamples * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::add (channelPtr (channel), value, numValues);
+        }
+
+        return *this;
+    }
+
+    /** Adds the values of src to the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& add (AudioBlock src) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::add (channelPtr (channel), src.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Adds a fixed value to each value of src and stores the result in the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE add (AudioBlock src, SampleType value) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::add (channelPtr (channel), src.channelPtr (channel), value, numValues);
+        }
+
+        return *this;
+    }
+
+    /** Adds each value of src1 to the corresponding value of src2 and stores the
+        result in the receiver.
+
+        All three blocks are expected to have the same number of channels; the
+        shortest of the three sample counts is processed.
+
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& add (AudioBlock src1, AudioBlock src2) noexcept
+    {
+        jassert (numChannels == src1.numChannels && src1.numChannels == src2.numChannels);
+        auto n = static_cast<int> (jmin (numSamples, src1.numSamples, src2.numSamples) * sizeFactor);
+
+        // Both sources are read through channelPtr() so that every pointer handed to
+        // FloatVectorOperations has the same NumericType element type.
+        for (size_t ch = 0; ch < numChannels; ++ch)
+            FloatVectorOperations::add (channelPtr (ch), src1.channelPtr (ch), src2.channelPtr (ch), n);
+
+        return *this;
+    }
+
+    /** Subtracts a fixed value from the receiver by adding the negated value.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE subtract (SampleType value) noexcept
+    {
+        const auto negatedValue = value * static_cast<SampleType> (-1.0);
+        return add (negatedValue);
+    }
+
+    /** Subtracts the values of src from the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& subtract (AudioBlock src) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::subtract (channelPtr (channel), src.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Subtracts a fixed value from each value of src and stores the result in the
+        receiver, by adding the negated value to src.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE subtract (AudioBlock src, SampleType value) noexcept
+    {
+        const auto negatedValue = static_cast<SampleType> (-1.0) * value;
+        return add (src, negatedValue);
+    }
+
+    /** Subtracts each value of src2 from the corresponding value of src1 and stores
+        the result in the receiver.
+        All three blocks are expected to have the same number of channels; the
+        shortest of the three sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& subtract (AudioBlock src1, AudioBlock src2) noexcept
+    {
+        jassert (numChannels == src1.numChannels && src1.numChannels == src2.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src1.numSamples, src2.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::subtract (channelPtr (channel), src1.channelPtr (channel), src2.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Multiplies every channel of the receiver by a fixed value.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE multiply (SampleType value) noexcept
+    {
+        // Length of one channel, counted in NumericType elements.
+        const auto numValues = static_cast<int> (numSamples * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::multiply (channelPtr (channel), value, numValues);
+        }
+
+        return *this;
+    }
+
+    /** Multiplies the receiver by the values of src.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& multiply (AudioBlock src) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::multiply (channelPtr (channel), src.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Multiplies each value of src by a fixed value and stores the result in the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE multiply (AudioBlock src, SampleType value) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::multiply (channelPtr (channel), src.channelPtr (channel), value, numValues);
+        }
+
+        return *this;
+    }
+
+    /** Multiplies each value of src1 by the corresponding value of src2 and stores
+        the result in the receiver.
+        All three blocks are expected to have the same number of channels; the
+        shortest of the three sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& multiply (AudioBlock src1, AudioBlock src2) noexcept
+    {
+        jassert (numChannels == src1.numChannels && src1.numChannels == src2.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src1.numSamples, src2.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::multiply (channelPtr (channel), src1.channelPtr (channel), src2.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Multiplies all channels of the AudioBlock by a smoothly changing value and
+        stores the result in the receiver.
+
+        While the value is still smoothing, one new value is pulled from it per sample
+        and applied to that sample in every channel; otherwise the whole block is
+        multiplied by the target value.
+
+        @returns a reference to this block
+    */
+    AudioBlock& multiply (LinearSmoothedValue<SampleType>& value) noexcept
+    {
+        if (value.isSmoothing())
+        {
+            for (size_t sample = 0; sample < numSamples; ++sample)
+            {
+                const auto gain = value.getNextValue();
+
+                for (size_t channel = 0; channel < numChannels; ++channel)
+                    channelPtr (channel)[sample] *= gain;
+            }
+        }
+        else
+        {
+            *this *= value.getTargetValue();
+        }
+
+        return *this;
+    }
+
+    /** Multiplies all channels of the source by a smoothly changing value and stores
+        the result in the receiver.
+
+        While the value is still smoothing, one new value is pulled from it per element
+        and applied to that element in every channel; otherwise the source is
+        multiplied by the target value in a single pass.
+
+        Both blocks are expected to have the same number of channels.
+
+        @returns a reference to this block
+    */
+    AudioBlock& multiply (AudioBlock src, LinearSmoothedValue<SampleType>& value) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        if (! value.isSmoothing())
+        {
+            // The gain is constant here but still has to be applied: a plain copy of
+            // src would drop the multiplication altogether.
+            multiply (src, value.getTargetValue());
+        }
+        else
+        {
+            auto n = jmin (numSamples, src.numSamples) * sizeFactor;
+
+            for (size_t i = 0; i < n; ++i)
+            {
+                const auto scaler = value.getNextValue();
+
+                for (size_t ch = 0; ch < numChannels; ++ch)
+                    channelPtr (ch)[i] = scaler * src.getChannelPointer (ch)[i];
+            }
+        }
+
+        return *this;
+    }
+
+    /** Multiplies each value of src by factor and adds the result to the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE addWithMultiply (AudioBlock src, SampleType factor) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::addWithMultiply (channelPtr (channel), src.channelPtr (channel), factor, numValues);
+        }
+
+        return *this;
+    }
+
+    /** Multiplies each value of src1 by the corresponding value of src2 and adds the
+        result to the receiver.
+        All three blocks are expected to have the same number of channels; the
+        shortest of the three sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& addWithMultiply (AudioBlock src1, AudioBlock src2) noexcept
+    {
+        jassert (numChannels == src1.numChannels && src1.numChannels == src2.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src1.numSamples, src2.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::addWithMultiply (channelPtr (channel), src1.channelPtr (channel), src2.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Negates each value of the receiver by multiplying it by -1.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& negate() noexcept
+    {
+        const auto minusOne = static_cast<SampleType> (-1.0);
+        return multiply (minusOne);
+    }
+
+    /** Negates each value of src and stores the result in the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& replaceWithNegativeOf (AudioBlock src) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::negate (channelPtr (channel), src.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Takes the absolute value of each element of src and stores it in the receiver.
+        Both blocks are expected to have the same number of channels; the shorter of
+        the two sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& replaceWithAbsoluteValueOf (AudioBlock src) noexcept
+    {
+        jassert (numChannels == src.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (numSamples, src.numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::abs (channelPtr (channel), src.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Sets each element of the receiver to the minimum of the corresponding elements
+        of the two source blocks.
+        All three blocks are expected to have the same number of channels; the
+        shortest of the three sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& min (AudioBlock src1, AudioBlock src2) noexcept
+    {
+        jassert (numChannels == src1.numChannels && src1.numChannels == src2.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (src1.numSamples, src2.numSamples, numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::min (channelPtr (channel), src1.channelPtr (channel), src2.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Sets each element of the receiver to the maximum of the corresponding elements
+        of the two source blocks.
+        All three blocks are expected to have the same number of channels; the
+        shortest of the three sample counts is processed.
+        @returns a reference to this block
+    */
+    forcedinline AudioBlock& max (AudioBlock src1, AudioBlock src2) noexcept
+    {
+        jassert (numChannels == src1.numChannels && src1.numChannels == src2.numChannels);
+
+        const auto numValues = static_cast<int> (jmin (src1.numSamples, src2.numSamples, numSamples) * sizeFactor);
+
+        for (size_t channel = 0; channel < numChannels; ++channel)
+        {
+            FloatVectorOperations::max (channelPtr (channel), src1.channelPtr (channel), src2.channelPtr (channel), numValues);
+        }
+
+        return *this;
+    }
+
+    /** Finds the minimum and maximum value across all channels of the block.
+        A block without channels yields a default-constructed Range.
+    */
+    forcedinline Range<NumericType> findMinAndMax() const noexcept
+    {
+        if (numChannels == 0)
+            return {};
+
+        // Length of one channel, counted in NumericType elements.
+        const auto numValues = static_cast<int> (numSamples * sizeFactor);
+
+        // Start with the range of the first channel, then widen it channel by channel.
+        auto overallRange = FloatVectorOperations::findMinAndMax (channelPtr (0), numValues);
+
+        for (size_t channel = 1; channel < numChannels; ++channel)
+        {
+            const auto channelRange = FloatVectorOperations::findMinAndMax (channelPtr (channel), numValues);
+            overallRange = overallRange.getUnionWith (channelRange);
+        }
+
+        return overallRange;
+    }
+
+    //==============================================================================
+    // Convenience operators: each one forwards to the add / subtract / multiply
+    // overload that takes the same argument.
+
+    /** Same as add (src). */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE operator+= (SampleType src) noexcept
+    {
+        return add (src);
+    }
+
+    /** Same as add (src). */
+    forcedinline AudioBlock& operator+= (AudioBlock src) noexcept
+    {
+        return add (src);
+    }
+
+    /** Same as subtract (src). */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE operator-= (SampleType src) noexcept
+    {
+        return subtract (src);
+    }
+
+    /** Same as subtract (src). */
+    forcedinline AudioBlock& operator-= (AudioBlock src) noexcept
+    {
+        return subtract (src);
+    }
+
+    /** Same as multiply (src). */
+    forcedinline AudioBlock& JUCE_VECTOR_CALLTYPE operator*= (SampleType src) noexcept
+    {
+        return multiply (src);
+    }
+
+    /** Same as multiply (src). */
+    forcedinline AudioBlock& operator*= (AudioBlock src) noexcept
+    {
+        return multiply (src);
+    }
+
+    /** Same as multiply (value). */
+    forcedinline AudioBlock& operator*= (LinearSmoothedValue<SampleType>& value) noexcept
+    {
+        return multiply (value);
+    }
+
+    //==============================================================================
+    // This class can only be used with floating point sample types: float, double and,
+    // when JUCE_USE_SIMD is enabled, SIMDRegister<float> and SIMDRegister<double>.
+    // Any other SampleType is rejected at compile time.
+    static_assert (std::is_same<SampleType, float>::value
+                    || std::is_same<SampleType, double>::value
+                  #if JUCE_USE_SIMD
+                    || std::is_same<SampleType, SIMDRegister<float>>::value
+                    || std::is_same<SampleType, SIMDRegister<double>>::value
+                  #endif
+                   , "AudioBlock only supports single or double precision floating point types");
+
+    //==============================================================================
+    /** Applies a function to each value of an input block and writes the results to
+        an output block.
+
+        The function must take a SampleType as its parameter and return a SampleType.
+        The two blocks must have the same number of channels and samples; the sizes
+        of inBlock determine how much is processed.
+
+        @param inBlock    the block that is read
+        @param outBlock   the block that is written
+        @param function   the callable applied to every sample
+    */
+    template <typename FunctionType>
+    static void process (AudioBlock inBlock, AudioBlock outBlock, FunctionType&& function)
+    {
+        const auto samplesToProcess  = inBlock.getNumSamples();
+        const auto channelsToProcess = inBlock.getNumChannels();
+
+        jassert (samplesToProcess == outBlock.getNumSamples());
+        jassert (channelsToProcess == outBlock.getNumChannels());
+
+        for (ChannelCountType channel = 0; channel < channelsToProcess; ++channel)
+        {
+            auto* input  = inBlock.getChannelPointer (channel);
+            auto* output = outBlock.getChannelPointer (channel);
+
+            for (size_t sample = 0; sample < samplesToProcess; ++sample)
+                output[sample] = function (input[sample]);
+        }
+    }
+
+private:
+    //==============================================================================
+    /** Returns the start of a channel, reinterpreted as a pointer to NumericType elements. */
+    NumericType* channelPtr (size_t ch) noexcept
+    {
+        auto* samples = getChannelPointer (ch);
+        return reinterpret_cast<NumericType*> (samples);
+    }
+
+    /** Read-only variant of channelPtr(). */
+    const NumericType* channelPtr (size_t ch) const noexcept
+    {
+        auto* samples = getChannelPointer (ch);
+        return reinterpret_cast<const NumericType*> (samples);
+    }
+
+    //==============================================================================
+    // Type in which the channel count is stored.
+    using ChannelCountType = unsigned int;
+
+    //==============================================================================
+    // Number of NumericType elements that make up one SampleType.
+    static constexpr size_t sizeFactor    = sizeof (SampleType) / sizeof (NumericType);
+    // Mask used to round sample counts up when allocating from a HeapBlock.
+    static constexpr size_t elementMask   = sizeFactor - 1;
+    static constexpr size_t byteMask      = (sizeFactor * sizeof (NumericType)) - 1;
+
+    // Alignment used by the HeapBlock constructor when the caller does not pass one.
+   #if JUCE_USE_SIMD
+    static constexpr size_t defaultAlignment = sizeof (SIMDRegister<NumericType>);
+   #else
+    static constexpr size_t defaultAlignment = sizeof (NumericType);
+   #endif
+
+    // Array of per-channel sample pointers; the block never owns it. It is initialised
+    // to nullptr so that a default-constructed block does not hold an indeterminate pointer.
+    SampleType* const* channels = nullptr;
+    ChannelCountType numChannels = 0;
+    // startSample is the offset applied to every channel pointer; numSamples is the
+    // number of samples per channel that the block exposes.
+    size_t startSample = 0, numSamples = 0;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/containers/juce_SIMDRegister.h
+++ b/modules/juce_dsp/containers/juce_SIMDRegister.h
@ -0,0 +1,399 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+ // This class is needed internally.
+ template <typename Scalar>
+ struct CmplxSIMDOps;
+#endif
+
+//==============================================================================
+/**
+    A wrapper around the platform's native SIMD register type.
+
+    This class is only available on SIMD machines. Use JUCE_USE_SIMD to query
+    if SIMD is available for your system.
+
+    SIMDRegister<Type> is a templated class representing the native
+    vectorized version of Type. SIMDRegister supports all numerical
+    primitive types as well as std::complex<float> and std::complex<double>,
+    and supports most operations of the corresponding primitive
+    type. Additionally, SIMDRegister can be accessed like an array to extract
+    the individual elements.
+
+    If you are using SIMDRegister as a pointer, then you must ensure that the
+    memory is sufficiently aligned for SIMD vector operations. Failing to do so
+    will result in crashes or very slow code. Use SIMDRegister::isSIMDAligned
+    to query if a pointer is sufficiently aligned for SIMD vector operations.
+
+    Note that using SIMDRegister without enabling optimizations will result
+    in code with very poor performance.
+
+    @tags{DSP}
+*/
+template <typename Type>
+struct SIMDRegister
+{
+    //==============================================================================
+    /** The type that represents the individual constituents of the SIMD Register */
+    using ElementType = Type;
+
+    /** STL compatible value_type definition (same as ElementType). */
+    using value_type = ElementType;
+
+    /** The corresponding primitive integer type, for example, this will be int32_t
+        if type is a float. */
+    using MaskType = typename SIMDInternal::MaskTypeFor<ElementType>::type;
+
+    //==============================================================================
+    // Here are some types which are needed internally
+
+    /** The native primitive type (used internally). */
+    using PrimitiveType = typename SIMDInternal::PrimitiveType<ElementType>::type;
+
+    /** The native operations for this platform and type combination (used internally) */
+    using NativeOps = SIMDNativeOps<PrimitiveType>;
+
+    /** The native type (used internally). */
+    using vSIMDType = typename NativeOps::vSIMDType;
+
+    /** The corresponding integer SIMDRegister type (used internally). */
+    using vMaskType = SIMDRegister<MaskType>;
+
+    /** The internal native type for the corresponding mask type (used internally). */
+    using vMaskSIMDType = typename vMaskType::vSIMDType;
+
+    /** Wrapper for operations which need to be handled differently for complex
+        and scalar types (used internally). */
+    using CmplxOps = CmplxSIMDOps<ElementType>;
+
+    /** Type which is returned when using the subscript operator. The returned type
+        should be used just like the type ElementType. */
+    struct ElementAccess;
+
+    //==============================================================================
+    /** The size in bytes of this register. */
+    static constexpr size_t SIMDRegisterSize = sizeof (vSIMDType);
+
+    /** The number of elements that this vector can hold. */
+    static constexpr size_t SIMDNumElements = SIMDRegisterSize / sizeof (ElementType);
+
+    /** The wrapped native SIMD value. */
+    vSIMDType value;
+
+    /** Default constructor. Does not initialise the wrapped value. */
+    inline SIMDRegister() noexcept {}
+
+    /** Constructs an object from the native SIMD type. */
+    inline SIMDRegister (vSIMDType a) noexcept : value (a) {}
+
+    /** Constructs an object from a scalar type by broadcasting it to all elements. */
+    inline SIMDRegister (Type s) noexcept  { *this = s; }
+
+    /** Destructor. */
+    inline ~SIMDRegister() noexcept {}
+
+    //==============================================================================
+    /** Returns the number of elements in this vector. */
+    static constexpr size_t size() noexcept    { return SIMDNumElements; }
+
+    //==============================================================================
+    /** Creates a new SIMDRegister from the corresponding scalar primitive.
+        The scalar is extended to all elements of the vector. */
+    inline static SIMDRegister JUCE_VECTOR_CALLTYPE expand (ElementType s) noexcept         { return {CmplxOps::expand (s)}; }
+
+    /** Creates a new SIMDRegister from the internal SIMD type (for example
+        __m128 for single-precision floating point on SSE architectures). */
+    inline static SIMDRegister JUCE_VECTOR_CALLTYPE fromNative (vSIMDType a) noexcept       { return {a}; }
+
+    /** Creates a new SIMDRegister from the first SIMDNumElements of a scalar array.
+        The array is asserted to be SIMD aligned (see isSIMDAligned). */
+    inline static SIMDRegister JUCE_VECTOR_CALLTYPE fromRawArray (const ElementType* a) noexcept
+    {
+        jassert (isSIMDAligned (a));
+        return {CmplxOps::load (a)};
+    }
+
+    /** Copies the elements of the SIMDRegister to a scalar array in memory.
+        The array is asserted to be SIMD aligned (see isSIMDAligned). */
+    inline void JUCE_VECTOR_CALLTYPE copyToRawArray (ElementType* a) const noexcept
+    {
+        jassert (isSIMDAligned (a));
+        CmplxOps::store (value, a);
+    }
+
+    //==============================================================================
+    /** Returns the idx-th element of the receiver. Note that idx is only checked
+        against SIMDNumElements by a jassert. */
+    inline ElementType JUCE_VECTOR_CALLTYPE get (size_t idx) const noexcept
+    {
+        jassert (idx < SIMDNumElements);
+        return CmplxOps::get (value, idx);
+    }
+
+    /** Sets the idx-th element of the receiver. Note that idx is only checked
+        against SIMDNumElements by a jassert. */
+    inline void JUCE_VECTOR_CALLTYPE set (size_t idx, ElementType v) noexcept
+    {
+        jassert (idx < SIMDNumElements);
+        value = CmplxOps::set (value, idx, v);
+    }
+
+    //==============================================================================
+    /** Returns the idx-th element of the receiver by value. This forwards to get(),
+        so idx is only checked against SIMDNumElements by a jassert. */
+    inline ElementType JUCE_VECTOR_CALLTYPE operator[] (size_t idx) const noexcept
+    {
+        return get (idx);
+    }
+
+    /** Returns an ElementAccess object referring to the idx-th element of the
+        receiver, which can be both read from and assigned to. Note that idx is
+        only checked against SIMDNumElements by a jassert. */
+    inline ElementAccess JUCE_VECTOR_CALLTYPE operator[] (size_t idx) noexcept
+    {
+        jassert (idx < SIMDNumElements);
+        return ElementAccess (*this, idx);
+    }
+
+    //==============================================================================
+    /** Adds another SIMDRegister to the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator+= (SIMDRegister v) noexcept      { value = NativeOps::add (value, v.value); return *this; }
+
+    /** Subtracts another SIMDRegister from the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator-= (SIMDRegister v) noexcept      { value = NativeOps::sub (value, v.value); return *this; }
+
+    /** Multiplies the receiver by another SIMDRegister. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator*= (SIMDRegister v) noexcept      { value = CmplxOps::mul (value, v.value); return *this; }
+
+    //==============================================================================
+    /** Broadcasts the scalar to all elements of the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator=  (ElementType s) noexcept       { value  = CmplxOps::expand (s); return *this; }
+
+    /** Adds a scalar to the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator+= (ElementType s) noexcept       { value = NativeOps::add (value, CmplxOps::expand (s)); return *this; }
+
+    /** Subtracts a scalar from the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator-= (ElementType s) noexcept       { value = NativeOps::sub (value, CmplxOps::expand (s)); return *this; }
+
+    /** Multiplies the receiver by a scalar. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator*= (ElementType s) noexcept       { value = CmplxOps::mul (value, CmplxOps::expand (s)); return *this; }
+
+    //==============================================================================
+    /** Bit-and the receiver with SIMDRegister v and store the result in the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator&= (vMaskType v) noexcept         { value = NativeOps::bit_and (value, toVecType (v.value)); return *this; }
+
+    /** Bit-or the receiver with SIMDRegister v and store the result in the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator|= (vMaskType v) noexcept         { value = NativeOps::bit_or  (value, toVecType (v.value)); return *this; }
+
+    /** Bit-xor the receiver with SIMDRegister v and store the result in the receiver. */
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator^= (vMaskType v) noexcept         { value = NativeOps::bit_xor (value, toVecType (v.value)); return *this; }
+
+    //==============================================================================
+    /** Bit-and each element of the receiver with the scalar s and store the result in the receiver.*/
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator&= (MaskType s) noexcept           { value = NativeOps::bit_and (value, toVecType (s)); return *this; }
+
+    /** Bit-or each element of the receiver with the scalar s and store the result in the receiver.*/
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator|= (MaskType s) noexcept           { value = NativeOps::bit_or  (value, toVecType (s)); return *this; }
+
+    /** Bit-xor each element of the receiver with the scalar s and store the result in the receiver.*/
+    inline SIMDRegister& JUCE_VECTOR_CALLTYPE operator^= (MaskType s) noexcept           { value = NativeOps::bit_xor (value, toVecType (s)); return *this; }
+
+    //==============================================================================
+    /** Returns the sum of the receiver and v.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator+ (SIMDRegister v) const noexcept  { return { NativeOps::add (value, v.value) }; }
+
+    /** Returns the difference of the receiver and v.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator- (SIMDRegister v) const noexcept  { return { NativeOps::sub (value, v.value) }; }
+
+    /** Returns the product of the receiver and v.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator* (SIMDRegister v) const noexcept  { return { CmplxOps::mul (value, v.value) }; }
+
+    //==============================================================================
+    /** Returns a vector where each element is the sum of the corresponding element in the receiver and the scalar s.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator+ (ElementType s) const noexcept   { return { NativeOps::add (value, CmplxOps::expand (s)) }; }
+
+    /** Returns a vector where each element is the difference of the corresponding element in the receiver and the scalar s.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator- (ElementType s) const noexcept   { return { NativeOps::sub (value, CmplxOps::expand (s)) }; }
+
+    /** Returns a vector where each element is the product of the corresponding element in the receiver and the scalar s.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator* (ElementType s) const noexcept   { return { CmplxOps::mul (value, CmplxOps::expand (s)) }; }
+
+    //==============================================================================
+    /** Returns the bit-and of the receiver and v. */
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator& (vMaskType v) const noexcept     { return { NativeOps::bit_and (value, toVecType (v.value)) }; }
+
+    /** Returns the bit-or of the receiver and v. */
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator| (vMaskType v) const noexcept     { return { NativeOps::bit_or  (value, toVecType (v.value)) }; }
+
+    /** Returns the bit-xor of the receiver and v. */
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (vMaskType v) const noexcept     { return { NativeOps::bit_xor (value, toVecType (v.value)) }; }
+
+    /** Returns a vector where each element is the bit-inverted value of the corresponding element in the receiver.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator~() const noexcept                 { return { NativeOps::bit_not (value) }; }
+
+    //==============================================================================
+    /** Returns a vector where each element is the bit-and'd value of the corresponding element in the receiver and the scalar s.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator& (MaskType s) const noexcept      { return { NativeOps::bit_and (value, toVecType (s)) }; }
+
+    /** Returns a vector where each element is the bit-or'd value of the corresponding element in the receiver and the scalar s.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator| (MaskType s) const noexcept      { return { NativeOps::bit_or  (value, toVecType (s)) }; }
+
+    /** Returns a vector where each element is the bit-xor'd value of the corresponding element in the receiver and the scalar s.*/
+    inline SIMDRegister JUCE_VECTOR_CALLTYPE operator^ (MaskType s) const noexcept      { return { NativeOps::bit_xor (value, toVecType (s)) }; }
+
+    //==============================================================================
+    /** Returns true if all element-wise comparisons return true. */
+    inline bool JUCE_VECTOR_CALLTYPE operator== (SIMDRegister other) const noexcept    { return  NativeOps::allEqual (value, other.value); }
+
+    /** Returns true if any element-wise comparison returns false. */
+    inline bool JUCE_VECTOR_CALLTYPE operator!= (SIMDRegister other) const noexcept    { return ! (*this == other); }
+
+    /** Returns true if all elements are equal to the scalar. */
+    inline bool JUCE_VECTOR_CALLTYPE operator== (Type s) const noexcept                { return *this == SIMDRegister::expand (s); }
+
+    /** Returns true if any elements are not equal to the scalar. */
+    inline bool JUCE_VECTOR_CALLTYPE operator!= (Type s) const noexcept                { return ! (*this == s); }
+
+    //==============================================================================
+    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
+        if the corresponding element of a is equal to the corresponding element of b, or zero otherwise.
+        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
+    static inline vMaskType JUCE_VECTOR_CALLTYPE equal              (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::equal (a.value, b.value)); }
+
+    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
+        if the corresponding element of a is not equal to the corresponding element of b, or zero otherwise.
+        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
+    static inline vMaskType JUCE_VECTOR_CALLTYPE notEqual           (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::notEqual (a.value, b.value)); }
+
+    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
+        if the corresponding element of a is less than the corresponding element of b, or zero otherwise.
+        The result can then be used in bit operations defined above to avoid branches in vector SIMD code.
+        (Implemented as greaterThan with the operands swapped.) */
+    static inline vMaskType JUCE_VECTOR_CALLTYPE lessThan           (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThan (b.value, a.value)); }
+
+    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
+        if the corresponding element of a is less than or equal to the corresponding element of b, or zero otherwise.
+        The result can then be used in bit operations defined above to avoid branches in vector SIMD code.
+        (Implemented as greaterThanOrEqual with the operands swapped.) */
+    static inline vMaskType JUCE_VECTOR_CALLTYPE lessThanOrEqual    (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThanOrEqual (b.value, a.value)); }
+
+    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
+        if the corresponding element of a is greater than the corresponding element of b, or zero otherwise.
+        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
+    static inline vMaskType JUCE_VECTOR_CALLTYPE greaterThan        (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThan (a.value, b.value)); }
+
+    /** Returns a SIMDRegister of the corresponding integral type where each element has each bit set
+        if the corresponding element of a is greater than or equal to the corresponding element of b, or zero otherwise.
+        The result can then be used in bit operations defined above to avoid branches in vector SIMD code. */
+    static inline vMaskType JUCE_VECTOR_CALLTYPE greaterThanOrEqual (SIMDRegister a, SIMDRegister b) noexcept { return toMaskType (NativeOps::greaterThanOrEqual (a.value, b.value)); }
+
+     //==============================================================================
+    /** Returns a new vector where each element is the minimum of the corresponding element of a and b. */
+    static inline SIMDRegister JUCE_VECTOR_CALLTYPE min (SIMDRegister a, SIMDRegister b) noexcept    { return { NativeOps::min (a.value, b.value) }; }
+
+    /** Returns a new vector where each element is the maximum of the corresponding element of a and b. */
+    static inline SIMDRegister JUCE_VECTOR_CALLTYPE max (SIMDRegister a, SIMDRegister b) noexcept    { return { NativeOps::max (a.value, b.value) }; }
+
+    //==============================================================================
+    /** Multiplies b and c and adds the result to a. */
+    static inline SIMDRegister JUCE_VECTOR_CALLTYPE multiplyAdd (SIMDRegister a, const SIMDRegister b, SIMDRegister c) noexcept
+    {
+        return { CmplxOps::muladd (a.value, b.value, c.value) };
+    }
+
+    //==============================================================================
+    /** Returns a scalar which is the sum of all elements of the receiver. */
+    inline ElementType sum() const noexcept          { return CmplxOps::sum (value); }
+
+    //==============================================================================
+    /** Checks if the given pointer is sufficiently aligned for using SIMD operations,
+        i.e. whether its address is a multiple of SIMDRegisterSize. The test masks the
+        address with SIMDRegisterSize - 1, which relies on SIMDRegisterSize being a
+        power of two. */
+    static inline bool isSIMDAligned (const ElementType* ptr) noexcept
+    {
+        uintptr_t bitmask = SIMDRegisterSize - 1;
+        return (reinterpret_cast<uintptr_t> (ptr) & bitmask) == 0;
+    }
+
+    /** Returns the next position in memory where isSIMDAligned returns true.
+
+        If the current position in memory is already aligned then this method
+        will simply return the pointer.
+    */
+    static inline ElementType* getNextSIMDAlignedPtr (ElementType* ptr) noexcept
+    {
+        return snapPointerToAlignment (ptr, SIMDRegisterSize);
+    }
+
+   #ifndef DOXYGEN
+    // Const-pointer overload of getNextSIMDAlignedPtr.
+    static inline const ElementType* getNextSIMDAlignedPtr (const ElementType* ptr) noexcept
+    {
+        return snapPointerToAlignment (ptr, SIMDRegisterSize);
+    }
+   #endif
+
+private:
+    // Reinterprets a native vector as the native mask vector by writing one union
+    // member and reading the other, then wraps the result in a vMaskType.
+    static inline vMaskType JUCE_VECTOR_CALLTYPE toMaskType (vSIMDType a) noexcept
+    {
+        union
+        {
+            vSIMDType in;
+            vMaskSIMDType out;
+        } u;
+
+        u.in = a;
+        return vMaskType::fromNative (u.out);
+    }
+
+    // Reinterprets a native mask vector as the native vector type by writing one
+    // union member and reading the other.
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE toVecType (vMaskSIMDType a) noexcept
+    {
+        union
+        {
+            vMaskSIMDType in;
+            vSIMDType out;
+        } u;
+
+        u.in = a;
+        return u.out;
+    }
+
+    // Expands the scalar mask to every element of a native mask vector, then
+    // reinterprets that vector as the native vector type through the same union.
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE toVecType (MaskType a) noexcept
+    {
+        union
+        {
+            vMaskSIMDType in;
+            vSIMDType out;
+        } u;
+
+        u.in = CmplxSIMDOps<MaskType>::expand (a);
+        return u.out;
+    }
+};
+
+} // namespace dsp
+} // namespace juce
+
+#ifndef DOXYGEN
+ #include "juce_SIMDRegister_Impl.h"
+#endif
--- a/modules/juce_dsp/containers/juce_SIMDRegister_Impl.h
+++ b/modules/juce_dsp/containers/juce_SIMDRegister_Impl.h
@ -0,0 +1,178 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+
+//==============================================================================
+/* Proxy object returned by the non-const SIMDRegister::operator[].
+
+   It stores a reference to the owning register together with an element index.
+   Converting it to Type reads that element through SIMDRegister::get, and
+   assigning to it writes the element back through SIMDRegister::set.
+*/
+template <typename Type>
+struct SIMDRegister<Type>::ElementAccess
+{
+    /** Reads the referenced element out of the owning register. */
+    operator Type() const noexcept                              { return simd.get (idx); }
+
+    /** Writes scalar into the referenced element of the owning register. */
+    ElementAccess& operator= (Type scalar) noexcept             { simd.set (idx, scalar); return *this; }
+
+    /** Copies the element value referenced by o into the element referenced by this
+        object; the proxies themselves are never re-bound. The argument is taken by
+        const reference so that const and temporary proxies (such as the result of
+        another operator[] call) bind to this overload directly. */
+    ElementAccess& operator= (const ElementAccess& o) noexcept  { return operator= (static_cast<Type> (o)); }
+
+private:
+    // Only the owning SIMDRegister may create proxies.
+    friend struct SIMDRegister;
+    ElementAccess (SIMDRegister& owner, size_t index) noexcept : simd (owner), idx (index) {}
+    SIMDRegister& simd;
+    size_t idx;
+};
+
+#ifndef DOXYGEN
+//==============================================================================
+/* This class is used internally by SIMDRegister to abstract away differences
+   in operations which are different for complex and pure floating point types. */
+
+// the pure floating-point version
+template <typename Scalar>
+struct CmplxSIMDOps
+{
+    // Every operation of this generic version forwards unchanged to SIMDNativeOps<Scalar>.
+    using vSIMDType = typename SIMDNativeOps<Scalar>::vSIMDType;
+
+    /** Loads a native vector from the scalar array a. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE load (const Scalar* a) noexcept                        { return SIMDNativeOps<Scalar>::load (a); }
+
+    /** Stores the native vector value into the scalar array dest. */
+    static inline void JUCE_VECTOR_CALLTYPE store (vSIMDType value, Scalar* dest) noexcept              { SIMDNativeOps<Scalar>::store (value, dest); }
+
+    /** Expands the scalar s into a native vector. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE expand (Scalar s) noexcept                             { return SIMDNativeOps<Scalar>::expand (s); }
+
+    /** Returns element i of the native vector v. */
+    static inline Scalar JUCE_VECTOR_CALLTYPE get (vSIMDType v, std::size_t i) noexcept                 { return SIMDNativeOps<Scalar>::get (v, i); }
+
+    /** Returns a copy of v in which element i has been replaced by s. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE set (vSIMDType v, std::size_t i, Scalar s) noexcept    { return SIMDNativeOps<Scalar>::set (v, i, s); }
+
+    /** Returns the sum of the elements of a. */
+    static inline Scalar JUCE_VECTOR_CALLTYPE sum (vSIMDType a) noexcept                                { return SIMDNativeOps<Scalar>::sum (a); }
+
+    /** Returns the product of a and b. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE mul (vSIMDType a, vSIMDType b) noexcept                { return SIMDNativeOps<Scalar>::mul (a, b); }
+
+    /** Forwards to the native multiplyAdd of a, b and c. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE muladd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return SIMDNativeOps<Scalar>::multiplyAdd (a, b, c); }
+};
+
+// The pure complex version
+template <typename Scalar>
+struct CmplxSIMDOps<std::complex<Scalar>>
+{
+    // A complex register is held in the native vector of the underlying Scalar type, with
+    // each complex element occupying two consecutive scalar slots: real part, then imaginary.
+    using vSIMDType = typename SIMDNativeOps<Scalar>::vSIMDType;
+
+    /** Loads a native vector from an array of complex values, read as plain scalars. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE load (const std::complex<Scalar>* a) noexcept
+    {
+        const auto* scalars = reinterpret_cast<const Scalar*> (a);
+        return SIMDNativeOps<Scalar>::load (scalars);
+    }
+
+    /** Stores a native vector into an array of complex values, written as plain scalars. */
+    static inline void JUCE_VECTOR_CALLTYPE store (vSIMDType value, std::complex<Scalar>* dest) noexcept
+    {
+        auto* scalars = reinterpret_cast<Scalar*> (dest);
+        SIMDNativeOps<Scalar>::store (value, scalars);
+    }
+
+    /** Broadcasts the complex value s: even scalar slots receive the real part and
+        odd scalar slots receive the imaginary part. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE expand (std::complex<Scalar> s) noexcept
+    {
+        constexpr int numScalars = sizeof (vSIMDType) / sizeof (Scalar);
+
+        union
+        {
+            vSIMDType vec;
+            Scalar scalars[numScalars];
+        } pun;
+
+        for (int slot = 0; slot < numScalars; ++slot)
+        {
+            if ((slot & 1) == 0)
+                pun.scalars[slot] = s.real();
+            else
+                pun.scalars[slot] = s.imag();
+        }
+
+        return pun.vec;
+    }
+
+    /** Returns the i-th complex element, assembled from scalar slots 2i and 2i + 1. */
+    static inline std::complex<Scalar> JUCE_VECTOR_CALLTYPE get (vSIMDType v, std::size_t i) noexcept
+    {
+        const auto realSlot = i << 1;
+        const auto imagSlot = realSlot + 1;
+
+        return std::complex<Scalar> (SIMDNativeOps<Scalar>::get (v, realSlot),
+                                     SIMDNativeOps<Scalar>::get (v, imagSlot));
+    }
+
+    /** Returns a copy of v whose i-th complex element (scalar slots 2i and 2i + 1) is s. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE set (vSIMDType v, std::size_t i, std::complex<Scalar> s) noexcept
+    {
+        const auto realSlot = i << 1;
+        const auto imagSlot = realSlot + 1;
+
+        // Write the real part first, then the imaginary part into the updated vector.
+        return SIMDNativeOps<Scalar>::set (SIMDNativeOps<Scalar>::set (v, realSlot, s.real()), imagSlot, s.imag());
+    }
+
+    /** Returns the complex sum of all elements: the first two scalar slots of the
+        native oddevensum result are used as the real and imaginary parts. */
+    static inline std::complex<Scalar> JUCE_VECTOR_CALLTYPE sum (vSIMDType a) noexcept
+    {
+        vSIMDType reduced = SIMDNativeOps<Scalar>::oddevensum (a);
+        auto* slots = reinterpret_cast<const Scalar*> (&reduced);
+
+        return std::complex<Scalar> (slots[0], slots[1]);
+    }
+
+    /** Returns the complex product of a and b. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return SIMDNativeOps<Scalar>::cmplxmul (a, b);
+    }
+
+    /** Returns a plus the complex product of b and c. */
+    static inline vSIMDType JUCE_VECTOR_CALLTYPE muladd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        const auto product = SIMDNativeOps<Scalar>::cmplxmul (b, c);
+        return SIMDNativeOps<Scalar>::add (a, product);
+    }
+};
+#endif
+
+//==============================================================================
+ namespace util
+ {
+     /** Overload of snapToZero for SIMDRegister values: deliberately does nothing. */
+     template <typename Type>
+     inline void snapToZero (SIMDRegister<Type>&) noexcept
+     {
+     }
+ }
+
+} // namespace dsp
+
+// Extend some commonly used global functions to SIMDRegister types
+template <typename Type>
+inline dsp::SIMDRegister<Type> JUCE_VECTOR_CALLTYPE jmin (dsp::SIMDRegister<Type> a, dsp::SIMDRegister<Type> b)
+{
+    // Element-wise minimum, delegated to SIMDRegister::min.
+    using Register = dsp::SIMDRegister<Type>;
+    return Register::min (a, b);
+}
+template <typename Type>
+inline dsp::SIMDRegister<Type> JUCE_VECTOR_CALLTYPE jmax (dsp::SIMDRegister<Type> a, dsp::SIMDRegister<Type> b)
+{
+    // Element-wise maximum, delegated to SIMDRegister::max.
+    using Register = dsp::SIMDRegister<Type>;
+    return Register::max (a, b);
+}
+
+} // namespace juce
--- a/modules/juce_dsp/containers/juce_SIMDRegister_test.cpp
+++ b/modules/juce_dsp/containers/juce_SIMDRegister_test.cpp
@ -0,0 +1,842 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+namespace SIMDRegister_test_internal
+{
+    /** Produces a random value of a primitive type.
+        The unspecialised template is empty; only the floating-point and
+        integral specialisations below provide next().
+    */
+    template <typename type, typename = void> struct RandomPrimitive {};
+
+    /** Floating-point flavour: scales random.nextFloat() by 16 and shifts it
+        down by 8 for signed types, or scales it by 8 for unsigned ones.
+    */
+    template <typename type>
+    struct RandomPrimitive<type, typename std::enable_if<std::is_floating_point<type>::value>::type>
+    {
+        static type next (Random& random)
+        {
+            const double unit = random.nextFloat();
+            const double scaled = std::is_signed<type>::value ? (unit * 16.0) - 8.0
+                                                              : (unit * 8.0);
+
+            return static_cast<type> (scaled);
+        }
+    };
+
+    /** Integral flavour: truncates random.nextInt64() to the requested type. */
+    template <typename type>
+    struct RandomPrimitive<type, typename std::enable_if<std::is_integral<type>::value>::type>
+    {
+        static type next (Random& random)
+        {
+            const auto bits = random.nextInt64();
+
+            return static_cast<type> (bits);
+        }
+    };
+
+    /** Produces a random value of any element type; primitives are delegated to RandomPrimitive. */
+    template <typename type>
+    struct RandomValue
+    {
+        static type next (Random& random)
+        {
+            return RandomPrimitive<type>::next (random);
+        }
+    };
+
+    /** Complex flavour: draws the real part first, then the imaginary part. */
+    template <typename type>
+    struct RandomValue<std::complex<type>>
+    {
+        static std::complex<type> next (Random& random)
+        {
+            const type realPart = RandomPrimitive<type>::next (random);
+            const type imagPart = RandomPrimitive<type>::next (random);
+
+            return {realPart, imagPart};
+        }
+    };
+
+
+    /** Fills a raw array of `size` elements with random values. */
+    template <typename type>
+    struct VecFiller
+    {
+        static void fill (type* dst, const int size, Random& random)
+        {
+            int index = 0;
+
+            while (index < size)
+            {
+                dst[index] = RandomValue<type>::next (random);
+                ++index;
+            }
+        }
+    };
+
+    // Complex types need their own specialisation: without it GCC 6 hits an
+    // internal compiler error (ICE), after which the compiler seg faults.
+    template <typename type>
+    struct VecFiller<std::complex<type>>
+    {
+        static void fill (std::complex<type>* dst, const int size, Random& random)
+        {
+            int index = 0;
+
+            while (index < size)
+            {
+                dst[index] = std::complex<type> (RandomValue<type>::next (random), RandomValue<type>::next (random));
+                ++index;
+            }
+        }
+    };
+
+    /** Register flavour: fills an aligned scratch array with random values and
+        loads it into a SIMDRegister, which is returned.
+    */
+    template <typename type>
+    struct VecFiller<SIMDRegister<type>>
+    {
+        static SIMDRegister<type> fill(Random& random)
+        {
+            using Register = SIMDRegister<type>;
+            constexpr int numElements = (int) Register::SIMDNumElements;
+
+           #ifdef _MSC_VER
+            __declspec(align(sizeof (SIMDRegister<type>))) type scratch[numElements];
+           #else
+            type scratch[numElements] __attribute__((aligned(sizeof (SIMDRegister<type>))));
+           #endif
+
+            VecFiller<type>::fill (scratch, numElements, random);
+
+            return Register::fromRawArray (scratch);
+        }
+    };
+
+    // Absolute value computed via double, which avoids a Visual Studio warning
+    template <typename type>
+    static type safeAbs (type a)
+    {
+        const double magnitude = std::abs (static_cast<double> (a));
+
+        return static_cast<type> (magnitude);
+    }
+
+    // Complex overload: the result is the modulus of the complex number
+    template <typename type>
+    static type safeAbs (std::complex<type> a)
+    {
+        const type modulus = std::abs (a);
+
+        return modulus;
+    }
+
+    /** Returns the magnitude of a single value (via safeAbs) as a double. */
+    template <typename type>
+    static double difference (type a)
+    {
+        const auto magnitude = safeAbs (a);
+
+        return static_cast<double> (magnitude);
+    }
+
+    /** Returns the magnitude of (a - b) as a double. */
+    template <typename type>
+    static double difference (type a, type b)
+    {
+        const auto delta = a - b;
+
+        return difference (delta);
+    }
+}
+
+// These tests must be run on every platform supported by JUCE, as the
+// SIMD code is highly platform dependent.
+
+class SIMDRegisterUnitTests   : public UnitTest
+{
+public:
+    /** Creates the test with the name "SIMDRegister UnitTests" in the "DSP" category. */
+    SIMDRegisterUnitTests()
+        : UnitTest ("SIMDRegister UnitTests", "DSP")
+    {
+    }
+
+    //==============================================================================
+    // Some helper functions
+    /** Returns true if every element of vec compares equal to scalar. */
+    template <typename type>
+    static bool allValuesEqualTo (const SIMDRegister<type>& vec, const type scalar)
+    {
+       #ifdef _MSC_VER
+        __declspec(align(sizeof (SIMDRegister<type>))) type unpacked[SIMDRegister<type>::SIMDNumElements];
+       #else
+        type unpacked[SIMDRegister<type>::SIMDNumElements] __attribute__((aligned(sizeof (SIMDRegister<type>))));
+       #endif
+
+        // the register is copied out to a raw array so that this check does
+        // not rely on the register's access operator
+        vec.copyToRawArray (unpacked);
+
+        for (const type& element : unpacked)
+        {
+            if (element != scalar)
+                return false;
+        }
+
+        return true;
+    }
+
+    /** Returns true if every element of vec is within 1e-4 of the matching
+        element of array. The first mismatch is reported through DBG.
+    */
+    template <typename type>
+    static bool vecEqualToArray (const SIMDRegister<type>& vec, const type* array)
+    {
+        using Register = SIMDRegister<type>;
+
+        // allocate twice the register size so that a SIMD-aligned pointer can be picked inside it
+        HeapBlock<type> storage (Register::SIMDNumElements * 2);
+        auto* unpacked = Register::getNextSIMDAlignedPtr (storage.getData());
+        vec.copyToRawArray (unpacked);
+
+        for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+        {
+            const double delta = SIMDRegister_test_internal::difference (unpacked[index], array[index]);
+
+            // written as a negated "greater than" so that a NaN delta is still accepted
+            if (! (delta > 1e-4))
+                continue;
+
+            DBG ("a: " << SIMDRegister_test_internal::difference (unpacked[index]) << " b: " << SIMDRegister_test_internal::difference (array[index]) << " difference: " << delta);
+            return false;
+        }
+
+        return true;
+    }
+
+    /** Loads SIMDNumElements values from ptr into vec.
+        Uses fromRawArray() when ptr is SIMD-aligned, and an element-by-element
+        copy otherwise.
+    */
+    template <typename type>
+    static void copy (SIMDRegister<type>& vec, const type* ptr)
+    {
+        using Register = SIMDRegister<type>;
+
+        if (! Register::isSIMDAligned (ptr))
+        {
+            for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                vec[index] = ptr[index];
+
+            return;
+        }
+
+        vec = Register::fromRawArray (ptr);
+    }
+
+    //==============================================================================
+    // Some useful operations to test
+    /** Operation policy wrapping operator+= and operator+. */
+    struct Addition
+    {
+        /** Applies lhs += rhs. */
+        template <typename LhsType, typename RhsType>
+        static void inplace (LhsType& lhs, const RhsType& rhs)
+        {
+            lhs += rhs;
+        }
+
+        /** Returns lhs + rhs, converted to the type of lhs. */
+        template <typename LhsType, typename RhsType>
+        static LhsType outofplace (const LhsType& lhs, const RhsType& rhs)
+        {
+            return lhs + rhs;
+        }
+    };
+
+    /** Operation policy wrapping operator-= and operator-. */
+    struct Subtraction
+    {
+        /** Applies lhs -= rhs. */
+        template <typename LhsType, typename RhsType>
+        static void inplace (LhsType& lhs, const RhsType& rhs)
+        {
+            lhs -= rhs;
+        }
+
+        /** Returns lhs - rhs, converted to the type of lhs. */
+        template <typename LhsType, typename RhsType>
+        static LhsType outofplace (const LhsType& lhs, const RhsType& rhs)
+        {
+            return lhs - rhs;
+        }
+    };
+
+    /** Operation policy wrapping operator*= and operator*. */
+    struct Multiplication
+    {
+        /** Applies lhs *= rhs. */
+        template <typename LhsType, typename RhsType>
+        static void inplace (LhsType& lhs, const RhsType& rhs)
+        {
+            lhs *= rhs;
+        }
+
+        /** Returns lhs * rhs, converted to the type of lhs. */
+        template <typename LhsType, typename RhsType>
+        static LhsType outofplace (const LhsType& lhs, const RhsType& rhs)
+        {
+            return lhs * rhs;
+        }
+    };
+
+    /** Operation policy wrapping operator&= and operator&. */
+    struct BitAND
+    {
+        /** Applies lhs &= rhs. */
+        template <typename LhsType, typename RhsType>
+        static void inplace (LhsType& lhs, const RhsType& rhs)
+        {
+            lhs &= rhs;
+        }
+
+        /** Returns lhs & rhs, converted to the type of lhs. */
+        template <typename LhsType, typename RhsType>
+        static LhsType outofplace (const LhsType& lhs, const RhsType& rhs)
+        {
+            return lhs & rhs;
+        }
+    };
+
+    /** Operation policy wrapping operator|= and operator|. */
+    struct BitOR
+    {
+        /** Applies lhs |= rhs. */
+        template <typename LhsType, typename RhsType>
+        static void inplace (LhsType& lhs, const RhsType& rhs)
+        {
+            lhs |= rhs;
+        }
+
+        /** Returns lhs | rhs, converted to the type of lhs. */
+        template <typename LhsType, typename RhsType>
+        static LhsType outofplace (const LhsType& lhs, const RhsType& rhs)
+        {
+            return lhs | rhs;
+        }
+    };
+
+    /** Operation policy wrapping operator^= and operator^. */
+    struct BitXOR
+    {
+        /** Applies lhs ^= rhs. */
+        template <typename LhsType, typename RhsType>
+        static void inplace (LhsType& lhs, const RhsType& rhs)
+        {
+            lhs ^= rhs;
+        }
+
+        /** Returns lhs ^ rhs, converted to the type of lhs. */
+        template <typename LhsType, typename RhsType>
+        static LhsType outofplace (const LhsType& lhs, const RhsType& rhs)
+        {
+            return lhs ^ rhs;
+        }
+    };
+
+    //==============================================================================
+    // the individual tests
+    /** Checks expand(), fromRawArray() and that a copied register is independent of its source. */
+    struct InitializationTest
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+
+            // expand() must place the same scalar in every element
+            u.expect (allValuesEqualTo<type> (Register::expand (static_cast<type> (23)), 23));
+
+           #ifdef _MSC_VER
+            __declspec(align(sizeof (SIMDRegister<type>))) type reference[SIMDRegister<type>::SIMDNumElements];
+           #else
+            type reference[SIMDRegister<type>::SIMDNumElements] __attribute__((aligned(sizeof (SIMDRegister<type>))));
+           #endif
+
+            SIMDRegister_test_internal::VecFiller<type>::fill (reference, Register::SIMDNumElements, random);
+
+            // a register loaded from a raw array must hold exactly that array
+            Register original (Register::fromRawArray (reference));
+            u.expect (vecEqualToArray (original, reference));
+
+            // modifying the original afterwards must not affect the copy
+            Register duplicate (original);
+            original *= static_cast<type> (2);
+
+            u.expect (vecEqualToArray (duplicate, reference));
+        }
+    };
+
+    /** Checks element access through operator[] on both mutable and const registers. */
+    struct AccessTest
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+
+            // random reference data
+            type expected [Register::SIMDNumElements];
+            SIMDRegister_test_internal::VecFiller<type>::fill (expected, Register::SIMDNumElements, random);
+
+            // write every element through the non-const access operator
+            Register vec;
+
+            for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                vec[index] = expected[index];
+
+            u.expect (vecEqualToArray (vec, expected));
+
+            // read every element back through the const access operator
+            const Register& constView = vec;
+
+            for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                u.expect (constView[index] == expected[index]);
+        }
+    };
+
+    /** Runs an arithmetic Operation policy (in-place and out-of-place, with a
+        vector or a scalar right-hand side) on registers and on plain arrays,
+        and checks that both give the same results.
+    */
+    template <class Operation>
+    struct OperatorTests
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+            using Filler   = SIMDRegister_test_internal::VecFiller<type>;
+
+            for (int iteration = 0; iteration < 100; ++iteration)
+            {
+                Register a (static_cast<type> (0));
+                Register b (static_cast<type> (0));
+                Register c (static_cast<type> (0));
+
+                type array_a [Register::SIMDNumElements];
+                type array_b [Register::SIMDNumElements];
+                type array_c [Register::SIMDNumElements];
+
+                // refills the three reference arrays and mirrors them into the registers
+                const auto randomiseOperands = [&]()
+                {
+                    Filler::fill (array_a, Register::SIMDNumElements, random);
+                    Filler::fill (array_b, Register::SIMDNumElements, random);
+                    Filler::fill (array_c, Register::SIMDNumElements, random);
+
+                    copy (a, array_a);
+                    copy (b, array_b);
+                    copy (c, array_c);
+                };
+
+                // in-place, both operands are vectors
+                randomiseOperands();
+
+                for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                    Operation::template inplace<type, type> (array_a[index], array_b[index]);
+
+                Operation::template inplace<SIMDRegister<type>, SIMDRegister<type>> (a, b);
+
+                u.expect (vecEqualToArray (a, array_a));
+                u.expect (vecEqualToArray (b, array_b));
+
+                // in-place, right-hand operand is a scalar
+                randomiseOperands();
+
+                for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                    Operation::template inplace<type, type> (array_b[index], static_cast<type> (2));
+
+                Operation::template inplace<SIMDRegister<type>, type> (b, 2);
+
+                u.expect (vecEqualToArray (a, array_a));
+                u.expect (vecEqualToArray (b, array_b));
+
+                // out-of-place, both operands are vectors
+                randomiseOperands();
+
+                for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                    array_c[index] = Operation::template outofplace<type, type> (array_a[index], array_b[index]);
+
+                c = Operation::template outofplace<SIMDRegister<type>, SIMDRegister<type>> (a, b);
+
+                u.expect (vecEqualToArray (a, array_a));
+                u.expect (vecEqualToArray (b, array_b));
+                u.expect (vecEqualToArray (c, array_c));
+
+                // out-of-place, right-hand operand is a scalar (operands are not refilled here)
+                for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+                    array_c[index] = Operation::template outofplace<type, type> (array_b[index], static_cast<type> (2));
+
+                c = Operation::template outofplace<SIMDRegister<type>, type> (b, 2);
+
+                u.expect (vecEqualToArray (a, array_a));
+                u.expect (vecEqualToArray (b, array_b));
+                u.expect (vecEqualToArray (c, array_c));
+            }
+        }
+    };
+
+    /** Runs a bitwise Operation policy (BitAND, BitOR or BitXOR) on a register
+        whose right-hand side is a mask register or a mask scalar, and checks the
+        result against the same operation applied to the raw bit patterns.
+
+        Each of the 100 iterations first checks that applying the operation to a
+        register gives the same bits as applying it to the register's mask-typed
+        view, then covers the in-place and out-of-place forms with vector and
+        scalar masks.
+    */
+    template <class Operation>
+    struct BitOperatorTests
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            typedef typename SIMDRegister<type>::vMaskType vMaskType;
+            typedef typename SIMDRegister<type>::MaskType MaskType;
+
+            for (int n = 0; n < 100; ++n)
+            {
+                // Check flip sign bit and using as a union
+                {
+                    type array_a [SIMDRegister<type>::SIMDNumElements];
+
+                    // gives access to the same register both as values and as raw mask bits
+                    union ConversionUnion
+                    {
+                        inline ConversionUnion() : floatVersion (static_cast<type> (0)) {}
+                        inline ~ConversionUnion() {}
+                        SIMDRegister<type> floatVersion;
+                        vMaskType intVersion;
+                    } a, b;
+
+                    // The sign bit is the top bit of each mask element. sizeof() counts bytes,
+                    // so it must be scaled to bits before it can be used as a shift count.
+                    // (assumes 8-bit bytes and an unsigned MaskType)
+                    vMaskType bitmask = vMaskType::expand (static_cast<MaskType> (static_cast<MaskType> (1) << (sizeof (MaskType) * 8 - 1)));
+                    SIMDRegister_test_internal::VecFiller<type>::fill (array_a, SIMDRegister<type>::SIMDNumElements, random);
+                    copy (a.floatVersion, array_a);
+                    copy (b.floatVersion, array_a);
+
+                    // apply the mask once through the value-typed view and once through the mask-typed view
+                    Operation::template inplace<SIMDRegister<type>, vMaskType> (a.floatVersion, bitmask);
+                    Operation::template inplace<vMaskType, vMaskType> (b.intVersion, bitmask);
+
+                   #ifdef _MSC_VER
+                    __declspec(align(sizeof (SIMDRegister<type>))) type elements[SIMDRegister<type>::SIMDNumElements];
+                   #else
+                    type elements[SIMDRegister<type>::SIMDNumElements] __attribute__((aligned(sizeof (SIMDRegister<type>))));
+                   #endif
+                    b.floatVersion.copyToRawArray (elements);
+
+                    // both routes must end up with the same register contents
+                    u.expect (vecEqualToArray (a.floatVersion, elements));
+                }
+
+                // set-up
+                SIMDRegister<type> a, c;
+                vMaskType b;
+
+                // array_* hold raw bit patterns, float_* hold the same bits viewed as `type`
+                MaskType array_a [SIMDRegister<MaskType>::SIMDNumElements];
+                MaskType array_b [SIMDRegister<MaskType>::SIMDNumElements];
+                MaskType array_c [SIMDRegister<MaskType>::SIMDNumElements];
+
+                type float_a [SIMDRegister<type>::SIMDNumElements];
+                type float_c [SIMDRegister<type>::SIMDNumElements];
+
+                SIMDRegister_test_internal::VecFiller<type>::fill (float_a, SIMDRegister<type>::SIMDNumElements, random);
+                SIMDRegister_test_internal::VecFiller<MaskType>::fill (array_b, SIMDRegister<MaskType>::SIMDNumElements, random);
+                SIMDRegister_test_internal::VecFiller<type>::fill (float_c, SIMDRegister<type>::SIMDNumElements, random);
+
+                memcpy (array_a, float_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                memcpy (array_c, float_c, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                copy (a, float_a); copy (b, array_b); copy (c, float_c);
+
+                // test in-place with both params being vectors
+                for (size_t i = 0; i < SIMDRegister<MaskType>::SIMDNumElements; ++i)
+                    Operation::template inplace<MaskType, MaskType> (array_a[i], array_b[i]);
+                memcpy (float_a, array_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+
+                Operation::template inplace<SIMDRegister<type>, vMaskType> (a, b);
+
+                u.expect (vecEqualToArray (a, float_a));
+                u.expect (vecEqualToArray (b, array_b));
+
+                SIMDRegister_test_internal::VecFiller<type>::fill (float_a, SIMDRegister<type>::SIMDNumElements, random);
+                SIMDRegister_test_internal::VecFiller<MaskType>::fill (array_b, SIMDRegister<MaskType>::SIMDNumElements, random);
+                SIMDRegister_test_internal::VecFiller<type>::fill (float_c, SIMDRegister<type>::SIMDNumElements, random);
+                memcpy (array_a, float_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                memcpy (array_c, float_c, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                copy (a, float_a); copy (b, array_b); copy (c, float_c);
+
+                // test in-place with one param being scalar
+                for (size_t i = 0; i < SIMDRegister<MaskType>::SIMDNumElements; ++i)
+                    Operation::template inplace<MaskType, MaskType> (array_a[i], static_cast<MaskType> (9));
+                memcpy (float_a, array_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+
+                Operation::template inplace<SIMDRegister<type>, MaskType> (a, static_cast<MaskType> (9));
+
+                u.expect (vecEqualToArray (a, float_a));
+                u.expect (vecEqualToArray (b, array_b));
+
+                // set-up again
+                SIMDRegister_test_internal::VecFiller<type>::fill (float_a, SIMDRegister<type>::SIMDNumElements, random);
+                SIMDRegister_test_internal::VecFiller<MaskType>::fill (array_b, SIMDRegister<MaskType>::SIMDNumElements, random);
+                SIMDRegister_test_internal::VecFiller<type>::fill (float_c, SIMDRegister<type>::SIMDNumElements, random);
+                memcpy (array_a, float_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                memcpy (array_c, float_c, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                copy (a, float_a); copy (b, array_b); copy (c, float_c);
+
+                // test out-of-place with both params being vectors
+                for (size_t i = 0; i < SIMDRegister<MaskType>::SIMDNumElements; ++i)
+                {
+                    array_c[i] =
+                        Operation::template outofplace<MaskType, MaskType> (array_a[i], array_b[i]);
+                }
+                memcpy (float_a, array_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                memcpy (float_c, array_c, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+
+                c = Operation::template outofplace<SIMDRegister<type>, vMaskType> (a, b);
+
+                u.expect (vecEqualToArray (a, float_a));
+                u.expect (vecEqualToArray (b, array_b));
+                u.expect (vecEqualToArray (c, float_c));
+
+                // test out-of-place with one param being scalar
+                for (size_t i = 0; i < SIMDRegister<MaskType>::SIMDNumElements; ++i)
+                    array_c[i] = Operation::template outofplace<MaskType, MaskType> (array_a[i], static_cast<MaskType> (9));
+                memcpy (float_a, array_a, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+                memcpy (float_c, array_c, sizeof (type) * SIMDRegister<type>::SIMDNumElements);
+
+                c = Operation::template outofplace<SIMDRegister<type>, MaskType> (a, static_cast<MaskType> (9));
+
+                u.expect (vecEqualToArray (a, float_a));
+                u.expect (vecEqualToArray (b, array_b));
+                u.expect (vecEqualToArray (c, float_c));
+            }
+        }
+    };
+
+    /** Checks the element-wise comparison functions (which return mask
+        registers) and the whole-register == / != operators, against both
+        registers and scalars.
+    */
+    struct CheckComparisonOps
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register  = SIMDRegister<type>;
+            using Filler    = SIMDRegister_test_internal::VecFiller<type>;
+            using vMaskType = typename Register::vMaskType;
+            using MaskType  = typename Register::MaskType;
+
+            // a comparison that holds is expected to set every bit of the mask element
+            const MaskType allBitsSet = static_cast<MaskType> (-1);
+            const MaskType noBitsSet  = 0;
+
+            for (int iteration = 0; iteration < 100; ++iteration)
+            {
+                // set-up
+                type array_a [Register::SIMDNumElements];
+                type array_b [Register::SIMDNumElements];
+
+                MaskType array_eq  [Register::SIMDNumElements];
+                MaskType array_neq [Register::SIMDNumElements];
+                MaskType array_lt  [Register::SIMDNumElements];
+                MaskType array_le  [Register::SIMDNumElements];
+                MaskType array_gt  [Register::SIMDNumElements];
+                MaskType array_ge  [Register::SIMDNumElements];
+
+                Filler::fill (array_a, Register::SIMDNumElements, random);
+                Filler::fill (array_b, Register::SIMDNumElements, random);
+
+                // compute the expected masks one element at a time
+                for (size_t j = 0; j < Register::SIMDNumElements; ++j)
+                {
+                    const type lhs = array_a[j];
+                    const type rhs = array_b[j];
+
+                    array_eq  [j] = (lhs == rhs) ? allBitsSet : noBitsSet;
+                    array_neq [j] = (lhs != rhs) ? allBitsSet : noBitsSet;
+                    array_lt  [j] = (lhs <  rhs) ? allBitsSet : noBitsSet;
+                    array_le  [j] = (lhs <= rhs) ? allBitsSet : noBitsSet;
+                    array_gt  [j] = (lhs >  rhs) ? allBitsSet : noBitsSet;
+                    array_ge  [j] = (lhs >= rhs) ? allBitsSet : noBitsSet;
+                }
+
+                Register a (static_cast<type> (0));
+                Register b (static_cast<type> (0));
+
+                copy (a, array_a);
+                copy (b, array_b);
+
+                vMaskType eq, neq, lt, le, gt, ge;
+
+                eq  = Register::equal              (a, b);
+                neq = Register::notEqual           (a, b);
+                lt  = Register::lessThan           (a, b);
+                le  = Register::lessThanOrEqual    (a, b);
+                gt  = Register::greaterThan        (a, b);
+                ge  = Register::greaterThanOrEqual (a, b);
+
+                u.expect (vecEqualToArray (eq,  array_eq ));
+                u.expect (vecEqualToArray (neq, array_neq));
+                u.expect (vecEqualToArray (lt,  array_lt ));
+                u.expect (vecEqualToArray (le,  array_le ));
+                u.expect (vecEqualToArray (gt,  array_gt ));
+                u.expect (vecEqualToArray (ge,  array_ge ));
+
+                // whole-register inequality: keep drawing until the two arrays really differ
+                do
+                {
+                    Filler::fill (array_a, Register::SIMDNumElements, random);
+                    Filler::fill (array_b, Register::SIMDNumElements, random);
+                } while (std::equal (array_a, array_a + Register::SIMDNumElements, array_b));
+
+                copy (a, array_a);
+                copy (b, array_b);
+
+                u.expect (a != b);
+                u.expect (b != a);
+                u.expect (! (a == b));
+                u.expect (! (b == a));
+
+                // whole-register equality: load both registers from the same array
+                Filler::fill (array_a, Register::SIMDNumElements, random);
+                copy (a, array_a);
+                copy (b, array_a);
+
+                u.expect (a == b);
+                u.expect (b == a);
+                u.expect (! (a != b));
+                u.expect (! (b != a));
+
+                // register against scalar
+                type scalar = a[0];
+                a = Register::expand (scalar);
+
+                u.expect (a == scalar);
+                u.expect (! (a != scalar));
+
+                --scalar;
+
+                u.expect (a != scalar);
+                u.expect (! (a == scalar));
+            }
+        }
+    };
+
+    /** Checks SIMDRegister::multiplyAdd (a, b, c) against a + (b * c) computed element by element. */
+    struct CheckMultiplyAdd
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+            using Filler   = SIMDRegister_test_internal::VecFiller<type>;
+
+            // set-up
+            type array_a [Register::SIMDNumElements];
+            type array_b [Register::SIMDNumElements];
+            type array_c [Register::SIMDNumElements];
+            type array_d [Register::SIMDNumElements];
+
+            Filler::fill (array_a, Register::SIMDNumElements, random);
+            Filler::fill (array_b, Register::SIMDNumElements, random);
+            Filler::fill (array_c, Register::SIMDNumElements, random);
+            Filler::fill (array_d, Register::SIMDNumElements, random);
+
+            // expected result, overwriting the random contents of array_d
+            for (size_t index = 0; index < Register::SIMDNumElements; ++index)
+            {
+                const type product = array_b[index] * array_c[index];
+                array_d[index] = array_a[index] + product;
+            }
+
+            Register a, b, c, d;
+
+            copy (a, array_a);
+            copy (b, array_b);
+            copy (c, array_c);
+
+            d = Register::multiplyAdd (a, b, c);
+
+            u.expect (vecEqualToArray (d, array_d));
+        }
+    };
+
+    /** Checks jmin / jmax and SIMDRegister::min / max against element-wise minima and maxima. */
+    struct CheckMinMax
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+
+            for (int iteration = 0; iteration < 100; ++iteration)
+            {
+                type array_a   [Register::SIMDNumElements];
+                type array_b   [Register::SIMDNumElements];
+                type array_min [Register::SIMDNumElements];
+                type array_max [Register::SIMDNumElements];
+
+                // draw the operands and work out the expected results in a single pass
+                for (size_t j = 0; j < Register::SIMDNumElements; ++j)
+                {
+                    array_a[j] = static_cast<type> (random.nextInt (127));
+                    array_b[j] = static_cast<type> (random.nextInt (127));
+
+                    array_min[j] = (array_a[j] < array_b[j]) ? array_a[j] : array_b[j];
+                    array_max[j] = (array_a[j] > array_b[j]) ? array_a[j] : array_b[j];
+                }
+
+                Register a (static_cast<type> (0));
+                Register b (static_cast<type> (0));
+                Register vMin (static_cast<type> (0));
+                Register vMax (static_cast<type> (0));
+
+                copy (a, array_a);
+                copy (b, array_b);
+
+                // the global jmin / jmax overloads
+                vMin = jmin (a, b);
+                vMax = jmax (a, b);
+
+                u.expect (vecEqualToArray (vMin, array_min));
+                u.expect (vecEqualToArray (vMax, array_max));
+
+                // overwrite the results before testing the static member functions
+                copy (vMin, array_a);
+                copy (vMax, array_a);
+
+                vMin = Register::min (a, b);
+                vMax = Register::max (a, b);
+
+                u.expect (vecEqualToArray (vMin, array_min));
+                u.expect (vecEqualToArray (vMax, array_max));
+            }
+        }
+    };
+
+    /** Checks that SIMDRegister::sum() is within 1e-4 of the sum of the individual elements. */
+    struct CheckSum
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+
+            type values [Register::SIMDNumElements];
+            SIMDRegister_test_internal::VecFiller<type>::fill (values, Register::SIMDNumElements, random);
+
+            // reference sum, accumulated element by element
+            type sumCheck = 0;
+
+            for (const type& element : values)
+                sumCheck += element;
+
+            Register a;
+            copy (a, values);
+
+            u.expect (SIMDRegister_test_internal::difference (sumCheck, a.sum()) < 1e-4);
+        }
+    };
+
+    /** Checks the boolean == and != operators of a register against a scalar and against another register. */
+    struct CheckBoolEquals
+    {
+        template <typename type>
+        static void run (UnitTest& u, Random& random)
+        {
+            using Register = SIMDRegister<type>;
+            using Filler   = SIMDRegister_test_internal::VecFiller<type>;
+
+            type values [Register::SIMDNumElements];
+
+            // a single random scalar, drawn differently for signed and unsigned types
+            const double unit = random.nextFloat();
+            type value = std::is_signed<type>::value ? static_cast<type> ((unit * 16.0) - 8.0)
+                                                     : static_cast<type> (unit * 8.0);
+
+            std::fill (values, values + Register::SIMDNumElements, value);
+
+            Register a, b;
+            copy (a, values);
+
+            // register against scalar
+            u.expect (a == value);
+            u.expect (! (a != value));
+
+            value += 1;
+
+            u.expect (a != value);
+            u.expect (! (a == value));
+
+            // two registers loaded from the same data
+            Filler::fill (values, Register::SIMDNumElements, random);
+            copy (a, values);
+            copy (b, values);
+
+            u.expect (a == b);
+            u.expect (! (a != b));
+
+            // two registers loaded from separate random draws
+            Filler::fill (values, Register::SIMDNumElements, random);
+            copy (b, values);
+
+            u.expect (a != b);
+            u.expect (! (a == b));
+        }
+    };
+
+    //==============================================================================
+    /** Begins a test called unitTestName and runs TheTest::run for every
+        element type: float, double, the 8/16/32/64-bit signed and unsigned
+        integers, and complex float / double.
+    */
+    template <class TheTest>
+    void runTestForAllTypes (const char* unitTestName)
+    {
+        beginTest (unitTestName);
+
+        Random random = getRandom();
+        UnitTest& self = *this;
+
+        // floating point
+        TheTest::template run<float>  (self, random);
+        TheTest::template run<double> (self, random);
+
+        // integers
+        TheTest::template run<int8_t>   (self, random);
+        TheTest::template run<uint8_t>  (self, random);
+        TheTest::template run<int16_t>  (self, random);
+        TheTest::template run<uint16_t> (self, random);
+        TheTest::template run<int32_t>  (self, random);
+        TheTest::template run<uint32_t> (self, random);
+        TheTest::template run<int64_t>  (self, random);
+        TheTest::template run<uint64_t> (self, random);
+
+        // complex
+        TheTest::template run<std::complex<float>>  (self, random);
+        TheTest::template run<std::complex<double>> (self, random);
+    }
+
+    /** Begins a test called unitTestName and runs TheTest::run for every
+        non-complex element type: float, double and the 8/16/32/64-bit signed
+        and unsigned integers.
+    */
+    template <class TheTest>
+    void runTestNonComplex (const char* unitTestName)
+    {
+        beginTest (unitTestName);
+
+        Random random = getRandom();
+        UnitTest& self = *this;
+
+        // floating point
+        TheTest::template run<float>  (self, random);
+        TheTest::template run<double> (self, random);
+
+        // integers
+        TheTest::template run<int8_t>   (self, random);
+        TheTest::template run<uint8_t>  (self, random);
+        TheTest::template run<int16_t>  (self, random);
+        TheTest::template run<uint16_t> (self, random);
+        TheTest::template run<int32_t>  (self, random);
+        TheTest::template run<uint32_t> (self, random);
+        TheTest::template run<int64_t>  (self, random);
+        TheTest::template run<uint64_t> (self, random);
+    }
+
+    /** Runs every SIMDRegister test.
+
+        Comparison, bool-equality and min/max tests are run for the non-complex
+        element types only; all the other tests are also run for complex types.
+    */
+    void runTest() override
+    {
+        runTestForAllTypes<InitializationTest> ("InitializationTest");
+
+        runTestForAllTypes<AccessTest> ("AccessTest");
+
+        runTestForAllTypes<OperatorTests<Addition>> ("AdditionOperators");
+        runTestForAllTypes<OperatorTests<Subtraction>> ("SubtractionOperators");
+        runTestForAllTypes<OperatorTests<Multiplication>> ("MultiplicationOperators");
+
+        runTestForAllTypes<BitOperatorTests<BitAND>> ("BitANDOperators");
+        runTestForAllTypes<BitOperatorTests<BitOR>>  ("BitOROperators");
+        runTestForAllTypes<BitOperatorTests<BitXOR>> ("BitXOROperators");
+
+        runTestNonComplex<CheckComparisonOps> ("CheckComparisons");
+        runTestNonComplex<CheckBoolEquals> ("CheckBoolEquals");
+        runTestNonComplex<CheckMinMax> ("CheckMinMax");
+
+        runTestForAllTypes<CheckMultiplyAdd> ("CheckMultiplyAdd");
+        runTestForAllTypes<CheckSum> ("CheckSum");
+    }
+};
+
+// The single file-scope instance of the test class; note that the variable
+// deliberately shares its name with the class.
+// NOTE(review): presumably constructing this instance is what makes the test
+// known to the unit-test framework - confirm against UnitTest's constructor.
+static SIMDRegisterUnitTests SIMDRegisterUnitTests;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/filter_design/juce_FilterDesign.cpp
+++ b/modules/juce_dsp/filter_design/juce_FilterDesign.cpp
@ -0,0 +1,698 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+// Windowed-sinc low-pass design: fills (order + 1) taps with a sinc-shaped
+// impulse response for the requested cutoff, then multiplies the taps by the
+// window selected by `type` (with `beta` forwarded to the window).
+template <typename FloatType>
+typename FIR::Coefficients<FloatType>::Ptr
+    FilterDesign<FloatType>::designFIRLowpassWindowMethod (FloatType frequency,
+                                                           double sampleRate, size_t order,
+                                                           WindowingMethod type,
+                                                           FloatType beta)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+
+    const auto cutoff = frequency / sampleRate;
+
+    // Position of the kernel's centre; it only coincides with a tap index
+    // when the order is even.
+    const auto centre = order * 0.5;
+
+    auto* result = new typename FIR::Coefficients<FloatType> (order + 1u);
+    auto* taps = result->getRawCoefficients();
+
+    for (size_t tap = 0; tap <= order; ++tap)
+    {
+        if (tap == centre)
+        {
+            // sin(x)/x is 0/0 here, so write the limiting value directly.
+            taps[tap] = static_cast<FloatType> (cutoff * 2);
+            continue;
+        }
+
+        const auto x = MathConstants<double>::pi * (static_cast<double> (tap) - centre);
+        taps[tap] = static_cast<FloatType> (std::sin (2.0 * x * cutoff) / x);
+    }
+
+    WindowingFunction<FloatType> window (order + 1, type, false, beta);
+    window.multiplyWithWindowingTable (taps, order + 1);
+
+    return result;
+}
+
+// Kaiser-window low-pass design. The Kaiser beta parameter and the filter
+// order are derived from the requested attenuation (in dB, asserted to lie in
+// [-100, 0]) and the normalised transition width, and the actual design is
+// delegated to designFIRLowpassWindowMethod() with a Kaiser window.
+template <typename FloatType>
+typename FIR::Coefficients<FloatType>::Ptr
+    FilterDesign<FloatType>::designFIRLowpassKaiserMethod (FloatType frequency, double sampleRate,
+                                                           FloatType normalizedTransitionWidth,
+                                                           FloatType attenuationdB)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (normalizedTransitionWidth > 0 && normalizedTransitionWidth <= 0.5);
+    jassert (attenuationdB >= -100 && attenuationdB <= 0);
+
+    // beta stays at zero when the attenuation is shallower than 21 dB.
+    FloatType beta = 0;
+
+    if (attenuationdB < -50)
+        beta = static_cast<FloatType> (0.1102 * (-attenuationdB - 8.7));
+    else if (attenuationdB <= -21)
+        // The threshold must be -21 (not +21): for attenuations between -21 and
+        // 0 dB the base of std::pow below would be negative, which yields NaN
+        // for the non-integer exponent 0.4.
+        beta = static_cast<FloatType> (0.5842 * std::pow (-attenuationdB - 21, 0.4) + 0.07886 * (-attenuationdB - 21));
+
+    // Order estimate: one formula beyond 21 dB of attenuation, a fixed
+    // numerator (5.79) otherwise; both scale inversely with the transition width.
+    int order = attenuationdB < -21 ? roundToInt (std::ceil ((-attenuationdB - 7.95) / (2.285 * normalizedTransitionWidth * MathConstants<double>::twoPi)))
+                                    : roundToInt (std::ceil (5.79 / (normalizedTransitionWidth * MathConstants<double>::twoPi)));
+
+    jassert (order >= 0);
+
+    return designFIRLowpassWindowMethod (frequency, sampleRate, static_cast<size_t> (order),
+                                         WindowingFunction<FloatType>::kaiser, beta);
+}
+
+
+// Low-pass FIR design with a spline-shaped transition band: each tap is the
+// ideal sinc response multiplied by (sin(x)/x)^spline, where x scales with the
+// normalised transition width. Returns (order + 1) coefficients.
+template <typename FloatType>
+typename FIR::Coefficients<FloatType>::Ptr
+    FilterDesign<FloatType>::designFIRLowpassTransitionMethod (FloatType frequency, double sampleRate, size_t order,
+                                                               FloatType normalizedTransitionWidth, FloatType spline)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (normalizedTransitionWidth > 0 && normalizedTransitionWidth <= 0.5);
+    jassert (spline >= 1.0 && spline <= 4.0);
+
+    auto normalizedFrequency = frequency / static_cast<FloatType> (sampleRate);
+
+    auto* result = new typename FIR::Coefficients<FloatType> (order + 1u);
+    auto* c = result->getRawCoefficients();
+
+    for (size_t i = 0; i <= order; ++i)
+    {
+        // The 0/0 singularity (i - order / 2 == 0) only lands on a tap when the
+        // order is even. For odd orders, integer division would otherwise pick
+        // the tap at offset -0.5 and overwrite it with the centre value.
+        if (order % 2 == 0 && i == order / 2)
+        {
+            c[i] = static_cast<FloatType> (2 * normalizedFrequency);
+        }
+        else
+        {
+            auto indice  = MathConstants<double>::pi * (i - 0.5 * order);
+            auto indice2 = MathConstants<double>::pi * normalizedTransitionWidth * (i - 0.5 * order) / spline;
+            c[i] = static_cast<FloatType> (std::sin (2 * indice * normalizedFrequency)
+                                            / indice * std::pow (std::sin (indice2) / indice2, spline));
+        }
+    }
+
+    return result;
+}
+
+// Low-pass FIR design obtained by solving a linear system built from sinc
+// samples of the two transition-band edges. Produces N = order + 1
+// coefficients that are symmetric about the centre; a separate branch handles
+// odd N ("Type I") and even N ("Type II"). stopBandWeight scales the terms
+// built from the upper band edge.
+template <typename FloatType>
+typename FIR::Coefficients<FloatType>::Ptr
+    FilterDesign<FloatType>::designFIRLowpassLeastSquaresMethod (FloatType frequency,
+                                                                 double sampleRate, size_t order,
+                                                                 FloatType normalizedTransitionWidth,
+                                                                 FloatType stopBandWeight)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (normalizedTransitionWidth > 0 && normalizedTransitionWidth <= 0.5);
+    jassert (stopBandWeight >= 1.0 && stopBandWeight <= 100.0);
+
+    auto normalizedFrequency = static_cast<double> (frequency) / sampleRate;
+
+    // Lower (wp) and upper (ws) edges of the transition band, as angular
+    // frequencies: the band is centred on normalizedFrequency.
+    auto wp = MathConstants<double>::twoPi * (static_cast<double> (normalizedFrequency - normalizedTransitionWidth / 2.0));
+    auto ws = MathConstants<double>::twoPi * (static_cast<double> (normalizedFrequency + normalizedTransitionWidth / 2.0));
+
+    // Number of coefficients.
+    auto N = order + 1;
+
+    auto* result = new typename FIR::Coefficients<FloatType> (static_cast<size_t> (N));
+    auto* c = result->getRawCoefficients();
+
+    if (N % 2 == 1)
+    {
+        // Type I: odd number of coefficients, so there is a single centre tap c[M].
+        auto M = (N - 1) / 2;
+
+        Matrix<double> b (M + 1, 1),
+                       q (2 * M + 1, 1);
+
+        // Normalised sinc, sin(pi x) / (pi x), with the value 1 at x == 0.
+        auto sinc = [](double x) { return x == 0 ? 1 : std::sin (x * MathConstants<double>::pi)
+                                                         / (MathConstants<double>::pi * x); };
+
+        // Band edges expressed as fractions of pi.
+        auto factorp = wp / MathConstants<double>::pi;
+        auto factors = ws / MathConstants<double>::pi;
+
+        // Right-hand side of the linear system.
+        for (size_t i = 0; i <= M; ++i)
+            b (i, 0) = factorp * sinc (factorp * i);
+
+        q (0, 0) = factorp + stopBandWeight * (1.0 - factors);
+
+        for (size_t i = 1; i <= 2 * M; ++i)
+            q (i, 0) = factorp * sinc (factorp * i) - stopBandWeight * factors * sinc (factors * i);
+
+        // System matrix: half the sum of a Toeplitz and a Hankel matrix built from q.
+        auto Q1 = Matrix<double>::toeplitz (q, M + 1);
+        auto Q2 = Matrix<double>::hankel (q, M + 1, 0);
+
+        Q1 += Q2; Q1 *= 0.5;
+
+        // b is read back below, so solve() evidently leaves the solution in b.
+        Q1.solve (b);
+
+        c[M] = static_cast<FloatType> (b (0, 0));
+
+        // Mirror the remaining solution entries around the centre tap.
+        for (size_t i = 1; i <= M; ++i)
+        {
+            c[M - i] = static_cast<FloatType> (b (i, 0) * 0.5);
+            c[M + i] = static_cast<FloatType> (b (i, 0) * 0.5);
+        }
+    }
+    else
+    {
+        // Type II: even number of coefficients, so there is no centre tap and
+        // the sinc arguments for b are offset by one half.
+        auto M = N / 2;
+
+        Matrix<double> b (M, 1);
+        Matrix<double> qp (2 * M, 1);
+        Matrix<double> qs (2 * M, 1);
+
+        // Normalised sinc, sin(pi x) / (pi x), with the value 1 at x == 0.
+        auto sinc = [](double x) { return x == 0 ? 1 : std::sin (x * MathConstants<double>::pi)
+                                                         / (MathConstants<double>::pi * x); };
+
+        // Band edges expressed as fractions of pi.
+        auto factorp = wp / MathConstants<double>::pi;
+        auto factors = ws / MathConstants<double>::pi;
+
+        for (size_t i = 0; i < M; ++i)
+            b (i, 0) = factorp * sinc (factorp * (i + 0.5));
+
+        // Separate generating vectors for the wp-based and ws-based terms.
+        for (size_t i = 0; i < 2 * M; ++i)
+        {
+            qp (i, 0) = 0.25 * factorp * sinc (factorp * i);
+            qs (i, 0) = -0.25 * stopBandWeight * factors * sinc (factors * i);
+        }
+
+        auto Q1p = Matrix<double>::toeplitz (qp, M);
+        auto Q2p = Matrix<double>::hankel (qp, M, 1);
+        auto Q1s = Matrix<double>::toeplitz (qs, M);
+        auto Q2s = Matrix<double>::hankel (qs, M, 1);
+
+        auto Id = Matrix<double>::identity (M);
+        Id *= (0.25 * stopBandWeight);
+
+        // Accumulate everything into Q1s, which is then used as the system
+        // matrix Q (Q is a reference to Q1s, not a copy).
+        Q1p += Q2p;
+        Q1s += Q2s;
+        Q1s += Id;
+
+        auto& Q = Q1s;
+        Q += Q1p;
+
+        // b is read back below, so solve() evidently leaves the solution in b.
+        Q.solve (b);
+
+        // Mirror the solution around the midpoint between taps M - 1 and M.
+        for (size_t i = 0; i < M; ++i)
+        {
+            c[M - i - 1] = static_cast<FloatType> (b (i, 0) * 0.25);
+            c[M + i]     = static_cast<FloatType> (b (i, 0) * 0.25);
+        }
+    }
+
+    return result;
+}
+
+// Half-band low-pass FIR design. Two partial impulse responses (for n and
+// n - 1, see getPartialImpulseResponseHn) are combined with the weights A and
+// B, the result is normalised by a magnitude measured on the un-normalised
+// filter, and the tap at index 2 * n + 1 is finally set to 0.5.
+template <typename FloatType>
+typename FIR::Coefficients<FloatType>::Ptr
+    FilterDesign<FloatType>::designFIRLowpassHalfBandEquirippleMethod (FloatType normalizedTransitionWidth,
+                                                                       FloatType attenuationdB)
+{
+    jassert (normalizedTransitionWidth > 0 && normalizedTransitionWidth <= 0.5);
+    jassert (attenuationdB >= -300 && attenuationdB <= -10);
+
+    auto wpT = (0.5 - normalizedTransitionWidth) * MathConstants<double>::pi;
+
+    // n, kp, A and B are all derived from the attenuation and wpT via fixed formulas.
+    auto n = roundToInt (std::ceil ((attenuationdB - 18.18840664 * wpT + 33.64775300) / (18.54155181 * wpT - 29.13196871)));
+    auto kp = (n * wpT - 1.57111377 * n + 0.00665857) / (-1.01927560 * n + 0.37221484);
+    auto A = (0.01525753 * n + 0.03682344 + 9.24760314 / (double) n) * kp + 1.01701407 + 0.73512298 / (double) n;
+    auto B = (0.00233667 * n - 1.35418408 + 5.75145813 / (double) n) * kp + 1.02999650 - 0.72759508 / (double) n;
+
+    auto hn  = FilterDesign<FloatType>::getPartialImpulseResponseHn (n, kp);
+    auto hnm = FilterDesign<FloatType>::getPartialImpulseResponseHn (n - 1, kp);
+
+    // hnm is shorter than hn; pad it equally on both sides so the two arrays
+    // line up index for index.
+    auto diff = (hn.size() - hnm.size()) / 2;
+
+    for (int i = 0; i < diff; ++i)
+    {
+        hnm.add (0.0);
+        hnm.insert (0, 0.0);
+    }
+
+    auto hh = hn;
+
+    for (int i = 0; i < hn.size(); ++i)
+        hh.setUnchecked (i, A * hh[i] + B * hnm[i]);
+
+    auto* result = new typename FIR::Coefficients<FloatType> (static_cast<size_t> (hh.size()));
+    auto* c = result->getRawCoefficients();
+
+    // Store the un-normalised taps so the magnitude response can be measured.
+    // Cast to FloatType rather than float so that a double instantiation does
+    // not lose precision in the normalisation factor computed below.
+    for (int i = 0; i < hh.size(); ++i)
+        c[i] = static_cast<FloatType> (hh[i]);
+
+    // Normalisation factor: taken at frequency 0.5 (relative to a sample rate
+    // of 1) for even n, and at a frequency derived from kp and n for odd n.
+    double NN;
+
+    if (n % 2 == 0)
+    {
+        NN = 2.0 * result->getMagnitudeForFrequency (0.5, 1.0);
+    }
+    else
+    {
+        auto w01 = std::sqrt (kp * kp + (1 - kp * kp) * std::pow (std::cos (MathConstants<double>::pi / (2.0 * n + 1.0)), 2.0));
+        auto om01 = std::acos (-w01);
+
+        NN = -2.0 * result->getMagnitudeForFrequency (om01 / MathConstants<double>::twoPi, 1.0);
+    }
+
+    // Overwrite the taps with their normalised values.
+    for (int i = 0; i < hh.size(); ++i)
+        c[i] = static_cast<FloatType> ((A * hn[i] + B * hnm[i]) / NN);
+
+    // Index 2 * n + 1 is the middle of the (4 * n + 3)-element response.
+    c[2 * n + 1] = static_cast<FloatType> (0.5);
+
+    return result;
+}
+
+// Builds the partial impulse response used by the half-band equiripple
+// design. Returns an Array of 4 * n + 3 values that is symmetric about index
+// 2 * n + 1; only entries at odd offsets from that index are written here.
+template <typename FloatType>
+Array<double> FilterDesign<FloatType>::getPartialImpulseResponseHn (int n, double kp)
+{
+    // alpha holds 2 * n + 1 entries, of which only the even indices are
+    // written; they are filled from the top index (2 * n) downwards.
+    Array<double> alpha;
+    alpha.resize (2 * n + 1);
+
+    alpha.setUnchecked (2 * n, 1.0 / std::pow (1.0 - kp * kp, n));
+
+    // The next two entries have closed-form expressions in terms of the
+    // entries above them.
+    if (n > 0)
+        alpha.setUnchecked (2 * n - 2, -(2 * n * kp * kp + 1) * alpha[2 * n]);
+
+    if (n > 1)
+        alpha.setUnchecked (2 * n - 4, -(4 * n + 1 + (n - 1) * (2 * n - 1) * kp * kp) / (2.0 * n) * alpha[2 * n - 2]
+                             - (2 * n + 1) * ((n + 1) * kp * kp + 1) / (2.0 * n) * alpha[2 * n]);
+
+    // Remaining entries: each alpha[2k - 6] is computed from the three
+    // entries above it (alpha[2k - 4], alpha[2k - 2], alpha[2k]).
+    for (int k = n; k >= 3; --k)
+    {
+        auto c1 = (3 * (n*(n + 2) - k * (k - 2)) + 2 * k - 3 + 2 * (k - 2)*(2 * k - 3) * kp * kp) * alpha[2 * k - 4];
+        auto c2 = (3 * (n*(n + 2) - (k - 1) * (k + 1)) + 2 * (2 * k - 1) + 2 * k*(2 * k - 1) * kp * kp) * alpha[2 * k - 2];
+        auto c3 = (n * (n + 2) - (k - 1) * (k + 1)) * alpha[2 * k];
+        auto c4 = (n * (n + 2) - (k - 3) * (k - 1));
+
+        alpha.setUnchecked (2 * k - 6, -(c1 + c2 + c3) / c4);
+    }
+
+    // ai[2k + 1] = alpha[2k] / (2k + 1); the even indices of ai are not written.
+    Array<double> ai;
+    ai.resize (2 * n + 1 + 1);
+
+    for (int k = 0; k <= n; ++k)
+        ai.setUnchecked (2 * k + 1, alpha[2 * k] / (2.0 * k + 1.0));
+
+    // Place half of each ai value symmetrically on both sides of index 2 * n + 1.
+    // NOTE(review): entries never written (including the centre) keep whatever
+    // value resize() gave them - presumably 0.0; confirm against Array::resize.
+    Array<double> hn;
+    hn.resize (2 * n + 1 + 2 * n + 1 + 1);
+
+    for (int k = 0; k <= n; ++k)
+    {
+        hn.setUnchecked (2 * n + 1 + (2 * k + 1), 0.5 * ai[2 * k + 1]);
+        hn.setUnchecked (2 * n + 1 - (2 * k + 1), 0.5 * ai[2 * k + 1]);
+    }
+
+    return hn;
+}
+
+// Transition-width based Butterworth design: forwards every argument to
+// designIIRLowpassHighOrderGeneralMethod() with type code 0.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRLowpassHighOrderButterworthMethod (FloatType frequency, double sampleRate,
+                                                                         FloatType normalizedTransitionWidth,
+                                                                         FloatType passbandAttenuationdB,
+                                                                         FloatType stopbandAttenuationdB)
+{
+    constexpr int butterworthType = 0;
+
+    return designIIRLowpassHighOrderGeneralMethod (butterworthType,
+                                                   frequency,
+                                                   sampleRate,
+                                                   normalizedTransitionWidth,
+                                                   passbandAttenuationdB,
+                                                   stopbandAttenuationdB);
+}
+
+// Chebyshev type 1 design: forwards every argument to
+// designIIRLowpassHighOrderGeneralMethod() with type code 1.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRLowpassHighOrderChebyshev1Method (FloatType frequency, double sampleRate,
+                                                                        FloatType normalizedTransitionWidth,
+                                                                        FloatType passbandAttenuationdB,
+                                                                        FloatType stopbandAttenuationdB)
+{
+    constexpr int chebyshev1Type = 1;
+
+    return designIIRLowpassHighOrderGeneralMethod (chebyshev1Type,
+                                                   frequency,
+                                                   sampleRate,
+                                                   normalizedTransitionWidth,
+                                                   passbandAttenuationdB,
+                                                   stopbandAttenuationdB);
+}
+
+// Chebyshev type 2 design: forwards every argument to
+// designIIRLowpassHighOrderGeneralMethod() with type code 2.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRLowpassHighOrderChebyshev2Method (FloatType frequency, double sampleRate,
+                                                                        FloatType normalizedTransitionWidth,
+                                                                        FloatType passbandAttenuationdB,
+                                                                        FloatType stopbandAttenuationdB)
+{
+    constexpr int chebyshev2Type = 2;
+
+    return designIIRLowpassHighOrderGeneralMethod (chebyshev2Type,
+                                                   frequency,
+                                                   sampleRate,
+                                                   normalizedTransitionWidth,
+                                                   passbandAttenuationdB,
+                                                   stopbandAttenuationdB);
+}
+
+// Elliptic design: forwards every argument to
+// designIIRLowpassHighOrderGeneralMethod() with type code 3.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRLowpassHighOrderEllipticMethod (FloatType frequency, double sampleRate,
+                                                                      FloatType normalizedTransitionWidth,
+                                                                      FloatType passbandAttenuationdB,
+                                                                      FloatType stopbandAttenuationdB)
+{
+    constexpr int ellipticType = 3;
+
+    return designIIRLowpassHighOrderGeneralMethod (ellipticType,
+                                                   frequency,
+                                                   sampleRate,
+                                                   normalizedTransitionWidth,
+                                                   passbandAttenuationdB,
+                                                   stopbandAttenuationdB);
+}
+
+// Shared implementation behind the four transition-width based IIR low-pass
+// designers. `type` selects the filter family, using the codes passed by the
+// wrappers in this file: 0 = Butterworth, 1 = Chebyshev1, 2 = Chebyshev2,
+// anything else (3 from the wrapper) = Elliptic.
+//
+// Returns the filter as an array of cascaded sections: when the pole count N
+// is odd, one section built from four values { b0, b1, 1, a1 } comes first,
+// followed by L = (N - r) / 2 sections built from six values
+// { b0, b1, b2, 1, a1, a2 }.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRLowpassHighOrderGeneralMethod (int type, FloatType frequency, double sampleRate,
+                                                                     FloatType normalizedTransitionWidth,
+                                                                     FloatType passbandAttenuationdB,
+                                                                     FloatType stopbandAttenuationdB)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (normalizedTransitionWidth > 0 && normalizedTransitionWidth <= 0.5);
+    jassert (passbandAttenuationdB > -20 && passbandAttenuationdB < 0);
+    jassert (stopbandAttenuationdB > -300 && stopbandAttenuationdB < -20);
+
+    auto normalizedFrequency = frequency / sampleRate;
+
+    // Edges of the transition band, centred on the normalised cutoff.
+    auto fp = normalizedFrequency - normalizedTransitionWidth / 2;
+    auto fs = normalizedFrequency + normalizedTransitionWidth / 2;
+
+    // Attenuations converted from dB to linear gain, then to the epsilon
+    // values sqrt (1 / G^2 - 1) used by the order and pole formulas below.
+    double Ap = passbandAttenuationdB;
+    double As = stopbandAttenuationdB;
+    auto Gp = Decibels::decibelsToGain (Ap, -300.0);
+    auto Gs = Decibels::decibelsToGain (As, -300.0);
+    auto epsp = std::sqrt (1.0 / (Gp * Gp) - 1.0);
+    auto epss = std::sqrt (1.0 / (Gs * Gs) - 1.0);
+
+    // Band edges mapped through tan (pi * f).
+    auto omegap = std::tan (MathConstants<double>::pi * fp);
+    auto omegas = std::tan (MathConstants<double>::pi * fs);
+    constexpr auto halfPi = MathConstants<double>::halfPi;
+
+    // Ratios of the two band edges (k) and of the two epsilon values (k1).
+    auto k = omegap / omegas;
+    auto k1 = epsp / epss;
+
+    // N: number of poles, rounded up; the formula depends on the family.
+    int N;
+
+    if (type == 0)
+    {
+        N = roundToInt (std::ceil (std::log (1.0 / k1) / std::log (1.0 / k)));
+    }
+    else if (type == 1 || type == 2)
+    {
+        N = roundToInt (std::ceil (std::acosh (1.0 / k1) / std::acosh (1.0 / k)));
+    }
+    else
+    {
+        double K, Kp, K1, K1p;
+
+        SpecialFunctions::ellipticIntegralK (k, K, Kp);
+        SpecialFunctions::ellipticIntegralK (k1, K1, K1p);
+
+        N = roundToInt (std::ceil ((K1p * K) / (K1 * Kp)));
+    }
+
+    // r is 1 when N is odd (one extra real pole); L is the number of pole pairs.
+    const int r = N % 2;
+    const int L = (N - r) / 2;
+    // Overall gain applied to the first section only; differs from 1 only for
+    // types 1 and 3 with even N (for odd N the exponent 1 - r is zero).
+    const double H0 = (type == 1 || type == 3) ? std::pow (Gp, 1.0 - r) : 1.0;
+
+    // pa / za: poles and zeros before the mapping to p / z further down
+    // (presumably the analogue-prototype values - TODO confirm). When r == 1
+    // the single real pole is stored first, followed by one entry per pair.
+    Array<Complex<double>> pa, za;
+    Complex<double> j (0, 1);
+
+    if (type == 0)
+    {
+        if (r == 1)
+            pa.add (-omegap * std::pow (epsp, -1.0 / (double) N));
+
+        for (int i = 1; i <= L; ++i)
+        {
+            auto ui = (2 * i - 1.0) / (double) N;
+            pa.add (omegap * std::pow (epsp, -1.0 / (double) N) * j * exp (ui * halfPi * j));
+        }
+    }
+    else if (type == 1)
+    {
+        auto v0 = std::asinh (1.0 / epsp) / (N * halfPi);
+
+        if (r == 1)
+            pa.add (-omegap * std::sinh (v0 * halfPi));
+
+        for (int i = 1; i <= L; ++i)
+        {
+            auto ui = (2 * i - 1.0) / (double) N;
+            pa.add (omegap * j * std::cos ((ui - j * v0) * halfPi));
+        }
+    }
+    else if (type == 2)
+    {
+        auto v0 = std::asinh (epss) / (N * halfPi);
+
+        if (r == 1)
+            pa.add(-1.0 / (k / omegap * std::sinh (v0 * halfPi)));
+
+        // This family also produces one zero per pole pair.
+        for (int i = 1; i <= L; ++i)
+        {
+            auto ui = (2 * i - 1.0) / (double) N;
+
+            pa.add (1.0 / (k / omegap * j * std::cos ((ui - j * v0) * halfPi)));
+            za.add (1.0 / (k / omegap * j * std::cos (ui * halfPi)));
+        }
+    }
+    else
+    {
+        auto v0 = -j * (SpecialFunctions::asne (j / epsp, k1) / (double) N);
+
+        if (r == 1)
+            pa.add (omegap * j * SpecialFunctions::sne (j * v0, k));
+
+        // This family also produces one zero per pole pair.
+        for (int i = 1; i <= L; ++i)
+        {
+            auto ui = (2 * i - 1.0) / (double) N;
+            auto zetai = SpecialFunctions::cde (ui, k);
+
+            pa.add (omegap * j * SpecialFunctions::cde (ui - j * v0, k));
+            za.add (omegap * j / (k * zetai));
+        }
+    }
+
+    // Map every pole and zero x to (1 + x) / (1 - x) and compute a gain g per
+    // section. p and g carry the extra real-pole entry at index 0 when r == 1,
+    // hence the "+ r" offsets; z has no such entry.
+    Array<Complex<double>> p, z, g;
+
+    if (r == 1)
+    {
+        p.add ((1.0 + pa[0]) / (1.0 - pa[0]));
+        g.add (0.5 * (1.0 - p[0]));
+    }
+
+    for (int i = 0; i < L; ++i)
+    {
+        p.add ((1.0 + pa[i + r]) / (1.0 - pa[i + r]));
+        // Families that produced no zeros (za empty) get a zero at -1.
+        z.add (za.size() == 0 ? -1.0 : (1.0 + za[i]) / (1.0 - za[i]));
+        g.add ((1.0 - p[i + r]) / (1.0 - z[i]));
+    }
+
+    Array<IIR::Coefficients<FloatType>> cascadedCoefficients;
+
+    // Section for the single real pole, scaled by H0.
+    if (r == 1)
+    {
+        auto b0 = static_cast<FloatType> (H0 * std::real (g[0]));
+        auto b1 = b0;
+        auto a1 = static_cast<FloatType> (-std::real (p[0]));
+
+        cascadedCoefficients.add ({ b0, b1, 1.0f, a1 });
+    }
+
+    // One section per pole pair: each pole/zero is combined with its complex
+    // conjugate, which makes the resulting coefficients real.
+    for (int i = 0; i < L; ++i)
+    {
+        auto gain = std::pow (std::abs (g[i + r]), 2.0);
+
+        auto b0 = static_cast<FloatType> (gain);
+        auto b1 = static_cast<FloatType> (std::real (-z[i] - std::conj (z[i])) * gain);
+        auto b2 = static_cast<FloatType> (std::real ( z[i] * std::conj (z[i])) * gain);
+
+        auto a1 = static_cast<FloatType> (std::real (-p[i+r] - std::conj (p[i + r])));
+        auto a2 = static_cast<FloatType> (std::real ( p[i+r] * std::conj (p[i + r])));
+
+        cascadedCoefficients.add ({ b0, b1, b2, 1, a1, a2 });
+    }
+
+    return cascadedCoefficients;
+}
+
+// Butterworth low-pass of a given order, returned as a cascade: an odd order
+// starts with one first-order low-pass section, and every order contributes
+// order / 2 sections created with makeLowPass(), each with its own Q.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRLowpassHighOrderButterworthMethod (FloatType frequency,
+                                                                         double sampleRate, int order)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (order > 0);
+
+    const bool hasFirstOrderSection = (order % 2 == 1);
+    const int numLowPassSections = order / 2;
+
+    Array<IIR::Coefficients<FloatType>> sections;
+
+    if (hasFirstOrderSection)
+        sections.add (*IIR::Coefficients<FloatType>::makeFirstOrderLowPass (sampleRate, frequency));
+
+    for (int section = 0; section < numLowPassSections; ++section)
+    {
+        // The angle that determines Q is spaced differently for odd and even orders.
+        const auto angle = hasFirstOrderSection
+                             ? (section + 1.0) * MathConstants<double>::pi / order
+                             : (2.0 * section + 1.0) * MathConstants<double>::pi / (order * 2.0);
+
+        const auto Q = 1.0 / (2.0 * std::cos (angle));
+
+        sections.add (*IIR::Coefficients<FloatType>::makeLowPass (sampleRate, frequency,
+                                                                  static_cast<FloatType> (Q)));
+    }
+
+    return sections;
+}
+
+// Butterworth high-pass of a given order, returned as a cascade: an odd order
+// starts with one first-order high-pass section, and every order contributes
+// order / 2 sections created with makeHighPass(), each with its own Q.
+template <typename FloatType>
+Array<IIR::Coefficients<FloatType>>
+    FilterDesign<FloatType>::designIIRHighpassHighOrderButterworthMethod (FloatType frequency,
+                                                                          double sampleRate, int order)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (order > 0);
+
+    const bool hasFirstOrderSection = (order % 2 == 1);
+    const int numHighPassSections = order / 2;
+
+    Array<IIR::Coefficients<FloatType>> sections;
+
+    if (hasFirstOrderSection)
+        sections.add (*IIR::Coefficients<FloatType>::makeFirstOrderHighPass (sampleRate, frequency));
+
+    for (int section = 0; section < numHighPassSections; ++section)
+    {
+        // The angle that determines Q is spaced differently for odd and even orders.
+        const auto angle = hasFirstOrderSection
+                             ? (section + 1.0) * MathConstants<double>::pi / order
+                             : (2.0 * section + 1.0) * MathConstants<double>::pi / (order * 2.0);
+
+        const auto Q = 1.0 / (2.0 * std::cos (angle));
+
+        sections.add (*IIR::Coefficients<FloatType>::makeHighPass (sampleRate, frequency,
+                                                                   static_cast<FloatType> (Q)));
+    }
+
+    return sections;
+}
+
+// Half-band low-pass design returned as an IIRPolyphaseAllpassStructure. N
+// coefficients ai are computed from the transition width and stopband
+// attenuation, then split alternately between the structure's directPath
+// (even indices) and delayedPath (odd indices, after one extra leading entry).
+template <typename FloatType>
+typename FilterDesign<FloatType>::IIRPolyphaseAllpassStructure
+    FilterDesign<FloatType>::designIIRLowpassHalfBandPolyphaseAllpassMethod (FloatType normalizedTransitionWidth,
+                                                                             FloatType stopbandAttenuationdB)
+{
+    jassert (normalizedTransitionWidth > 0 && normalizedTransitionWidth <= 0.5);
+    jassert (stopbandAttenuationdB > -300 && stopbandAttenuationdB < -10);
+
+    // Transition width as an angular frequency, and stopband attenuation as a linear gain.
+    const double wt = MathConstants<double>::twoPi * normalizedTransitionWidth;
+    const double ds = Decibels::decibelsToGain (stopbandAttenuationdB, static_cast<FloatType> (-300.0));
+
+    auto k = std::pow (std::tan ((MathConstants<double>::pi - wt) / 4), 2.0);
+    auto kp = std::sqrt (1.0 - k * k);
+    auto e = (1 - std::sqrt (kp)) / (1 + std::sqrt (kp)) * 0.5;
+    // q is a truncated power series in e.
+    auto q = e + 2 * std::pow (e, 5.0) + 15 * std::pow (e, 9.0) + 150 * std::pow (e, 13.0);
+
+    auto k1 = ds * ds / (1 - ds * ds);
+    int n = roundToInt (std::ceil (std::log (k1 * k1 / 16) / std::log (q)));
+
+    // Force n to be odd and at least 3.
+    if (n % 2 == 0)
+        ++n;
+
+    if (n == 1)
+        n = 3;
+
+    // NOTE(review): k1 is reassigned here but not read again in this function.
+    auto q1 = std::pow (q, (double) n);
+    k1 = 4 * std::sqrt (q1);
+
+    // Number of coefficients to compute.
+    const int N = (n - 1) / 2;
+    Array<double> ai;
+
+    for (int i = 1; i <= N; ++i)
+    {
+        // Numerator series: terms are accumulated until one has a magnitude
+        // of at most 1e-100.
+        double num = 0.0;
+        double delta = 1.0;
+        int m = 0;
+
+        while (std::abs (delta) > 1e-100)
+        {
+            delta = std::pow (-1, m) * std::pow (q, m * (m + 1))
+                     * std::sin ((2 * m + 1) * MathConstants<double>::pi * i / (double) n);
+            num += delta;
+            m++;
+        }
+
+        num *= 2 * std::pow (q, 0.25);
+
+        // Denominator series: same stopping rule, starting from m = 1.
+        double den = 0.0;
+        delta = 1.0;
+        m = 1;
+
+        while (std::abs (delta) > 1e-100)
+        {
+            delta = std::pow (-1, m) * std::pow (q, m * m)
+                     * std::cos (m * MathConstants<double>::twoPi * i / (double) n);
+            den += delta;
+            ++m;
+        }
+
+        den = 1 + 2 * den;
+
+        // Turn the series ratio into the i-th coefficient.
+        auto wi = num / den;
+        auto api = std::sqrt ((1 - wi * wi * k) * (1 - wi * wi / k)) / (1 + wi * wi);
+
+        ai.add ((1 - api) / (1 + api));
+    }
+
+    IIRPolyphaseAllpassStructure structure;
+
+    // Even-indexed coefficients go to the direct path.
+    for (int i = 0; i < N; i += 2)
+        structure.directPath.add (IIR::Coefficients<FloatType> (static_cast<FloatType> (ai[i]),
+                                                                0, 1, 1, 0, static_cast<FloatType> (ai[i])));
+
+    // The delayed path starts with a four-value section (0, 1, 1, 0) ...
+    structure.delayedPath.add (IIR::Coefficients<FloatType> (0, 1, 1, 0));
+
+    // ... followed by the odd-indexed coefficients.
+    for (int i = 1; i < N; i += 2)
+        structure.delayedPath.add (IIR::Coefficients<FloatType> (static_cast<FloatType> (ai[i]),
+                                                                 0, 1, 1, 0, static_cast<FloatType> (ai[i])));
+
+    return structure;
+}
+
+
+// Explicit instantiations: the member definitions above are compiled here for
+// float and double.
+template struct FilterDesign<float>;
+template struct FilterDesign<double>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/filter_design/juce_FilterDesign.h
+++ b/modules/juce_dsp/filter_design/juce_FilterDesign.h
@ -0,0 +1,295 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    This class provides a set of static functions which generate
+    FIR::Coefficients and IIR::Coefficients for high-order low-pass filters
+    (plus one high-pass Butterworth design). The results can be used to process
+    audio directly as an equalizer, in resampling algorithms, etc.
+
+    All members are static and the constructor is deleted, so the class is
+    never instantiated.
+
+    @see FIR::Coefficients, IIR::Coefficients, WindowingFunction
+
+    @tags{DSP}
+*/
+template <typename FloatType>
+struct FilterDesign
+{
+    using FIRCoefficientsPtr = typename FIR::Coefficients<FloatType>::Ptr;
+    using IIRCoefficients    = typename IIR::Coefficients<FloatType>;
+
+    using WindowingMethod    = typename WindowingFunction<FloatType>::WindowingMethod;
+
+    //==============================================================================
+    /** This method generates a FIR::Coefficients for a low-pass filter, using
+        the windowing design method, applied to a sinc impulse response. It is one
+        of the simplest methods used to generate a high order low-pass filter, with
+        the downside of needing more coefficients than more complex methods to
+        reach a given attenuation in the stop band.
+
+        It generates linear phase filter coefficients.
+
+        Note : the flatTop WindowingMethod generates an impulse response with a
+        maximum amplitude higher than one, which might need to be normalized
+        depending on the application.
+
+        @param frequency            the cutoff frequency of the low-pass filter
+        @param sampleRate           the sample rate being used in the filter design
+        @param order                the order of the filter
+        @param type                 the windowing method to use, a WindowingFunction::WindowingMethod value
+        @param beta                 an optional additional parameter useful for the Kaiser windowing function
+                                    (defaults to 2)
+    */
+
+    static FIRCoefficientsPtr designFIRLowpassWindowMethod (FloatType frequency, double sampleRate,
+                                                            size_t order, WindowingMethod type,
+                                                            FloatType beta = static_cast<FloatType> (2));
+
+    /** This is a variant of the function designFIRLowpassWindowMethod, which allows the
+        user to specify a transition width and an attenuation in dB,
+        to get a low-pass filter using the Kaiser windowing function, with calculated
+        values of the filter order and of the beta parameter, to satisfy the constraints.
+
+        It generates linear phase filter coefficients.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param attenuationdB                the attenuation in dB expected in the stop band
+    */
+
+    static FIRCoefficientsPtr designFIRLowpassKaiserMethod (FloatType frequency, double sampleRate,
+                                                            FloatType normalizedTransitionWidth,
+                                                            FloatType attenuationdB);
+
+
+    /** This method is also a variant of the function designFIRLowpassWindowMethod, using
+        a rectangular window as a basis, and a spline transition between the pass band and
+        the stop band, to reduce the Gibbs phenomenon.
+
+        It generates linear phase filter coefficients.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param order                        the order of the filter
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param spline                       between 1.0 and 4.0, indicates how much the transition
+                                            is curved, with 1.0 meaning a straight line
+    */
+    static FIRCoefficientsPtr designFIRLowpassTransitionMethod (FloatType frequency, double sampleRate,
+                                                                size_t order,
+                                                                FloatType normalizedTransitionWidth,
+                                                                FloatType spline);
+
+    /** This method generates a FIR::Coefficients for a low-pass filter, by
+        minimizing the average error between the generated filter and an ideal one
+        using the least squares error criterion and matrix operations.
+
+        It generates linear phase filter coefficients.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param order                        the order of the filter
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param stopBandWeight               between 1.0 and 100.0, indicates how much we want
+                                            attenuation in the stop band, against some oscillation
+                                            in the pass band
+    */
+    static FIRCoefficientsPtr designFIRLowpassLeastSquaresMethod (FloatType frequency, double sampleRate, size_t order,
+                                                                  FloatType normalizedTransitionWidth,
+                                                                  FloatType stopBandWeight);
+
+    /** This method generates a FIR::Coefficients for a low-pass filter, with
+        a cutoff frequency at half band, using an algorithm described in the article
+        "Design of Half-Band FIR Filters for Signal Compression" from Pavel
+        Zahradnik, to get an equiripple-like high order FIR filter, without the need
+        for an iterative method and its convergence failure risks.
+
+        It generates linear phase filter coefficients.
+
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param attenuationdB                the attenuation in dB expected in the stop band
+    */
+    static FIRCoefficientsPtr designFIRLowpassHalfBandEquirippleMethod (FloatType normalizedTransitionWidth,
+                                                                        FloatType attenuationdB);
+
+    //==============================================================================
+    /** This method returns an array of IIR::Coefficients, made to be used in
+        cascaded IIRFilters, providing a minimum phase low-pass filter without any
+        ripple in the pass band and in the stop band.
+
+        This overload takes a transition width and attenuations as its design
+        constraints; a sibling overload takes the filter order directly.
+
+        The algorithms are based on "Lecture Notes on Elliptic Filter Design" by
+        Sophocles J. Orfanidis.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param passbandAttenuationdB        the lowest attenuation in dB expected in the pass band
+        @param stopbandAttenuationdB        the attenuation in dB expected in the stop band
+    */
+
+    static Array<IIRCoefficients> designIIRLowpassHighOrderButterworthMethod (FloatType frequency, double sampleRate,
+                                                                              FloatType normalizedTransitionWidth,
+                                                                              FloatType passbandAttenuationdB,
+                                                                              FloatType stopbandAttenuationdB);
+
+    //==============================================================================
+    /** This method returns an array of IIR::Coefficients, made to be used in
+        cascaded IIRFilters, providing a minimum phase low-pass filter without any
+        ripple in the pass band and in the stop band.
+
+        This overload takes the filter order directly.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param order                        the order of the resulting IIR filter, providing
+                                            an attenuation of -6 dB times order / octave
+    */
+
+    static Array<IIRCoefficients> designIIRLowpassHighOrderButterworthMethod (FloatType frequency, double sampleRate,
+                                                                              int order);
+
+    /** This method returns an array of IIR::Coefficients, made to be used in
+        cascaded IIRFilters, providing a minimum phase high-pass filter without any
+        ripple in the pass band and in the stop band.
+
+        @param frequency                    the cutoff frequency of the high-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param order                        the order of the resulting IIR filter, providing
+                                            an attenuation of -6 dB times order / octave
+    */
+
+    static Array<IIRCoefficients> designIIRHighpassHighOrderButterworthMethod (FloatType frequency, double sampleRate,
+                                                                               int order);
+
+    /** This method returns an array of IIR::Coefficients, made to be used in
+        cascaded IIRFilters, providing a minimum phase low-pass filter which is
+        free of ripple in the stop band only.
+
+        The algorithms are based on "Lecture Notes on Elliptic Filter Design" by
+        Sophocles J. Orfanidis.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param passbandAttenuationdB        the lowest attenuation in dB expected in the pass band
+        @param stopbandAttenuationdB        the attenuation in dB expected in the stop band
+    */
+    static Array<IIRCoefficients> designIIRLowpassHighOrderChebyshev1Method (FloatType frequency, double sampleRate,
+                                                                             FloatType normalizedTransitionWidth,
+                                                                             FloatType passbandAttenuationdB,
+                                                                             FloatType stopbandAttenuationdB);
+
+    /** This method returns an array of IIR::Coefficients, made to be used in
+        cascaded IIRFilters, providing a minimum phase low-pass filter which is
+        free of ripple in the pass band only.
+
+        The algorithms are based on "Lecture Notes on Elliptic Filter Design" by
+        Sophocles J. Orfanidis.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param passbandAttenuationdB        the lowest attenuation in dB expected in the pass band
+        @param stopbandAttenuationdB        the attenuation in dB expected in the stop band
+    */
+    static Array<IIRCoefficients> designIIRLowpassHighOrderChebyshev2Method (FloatType frequency, double sampleRate,
+                                                                             FloatType normalizedTransitionWidth,
+                                                                             FloatType passbandAttenuationdB,
+                                                                             FloatType stopbandAttenuationdB);
+
+    /** This method returns an array of IIR::Coefficients, made to be used in
+        cascaded IIR::Filters, providing a minimum phase low-pass filter with ripples
+        in both the pass band and in the stop band.
+
+        The algorithms are based on "Lecture Notes on Elliptic Filter Design" by
+        Sophocles J. Orfanidis.
+
+        @param frequency                    the cutoff frequency of the low-pass filter
+        @param sampleRate                   the sample rate being used in the filter design
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param passbandAttenuationdB        the lowest attenuation in dB expected in the pass band
+        @param stopbandAttenuationdB        the attenuation in dB expected in the stop band
+    */
+    static Array<IIRCoefficients> designIIRLowpassHighOrderEllipticMethod (FloatType frequency, double sampleRate,
+                                                                           FloatType normalizedTransitionWidth,
+                                                                           FloatType passbandAttenuationdB,
+                                                                           FloatType stopbandAttenuationdB);
+
+    /** The structure returned by the function designIIRLowpassHalfBandPolyphaseAllpassMethod.
+
+        The two members of this structure, directPath and delayedPath, are arrays of
+        IIR::Coefficients, made of polyphase second order allpass filters and an additional
+        delay in the second array, that can be used in cascaded filters processed in two
+        parallel paths, which must be summed at the end to get the high order efficient
+        low-pass filtering.
+    */
+    struct IIRPolyphaseAllpassStructure { Array<IIRCoefficients> directPath, delayedPath; };
+
+    /** This method generates arrays of IIR::Coefficients for a low-pass filter, with
+        a cutoff frequency at half band, using an algorithm described in the article
+        "Digital Signal Processing Schemes for efficient interpolation and decimation" from
+        Pavel Valenzuela and Constantinides.
+
+        The result is an IIRPolyphaseAllpassStructure object.
+
+        The two members of this structure, directPath and delayedPath, are arrays of
+        IIR::Coefficients, made of polyphase second order allpass filters and an additional
+        delay in the second array, that can be used in cascaded filters processed in two
+        parallel paths, which must be summed at the end to get the high order efficient
+        low-pass filtering.
+
+        The gain of the resulting pass-band is 6 dB, so don't forget to compensate for it if
+        you want to use that method for something other than two times oversampling.
+
+        @param normalizedTransitionWidth    the normalized size between 0 and 0.5 of the transition
+                                            between the pass band and the stop band
+        @param stopbandAttenuationdB        the attenuation in dB expected in the stop band
+    */
+    static IIRPolyphaseAllpassStructure designIIRLowpassHalfBandPolyphaseAllpassMethod (FloatType normalizedTransitionWidth,
+                                                                                        FloatType stopbandAttenuationdB);
+
+private:
+    //==============================================================================
+    // Internal helper returning an array of doubles computed from n and kp.
+    // NOTE(review): the name suggests a partial impulse response used by one of the
+    // FIR designs above; its definition is not visible here - confirm.
+    static Array<double> getPartialImpulseResponseHn (int n, double kp);
+
+    // Internal helper sharing the parameter list of the constraint-based IIR
+    // low-pass designs above, plus an integer `type`.
+    // NOTE(review): `type` presumably selects which of those designs is produced;
+    // the mapping of values is not visible here - confirm.
+    static Array<IIRCoefficients> designIIRLowpassHighOrderGeneralMethod (int type, FloatType frequency, double sampleRate,
+                                                                          FloatType normalizedTransitionWidth,
+                                                                          FloatType passbandAttenuationdB,
+                                                                          FloatType stopbandAttenuationdB);
+    // Deleted: this struct only groups static functions and cannot be constructed.
+    FilterDesign() = delete;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/frequency/juce_Convolution.cpp
+++ b/modules/juce_dsp/frequency/juce_Convolution.cpp
--- a/modules/juce_dsp/frequency/juce_Convolution.h
+++ b/modules/juce_dsp/frequency/juce_Convolution.h
@ -0,0 +1,174 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Performs stereo uniform-partitioned convolution of an input signal with an
+    impulse response in the frequency domain, using the juce FFT class.
+
+    It provides some thread-safe functions to load impulse responses as well,
+    from audio files or memory on the fly without any noticeable artefacts,
+    performing resampling and trimming if necessary.
+
+    The processing is equivalent to the time domain convolution done in the
+    class FIRFilter, with a FIRFilter::Coefficients object having as
+    coefficients the samples of the impulse response. However, it is more
+    efficient in general to do frequency domain convolution when the size of
+    the impulse response is higher than 64 samples.
+
+    @see FIRFilter, FIRFilter::Coefficients, FFT
+
+    @tags{DSP}
+*/
+class JUCE_API  Convolution
+{
+public:
+    //==============================================================================
+    /** Initialises an object for performing convolution in the frequency domain. */
+    Convolution();
+
+    /** Destructor. */
+    ~Convolution();
+
+    //==============================================================================
+    /** Must be called before loading any impulse response, to provide to the
+        convolution the maximumBufferSize to handle, and the sample rate useful for
+        optional resampling.
+    */
+    void prepare (const ProcessSpec&);
+
+    /** Resets the processing pipeline, ready to start a new stream of data. */
+    void reset() noexcept;
+
+    /** Performs the filter operation on the given set of samples, with optional
+        stereo processing.
+
+        Only contexts whose SampleType is float are accepted; any other sample
+        type is rejected at compile time by the static_assert below.
+    */
+    template <typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        static_assert (std::is_same<typename ProcessContext::SampleType, float>::value,
+                       "Convolution engine only supports single precision floating point data");
+
+        // Forward the context's blocks and bypass flag to the non-template implementation.
+        processSamples (context.getInputBlock(), context.getOutputBlock(), context.isBypassed);
+    }
+
+    //==============================================================================
+    /** This function loads an impulse response audio file from memory, added in a
+        JUCE project with the Projucer as binary data. It can load any of the audio
+        formats registered in JUCE, and performs some resampling and pre-processing
+        as well if needed.
+
+        Note : don't try to use this function on raw float samples, since the
+        data is supposed to be an audio file in its binary format, and be sure that
+        the original data is not going to change its memory location during the
+        process !!
+
+        @param sourceData               the block of data to use as the stream's source
+        @param sourceDataSize           the number of bytes in the source data block
+        @param wantsStereo              requests to process both stereo channels or only one mono channel
+        @param wantsTrimming            requests to trim the start and the end of the impulse response
+        @param size                     the expected size for the impulse response after loading, can be
+                                        set to 0 for requesting maximum original impulse response size
+        @param wantsNormalization       requests to normalize the impulse response amplitude
+    */
+    void loadImpulseResponse (const void* sourceData, size_t sourceDataSize,
+                              bool wantsStereo, bool wantsTrimming, size_t size,
+                              bool wantsNormalization = true);
+
+    /** This function loads an impulse response from an audio file on any drive. It
+        can load any of the audio formats registered in JUCE, and performs some
+        resampling and pre-processing as well if needed.
+
+        @param fileImpulseResponse      the location of the audio file
+        @param wantsStereo              requests to process both stereo channels or only one mono channel
+        @param wantsTrimming            requests to trim the start and the end of the impulse response
+        @param size                     the expected size for the impulse response after loading, can be
+                                        set to 0 for requesting maximum original impulse response size
+        @param wantsNormalization       requests to normalize the impulse response amplitude
+    */
+    void loadImpulseResponse (const File& fileImpulseResponse,
+                              bool wantsStereo, bool wantsTrimming, size_t size,
+                              bool wantsNormalization = true);
+
+    /** This function loads an impulse response from an audio buffer, which is
+        copied before doing anything else. Performs some resampling and
+        pre-processing as well if needed.
+
+        @param buffer                   the AudioBuffer to use
+        @param bufferSampleRate         the sampleRate of the data in the AudioBuffer
+        @param wantsStereo              requests to process both stereo channels or only one mono channel
+        @param wantsTrimming            requests to trim the start and the end of the impulse response
+        @param wantsNormalization       requests to normalize the impulse response amplitude
+        @param size                     the expected size for the impulse response after loading, can be
+                                        set to 0 for requesting maximum original impulse response size
+    */
+    void copyAndLoadImpulseResponseFromBuffer (AudioBuffer<float>& buffer, double bufferSampleRate,
+                                               bool wantsStereo, bool wantsTrimming, bool wantsNormalization,
+                                               size_t size);
+
+    /** This function loads an impulse response from an audio block, which is
+        copied before doing anything else. Performs some resampling and
+        pre-processing as well if needed.
+
+        @param block                    the AudioBlock to use
+        @param bufferSampleRate         the sampleRate of the data in the AudioBlock
+        @param wantsStereo              requests to process both stereo channels or only one channel
+        @param wantsTrimming            requests to trim the start and the end of the impulse response
+        @param wantsNormalization       requests to normalize the impulse response amplitude
+        @param size                     the expected size for the impulse response after loading.
+                                        NOTE(review): this was documented as "-1 for maximum length",
+                                        but the parameter is an unsigned size_t and the other loading
+                                        functions use 0 for that purpose - confirm against the
+                                        implementation
+    */
+    void copyAndLoadImpulseResponseFromBlock (AudioBlock<float> block, double bufferSampleRate,
+                                              bool wantsStereo, bool wantsTrimming, bool wantsNormalization,
+                                              size_t size);
+
+
+private:
+    //==============================================================================
+    struct Pimpl;
+    std::unique_ptr<Pimpl> pimpl;
+
+    //==============================================================================
+    void processSamples (const AudioBlock<float>&, AudioBlock<float>&, bool isBypassed) noexcept;
+
+    //==============================================================================
+    // Given an in-class initializer like the flags below, so that the member never
+    // holds an indeterminate value before it is first assigned.
+    double sampleRate = 0;
+    bool currentIsBypassed = false;
+    bool isActive = false;
+    LinearSmoothedValue<float> volumeDry[2], volumeWet[2];
+    AudioBlock<float> dryBuffer;
+    HeapBlock<char> dryBufferStorage;
+
+    //==============================================================================
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (Convolution)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/frequency/juce_FFT.cpp
+++ b/modules/juce_dsp/frequency/juce_FFT.cpp
@ -0,0 +1,837 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Abstract interface that each FFT implementation in this file derives from. */
+struct FFT::Instance
+{
+    virtual ~Instance() = default;
+
+    /** Transforms the complex data in source into destination; isInverse selects
+        the inverse rather than the forward transform. */
+    virtual void perform (const Complex<float>* source, Complex<float>* destination, bool isInverse) const noexcept = 0;
+
+    /** In-place forward transform of real-only data.
+        NOTE(review): the meaning of the bool flag is not visible from this declaration. */
+    virtual void performRealOnlyForwardTransform (float* data, bool flag) const noexcept = 0;
+
+    /** In-place inverse counterpart of performRealOnlyForwardTransform. */
+    virtual void performRealOnlyInverseTransform (float* data) const noexcept = 0;
+};
+
+/** Base class for the factories that create FFT::Instance objects.
+
+    Each Engine adds itself to a shared list when constructed; the list is kept
+    sorted by descending priority, so createBestEngineForPlatform() tries the
+    highest-priority engine first.
+*/
+struct FFT::Engine
+{
+    Engine (int priorityToUse) : enginePriority (priorityToUse)
+    {
+        auto& registered = getEngines();
+        registered.add (this);
+
+        // Re-sort after every registration: largest priority value at the front.
+        std::sort (registered.begin(), registered.end(),
+                   [] (Engine* lhs, Engine* rhs) { return lhs->enginePriority > rhs->enginePriority; });
+    }
+
+    virtual ~Engine() = default;
+
+    /** Returns a new instance for the given order, or nullptr if none could be created. */
+    virtual FFT::Instance* create (int order) const = 0;
+
+    //==============================================================================
+    /** Asks each registered engine in priority order and returns the first
+        non-null instance; returns nullptr (after asserting) if none succeeds. */
+    static FFT::Instance* createBestEngineForPlatform (int order)
+    {
+        for (auto* candidate : getEngines())
+        {
+            auto* created = candidate->create (order);
+
+            if (created == nullptr)
+                continue;
+
+            return created;
+        }
+
+        jassertfalse;  // This should never happen as the fallback engine should always work!
+        return nullptr;
+    }
+
+private:
+    /** Function-local static holding every registered engine. */
+    static Array<Engine*>& getEngines()
+    {
+        static Array<Engine*> registry;
+        return registry;
+    }
+
+    int enginePriority; // used so that faster engines have priority over slower ones
+};
+
+/** Engine that takes its priority from InstanceToUse::priority and forwards
+    create() to the static InstanceToUse::create(). */
+template <typename InstanceToUse>
+struct FFT::EngineImpl  : public FFT::Engine
+{
+    EngineImpl()
+        : FFT::Engine (InstanceToUse::priority)
+    {
+    }
+
+    FFT::Instance* create (int order) const override
+    {
+        return InstanceToUse::create (order);
+    }
+};
+
+//==============================================================================
+//==============================================================================
+struct FFTFallback  : public FFT::Instance
+{
+    // this should have the least priority of all engines
+    static constexpr int priority = -1;
+
+    static FFTFallback* create (int order)
+    {
+        return new FFTFallback (order);
+    }
+
+    FFTFallback (int order)
+    {
+        configForward.reset (new FFTConfig (1 << order, false));
+        configInverse.reset (new FFTConfig (1 << order, true));
+
+        size = 1 << order;
+    }
+
+    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
+    {
+        if (size == 1)
+        {
+            *output = *input;
+            return;
+        }
+
+        const SpinLock::ScopedLockType sl(processLock);
+
+        jassert (configForward != nullptr);
+
+        if (inverse)
+        {
+            configInverse->perform (input, output);
+
+            const float scaleFactor = 1.0f / size;
+
+            for (int i = 0; i < size; ++i)
+                output[i] *= scaleFactor;
+        }
+        else
+        {
+            configForward->perform (input, output);
+        }
+    }
+
+    const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;
+
+    void performRealOnlyForwardTransform (float* d, bool) const noexcept override
+    {
+        if (size == 1)
+            return;
+
+        const size_t scratchSize = 16 + sizeof (Complex<float>) * (size_t) size;
+
+        if (scratchSize < maxFFTScratchSpaceToAlloca)
+        {
+            performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
+        }
+        else
+        {
+            HeapBlock<char> heapSpace (scratchSize);
+            performRealOnlyForwardTransform (reinterpret_cast<Complex<float>*> (heapSpace.getData()), d);
+        }
+    }
+
+    void performRealOnlyInverseTransform (float* d) const noexcept override
+    {
+        if (size == 1)
+            return;
+
+        const size_t scratchSize = 16 + sizeof (Complex<float>) * (size_t) size;
+
+        if (scratchSize < maxFFTScratchSpaceToAlloca)
+        {
+            performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
+        }
+        else
+        {
+            HeapBlock<char> heapSpace (scratchSize);
+            performRealOnlyInverseTransform (reinterpret_cast<Complex<float>*> (heapSpace.getData()), d);
+        }
+    }
+
+    /** Widens the `size` real samples in d into complex values in scratch (zero
+        imaginary part), then runs the forward transform from scratch back into d,
+        which is reinterpreted as an array of complex values.
+    */
+    void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
+    {
+        auto* spectrum = reinterpret_cast<Complex<float>*> (d);
+
+        for (int sample = 0; sample < size; ++sample)
+            scratch[sample] = Complex<float> (d[sample], 0.0f);
+
+        perform (scratch, spectrum, false);
+    }
+
+    /** Inverse counterpart of the scratch-taking forward transform.
+
+        d is read as `size` complex values. The upper half is first overwritten
+        with conjugates of the mirrored lower-half entries, the inverse transform
+        is run into scratch, and the result is written back to d with the real
+        parts in d[0 .. size) and the imaginary parts in d[size .. 2 * size).
+    */
+    void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
+    {
+        auto* spectrum = reinterpret_cast<Complex<float>*> (d);
+        const auto half = size >> 1;
+
+        for (auto bin = half; bin < size; ++bin)
+            spectrum[bin] = std::conj (spectrum[size - bin]);
+
+        perform (spectrum, scratch, true);
+
+        float* const realOut = d;
+        float* const imagOut = d + size;
+
+        for (int n = 0; n < size; ++n)
+        {
+            realOut[n] = scratch[n].real();
+            imagOut[n] = scratch[n].imag();
+        }
+    }
+
+    //==============================================================================
+    /** Holds the twiddle-factor table and the radix factorisation for one FFT
+        size and direction, together with the recursive transform built on them.
+    */
+    struct FFTConfig
+    {
+        /** Builds the twiddle table and the factor list for a transform of
+            sizeOfFFT points.
+
+            @param sizeOfFFT   number of points in the transform
+            @param isInverse   true to build the tables for the inverse direction
+        */
+        FFTConfig (int sizeOfFFT, bool isInverse)
+            : fftSize (sizeOfFFT), inverse (isInverse), twiddleTable ((size_t) sizeOfFFT)
+        {
+            // Phase step per table entry: +2*pi/fftSize for the inverse direction,
+            // -2*pi/fftSize for the forward direction.
+            auto inverseFactor = (inverse ? 2.0 : -2.0) * MathConstants<double>::pi / (double) fftSize;
+
+            if (fftSize <= 4)
+            {
+                // Small tables: every entry is computed directly with cos/sin.
+                for (int i = 0; i < fftSize; ++i)
+                {
+                    auto phase = i * inverseFactor;
+
+                    twiddleTable[i] = { (float) std::cos (phase),
+                                        (float) std::sin (phase) };
+                }
+            }
+            else
+            {
+                // First quarter of the table: computed directly with cos/sin.
+                for (int i = 0; i < fftSize / 4; ++i)
+                {
+                    auto phase = i * inverseFactor;
+
+                    twiddleTable[i] = { (float) std::cos (phase),
+                                        (float) std::sin (phase) };
+                }
+
+                // Second quarter: derived from the entry a quarter of the table
+                // earlier by swapping its components and flipping one sign
+                // (which sign depends on the direction).
+                for (int i = fftSize / 4; i < fftSize / 2; ++i)
+                {
+                    auto other = twiddleTable[i - fftSize / 4];
+
+                    twiddleTable[i] = { inverse ? -other.imag() :  other.imag(),
+                                        inverse ?  other.real() : -other.real() };
+                }
+
+                twiddleTable[fftSize / 2].real (-1.0f);
+                twiddleTable[fftSize / 2].imag (0.0f);
+
+                // Upper half: conjugates of the lower-half entries mirrored
+                // around index fftSize / 2.
+                for (int i = fftSize / 2; i < fftSize; ++i)
+                {
+                    auto index = fftSize / 2 - (i - fftSize / 2);
+                    twiddleTable[i] = conj(twiddleTable[index]);
+                }
+            }
+
+            auto root = (int) std::sqrt ((double) fftSize);
+            int divisor = 4, n = fftSize;
+
+            // Walk every slot of `factors`. Candidate radices are tried in the
+            // order 4, 2, 3, 5, 7, ...; once the candidate exceeds sqrt (fftSize)
+            // whatever is left of n is used as the radix. Each slot records the
+            // radix chosen and the length that remains after dividing by it.
+            for (int i = 0; i < numElementsInArray (factors); ++i)
+            {
+                while ((n % divisor) != 0)
+                {
+                    if (divisor == 2)       divisor = 3;
+                    else if (divisor == 4)  divisor = 2;
+                    else                    divisor += 2;
+
+                    if (divisor > root)
+                        divisor = n;
+                }
+
+                n /= divisor;
+
+                // Only radices 1, 2 and 4 are expected here.
+                jassert (divisor == 1 || divisor == 2 || divisor == 4);
+                factors[i].radix = divisor;
+                factors[i].length = n;
+            }
+        }
+
+        /** Transforms input into output, starting the recursion at the first
+            factor with unit strides.
+        */
+        void perform (const Complex<float>* input, Complex<float>* output) const noexcept
+        {
+            perform (input, output, 1, 1, factors);
+        }
+
+        const int fftSize;      // number of points in the transform
+        const bool inverse;     // true if the tables were built for the inverse direction
+
+        // One stage of the decomposition: the radix used at this stage and the
+        // length that remains once the radix has been divided out.
+        struct Factor { int radix, length; };
+        Factor factors[32];
+        HeapBlock<Complex<float>> twiddleTable;
+
+        /** Recursive step of the transform.
+
+            Takes the next factor from facs. The sub-blocks of output are filled
+            either by recursing with the remaining factors or, when factor.length
+            is 1, by copying input samples spaced stride * strideIn apart. The
+            sub-blocks are then combined in place by butterfly().
+        */
+        void perform (const Complex<float>* input, Complex<float>* output, int stride, int strideIn, const Factor* facs) const noexcept
+        {
+            auto factor = *facs++;
+            auto* originalOutput = output;
+            auto* outputEnd = output + factor.radix * factor.length;
+
+            // Top-level call with a small radix: one recursive call per sub-block.
+            if (stride == 1 && factor.radix <= 5)
+            {
+                for (int i = 0; i < factor.radix; ++i)
+                    perform (input + stride * strideIn * i, output + i * factor.length, stride * factor.radix, strideIn, facs);
+
+                butterfly (factor, output, stride);
+                return;
+            }
+
+            if (factor.length == 1)
+            {
+                // Bottom of the recursion: gather the strided input samples.
+                do
+                {
+                    *output++ = *input;
+                    input += stride * strideIn;
+                }
+                while (output < outputEnd);
+            }
+            else
+            {
+                do
+                {
+                    perform (input, output, stride * factor.radix, strideIn, facs);
+                    input += stride * strideIn;
+                    output += factor.length;
+                }
+                while (output < outputEnd);
+            }
+
+            butterfly (factor, originalOutput, stride);
+        }
+
+        /** Combines the sub-blocks in data for the given factor.
+
+            Radix 2 and radix 4 are handed to their dedicated routines. Radix 1,
+            and any other radix after the assertion, continues into the generic
+            loop below.
+        */
+        void butterfly (const Factor factor, Complex<float>* data, int stride) const noexcept
+        {
+            switch (factor.radix)
+            {
+                case 1:   break;
+                case 2:   butterfly2 (data, stride, factor.length); return;
+                case 4:   butterfly4 (data, stride, factor.length); return;
+                default:  jassertfalse; break;
+            }
+
+            // Generic path: one stack-allocated temporary per radix element.
+            auto* scratch = static_cast<Complex<float>*> (alloca (sizeof (Complex<float>) * (size_t) factor.radix));
+
+            for (int i = 0; i < factor.length; ++i)
+            {
+                // Gather the radix elements spaced factor.length apart, starting at i.
+                for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
+                {
+                    scratch[q1] = data[k];
+                    k += factor.length;
+                }
+
+                // Rebuild each of those elements as scratch[0] plus the remaining
+                // scratch values weighted by twiddle factors.
+                for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
+                {
+                    int twiddleIndex = 0;
+                    data[k] = scratch[0];
+
+                    for (int q = 1; q < factor.radix; ++q)
+                    {
+                        twiddleIndex += stride * k;
+
+                        // Keep the index inside the table.
+                        if (twiddleIndex >= fftSize)
+                            twiddleIndex -= fftSize;
+
+                        data[k] += scratch[q] * twiddleTable[twiddleIndex];
+                    }
+
+                    k += factor.length;
+                }
+            }
+        }
+
+        /** Radix-2 butterfly over `length` pairs (data[i], data[i + length]),
+            stepping through the twiddle table `stride` entries at a time.
+        */
+        void butterfly2 (Complex<float>* data, const int stride, const int length) const noexcept
+        {
+            auto* dataEnd = data + length;
+            auto* tw = twiddleTable.getData();
+
+            for (int i = length; --i >= 0;)
+            {
+                auto s = *dataEnd;
+                s *= (*tw);
+                tw += stride;
+                *dataEnd++ = *data - s;
+                *data++ += s;
+            }
+        }
+
+        /** Radix-4 butterfly over `length` groups of four elements spaced `length`
+            apart. The three twiddle pointers advance by stride, 2 * stride and
+            3 * stride per group, and the sign used when combining s5 with s4
+            depends on `inverse`.
+        */
+        void butterfly4 (Complex<float>* data, const int stride, const int length) const noexcept
+        {
+            auto lengthX2 = length * 2;
+            auto lengthX3 = length * 3;
+
+            auto strideX2 = stride * 2;
+            auto strideX3 = stride * 3;
+
+            auto* twiddle1 = twiddleTable.getData();
+            auto* twiddle2 = twiddle1;
+            auto* twiddle3 = twiddle1;
+
+            for (int i = length; --i >= 0;)
+            {
+                auto s0 = data[length]   * *twiddle1;
+                auto s1 = data[lengthX2] * *twiddle2;
+                auto s2 = data[lengthX3] * *twiddle3;
+                auto s3 = s0;             s3 += s2;
+                auto s4 = s0;             s4 -= s2;
+                auto s5 = *data;          s5 -= s1;
+
+                *data += s1;
+                data[lengthX2] = *data;
+                data[lengthX2] -= s3;
+                twiddle1 += stride;
+                twiddle2 += strideX2;
+                twiddle3 += strideX3;
+                *data += s3;
+
+                if (inverse)
+                {
+                    // data[length] = s5 + j * s4, data[lengthX3] = s5 - j * s4
+                    data[length] = { s5.real() - s4.imag(),
+                                     s5.imag() + s4.real() };
+
+                    data[lengthX3] = { s5.real() + s4.imag(),
+                                       s5.imag() - s4.real() };
+                }
+                else
+                {
+                    // data[length] = s5 - j * s4, data[lengthX3] = s5 + j * s4
+                    data[length] = { s5.real() + s4.imag(),
+                                     s5.imag() - s4.real() };
+
+                    data[lengthX3] = { s5.real() - s4.imag(),
+                                       s5.imag() + s4.real() };
+                }
+
+                ++data;
+            }
+        }
+
+        JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFTConfig)
+    };
+
+    //==============================================================================
+    // Held by perform() for the whole of any transform larger than one point.
+    SpinLock processLock;
+    // Forward- and inverse-direction configurations used by perform().
+    std::unique_ptr<FFTConfig> configForward, configInverse;
+    // Number of points in the transform.
+    int size;
+};
+
+// NOTE(review): EngineImpl is defined outside this excerpt; presumably constructing
+// this global makes FFTFallback available as a candidate engine - confirm.
+FFT::EngineImpl<FFTFallback> fftFallback;
+
+//==============================================================================
+//==============================================================================
+#if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
+/** FFT engine implemented on top of the vDSP FFT routines. */
+struct AppleFFT  : public FFT::Instance
+{
+    static constexpr int priority = 5;
+
+    /** Creates an engine for a transform of 2 ^ order points. */
+    static AppleFFT* create (int order)
+    {
+        return new AppleFFT (order);
+    }
+
+    AppleFFT (int orderToUse)
+        : order (static_cast<vDSP_Length> (orderToUse)),
+          fftSetup (vDSP_create_fftsetup (order, 2)),
+          forwardNormalisation (0.5f),
+          inverseNormalisation (1.0f / static_cast<float> (1 << order))
+    {}
+
+    ~AppleFFT() override
+    {
+        if (fftSetup == nullptr)
+            return;
+
+        vDSP_destroy_fftsetup (fftSetup);
+        fftSetup = nullptr;
+    }
+
+    /** Out-of-place complex transform; the output is rescaled afterwards
+        (by inverseNormalisation, or by 2 * forwardNormalisation when forward).
+    */
+    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
+    {
+        const auto numPoints = (1 << order);
+        auto* interleavedOut = (float*) output;
+
+        DSPSplitComplex source = toSplitComplex (const_cast<Complex<float>*> (input));
+        DSPSplitComplex dest   = toSplitComplex (output);
+
+        vDSP_fft_zop (fftSetup, &source, 2, &dest, 2,
+                      order, inverse ?  kFFTDirection_Inverse : kFFTDirection_Forward);
+
+        float gain = forwardNormalisation * 2.0f;
+
+        if (inverse)
+            gain = inverseNormalisation;
+
+        vDSP_vsmul (interleavedOut, 1, &gain, interleavedOut, 1, static_cast<size_t> (numPoints << 1));
+    }
+
+    /** In-place forward transform of real data, followed by unpacking of the
+        result via mirrorResult().
+    */
+    void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
+    {
+        const auto numPoints = (1 << order);
+        auto* bins = reinterpret_cast<Complex<float>*> (inoutData);
+        DSPSplitComplex split = toSplitComplex (bins);
+
+        inoutData[numPoints] = 0.0f;
+        vDSP_fft_zrip (fftSetup, &split, 2, order, kFFTDirection_Forward);
+        vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (numPoints << 1));
+
+        mirrorResult (bins, ignoreNegativeFreqs);
+    }
+
+    /** In-place inverse of performRealOnlyForwardTransform(); the second half
+        of the buffer is cleared on return.
+    */
+    void performRealOnlyInverseTransform (float* inoutData) const noexcept override
+    {
+        const auto numPoints = (1 << order);
+        auto* bins = reinterpret_cast<Complex<float>*> (inoutData);
+        DSPSplitComplex split = toSplitComplex (bins);
+
+        // The imaginary parts of the DC and nyquist bins are always zero, so
+        // Apple's packed format keeps the real part of the nyquist bin in the
+        // imaginary slot of the DC bin. Repack the input accordingly.
+        if (numPoints != 1)
+            bins[0] = Complex<float> (bins[0].real(), bins[numPoints >> 1].real());
+
+        vDSP_fft_zrip (fftSetup, &split, 2, order, kFFTDirection_Inverse);
+        vDSP_vsmul (inoutData, 1, &inverseNormalisation, inoutData, 1, static_cast<size_t> (numPoints << 1));
+        vDSP_vclr (inoutData + numPoints, 1, static_cast<size_t> (numPoints));
+    }
+
+private:
+    //==============================================================================
+    /** Unpacks the forward result: moves the nyquist value out of the DC bin's
+        imaginary slot and, unless told to skip them, fills in the upper bins as
+        conjugates of their mirrored counterparts.
+    */
+    void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
+    {
+        const auto numPoints = (1 << order);
+        const auto nyquist = numPoints >> 1;
+
+        // The imaginary parts of the DC and nyquist bins are always zero, so
+        // Apple stores the real part of the nyquist bin in the imaginary slot
+        // of the DC bin.
+        out[nyquist] = { out[0].imag(), 0.0 };
+        out[0]       = { out[0].real(), 0.0 };
+
+        if (ignoreNegativeFreqs)
+            return;
+
+        for (auto bin = nyquist + 1; bin < numPoints; ++bin)
+            out[bin] = std::conj (out[numPoints - bin]);
+    }
+
+    /** Views an interleaved complex array as a split-complex pair of pointers. */
+    static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
+    {
+        // This relies on Complex storing its real and imaginary parts
+        // interleaved and tightly packed.
+        auto* asFloats = reinterpret_cast<float*> (data);
+
+        return { asFloats,
+                 asFloats + 1};
+    }
+
+    //==============================================================================
+    vDSP_Length order;
+    FFTSetup fftSetup;
+    float forwardNormalisation, inverseNormalisation;
+};
+
+// NOTE(review): EngineImpl is defined outside this excerpt; presumably constructing
+// this global makes AppleFFT available as a candidate engine - confirm.
+FFT::EngineImpl<AppleFFT> appleFFT;
+#endif
+
+//==============================================================================
+//==============================================================================
+#if JUCE_DSP_USE_SHARED_FFTW || JUCE_DSP_USE_STATIC_FFTW
+
+#if JUCE_DSP_USE_STATIC_FFTW
+// Loosely-typed prototypes for the statically linked FFTW entry points. They are
+// only passed to FFTWImpl::Symbols::symbol(), which reinterpret_casts them to the
+// typed function pointers held in FFTWImpl::Symbols.
+extern "C"
+{
+    void* fftwf_plan_dft_1d     (int, void*, void*, int, int);
+    void* fftwf_plan_dft_r2c_1d (int, void*, void*, int);
+    void* fftwf_plan_dft_c2r_1d (int, void*, void*, int);
+    void fftwf_destroy_plan     (void*);
+    void fftwf_execute_dft      (void*, void*, void*);
+    void fftwf_execute_dft_r2c  (void*, void*, void*);
+    void fftwf_execute_dft_c2r  (void*, void*, void*);
+}
+#endif
+
+/** FFT engine backed by the single-precision FFTW library, either linked
+    statically or loaded at runtime from a shared library.
+*/
+struct FFTWImpl  : public FFT::Instance
+{
+   #if JUCE_DSP_USE_STATIC_FFTW
+    // if the JUCE developer has gone through the hassle of statically
+    // linking in fftw, they probably want to use it
+    static constexpr int priority = 10;
+   #else
+    static constexpr int priority = 3;
+   #endif
+
+    struct FFTWPlan;
+    using FFTWPlanRef = FFTWPlan*;
+
+    // Planner flag values passed to the plan-creation functions.
+    enum
+    {
+        measure   = 0,
+        unaligned = (1 << 1),
+        estimate  = (1 << 6)
+    };
+
+    /** Table of the FFTW entry points used by this engine. */
+    struct Symbols
+    {
+        FFTWPlanRef (*plan_dft_fftw) (unsigned, Complex<float>*, Complex<float>*, int, unsigned);
+        FFTWPlanRef (*plan_r2c_fftw) (unsigned, float*, Complex<float>*, unsigned);
+        FFTWPlanRef (*plan_c2r_fftw) (unsigned, Complex<float>*, float*, unsigned);
+        void (*destroy_fftw) (FFTWPlanRef);
+
+        void (*execute_dft_fftw) (FFTWPlanRef, const Complex<float>*, Complex<float>*);
+        void (*execute_r2c_fftw) (FFTWPlanRef, float*, Complex<float>*);
+        void (*execute_c2r_fftw) (FFTWPlanRef, Complex<float>*, float*);
+
+       #if JUCE_DSP_USE_STATIC_FFTW
+        /** Stores a statically linked function in dst; always succeeds. */
+        template <typename FuncPtr, typename ActualSymbolType>
+        static bool symbol (FuncPtr& dst, ActualSymbolType sym)
+        {
+            dst = reinterpret_cast<FuncPtr> (sym);
+            return true;
+        }
+       #else
+        /** Looks up `name` in lib and stores it in dst; returns false if it is missing. */
+        template <typename FuncPtr>
+        static bool symbol (DynamicLibrary& lib, FuncPtr& dst, const char* name)
+        {
+            dst = reinterpret_cast<FuncPtr> (lib.getFunction (name));
+            return (dst != nullptr);
+        }
+       #endif
+    };
+
+    /** Lock serialising every call into the FFTW planner.
+
+        FFTW only guarantees thread safety for plan execution; creating and
+        destroying plans must not happen concurrently, and FFT objects may be
+        constructed or destroyed on any thread.
+    */
+    static CriticalSection& getFFTWPlanLock() noexcept
+    {
+        static CriticalSection planLock;
+        return planLock;
+    }
+
+    /** Creates an engine for a transform of 2 ^ order points, or returns nullptr
+        if the shared library or any required symbol cannot be found.
+    */
+    static FFTWImpl* create (int order)
+    {
+        DynamicLibrary lib;
+
+      #if ! JUCE_DSP_USE_STATIC_FFTW
+       #if JUCE_MAC
+        auto libName = "libfftw3f.dylib";
+       #elif JUCE_WINDOWS
+        auto libName = "libfftw3f.dll";
+       #else
+        auto libName = "libfftw3f.so";
+       #endif
+
+        if (lib.open (libName))
+      #endif
+        {
+            Symbols symbols;
+
+           #if JUCE_DSP_USE_STATIC_FFTW
+            if (! Symbols::symbol (symbols.plan_dft_fftw, fftwf_plan_dft_1d))     return nullptr;
+            if (! Symbols::symbol (symbols.plan_r2c_fftw, fftwf_plan_dft_r2c_1d)) return nullptr;
+            if (! Symbols::symbol (symbols.plan_c2r_fftw, fftwf_plan_dft_c2r_1d)) return nullptr;
+            if (! Symbols::symbol (symbols.destroy_fftw,  fftwf_destroy_plan))    return nullptr;
+
+            if (! Symbols::symbol (symbols.execute_dft_fftw, fftwf_execute_dft))     return nullptr;
+            if (! Symbols::symbol (symbols.execute_r2c_fftw, fftwf_execute_dft_r2c)) return nullptr;
+            if (! Symbols::symbol (symbols.execute_c2r_fftw, fftwf_execute_dft_c2r)) return nullptr;
+           #else
+            if (! Symbols::symbol (lib, symbols.plan_dft_fftw, "fftwf_plan_dft_1d"))     return nullptr;
+            if (! Symbols::symbol (lib, symbols.plan_r2c_fftw, "fftwf_plan_dft_r2c_1d")) return nullptr;
+            if (! Symbols::symbol (lib, symbols.plan_c2r_fftw, "fftwf_plan_dft_c2r_1d")) return nullptr;
+            if (! Symbols::symbol (lib, symbols.destroy_fftw,  "fftwf_destroy_plan"))    return nullptr;
+
+            if (! Symbols::symbol (lib, symbols.execute_dft_fftw, "fftwf_execute_dft"))     return nullptr;
+            if (! Symbols::symbol (lib, symbols.execute_r2c_fftw, "fftwf_execute_dft_r2c")) return nullptr;
+            if (! Symbols::symbol (lib, symbols.execute_c2r_fftw, "fftwf_execute_dft_c2r")) return nullptr;
+           #endif
+
+            return new FFTWImpl (static_cast<size_t> (order), std::move (lib), symbols);
+        }
+
+        return nullptr;
+    }
+
+    /** Takes ownership of the library handle and creates the four plans
+        (complex forward/inverse, real-to-complex, complex-to-real).
+    */
+    FFTWImpl (size_t orderToUse, DynamicLibrary&& libraryToUse, const Symbols& symbols)
+        : fftwLibrary (std::move (libraryToUse)), fftw (symbols), order (orderToUse)
+    {
+        // The FFTW planner is not thread-safe, so plan creation is serialised.
+        const CriticalSection::ScopedLockType planLock (getFFTWPlanLock());
+
+        auto n = (1u << order);
+        HeapBlock<Complex<float>> in (n), out (n);
+
+        c2cForward = fftw.plan_dft_fftw (n, in.getData(), out.getData(), -1, unaligned | estimate);
+        c2cInverse = fftw.plan_dft_fftw (n, in.getData(), out.getData(), +1, unaligned | estimate);
+
+        r2c = fftw.plan_r2c_fftw (n, (float*) in.getData(), in.getData(), unaligned | estimate);
+        c2r = fftw.plan_c2r_fftw (n, in.getData(), (float*) in.getData(), unaligned | estimate);
+    }
+
+    ~FFTWImpl() override
+    {
+        // Plan destruction goes through the planner too, so take the same lock.
+        const CriticalSection::ScopedLockType planLock (getFFTWPlanLock());
+
+        fftw.destroy_fftw (c2cForward);
+        fftw.destroy_fftw (c2cInverse);
+        fftw.destroy_fftw (r2c);
+        fftw.destroy_fftw (c2r);
+    }
+
+    /** Out-of-place complex transform; the inverse result is scaled by 1 / n. */
+    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
+    {
+        if (inverse)
+        {
+            auto n = (1u << order);
+            fftw.execute_dft_fftw (c2cInverse, input, output);
+            FloatVectorOperations::multiply ((float*) output, 1.0f / static_cast<float> (n), (int) n << 1);
+        }
+        else
+        {
+            fftw.execute_dft_fftw (c2cForward, input, output);
+        }
+    }
+
+    /** In-place real-to-complex transform. Unless ignoreNegativeFreqs is set, the
+        upper bins are filled in as conjugates of their mirrored counterparts.
+    */
+    void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
+    {
+        if (order == 0)
+            return;
+
+        auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
+
+        fftw.execute_r2c_fftw (r2c, inputOutputData, out);
+
+        auto size = (1 << order);
+
+        if (! ignoreNegativeFreqs)
+            for (auto i = size >> 1; i < size; ++i)
+                out[i] = std::conj (out[size - i]);
+    }
+
+    /** In-place complex-to-real transform; the first n floats are scaled by 1 / n. */
+    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
+    {
+        auto n = (1u << order);
+
+        fftw.execute_c2r_fftw (c2r, (Complex<float>*) inputOutputData, inputOutputData);
+        FloatVectorOperations::multiply ((float*) inputOutputData, 1.0f / static_cast<float> (n), (int) n);
+    }
+
+    //==============================================================================
+    DynamicLibrary fftwLibrary;     // keeps the shared library loaded while the plans exist
+    Symbols fftw;
+    size_t order;
+
+    FFTWPlanRef c2cForward, c2cInverse, r2c, c2r;
+};
+
+// NOTE(review): EngineImpl is defined outside this excerpt; presumably constructing
+// this global makes FFTWImpl available as a candidate engine - confirm.
+FFT::EngineImpl<FFTWImpl> fftwEngine;
+#endif
+
+//==============================================================================
+//==============================================================================
+#if JUCE_DSP_USE_INTEL_MKL
+/** FFT engine backed by the DFTI interface of Intel MKL.
+
+    Two descriptors are used: an out-of-place complex one for perform() and an
+    in-place real one for the real-only transforms. Both are configured with a
+    backward scale of 1 / (2 ^ order).
+*/
+struct IntelFFT  : public FFT::Instance
+{
+    static constexpr int priority = 8;
+
+    /** Returns true if a DFTI call reported success (status 0). */
+    static bool succeeded (MKL_LONG status) noexcept        { return status == 0; }
+
+    /** Creates an engine for a transform of 2 ^ orderToUse points, or returns
+        nullptr if either descriptor cannot be created, configured or committed.
+        Any descriptor already created is freed on failure.
+    */
+    static IntelFFT* create (int orderToUse)
+    {
+        DFTI_DESCRIPTOR_HANDLE mklc2c, mklc2r;
+
+        if (succeeded (DftiCreateDescriptor (&mklc2c, DFTI_SINGLE, DFTI_COMPLEX, 1, 1 << orderToUse)))
+        {
+            if (succeeded (DftiSetValue (mklc2c, DFTI_PLACEMENT, DFTI_NOT_INPLACE))
+                 && succeeded (DftiSetValue (mklc2c, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
+                 && succeeded (DftiCommitDescriptor (mklc2c)))
+            {
+                if (succeeded (DftiCreateDescriptor (&mklc2r, DFTI_SINGLE, DFTI_REAL, 1, 1 << orderToUse)))
+                {
+                    if (succeeded (DftiSetValue (mklc2r, DFTI_PLACEMENT, DFTI_INPLACE))
+                         && succeeded (DftiSetValue (mklc2r, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
+                         && succeeded (DftiCommitDescriptor (mklc2r)))
+                    {
+                        return new IntelFFT (static_cast<size_t> (orderToUse), mklc2c, mklc2r);
+                    }
+
+                    DftiFreeDescriptor (&mklc2r);
+                }
+            }
+
+            DftiFreeDescriptor (&mklc2c);
+        }
+
+        return nullptr;
+    }
+
+    /** Takes ownership of two committed descriptors. */
+    IntelFFT (size_t orderToUse, DFTI_DESCRIPTOR_HANDLE c2cToUse, DFTI_DESCRIPTOR_HANDLE cr2ToUse)
+        : order (orderToUse), c2c (c2cToUse), c2r (cr2ToUse)
+    {}
+
+    ~IntelFFT() override
+    {
+        DftiFreeDescriptor (&c2c);
+        DftiFreeDescriptor (&c2r);
+    }
+
+    /** Out-of-place complex transform using the complex descriptor. */
+    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
+    {
+        // The DFTI compute functions take a non-const input pointer.
+        auto* source = const_cast<Complex<float>*> (input);
+
+        if (inverse)
+            DftiComputeBackward (c2c, source, output);
+        else
+            DftiComputeForward (c2c, source, output);
+    }
+
+    /** In-place real forward transform. Unless ignoreNegativeFreqs is set, the
+        upper bins are filled in as conjugates of their mirrored counterparts.
+    */
+    void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
+    {
+        if (order == 0)
+            return;
+
+        DftiComputeForward (c2r, inputOutputData);
+
+        auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
+        auto size = (1 << order);
+
+        if (! ignoreNegativeFreqs)
+            for (auto i = size >> 1; i < size; ++i)
+                out[i] = std::conj (out[size - i]);
+    }
+
+    /** In-place real inverse transform using the real descriptor. */
+    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
+    {
+        DftiComputeBackward (c2r, inputOutputData);
+    }
+
+    size_t order;
+    DFTI_DESCRIPTOR_HANDLE c2c, c2r;
+};
+
+// Deliberately not called fftwEngine: the FFTW engine's global already uses that
+// name, so sharing it would be a redefinition when both FFTW and Intel MKL are
+// enabled in the same build.
+FFT::EngineImpl<IntelFFT> intelFFT;
+#endif
+
+//==============================================================================
+//==============================================================================
+/** Creates an FFT of 2 ^ order points, using whatever engine
+    Engine::createBestEngineForPlatform() returns for that order.
+*/
+FFT::FFT (int order)
+    : engine (FFT::Engine::createBestEngineForPlatform (order)), size (1 << order)
+{}
+
+/** Destructor; releases the engine. */
+FFT::~FFT() = default;
+
+/** Forwards an out-of-place complex transform to the engine; does nothing if
+    no engine was created.
+*/
+void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept
+{
+    if (auto* impl = engine.get())
+        impl->perform (input, output, inverse);
+}
+
+/** Forwards an in-place real-only forward transform to the engine; does nothing
+    if no engine was created.
+
+    @param inputOutputData                    buffer transformed in place
+    @param dontCalculateNegativeFrequencies   hint passed through to the engine
+*/
+void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool dontCalculateNegativeFrequencies) const noexcept
+{
+    if (engine != nullptr)
+        engine->performRealOnlyForwardTransform (inputOutputData, dontCalculateNegativeFrequencies);
+}
+
+/** Forwards an in-place real-only inverse transform to the engine; does nothing
+    if no engine was created.
+*/
+void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
+{
+    if (auto* impl = engine.get())
+        impl->performRealOnlyInverseTransform (inputOutputData);
+}
+
+/** Replaces the first `size` floats of the buffer with the magnitudes of the
+    forward transform's bins and zeroes the second `size` floats. A one-point
+    transform leaves the buffer untouched.
+*/
+void FFT::performFrequencyOnlyForwardTransform (float* inputOutputData) const noexcept
+{
+    if (size == 1)
+        return;
+
+    performRealOnlyForwardTransform (inputOutputData);
+
+    const auto* bins = reinterpret_cast<const Complex<float>*> (inputOutputData);
+
+    // Magnitude n only overwrites a float at or before bin n's own storage, so
+    // the buffer can be converted front to back in place.
+    for (int bin = 0; bin < size; ++bin)
+    {
+        const float magnitude = std::abs (bins[bin]);
+        inputOutputData[bin] = magnitude;
+    }
+
+    zeromem (inputOutputData + size, sizeof (float) * static_cast<size_t> (size));
+}
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/frequency/juce_FFT.h
+++ b/modules/juce_dsp/frequency/juce_FFT.h
@ -0,0 +1,122 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Performs a fast Fourier transform.
+
+    This is only a simple low-footprint implementation and isn't tuned for speed - it may
+    be useful for simple applications where one of the more complex FFT libraries would be
+    overkill. (But in the future it may end up becoming optimised of course...)
+
+    The FFT class itself contains lookup tables, so there's some overhead in creating
+    one. You should create and cache an FFT object for each size/direction of transform
+    that you need, and re-use them to perform the actual operation.
+
+    @tags{DSP}
+*/
+class JUCE_API  FFT
+{
+public:
+    //==============================================================================
+    /** Initialises an object for performing forward and inverse FFT with the given size.
+        The number of points the FFT will operate on will be 2 ^ order.
+    */
+    FFT (int order);
+
+    /** Destructor. */
+    ~FFT();
+
+    //==============================================================================
+    /** Performs an out-of-place FFT, either forward or inverse.
+        The arrays must contain at least getSize() elements.
+    */
+    void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept;
+
+    /** Performs an in-place forward transform on a block of real data.
+
+        As the coefficients of the negative frequencies (frequencies higher than
+        N/2 or pi) are the complex conjugate of their positive counterparts,
+        it may not be necessary to calculate them for your particular application.
+        You can use dontCalculateNegativeFrequencies to let the FFT
+        engine know that you do not plan on using them. Note that this is only a
+        hint: some FFT engines (currently only the Fallback engine) will still
+        calculate the negative frequencies even if dontCalculateNegativeFrequencies
+        is true.
+
+        The size of the array passed in must be 2 * getSize(), and the first half
+        should contain your raw input sample data. On return, if
+        dontCalculateNegativeFrequencies is false, the array will contain size
+        complex real + imaginary parts data interleaved. If
+        dontCalculateNegativeFrequencies is true, the array will contain at least
+        (size / 2) + 1 complex numbers. Both outputs can be passed to
+        performRealOnlyInverseTransform() in order to convert it back to reals.
+    */
+    void performRealOnlyForwardTransform (float* inputOutputData,
+                                          bool dontCalculateNegativeFrequencies = false) const noexcept;
+
+    /** Performs a reverse operation to data created in performRealOnlyForwardTransform().
+
+        Although performRealOnlyInverseTransform will only use the first ((size / 2) + 1)
+        complex numbers, the size of the array passed in must still be 2 * getSize(), as some
+        FFT engines require the extra space for the calculation. On return, the first half of the
+        array will contain the reconstituted samples.
+    */
+    void performRealOnlyInverseTransform (float* inputOutputData) const noexcept;
+
+    /** Takes an array and simply transforms it to the magnitude frequency response
+        spectrum. This may be handy for things like frequency displays or analysis.
+        The size of the array passed in must be 2 * getSize().
+    */
+    void performFrequencyOnlyForwardTransform (float* inputOutputData) const noexcept;
+
+    /** Returns the number of data points that this FFT was created to work with. */
+    int getSize() const noexcept            { return size; }
+
+    //==============================================================================
+   #ifndef DOXYGEN
+    /* internal */
+    struct Instance;
+    template <typename> struct EngineImpl;
+   #endif
+
+private:
+    //==============================================================================
+    struct Engine;
+
+    // The engine that performs the transforms. The transform methods check it
+    // for null before forwarding to it.
+    std::unique_ptr<Instance> engine;
+    // Number of points in the transform (1 << order).
+    int size;
+
+    //==============================================================================
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFT)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/frequency/juce_FFT_test.cpp
+++ b/modules/juce_dsp/frequency/juce_FFT_test.cpp
@ -0,0 +1,213 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Unit tests for the FFT class.
+
+    Every test transforms pseudo-random data with the FFT under test and compares the
+    result with a direct, O(n^2) discrete Fourier transform computed by the helpers below.
+*/
+struct FFTUnitTest  : public UnitTest
+{
+    FFTUnitTest()  : UnitTest ("FFT", "DSP") {}
+
+    /** Fills n complex values; each part is computed as (2 * random.nextFloat()) - 1. */
+    static void fillRandom (Random& random, Complex<float>* buffer, size_t n)
+    {
+        for (size_t i = 0; i < n; ++i)
+            buffer[i] = Complex<float> ((2.0f * random.nextFloat()) - 1.0f,
+                                        (2.0f * random.nextFloat()) - 1.0f);
+    }
+
+    /** Fills n real values; each one is computed as (2 * random.nextFloat()) - 1. */
+    static void fillRandom (Random& random, float* buffer, size_t n)
+    {
+        for (size_t i = 0; i < n; ++i)
+            buffer[i] = (2.0f * random.nextFloat()) - 1.0f;
+    }
+
+    /** Returns the sum over i of in[i] * e^(j * i * freq), i.e. a single bin of a DFT. */
+    static Complex<float> freqConvolution (const Complex<float>* in, float freq, size_t n)
+    {
+        Complex<float> sum (0.0f, 0.0f);
+
+        for (size_t i = 0; i < n; ++i)
+            sum += in[i] * std::exp (Complex<float> (0.0f, static_cast<float> (i) * freq));
+
+        return sum;
+    }
+
+    /** Reference DFT of n complex inputs, evaluated one bin at a time.
+
+        @param in       n input values
+        @param out      receives n output bins
+        @param n        number of points
+        @param reverse  false uses a negative exponent (forward transform), true a
+                        positive one; no scaling is applied in either direction
+    */
+    static void performReferenceFourier (const Complex<float>* in, Complex<float>* out,
+                                         size_t n, bool reverse)
+    {
+        auto baseFreq = static_cast<float> (((reverse ? 1.0 : -1.0) * MathConstants<double>::twoPi)
+                                              / static_cast<float> (n));
+
+        for (size_t i = 0; i < n; ++i)
+            out[i] = freqConvolution (in, static_cast<float> (i) * baseFreq, n);
+    }
+
+    /** Reference DFT of n real inputs: widens them to complex values with a zero
+        imaginary part and forwards to the complex overload, so that both overloads
+        share a single implementation of the transform.
+    */
+    static void performReferenceFourier (const float* in, Complex<float>* out,
+                                         size_t n, bool reverse)
+    {
+        HeapBlock<Complex<float>> buffer (n);
+
+        for (size_t i = 0; i < n; ++i)
+            buffer.getData()[i] = Complex<float> (in[i], 0.0f);
+
+        performReferenceFourier (buffer.getData(), out, n, reverse);
+    }
+
+
+    //==============================================================================
+    /** Returns true if the first n elements of a and b differ by at most 1e-3 each.
+
+        The comparison is written as "not within tolerance" on purpose: a NaN difference
+        compares false against everything, so testing for "greater than the tolerance"
+        would let NaN output pass unnoticed.
+    */
+    template <typename Type>
+    static bool checkArrayIsSimilar (Type* a, Type* b, size_t n) noexcept
+    {
+        for (size_t i = 0; i < n; ++i)
+            if (! (std::abs (a[i] - b[i]) <= 1e-3f))
+                return false;
+
+        return true;
+    }
+
+    /** Checks the real-only forward transform (with and without the negative
+        frequencies) and the real-only inverse transform for orders 0 to 8.
+    */
+    struct RealTest
+    {
+        static void run (FFTUnitTest& u)
+        {
+            Random random (378272);
+
+            for (size_t order = 0; order <= 8; ++order)
+            {
+                auto n = (1u << order);
+
+                FFT fft ((int) order);
+
+                HeapBlock<float> input (n);
+                HeapBlock<Complex<float>> reference (n), output (n);
+
+                fillRandom (random, input.getData(), n);
+                performReferenceFourier (input.getData(), reference.getData(), n, false);
+
+                // The transforms work in place on 2 * n floats: clear the whole buffer
+                // and put the n real input samples into its first half
+                auto loadInput = [&]
+                {
+                    zeromem (output.getData(), n * sizeof (Complex<float>));
+                    memcpy (reinterpret_cast<float*> (output.getData()), input.getData(), n * sizeof (float));
+                };
+
+                // full spectrum
+                loadInput();
+                fft.performRealOnlyForwardTransform (reinterpret_cast<float*> (output.getData()));
+                u.expect (checkArrayIsSimilar (reference.getData(), output.getData(), n));
+
+                // positive frequencies only: just the first (n / 2) + 1 bins are compared,
+                // and the remaining reference bins are cleared for the inverse test below
+                loadInput();
+                fft.performRealOnlyForwardTransform (reinterpret_cast<float*> (output.getData()), true);
+                std::fill (reference.getData() + ((n >> 1) + 1), reference.getData() + n, Complex<float> (0.0f));
+                u.expect (checkArrayIsSimilar (reference.getData(), output.getData(), (n >> 1) + 1));
+
+                // the inverse transform must give back the original samples
+                memcpy (output.getData(), reference.getData(), n * sizeof (Complex<float>));
+                fft.performRealOnlyInverseTransform (reinterpret_cast<float*> (output.getData()));
+                u.expect (checkArrayIsSimilar (reinterpret_cast<float*> (output.getData()), input.getData(), n));
+            }
+        }
+    };
+
+    /** Checks the magnitude-only forward transform against |DFT| for orders 0 to 8. */
+    struct FrequencyOnlyTest
+    {
+        static void run (FFTUnitTest& u)
+        {
+            Random random (378272);
+
+            for (size_t order = 0; order <= 8; ++order)
+            {
+                auto n = (1u << order);
+
+                FFT fft ((int) order);
+
+                HeapBlock<float> inout (n << 1), reference (n << 1);
+                HeapBlock<Complex<float>> frequency (n);
+
+                // The FFT is handed all 2 * n floats, so none of them may be left
+                // uninitialised: clear both buffers before filling the first half
+                zeromem (inout.getData(), sizeof (float) * (n << 1));
+                zeromem (reference.getData(), sizeof (float) * (n << 1));
+
+                fillRandom (random, inout.getData(), n);
+                performReferenceFourier (inout.getData(), frequency.getData(), n, false);
+
+                for (size_t i = 0; i < n; ++i)
+                    reference.getData()[i] = std::abs (frequency.getData()[i]);
+
+                fft.performFrequencyOnlyForwardTransform (inout.getData());
+
+                u.expect (checkArrayIsSimilar (inout.getData(), reference.getData(), n));
+            }
+        }
+    };
+
+    /** Checks the complex transform for orders 0 to 7: the forward result is compared
+        with the reference DFT, and transforming that reference back with the inverse
+        flag set is compared with the original input.
+    */
+    struct ComplexTest
+    {
+        static void run (FFTUnitTest& u)
+        {
+            Random random (378272);
+
+            for (size_t order = 0; order <= 7; ++order)
+            {
+                auto n = (1u << order);
+
+                FFT fft ((int) order);
+
+                HeapBlock<Complex<float>> input (n), buffer (n), output (n), reference (n);
+
+                fillRandom (random, input.getData(), n);
+                performReferenceFourier (input.getData(), reference.getData(), n, false);
+
+                memcpy (buffer.getData(), input.getData(), sizeof (Complex<float>) * n);
+                fft.perform (buffer.getData(), output.getData(), false);
+
+                u.expect (checkArrayIsSimilar (output.getData(), reference.getData(), n));
+
+                memcpy (buffer.getData(), reference.getData(), sizeof (Complex<float>) * n);
+                fft.perform (buffer.getData(), output.getData(), true);
+
+                u.expect (checkArrayIsSimilar (output.getData(), input.getData(), n));
+            }
+        }
+    };
+
+    /** Starts a test section with the given name and runs TheTest::run() inside it. */
+    template <class TheTest>
+    void runTestForAllTypes (const char* unitTestName)
+    {
+        beginTest (unitTestName);
+
+        TheTest::run (*this);
+    }
+
+    void runTest() override
+    {
+        runTestForAllTypes<RealTest> ("Real input numbers Test");
+        runTestForAllTypes<FrequencyOnlyTest> ("Frequency only Test");
+        runTestForAllTypes<ComplexTest> ("Complex input numbers Test");
+    }
+};
+
+static FFTUnitTest fftUnitTest;   // the single file-scope instance of this test; presumably constructing it is what makes the test known to the runner - confirm against UnitTest
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/frequency/juce_Windowing.cpp
+++ b/modules/juce_dsp/frequency/juce_Windowing.cpp
@ -0,0 +1,194 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Evaluates cos (order * i * pi / (size - 1)), the cosine term used by the
+    cosine-sum windows in this file.
+*/
+template <typename FloatType>
+static inline FloatType ncos (size_t order, size_t i, size_t size) noexcept
+{
+    const auto scaledIndex = static_cast<FloatType> (order * i) * MathConstants<FloatType>::pi;
+    const auto lastIndex   = static_cast<FloatType> (size - 1);
+
+    return std::cos (scaledIndex / lastIndex);
+}
+
+/** Builds the window by handing every argument on to the member
+    fillWindowingTables(), which sizes and fills the internal table.
+*/
+template <typename FloatType>
+WindowingFunction<FloatType>::WindowingFunction (size_t size,
+                                                 WindowingMethod type,
+                                                 bool normalize,
+                                                 FloatType beta)
+{
+    this->fillWindowingTables (size, type, normalize, beta);
+}
+
+/** Resizes the internal table to `size` entries and then lets the static
+    overload write the requested window into it.
+*/
+template <typename FloatType>
+void WindowingFunction<FloatType>::fillWindowingTables (size_t size, WindowingMethod type,
+                                                        bool normalize, FloatType beta) noexcept
+{
+    const auto numEntries = static_cast<int> (size);
+    windowTable.resize (numEntries);
+
+    auto* const table = windowTable.getRawDataPointer();
+    fillWindowingTables (table, size, type, normalize, beta);
+}
+
+/** Writes `size` samples of the requested window shape into `samples`.
+
+    @param samples    destination buffer, must have room for `size` elements
+    @param size       number of points in the window; a one-point window is always
+                      the single value 1
+    @param type       the window shape; an unknown value triggers an assertion
+    @param normalize  when true, the samples are rescaled so that they add up to
+                      `size` (unit gain at DC). A window whose samples add up to
+                      exactly zero is left unscaled.
+    @param beta       shape parameter, only used by the kaiser window
+*/
+template <typename FloatType>
+void WindowingFunction<FloatType>::fillWindowingTables (FloatType* samples, size_t size,
+                                                        WindowingMethod type, bool normalize,
+                                                        FloatType beta) noexcept
+{
+    switch (type)
+    {
+        case rectangular:
+        {
+            for (size_t i = 0; i < size; ++i)
+                samples[i] = static_cast<FloatType> (1);
+        }
+        break;
+
+        case triangular:
+        {
+            auto halfSlots = static_cast<FloatType> (0.5) * static_cast<FloatType> (size - 1);
+
+            for (size_t i = 0; i < size; ++i)
+                samples[i] = static_cast<FloatType> (1.0) - std::abs ((static_cast<FloatType> (i) - halfSlots) / halfSlots);
+        }
+        break;
+
+        case hann:
+        {
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto cos2 = ncos<FloatType> (2, i, size);
+                samples[i] = static_cast<FloatType> (0.5 - 0.5 * cos2);
+            }
+        }
+        break;
+
+        case hamming:
+        {
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto cos2 = ncos<FloatType> (2, i, size);
+                samples[i] = static_cast<FloatType> (0.54 - 0.46 * cos2);
+            }
+        }
+        break;
+
+        case blackman:
+        {
+            constexpr FloatType alpha = 0.16f;
+
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto cos2 = ncos<FloatType> (2, i, size);
+                auto cos4 = ncos<FloatType> (4, i, size);
+
+                samples[i] = static_cast<FloatType> (0.5 * (1 - alpha) - 0.5 * cos2 + 0.5 * alpha * cos4);
+            }
+        }
+        break;
+
+        case blackmanHarris:
+        {
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto cos2 = ncos<FloatType> (2, i, size);
+                auto cos4 = ncos<FloatType> (4, i, size);
+                auto cos6 = ncos<FloatType> (6, i, size);
+
+                samples[i] = static_cast<FloatType> (0.35875 - 0.48829 * cos2 + 0.14128 * cos4 - 0.01168 * cos6);
+            }
+        }
+        break;
+
+        case flatTop:
+        {
+            for (size_t i = 0; i < size; ++i)
+            {
+                auto cos2 = ncos<FloatType> (2, i, size);
+                auto cos4 = ncos<FloatType> (4, i, size);
+                auto cos6 = ncos<FloatType> (6, i, size);
+                auto cos8 = ncos<FloatType> (8, i, size);
+
+                samples[i] = static_cast<FloatType> (1.0 - 1.93 * cos2 + 1.29 * cos4 - 0.388 * cos6 + 0.028 * cos8);
+            }
+        }
+        break;
+
+        case kaiser:
+        {
+            const double factor = 1.0 / SpecialFunctions::besselI0 (beta);
+            const double halfSpan = 0.5 * (size - 1.0);
+
+            for (size_t i = 0; i < size; ++i)
+            {
+                // runs from -1 at the first sample to +1 at the last one
+                const double position = (i - halfSpan) / halfSpan;
+
+                samples[i] = static_cast<FloatType> (SpecialFunctions::besselI0 (beta * std::sqrt (1.0 - std::pow (position, 2.0)))
+                                                      * factor);
+            }
+        }
+        break;
+
+        default:
+            jassertfalse;
+            break;
+    }
+
+    // Every formula above except the rectangular one divides by (size - 1), so for a
+    // one-point window it evaluates 0 / 0 and stores NaN. A single point has no taper
+    // to describe, so use the value 1 instead.
+    if (size == 1)
+        samples[0] = static_cast<FloatType> (1);
+
+    // DC frequency amplitude must be one
+    if (normalize)
+    {
+        FloatType sum (0);
+
+        for (size_t i = 0; i < size; ++i)
+            sum += samples[i];
+
+        // A table that sums to zero (for example a two-point Hann or triangular
+        // window, whose samples are both zero) cannot be scaled to unit gain:
+        // dividing by the sum would only fill it with inf / NaN values.
+        if (sum != static_cast<FloatType> (0))
+        {
+            auto factor = static_cast<FloatType> (size) / sum;
+
+            FloatVectorOperations::multiply (samples, factor, static_cast<int> (size));
+        }
+    }
+}
+
+/** Multiplies `samples` element by element with the stored window table. */
+template <typename FloatType>
+void WindowingFunction<FloatType>::multiplyWithWindowingTable (FloatType* samples, size_t size) noexcept
+{
+    // never process more samples than the table holds
+    const auto numToProcess = jmin (static_cast<int> (size), windowTable.size());
+
+    FloatVectorOperations::multiply (samples, windowTable.getRawDataPointer(), numToProcess);
+}
+
+/** Maps a windowing method to its display name. Any value without a name of its
+    own triggers an assertion and yields an empty string.
+*/
+template <typename FloatType>
+const char* WindowingFunction<FloatType>::getWindowingMethodName (WindowingMethod type) noexcept
+{
+    const char* name = "";
+
+    switch (type)
+    {
+        case rectangular:       name = "Rectangular";       break;
+        case triangular:        name = "Triangular";        break;
+        case hann:              name = "Hann";              break;
+        case hamming:           name = "Hamming";           break;
+        case blackman:          name = "Blackman";          break;
+        case blackmanHarris:    name = "Blackman-Harris";   break;
+        case flatTop:           name = "FlatTop";           break;
+        case kaiser:            name = "Kaiser";            break;
+
+        default:
+            jassertfalse;
+            break;
+    }
+
+    return name;
+}
+
+template struct WindowingFunction<float>;    // explicit instantiation: the member templates are defined in this file, so they are instantiated here for float...
+template struct WindowingFunction<double>;   // ...and for double
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/frequency/juce_Windowing.h
+++ b/modules/juce_dsp/frequency/juce_Windowing.h
@ -0,0 +1,82 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    A class which provides multiple windowing functions useful for filter design
+    and spectrum analyzers.
+
+    An instance owns a window table which is filled on construction (or again by
+    the member fillWindowingTables()) and is applied to a buffer with
+    multiplyWithWindowingTable(). The static fillWindowingTables() overload writes
+    a window into caller-supplied memory instead.
+
+    @tags{DSP}
+*/
+template <typename FloatType>
+struct WindowingFunction
+{
+    enum WindowingMethod
+    {
+        rectangular = 0,        /**< Every sample is 1. */
+        triangular,             /**< Linear ramp up to the centre of the window and back down. */
+        hann,                   /**< 0.5 - 0.5 cos. */
+        hamming,                /**< 0.54 - 0.46 cos. */
+        blackman,               /**< Three-term cosine sum, built with alpha = 0.16. */
+        blackmanHarris,         /**< Four-term cosine sum. */
+        flatTop,                /**< Five-term cosine sum. */
+        kaiser,                 /**< Built from the Bessel function I0; the only window that uses beta. */
+        numWindowingMethods     /**< Number of methods above; not accepted as a method itself. */
+    };
+
+    //==============================================================================
+    WindowingFunction (size_t size, WindowingMethod,
+                       bool normalize = true, FloatType beta = 0);   /**< Creates a window with `size` points; the arguments
+                                                                          are those of the member fillWindowingTables(). */
+
+    //==============================================================================
+    /** Resizes this object's internal window table to `size` points and fills it
+        with the given windowing method.
+
+        @param size       number of points in the window
+        @param type       the window shape
+        @param normalize  if true, the table is scaled so that its values add up to `size`
+        @param beta       shape parameter, only used by the kaiser method
+    */
+    void fillWindowingTables (size_t size, WindowingMethod type,
+                              bool normalize = true, FloatType beta = 0) noexcept;
+
+    /** Fills the content of an array with a given windowing method table.
+
+        @param samples    destination array; `size` values are written to it
+        @param size       number of points in the window
+        @param normalize  if true, the values are scaled so that they add up to `size`
+        @param beta       shape parameter, only used by the kaiser method
+    */
+    static void fillWindowingTables (FloatType* samples, size_t size, WindowingMethod,
+                                     bool normalize = true, FloatType beta = 0) noexcept;
+
+    /** Multiply the content of a buffer with the given window.
+
+        Only the first min (size, length of the stored table) samples are multiplied.
+    */
+    void multiplyWithWindowingTable (FloatType* samples, size_t size) noexcept;
+
+    /** Returns the name of a given windowing method, or an empty string (after
+        asserting) for a value that has no name.
+    */
+    static const char* getWindowingMethodName (WindowingMethod) noexcept;
+
+
+private:
+    //==============================================================================
+    Array<FloatType> windowTable;   // the window filled by the member fillWindowingTables()
+
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (WindowingFunction)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/juce_dsp.cpp
+++ b/modules/juce_dsp/juce_dsp.cpp
@ -0,0 +1,92 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#ifdef JUCE_DSP_H_INCLUDED
+ /* When you add this cpp file to your project, you mustn't include it in a file where you've
+    already included any other headers - just put it inside a file on its own, possibly with your config
+    flags preceding it, but don't include anything else. That also includes avoiding any automatic prefix
+    header files that the compiler may be using.
+ */
+ #error "Incorrect use of JUCE cpp file"
+#endif
+
+#include "juce_dsp.h"
+
+#if ! JUCE_HAS_CONSTEXPR
+ #ifndef JUCE_DEMO_RUNNER
+  #error "The juce_dsp module requires a compiler that supports constexpr"
+ #endif
+#else
+
+#ifndef JUCE_USE_VDSP_FRAMEWORK
+ #define JUCE_USE_VDSP_FRAMEWORK 1
+#endif
+
+#if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
+ #include <Accelerate/Accelerate.h>
+#else
+ #undef JUCE_USE_VDSP_FRAMEWORK
+#endif
+
+#if JUCE_DSP_USE_INTEL_MKL
+ #include <mkl_dfti.h>
+#endif
+
+#include "processors/juce_FIRFilter.cpp"
+#include "processors/juce_IIRFilter.cpp"
+#include "processors/juce_LadderFilter.cpp"
+#include "processors/juce_Oversampling.cpp"
+#include "maths/juce_SpecialFunctions.cpp"
+#include "maths/juce_Matrix.cpp"
+#include "maths/juce_LookupTable.cpp"
+#include "frequency/juce_FFT.cpp"
+#include "frequency/juce_Convolution.cpp"
+#include "frequency/juce_Windowing.cpp"
+#include "filter_design/juce_FilterDesign.cpp"
+
+#if JUCE_USE_SIMD
+#if defined(__i386__) || defined(__amd64__) || defined(_M_X64) || defined(_X86_) || defined(_M_IX86)
+ #ifdef __AVX2__
+  #include "native/juce_avx_SIMDNativeOps.cpp"
+ #else
+  #include "native/juce_sse_SIMDNativeOps.cpp"
+ #endif
+#elif defined(__arm__) || defined(_M_ARM) || defined (__arm64__) || defined (__aarch64__)
+  #include "native/juce_neon_SIMDNativeOps.cpp"
+#else
+  #error "SIMD register support not implemented for this platform"
+#endif
+#endif
+
+#if JUCE_UNIT_TESTS
+#include "maths/juce_Matrix_test.cpp"
+#if JUCE_USE_SIMD
+#include "containers/juce_SIMDRegister_test.cpp"
+#endif
+#include "frequency/juce_FFT_test.cpp"
+#include "processors/juce_FIRFilter_test.cpp"
+#endif
+#endif
--- a/modules/juce_dsp/juce_dsp.h
+++ b/modules/juce_dsp/juce_dsp.h
@ -0,0 +1,273 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+
+/*******************************************************************************
+ The block below describes the properties of this module, and is read by
+ the Projucer to automatically generate project code that uses it.
+ For details about the syntax and how to create or use a module, see the
+ JUCE Module Format.txt file.
+
+
+ BEGIN_JUCE_MODULE_DECLARATION
+
+  ID:                 juce_dsp
+  vendor:             juce
+  version:            5.3.2
+  name:               JUCE DSP classes
+  description:        Classes for audio buffer manipulation, digital audio processing, filtering, oversampling, fast math functions etc.
+  website:            http://www.juce.com/juce
+  license:            GPL/Commercial
+  minimumCppStandard: 14
+
+  dependencies:       juce_audio_basics, juce_audio_formats
+  OSXFrameworks:      Accelerate
+  iOSFrameworks:      Accelerate
+
+ END_JUCE_MODULE_DECLARATION
+
+*******************************************************************************/
+
+
+#pragma once
+
+#define JUCE_DSP_H_INCLUDED
+
+#include <juce_audio_basics/juce_audio_basics.h>
+#include <juce_audio_formats/juce_audio_formats.h>
+
+#if ! JUCE_HAS_CONSTEXPR
+ #ifndef JUCE_DEMO_RUNNER
+  #error "The juce_dsp module requires a compiler that supports constexpr"
+ #endif
+#else
+
+#if defined(_M_X64) || defined(__amd64__) || defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
+
+ #if defined(_M_X64) || defined(__amd64__)
+  #ifndef __SSE2__
+   #define __SSE2__
+  #endif
+ #endif
+
+ #ifndef JUCE_USE_SIMD
+  #define JUCE_USE_SIMD 1
+ #endif
+
+ #if JUCE_USE_SIMD
+  #include <immintrin.h>
+ #endif
+
+#elif defined (__ARM_NEON__) || defined (__ARM_NEON) || defined (__arm64__) || defined (__aarch64__)
+
+ #ifndef JUCE_USE_SIMD
+  #define JUCE_USE_SIMD 1
+ #endif
+
+ #include <arm_neon.h>
+
+#else
+
+ // No SIMD Support
+ #ifndef JUCE_USE_SIMD
+  #define JUCE_USE_SIMD 0
+ #endif
+
+#endif
+
+#ifndef JUCE_VECTOR_CALLTYPE
+ // __vectorcall does not work on 64-bit due to internal compiler error in
+ // release mode in both VS2015 and VS2017. Re-enable when Microsoft fixes this
+ #if _MSC_VER && JUCE_USE_SIMD && ! (defined(_M_X64) || defined(__amd64__))
+  #define JUCE_VECTOR_CALLTYPE __vectorcall
+ #else
+  #define JUCE_VECTOR_CALLTYPE
+ #endif
+#endif
+
+#include <atomic>
+#include <complex>
+#include <cmath>
+#include <array>
+
+
+//==============================================================================
+/** Config: JUCE_ASSERTION_FIRFILTER
+
+    When this flag is enabled, an assertion will be generated during the
+    execution of DEBUG configurations if you use a FIRFilter class to process
+    FIRCoefficients with a size higher than 128, to tell you that it would be
+    more efficient to use the Convolution class instead. It is enabled by
+    default, but you may want to disable it if you really want to process such
+    a filter in the time domain.
+*/
+#ifndef JUCE_ASSERTION_FIRFILTER
+ #define JUCE_ASSERTION_FIRFILTER 1
+#endif
+
+/** Config: JUCE_DSP_USE_INTEL_MKL
+
+    If this flag is set, then JUCE will use Intel's MKL for JUCE's FFT and
+    convolution classes.
+
+    The folder containing the mkl_dfti.h header must be in your header
+    search paths when using this flag. You also need to add all the necessary
+    intel mkl libraries to the "External Libraries to Link" field in the
+    Projucer.
+*/
+#ifndef JUCE_DSP_USE_INTEL_MKL
+ #define JUCE_DSP_USE_INTEL_MKL 0
+#endif
+
+/** Config: JUCE_DSP_USE_SHARED_FFTW
+
+    If this flag is set, then JUCE will search for the fftw shared libraries
+    at runtime and use the library for JUCE's FFT and convolution classes.
+
+    If the library is not found, then JUCE's fallback FFT routines will be used.
+
+    This is especially useful on linux as fftw often comes pre-installed on
+    popular linux distros.
+
+    You must respect the FFTW license when enabling this option.
+*/
+ #ifndef JUCE_DSP_USE_SHARED_FFTW
+ #define JUCE_DSP_USE_SHARED_FFTW 0
+#endif
+
+/** Config: JUCE_DSP_USE_STATIC_FFTW
+
+    If this flag is set, then JUCE will use the statically linked fftw libraries
+    for JUCE's FFT and convolution classes.
+
+    You must add the fftw header/library folder to the extra header/library search
+    paths of your JUCE project. You also need to add the fftw library itself
+    to the extra libraries supplied to your JUCE project during linking.
+
+    You must respect the FFTW license when enabling this option.
+*/
+#ifndef JUCE_DSP_USE_STATIC_FFTW
+ #define JUCE_DSP_USE_STATIC_FFTW 0
+#endif
+
+/** Config: JUCE_DSP_ENABLE_SNAP_TO_ZERO
+
+    Enables code in the dsp module to avoid floating point denormals during the
+    processing of some of the dsp module's filters.
+
+    Enabling this will add a slight performance overhead to the DSP module's
+    filters and algorithms. If your audio app already disables denormals altogether
+    (for example, by using the ScopedNoDenormals class or the
+    FloatVectorOperations::disableDenormalisedNumberSupport method), then you
+    can safely disable this flag to shave off a few cpu cycles from the DSP module's
+    filters and algorithms.
+*/
+#ifndef JUCE_DSP_ENABLE_SNAP_TO_ZERO
+ #define JUCE_DSP_ENABLE_SNAP_TO_ZERO 1
+#endif
+
+
+//==============================================================================
+#undef Complex  // apparently some C libraries actually define these symbols (!)
+#undef Factor
+
+namespace juce
+{
+    namespace dsp
+    {
+        /** Alias for std::complex. */
+        template <typename Type>
+        using Complex = std::complex<Type>;
+
+        //==============================================================================
+        namespace util
+        {
+            /** Use this function to prevent denormals on intel CPUs.
+                This function will work with both primitives and simple containers.
+
+                The value is only snapped when JUCE_DSP_ENABLE_SNAP_TO_ZERO is enabled;
+                otherwise the argument is merely passed to ignoreUnused().
+            */
+            inline void snapToZero (float& x) noexcept
+            {
+               #if JUCE_DSP_ENABLE_SNAP_TO_ZERO
+                JUCE_SNAP_TO_ZERO (x);
+               #else
+                ignoreUnused (x);
+               #endif
+            }
+
+           #ifndef DOXYGEN
+            inline void snapToZero (double& x) noexcept
+            {
+               #if JUCE_DSP_ENABLE_SNAP_TO_ZERO
+                JUCE_SNAP_TO_ZERO (x);
+               #else
+                ignoreUnused (x);
+               #endif
+            }
+
+            inline void snapToZero (long double& x) noexcept
+            {
+               #if JUCE_DSP_ENABLE_SNAP_TO_ZERO
+                JUCE_SNAP_TO_ZERO (x);
+               #else
+                ignoreUnused (x);
+               #endif
+            }
+           #endif
+        }
+    }
+}
+
+//==============================================================================
+#if JUCE_USE_SIMD
+ #include "native/juce_fallback_SIMDNativeOps.h"
+
+ // include the correct native file for this build target CPU
+ #if defined(__i386__) || defined(__amd64__) || defined(_M_X64) || defined(_X86_) || defined(_M_IX86)
+  #ifdef __AVX2__
+   #include "native/juce_avx_SIMDNativeOps.h"
+  #else
+   #include "native/juce_sse_SIMDNativeOps.h"
+  #endif
+ #elif defined(__arm__) || defined(_M_ARM) || defined (__arm64__) || defined (__aarch64__)
+  #include "native/juce_neon_SIMDNativeOps.h"
+ #else
+  #error "SIMD register support not implemented for this platform"
+ #endif
+
+ #include "containers/juce_SIMDRegister.h"
+#endif
+
+#include "maths/juce_SpecialFunctions.h"
+#include "maths/juce_Matrix.h"
+#include "maths/juce_Phase.h"
+#include "maths/juce_Polynomial.h"
+#include "maths/juce_FastMathApproximations.h"
+#include "maths/juce_LookupTable.h"
+#include "containers/juce_AudioBlock.h"
+#include "processors/juce_ProcessContext.h"
+#include "processors/juce_ProcessorWrapper.h"
+#include "processors/juce_ProcessorChain.h"
+#include "processors/juce_ProcessorDuplicator.h"
+#include "processors/juce_Bias.h"
+#include "processors/juce_Gain.h"
+#include "processors/juce_WaveShaper.h"
+#include "processors/juce_IIRFilter.h"
+#include "processors/juce_FIRFilter.h"
+#include "processors/juce_Oscillator.h"
+#include "processors/juce_LadderFilter.h"
+#include "processors/juce_StateVariableFilter.h"
+#include "processors/juce_Oversampling.h"
+#include "processors/juce_Reverb.h"
+#include "frequency/juce_FFT.h"
+#include "frequency/juce_Convolution.h"
+#include "frequency/juce_Windowing.h"
+#include "filter_design/juce_FilterDesign.h"
+
+#endif
--- a/modules/juce_dsp/juce_dsp.mm
+++ b/modules/juce_dsp/juce_dsp.mm
@ -0,0 +1,27 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+#include "juce_dsp.cpp"
--- a/modules/juce_dsp/maths/juce_FastMathApproximations.h
+++ b/modules/juce_dsp/maths/juce_FastMathApproximations.h
@ -0,0 +1,265 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    A collection of cheap rational-polynomial stand-ins for common maths functions.
+
+    Every function comes in two flavours: one that evaluates a single sample and
+    returns the result, and one that overwrites a buffer of samples in place.
+    Each approximation is only accurate over the input range quoted in its
+    documentation.
+
+    @tags{DSP}
+*/
+struct FastMathApproximations
+{
+    /** Fast approximation of cosh(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -5 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType cosh (FloatType x) noexcept
+    {
+        const auto xSquared = x * x;
+        const auto top      = -(39251520 + xSquared * (18471600 + xSquared * (1075032 + 14615 * xSquared)));
+        const auto bottom   = -39251520 + xSquared * (1154160 + xSquared * (-16632 + 127 * xSquared));
+        return top / bottom;
+    }
+
+    /** Fast approximation of cosh(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -5 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void cosh (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::cosh (*sample);
+    }
+
+    /** Fast approximation of sinh(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -5 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType sinh (FloatType x) noexcept
+    {
+        const auto xSquared = x * x;
+        const auto top      = -x * (11511339840 + xSquared * (1640635920 + xSquared * (52785432 + xSquared * 479249)));
+        const auto bottom   = -11511339840 + xSquared * (277920720 + xSquared * (-3177720 + xSquared * 18361));
+        return top / bottom;
+    }
+
+    /** Fast approximation of sinh(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -5 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void sinh (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::sinh (*sample);
+    }
+
+    /** Fast approximation of tanh(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -5 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType tanh (FloatType x) noexcept
+    {
+        const auto xSquared = x * x;
+        const auto top      = x * (135135 + xSquared * (17325 + xSquared * (378 + xSquared)));
+        const auto bottom   = 135135 + xSquared * (62370 + xSquared * (3150 + 28 * xSquared));
+        return top / bottom;
+    }
+
+    /** Fast approximation of tanh(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -5 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void tanh (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::tanh (*sample);
+    }
+
+    //==============================================================================
+    /** Fast approximation of cos(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -pi and +pi is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType cos (FloatType x) noexcept
+    {
+        const auto xSquared = x * x;
+        const auto top      = -(-39251520 + xSquared * (18471600 + xSquared * (-1075032 + 14615 * xSquared)));
+        const auto bottom   = 39251520 + xSquared * (1154160 + xSquared * (16632 + xSquared * 127));
+        return top / bottom;
+    }
+
+    /** Fast approximation of cos(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -pi and +pi is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void cos (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::cos (*sample);
+    }
+
+    /** Fast approximation of sin(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -pi and +pi is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType sin (FloatType x) noexcept
+    {
+        const auto xSquared = x * x;
+        const auto top      = -x * (-11511339840 + xSquared * (1640635920 + xSquared * (-52785432 + xSquared * 479249)));
+        const auto bottom   = 11511339840 + xSquared * (277920720 + xSquared * (3177720 + xSquared * 18361));
+        return top / bottom;
+    }
+
+    /** Fast approximation of sin(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -pi and +pi is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void sin (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::sin (*sample);
+    }
+
+    /** Fast approximation of tan(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -pi/2 and +pi/2 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType tan (FloatType x) noexcept
+    {
+        const auto xSquared = x * x;
+        const auto top      = x * (-135135 + xSquared * (17325 + xSquared * (-378 + xSquared)));
+        const auto bottom   = -135135 + xSquared * (62370 + xSquared * (-3150 + 28 * xSquared));
+        return top / bottom;
+    }
+
+    /** Fast approximation of tan(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -pi/2 and +pi/2 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void tan (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::tan (*sample);
+    }
+
+    //==============================================================================
+    /** Fast approximation of exp(x) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -6 and +4 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType exp (FloatType x) noexcept
+    {
+        const auto top    = 1680 + x * (840 + x * (180 + x * (20 + x)));
+        const auto bottom = 1680 + x *(-840 + x * (180 + x * (-20 + x)));
+        return top / bottom;
+    }
+
+    /** Fast approximation of exp(x) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -6 and +4 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void exp (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::exp (*sample);
+    }
+
+    /** Fast approximation of log(x+1) for a single sample, based on a Pade approximant
+        continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        input between -0.8 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static FloatType logNPlusOne (FloatType x) noexcept
+    {
+        const auto top    = x * (7560 + x * (15120 + x * (9870 + x * (2310 + x * 137))));
+        const auto bottom = 7560 + x * (18900 + x * (16800 + x * (6300 + x * (900 + 30 * x))));
+        return top / bottom;
+    }
+
+    /** Fast approximation of log(x+1) applied in place to a whole buffer, based on a
+        Pade approximant continued fraction.
+
+        Note : the approximation is only valid over a limited range. Keeping the
+        inputs between -0.8 and +5 is advised to limit the error.
+    */
+    template <typename FloatType>
+    static void logNPlusOne (FloatType* values, size_t numValues) noexcept
+    {
+        const auto* const end = values + numValues;
+
+        for (auto* sample = values; sample != end; ++sample)
+            *sample = FastMathApproximations::logNPlusOne (*sample);
+    }
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_LookupTable.cpp
+++ b/modules/juce_dsp/maths/juce_LookupTable.cpp
@ -0,0 +1,157 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Default constructor: sizes the storage to a single element and stores no
+    data points. initialise() must be called before the table holds any values.
+*/
+template <typename FloatType>
+LookupTable<FloatType>::LookupTable()
+{
+    data.resize (1);
+}
+
+/** Constructs the table and immediately fills it by forwarding both arguments
+    to initialise().
+*/
+template <typename FloatType>
+LookupTable<FloatType>::LookupTable (const std::function<FloatType (size_t)>& functionToApproximate,
+                                     size_t numPointsToUse)
+{
+    initialise (functionToApproximate, numPointsToUse);
+}
+
+//==============================================================================
+/** Resizes the storage for numPointsToUse points, evaluates functionToApproximate
+    once for every index in [0, numPointsToUse - 1], stores the results and then
+    calls prepare() to fill the extra trailing slot.
+*/
+template <typename FloatType>
+void LookupTable<FloatType>::initialise (const std::function<FloatType (size_t)>& functionToApproximate,
+                                         size_t numPointsToUse)
+{
+    // The buffer is one element larger than the number of points requested.
+    const auto bufferSize = getRequiredBufferSize (numPointsToUse);
+    data.resize (static_cast<int> (bufferSize));
+
+    for (size_t pointIndex = 0; pointIndex != numPointsToUse; ++pointIndex)
+    {
+        const auto sample = functionToApproximate (pointIndex);
+
+        // Make sure functionToApproximate returns a sensible value for the entire specified range.
+        // E.g., this won't work for zero:  [] (size_t i) { return 1.0f / i; }
+        jassert (! std::isnan (sample));
+        jassert (! std::isinf (sample));
+
+        data.getReference (static_cast<int> (pointIndex)) = sample;
+    }
+
+    prepare();
+}
+
+/** Copies the last data point into the extra guard slot at the end of the buffer,
+    so that getUnchecked() can always read element i + 1 when interpolating from
+    the final point.
+
+    Does nothing when the table holds no data points (e.g. initialise() was called
+    with zero points): there is no last point to duplicate, and reading index -1
+    would be out of bounds.
+*/
+template <typename FloatType>
+void LookupTable<FloatType>::prepare() noexcept
+{
+    auto guardIndex = static_cast<int> (getGuardIndex());
+
+    // guardIndex equals the number of points, so zero means the table is empty.
+    if (guardIndex <= 0)
+        return;
+
+    data.getReference (guardIndex) = data.getUnchecked (guardIndex - 1);
+}
+
+/** Stores the input range, derives the linear mapping (scaler and offset) from an
+    input value to a table index, and fills the internal LookupTable by sampling
+    functionToApproximate at numPoints positions spread across
+    [minInputValueToUse, maxInputValueToUse].
+*/
+template <typename FloatType>
+void LookupTableTransform<FloatType>::initialise (const std::function<FloatType (FloatType)>& functionToApproximate,
+                                                  FloatType minInputValueToUse,
+                                                  FloatType maxInputValueToUse,
+                                                  size_t numPoints)
+{
+    jassert (maxInputValueToUse > minInputValueToUse);
+
+    // Index of the last table point, expressed in the sample type.
+    const auto lastPointIndex = FloatType (numPoints - 1);
+
+    minInputValue = minInputValueToUse;
+    maxInputValue = maxInputValueToUse;
+    scaler = lastPointIndex / (maxInputValueToUse - minInputValueToUse);
+    offset = -minInputValueToUse * scaler;
+
+    // Converts a table index back into an input value (limited to the input range)
+    // and evaluates the function there.
+    const auto initFn = [functionToApproximate, minInputValueToUse, maxInputValueToUse, lastPointIndex] (size_t pointIndex)
+    {
+        const auto mappedInput = jmap (FloatType (pointIndex), FloatType (0), lastPointIndex,
+                                       minInputValueToUse, maxInputValueToUse);
+
+        return functionToApproximate (jlimit (minInputValueToUse, maxInputValueToUse, mappedInput));
+    };
+
+    lookupTable.initialise (initFn, numPoints);
+}
+
+//==============================================================================
+/** Builds a temporary LookupTableTransform with the given parameters, evaluates it
+    and the reference function at numTestPoints inputs spread across
+    [minInputValue, maxInputValue], and returns the largest relative difference found.
+
+    A numTestPoints of zero selects the default of 100 * numPoints. With exactly one
+    test point only minInputValue is tested.
+*/
+template <typename FloatType>
+double LookupTableTransform<FloatType>::calculateMaxRelativeError (const std::function<FloatType (FloatType)>& functionToApproximate,
+                                                                   FloatType minInputValue,
+                                                                   FloatType maxInputValue,
+                                                                   size_t numPoints,
+                                                                   size_t numTestPoints)
+{
+    jassert (maxInputValue > minInputValue);
+
+    if (numTestPoints == 0)
+        numTestPoints = 100 * numPoints;    // use default
+
+    LookupTableTransform transform (functionToApproximate, minInputValue, maxInputValue, numPoints);
+
+    double maxError = 0;
+
+    // With a single test point the source range handed to jmap would be empty (0..0),
+    // which cannot be mapped to a finite input value, so that case is sampled at the
+    // lower bound instead.
+    const bool canMapIndices = numTestPoints > 1;
+    const auto lastTestIndex = FloatType (numTestPoints - 1);
+
+    for (size_t i = 0; i < numTestPoints; ++i)
+    {
+        auto inputValue = canMapIndices ? jmap (FloatType (i), FloatType (0), lastTestIndex, minInputValue, maxInputValue)
+                                        : minInputValue;
+
+        auto approximatedOutputValue = transform.processSample (inputValue);
+        auto referenceOutputValue = functionToApproximate (inputValue);
+
+        maxError = jmax (maxError, calculateRelativeDifference ((double) referenceOutputValue, (double) approximatedOutputValue));
+    }
+
+    return maxError;
+}
+
+//==============================================================================
+/** Returns the difference between x and y relative to the smaller of their
+    magnitudes.
+
+    Magnitudes below the smallest normalised double are treated as zero and are
+    never used as the divisor: if only one value is that small, the other value's
+    magnitude is used, and if both are, the absolute difference is returned. This
+    keeps the result finite and makes the function symmetric in its arguments.
+*/
+template <typename FloatType>
+double LookupTableTransform<FloatType>::calculateRelativeDifference (double x, double y) noexcept
+{
+    static const auto eps = std::numeric_limits<double>::min();
+
+    auto absX = std::abs (x);
+    auto absY = std::abs (y);
+    auto absDiff = std::abs (x - y);
+
+    if (absX < eps)
+    {
+        if (absY >= eps)
+            return absDiff / absY;
+
+        return absDiff;    // return the absolute error if both numbers are too close to zero
+    }
+
+    // x is usable but y is too close to zero: dividing by min (absX, absY) would
+    // divide by (almost) zero, so use x as the reference magnitude instead.
+    if (absY < eps)
+        return absDiff / absX;
+
+    return absDiff / std::min (absX, absY);
+}
+
+//==============================================================================
+// Explicit instantiations of the class templates whose members are defined above
+// in this file, for float and double sample types.
+template class LookupTable<float>;
+template class LookupTable<double>;
+
+template class LookupTableTransform<float>;
+template class LookupTableTransform<double>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_LookupTable.h
+++ b/modules/juce_dsp/maths/juce_LookupTable.h
@ -0,0 +1,332 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Class for efficiently approximating expensive arithmetic operations.
+
+    The approximation is based on linear interpolation between pre-calculated values.
+    The approximated function should be passed as a callable object to the constructor
+    along with the number of data points to be pre-calculated. The accuracy of the
+    approximation can be increased by using more points at the cost of a larger memory
+    footprint.
+
+    Internally the buffer holds one element more than the number of points, so that
+    interpolating from the last point can always read a following element.
+
+    Consider using LookupTableTransform as an easy-to-use alternative.
+
+    Example:
+
+        LookupTable<float> lut ([] (size_t i) { return std::sqrt ((float) i); }, 64);
+        auto outValue = lut[17];
+
+    @see LookupTableTransform
+
+    @tags{DSP}
+*/
+template <typename FloatType>
+class LookupTable
+{
+public:
+    /** Creates an uninitialised LookupTable object.
+
+        You need to call initialise() before using the object. Prefer using the
+        non-default constructor instead.
+
+        @see initialise
+    */
+    LookupTable();
+
+    /** Creates and initialises a LookupTable object.
+
+        @param functionToApproximate The function to be approximated. This should be a
+                                     mapping from the integer range [0, numPointsToUse - 1].
+        @param numPointsToUse        The number of pre-calculated values stored.
+    */
+    LookupTable (const std::function<FloatType (size_t)>& functionToApproximate, size_t numPointsToUse);
+
+    /** Initialises or changes the parameters of a LookupTable object.
+
+        This function can be used to change what function is approximated by an already
+        constructed LookupTable along with the number of data points used. If the function
+        to be approximated won't ever change, prefer using the non-default constructor.
+
+        @param functionToApproximate The function to be approximated. This should be a
+                                     mapping from the integer range [0, numPointsToUse - 1].
+        @param numPointsToUse        The number of pre-calculated values stored.
+    */
+    void initialise (const std::function<FloatType (size_t)>& functionToApproximate, size_t numPointsToUse);
+
+    //==============================================================================
+    /** Calculates the approximated value for the given index without range checking.
+
+        Use this if you can guarantee that the index is non-negative and less than numPoints.
+        Otherwise use get().
+
+        @param index The approximation is calculated for this non-integer index.
+        @return      The approximated value at the given index.
+
+        @see get, operator[]
+    */
+    FloatType getUnchecked (FloatType index) const noexcept
+    {
+        jassert (isInitialised());  // Use the non-default constructor or call initialise() before first use
+        jassert (isPositiveAndBelow (index, FloatType (getNumPoints())));
+
+        // Split the index into the integer position and the fractional part used
+        // to interpolate towards the following element.
+        auto i = truncatePositiveToUnsignedInt (index);
+        auto f = index - FloatType (i);
+        jassert (isPositiveAndBelow (f, FloatType (1)));
+
+        auto x0 = data.getUnchecked (static_cast<int> (i));
+        auto x1 = data.getUnchecked (static_cast<int> (i + 1));
+
+        return jmap (f, x0, x1);
+    }
+
+    //==============================================================================
+    /** Calculates the approximated value for the given index with range checking.
+
+        This can be called with any input indices. If the provided index is out-of-range
+        either the bottom or the top element of the LookupTable is returned.
+
+        If the index is guaranteed to be in range use the faster getUnchecked() instead.
+
+        @param index The approximation is calculated for this non-integer index.
+        @return      The approximated value at the given index.
+
+        @see getUnchecked, operator[]
+    */
+    FloatType get (FloatType index) const noexcept
+    {
+        auto numPoints = getNumPoints();
+
+        // Clamp to the last real data point rather than to the guard slot:
+        // getUnchecked() also reads the element after the index, which for the
+        // guard slot would lie beyond the end of the buffer.
+        if (index >= FloatType (numPoints))
+            index = FloatType (numPoints > 0 ? numPoints - 1 : 0);
+        else if (index < 0)
+            index = {};
+
+        return getUnchecked (index);
+    }
+
+    //==============================================================================
+    /** @see getUnchecked */
+    FloatType operator[] (FloatType index) const noexcept       { return getUnchecked (index); }
+
+    /** Returns the size of the LookupTable, i.e., the number of pre-calculated data points. */
+    size_t getNumPoints() const noexcept                        { return static_cast<size_t> (data.size()) - 1; }
+
+    /** Returns true if the LookupTable is initialised and ready to be used. */
+    bool isInitialised() const noexcept                         { return data.size() > 1; }
+
+private:
+    //==============================================================================
+    // Holds the data points followed by one extra guard element.
+    Array<FloatType> data;
+
+    void prepare() noexcept;
+    static size_t getRequiredBufferSize (size_t numPointsToUse) noexcept { return numPointsToUse + 1; }
+    size_t getGuardIndex() const noexcept                                { return getRequiredBufferSize (getNumPoints()) - 1; }
+
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (LookupTable)
+};
+
+
+//==============================================================================
+/** Class for approximating expensive arithmetic operations.
+
+    Once initialised, this class can be used just like the function it approximates
+    via operator().
+
+    Example:
+
+        LookupTableTransform<float> tanhApprox ([] (float x) { return std::tanh (x); }, -5.0f, 5.0f, 64);
+        auto outValue = tanhApprox (4.2f);
+
+    Note : if you try to call the function with an input outside the provided
+    range, it will return either the first or the last recorded LookupTable value.
+
+    @see LookupTable
+
+    @tags{DSP}
+*/
+template <typename FloatType>
+class LookupTableTransform
+{
+public:
+    //==============================================================================
+    /** Creates an uninitialised LookupTableTransform object.
+
+        You need to call initialise() before using the object. Prefer using the
+        non-default constructor instead.
+
+        @see initialise
+    */
+    LookupTableTransform()
+    {}
+
+    //==============================================================================
+    /** Creates and initialises a LookupTableTransform object.
+
+        @param functionToApproximate The function to be approximated. This should be a
+                                     mapping from a FloatType to FloatType.
+        @param minInputValueToUse    The lowest input value used. The approximation will
+                                     fail for values lower than this.
+        @param maxInputValueToUse    The highest input value used. The approximation will
+                                     fail for values higher than this.
+        @param numPoints             The number of pre-calculated values stored.
+    */
+    LookupTableTransform (const std::function<FloatType (FloatType)>& functionToApproximate,
+                          FloatType minInputValueToUse,
+                          FloatType maxInputValueToUse,
+                          size_t numPoints)
+    {
+        initialise (functionToApproximate, minInputValueToUse, maxInputValueToUse, numPoints);
+    }
+
+    //==============================================================================
+    /** Initialises or changes the parameters of a LookupTableTransform object.
+
+        @param functionToApproximate The function to be approximated. This should be a
+                                     mapping from a FloatType to FloatType.
+        @param minInputValueToUse    The lowest input value used. The approximation will
+                                     fail for values lower than this.
+        @param maxInputValueToUse    The highest input value used. The approximation will
+                                     fail for values higher than this.
+        @param numPoints             The number of pre-calculated values stored.
+    */
+    void initialise (const std::function<FloatType (FloatType)>& functionToApproximate,
+                     FloatType minInputValueToUse,
+                     FloatType maxInputValueToUse,
+                     size_t numPoints);
+
+    //==============================================================================
+    /** Calculates the approximated value for the given input value without range checking.
+
+        Use this if you can guarantee that the input value is within the range specified
+        in the constructor or initialise(), otherwise use processSample().
+
+        @param value The approximation is calculated for this input value.
+        @return      The approximated value for the provided input value.
+
+        @see processSample, operator(), operator[]
+    */
+    FloatType processSampleUnchecked (FloatType value) const noexcept
+    {
+        jassert (value >= minInputValue && value <= maxInputValue);
+
+        // scaler and offset map the input range linearly onto table indices.
+        return lookupTable[scaler * value + offset];
+    }
+
+    //==============================================================================
+    /** Calculates the approximated value for the given input value with range checking.
+
+        This can be called with any input values. Out-of-range input values will be
+        clipped to the specified input range.
+
+        If the value is guaranteed to be in range use the faster processSampleUnchecked()
+        instead.
+
+        @param value The approximation is calculated for this input value.
+        @return      The approximated value for the provided input value.
+
+        @see processSampleUnchecked, operator(), operator[]
+    */
+    FloatType processSample (FloatType value) const noexcept
+    {
+        auto index = scaler * jlimit (minInputValue, maxInputValue, value) + offset;
+        jassert (isPositiveAndBelow (index, FloatType (lookupTable.getNumPoints())));
+
+        return lookupTable[index];
+    }
+
+    //==============================================================================
+    /** @see processSampleUnchecked */
+    FloatType operator[] (FloatType index) const noexcept       { return processSampleUnchecked (index); }
+
+    /** @see processSample */
+    FloatType operator() (FloatType index) const noexcept       { return processSample (index); }
+
+    //==============================================================================
+    /** Processes an array of input values without range checking
+        @see process
+    */
+    void processUnchecked (const FloatType* input, FloatType* output, size_t numSamples) const noexcept
+    {
+        for (size_t i = 0; i < numSamples; ++i)
+            output[i] = processSampleUnchecked (input[i]);
+    }
+
+    //==============================================================================
+    /** Processes an array of input values with range checking
+        @see processUnchecked
+    */
+    void process (const FloatType* input, FloatType* output, size_t numSamples) const noexcept
+    {
+        for (size_t i = 0; i < numSamples; ++i)
+            output[i] = processSample (input[i]);
+    }
+
+    //==============================================================================
+    /** Calculates the maximum relative error of the approximation for the specified
+        parameter set.
+
+        The closer the returned value is to zero the more accurate the approximation
+        is.
+
+        This function compares the approximated output of this class to the function
+        it approximates at a range of points and returns the maximum relative error.
+        This can be used to determine if the approximation is suitable for the given
+        problem. The accuracy of the approximation can generally be improved by
+        increasing numPoints.
+
+        @param functionToApproximate The approximated function. This should be a
+                                     mapping from a FloatType to FloatType.
+        @param minInputValue         The lowest input value used.
+        @param maxInputValue         The highest input value used.
+        @param numPoints             The number of pre-calculated values stored.
+        @param numTestPoints         The number of input values used for error
+                                     calculation. Higher numbers can increase the
+                                     accuracy of the error calculation. If it's zero
+                                     then 100 * numPoints will be used.
+    */
+    static double calculateMaxRelativeError (const std::function<FloatType (FloatType)>& functionToApproximate,
+                                             FloatType minInputValue,
+                                             FloatType maxInputValue,
+                                             size_t numPoints,
+                                             size_t numTestPoints = 0);
+private:
+    //==============================================================================
+    static double calculateRelativeDifference (double, double) noexcept;
+
+    //==============================================================================
+    LookupTable<FloatType> lookupTable;
+
+    // Zero-initialised so that a default-constructed object never holds indeterminate
+    // values; initialise() overwrites all four.
+    FloatType minInputValue = 0, maxInputValue = 0;
+    FloatType scaler = 0, offset = 0;
+
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (LookupTableTransform)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_Matrix.cpp
+++ b/modules/juce_dsp/maths/juce_Matrix.cpp
@ -0,0 +1,318 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+template <typename ElementType>
+Matrix<ElementType> Matrix<ElementType>::identity (size_t size)
+{
+    // The (rows, columns) constructor zero-fills its storage, so only the
+    // main diagonal has to be written.
+    Matrix identityMatrix (size, size);
+
+    for (size_t diagonal = 0; diagonal != size; ++diagonal)
+        identityMatrix (diagonal, diagonal) = 1;
+
+    return identityMatrix;
+}
+
+template <typename ElementType>
+Matrix<ElementType> Matrix<ElementType>::toeplitz (const Matrix& vector, size_t size)
+{
+    jassert (vector.isOneColumnVector());
+    jassert (size <= vector.rows);
+
+    Matrix result (size, size);
+
+    // Every entry at the same distance from the main diagonal shares one value,
+    // so fill the matrix one (mirrored) diagonal at a time.
+    for (size_t distance = 0; distance < size; ++distance)
+    {
+        const auto value = vector (distance, 0);
+
+        for (size_t row = distance; row < size; ++row)
+        {
+            result (row - distance, row) = value;
+            result (row, row - distance) = value;
+        }
+    }
+
+    return result;
+}
+
+template <typename ElementType>
+Matrix<ElementType> Matrix<ElementType>::hankel (const Matrix& vector, size_t size, size_t offset)
+{
+    jassert (vector.isOneColumnVector());
+
+    // The largest index read below is 2 * (size - 1) + offset, so the offset has
+    // to be part of the size check. The size == 0 case is tested separately
+    // because (size - 1) would wrap around for an unsigned size_t.
+    jassert (size == 0 || vector.rows >= (2 * (size - 1) + 1) + offset);
+
+    Matrix result (size, size);
+
+    // A Hankel matrix is constant along its anti-diagonals: the entry at
+    // (row, column) is the vector element at index row + column (+ offset).
+    for (size_t row = 0; row < size; ++row)
+        for (size_t column = 0; column < size; ++column)
+            result (row, column) = vector (row + column + offset, 0);
+
+    return result;
+}
+
+//==============================================================================
+template <typename ElementType>
+Matrix<ElementType>& Matrix<ElementType>::swapColumns (size_t columnOne, size_t columnTwo) noexcept
+{
+    jassert (columnOne < columns && columnTwo < columns);
+
+    auto* elements = data.getRawDataPointer();
+
+    // dataAcceleration holds the index of the first element of each row.
+    for (size_t row = 0; row < rows; ++row)
+    {
+        auto* rowStart = elements + dataAcceleration.getUnchecked (static_cast<int> (row));
+        std::swap (rowStart[columnOne], rowStart[columnTwo]);
+    }
+
+    return *this;
+}
+
+template <typename ElementType>
+Matrix<ElementType>& Matrix<ElementType>::swapRows (size_t rowOne, size_t rowTwo) noexcept
+{
+    jassert (rowOne < rows && rowTwo < rows);
+
+    // Rows are stored contiguously (row-major), so each row starts at row * columns.
+    auto* elements  = data.getRawDataPointer();
+    auto* firstRow  = elements + rowOne * columns;
+    auto* secondRow = elements + rowTwo * columns;
+
+    for (size_t column = 0; column < columns; ++column)
+        std::swap (firstRow[column], secondRow[column]);
+
+    return *this;
+}
+
+//==============================================================================
+template <typename ElementType>
+Matrix<ElementType> Matrix<ElementType>::operator* (const Matrix<ElementType>& other) const
+{
+    const auto numResultRows    = getNumRows();
+    const auto numResultColumns = other.getNumColumns();
+    const auto innerSize        = getNumColumns();
+
+    Matrix result (numResultRows, numResultColumns);
+
+    jassert (innerSize == other.getNumRows());
+
+    // All three matrices are walked through their row-major raw storage.
+    const auto* lhsElement = getRawDataPointer();
+    auto* resultRow = result.getRawDataPointer();
+
+    for (size_t row = 0; row < numResultRows; ++row)
+    {
+        const auto* rhsRow = other.getRawDataPointer();
+
+        // Accumulate lhs (row, k) * (row k of rhs) into the current result row.
+        for (size_t k = 0; k < innerSize; ++k)
+        {
+            const auto lhsValue = *lhsElement++;
+
+            for (size_t column = 0; column < numResultColumns; ++column)
+                resultRow[column] += lhsValue * rhsRow[column];
+
+            rhsRow += numResultColumns;
+        }
+
+        resultRow += numResultColumns;
+    }
+
+    return result;
+}
+
+//==============================================================================
+template <typename ElementType>
+bool Matrix<ElementType>::compare (const Matrix& a, const Matrix& b, ElementType tolerance) noexcept
+{
+    const bool sameShape = (a.rows == b.rows && a.columns == b.columns);
+
+    if (! sameShape)
+        return false;
+
+    // Only the magnitude of the tolerance matters.
+    const auto maxDifference = std::abs (tolerance);
+
+    // Written as a negated '>' so that a NaN difference is still treated as
+    // "within tolerance", exactly like a direct '>' rejection test would.
+    return std::equal (a.begin(), a.end(), b.begin(),
+                       [maxDifference] (ElementType lhs, ElementType rhs)
+                       {
+                           return ! (std::abs (lhs - rhs) > maxDifference);
+                       });
+}
+
+//==============================================================================
+template <typename ElementType>
+bool Matrix<ElementType>::solve (Matrix& b) const noexcept
+{
+    auto n = columns;
+
+    // The system is only well-formed if this matrix is square (n x n) and b is
+    // an n x 1 column vector.
+    jassert (isSquare() && n == b.rows && b.isOneColumnVector());
+
+    // b is overwritten in place with the solution.
+    auto* x = b.getRawDataPointer();
+    const auto& A = *this;
+
+    switch (n)
+    {
+        // 1x1: a plain division.
+        case 1:
+        {
+            auto denominator = A (0,0);
+
+            if (denominator == 0)
+                return false;
+
+            b (0, 0) /= denominator;
+        }
+        break;
+
+        // 2x2: closed-form solution using the determinant.
+        case 2:
+        {
+            auto denominator = A (0, 0) * A (1, 1) - A (0, 1) * A (1, 0);
+
+            if (denominator == 0)
+                return false;
+
+            auto factor = (1 / denominator);
+            auto b0 = x[0], b1 = x[1];
+
+            x[0] = factor * (A (1, 1) * b0 - A (0, 1) * b1);
+            x[1] = factor * (A (0, 0) * b1 - A (1, 0) * b0);
+        }
+        break;
+
+        // 3x3: closed-form solution using the determinant and cofactors.
+        case 3:
+        {
+            auto denominator = A (0, 0) * (A (1, 1) * A (2, 2) - A (1, 2) * A (2, 1))
+                             + A (0, 1) * (A (1, 2) * A (2, 0) - A (1, 0) * A (2, 2))
+                             + A (0, 2) * (A (1, 0) * A (2, 1) - A (1, 1) * A (2, 0));
+
+            if (denominator == 0)
+                return false;
+
+            auto factor = 1 / denominator;
+            auto b0 = x[0], b1 = x[1], b2 = x[2];
+
+            x[0] =  ( ( A (0, 1) * A (1, 2) - A (0, 2) * A (1, 1)) * b2
+                    + (-A (0, 1) * A (2, 2) + A (0, 2) * A (2, 1)) * b1
+                    + ( A (1, 1) * A (2, 2) - A (1, 2) * A (2, 1)) * b0) * factor;
+
+            x[1] = -( ( A (0, 0) * A (1, 2) - A (0, 2) * A (1, 0)) * b2
+                    + (-A (0, 0) * A (2, 2) + A (0, 2) * A (2, 0)) * b1
+                    + ( A (1, 0) * A (2, 2) - A (1, 2) * A (2, 0)) * b0) * factor;
+
+            x[2] =  ( ( A (0, 0) * A (1, 1) - A (0, 1) * A (1, 0)) * b2
+                    + (-A (0, 0) * A (2, 1) + A (0, 1) * A (2, 0)) * b1
+                    + ( A (1, 0) * A (2, 1) - A (1, 1) * A (2, 0)) * b0) * factor;
+        }
+        break;
+
+
+        // General case: Gaussian elimination on a working copy of the matrix,
+        // followed by back-substitution.
+        default:
+        {
+            Matrix<ElementType> M (A);
+
+            for (size_t j = 0; j < n; ++j)
+            {
+                // A zero pivot is repaired by adding a lower row that has a
+                // non-zero entry in this column. If no such row exists the
+                // system cannot be solved.
+                if (M (j, j) == 0)
+                {
+                    auto i = j;
+                    while (i < n && M (i, j) == 0)
+                        ++i;
+
+                    if (i == n)
+                        return false;
+
+                    for (size_t k = 0; k < n; ++k)
+                        M (j, k) += M (i, k);
+
+                    x[j] += x[i];
+                }
+
+                // Scale the pivot row so that the pivot becomes 1.
+                auto t = 1 / M (j, j);
+
+                for (size_t k = 0; k < n; ++k)
+                    M (j, k) *= t;
+
+                x[j] *= t;
+
+                // Eliminate the pivot column from every row below.
+                for (size_t k = j + 1; k < n; ++k)
+                {
+                    auto u = -M (k, j);
+
+                    for (size_t l = 0; l < n; ++l)
+                        M (k, l) += u * M (j, l);
+
+                    x[k] += u * x[j];
+                }
+            }
+
+            // Back-substitution, from the second-to-last row upwards. A signed
+            // counter is used so that the loop can terminate below zero.
+            for (int k = static_cast<int> (n) - 2; k >= 0; --k)
+                for (size_t i = static_cast<size_t> (k) + 1; i < n; ++i)
+                    x[k] -= M (static_cast<size_t> (k), i) * x[i];
+        }
+    }
+
+    return true;
+}
+
+//==============================================================================
+template <typename ElementType>
+String Matrix<ElementType>::toString() const
+{
+    // First pass: format every element and remember the longest text.
+    StringArray formattedEntries;
+    int columnWidth = 0;
+
+    auto* values = data.begin();
+    const auto numElements = rows * columns;
+
+    for (size_t index = 0; index < numElements; ++index)
+    {
+        String text (values[index], 4);
+        columnWidth = jmax (columnWidth, text.length());
+
+        formattedEntries.add (text);
+    }
+
+    // Leave room for at least one space and round up to a multiple of four.
+    columnWidth = ((columnWidth + 1) / 4 + 1) * 4;
+
+    // Second pass: write the padded entries, ending the line after each row.
+    MemoryOutputStream output;
+
+    const auto numEntries = static_cast<size_t> (formattedEntries.size());
+
+    for (size_t index = 0; index < numEntries; ++index)
+    {
+        output << formattedEntries[static_cast<int> (index)].paddedRight (' ', columnWidth);
+
+        const bool isLastInRow = ((index + 1) % columns == 0);
+
+        if (isLastInRow)
+            output << newLine;
+    }
+
+    return output.toString();
+}
+
+template class Matrix<float>;     // explicit instantiation for single-precision elements
+template class Matrix<double>;    // explicit instantiation for double-precision elements
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_Matrix.h
+++ b/modules/juce_dsp/maths/juce_Matrix.h
@ -0,0 +1,255 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    General matrix and vector class, meant for classic math manipulation such as
+    additions, multiplications, and solving linear systems of equations.
+
+    @see LinearAlgebra
+
+    @tags{DSP}
+*/
+template<typename ElementType>
+class Matrix
+{
+public:
+    //==============================================================================
+    /** Creates a new zero-filled matrix with a given number of rows and columns. */
+    Matrix (size_t numRows, size_t numColumns)
+        : rows (numRows), columns (numColumns)
+    {
+        resize();
+        clear();
+    }
+
+    /** Creates a new matrix with a given number of rows and columns, with initial
+        data copied from an array of numRows * numColumns values in row-major order.
+    */
+    Matrix (size_t numRows, size_t numColumns, const ElementType* dataPointer)
+        : rows (numRows), columns (numColumns)
+    {
+        resize();
+        memcpy (data.getRawDataPointer(), dataPointer, rows * columns * sizeof (ElementType));
+    }
+
+    /** Creates a copy of another matrix. */
+    Matrix (const Matrix&) = default;
+
+    /** Move-constructs a matrix from another one. */
+    Matrix (Matrix&&) noexcept = default;
+
+    /** Copies another matrix into this one. */
+    Matrix& operator= (const Matrix&) = default;
+
+    /** Moves another matrix into this one. */
+    Matrix& operator= (Matrix&&) noexcept = default;
+
+    //==============================================================================
+    /** Creates the size x size identity matrix. */
+    static Matrix identity (size_t size);
+
+    /** Creates a square size x size Toeplitz matrix from a one-column vector with at least size rows. */
+    static Matrix toeplitz (const Matrix& vector, size_t size);
+
+    /** Creates a square size x size Hankel matrix from a vector with an optional offset.
+
+        @param vector    The vector from which the Hankel matrix should be generated.
+                         Its number of rows should be at least 2 * (size - 1) + 1 + offset
+        @param size      The size of resulting square matrix.
+        @param offset    An optional offset into the given vector.
+    */
+    static Matrix hankel (const Matrix& vector, size_t size, size_t offset = 0);
+
+    //==============================================================================
+    /** Returns the number of rows in the matrix. */
+    size_t getNumRows() const noexcept                 { return rows; }
+
+    /** Returns the number of columns in the matrix. */
+    size_t getNumColumns() const noexcept              { return columns; }
+
+    /** Returns an Array of 2 values holding the number of rows followed by the
+        number of columns in the matrix.
+    */
+    Array<size_t> getSize() const noexcept             { return { rows, columns }; }
+
+    /** Fills the contents of the matrix with zeroes. */
+    void clear() noexcept                              { zeromem (data.begin(), sizeof (ElementType) * (size_t) data.size()); }
+
+    //==============================================================================
+    /** Swaps the contents of two rows in the matrix and returns a reference to itself. */
+    Matrix& swapRows (size_t rowOne, size_t rowTwo) noexcept;
+
+    /** Swaps the contents of two columns in the matrix and returns a reference to itself. */
+    Matrix& swapColumns (size_t columnOne, size_t columnTwo) noexcept;
+
+    //==============================================================================
+    /** Returns the value of the matrix at a given row and column (for reading). */
+    inline ElementType operator() (size_t row, size_t column) const noexcept
+    {
+        jassert (row < rows && column < columns);
+        return data.getReference (static_cast<int> (dataAcceleration.getReference (static_cast<int> (row))) + static_cast<int> (column));
+    }
+
+    /** Returns the value of the matrix at a given row and column (for modifying). */
+    inline ElementType& operator() (size_t row, size_t column) noexcept
+    {
+        jassert (row < rows && column < columns);
+        return data.getReference (static_cast<int> (dataAcceleration.getReference (static_cast<int> (row))) + static_cast<int> (column));
+    }
+
+    /** Returns a pointer to the raw data of the matrix object, ordered in row-major
+        order (for modifying).
+    */
+    inline ElementType* getRawDataPointer() noexcept                    { return data.getRawDataPointer(); }
+
+    /** Returns a pointer to the raw data of the matrix object, ordered in row-major
+        order (for reading).
+     */
+    inline const ElementType* getRawDataPointer() const noexcept        { return data.begin(); }
+
+    //==============================================================================
+    /** Adds another matrix of the same size to this one, element by element. */
+    inline Matrix& operator+= (const Matrix& other) noexcept            { return apply (other, [] (ElementType a, ElementType b) { return a + b; } ); }
+
+    /** Subtracts another matrix of the same size from this one, element by element. */
+    inline Matrix& operator-= (const Matrix& other) noexcept            { return apply (other, [] (ElementType a, ElementType b) { return a - b; } ); }
+
+    /** Scalar multiplication */
+    inline Matrix& operator*= (ElementType scalar) noexcept
+    {
+        std::for_each (begin(), end(), [scalar] (ElementType& x) { x *= scalar; });
+        return *this;
+    }
+
+    /** Addition of two matrices */
+    inline Matrix operator+ (const Matrix& other) const                 { Matrix result (*this); result += other;  return result; }
+
+    /** Subtraction of two matrices */
+    inline Matrix operator- (const Matrix& other) const                 { Matrix result (*this); result -= other;  return result; }
+
+    /** Scalar multiplication */
+    inline Matrix operator* (ElementType scalar) const                  { Matrix result (*this); result *= scalar; return result; }
+
+    /** Matrix multiplication */
+    Matrix operator* (const Matrix& other) const;
+
+    /** Computes the Hadamard (element-wise) product of the receiver and other, storing the result in the receiver. */
+    inline Matrix& hadarmard (const Matrix& other) noexcept             { return apply (other, [] (ElementType a, ElementType b) { return a * b; } ); }
+
+    /** Computes the Hadamard (element-wise) product of a and b and returns the result. */
+    static inline Matrix hadarmard (const Matrix& a, const Matrix& b)   { Matrix result (a); result.hadarmard (b); return result; }
+
+    //==============================================================================
+    /** Compares two matrices element by element with a given tolerance. */
+    static bool compare (const Matrix& a, const Matrix& b, ElementType tolerance = 0) noexcept;
+
+    /** Comparison operator (uses compare() with its default tolerance of zero). */
+    inline bool operator== (const Matrix& other) const noexcept      { return compare (*this, other); }
+
+    //==============================================================================
+    /** Tells if the matrix is a square matrix */
+    bool isSquare() const noexcept                                   { return rows == columns; }
+
+    /** Tells if the matrix is a vector */
+    bool isVector() const noexcept                                   { return isOneColumnVector() || isOneRowVector(); }
+
+    /** Tells if the matrix is a one column vector */
+    bool isOneColumnVector() const noexcept                          { return columns == 1; }
+
+    /** Tells if the matrix is a one row vector */
+    bool isOneRowVector() const noexcept                             { return rows == 1; }
+
+    /** Tells if the matrix is a null matrix, i.e. it has no rows or no columns */
+    bool isNullMatrix() const noexcept                               { return rows == 0 || columns == 0; }
+
+    //==============================================================================
+    /** Solves a linear system of equations represented by this object and the argument b,
+        using various algorithms depending on the size of the arguments.
+
+        The matrix must be a square matrix N times N, and b must be a vector N times 1,
+        with the coefficients of b. After the execution of the algorithm,
+        the vector b will contain the solution.
+
+        Returns true if the linear system of equations was successfully solved.
+     */
+    bool solve (Matrix& b) const noexcept;
+
+    //==============================================================================
+    /** Returns a String displaying in a convenient way the matrix contents. */
+    String toString() const;
+
+    //==============================================================================
+    ElementType* begin() noexcept                   { return data.begin(); }
+    ElementType* end() noexcept                     { return data.end(); }
+
+    const ElementType* begin() const noexcept       { return &data.getReference (0); }
+    const ElementType* end()   const noexcept       { return begin() + data.size(); }
+
+private:
+    //==============================================================================
+    /** Sizes the storage for rows x columns elements and caches the start index of each row. */
+    void resize()
+    {
+        data.resize (static_cast<int> (columns * rows));
+        dataAcceleration.resize (static_cast<int> (rows));
+
+        for (size_t i = 0; i < rows; ++i)
+            dataAcceleration.setUnchecked (static_cast<int> (i), i * columns);
+    }
+
+    template <typename BinaryOperation>
+    Matrix& apply (const Matrix& other, BinaryOperation binaryOp)
+    {
+        jassert (rows == other.rows && columns == other.columns);
+
+        auto* dst = getRawDataPointer();
+
+        for (auto src : other)
+        {
+            *dst = binaryOp (*dst, src);
+            ++dst;
+        }
+
+        return *this;
+    }
+
+    //==============================================================================
+    Array<ElementType> data;
+    Array<size_t> dataAcceleration;
+
+    size_t rows, columns;
+
+    //==============================================================================
+    JUCE_LEAK_DETECTOR (Matrix)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_Matrix_test.cpp
+++ b/modules/juce_dsp/maths/juce_Matrix_test.cpp
@ -0,0 +1,172 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Unit tests for the arithmetic, factory and solving functions of dsp::Matrix.
+    Every test case is run for both float and double element types.
+*/
+struct LinearAlgebraUnitTest  : public UnitTest
+{
+    LinearAlgebraUnitTest()  : UnitTest ("Linear Algebra UnitTests", "DSP") {}
+
+    /** Checks the element-wise sum of two 2x4 matrices. */
+    struct AdditionTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType lhsValues[]      = { 1,  2, 3,  4,  5,  6,  7,  8 };
+            const ElementType rhsValues[]      = { 1, -1, 3, -1,  5, -1,  7, -1 };
+            const ElementType expectedValues[] = { 2,  1, 6,  3, 10,  5, 14,  7 };
+
+            const Matrix<ElementType> lhs      (2, 4, lhsValues);
+            const Matrix<ElementType> rhs      (2, 4, rhsValues);
+            const Matrix<ElementType> expected (2, 4, expectedValues);
+
+            u.expect ((lhs + rhs) == expected);
+        }
+    };
+
+    /** Checks the element-wise difference of two 2x4 matrices. */
+    struct DifferenceTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType lhsValues[]      = { 1,  2, 3,  4, 5,  6, 7,  8 };
+            const ElementType rhsValues[]      = { 1, -1, 3, -1, 5, -1, 7, -1 };
+            const ElementType expectedValues[] = { 0,  3, 0,  5, 0,  7, 0,  9 };
+
+            const Matrix<ElementType> lhs      (2, 4, lhsValues);
+            const Matrix<ElementType> rhs      (2, 4, rhsValues);
+            const Matrix<ElementType> expected (2, 4, expectedValues);
+
+            u.expect ((lhs - rhs) == expected);
+        }
+    };
+
+    /** Checks multiplication of a 2x4 matrix by a scalar. */
+    struct ScalarMultiplicationTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType inputValues[]    = { 1,  2, 3,  4, 5,  6, 7,  8 };
+            const ElementType scalar = 2.0;
+            const ElementType expectedValues[] = { 2, 4, 6, 8, 10, 12, 14, 16 };
+
+            const Matrix<ElementType> input    (2, 4, inputValues);
+            const Matrix<ElementType> expected (2, 4, expectedValues);
+
+            u.expect ((input * scalar) == expected);
+        }
+    };
+
+    /** Checks the element-wise (Hadamard) product of two 2x4 matrices. */
+    struct HadamardProductTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType lhsValues[]      = { 1,  2, 3,  4,  5,  6,  7,  8 };
+            const ElementType rhsValues[]      = { 1, -1, 3, -1,  5, -1,  7, -1 };
+            const ElementType expectedValues[] = { 1, -2, 9, -4, 25, -6, 49, -8 };
+
+            const Matrix<ElementType> lhs      (2, 4, lhsValues);
+            const Matrix<ElementType> rhs      (2, 4, rhsValues);
+            const Matrix<ElementType> expected (2, 4, expectedValues);
+
+            u.expect (Matrix<ElementType>::hadarmard (lhs, rhs) == expected);
+        }
+    };
+
+    /** Checks the matrix product of a 2x4 and a 4x2 matrix. */
+    struct MultiplicationTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType lhsValues[]      = { 1,  2, 3,  4,  5,  6,  7,  8 };
+            const ElementType rhsValues[]      = { 1, -1, 3, -1,  5, -1,  7, -1 };
+            const ElementType expectedValues[] = { 50, -10, 114, -26 };
+
+            const Matrix<ElementType> lhs      (2, 4, lhsValues);
+            const Matrix<ElementType> rhs      (4, 2, rhsValues);
+            const Matrix<ElementType> expected (2, 2, expectedValues);
+
+            u.expect ((lhs * rhs) == expected);
+        }
+    };
+
+    /** Checks that identity (4) matches a hand-written 4x4 identity matrix. */
+    struct IdentityMatrixTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType expectedValues[] = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1};
+            const Matrix<ElementType> expected (4, 4, expectedValues);
+
+            u.expect (Matrix<ElementType>::identity (4) == expected);
+        }
+    };
+
+    /** Solves a 4x4 linear system in place and compares against the known solution. */
+    struct SolvingTest
+    {
+        template <typename ElementType>
+        static void run (LinearAlgebraUnitTest& u)
+        {
+            const ElementType solutionValues[]    = { 1, -1, 2, -2 };
+            const ElementType rightHandValues[]   = { -1, 0, -1, -7 };
+            const ElementType coefficientValues[] = { 1, 4, 2, 1, -1, 1, 4, 3, -2, -1, 1, 1, -1, 0, 1, 4 };
+
+            const Matrix<ElementType> X (4, 1, solutionValues);
+            Matrix<ElementType> B (4, 1, rightHandValues);
+            const Matrix<ElementType> A (4, 4, coefficientValues);
+
+            // solve() overwrites B with the solution, so B is compared with X afterwards.
+            u.expect (A.solve (B));
+            u.expect (Matrix<ElementType>::compare (X, B, (ElementType) 1e-4));
+        }
+    };
+
+    /** Starts a named test and runs the given test case for float and then double. */
+    template <class TheTest>
+    void runTestForAllTypes (const char* unitTestName)
+    {
+        beginTest (unitTestName);
+
+        TheTest::template run<float> (*this);
+        TheTest::template run<double> (*this);
+    }
+
+    void runTest() override
+    {
+        runTestForAllTypes<AdditionTest> ("AdditionTest");
+        runTestForAllTypes<DifferenceTest> ("DifferenceTest");
+        runTestForAllTypes<ScalarMultiplicationTest> ("ScalarMultiplication");
+        runTestForAllTypes<HadamardProductTest> ("HadamardProductTest");
+        runTestForAllTypes<MultiplicationTest> ("MultiplicationTest");
+        runTestForAllTypes<IdentityMatrixTest> ("IdentityMatrixTest");
+        runTestForAllTypes<SolvingTest> ("SolvingTest");
+    }
+};
+
+static LinearAlgebraUnitTest linearAlgebraUnitTest; // file-local instance; presumably constructing it registers the test with the UnitTest framework - TODO confirm
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_Phase.h
+++ b/modules/juce_dsp/maths/juce_Phase.h
@ -0,0 +1,68 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Holds a phase value that only moves forwards and is kept between 0 and 2*pi.
+
+    Each time the phase is advanced past 2 * pi it wraps back round towards 0.
+
+    @tags{DSP}
+*/
+template <typename Type>
+struct Phase
+{
+    /** Sets the phase back to 0. */
+    void reset() noexcept               { phase = 0; }
+
+    /** Moves the phase forwards by the given increment and returns the value it
+        had before the move.
+
+        The increment must not be negative. After the call, the stored phase is
+        (phase + increment) wrapped into the range below 2 * pi.
+    */
+    Type advance (Type increment) noexcept
+    {
+        jassert (increment >= 0); // cannot run this value backwards!
+
+        const auto previous = phase;
+
+        phase += increment;
+
+        // Wrap by repeated subtraction until the value is below a full turn.
+        for (; phase >= MathConstants<Type>::twoPi;)
+            phase -= MathConstants<Type>::twoPi;
+
+        return previous;
+    }
+
+    Type phase = 0;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_Polynomial.h
+++ b/modules/juce_dsp/maths/juce_Polynomial.h
@ -0,0 +1,169 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    A class representing a polynomial.
+
+    Coefficients are stored in ascending order of power: the coefficient at
+    index i multiplies x^i, so index 0 holds the constant term.
+
+    @tags{DSP}
+*/
+template <typename FloatingType>
+class Polynomial
+{
+public:
+    //==============================================================================
+    /** Creates a new polynomial which will always evaluate to zero. */
+    Polynomial()
+    {
+        coeffs.add (0);
+    }
+
+    /** Creates a new polynomial with given coefficients.
+
+        @param coefficients    The coefficients which will be used by the newly
+                               created polynomial. The Polynomial class will keep
+                               a private copy of the coefficients.
+        @param numCoefficients The number of coefficients stored in coefficients.
+                               The order of the resulting polynomial is
+                               numCoefficients - 1.
+    */
+    Polynomial (const FloatingType* coefficients, int numCoefficients)
+        : coeffs (coefficients, numCoefficients)
+    {
+        jassert (! coeffs.isEmpty());
+    }
+
+    /** Creates a copy of another polynomial. */
+    Polynomial (const Polynomial&) = default;
+
+    /** Move-constructs a polynomial from another one. */
+    Polynomial (Polynomial&&) = default;
+
+    /** Copies another polynomial into this one. */
+    Polynomial& operator= (const Polynomial&) = default;
+
+    /** Moves another polynomial into this one. */
+    Polynomial& operator= (Polynomial&&) = default;
+
+    /** Creates a new polynomial with coefficients by a C++11 initializer list.
+        This function can be used in the following way:
+        Polynomial<float> p ({0.5f, -0.3f, 0.2f});
+    */
+    template <typename... Values>
+    Polynomial (Values... items)  : coeffs (items...)
+    {
+        jassert (! coeffs.isEmpty());
+    }
+
+    //==============================================================================
+    /** Returns a single coefficient of the receiver for reading */
+    FloatingType operator[] (int index) const noexcept              { return coeffs.getUnchecked (index); }
+
+    /** Returns a single coefficient of the receiver for modifying. */
+    FloatingType& operator[] (int index) noexcept                   { return coeffs.getReference (index); }
+
+    /** Evaluates the value of the polynomial at a single point x. */
+    FloatingType operator() (FloatingType x) const noexcept
+    {
+        // Horner's method, starting from the highest-order coefficient
+        FloatingType y (0);
+
+        for (int i = coeffs.size(); --i >= 0;)
+            y = (x * y) + coeffs.getUnchecked(i);
+
+        return y;
+    }
+
+    /** Returns the order of the polynomial, i.e. the number of coefficients minus one. */
+    int getOrder() const noexcept
+    {
+        return coeffs.size() - 1;
+    }
+
+    //==============================================================================
+    /** Returns the polynomial with all its coefficients multiplied with a gain factor */
+    Polynomial<FloatingType> withGain (double gain) const
+    {
+        auto result = *this;
+
+        // The product is formed in double precision and then converted back
+        // explicitly, which is what the compound assignment did implicitly.
+        for (auto& c : result.coeffs)
+            c = static_cast<FloatingType> (c * gain);
+
+        return result;
+    }
+
+    /** Returns the sum of this polynomial with another */
+    Polynomial<FloatingType> getSumWith (const Polynomial<FloatingType>& other) const
+    {
+        // Always start from the longer polynomial so every index of the
+        // shorter one is valid in the result.
+        if (coeffs.size() < other.coeffs.size())
+            return other.getSumWith (*this);
+
+        auto result = *this;
+
+        for (int i = 0; i < other.coeffs.size(); ++i)
+            result[i] += other[i];
+
+        return result;
+    }
+
+    /** Computes the product of two polynomials and returns the result.
+
+        The result has (N1 + N2 - 1) coefficients, where N1 and N2 are the
+        numbers of coefficients of the two operands.
+    */
+    Polynomial<FloatingType> getProductWith (const Polynomial<FloatingType>& other) const
+    {
+        Polynomial<FloatingType> result;
+        result.coeffs.clearQuick();
+
+        auto N1 = coeffs.size();
+        auto N2 = other.coeffs.size();
+
+        auto N = N1 + N2 - 1;
+
+        // Coefficient i of the product is the sum of this[j] * other[i - j]
+        // over every j for which both indices are valid.
+        for (int i = 0; i < N; ++i)
+        {
+            FloatingType value (0);
+
+            for (int j = 0; j < N1; ++j)
+            {
+                const auto otherIndex = i - j;
+
+                if (otherIndex >= 0 && otherIndex < N2)
+                    value = value + (*this)[j] * other[otherIndex];
+            }
+
+            result.coeffs.add (value);
+        }
+
+        return result;
+    }
+
+private:
+    //==============================================================================
+    Array<FloatingType> coeffs;
+
+    JUCE_LEAK_DETECTOR (Polynomial)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_SpecialFunctions.cpp
+++ b/modules/juce_dsp/maths/juce_SpecialFunctions.cpp
@ -0,0 +1,144 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+// Evaluates I0(x) with one of two polynomial approximations, selected by
+// whether |x| is below 3.75.
+double SpecialFunctions::besselI0 (double x) noexcept
+{
+    const auto magnitude = std::abs (x);
+
+    // Written as a negated "<" so that a NaN input takes the same branch as before
+    if (! (magnitude < 3.75))
+    {
+        // Large arguments: exp (|x|) / sqrt (|x|) times a polynomial in 3.75 / |x|
+        const auto t = 3.75 / magnitude;
+
+        return (std::exp (magnitude) / std::sqrt (magnitude))
+                 * (0.39894228
+                     + t * (0.1328592e-1
+                     + t * (0.225319e-2
+                     + t * (-0.157565e-2
+                     + t * (0.916281e-2
+                     + t * (-0.2057706e-1
+                     + t * (0.2635537e-1
+                     + t * (-0.1647633e-1
+                     + t * 0.392377e-2))))))));
+    }
+
+    // Small arguments: polynomial in (x / 3.75)^2
+    const auto ratio = x / 3.75;
+    const auto t = ratio * ratio;
+
+    return 1.0 + t * (3.5156229
+               + t * (3.0899424
+               + t * (1.2067492
+               + t * (0.2659732
+               + t * (0.360768e-1
+               + t * 0.45813e-2)))));
+}
+
+// Writes approximations of the complete elliptic integral for the modulus k
+// (into K) and for the complementary modulus sqrt (1 - k^2) (into Kp).
+// Both start from pi / 2 and are refined by a fixed number of iterations.
+void SpecialFunctions::ellipticIntegralK (double k, double& K, double& Kp) noexcept
+{
+    constexpr int numIterations = 4;
+
+    // One step of the recursion that maps a modulus onto the next one
+    auto nextModulus = [] (double m)
+    {
+        return std::pow (m / (1.0 + std::sqrt (1.0 - std::pow (m, 2.0))), 2.0);
+    };
+
+    // Integral for the modulus itself
+    K = MathConstants<double>::halfPi;
+    auto modulus = k;
+
+    for (int step = 0; step < numIterations; ++step)
+    {
+        modulus = nextModulus (modulus);
+        K *= 1 + modulus;
+    }
+
+    // Integral for the complementary modulus
+    Kp = MathConstants<double>::halfPi;
+    auto complement = std::sqrt (1 - k * k);
+
+    for (int step = 0; step < numIterations; ++step)
+    {
+        complement = nextModulus (complement);
+        Kp *= 1 + complement;
+    }
+}
+
+// Evaluates cd (u, k): builds a short table of moduli derived from k, starts
+// from cos (u * pi / 2) and then folds the table in from its last entry back
+// to its second one.
+Complex<double> SpecialFunctions::cde (Complex<double> u, double k) noexcept
+{
+    constexpr int numIterations = 4;
+
+    // moduli[0] is k itself; every later entry is derived from its predecessor
+    double moduli[numIterations + 1];
+    moduli[0] = k;
+
+    for (int n = 0; n < numIterations; ++n)
+    {
+        const auto current = moduli[n];
+        moduli[n + 1] = std::pow (current / (1.0 + std::sqrt (1.0 - std::pow (current, 2.0))), 2.0);
+    }
+
+    // NB: the spurious cast to double here is a workaround for a very odd link-time failure
+    std::complex<double> result = std::cos (u * (double) MathConstants<double>::halfPi);
+
+    for (int n = numIterations; n >= 1; --n)
+        result = (1.0 + moduli[n]) / (1.0 / result + moduli[n] * result);
+
+    return result;
+}
+
+// Evaluates sn (u, k): builds a short table of moduli derived from k, starts
+// from sin (u * pi / 2) and then folds the table in from its last entry back
+// to its second one.
+Complex<double> SpecialFunctions::sne (Complex<double> u, double k) noexcept
+{
+    constexpr int numIterations = 4;
+
+    // moduli[0] is k itself; every later entry is derived from its predecessor
+    double moduli[numIterations + 1];
+    moduli[0] = k;
+
+    for (int n = 0; n < numIterations; ++n)
+    {
+        const auto current = moduli[n];
+        moduli[n + 1] = std::pow (current / (1 + std::sqrt (1 - std::pow (current, 2.0))), 2.0);
+    }
+
+    // NB: the spurious cast to double here is a workaround for a very odd link-time failure
+    std::complex<double> result = std::sin (u * (double) MathConstants<double>::halfPi);
+
+    for (int n = numIterations; n >= 1; --n)
+        result = (1.0 + moduli[n]) / (1.0 / result + moduli[n] * result);
+
+    return result;
+}
+
+// Evaluates the inverse of sn for the value w and modulus k: builds the same
+// table of derived moduli as sne, walks it forwards while transforming w, and
+// finishes with (2 / pi) * asin of the transformed value.
+Complex<double> SpecialFunctions::asne (Complex<double> w, double k) noexcept
+{
+    constexpr int numIterations = 4;
+
+    // moduli[0] is k itself; every later entry is derived from its predecessor
+    double moduli[numIterations + 1];
+    moduli[0] = k;
+
+    for (int n = 0; n < numIterations; ++n)
+    {
+        const auto current = moduli[n];
+        moduli[n + 1] = std::pow (current / (1.0 + std::sqrt (1.0 - std::pow (current, 2.0))), 2.0);
+    }
+
+    std::complex<double> result = w;
+
+    for (int n = 1; n <= numIterations; ++n)
+    {
+        const auto root = std::sqrt (1.0 - std::pow (moduli[n - 1] * result, 2.0));
+        result = 2.0 * result / ((1.0 + moduli[n]) * (1.0 + root));
+    }
+
+    return 2.0 / MathConstants<double>::pi * std::asin (result);
+}
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/maths/juce_SpecialFunctions.h
+++ b/modules/juce_dsp/maths/juce_SpecialFunctions.h
@ -0,0 +1,68 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Contains miscellaneous filter design and windowing functions.
+
+    All members are static; the struct is only used as a named scope.
+
+    @tags{DSP}
+*/
+struct SpecialFunctions
+{
+    /** Computes the modified Bessel function of the first kind I0 for a
+        given double value x. Modified Bessel functions are useful to solve
+        various mathematical problems involving differential equations.
+
+        The value is obtained from a polynomial approximation, so it is not
+        exact.
+    */
+    static double besselI0 (double x) noexcept;
+
+    /** Computes the complete elliptic integral of the first kind K for a
+        given double value k, and the associated complete elliptic integral
+        of the first kind Kp for the complementary modulus of k.
+
+        @param k    the elliptic modulus
+        @param K    receives the integral for the modulus k
+        @param Kp   receives the integral for the complementary modulus
+                    sqrt (1 - k * k)
+    */
+    static void ellipticIntegralK (double k, double& K, double& Kp) noexcept;
+
+    /** Computes the Jacobian elliptic function cd for the elliptic
+        modulus k and the quarter-period units u.
+    */
+    static Complex<double> cde (Complex<double> u, double k) noexcept;
+
+    /** Computes the Jacobian elliptic function sn for the elliptic
+        modulus k and the quarter-period units u.
+    */
+    static Complex<double> sne (Complex<double> u, double k) noexcept;
+
+    /** Computes the inverse of the Jacobian elliptic function sn
+        for the elliptic modulus k, evaluated at the value w.
+
+        NOTE(review): the result is presumably expressed in quarter-period
+        units, mirroring the argument u of sne - confirm.
+    */
+    static Complex<double> asne (Complex<double> w, double k) noexcept;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/native/juce_avx_SIMDNativeOps.cpp
+++ b/modules/juce_dsp/native/juce_avx_SIMDNativeOps.cpp
@ -0,0 +1,59 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+    namespace dsp
+    {
+        // Out-of-line definitions of the 32-byte constant tables declared inside the
+        // SIMDNativeOps specialisations. Each table fills exactly one 256-bit register,
+        // so it has 32 / sizeof (element type) entries.
+
+        // float: all-ones mask, a mask with the top bit set in every even-indexed
+        // element (used by cmplxmul to flip signs), and a vector of ones (used by sum).
+        DEFINE_AVX_SIMD_CONST (int32_t, float, kAllBitsSet)     = { -1, -1, -1, -1, -1, -1, -1, -1 };
+        DEFINE_AVX_SIMD_CONST (int32_t, float, kEvenHighBit)    = { static_cast<int32_t>(0x80000000), 0, static_cast<int32_t>(0x80000000), 0, static_cast<int32_t>(0x80000000), 0, static_cast<int32_t>(0x80000000), 0 };
+        DEFINE_AVX_SIMD_CONST (float, float, kOne)              = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
+
+        // double: the same three tables with 64-bit elements.
+        DEFINE_AVX_SIMD_CONST (int64_t, double, kAllBitsSet)    = { -1, -1, -1, -1 };
+        DEFINE_AVX_SIMD_CONST (int64_t, double, kEvenHighBit)   = { static_cast<int64_t> (0x8000000000000000), 0, static_cast<int64_t> (0x8000000000000000), 0 };
+        DEFINE_AVX_SIMD_CONST (double, double, kOne)            = { 1.0, 1.0, 1.0, 1.0 };
+
+        // Integer types: every type gets an all-ones mask; the unsigned types also
+        // get a table in which only the top bit of each element is set.
+        DEFINE_AVX_SIMD_CONST (int8_t, int8_t, kAllBitsSet)     = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
+
+        DEFINE_AVX_SIMD_CONST (uint8_t, uint8_t, kAllBitsSet)   = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+        DEFINE_AVX_SIMD_CONST (uint8_t, uint8_t, kHighBit)      = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
+
+        DEFINE_AVX_SIMD_CONST (int16_t, int16_t, kAllBitsSet)   = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
+
+        DEFINE_AVX_SIMD_CONST (uint16_t, uint16_t, kAllBitsSet) = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff };
+        DEFINE_AVX_SIMD_CONST (uint16_t, uint16_t, kHighBit)    = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 };
+
+        DEFINE_AVX_SIMD_CONST (int32_t, int32_t, kAllBitsSet)   = { -1, -1, -1, -1, -1, -1, -1, -1 };
+
+        DEFINE_AVX_SIMD_CONST (uint32_t, uint32_t, kAllBitsSet) = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
+        DEFINE_AVX_SIMD_CONST (uint32_t, uint32_t, kHighBit)    = { 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+
+        DEFINE_AVX_SIMD_CONST (int64_t, int64_t, kAllBitsSet)   = { -1LL, -1LL, -1LL, -1LL };
+
+        DEFINE_AVX_SIMD_CONST (uint64_t, uint64_t, kAllBitsSet) = { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL };
+        DEFINE_AVX_SIMD_CONST (uint64_t, uint64_t, kHighBit)    = { 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL };
+    }
+}
--- a/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h
+++ b/modules/juce_dsp/native/juce_avx_SIMDNativeOps.h
@ -0,0 +1,649 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+
+#if JUCE_GCC && (__GNUC__ >= 6)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+
+// Helper macros for the 32-byte aligned constant tables used by the AVX ops.
+//   DECLARE_AVX_SIMD_CONST (type, name)              declares a static const array
+//       member holding 32 / sizeof (type) elements, for use inside a
+//       SIMDNativeOps specialisation.
+//   DEFINE_AVX_SIMD_CONST (type, class_type, name)   starts the matching
+//       out-of-class definition of SIMDNativeOps<class_type>::name; the caller
+//       appends the initialiser.
+// The two variants differ only in how the 32-byte alignment is spelled.
+#ifdef _MSC_VER
+ // MSVC spelling: __declspec(align(32)) in front of the declaration
+ #define DECLARE_AVX_SIMD_CONST(type, name) \
+    static __declspec(align(32)) const type name[32 / sizeof (type)]
+
+ #define DEFINE_AVX_SIMD_CONST(type, class_type, name) \
+    __declspec(align(32)) const type SIMDNativeOps<class_type>:: name[32 / sizeof (type)]
+
+#else
+ // Other compilers: __attribute__((aligned(32))) after the declarator
+ #define DECLARE_AVX_SIMD_CONST(type, name) \
+    static const type name[32 / sizeof (type)] __attribute__((aligned(32)))
+
+ #define DEFINE_AVX_SIMD_CONST(type, class_type, name) \
+    const type SIMDNativeOps<class_type>:: name[32 / sizeof (type)] __attribute__((aligned(32)))
+
+#endif
+
+// Primary template: declared only, every supported element type is an explicit
+// specialisation.
+template <typename type>
+struct SIMDNativeOps;
+
+//==============================================================================
+/** Single-precision floating point AVX intrinsics.
+
+    Specialisation of SIMDNativeOps for float. The vector type is a 256-bit
+    __m256 register, i.e. eight floats, and every operation is a static
+    function wrapping the corresponding intrinsic.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<float>
+{
+    using vSIMDType = __m256;
+
+    //==============================================================================
+    // 32-byte aligned constant tables (see DECLARE_AVX_SIMD_CONST)
+    DECLARE_AVX_SIMD_CONST (int32_t, kAllBitsSet);
+    DECLARE_AVX_SIMD_CONST (int32_t, kEvenHighBit);
+    DECLARE_AVX_SIMD_CONST (float, kOne);
+
+    //==============================================================================
+    // Notes on the one-line wrappers below:
+    //  - vconst, load and store use the aligned load/store intrinsics, so the
+    //    pointers passed to them must be 32-byte aligned.
+    //  - bit_notand (a, b) computes (~a) & b, following _mm256_andnot_ps.
+    //  - All comparisons use ordered, non-signalling predicates (_CMP_*_OQ), so
+    //    any comparison involving a NaN yields false - including notEqual.
+    //  - multiplyAdd (a, b, c) returns a + b * c using a fused multiply-add.
+    //  - dupeven / dupodd / swapevenodd shuffle within each pair of elements
+    //    (element 2n and element 2n + 1).
+    //  - get / set access a single element through SIMDFallbackOps.
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE vconst (const float* a) noexcept                     { return load (a); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE vconst (const int32_t* a) noexcept                   { return _mm256_castsi256_ps (_mm256_load_si256 ((const __m256i*) a)); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE expand (float s) noexcept                            { return _mm256_broadcast_ss (&s); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE load (const float* a) noexcept                       { return _mm256_load_ps (a); }
+    static forcedinline void   JUCE_VECTOR_CALLTYPE store (__m256 value, float* dest) noexcept           { _mm256_store_ps (dest, value); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE add (__m256 a, __m256 b) noexcept                    { return _mm256_add_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE sub (__m256 a, __m256 b) noexcept                    { return _mm256_sub_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE mul (__m256 a, __m256 b) noexcept                    { return _mm256_mul_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_and (__m256 a, __m256 b) noexcept                { return _mm256_and_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_or  (__m256 a, __m256 b) noexcept                { return _mm256_or_ps  (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_xor (__m256 a, __m256 b) noexcept                { return _mm256_xor_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_notand (__m256 a, __m256 b) noexcept             { return _mm256_andnot_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE bit_not (__m256 a) noexcept                          { return bit_notand (a, vconst (kAllBitsSet)); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE min (__m256 a, __m256 b) noexcept                    { return _mm256_min_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE max (__m256 a, __m256 b) noexcept                    { return _mm256_max_ps (a, b); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE equal (__m256 a, __m256 b) noexcept                  { return _mm256_cmp_ps (a, b, _CMP_EQ_OQ); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE notEqual (__m256 a, __m256 b) noexcept               { return _mm256_cmp_ps (a, b, _CMP_NEQ_OQ); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThan (__m256 a, __m256 b) noexcept            { return _mm256_cmp_ps (a, b, _CMP_GT_OQ); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256 a, __m256 b) noexcept     { return _mm256_cmp_ps (a, b, _CMP_GE_OQ); }
+    static forcedinline bool   JUCE_VECTOR_CALLTYPE allEqual (__m256 a, __m256 b) noexcept               { return (_mm256_movemask_ps (equal (a, b)) == 0xff); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE multiplyAdd (__m256 a, __m256 b, __m256 c) noexcept  { return _mm256_fmadd_ps (b, c, a); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupeven (__m256 a) noexcept                          { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0)); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE dupodd (__m256 a) noexcept                           { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1)); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE swapevenodd (__m256 a) noexcept                      { return _mm256_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1)); }
+    static forcedinline float  JUCE_VECTOR_CALLTYPE get (__m256 v, size_t i) noexcept                    { return SIMDFallbackOps<float, __m256>::get (v, i); }
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE set (__m256 v, size_t i, float s) noexcept           { return SIMDFallbackOps<float, __m256>::set (v, i, s); }
+
+    // Adds up all even-indexed elements and, separately, all odd-indexed
+    // elements: first across the two pairs inside each 128-bit half, then
+    // across the two halves.
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE oddevensum (__m256 a) noexcept
+    {
+        a = _mm256_add_ps (_mm256_shuffle_ps (a, a, _MM_SHUFFLE (1, 0, 3, 2)), a);
+        return add (_mm256_permute2f128_ps (a, a, 1), a);
+    }
+
+    //==============================================================================
+    // Multiplies interleaved complex numbers (real part in the even element,
+    // imaginary part in the following odd element).
+    static forcedinline __m256 JUCE_VECTOR_CALLTYPE cmplxmul (__m256 a, __m256 b) noexcept
+    {
+        // { ar * br, ai * br }
+        __m256 rr_ir = mul (a, dupeven (b));
+        // { ai * bi, ar * bi }
+        __m256 ii_ri = mul (swapevenodd (a), dupodd (b));
+        // Flipping the sign of the even element yields { ar*br - ai*bi, ai*br + ar*bi }
+        return add (rr_ir, bit_xor (ii_ri, vconst (kEvenHighBit)));
+    }
+
+    // Returns the sum of all eight elements.
+    static forcedinline float JUCE_VECTOR_CALLTYPE sum (__m256 a) noexcept
+    {
+       // Dot product with ones sums each 128-bit half; the halves are then added
+       __m256 retval = _mm256_dp_ps (a, vconst (kOne), 0xff);
+       __m256 tmp = _mm256_permute2f128_ps (retval, retval, 1);
+       retval = _mm256_add_ps (retval, tmp);
+
+      #if JUCE_GCC
+       return retval[0];
+      #else
+       return _mm256_cvtss_f32 (retval);
+      #endif
+    }
+};
+
+//==============================================================================
+/** Double-precision floating point AVX intrinsics.
+
+    Specialisation of SIMDNativeOps for double. The vector type is a 256-bit
+    __m256d register, i.e. four doubles, and every operation is a static
+    function wrapping the corresponding intrinsic.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<double>
+{
+    using vSIMDType = __m256d;
+
+    //==============================================================================
+    // 32-byte aligned constant tables (see DECLARE_AVX_SIMD_CONST)
+    DECLARE_AVX_SIMD_CONST (int64_t, kAllBitsSet);
+    DECLARE_AVX_SIMD_CONST (int64_t, kEvenHighBit);
+    DECLARE_AVX_SIMD_CONST (double, kOne);
+
+    //==============================================================================
+    // Notes on the one-line wrappers below:
+    //  - vconst, load and store use the aligned load/store intrinsics, so the
+    //    pointers passed to them must be 32-byte aligned.
+    //  - bit_notand (a, b) computes (~a) & b, following _mm256_andnot_pd.
+    //  - All comparisons use ordered, non-signalling predicates (_CMP_*_OQ), so
+    //    any comparison involving a NaN yields false - including notEqual.
+    //  - multiplyAdd (a, b, c) returns a + b * c as a separate multiply and add.
+    //  - dupeven / dupodd / swapevenodd shuffle within each pair of elements;
+    //    oddevensum adds the two 128-bit halves together.
+    //  - get / set access a single element through SIMDFallbackOps.
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE vconst (const double* a) noexcept                      { return load (a); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE vconst (const int64_t* a) noexcept                     { return _mm256_castsi256_pd (_mm256_load_si256 ((const __m256i*) a)); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE expand (double s) noexcept                             { return _mm256_broadcast_sd (&s); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE load (const double* a) noexcept                        { return _mm256_load_pd (a); }
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256d value, double* dest) noexcept              { _mm256_store_pd (dest, value); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE add (__m256d a, __m256d b) noexcept                    { return _mm256_add_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE sub (__m256d a, __m256d b) noexcept                    { return _mm256_sub_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE mul (__m256d a, __m256d b) noexcept                    { return _mm256_mul_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_and (__m256d a, __m256d b) noexcept                { return _mm256_and_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_or  (__m256d a, __m256d b) noexcept                { return _mm256_or_pd  (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_xor (__m256d a, __m256d b) noexcept                { return _mm256_xor_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_notand (__m256d a, __m256d b) noexcept             { return _mm256_andnot_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE bit_not (__m256d a) noexcept                           { return bit_notand (a, vconst (kAllBitsSet)); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE min (__m256d a, __m256d b) noexcept                    { return _mm256_min_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE max (__m256d a, __m256d b) noexcept                    { return _mm256_max_pd (a, b); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE equal (__m256d a, __m256d b) noexcept                  { return _mm256_cmp_pd (a, b, _CMP_EQ_OQ); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE notEqual (__m256d a, __m256d b) noexcept               { return _mm256_cmp_pd (a, b, _CMP_NEQ_OQ); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThan (__m256d a, __m256d b) noexcept            { return _mm256_cmp_pd (a, b, _CMP_GT_OQ); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256d a, __m256d b) noexcept     { return _mm256_cmp_pd (a, b, _CMP_GE_OQ); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m256d a, __m256d b) noexcept               { return (_mm256_movemask_pd (equal (a, b)) == 0xf); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE multiplyAdd (__m256d a, __m256d b, __m256d c) noexcept { return _mm256_add_pd (a, _mm256_mul_pd (b, c)); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupeven (__m256d a) noexcept                           { return _mm256_shuffle_pd (a, a, 0); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE dupodd (__m256d a) noexcept                            { return _mm256_shuffle_pd (a, a, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE swapevenodd (__m256d a) noexcept                       { return _mm256_shuffle_pd (a, a, (1 << 0) | (0 << 1) | (1 << 2) | (0 << 3)); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE oddevensum (__m256d a) noexcept                        { return _mm256_add_pd (_mm256_permute2f128_pd (a, a, 1), a); }
+    static forcedinline double  JUCE_VECTOR_CALLTYPE get (__m256d v, size_t i) noexcept                     { return SIMDFallbackOps<double, __m256d>::get (v, i); }
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE set (__m256d v, size_t i, double s) noexcept           { return SIMDFallbackOps<double, __m256d>::set (v, i, s); }
+
+
+    //==============================================================================
+    // Multiplies interleaved complex numbers (real part in the even element,
+    // imaginary part in the following odd element).
+    static forcedinline __m256d JUCE_VECTOR_CALLTYPE cmplxmul (__m256d a, __m256d b) noexcept
+    {
+        // { ar * br, ai * br }
+        __m256d rr_ir = mul (a, dupeven (b));
+        // { ai * bi, ar * bi }
+        __m256d ii_ri = mul (swapevenodd (a), dupodd (b));
+        // Flipping the sign of the even element yields { ar*br - ai*bi, ai*br + ar*bi }
+        return add (rr_ir, bit_xor (ii_ri, vconst (kEvenHighBit)));
+    }
+
+    // Returns the sum of all four elements.
+    static forcedinline double JUCE_VECTOR_CALLTYPE sum (__m256d a) noexcept
+    {
+        // The horizontal add sums each 128-bit half; the halves are then added
+        __m256d retval = _mm256_hadd_pd (a, a);
+        __m256d tmp = _mm256_permute2f128_pd (retval, retval, 1);
+        retval = _mm256_add_pd (retval, tmp);
+
+       #if JUCE_GCC
+        return retval[0];
+       #else
+        return _mm256_cvtsd_f64 (retval);
+       #endif
+    }
+};
+
+//==============================================================================
+/** Signed 8-bit integer AVX intrinsics
+
+    Specialisation of SIMDNativeOps for int8_t. The vector type is a 256-bit
+    __m256i register, i.e. 32 elements, and every operation is a static,
+    non-throwing function wrapping the corresponding intrinsic(s).
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int8_t>
+{
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    // 32-byte aligned constant table (see DECLARE_AVX_SIMD_CONST)
+    DECLARE_AVX_SIMD_CONST (int8_t, kAllBitsSet);
+
+    // Notes on the one-line wrappers below:
+    //  - load and store use the aligned intrinsics, so the pointers passed to
+    //    them must be 32-byte aligned.
+    //  - bit_andnot (a, b) computes (~a) & b, following _mm256_andnot_si256.
+    //  - There is no native "not equal" or "greater or equal" comparison, so
+    //    these are composed from equal, greaterThan and the bitwise ops.
+    //  - allEqual compares the 32-bit byte mask against -1 (all 32 bits set).
+    //  - get / set access a single element through SIMDFallbackOps.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int8_t s) noexcept                             { return _mm256_set1_epi8 (s); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int8_t* p) noexcept                        { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256i value, int8_t* dest) noexcept              { _mm256_store_si256 ((__m256i*) dest, value); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                    { return _mm256_add_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                    { return _mm256_sub_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept                { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept                { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept                { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept             { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept                           { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept                    { return _mm256_min_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept                    { return _mm256_max_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept                  { return _mm256_cmpeq_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept            { return _mm256_cmpgt_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept     { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept               { return _mm256_movemask_epi8 (equal (a, b)) == -1; }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept               { return bit_not (equal (a, b)); }
+    static forcedinline int8_t  JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept                     { return SIMDFallbackOps<int8_t, __m256i>::get (v, i); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, int8_t s) noexcept           { return SIMDFallbackOps<int8_t, __m256i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Returns the sum of all 32 elements, wrapped to 8 bits. */
+    static forcedinline int8_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
+    {
+        // Widen the bytes to 16 bits so that the horizontal adds work on them.
+        // (Zero-extension is sufficient: only the low 8 bits of the total are kept.)
+        __m256i lo = _mm256_unpacklo_epi8 (a, _mm256_setzero_si256());
+        __m256i hi = _mm256_unpackhi_epi8 (a, _mm256_setzero_si256());
+
+        // After three horizontal adds the first 16-bit element of each 128-bit
+        // half holds the total of that half
+        for (int i = 0; i < 3; ++i)
+        {
+            lo = _mm256_hadd_epi16 (lo, lo);
+            hi = _mm256_hadd_epi16 (hi, hi);
+        }
+
+        // Combine the low bytes of the four partial totals
+       #if JUCE_GCC
+        return (int8_t) ((lo[0] & 0xff) +
+                         (hi[0] & 0xff) +
+                         (lo[2] & 0xff) +
+                         (hi[2] & 0xff));
+       #else
+        // Swaps the two 128-bit halves so that the upper total can be extracted
+        constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
+
+        return (int8_t) ((_mm256_cvtsi256_si32 (lo) & 0xff) +
+                         (_mm256_cvtsi256_si32 (hi) & 0xff) +
+                         (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (lo, mask)) & 0xff) +
+                         (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (hi, mask)) & 0xff));
+       #endif
+    }
+
+    /** Element-wise 8-bit multiplication (result wrapped to 8 bits).
+
+        There is no 8-bit multiply intrinsic, so the even and odd bytes are
+        multiplied separately with 16-bit multiplies and then recombined.
+        Like every other operation in this struct, this does not throw.
+    */
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept
+    {
+        // unpack and multiply
+        __m256i even = _mm256_mullo_epi16 (a, b);
+        __m256i odd  = _mm256_mullo_epi16 (_mm256_srli_epi16 (a, 8), _mm256_srli_epi16 (b, 8));
+
+        // Odd results go back to the high byte; the even results keep only
+        // their low byte
+        return _mm256_or_si256 (_mm256_slli_epi16 (odd, 8),
+                             _mm256_srli_epi16 (_mm256_slli_epi16 (even, 8), 8));
+    }
+};
+
+//==============================================================================
+/** Unsigned 8-bit integer AVX intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint8_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (uint8_t, kHighBit);
+    DECLARE_AVX_SIMD_CONST (uint8_t, kAllBitsSet);
+
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE ssign (__m256i a) noexcept                              { return _mm256_xor_si256 (a, load (kHighBit)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (uint8_t s) noexcept                             { return _mm256_set1_epi8 ((int8_t) s); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const uint8_t* p) noexcept                        { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m256i value, uint8_t* dest) noexcept              { _mm256_store_si256 ((__m256i*) dest, value); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                     { return _mm256_add_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                     { return _mm256_sub_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept                 { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept                 { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept                 { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept              { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept                            { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept                     { return _mm256_min_epu8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept                     { return _mm256_max_epu8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept                   { return _mm256_cmpeq_epi8 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept             { return _mm256_cmpgt_epi8 (ssign (a), ssign (b)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept                { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline uint8_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept                      { return SIMDFallbackOps<uint8_t, __m256i>::get (v, i); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, uint8_t s) noexcept           { return SIMDFallbackOps<uint8_t, __m256i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Adds up all 8-bit elements of the register and returns the total truncated to uint8_t. */
+    static forcedinline uint8_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
+    {
+        // Interleave the bytes with zeros so that each one sits in its own 16-bit slot.
+        __m256i lowBytes  = _mm256_unpacklo_epi8 (a, _mm256_setzero_si256());
+        __m256i highBytes = _mm256_unpackhi_epi8 (a, _mm256_setzero_si256());
+
+        // Three rounds of pairwise 16-bit adds reduce each 128-bit half to a single total.
+        int roundsLeft = 3;
+
+        while (roundsLeft-- > 0)
+        {
+            lowBytes  = _mm256_hadd_epi16 (lowBytes,  lowBytes);
+            highBytes = _mm256_hadd_epi16 (highBytes, highBytes);
+        }
+
+        // Only the low 8 bits of each partial total are kept; 64-bit elements 0 and 2
+        // are the starts of the lower and upper 128-bit halves respectively.
+       #if JUCE_GCC
+        const uint32_t lowerHalfTotal = (static_cast<uint32_t> (lowBytes[0]) & 0xffu)
+                                      + (static_cast<uint32_t> (highBytes[0]) & 0xffu);
+        const uint32_t upperHalfTotal = (static_cast<uint32_t> (lowBytes[2]) & 0xffu)
+                                      + (static_cast<uint32_t> (highBytes[2]) & 0xffu);
+
+        return (uint8_t) (lowerHalfTotal + upperHalfTotal);
+       #else
+        // Swaps the two 128-bit halves so that the upper half can be read from element 0.
+        constexpr int swapHalves = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
+
+        const uint32_t lowerHalfTotal = (static_cast<uint32_t> (_mm256_cvtsi256_si32 (lowBytes)) & 0xffu)
+                                      + (static_cast<uint32_t> (_mm256_cvtsi256_si32 (highBytes)) & 0xffu);
+        const uint32_t upperHalfTotal = (static_cast<uint32_t> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (lowBytes, swapHalves))) & 0xffu)
+                                      + (static_cast<uint32_t> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (highBytes, swapHalves))) & 0xffu);
+
+        return (uint8_t) (lowerHalfTotal + upperHalfTotal);
+       #endif
+    }
+
+    /** Element-wise 8-bit multiply, keeping the low 8 bits of every product.
+
+        No 8-bit multiply intrinsic is used here; instead the even and odd bytes are
+        multiplied separately with the 16-bit multiply and the results are merged.
+        Declared noexcept like every other operation in this struct (it is called
+        from the noexcept multiplyAdd).
+    */
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept
+    {
+        // Even bytes: the low byte of each 16-bit product depends only on the operands' low bytes.
+        __m256i even = _mm256_mullo_epi16 (a, b);
+
+        // Odd bytes: move them down into the low byte of each 16-bit slot before multiplying.
+        __m256i odd  = _mm256_mullo_epi16 (_mm256_srli_epi16 (a, 8), _mm256_srli_epi16 (b, 8));
+
+        // Put the odd products back into the high bytes and strip the high byte
+        // from the even products, then merge the two.
+        return _mm256_or_si256 (_mm256_slli_epi16 (odd, 8),
+                                _mm256_srli_epi16 (_mm256_slli_epi16 (even, 8), 8));
+    }
+};
+
+//==============================================================================
+/** Signed 16-bit integer operations built on 256-bit AVX integer intrinsics.
+
+    Every function treats a __m256i as a vector of int16_t elements and works
+    element-wise unless stated otherwise.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int16_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (int16_t, kAllBitsSet);
+
+    //==============================================================================
+    // Broadcast, load and store (load/store use the aligned 256-bit intrinsics).
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int16_t s) noexcept                  { return _mm256_set1_epi16 (s); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int16_t* p) noexcept             { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void    JUCE_VECTOR_CALLTYPE store (__m256i value, int16_t* dest) noexcept { _mm256_store_si256 ((__m256i*) dest, value); }
+
+    // Single-element access is delegated to the generic fallback implementation.
+    static forcedinline int16_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept            { return SIMDFallbackOps<int16_t, __m256i>::get (v, i); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, int16_t s) noexcept { return SIMDFallbackOps<int16_t, __m256i>::set (v, i, s); }
+
+    //==============================================================================
+    // Arithmetic.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                    { return _mm256_add_epi16 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                    { return _mm256_sub_epi16 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept                    { return _mm256_mullo_epi16 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept                    { return _mm256_min_epi16 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept                    { return _mm256_max_epi16 (a, b); }
+
+    //==============================================================================
+    // Bitwise logic. Note that bit_andnot complements its FIRST operand: (~a) & b.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept    { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept    { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept    { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept               { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+
+    //==============================================================================
+    // Comparisons. The vector results are per-element masks; allEqual collapses to a bool.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept              { return _mm256_cmpeq_epi16 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept           { return bit_not (equal (a, b)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept        { return _mm256_cmpgt_epi16 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept           { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+
+    //==============================================================================
+    /** Adds up all the 16-bit elements and returns the total truncated to int16_t. */
+    static forcedinline int16_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
+    {
+        // Three rounds of pairwise adds reduce each 128-bit half to a single total.
+        __m256i partial = a;
+
+        for (int pass = 0; pass < 3; ++pass)
+            partial = _mm256_hadd_epi16 (partial, partial);
+
+        // 64-bit elements 0 and 2 are the starts of the lower and upper 128-bit halves.
+       #if JUCE_GCC
+        const auto lowerHalfTotal = partial[0] & 0xffff;
+        const auto upperHalfTotal = partial[2] & 0xffff;
+
+        return (int16_t) (lowerHalfTotal + upperHalfTotal);
+       #else
+        // Swaps the two 128-bit halves so that the upper half can be read from element 0.
+        constexpr int swapHalves = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
+
+        const int lowerHalfTotal = _mm256_cvtsi256_si32 (partial) & 0xffff;
+        const int upperHalfTotal = _mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (partial, swapHalves)) & 0xffff;
+
+        return (int16_t) (lowerHalfTotal + upperHalfTotal);
+       #endif
+    }
+};
+
+//==============================================================================
+/** Unsigned 16-bit integer operations built on 256-bit AVX integer intrinsics.
+
+    Every function treats a __m256i as a vector of uint16_t elements and works
+    element-wise unless stated otherwise.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint16_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (uint16_t, kHighBit);
+    DECLARE_AVX_SIMD_CONST (uint16_t, kAllBitsSet);
+
+    //==============================================================================
+    // Broadcast, load and store (load/store use the aligned 256-bit intrinsics).
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE expand (uint16_t s) noexcept                   { return _mm256_set1_epi16 ((int16_t) s); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE load (const uint16_t* p) noexcept              { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void     JUCE_VECTOR_CALLTYPE store (__m256i value, uint16_t* dest) noexcept { _mm256_store_si256 ((__m256i*) dest, value); }
+
+    // Single-element access is delegated to the generic fallback implementation.
+    static forcedinline uint16_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept             { return SIMDFallbackOps<uint16_t, __m256i>::get (v, i); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, uint16_t s) noexcept { return SIMDFallbackOps<uint16_t, __m256i>::set (v, i, s); }
+
+    //==============================================================================
+    // Arithmetic.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                    { return _mm256_add_epi16 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                    { return _mm256_sub_epi16 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept                    { return _mm256_mullo_epi16 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept                    { return _mm256_min_epu16 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept                    { return _mm256_max_epu16 (a, b); }
+
+    //==============================================================================
+    // Bitwise logic. Note that bit_andnot complements its FIRST operand: (~a) & b.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept    { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept    { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept    { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept               { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+
+    //==============================================================================
+    // Comparisons. The only "greater than" intrinsic used is the signed one, so
+    // ssign() XORs both operands with kHighBit before they are compared.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE ssign (__m256i a) noexcept                         { return _mm256_xor_si256 (a, load (kHighBit)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept              { return _mm256_cmpeq_epi16 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept           { return bit_not (equal (a, b)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept        { return _mm256_cmpgt_epi16 (ssign (a), ssign (b)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
+    static forcedinline bool     JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept           { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+
+    //==============================================================================
+    /** Adds up all the 16-bit elements and returns the total truncated to uint16_t. */
+    static forcedinline uint16_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
+    {
+        // Three rounds of pairwise adds reduce each 128-bit half to a single total.
+        __m256i partial = a;
+
+        for (int pass = 0; pass < 3; ++pass)
+            partial = _mm256_hadd_epi16 (partial, partial);
+
+        // 64-bit elements 0 and 2 are the starts of the lower and upper 128-bit halves.
+       #if JUCE_GCC
+        const uint32_t lowerHalfTotal = static_cast<uint32_t> (partial[0]) & 0xffffu;
+        const uint32_t upperHalfTotal = static_cast<uint32_t> (partial[2]) & 0xffffu;
+
+        return (uint16_t) (lowerHalfTotal + upperHalfTotal);
+       #else
+        // Swaps the two 128-bit halves so that the upper half can be read from element 0.
+        constexpr int swapHalves = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
+
+        const uint32_t lowerHalfTotal = static_cast<uint32_t> (_mm256_cvtsi256_si32 (partial)) & 0xffffu;
+        const uint32_t upperHalfTotal = static_cast<uint32_t> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (partial, swapHalves))) & 0xffffu;
+
+        return (uint16_t) (lowerHalfTotal + upperHalfTotal);
+       #endif
+    }
+};
+
+//==============================================================================
+/** Signed 32-bit integer AVX intrinsics.
+
+    Every function treats a __m256i as a vector of int32_t elements and works
+    element-wise unless stated otherwise.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int32_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (int32_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int32_t s) noexcept                             { return _mm256_set1_epi32 (s); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int32_t* p) noexcept                        { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void    JUCE_VECTOR_CALLTYPE store (__m256i value, int32_t* dest) noexcept           { _mm256_store_si256 ((__m256i*) dest, value); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                     { return _mm256_add_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                     { return _mm256_sub_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept                     { return _mm256_mullo_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept                 { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept                 { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept                 { return _mm256_xor_si256 (a, b); }
+    // Note the operand order: the FIRST operand is the one that gets complemented, i.e. (~a) & b.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept              { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept                            { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept                     { return _mm256_min_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept                     { return _mm256_max_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept                   { return _mm256_cmpeq_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept             { return _mm256_cmpgt_epi32 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept                { return bit_not (equal (a, b)); }
+    // movemask gathers one bit per byte; all 32 bits set reads back as -1, i.e. every element compared equal.
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept                { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+    // Single-element access is delegated to the generic fallback implementation.
+    static forcedinline int32_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept                      { return SIMDFallbackOps<int32_t, __m256i>::get (v, i); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, int32_t s) noexcept           { return SIMDFallbackOps<int32_t, __m256i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Adds up all the 32-bit elements and returns the total, wrapping on overflow.
+
+        The two partial totals are combined as uint32_t and converted back at the end.
+        Adding them as signed values could overflow, which is undefined behaviour: in
+        the GCC path each 64-bit element holds the partial total in BOTH of its 32-bit
+        halves, so a plain 64-bit signed add can exceed the range of long long.
+    */
+    static forcedinline int32_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
+    {
+        // Two rounds of pairwise adds reduce each 128-bit half to a single total.
+        __m256i tmp = _mm256_hadd_epi32 (a, a);
+        tmp = _mm256_hadd_epi32 (tmp, tmp);
+
+        // 64-bit elements 0 and 2 are the starts of the lower and upper 128-bit halves;
+        // only the low 32 bits of each are wanted.
+       #if JUCE_GCC
+        const uint32_t lowerHalfTotal = static_cast<uint32_t> (tmp[0]);
+        const uint32_t upperHalfTotal = static_cast<uint32_t> (tmp[2]);
+       #else
+        // Swaps the two 128-bit halves so that the upper half can be read from element 0.
+        constexpr int mask = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
+
+        const uint32_t lowerHalfTotal = static_cast<uint32_t> (_mm256_cvtsi256_si32 (tmp));
+        const uint32_t upperHalfTotal = static_cast<uint32_t> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (tmp, mask)));
+       #endif
+
+        return static_cast<int32_t> (lowerHalfTotal + upperHalfTotal);
+    }
+};
+
+//==============================================================================
+/** Unsigned 32-bit integer operations built on 256-bit AVX integer intrinsics.
+
+    Every function treats a __m256i as a vector of uint32_t elements and works
+    element-wise unless stated otherwise.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint32_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (uint32_t, kAllBitsSet);
+    DECLARE_AVX_SIMD_CONST (uint32_t, kHighBit);
+
+    //==============================================================================
+    // Broadcast, load and store (load/store use the aligned 256-bit intrinsics).
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE expand (uint32_t s) noexcept                   { return _mm256_set1_epi32 ((int32_t) s); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE load (const uint32_t* p) noexcept              { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void     JUCE_VECTOR_CALLTYPE store (__m256i value, uint32_t* dest) noexcept { _mm256_store_si256 ((__m256i*) dest, value); }
+
+    // Single-element access is delegated to the generic fallback implementation.
+    static forcedinline uint32_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept             { return SIMDFallbackOps<uint32_t, __m256i>::get (v, i); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, uint32_t s) noexcept { return SIMDFallbackOps<uint32_t, __m256i>::set (v, i, s); }
+
+    //==============================================================================
+    // Arithmetic.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                    { return _mm256_add_epi32 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                    { return _mm256_sub_epi32 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept                    { return _mm256_mullo_epi32 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept                    { return _mm256_min_epu32 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept                    { return _mm256_max_epu32 (a, b); }
+
+    //==============================================================================
+    // Bitwise logic. Note that bit_andnot complements its FIRST operand: (~a) & b.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept    { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept    { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept    { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept               { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+
+    //==============================================================================
+    // Comparisons. The only "greater than" intrinsic used is the signed one, so
+    // ssign() XORs both operands with kHighBit before they are compared.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE ssign (__m256i a) noexcept                         { return _mm256_xor_si256 (a, load (kHighBit)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept              { return _mm256_cmpeq_epi32 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept           { return bit_not (equal (a, b)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept        { return _mm256_cmpgt_epi32 (ssign (a), ssign (b)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
+    static forcedinline bool     JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept           { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+
+    //==============================================================================
+    /** Adds up all the 32-bit elements and returns the total (unsigned, so it wraps on overflow). */
+    static forcedinline uint32_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept
+    {
+        // Two rounds of pairwise adds reduce each 128-bit half to a single total.
+        __m256i partial = a;
+
+        for (int pass = 0; pass < 2; ++pass)
+            partial = _mm256_hadd_epi32 (partial, partial);
+
+        // 64-bit elements 0 and 2 are the starts of the lower and upper 128-bit halves.
+       #if JUCE_GCC
+        const uint32_t lowerHalfTotal = static_cast<uint32_t> (partial[0]);
+        const uint32_t upperHalfTotal = static_cast<uint32_t> (partial[2]);
+
+        return lowerHalfTotal + upperHalfTotal;
+       #else
+        // Swaps the two 128-bit halves so that the upper half can be read from element 0.
+        constexpr int swapHalves = (2 << 0) | (3 << 2) | (0 << 4) | (1 << 6);
+
+        const uint32_t lowerHalfTotal = static_cast<uint32_t> (_mm256_cvtsi256_si32 (partial));
+        const uint32_t upperHalfTotal = static_cast<uint32_t> (_mm256_cvtsi256_si32 (_mm256_permute4x64_epi64 (partial, swapHalves)));
+
+        return lowerHalfTotal + upperHalfTotal;
+       #endif
+    }
+};
+
+//==============================================================================
+/** Signed 64-bit integer operations built on 256-bit AVX integer intrinsics.
+
+    Every function treats a __m256i as a vector of int64_t elements. min, max, sum
+    and mul have no dedicated intrinsic here: min/max are composed from a compare
+    plus bitwise selection, while sum and mul defer to SIMDFallbackOps.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int64_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (int64_t, kAllBitsSet);
+
+    //==============================================================================
+    // Broadcast, load and store (load/store use the aligned 256-bit intrinsics).
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE expand (int64_t s) noexcept                   { return _mm256_set1_epi64x ((int64_t) s); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE load (const int64_t* p) noexcept              { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void    JUCE_VECTOR_CALLTYPE store (__m256i value, int64_t* dest) noexcept { _mm256_store_si256 ((__m256i*) dest, value); }
+
+    //==============================================================================
+    // Operations delegated to the generic fallback implementation.
+    static forcedinline int64_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept            { return SIMDFallbackOps<int64_t, __m256i>::get (v, i); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, int64_t s) noexcept { return SIMDFallbackOps<int64_t, __m256i>::set (v, i, s); }
+    static forcedinline int64_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept                      { return SIMDFallbackOps<int64_t, __m256i>::sum (a); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept           { return SIMDFallbackOps<int64_t, __m256i>::mul (a, b); }
+
+    //==============================================================================
+    // Arithmetic.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                    { return _mm256_add_epi64 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                    { return _mm256_sub_epi64 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
+
+    //==============================================================================
+    // Bitwise logic. Note that bit_andnot complements its FIRST operand: (~a) & b.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept    { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept    { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept    { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept               { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+
+    //==============================================================================
+    // Comparisons. The vector results are per-element masks; allEqual collapses to a bool.
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept              { return _mm256_cmpeq_epi64 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept           { return bit_not (equal (a, b)); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept        { return _mm256_cmpgt_epi64 (a, b); }
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept           { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+
+    //==============================================================================
+    /** Element-wise minimum: picks a where b > a, otherwise b. */
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept
+    {
+        const __m256i aIsSmaller = greaterThan (b, a);
+        const __m256i fromA = bit_and (aIsSmaller, a);
+        const __m256i fromB = bit_andnot (aIsSmaller, b);
+
+        return bit_or (fromA, fromB);
+    }
+
+    /** Element-wise maximum: picks a where a > b, otherwise b. */
+    static forcedinline __m256i JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept
+    {
+        const __m256i aIsLarger = greaterThan (a, b);
+        const __m256i fromA = bit_and (aIsLarger, a);
+        const __m256i fromB = bit_andnot (aIsLarger, b);
+
+        return bit_or (fromA, fromB);
+    }
+};
+
+//==============================================================================
+/** Unsigned 64-bit integer operations built on 256-bit AVX integer intrinsics.
+
+    Every function treats a __m256i as a vector of uint64_t elements. min, max, sum
+    and mul have no dedicated intrinsic here: min/max are composed from a compare
+    plus bitwise selection, while sum and mul defer to SIMDFallbackOps.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint64_t>
+{
+    //==============================================================================
+    using vSIMDType = __m256i;
+
+    //==============================================================================
+    DECLARE_AVX_SIMD_CONST (uint64_t, kAllBitsSet);
+    DECLARE_AVX_SIMD_CONST (uint64_t, kHighBit);
+
+    //==============================================================================
+    // Broadcast, load and store (load/store use the aligned 256-bit intrinsics).
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE expand (uint64_t s) noexcept                   { return _mm256_set1_epi64x ((int64_t) s); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE load (const uint64_t* p) noexcept              { return _mm256_load_si256 ((const __m256i*) p); }
+    static forcedinline void     JUCE_VECTOR_CALLTYPE store (__m256i value, uint64_t* dest) noexcept { _mm256_store_si256 ((__m256i*) dest, value); }
+
+    //==============================================================================
+    // Operations delegated to the generic fallback implementation.
+    static forcedinline uint64_t JUCE_VECTOR_CALLTYPE get (__m256i v, size_t i) noexcept             { return SIMDFallbackOps<uint64_t, __m256i>::get (v, i); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE set (__m256i v, size_t i, uint64_t s) noexcept { return SIMDFallbackOps<uint64_t, __m256i>::set (v, i, s); }
+    static forcedinline uint64_t JUCE_VECTOR_CALLTYPE sum (__m256i a) noexcept                       { return SIMDFallbackOps<uint64_t, __m256i>::sum (a); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE mul (__m256i a, __m256i b) noexcept            { return SIMDFallbackOps<uint64_t, __m256i>::mul (a, b); }
+
+    //==============================================================================
+    // Arithmetic.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE add (__m256i a, __m256i b) noexcept                    { return _mm256_add_epi64 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE sub (__m256i a, __m256i b) noexcept                    { return _mm256_sub_epi64 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE multiplyAdd (__m256i a, __m256i b, __m256i c) noexcept { return add (a, mul (b, c)); }
+
+    //==============================================================================
+    // Bitwise logic. Note that bit_andnot complements its FIRST operand: (~a) & b.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_and (__m256i a, __m256i b) noexcept    { return _mm256_and_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_or  (__m256i a, __m256i b) noexcept    { return _mm256_or_si256  (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_xor (__m256i a, __m256i b) noexcept    { return _mm256_xor_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_andnot (__m256i a, __m256i b) noexcept { return _mm256_andnot_si256 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE bit_not (__m256i a) noexcept               { return _mm256_andnot_si256 (a, load (kAllBitsSet)); }
+
+    //==============================================================================
+    // Comparisons. The only "greater than" intrinsic used is the signed one, so
+    // ssign() XORs both operands with kHighBit before they are compared.
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE ssign (__m256i a) noexcept                         { return _mm256_xor_si256 (a, load (kHighBit)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE equal (__m256i a, __m256i b) noexcept              { return _mm256_cmpeq_epi64 (a, b); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE notEqual (__m256i a, __m256i b) noexcept           { return bit_not (equal (a, b)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE greaterThan (__m256i a, __m256i b) noexcept        { return _mm256_cmpgt_epi64 (ssign (a), ssign (b)); }
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m256i a, __m256i b) noexcept { return bit_or (greaterThan (a, b), equal (a, b)); }
+    static forcedinline bool     JUCE_VECTOR_CALLTYPE allEqual (__m256i a, __m256i b) noexcept           { return (_mm256_movemask_epi8 (equal (a, b)) == -1); }
+
+    //==============================================================================
+    /** Element-wise minimum: picks a where b > a, otherwise b. */
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE min (__m256i a, __m256i b) noexcept
+    {
+        const __m256i aIsSmaller = greaterThan (b, a);
+        const __m256i fromA = bit_and (aIsSmaller, a);
+        const __m256i fromB = bit_andnot (aIsSmaller, b);
+
+        return bit_or (fromA, fromB);
+    }
+
+    /** Element-wise maximum: picks a where a > b, otherwise b. */
+    static forcedinline __m256i  JUCE_VECTOR_CALLTYPE max (__m256i a, __m256i b) noexcept
+    {
+        const __m256i aIsLarger = greaterThan (a, b);
+        const __m256i fromA = bit_and (aIsLarger, a);
+        const __m256i fromB = bit_andnot (aIsLarger, b);
+
+        return bit_or (fromA, fromB);
+    }
+};
+
+#endif
+
+#if JUCE_GCC && (__GNUC__ >= 6)
+ #pragma GCC diagnostic pop
+#endif
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h
+++ b/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h
@ -0,0 +1,256 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** A template specialisation to find corresponding mask type for primitives.
+
+    Compile-time helpers contained in this namespace:
+
+    - MaskTypeFor maps a primitive (or a std::complex of one) to the unsigned
+      integer type used to hold its bit masks. Types that have no
+      specialisation map to themselves.
+    - PrimitiveType yields the element type of a std::complex, and any other
+      type unchanged.
+    - Log2Helper computes floor (log2 (n)) by halving n until it reaches the
+      Log2Helper<1> specialisation; the recursion only terminates for n >= 1.
+*/
+namespace SIMDInternal
+{
+    template <typename Primitive> struct MaskTypeFor        { using type = Primitive; };
+    template <> struct MaskTypeFor <float>                  { using type = uint32_t; };
+    template <> struct MaskTypeFor <double>                 { using type = uint64_t; };
+    template <> struct MaskTypeFor <char>                   { using type = uint8_t; };
+    template <> struct MaskTypeFor <int8_t>                 { using type = uint8_t; };
+    template <> struct MaskTypeFor <int16_t>                { using type = uint16_t; };
+    template <> struct MaskTypeFor <int32_t>                { using type = uint32_t; };
+    template <> struct MaskTypeFor <int64_t>                { using type = uint64_t; };
+    template <> struct MaskTypeFor <std::complex<float>>    { using type = uint32_t; };
+    template <> struct MaskTypeFor <std::complex<double>>   { using type = uint64_t; };
+
+    template <typename Primitive> struct PrimitiveType                           { using type = Primitive; };
+    template <typename Primitive> struct PrimitiveType<std::complex<Primitive>>  { using type = Primitive; };
+
+    template <int n>    struct Log2Helper    { enum { value = Log2Helper<n/2>::value + 1 }; };
+    template <>         struct Log2Helper<1> { enum { value = 0 }; };
+}
+
+/**
+    Useful fallback routines to use if the native SIMD op is not supported. You
+    should never need to use this directly. Use juce_SIMDRegister instead.
+
+    Each routine views the vector register through a union as an array of n
+    scalars (or n mask words) and processes it one lane at a time.
+
+    @tparam ScalarType  the element type stored in each lane
+    @tparam vSIMDType   the vector register type being operated on
+
+    @tags{DSP}
+*/
+template <typename ScalarType, typename vSIMDType>
+struct SIMDFallbackOps
+{
+    /** Number of lanes: how many ScalarType values fit in one vSIMDType. */
+    static constexpr size_t n    =  sizeof (vSIMDType) / sizeof (ScalarType);
+
+    /** n - 1; used by shuffle() to extract one lane index from the shuffle constant. */
+    static constexpr size_t mask = (sizeof (vSIMDType) / sizeof (ScalarType)) - 1;
+
+    /** floor (log2 (n)): the width in bits of one lane index inside a shuffle constant. */
+    static constexpr size_t bits = SIMDInternal::Log2Helper<n>::value;
+
+    // helper types
+    using MaskType = typename SIMDInternal::MaskTypeFor<ScalarType>::type;
+    union UnionType     { vSIMDType v; ScalarType s[n]; };  // register viewed as n scalars
+    union UnionMaskType { vSIMDType v; MaskType   m[n]; };  // register viewed as n mask words
+
+
+    // fallback methods
+
+    /** Lane-wise arithmetic on the scalar view: a + b, a - b and a * b. */
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept        { return apply<ScalarAdd> (a, b); }
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept        { return apply<ScalarSub> (a, b); }
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept        { return apply<ScalarMul> (a, b); }
+
+    /** Lane-wise bit operations on the mask view. bit_notand computes (~a) & b. */
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept    { return bitapply<ScalarAnd> (a, b); }
+    static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept    { return bitapply<ScalarOr > (a, b); }
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept    { return bitapply<ScalarXor> (a, b); }
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return bitapply<ScalarNot> (a, b); }
+
+    /** Lane-wise minimum / maximum, and comparisons that produce a mask register
+        (see cmp() for the mask encoding).
+    */
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                { return apply<ScalarMin> (a, b); }
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                { return apply<ScalarMax> (a, b); }
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept              { return cmp<ScalarEq > (a, b); }
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept           { return cmp<ScalarNeq> (a, b); }
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept        { return cmp<ScalarGt > (a, b); }
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return cmp<ScalarGeq> (a, b); }
+
+    /** Returns lane i of v. The index is not range-checked. */
+    static forcedinline ScalarType get (vSIMDType v, size_t i) noexcept
+    {
+        UnionType u {v};
+        return u.s[i];
+    }
+
+    /** Returns a copy of v with lane i replaced by s. The index is not range-checked. */
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, ScalarType s) noexcept
+    {
+        UnionType u {v};
+
+        u.s[i] = s;
+        return u.v;
+    }
+
+    /** Returns the bitwise complement of every mask word in av. */
+    static forcedinline vSIMDType bit_not (vSIMDType av) noexcept
+    {
+        UnionMaskType a {av};
+
+        for (size_t i = 0; i < n; ++i)
+            a.m[i] = ~a.m[i];
+
+        return a.v;
+    }
+
+    /** Returns the sum of all lanes, accumulated in ScalarType. */
+    static forcedinline ScalarType sum (vSIMDType av) noexcept
+    {
+        UnionType a {av};
+        auto retval = static_cast<ScalarType> (0);
+
+        for (size_t i = 0; i < n; ++i)
+            retval += a.s[i];
+
+        return retval;
+    }
+
+    /** Returns a + (b * c), computed lane by lane. */
+    static forcedinline vSIMDType multiplyAdd (vSIMDType av, vSIMDType bv, vSIMDType cv) noexcept
+    {
+        UnionType a {av}, b {bv}, c {cv};
+
+        for (size_t i = 0; i < n; ++i)
+            a.s[i] += b.s[i] * c.s[i];
+
+        return a.v;
+    }
+
+    //==============================================================================
+    /** Returns true if every lane of av compares equal (scalar ==) to the
+        matching lane of bv.
+    */
+    static forcedinline bool allEqual (vSIMDType av, vSIMDType bv) noexcept
+    {
+        UnionType a {av}, b {bv};
+
+        for (size_t i = 0; i < n; ++i)
+            if (a.s[i] != b.s[i])
+                return false;
+
+        return true;
+    }
+
+    //==============================================================================
+    /** Complex multiplication. Lanes are read as interleaved (real, imaginary)
+        pairs; each pair of av is multiplied by the matching pair of bv using
+        std::complex and written back in the same layout.
+    */
+    static forcedinline vSIMDType cmplxmul (vSIMDType av, vSIMDType bv) noexcept
+    {
+        UnionType a {av}, b {bv}, r;
+
+        // Number of complex pairs. Kept as size_t, like every other lane loop in
+        // this struct, instead of narrowing n to a signed int.
+        const size_t m = n >> 1;
+        for (size_t i = 0; i < m; ++i)
+        {
+            std::complex<ScalarType> result
+                  = std::complex<ScalarType> (a.s[i<<1], a.s[(i<<1)|1])
+                  * std::complex<ScalarType> (b.s[i<<1], b.s[(i<<1)|1]);
+
+            r.s[i<<1]     = result.real();
+            r.s[(i<<1)|1] = result.imag();
+        }
+
+        return r.v;
+    }
+
+    // Per-lane functors used by apply(), bitapply() and cmp() below.
+    // Note that ScalarNot is an and-not: it returns (~a) & b.
+    struct ScalarAdd { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return a + b; } };
+    struct ScalarSub { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return a - b; } };
+    struct ScalarMul { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return a * b; } };
+    struct ScalarMin { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return jmin (a, b); } };
+    struct ScalarMax { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return jmax (a, b); } };
+    struct ScalarAnd { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return a & b; } };
+    struct ScalarOr  { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return a | b; } };
+    struct ScalarXor { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return a ^ b; } };
+    struct ScalarNot { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return (~a) & b; } };
+    struct ScalarEq  { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a == b); } };
+    struct ScalarNeq { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a != b); } };
+    struct ScalarGt  { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a >  b); } };
+    struct ScalarGeq { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a >= b); } };
+
+    // generic apply routines for operations above
+
+    /** Applies Op::op to each pair of scalar lanes and returns the results. */
+    template <typename Op>
+    static forcedinline vSIMDType apply (vSIMDType av, vSIMDType bv) noexcept
+    {
+        UnionType a {av}, b {bv};
+
+        for (size_t i = 0; i < n; ++i)
+            a.s[i] = Op::op (a.s[i], b.s[i]);
+
+        return a.v;
+    }
+
+    /** Evaluates the predicate Op::op on each pair of scalar lanes. A lane of the
+        result has every bit set where the predicate is true and is zero otherwise.
+    */
+    template <typename Op>
+    static forcedinline vSIMDType cmp (vSIMDType av, vSIMDType bv) noexcept
+    {
+        UnionType a {av}, b {bv};
+        UnionMaskType r;
+
+        for (size_t i = 0; i < n; ++i)
+            r.m[i] = Op::op (a.s[i], b.s[i]) ? static_cast<MaskType> (-1) : static_cast<MaskType> (0);
+
+        return r.v;
+    }
+
+    /** Applies Op::op to each pair of mask words and returns the results. */
+    template <typename Op>
+    static forcedinline vSIMDType bitapply (vSIMDType av, vSIMDType bv) noexcept
+    {
+        UnionMaskType a {av}, b {bv};
+
+        for (size_t i = 0; i < n; ++i)
+            a.m[i] = Op::op (a.m[i], b.m[i]);
+
+        return a.v;
+    }
+
+    /** Returns a register with every lane set to s. */
+    static forcedinline vSIMDType expand (ScalarType s) noexcept
+    {
+        UnionType r;
+
+        for (size_t i = 0; i < n; ++i)
+            r.s[i] = s;
+
+        return r.v;
+    }
+
+    /** Reads n consecutive scalars starting at a into a register. */
+    static forcedinline vSIMDType load (const ScalarType* a) noexcept
+    {
+        UnionType r;
+
+        for (size_t i = 0; i < n; ++i)
+            r.s[i] = a[i];
+
+        return r.v;
+    }
+
+    /** Writes the n lanes of av to consecutive scalars starting at dest. */
+    static forcedinline void store (vSIMDType av, ScalarType* dest) noexcept
+    {
+        UnionType a {av};
+
+        for (size_t i = 0; i < n; ++i)
+            dest[i] = a.s[i];
+    }
+
+    /** Permutes the lanes of av. Lane i of the result is copied from the source
+        lane whose index is stored in bits [bits * i, bits * (i + 1)) of
+        shuffle_idx.
+
+        NOTE: the shift amount bits * i has to stay below the bit width of
+        unsigned int, so this is only meaningful when all n lane indices fit
+        inside shuffle_idx.
+    */
+    template <unsigned int shuffle_idx>
+    static forcedinline vSIMDType shuffle (vSIMDType av) noexcept
+    {
+        UnionType a {av}, r;
+
+        // the compiler will unroll this loop and the index can
+        // be computed at compile-time, so this will be super fast
+        for (size_t i = 0; i < n; ++i)
+            r.s[i] = a.s[(shuffle_idx >> (bits * i)) & mask];
+
+        return r.v;
+    }
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/native/juce_neon_SIMDNativeOps.cpp
+++ b/modules/juce_dsp/native/juce_neon_SIMDNativeOps.cpp
@ -0,0 +1,44 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+    namespace dsp
+    {   // Out-of-class definitions of the static constant arrays of the SIMDNativeOps specialisations (macro arguments: element type, specialisation type, member name).
+        DEFINE_NEON_SIMD_CONST (int32_t, float, kAllBitsSet)     = { -1, -1, -1, -1 }; // every bit set in all four 32-bit lanes
+        DEFINE_NEON_SIMD_CONST (int32_t, float, kEvenHighBit)    = { static_cast<int32_t>(0x80000000), 0, static_cast<int32_t>(0x80000000), 0 }; // only the top bit set, in lanes 0 and 2
+        DEFINE_NEON_SIMD_CONST (float, float, kOne)              = { 1.0f, 1.0f, 1.0f, 1.0f }; // 1.0f in all four lanes
+        // kAllBitsSet for each integer specialisation: every element has all of its bits set (-1 for signed types, the maximum value for unsigned types)
+        DEFINE_NEON_SIMD_CONST (int8_t, int8_t, kAllBitsSet)     = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
+        DEFINE_NEON_SIMD_CONST (uint8_t, uint8_t, kAllBitsSet)   = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+        DEFINE_NEON_SIMD_CONST (int16_t, int16_t, kAllBitsSet)   = { -1, -1, -1, -1, -1, -1, -1, -1 };
+        DEFINE_NEON_SIMD_CONST (uint16_t, uint16_t, kAllBitsSet) = { 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff };
+        DEFINE_NEON_SIMD_CONST (int32_t, int32_t, kAllBitsSet)   = { -1, -1, -1, -1 };
+        DEFINE_NEON_SIMD_CONST (uint32_t, uint32_t, kAllBitsSet) = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
+        DEFINE_NEON_SIMD_CONST (int64_t, int64_t, kAllBitsSet)   = { -1, -1 };
+        DEFINE_NEON_SIMD_CONST (uint64_t, uint64_t, kAllBitsSet) = { 0xffffffffffffffff, 0xffffffffffffffff };
+    } // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/native/juce_neon_SIMDNativeOps.h
+++ b/modules/juce_dsp/native/juce_neon_SIMDNativeOps.h
@ -0,0 +1,495 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+
+#if JUCE_GCC && (__GNUC__ >= 6)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+
+#ifdef _MSC_VER   // DECLARE_NEON_SIMD_CONST: in-class declaration of a static const array of 16 / sizeof (type) elements, aligned to 16 bytes (MSVC __declspec syntax in this branch)
+ #define DECLARE_NEON_SIMD_CONST(type, name) \
+    static __declspec(align(16)) const type name [16 / sizeof (type)]
+ // DEFINE_NEON_SIMD_CONST: the matching out-of-class definition, qualified with SIMDNativeOps<class_type>:: ; the initialiser is supplied at the point of use
+ #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
+    __declspec(align(16)) const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)]
+
+#else   // the same two macros using the __attribute__((aligned(16))) syntax, which is placed after the declarator
+ #define DECLARE_NEON_SIMD_CONST(type, name) \
+    static const type name [16 / sizeof (type)] __attribute__((aligned(16)))
+ // out-of-class definition counterpart of DECLARE_NEON_SIMD_CONST
+ #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
+    const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)] __attribute__((aligned(16)))
+
+#endif // _MSC_VER
+// The primary template is only declared here; each supported element type provides an explicit specialisation below.
+template <typename type>
+struct SIMDNativeOps;
+
+//==============================================================================
+/** Unsigned 32-bit integer NEON intrinsics.
+
+    Operates on uint32x4_t registers. Most members forward to a single NEON
+    intrinsic. Implementation notes:
+
+    - bit_notand (a, b) computes (~a) & b by calling vbicq_u32 with the
+      operands swapped.
+    - bit_not is bit_notand against a register loaded from kAllBitsSet.
+    - notEqual is the bitwise complement of equal.
+    - allEqual sums the lanes of the notEqual mask and tests the total
+      against zero.
+    - multiplyAdd (a, b, c) forwards to vmlaq_u32 (a, b, c).
+    - sum adds the high and low halves of the register and then pairwise-adds
+      the two remaining lanes.
+    - fb names the generic SIMDFallbackOps implementation for this type.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint32_t>
+{
+    //==============================================================================
+    using vSIMDType = uint32x4_t;
+    using fb = SIMDFallbackOps<uint32_t, vSIMDType>;
+
+    //==============================================================================
+    DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline vSIMDType expand (uint32_t s) noexcept                                  { return vdupq_n_u32 (s); }
+    static forcedinline vSIMDType load (const uint32_t* a) noexcept                             { return vld1q_u32 (a); }
+    static forcedinline void store (vSIMDType value, uint32_t* a) noexcept                      { vst1q_u32 (a, value); }
+    static forcedinline uint32_t get (vSIMDType v, size_t i) noexcept                           { return v[i]; }
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, uint32_t s) noexcept              { v[i] = s; return v; }
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept                       { return vaddq_u32 (a, b); }
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept                       { return vsubq_u32 (a, b); }
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept                       { return vmulq_u32 (a, b); }
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept                   { return vandq_u32 (a, b); }
+    static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept                   { return vorrq_u32  (a, b); }
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept                   { return veorq_u32 (a, b); }
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept                { return vbicq_u32 (b, a); }
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept                                { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                       { return vminq_u32 (a, b); }
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                       { return vmaxq_u32 (a, b); }
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept                     { return (vSIMDType) vceqq_u32 (a, b); }
+    static forcedinline bool      allEqual (vSIMDType a, vSIMDType b) noexcept                  { return (sum (notEqual (a, b)) == 0); }
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept                  { return bit_not (equal (a, b)); }
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept               { return (vSIMDType) vcgtq_u32 (a, b); }
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept        { return (vSIMDType) vcgeq_u32 (a, b); }
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept  { return vmlaq_u32 (a, b, c); }
+    static forcedinline uint32_t sum (vSIMDType a) noexcept
+    {
+        auto rr = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a));
+        return vget_lane_u32 (vpadd_u32 (rr, rr), 0);
+    }
+};
+
+//==============================================================================
+/** Signed 32-bit integer NEON intrinsics.
+
+    Operates on int32x4_t registers. Most members forward to a single NEON
+    intrinsic. Implementation notes:
+
+    - bit_notand (a, b) computes (~a) & b by calling vbicq_s32 with the
+      operands swapped.
+    - bit_not is bit_notand against a register loaded from kAllBitsSet.
+    - The results of the comparison intrinsics are cast to vSIMDType.
+    - notEqual is the bitwise complement of equal.
+    - allEqual sums the lanes of the notEqual mask and tests the total
+      against zero.
+    - multiplyAdd (a, b, c) forwards to vmlaq_s32 (a, b, c).
+    - sum adds the high and low halves of the register and then pairwise-adds
+      the two remaining lanes.
+    - fb names the generic SIMDFallbackOps implementation for this type.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int32_t>
+{
+    //==============================================================================
+    using vSIMDType = int32x4_t;
+    using fb = SIMDFallbackOps<int32_t, vSIMDType>;
+
+    //==============================================================================
+    DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline vSIMDType expand (int32_t s) noexcept                                   { return vdupq_n_s32 (s); }
+    static forcedinline vSIMDType load (const int32_t* a) noexcept                              { return vld1q_s32 (a); }
+    static forcedinline void store (vSIMDType value, int32_t* a) noexcept                       { vst1q_s32 (a, value); }
+    static forcedinline int32_t get (vSIMDType v, size_t i) noexcept                            { return v[i]; }
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, int32_t s) noexcept               { v[i] = s; return v; }
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept                       { return vaddq_s32 (a, b); }
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept                       { return vsubq_s32 (a, b); }
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept                       { return vmulq_s32 (a, b); }
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept                   { return vandq_s32 (a, b); }
+    static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept                   { return vorrq_s32 (a, b); }
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept                   { return veorq_s32 (a, b); }
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept                { return vbicq_s32 (b, a); }
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept                                { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                       { return vminq_s32 (a, b); }
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                       { return vmaxq_s32 (a, b); }
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept                     { return (vSIMDType) vceqq_s32 (a, b); }
+    static forcedinline bool      allEqual (vSIMDType a, vSIMDType b) noexcept                  { return (sum (notEqual (a, b)) == 0); }
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept                  { return bit_not (equal (a, b)); }
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept               { return (vSIMDType) vcgtq_s32 (a, b); }
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept        { return (vSIMDType) vcgeq_s32 (a, b); }
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept  { return vmlaq_s32 (a, b, c); }
+    static forcedinline int32_t sum (vSIMDType a) noexcept
+    {
+        auto rr = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a));
+        rr = vpadd_s32 (rr, rr);
+        return vget_lane_s32 (rr, 0);
+    }
+};
+
+//==============================================================================
+/** Signed 8-bit integer NEON intrinsics.
+
+    Operates on int8x16_t registers. Most members forward to a single NEON
+    intrinsic. Implementation notes:
+
+    - bit_notand (a, b) computes (~a) & b by calling vbicq_s8 with the
+      operands swapped.
+    - bit_not is bit_notand against a register loaded from kAllBitsSet.
+    - The results of the comparison intrinsics are cast to vSIMDType.
+    - notEqual is the bitwise complement of equal.
+    - allEqual casts the notEqual mask to SIMDNativeOps<int32_t>::vSIMDType
+      and tests the 32-bit lane sum against zero.
+    - multiplyAdd (a, b, c) forwards to vmlaq_s8 (a, b, c).
+    - sum has no native implementation here and uses the generic fallback
+      (fb::sum).
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int8_t>
+{
+    //==============================================================================
+    using vSIMDType = int8x16_t;
+    using fb = SIMDFallbackOps<int8_t, vSIMDType>;
+
+    //==============================================================================
+    DECLARE_NEON_SIMD_CONST (int8_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline vSIMDType expand (int8_t s) noexcept                                   { return vdupq_n_s8 (s); }
+    static forcedinline vSIMDType load (const int8_t* a) noexcept                              { return vld1q_s8 (a); }
+    static forcedinline void store (vSIMDType value, int8_t* a) noexcept                       { vst1q_s8 (a, value); }
+    static forcedinline int8_t get (vSIMDType v, size_t i) noexcept                            { return v[i]; }
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, int8_t s) noexcept               { v[i] = s; return v; }
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept                      { return vaddq_s8 (a, b); }
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept                      { return vsubq_s8 (a, b); }
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept                      { return vmulq_s8 (a, b); }
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept                  { return vandq_s8 (a, b); }
+    static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept                  { return vorrq_s8 (a, b); }
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept                  { return veorq_s8 (a, b); }
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept               { return vbicq_s8 (b, a); }
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept                               { return bit_notand (a, vld1q_s8 ((int8_t*) kAllBitsSet)); }
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                      { return vminq_s8 (a, b); }
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                      { return vmaxq_s8 (a, b); }
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept                    { return (vSIMDType) vceqq_s8 (a, b); }
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept                 { return bit_not (equal (a, b)); }
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept              { return (vSIMDType) vcgtq_s8 (a, b); }
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept       { return (vSIMDType) vcgeq_s8 (a, b); }
+    static forcedinline bool      allEqual (vSIMDType a, vSIMDType b) noexcept                 { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); }
+    static forcedinline int8_t sum (vSIMDType a) noexcept                                      { return fb::sum (a); }
+};
+
+//==============================================================================
+/** Unsigned 8-bit integer NEON intrinsics.
+
+    Operates on uint8x16_t registers. Most members forward to a single NEON
+    intrinsic. Implementation notes:
+
+    - bit_notand (a, b) computes (~a) & b by calling vbicq_u8 with the
+      operands swapped.
+    - bit_not is bit_notand against a register loaded from kAllBitsSet.
+    - notEqual is the bitwise complement of equal.
+    - allEqual casts the notEqual mask to SIMDNativeOps<uint32_t>::vSIMDType
+      and tests the 32-bit lane sum against zero.
+    - multiplyAdd (a, b, c) forwards to vmlaq_u8 (a, b, c).
+    - sum has no native implementation here and uses the generic fallback
+      (fb::sum).
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint8_t>
+{
+    //==============================================================================
+    using vSIMDType = uint8x16_t;
+    using fb = SIMDFallbackOps<uint8_t, vSIMDType>;
+
+    //==============================================================================
+    DECLARE_NEON_SIMD_CONST (uint8_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline vSIMDType expand (uint8_t s) noexcept                                  { return vdupq_n_u8 (s); }
+    static forcedinline vSIMDType load (const uint8_t* a) noexcept                             { return vld1q_u8 (a); }
+    static forcedinline void store (vSIMDType value, uint8_t* a) noexcept                      { vst1q_u8 (a, value); }
+    static forcedinline uint8_t get (vSIMDType v, size_t i) noexcept                           { return v[i]; }
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, uint8_t s) noexcept              { v[i] = s; return v; }
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept                      { return vaddq_u8 (a, b); }
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept                      { return vsubq_u8 (a, b); }
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept                      { return vmulq_u8 (a, b); }
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept                  { return vandq_u8 (a, b); }
+    static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept                  { return vorrq_u8 (a, b); }
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept                  { return veorq_u8 (a, b); }
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept               { return vbicq_u8 (b, a); }
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept                               { return bit_notand (a, vld1q_u8 ((uint8_t*) kAllBitsSet)); }
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                      { return vminq_u8 (a, b); }
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                      { return vmaxq_u8 (a, b); }
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept                    { return (vSIMDType) vceqq_u8 (a, b); }
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept                 { return bit_not (equal (a, b)); }
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept              { return (vSIMDType) vcgtq_u8 (a, b); }
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept       { return (vSIMDType) vcgeq_u8 (a, b); }
+    static forcedinline bool      allEqual (vSIMDType a, vSIMDType b) noexcept                 { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); }
+    static forcedinline uint8_t sum (vSIMDType a) noexcept                                     { return fb::sum (a); }
+};
+
+//==============================================================================
+/** Signed 16-bit integer NEON intrinsics.
+
+    Operates on int16x8_t registers. Most members forward to a single NEON
+    intrinsic. Implementation notes:
+
+    - bit_notand (a, b) computes (~a) & b by calling vbicq_s16 with the
+      operands swapped.
+    - bit_not is bit_notand against a register loaded from kAllBitsSet.
+    - The results of the comparison intrinsics are cast to vSIMDType.
+    - notEqual is the bitwise complement of equal.
+    - allEqual casts the notEqual mask to SIMDNativeOps<int32_t>::vSIMDType
+      and tests the 32-bit lane sum against zero.
+    - multiplyAdd (a, b, c) forwards to vmlaq_s16 (a, b, c).
+    - sum has no native implementation here and uses the generic fallback
+      (fb::sum).
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int16_t>
+{
+    //==============================================================================
+    using vSIMDType = int16x8_t;
+    using fb = SIMDFallbackOps<int16_t, vSIMDType>;
+
+    //==============================================================================
+    DECLARE_NEON_SIMD_CONST (int16_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline vSIMDType expand (int16_t s) noexcept                                  { return vdupq_n_s16 (s); }
+    static forcedinline vSIMDType load (const int16_t* a) noexcept                             { return vld1q_s16 (a); }
+    static forcedinline void store (vSIMDType value, int16_t* a) noexcept                      { vst1q_s16 (a, value); }
+    static forcedinline int16_t get (vSIMDType v, size_t i) noexcept                           { return v[i]; }
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, int16_t s) noexcept              { v[i] = s; return v; }
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept                      { return vaddq_s16 (a, b); }
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept                      { return vsubq_s16 (a, b); }
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept                      { return vmulq_s16 (a, b); }
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept                  { return vandq_s16 (a, b); }
+    static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept                  { return vorrq_s16 (a, b); }
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept                  { return veorq_s16 (a, b); }
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept               { return vbicq_s16 (b, a); }
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept                               { return bit_notand (a, vld1q_s16 ((int16_t*) kAllBitsSet)); }
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                      { return vminq_s16 (a, b); }
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                      { return vmaxq_s16 (a, b); }
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept                    { return (vSIMDType) vceqq_s16 (a, b); }
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept                 { return bit_not (equal (a, b)); }
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept              { return (vSIMDType) vcgtq_s16 (a, b); }
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept       { return (vSIMDType) vcgeq_s16 (a, b); }
+    static forcedinline bool      allEqual (vSIMDType a, vSIMDType b) noexcept                 { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); }
+    static forcedinline int16_t sum (vSIMDType a) noexcept                                     { return fb::sum (a); }
+};
+
+
+//==============================================================================
+/** Unsigned 16-bit integer NEON intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint16_t>
+{
+    //==============================================================================
+    /** Native register type: eight unsigned 16-bit lanes. */
+    using vSIMDType = uint16x8_t;
+
+    /** Fallback implementation; in this specialisation only sum() is routed to it. */
+    using fb = SIMDFallbackOps<uint16_t, vSIMDType>;
+
+    //==============================================================================
+    // Loaded by bit_not() as its second operand. The values are defined elsewhere
+    // (presumably every bit set, as the name suggests).
+    DECLARE_NEON_SIMD_CONST (uint16_t, kAllBitsSet);
+
+    //==============================================================================
+    // Construction, memory transfer and per-lane access
+
+    /** Returns a vector with every lane set to s. */
+    static forcedinline vSIMDType expand (uint16_t s) noexcept
+    {
+        return vdupq_n_u16 (s);
+    }
+
+    /** Reads a full vector from the memory at a. */
+    static forcedinline vSIMDType load (const uint16_t* a) noexcept
+    {
+        return vld1q_u16 (a);
+    }
+
+    /** Writes all lanes of value to the memory at a. */
+    static forcedinline void store (vSIMDType value, uint16_t* a) noexcept
+    {
+        vst1q_u16 (a, value);
+    }
+
+    /** Returns lane i of v. */
+    static forcedinline uint16_t get (vSIMDType v, size_t i) noexcept
+    {
+        return v[i];
+    }
+
+    /** Returns a copy of v with lane i replaced by s. */
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, uint16_t s) noexcept
+    {
+        v[i] = s;
+        return v;
+    }
+
+    //==============================================================================
+    // Lane-wise arithmetic
+
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vaddq_u16 (a, b);
+    }
+
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vsubq_u16 (a, b);
+    }
+
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vmulq_u16 (a, b);
+    }
+
+    /** Computes a + (b * c) per lane. */
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        return vmlaq_u16 (a, b, c);
+    }
+
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vminq_u16 (a, b);
+    }
+
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vmaxq_u16 (a, b);
+    }
+
+    /** Adds up all lanes of a (delegated to the fallback implementation). */
+    static forcedinline uint16_t sum (vSIMDType a) noexcept
+    {
+        return fb::sum (a);
+    }
+
+    //==============================================================================
+    // Bitwise logic
+
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vandq_u16 (a, b);
+    }
+
+    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vorrq_u16 (a, b);
+    }
+
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept
+    {
+        return veorq_u16 (a, b);
+    }
+
+    /** Computes (~a) & b. Note the swapped operands: vbicq clears the bits of its second argument. */
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vbicq_u16 (b, a);
+    }
+
+    /** Computes (~a) & kAllBitsSet. */
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
+    {
+        const auto mask = vld1q_u16 ((uint16_t*) kAllBitsSet);
+        return bit_notand (a, mask);
+    }
+
+    //==============================================================================
+    // Comparisons: each returns the native comparison mask, cast to vSIMDType
+
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vceqq_u16 (a, b);
+    }
+
+    /** Bitwise complement of equal(). */
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return bit_not (equal (a, b));
+    }
+
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vcgtq_u16 (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vcgeq_u16 (a, b);
+    }
+
+    /** Returns true when the notEqual() mask, reinterpreted as 32-bit lanes, sums to zero. */
+    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        using Ops32 = SIMDNativeOps<uint32_t>;
+
+        const auto differingLanes = (Ops32::vSIMDType) notEqual (a, b);
+        return Ops32::sum (differingLanes) == 0;
+    }
+};
+
+//==============================================================================
+/** Signed 64-bit integer NEON intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int64_t>
+{
+    //==============================================================================
+    /** Native register type: two signed 64-bit lanes. */
+    using vSIMDType = int64x2_t;
+
+    /** Fallback implementation, used below wherever no NEON intrinsic is called
+        (multiplication, min/max, comparisons, multiplyAdd and sum).
+    */
+    using fb = SIMDFallbackOps<int64_t, vSIMDType>;
+
+    //==============================================================================
+    // Loaded by bit_not() as its second operand. The values are defined elsewhere
+    // (presumably every bit set, as the name suggests).
+    DECLARE_NEON_SIMD_CONST (int64_t, kAllBitsSet);
+
+    //==============================================================================
+    // Construction, memory transfer and per-lane access
+
+    /** Returns a vector with every lane set to s. */
+    static forcedinline vSIMDType expand (int64_t s) noexcept
+    {
+        return vdupq_n_s64 (s);
+    }
+
+    /** Reads a full vector from the memory at a. */
+    static forcedinline vSIMDType load (const int64_t* a) noexcept
+    {
+        return vld1q_s64 (a);
+    }
+
+    /** Writes all lanes of value to the memory at a. */
+    static forcedinline void store (vSIMDType value, int64_t* a) noexcept
+    {
+        vst1q_s64 (a, value);
+    }
+
+    /** Returns lane i of v. */
+    static forcedinline int64_t get (vSIMDType v, size_t i) noexcept
+    {
+        return v[i];
+    }
+
+    /** Returns a copy of v with lane i replaced by s. */
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, int64_t s) noexcept
+    {
+        v[i] = s;
+        return v;
+    }
+
+    //==============================================================================
+    // Lane-wise arithmetic
+
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vaddq_s64 (a, b);
+    }
+
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vsubq_s64 (a, b);
+    }
+
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::mul (a, b);
+    }
+
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        return fb::multiplyAdd (a, b, c);
+    }
+
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::min (a, b);
+    }
+
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::max (a, b);
+    }
+
+    /** Adds up all lanes of a (delegated to the fallback implementation). */
+    static forcedinline int64_t sum (vSIMDType a) noexcept
+    {
+        return fb::sum (a);
+    }
+
+    //==============================================================================
+    // Bitwise logic
+
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vandq_s64 (a, b);
+    }
+
+    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vorrq_s64 (a, b);
+    }
+
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept
+    {
+        return veorq_s64 (a, b);
+    }
+
+    /** Computes (~a) & b. Note the swapped operands: vbicq clears the bits of its second argument. */
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vbicq_s64 (b, a);
+    }
+
+    /** Computes (~a) & kAllBitsSet. */
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
+    {
+        const auto mask = vld1q_s64 ((int64_t*) kAllBitsSet);
+        return bit_notand (a, mask);
+    }
+
+    //==============================================================================
+    // Comparisons: all forwarded to the fallback implementation
+
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::equal (a, b);
+    }
+
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::notEqual (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::greaterThan (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::greaterThanOrEqual (a, b);
+    }
+
+    /** Returns true when the notEqual() result, reinterpreted as 32-bit lanes, sums to zero. */
+    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        using Ops32 = SIMDNativeOps<int32_t>;
+
+        const auto differingLanes = (Ops32::vSIMDType) notEqual (a, b);
+        return Ops32::sum (differingLanes) == 0;
+    }
+};
+
+
+//==============================================================================
+/** Unsigned 64-bit integer NEON intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint64_t>
+{
+    //==============================================================================
+    /** Native register type: two unsigned 64-bit lanes. */
+    using vSIMDType = uint64x2_t;
+
+    /** Fallback implementation, used below wherever no NEON intrinsic is called
+        (multiplication, min/max, comparisons, multiplyAdd and sum).
+    */
+    using fb = SIMDFallbackOps<uint64_t, vSIMDType>;
+
+    //==============================================================================
+    // Loaded by bit_not() as its second operand. The values are defined elsewhere
+    // (presumably every bit set, as the name suggests).
+    DECLARE_NEON_SIMD_CONST (uint64_t, kAllBitsSet);
+
+    //==============================================================================
+    // Construction, memory transfer and per-lane access
+
+    /** Returns a vector with every lane set to s. */
+    static forcedinline vSIMDType expand (uint64_t s) noexcept
+    {
+        return vdupq_n_u64 (s);
+    }
+
+    /** Reads a full vector from the memory at a. */
+    static forcedinline vSIMDType load (const uint64_t* a) noexcept
+    {
+        return vld1q_u64 (a);
+    }
+
+    /** Writes all lanes of value to the memory at a. */
+    static forcedinline void store (vSIMDType value, uint64_t* a) noexcept
+    {
+        vst1q_u64 (a, value);
+    }
+
+    /** Returns lane i of v. */
+    static forcedinline uint64_t get (vSIMDType v, size_t i) noexcept
+    {
+        return v[i];
+    }
+
+    /** Returns a copy of v with lane i replaced by s. */
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, uint64_t s) noexcept
+    {
+        v[i] = s;
+        return v;
+    }
+
+    //==============================================================================
+    // Lane-wise arithmetic
+
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vaddq_u64 (a, b);
+    }
+
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vsubq_u64 (a, b);
+    }
+
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::mul (a, b);
+    }
+
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        return fb::multiplyAdd (a, b, c);
+    }
+
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::min (a, b);
+    }
+
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::max (a, b);
+    }
+
+    /** Adds up all lanes of a (delegated to the fallback implementation). */
+    static forcedinline uint64_t sum (vSIMDType a) noexcept
+    {
+        return fb::sum (a);
+    }
+
+    //==============================================================================
+    // Bitwise logic
+
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vandq_u64 (a, b);
+    }
+
+    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vorrq_u64 (a, b);
+    }
+
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept
+    {
+        return veorq_u64 (a, b);
+    }
+
+    /** Computes (~a) & b. Note the swapped operands: vbicq clears the bits of its second argument. */
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vbicq_u64 (b, a);
+    }
+
+    /** Computes (~a) & kAllBitsSet. */
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
+    {
+        const auto mask = vld1q_u64 ((uint64_t*) kAllBitsSet);
+        return bit_notand (a, mask);
+    }
+
+    //==============================================================================
+    // Comparisons: all forwarded to the fallback implementation
+
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::equal (a, b);
+    }
+
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::notEqual (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::greaterThan (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::greaterThanOrEqual (a, b);
+    }
+
+    /** Returns true when the notEqual() result, reinterpreted as 32-bit lanes, sums to zero. */
+    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        using Ops32 = SIMDNativeOps<uint32_t>;
+
+        const auto differingLanes = (Ops32::vSIMDType) notEqual (a, b);
+        return Ops32::sum (differingLanes) == 0;
+    }
+};
+
+//==============================================================================
+/** Single-precision floating point NEON intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<float>
+{
+    //==============================================================================
+    /** Native register type: four single-precision lanes. */
+    using vSIMDType = float32x4_t;
+
+    /** Integer view of the same register, used for the bitwise operations. */
+    using vMaskType = uint32x4_t;
+
+    /** Fallback implementation; used here for the lane shuffles. */
+    using fb = SIMDFallbackOps<float, vSIMDType>;
+
+    //==============================================================================
+    // Values are defined elsewhere. kAllBitsSet is read by bit_not() and
+    // kEvenHighBit by cmplxmul(); kOne is not referenced within this struct.
+    DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
+    DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
+    DECLARE_NEON_SIMD_CONST (float, kOne);
+
+    //==============================================================================
+    // Construction, memory transfer and per-lane access
+
+    /** Returns a vector with every lane set to s. */
+    static forcedinline vSIMDType expand (float s) noexcept
+    {
+        return vdupq_n_f32 (s);
+    }
+
+    /** Reads a full vector from the memory at a. */
+    static forcedinline vSIMDType load (const float* a) noexcept
+    {
+        return vld1q_f32 (a);
+    }
+
+    /** Writes all lanes of value to the memory at a. */
+    static forcedinline void store (vSIMDType value, float* a) noexcept
+    {
+        vst1q_f32 (a, value);
+    }
+
+    /** Returns lane i of v. */
+    static forcedinline float get (vSIMDType v, size_t i) noexcept
+    {
+        return v[i];
+    }
+
+    /** Returns a copy of v with lane i replaced by s. */
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, float s) noexcept
+    {
+        v[i] = s;
+        return v;
+    }
+
+    //==============================================================================
+    // Lane-wise arithmetic
+
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vaddq_f32 (a, b);
+    }
+
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vsubq_f32 (a, b);
+    }
+
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vmulq_f32 (a, b);
+    }
+
+    /** Computes a + (b * c) per lane. */
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        return vmlaq_f32 (a, b, c);
+    }
+
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vminq_f32 (a, b);
+    }
+
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept
+    {
+        return vmaxq_f32 (a, b);
+    }
+
+    //==============================================================================
+    // Bitwise logic, carried out on the vMaskType view of the register
+
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b);
+    }
+
+    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b);
+    }
+
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b);
+    }
+
+    /** Computes (~a) & b. Note the swapped operands: vbicq clears the bits of its second argument. */
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a);
+    }
+
+    /** Computes (~a) & kAllBitsSet. */
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
+    {
+        const auto mask = vld1q_f32 ((float*) kAllBitsSet);
+        return bit_notand (a, mask);
+    }
+
+    //==============================================================================
+    // Comparisons: each returns the native comparison mask, cast to vSIMDType
+
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vceqq_f32 (a, b);
+    }
+
+    /** Bitwise complement of equal(). */
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return bit_not (equal (a, b));
+    }
+
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vcgtq_f32 (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return (vSIMDType) vcgeq_f32 (a, b);
+    }
+
+    /** Returns true when the notEqual() mask, viewed as unsigned 32-bit lanes, sums to zero. */
+    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        using Ops32 = SIMDNativeOps<uint32_t>;
+
+        const auto differingLanes = (Ops32::vSIMDType) notEqual (a, b);
+        return Ops32::sum (differingLanes) == 0;
+    }
+
+    //==============================================================================
+    // Lane shuffles, all routed through fb::shuffle. The template argument packs
+    // four 2-bit fields at shifts 0, 2, 4 and 6 (presumably the source lane for
+    // output lanes 0..3; confirm against SIMDFallbackOps::shuffle).
+
+    /** Shuffle pattern 0, 0, 2, 2. */
+    static forcedinline vSIMDType dupeven (vSIMDType a) noexcept
+    {
+        return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a);
+    }
+
+    /** Shuffle pattern 1, 1, 3, 3. */
+    static forcedinline vSIMDType dupodd (vSIMDType a) noexcept
+    {
+        return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a);
+    }
+
+    /** Shuffle pattern 1, 0, 3, 2. */
+    static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept
+    {
+        return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a);
+    }
+
+    /** Adds a to a copy of itself shuffled with pattern 2, 3, 0, 1. */
+    static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept
+    {
+        const auto shuffled = fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a);
+        return add (shuffled, a);
+    }
+
+    //==============================================================================
+    /** Combines the lane-wise products a * dupeven (b) and
+        swapevenodd (a) * dupodd (b), XOR-ing the second with kEvenHighBit before
+        adding. (Presumably a complex multiply on interleaved re/im pairs, with the
+        XOR flipping the sign of the even lanes; the constant's value is defined
+        elsewhere.)
+    */
+    static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
+    {
+        const vSIMDType straightProducts = mul (a, dupeven (b));
+        const vSIMDType crossedProducts  = mul (swapevenodd (a), dupodd (b));
+        const vSIMDType signMask         = vld1q_f32 ((float*) kEvenHighBit);
+
+        return add (straightProducts, bit_xor (crossedProducts, signMask));
+    }
+
+    /** Returns the sum of all four lanes: the two halves are added together and
+        the resulting pair is then added pairwise.
+    */
+    static forcedinline float sum (vSIMDType a) noexcept
+    {
+        const auto halfSums = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a));
+        const auto total    = vpadd_f32 (halfSums, halfSums);
+
+        return vget_lane_f32 (total, 0);
+    }
+};
+
+//==============================================================================
+/** Double-precision floating point intrinsics do not exist in NEON,
+    so we need to emulate them.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<double>
+{
+    //==============================================================================
+    /** Two doubles held in a plain struct, standing in for a native register type. */
+    using vSIMDType = struct { double v[2]; };
+
+    /** Fallback implementation; everything except the element-wise basics below forwards to it. */
+    using fb = SIMDFallbackOps<double, vSIMDType>;
+
+    //==============================================================================
+    // Construction, memory transfer and per-element access
+
+    /** Returns a vector with both elements set to s. */
+    static forcedinline vSIMDType expand (double s) noexcept
+    {
+        vSIMDType result = {{ s, s }};
+        return result;
+    }
+
+    /** Reads two consecutive doubles starting at a. */
+    static forcedinline vSIMDType load (const double* a) noexcept
+    {
+        vSIMDType result = {{ a[0], a[1] }};
+        return result;
+    }
+
+    /** Writes both elements of v to a[0] and a[1]. */
+    static forcedinline void store (vSIMDType v, double* a) noexcept
+    {
+        a[0] = v.v[0];
+        a[1] = v.v[1];
+    }
+
+    /** Returns element i of v. */
+    static forcedinline double get (vSIMDType v, size_t i) noexcept
+    {
+        return v.v[i];
+    }
+
+    /** Returns a copy of v with element i replaced by s. */
+    static forcedinline vSIMDType set (vSIMDType v, size_t i, double s) noexcept
+    {
+        v.v[i] = s;
+        return v;
+    }
+
+    //==============================================================================
+    // Element-wise arithmetic written out by hand
+
+    static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept
+    {
+        vSIMDType result = {{ a.v[0] + b.v[0], a.v[1] + b.v[1] }};
+        return result;
+    }
+
+    static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept
+    {
+        vSIMDType result = {{ a.v[0] - b.v[0], a.v[1] - b.v[1] }};
+        return result;
+    }
+
+    static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        vSIMDType result = {{ a.v[0] * b.v[0], a.v[1] * b.v[1] }};
+        return result;
+    }
+
+    //==============================================================================
+    // Everything below forwards to the fallback implementation
+
+    static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::bit_and (a, b);
+    }
+
+    static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::bit_or (a, b);
+    }
+
+    static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::bit_xor (a, b);
+    }
+
+    static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::bit_notand (a, b);
+    }
+
+    static forcedinline vSIMDType bit_not (vSIMDType a) noexcept
+    {
+        return fb::bit_not (a);
+    }
+
+    static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::min (a, b);
+    }
+
+    static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::max (a, b);
+    }
+
+    static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::equal (a, b);
+    }
+
+    static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::notEqual (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::greaterThan (a, b);
+    }
+
+    static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::greaterThanOrEqual (a, b);
+    }
+
+    static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::allEqual (a, b);
+    }
+
+    static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        return fb::multiplyAdd (a, b, c);
+    }
+
+    static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return fb::cmplxmul (a, b);
+    }
+
+    static forcedinline double sum (vSIMDType a) noexcept
+    {
+        return fb::sum (a);
+    }
+
+    //==============================================================================
+    /** Returns a unchanged. */
+    static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept
+    {
+        return a;
+    }
+};
+
+#endif
+
+#if JUCE_GCC && (__GNUC__ >= 6)
+ #pragma GCC diagnostic pop
+#endif
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/native/juce_sse_SIMDNativeOps.cpp
+++ b/modules/juce_dsp/native/juce_sse_SIMDNativeOps.cpp
@ -0,0 +1,59 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+// Out-of-class definitions of the 16-byte constant tables declared inside each
+// SIMDNativeOps specialisation. The macro arguments are: element type of the
+// table, the SIMDNativeOps specialisation that owns it, and the member name.
+
+//==============================================================================
+// SIMDNativeOps<float>: four 32-bit elements per table
+DEFINE_SSE_SIMD_CONST (int32_t, float, kAllBitsSet)  = { -1, -1, -1, -1 };
+DEFINE_SSE_SIMD_CONST (int32_t, float, kEvenHighBit) = { static_cast<int32_t>(0x80000000), 0, static_cast<int32_t>(0x80000000), 0 };
+DEFINE_SSE_SIMD_CONST (float, float, kOne)           = { 1.0f, 1.0f, 1.0f, 1.0f };
+
+//==============================================================================
+// SIMDNativeOps<double>: two 64-bit elements per table
+DEFINE_SSE_SIMD_CONST (int64_t, double, kAllBitsSet)  = { -1LL, -1LL };
+DEFINE_SSE_SIMD_CONST (int64_t, double, kEvenHighBit) = { static_cast<int64_t>(0x8000000000000000), 0 };
+DEFINE_SSE_SIMD_CONST (double, double, kOne)          = { 1.0, 1.0 };
+
+//==============================================================================
+// 8-bit integer specialisations: sixteen elements per table
+DEFINE_SSE_SIMD_CONST (int8_t, int8_t, kAllBitsSet) =
+{
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+DEFINE_SSE_SIMD_CONST (uint8_t, uint8_t, kAllBitsSet) =
+{
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+DEFINE_SSE_SIMD_CONST (uint8_t, uint8_t, kHighBit) =
+{
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+};
+
+//==============================================================================
+// 16-bit integer specialisations: eight elements per table
+DEFINE_SSE_SIMD_CONST (int16_t, int16_t, kAllBitsSet) =
+{
+    -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+DEFINE_SSE_SIMD_CONST (uint16_t, uint16_t, kAllBitsSet) =
+{
+    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
+};
+
+DEFINE_SSE_SIMD_CONST (uint16_t, uint16_t, kHighBit) =
+{
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000
+};
+
+//==============================================================================
+// 32-bit integer specialisations: four elements per table
+DEFINE_SSE_SIMD_CONST (int32_t, int32_t, kAllBitsSet)   = { -1, -1, -1, -1 };
+
+DEFINE_SSE_SIMD_CONST (uint32_t, uint32_t, kAllBitsSet) = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
+DEFINE_SSE_SIMD_CONST (uint32_t, uint32_t, kHighBit)    = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+
+//==============================================================================
+// 64-bit integer specialisations: two elements per table
+DEFINE_SSE_SIMD_CONST (int64_t, int64_t, kAllBitsSet)   = { -1, -1 };
+
+DEFINE_SSE_SIMD_CONST (uint64_t, uint64_t, kAllBitsSet) = { 0xffffffffffffffff, 0xffffffffffffffff };
+DEFINE_SSE_SIMD_CONST (uint64_t, uint64_t, kHighBit)    = { 0x8000000000000000, 0x8000000000000000 };
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/native/juce_sse_SIMDNativeOps.h
+++ b/modules/juce_dsp/native/juce_sse_SIMDNativeOps.h
@ -0,0 +1,725 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+
+// GCC 6 and later: save the diagnostic state and silence -Wignored-attributes
+// for the code that follows.
+// NOTE(review): the matching "diagnostic pop" is expected at the end of this
+// header, outside the part shown here; confirm.
+#if JUCE_GCC && (__GNUC__ >= 6)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+
+// Helpers for the 16-byte-aligned constant tables used by the SSE operations.
+//
+//   DECLARE_SSE_SIMD_CONST (elementType, name)
+//       goes inside a SIMDNativeOps specialisation and declares a static const
+//       array holding 16 / sizeof (elementType) elements.
+//
+//   DEFINE_SSE_SIMD_CONST (elementType, opsType, name)
+//       starts the matching out-of-class definition for SIMDNativeOps<opsType>;
+//       the caller appends the "= { ... };" initialiser.
+//
+// The two variants differ only in how the 16-byte alignment is spelled for
+// the compiler in use.
+#if defined (_MSC_VER)
+ #define DECLARE_SSE_SIMD_CONST(elementType, name) \
+    static __declspec(align(16)) const elementType name [16 / sizeof (elementType)]
+
+ #define DEFINE_SSE_SIMD_CONST(elementType, opsType, name) \
+    __declspec(align(16)) const elementType SIMDNativeOps<opsType>:: name [16 / sizeof (elementType)]
+
+#else
+ #define DECLARE_SSE_SIMD_CONST(elementType, name) \
+    static const elementType name [16 / sizeof (elementType)] __attribute__((aligned(16)))
+
+ #define DEFINE_SSE_SIMD_CONST(elementType, opsType, name) \
+    const elementType SIMDNativeOps<opsType>:: name [16 / sizeof (elementType)] __attribute__((aligned(16)))
+
+#endif
+
+// Primary template: declared only, with one explicit specialisation per
+// supported element type following below.
+template <typename ElementType>
+struct SIMDNativeOps;
+
+//==============================================================================
+/** Single-precision floating point SSE intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<float>
+{
+    //==============================================================================
+    /** Native register type: four single-precision lanes. */
+    using vSIMDType = __m128;
+
+    //==============================================================================
+    // 16-byte constant tables. kAllBitsSet is read by bit_not(), kEvenHighBit by
+    // cmplxmul() and kOne by the SSE4 branch of sum().
+    DECLARE_SSE_SIMD_CONST (int32_t, kAllBitsSet);
+    DECLARE_SSE_SIMD_CONST (int32_t, kEvenHighBit);
+    DECLARE_SSE_SIMD_CONST (float, kOne);
+
+    //==============================================================================
+    // Construction, memory transfer and per-lane access
+
+    /** Returns a vector with every lane set to s. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE expand (float s) noexcept
+    {
+        return _mm_load1_ps (&s);
+    }
+
+    /** Reads a full vector from a, using the aligned load (a must be 16-byte aligned). */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE load (const float* a) noexcept
+    {
+        return _mm_load_ps (a);
+    }
+
+    /** Writes value to dest, using the aligned store (dest must be 16-byte aligned). */
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (vSIMDType value, float* dest) noexcept
+    {
+        _mm_store_ps (dest, value);
+    }
+
+    /** Returns lane i of v (delegated to the fallback implementation). */
+    static forcedinline float JUCE_VECTOR_CALLTYPE get (vSIMDType v, size_t i) noexcept
+    {
+        return SIMDFallbackOps<float, __m128>::get (v, i);
+    }
+
+    /** Returns v with lane i replaced by s (delegated to the fallback implementation). */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE set (vSIMDType v, size_t i, float s) noexcept
+    {
+        return SIMDFallbackOps<float, __m128>::set (v, i, s);
+    }
+
+    //==============================================================================
+    // Lane-wise arithmetic
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE add (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_add_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE sub (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_sub_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE mul (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_mul_ps (a, b);
+    }
+
+    /** Computes a + (b * c) per lane as a separate multiply and add. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept
+    {
+        return _mm_add_ps (a, _mm_mul_ps (b, c));
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE min (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_min_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE max (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_max_ps (a, b);
+    }
+
+    //==============================================================================
+    // Bitwise logic
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE bit_and (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_and_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE bit_or (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_or_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE bit_xor (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_xor_ps (a, b);
+    }
+
+    /** Computes (~a) & b. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE bit_notand (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_andnot_ps (a, b);
+    }
+
+    /** Computes (~a) & kAllBitsSet. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE bit_not (vSIMDType a) noexcept
+    {
+        const vSIMDType mask = _mm_loadu_ps ((float*) kAllBitsSet);
+        return bit_notand (a, mask);
+    }
+
+    //==============================================================================
+    // Comparisons: each returns the per-lane mask produced by the intrinsic
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE equal (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_cmpeq_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE notEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_cmpneq_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE greaterThan (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_cmpgt_ps (a, b);
+    }
+
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        return _mm_cmpge_ps (a, b);
+    }
+
+    /** Returns true when the equal() mask has the sign bit set in all four lanes. */
+    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (vSIMDType a, vSIMDType b) noexcept
+    {
+        const int laneBits = _mm_movemask_ps (equal (a, b));
+        return laneBits == 0xf;
+    }
+
+    //==============================================================================
+    // Lane shuffles (results listed from lane 0 upwards)
+
+    /** Returns { a0, a0, a2, a2 }. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE dupeven (vSIMDType a) noexcept
+    {
+        return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 2, 0, 0));
+    }
+
+    /** Returns { a1, a1, a3, a3 }. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE dupodd (vSIMDType a) noexcept
+    {
+        return _mm_shuffle_ps (a, a, _MM_SHUFFLE (3, 3, 1, 1));
+    }
+
+    /** Returns { a1, a0, a3, a2 }. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE swapevenodd (vSIMDType a) noexcept
+    {
+        return _mm_shuffle_ps (a, a, _MM_SHUFFLE (2, 3, 0, 1));
+    }
+
+    /** Returns { a0 + a2, a1 + a3, a0 + a2, a1 + a3 }. */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE oddevensum (vSIMDType a) noexcept
+    {
+        const vSIMDType halvesSwapped = _mm_shuffle_ps (a, a, _MM_SHUFFLE (1, 0, 3, 2));
+        return _mm_add_ps (halvesSwapped, a);
+    }
+
+    //==============================================================================
+    /** Multiplies interleaved complex values (real part in even lanes, imaginary
+        part in odd lanes): per pair the result is
+        { ar * br - ai * bi, ai * br + ar * bi }.
+    */
+    static forcedinline vSIMDType JUCE_VECTOR_CALLTYPE cmplxmul (vSIMDType a, vSIMDType b) noexcept
+    {
+        const vSIMDType straightProducts = mul (a, dupeven (b));               // { ar*br, ai*br, ... }
+        const vSIMDType crossedProducts  = mul (swapevenodd (a), dupodd (b));  // { ai*bi, ar*bi, ... }
+
+        // XOR with kEvenHighBit flips the sign bit of the even lanes only.
+        const vSIMDType signMask = _mm_loadu_ps ((float*) kEvenHighBit);
+
+        return add (straightProducts, bit_xor (crossedProducts, signMask));
+    }
+
+    /** Returns the sum of all four lanes of a.
+
+        NOTE(review): GCC and Clang define __SSE4_1__ / __SSE4_2__ rather than
+        __SSE4__, so the first branch looks unreachable on those compilers.
+        Confirm before relying on it.
+    */
+    static forcedinline float JUCE_VECTOR_CALLTYPE sum (vSIMDType a) noexcept
+    {
+       #if defined(__SSE4__)
+        // Dot product of a with a vector of ones.
+        const vSIMDType total = _mm_dp_ps (a, _mm_loadu_ps (kOne), 0xff);
+       #elif defined(__SSE3__)
+        // Two horizontal adds leave the full sum in lane 0.
+        const vSIMDType total = _mm_hadd_ps (_mm_hadd_ps (a, a), a);
+       #else
+        // Swap the two halves and add (0x4e), then swap neighbours and add (0xb1).
+        const vSIMDType halfSums = _mm_add_ps (_mm_shuffle_ps (a, a, 0x4e), a);
+        const vSIMDType total    = _mm_add_ps (halfSums, _mm_shuffle_ps (halfSums, halfSums, 0xb1));
+       #endif
+
+        return _mm_cvtss_f32 (total);
+    }
+};
+
+//==============================================================================
+/** Double-precision floating point SSE intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<double>
+{
+    //==============================================================================
+    using vSIMDType = __m128d;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (int64_t, kAllBitsSet);
+    DECLARE_SSE_SIMD_CONST (int64_t, kEvenHighBit);
+    DECLARE_SSE_SIMD_CONST (double, kOne);
+
+    //==============================================================================
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE vconst (const double* a) noexcept                       { return load (a); }
+    /** Loads two 64-bit integer bit patterns from a and reinterprets them as a vector of doubles. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE vconst (const int64_t* a) noexcept
+    {
+        const __m128i rawBits = _mm_load_si128 (reinterpret_cast<const __m128i*> (a));
+        return _mm_castsi128_pd (rawBits);
+    }
+
+    /** Broadcasts s into both lanes. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE expand (double s) noexcept
+    {
+        return _mm_load1_pd (&s);
+    }
+
+    /** Reads both lanes from a using an aligned load. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE load (const double* a) noexcept
+    {
+        return _mm_load_pd (a);
+    }
+
+    /** Writes both lanes of value to dest using an aligned store. */
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128d value, double* dest) noexcept
+    {
+        _mm_store_pd (dest, value);
+    }
+
+    // Lane-wise arithmetic.
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE add (__m128d a, __m128d b) noexcept
+    {
+        return _mm_add_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE sub (__m128d a, __m128d b) noexcept
+    {
+        return _mm_sub_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE mul (__m128d a, __m128d b) noexcept
+    {
+        return _mm_mul_pd (a, b);
+    }
+
+    // Bitwise operations on the raw lane contents.
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_and (__m128d a, __m128d b) noexcept
+    {
+        return _mm_and_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_or (__m128d a, __m128d b) noexcept
+    {
+        return _mm_or_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_xor (__m128d a, __m128d b) noexcept
+    {
+        return _mm_xor_pd (a, b);
+    }
+
+    /** Computes (~a) & b. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_notand (__m128d a, __m128d b) noexcept
+    {
+        return _mm_andnot_pd (a, b);
+    }
+
+    /** Inverts every bit of a by and-not-ing it against the all-bits-set constant. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE bit_not (__m128d a) noexcept
+    {
+        return _mm_andnot_pd (a, vconst (kAllBitsSet));
+    }
+
+    // Lane-wise minimum / maximum.
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE min (__m128d a, __m128d b) noexcept
+    {
+        return _mm_min_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE max (__m128d a, __m128d b) noexcept
+    {
+        return _mm_max_pd (a, b);
+    }
+
+    // Lane-wise comparisons; each returns the comparison intrinsic's per-lane mask.
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE equal (__m128d a, __m128d b) noexcept
+    {
+        return _mm_cmpeq_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE notEqual (__m128d a, __m128d b) noexcept
+    {
+        return _mm_cmpneq_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThan (__m128d a, __m128d b) noexcept
+    {
+        return _mm_cmpgt_pd (a, b);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128d a, __m128d b) noexcept
+    {
+        return _mm_cmpge_pd (a, b);
+    }
+
+    /** Returns true only when both lanes of a and b compare equal. */
+    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128d a, __m128d b) noexcept
+    {
+        const int laneMask = _mm_movemask_pd (_mm_cmpeq_pd (a, b));
+        return laneMask == 0x3;
+    }
+
+    /** Computes a + (b * c) with a separate multiply and add. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE multiplyAdd (__m128d a, __m128d b, __m128d c) noexcept
+    {
+        return add (a, mul (b, c));
+    }
+
+    /** Copies lane 0 into both lanes. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupeven (__m128d a) noexcept
+    {
+        return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 0));
+    }
+
+    /** Copies lane 1 into both lanes. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE dupodd (__m128d a) noexcept
+    {
+        return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (1, 1));
+    }
+
+    /** Exchanges lane 0 and lane 1. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE swapevenodd (__m128d a) noexcept
+    {
+        return _mm_shuffle_pd (a, a, _MM_SHUFFLE2 (0, 1));
+    }
+
+    /** Returns a unchanged. */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE oddevensum (__m128d a) noexcept
+    {
+        return a;
+    }
+
+    // Single-lane access is delegated to the generic fallback implementation.
+    static forcedinline double JUCE_VECTOR_CALLTYPE get (__m128d v, size_t i) noexcept
+    {
+        return SIMDFallbackOps<double, __m128d>::get (v, i);
+    }
+
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE set (__m128d v, size_t i, double s) noexcept
+    {
+        return SIMDFallbackOps<double, __m128d>::set (v, i, s);
+    }
+
+    //==============================================================================
+    /** Complex multiplication of a by b, treating lane 0 as the real part and
+        lane 1 as the imaginary part.
+
+        The first product holds (a0*b0, a1*b0) and the second (a1*b1, a0*b1).
+        The second is XORed with kEvenHighBit before the two are added.
+        NOTE(review): kEvenHighBit is defined outside this view; presumably it
+        flips the sign of lane 0 only, which yields (a0*b0 - a1*b1, a1*b0 + a0*b1).
+    */
+    static forcedinline __m128d JUCE_VECTOR_CALLTYPE cmplxmul (__m128d a, __m128d b) noexcept
+    {
+        const __m128d scaledByEven = mul (a, dupeven (b));
+        const __m128d scaledByOdd  = mul (swapevenodd (a), dupodd (b));
+        const __m128d signAdjusted = bit_xor (scaledByOdd, vconst (kEvenHighBit));
+
+        return add (scaledByEven, signAdjusted);
+    }
+
+    /** Returns lane 0 + lane 1 as a scalar.
+
+        NOTE(review): GCC and Clang predefine __SSE4_1__ / __SSE4_2__ rather than
+        __SSE4__, so the first branch is probably only compiled when that macro
+        is defined by hand - confirm before relying on it.
+    */
+    static forcedinline double JUCE_VECTOR_CALLTYPE sum (__m128d a) noexcept
+    {
+       #if defined(__SSE4__)
+        // Dot product against the kOne constant.
+        return _mm_cvtsd_f64 (_mm_dp_pd (a, vconst (kOne), 0xff));
+       #elif defined(__SSE3__)
+        // Horizontal add puts lane 0 + lane 1 into lane 0.
+        return _mm_cvtsd_f64 (_mm_hadd_pd (a, a));
+       #else
+        // SSE2 only: swap the lanes, then add the swapped copy to the original.
+        const __m128d swapped = _mm_shuffle_pd (a, a, 0x01);
+        return _mm_cvtsd_f64 (_mm_add_pd (swapped, a));
+       #endif
+    }
+};
+
+//==============================================================================
+/** Signed 8-bit integer SSE intrinsics.
+
+    Sixteen int8_t lanes are held in a single __m128i. Operations with no
+    direct SSE2 instruction (multiplication, and min/max without SSE4.1) are
+    assembled from 16-bit or bitwise operations.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int8_t>
+{
+    //==============================================================================
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (int8_t, kAllBitsSet);
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (const int8_t* a) noexcept                       { return load (a); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int8_t* a) noexcept                         { return _mm_load_si128 ((const __m128i*) a); }
+    static forcedinline void    JUCE_VECTOR_CALLTYPE store (__m128i v, int8_t* p) noexcept                   { _mm_store_si128 ((__m128i*) p, v); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int8_t s) noexcept                              { return _mm_set1_epi8 (s); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept                     { return _mm_add_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept                     { return _mm_sub_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept                 { return _mm_and_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or  (__m128i a, __m128i b) noexcept                 { return _mm_or_si128  (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept                 { return _mm_xor_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept              { return _mm_andnot_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept                            { return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
+   #if defined(__SSE4_1__)
+    // _mm_min_epi8 / _mm_max_epi8 are SSE4.1 instructions. GCC and Clang predefine
+    // __SSE4_1__ (not __SSE4__) when SSE4.1 is enabled, so that is the macro to test.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { return _mm_min_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { return _mm_max_epi8 (a, b); }
+   #else
+    // SSE2 fallback: build a per-lane comparison mask and use it to pick a or b.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { __m128i lt = greaterThan (b, a); return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { __m128i gt = greaterThan (a, b); return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
+   #endif
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept                   { return _mm_cmpeq_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept             { return _mm_cmpgt_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept                { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
+    static forcedinline int8_t  JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept                      { return SIMDFallbackOps<int8_t, __m128i>::get (v, i); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, int8_t s) noexcept            { return SIMDFallbackOps<int8_t, __m128i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Returns the sum of all sixteen lanes, truncated to int8_t. */
+    static forcedinline int8_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept
+    {
+       #ifdef __SSSE3__
+        // Widen the low and high eight bytes to 16-bit lanes by interleaving with zero.
+        __m128i lo = _mm_unpacklo_epi8 (a, _mm_setzero_si128());
+        __m128i hi = _mm_unpackhi_epi8 (a, _mm_setzero_si128());
+
+        // Three horizontal adds collapse each group of eight 16-bit lanes into lane 0.
+        for (int i = 0; i < 3; ++i)
+        {
+            lo = _mm_hadd_epi16 (lo, lo);
+            hi = _mm_hadd_epi16 (hi, hi);
+        }
+
+        // Only the low byte of each half-sum is kept before the final narrowing cast.
+        return static_cast<int8_t> ((_mm_cvtsi128_si32 (lo) & 0xff) + (_mm_cvtsi128_si32 (hi) & 0xff));
+       #else
+        return SIMDFallbackOps<int8_t, __m128i>::sum (a);
+       #endif
+    }
+
+    /** Lane-wise 8-bit multiplication, keeping the low 8 bits of each product.
+
+        There is no 8-bit multiply in SSE, so bytes are multiplied in pairs using the
+        16-bit multiply. Declared noexcept like every other member: it only
+        executes intrinsics and is called from the noexcept multiplyAdd().
+    */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept
+    {
+        // The low byte of a 16-bit product depends only on the low bytes of its
+        // operands, so multiplying the packed lanes directly gives the even bytes.
+        __m128i even = _mm_mullo_epi16 (a, b);
+        // Shift the odd bytes down into the low half of each 16-bit lane first.
+        __m128i odd  = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8), _mm_srli_epi16 (b, 8));
+
+        // Recombine: odd results go back to the high byte, even results are masked
+        // down to their low byte by the shift-left / shift-right pair.
+        return _mm_or_si128 (_mm_slli_epi16 (odd, 8),
+                             _mm_srli_epi16 (_mm_slli_epi16 (even, 8), 8));
+    }
+};
+
+//==============================================================================
+/** Unsigned 8-bit integer SSE intrinsics.
+
+    Sixteen uint8_t lanes are held in a single __m128i. SSE only offers a signed
+    8-bit greater-than compare, so greaterThan() runs both operands through
+    ssign() first.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint8_t>
+{
+    //==============================================================================
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (uint8_t, kHighBit);
+    DECLARE_SSE_SIMD_CONST (uint8_t, kAllBitsSet);
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (const uint8_t* a) noexcept                      { return load (a); }
+    // XORs every lane with kHighBit. NOTE(review): kHighBit's value is defined
+    // elsewhere; presumably only each lane's top bit is set, which makes the signed
+    // compare in greaterThan() order the lanes as unsigned values.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE ssign (__m128i a) noexcept                              { return _mm_xor_si128 (a, vconst (kHighBit)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const uint8_t* a) noexcept                        { return _mm_load_si128 ((const __m128i*) a); }
+    static forcedinline void    JUCE_VECTOR_CALLTYPE store (__m128i v, uint8_t* p) noexcept                  { _mm_store_si128 ((__m128i*) p, v); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (uint8_t s) noexcept                             { return _mm_set1_epi8 ((int8_t) s); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept                     { return _mm_add_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept                     { return _mm_sub_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept                 { return _mm_and_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or  (__m128i a, __m128i b) noexcept                 { return _mm_or_si128  (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept                 { return _mm_xor_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept              { return _mm_andnot_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept                            { return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { return _mm_min_epu8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { return _mm_max_epu8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept                   { return _mm_cmpeq_epi8 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept             { return _mm_cmpgt_epi8 (ssign (a), ssign (b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept                { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
+    static forcedinline uint8_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept                      { return SIMDFallbackOps<uint8_t, __m128i>::get (v, i); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, uint8_t s) noexcept           { return SIMDFallbackOps<uint8_t, __m128i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Returns the sum of all sixteen lanes, truncated to uint8_t. */
+    static forcedinline uint8_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept
+    {
+       #ifdef __SSSE3__
+        // Widen the low and high eight bytes to 16-bit lanes by interleaving with zero.
+        __m128i lo = _mm_unpacklo_epi8 (a, _mm_setzero_si128());
+        __m128i hi = _mm_unpackhi_epi8 (a, _mm_setzero_si128());
+
+        // Three horizontal adds collapse each group of eight 16-bit lanes into lane 0.
+        for (int i = 0; i < 3; ++i)
+        {
+            lo = _mm_hadd_epi16 (lo, lo);
+            hi = _mm_hadd_epi16 (hi, hi);
+        }
+
+        // Only the low byte of each half-sum is kept before the final narrowing cast.
+        return static_cast<uint8_t> ((static_cast<uint32_t> (_mm_cvtsi128_si32 (lo)) & 0xffu)
+                                   + (static_cast<uint32_t> (_mm_cvtsi128_si32 (hi)) & 0xffu));
+       #else
+        return SIMDFallbackOps<uint8_t, __m128i>::sum (a);
+       #endif
+    }
+
+    /** Lane-wise 8-bit multiplication, keeping the low 8 bits of each product.
+
+        There is no 8-bit multiply in SSE, so bytes are multiplied in pairs using the
+        16-bit multiply. Declared noexcept like every other member: it only
+        executes intrinsics and is called from the noexcept multiplyAdd().
+    */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept
+    {
+        // The low byte of a 16-bit product depends only on the low bytes of its
+        // operands, so multiplying the packed lanes directly gives the even bytes.
+        __m128i even = _mm_mullo_epi16 (a, b);
+        // Shift the odd bytes down into the low half of each 16-bit lane first.
+        __m128i odd  = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8), _mm_srli_epi16 (b, 8));
+
+        // Recombine: odd results go back to the high byte, even results are masked
+        // down to their low byte by the shift-left / shift-right pair.
+        return _mm_or_si128 (_mm_slli_epi16 (odd, 8),
+                             _mm_srli_epi16 (_mm_slli_epi16 (even, 8), 8));
+    }
+};
+
+//==============================================================================
+/** SSE implementation of the SIMD primitives for signed 16-bit integers.
+
+    Eight int16_t lanes are held in a single __m128i.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int16_t>
+{
+    //==============================================================================
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (int16_t, kAllBitsSet);
+
+    //==============================================================================
+    // Loading, storing and broadcasting.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (const int16_t* a) noexcept
+    {
+        return load (a);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int16_t* a) noexcept
+    {
+        return _mm_load_si128 (reinterpret_cast<const __m128i*> (a));
+    }
+
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i v, int16_t* p) noexcept
+    {
+        _mm_store_si128 (reinterpret_cast<__m128i*> (p), v);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int16_t s) noexcept
+    {
+        return _mm_set1_epi16 (s);
+    }
+
+    //==============================================================================
+    // Lane-wise arithmetic.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept
+    {
+        return _mm_add_epi16 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept
+    {
+        return _mm_sub_epi16 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept
+    {
+        return _mm_mullo_epi16 (a, b);
+    }
+
+    //==============================================================================
+    // Bitwise operations.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept
+    {
+        return _mm_and_si128 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b) noexcept
+    {
+        return _mm_or_si128 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept
+    {
+        return _mm_xor_si128 (a, b);
+    }
+
+    /** Computes (~a) & b. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept
+    {
+        return _mm_andnot_si128 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept
+    {
+        const __m128i allOnes = vconst (kAllBitsSet);
+        return _mm_andnot_si128 (a, allOnes);
+    }
+
+    //==============================================================================
+    // Minimum, maximum and comparisons.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept
+    {
+        return _mm_min_epi16 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept
+    {
+        return _mm_max_epi16 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept
+    {
+        return _mm_cmpeq_epi16 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept
+    {
+        return _mm_cmpgt_epi16 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept
+    {
+        const __m128i isGreater = greaterThan (a, b);
+        const __m128i isSame    = equal (a, b);
+        return bit_or (isGreater, isSame);
+    }
+
+    /** Computes a + (b * c) lane by lane. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept
+    {
+        const __m128i product = mul (b, c);
+        return add (a, product);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept
+    {
+        const __m128i isSame = equal (a, b);
+        return bit_not (isSame);
+    }
+
+    /** Returns true only when every lane of a equals the matching lane of b. */
+    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept
+    {
+        const int byteMask = _mm_movemask_epi8 (equal (a, b));
+        return byteMask == 0xffff;
+    }
+
+    //==============================================================================
+    // Single-lane access is delegated to the generic fallback implementation.
+    static forcedinline int16_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept
+    {
+        return SIMDFallbackOps<int16_t, __m128i>::get (v, i);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, int16_t s) noexcept
+    {
+        return SIMDFallbackOps<int16_t, __m128i>::set (v, i, s);
+    }
+
+    //==============================================================================
+    /** Returns the sum of all eight lanes, truncated to int16_t. */
+    static forcedinline int16_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept
+    {
+       #ifdef __SSSE3__
+        // Three rounds of horizontal adds reduce the eight lanes into lane 0.
+        __m128i acc = a;
+
+        for (int pass = 0; pass < 3; ++pass)
+            acc = _mm_hadd_epi16 (acc, acc);
+
+        const int lowWord = _mm_cvtsi128_si32 (acc) & 0xffff;
+        return static_cast<int16_t> (lowWord);
+       #else
+        return SIMDFallbackOps<int16_t, __m128i>::sum (a);
+       #endif
+    }
+};
+
+//==============================================================================
+/** Unsigned 16-bit integer SSE intrinsics.
+
+    Eight uint16_t lanes are held in a single __m128i. SSE only offers a signed
+    16-bit greater-than compare, so greaterThan() runs both operands through
+    ssign() first.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint16_t>
+{
+    //==============================================================================
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (uint16_t, kHighBit);
+    DECLARE_SSE_SIMD_CONST (uint16_t, kAllBitsSet);
+
+    //==============================================================================
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE vconst (const uint16_t* a) noexcept                     { return load (a); }
+    // XORs every lane with kHighBit. NOTE(review): kHighBit's value is defined
+    // elsewhere; presumably only each lane's top bit is set, which makes the signed
+    // compare in greaterThan() order the lanes as unsigned values.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE ssign (__m128i a) noexcept                              { return _mm_xor_si128 (a, vconst (kHighBit)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE load (const uint16_t* a) noexcept                       { return _mm_load_si128 ((const __m128i*) a); }
+    static forcedinline void     JUCE_VECTOR_CALLTYPE store (__m128i v, uint16_t* p) noexcept                 { _mm_store_si128 ((__m128i*) p, v); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE expand (uint16_t s) noexcept                            { return _mm_set1_epi16 ((int16_t) s); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept                     { return _mm_add_epi16 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept                     { return _mm_sub_epi16 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept                     { return _mm_mullo_epi16 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept                 { return _mm_and_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_or  (__m128i a, __m128i b) noexcept                 { return _mm_or_si128  (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept                 { return _mm_xor_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept              { return _mm_andnot_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept                            { return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
+   #if defined(__SSE4_1__)
+    // _mm_min_epu16 / _mm_max_epu16 are SSE4.1 instructions. GCC and Clang predefine
+    // __SSE4_1__ (not __SSE4__) when SSE4.1 is enabled, so that is the macro to test.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { return _mm_min_epu16 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { return _mm_max_epu16 (a, b); }
+   #else
+    // SSE2 fallback: build a per-lane comparison mask and use it to pick a or b.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { __m128i lt = greaterThan (b, a); return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { __m128i gt = greaterThan (a, b); return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
+   #endif
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept                   { return _mm_cmpeq_epi16 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept             { return _mm_cmpgt_epi16 (ssign (a), ssign (b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline bool     JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept                { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
+    static forcedinline uint16_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept                      { return SIMDFallbackOps<uint16_t, __m128i>::get (v, i); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, uint16_t s) noexcept          { return SIMDFallbackOps<uint16_t, __m128i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Returns the sum of all eight lanes, truncated to uint16_t. */
+    static forcedinline uint16_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept
+    {
+       #ifdef __SSSE3__
+        // Three rounds of horizontal adds reduce the eight lanes into lane 0.
+        __m128i tmp = _mm_hadd_epi16 (a, a);
+        tmp = _mm_hadd_epi16 (tmp, tmp);
+        tmp = _mm_hadd_epi16 (tmp, tmp);
+
+        return static_cast<uint16_t> (static_cast<uint32_t> (_mm_cvtsi128_si32 (tmp)) & 0xffffu);
+       #else
+        return SIMDFallbackOps<uint16_t, __m128i>::sum (a);
+       #endif
+    }
+};
+
+//==============================================================================
+/** SSE implementation of the SIMD primitives for signed 32-bit integers.
+
+    Four int32_t lanes are held in a single __m128i. Multiplication, min and
+    max use SSE4.1 instructions when __SSE4_1__ is defined and are otherwise
+    composed from SSE2 operations.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int32_t>
+{
+    //==============================================================================
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (int32_t, kAllBitsSet);
+
+    //==============================================================================
+    // Loading, storing and broadcasting.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (const int32_t* a) noexcept
+    {
+        return load (a);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int32_t* a) noexcept
+    {
+        return _mm_load_si128 (reinterpret_cast<const __m128i*> (a));
+    }
+
+    static forcedinline void JUCE_VECTOR_CALLTYPE store (__m128i v, int32_t* p) noexcept
+    {
+        _mm_store_si128 (reinterpret_cast<__m128i*> (p), v);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int32_t s) noexcept
+    {
+        return _mm_set1_epi32 (s);
+    }
+
+    //==============================================================================
+    // Lane-wise addition and subtraction.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept
+    {
+        return _mm_add_epi32 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept
+    {
+        return _mm_sub_epi32 (a, b);
+    }
+
+    //==============================================================================
+    // Bitwise operations.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept
+    {
+        return _mm_and_si128 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or (__m128i a, __m128i b) noexcept
+    {
+        return _mm_or_si128 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept
+    {
+        return _mm_xor_si128 (a, b);
+    }
+
+    /** Computes (~a) & b. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept
+    {
+        return _mm_andnot_si128 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept
+    {
+        const __m128i allOnes = vconst (kAllBitsSet);
+        return _mm_andnot_si128 (a, allOnes);
+    }
+
+    //==============================================================================
+    // Comparisons.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept
+    {
+        return _mm_cmpeq_epi32 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept
+    {
+        return _mm_cmpgt_epi32 (a, b);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept
+    {
+        const __m128i isGreater = greaterThan (a, b);
+        const __m128i isSame    = equal (a, b);
+        return bit_or (isGreater, isSame);
+    }
+
+    /** Computes a + (b * c) lane by lane. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept
+    {
+        const __m128i product = mul (b, c);
+        return add (a, product);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept
+    {
+        const __m128i isSame = equal (a, b);
+        return bit_not (isSame);
+    }
+
+    /** Returns true only when every lane of a equals the matching lane of b. */
+    static forcedinline bool JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept
+    {
+        const int byteMask = _mm_movemask_epi8 (equal (a, b));
+        return byteMask == 0xffff;
+    }
+
+    //==============================================================================
+    // Single-lane access is delegated to the generic fallback implementation.
+    static forcedinline int32_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept
+    {
+        return SIMDFallbackOps<int32_t, __m128i>::get (v, i);
+    }
+
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, int32_t s) noexcept
+    {
+        return SIMDFallbackOps<int32_t, __m128i>::set (v, i, s);
+    }
+
+    //==============================================================================
+    /** Returns the sum of all four lanes. */
+    static forcedinline int32_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept
+    {
+       #ifdef __SSSE3__
+        // Two rounds of horizontal adds reduce the four lanes into lane 0.
+        const __m128i pairSums = _mm_hadd_epi32 (a, a);
+        const __m128i total    = _mm_hadd_epi32 (pairSums, pairSums);
+        return _mm_cvtsi128_si32 (total);
+       #else
+        return SIMDFallbackOps<int32_t, __m128i>::sum (a);
+       #endif
+    }
+
+    /** Lane-wise multiplication, keeping the low 32 bits of each product. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        return _mm_mullo_epi32 (a, b);
+       #else
+        // _mm_mul_epu32 multiplies lanes 0 and 2 into two 64-bit products; shifting
+        // both inputs right by four bytes first does the same for lanes 1 and 3.
+        const __m128i products02 = _mm_mul_epu32 (a, b);
+        const __m128i products13 = _mm_mul_epu32 (_mm_srli_si128 (a, 4), _mm_srli_si128 (b, 4));
+
+        // Gather the low 32 bits of each product into the bottom two lanes, then
+        // interleave the two results to restore the original lane order.
+        const __m128i low02 = _mm_shuffle_epi32 (products02, _MM_SHUFFLE (0,0,2,0));
+        const __m128i low13 = _mm_shuffle_epi32 (products13, _MM_SHUFFLE (0,0,2,0));
+        return _mm_unpacklo_epi32 (low02, low13);
+       #endif
+    }
+
+    /** Lane-wise signed minimum. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        return _mm_min_epi32 (a, b);
+       #else
+        // Select a where a < b, and b everywhere else.
+        const __m128i aIsSmaller = greaterThan (b, a);
+        const __m128i fromA      = bit_and (aIsSmaller, a);
+        const __m128i fromB      = bit_andnot (aIsSmaller, b);
+        return bit_or (fromA, fromB);
+       #endif
+    }
+
+    /** Lane-wise signed maximum. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        return _mm_max_epi32 (a, b);
+       #else
+        // Select a where a > b, and b everywhere else.
+        const __m128i aIsLarger = greaterThan (a, b);
+        const __m128i fromA     = bit_and (aIsLarger, a);
+        const __m128i fromB     = bit_andnot (aIsLarger, b);
+        return bit_or (fromA, fromB);
+       #endif
+    }
+};
+
+//==============================================================================
+/** Unsigned 32-bit integer SSE intrinsics.
+
+    Four uint32_t lanes are held in a single __m128i. SSE only offers a signed
+    32-bit greater-than compare, so greaterThan() runs both operands through
+    ssign() first.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint32_t>
+{
+    //==============================================================================
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (uint32_t, kAllBitsSet);
+    DECLARE_SSE_SIMD_CONST (uint32_t, kHighBit);
+
+    //==============================================================================
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE vconst (const uint32_t* a) noexcept                     { return load (a); }
+    // XORs every lane with kHighBit. NOTE(review): kHighBit's value is defined
+    // elsewhere; presumably only each lane's top bit is set, which makes the signed
+    // compare in greaterThan() order the lanes as unsigned values.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE ssign (__m128i a) noexcept                              { return _mm_xor_si128 (a, vconst (kHighBit)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE load (const uint32_t* a) noexcept                       { return _mm_load_si128 ((const __m128i*) a); }
+    static forcedinline void     JUCE_VECTOR_CALLTYPE store (__m128i v, uint32_t* p) noexcept                 { _mm_store_si128 ((__m128i*) p, v); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE expand (uint32_t s) noexcept                            { return _mm_set1_epi32 ((int32_t) s); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept                     { return _mm_add_epi32 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept                     { return _mm_sub_epi32 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept                 { return _mm_and_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_or  (__m128i a, __m128i b) noexcept                 { return _mm_or_si128  (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept                 { return _mm_xor_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept              { return _mm_andnot_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept                            { return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept                   { return _mm_cmpeq_epi32 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept             { return _mm_cmpgt_epi32 (ssign (a), ssign (b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline bool     JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept                { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
+    static forcedinline uint32_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept                      { return SIMDFallbackOps<uint32_t, __m128i>::get (v, i); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, uint32_t s) noexcept          { return SIMDFallbackOps<uint32_t, __m128i>::set (v, i, s); }
+
+    //==============================================================================
+    /** Returns the sum of all four lanes. */
+    static forcedinline uint32_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept
+    {
+       #ifdef __SSSE3__
+        // Two rounds of horizontal adds reduce the four lanes into lane 0.
+        __m128i tmp = _mm_hadd_epi32 (a, a);
+        return static_cast<uint32_t> (_mm_cvtsi128_si32 (_mm_hadd_epi32 (tmp, tmp)));
+       #else
+        return SIMDFallbackOps<uint32_t, __m128i>::sum (a);
+       #endif
+    }
+
+    /** Lane-wise multiplication, keeping the low 32 bits of each product. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        return _mm_mullo_epi32 (a, b);
+       #else
+        // _mm_mul_epu32 multiplies lanes 0 and 2 into two 64-bit products; shifting
+        // both inputs right by four bytes first does the same for lanes 1 and 3.
+        __m128i even = _mm_mul_epu32 (a,b);
+        __m128i odd = _mm_mul_epu32 (_mm_srli_si128 (a,4), _mm_srli_si128 (b,4));
+        // Gather the low 32 bits of each product, then interleave to restore lane order.
+        return _mm_unpacklo_epi32 (_mm_shuffle_epi32(even, _MM_SHUFFLE (0,0,2,0)),
+                                   _mm_shuffle_epi32(odd,  _MM_SHUFFLE (0,0,2,0)));
+       #endif
+    }
+
+    /** Lane-wise unsigned minimum. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        // Must be the unsigned variant: the signed _mm_min_epi32 would treat any
+        // lane with its top bit set as negative and disagree with the fallback below.
+        return _mm_min_epu32 (a, b);
+       #else
+        // greaterThan() is the unsigned compare defined above.
+        __m128i lt = greaterThan (b, a);
+        return bit_or (bit_and (lt, a), bit_andnot (lt, b));
+       #endif
+    }
+
+    /** Lane-wise unsigned maximum. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        // Must be the unsigned variant: the signed _mm_max_epi32 would treat any
+        // lane with its top bit set as negative and disagree with the fallback below.
+        return _mm_max_epu32 (a, b);
+       #else
+        // greaterThan() is the unsigned compare defined above.
+        __m128i gt = greaterThan (a, b);
+        return bit_or (bit_and (gt, a), bit_andnot (gt, b));
+       #endif
+    }
+};
+
+//==============================================================================
+/** Signed 64-bit integer SSE intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<int64_t>
+{
+    //==============================================================================
+    /** The native register type: two signed 64-bit lanes in one 128-bit register. */
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (int64_t, kAllBitsSet);
+
+    // Loading, storing and broadcasting. load() and store() use the aligned
+    // intrinsics, so the pointers must be 16-byte aligned.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE vconst (const int64_t* a) noexcept                      { return load (a); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE expand (int64_t s) noexcept                             { return _mm_set1_epi64x (s); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE load (const int64_t* a) noexcept                        { return _mm_load_si128 ((const __m128i*) a); }
+    static forcedinline void    JUCE_VECTOR_CALLTYPE store (__m128i v, int64_t* p) noexcept                  { _mm_store_si128 ((__m128i*) p, v); }
+
+    // Lane-wise arithmetic and bitwise operations.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept                     { return _mm_add_epi64 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept                     { return _mm_sub_epi64 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept                 { return _mm_and_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_or  (__m128i a, __m128i b) noexcept                 { return _mm_or_si128  (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept                 { return _mm_xor_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept              { return _mm_andnot_si128 (a, b); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept                            { return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
+
+    // min/max are built from a comparison mask: pick a where the mask is set, b elsewhere.
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { __m128i lt = greaterThan (b, a); return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { __m128i gt = greaterThan (a, b); return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline bool    JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept                { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
+
+    // Operations that are delegated to the generic fallback implementation.
+    static forcedinline int64_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept                      { return SIMDFallbackOps<int64_t, __m128i>::get (v, i); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, int64_t s) noexcept           { return SIMDFallbackOps<int64_t, __m128i>::set (v, i, s); }
+    static forcedinline int64_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept                                { return SIMDFallbackOps<int64_t, __m128i>::sum (a); }
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept                     { return SIMDFallbackOps<int64_t, __m128i>::mul (a, b); }
+
+    /** Lane-wise 64-bit equality, returning an all-ones lane where a == b. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        return _mm_cmpeq_epi64 (a, b);
+       #else
+        // Compare as 32-bit halves, then AND each half with its neighbour so that
+        // a 64-bit lane is all-ones only when both of its halves matched.
+        __m128i bitmask = _mm_cmpeq_epi32 (a, b);
+        bitmask = _mm_and_si128 (bitmask, _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 3, 0, 1)));
+        return _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 2, 0, 0));
+       #endif
+    }
+
+    /** Lane-wise signed 64-bit a > b, returning an all-ones lane where true. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept
+    {
+        // _mm_cmpgt_epi64 (PCMPGTQ) is an SSE4.2 instruction, not SSE4.1, so it
+        // must only be used when the build actually targets SSE4.2.
+       #if defined(__SSE4_2__)
+        return _mm_cmpgt_epi64 (a, b);
+       #else
+        return SIMDFallbackOps<int64_t, __m128i>::greaterThan (a, b);
+       #endif
+    }
+};
+
+//==============================================================================
+/** Unsigned 64-bit integer SSE intrinsics.
+
+    @tags{DSP}
+*/
+template <>
+struct SIMDNativeOps<uint64_t>
+{
+    //==============================================================================
+    /** The native register type: two unsigned 64-bit lanes in one 128-bit register. */
+    using vSIMDType = __m128i;
+
+    //==============================================================================
+    DECLARE_SSE_SIMD_CONST (uint64_t, kAllBitsSet);
+    DECLARE_SSE_SIMD_CONST (uint64_t, kHighBit);
+
+    // Loading, storing and broadcasting. load() and store() use the aligned
+    // intrinsics, so the pointers must be 16-byte aligned.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE vconst (const uint64_t* a) noexcept                     { return load (a); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE expand (uint64_t s) noexcept                            { return _mm_set1_epi64x ((int64_t) s); }
+    // XORs each lane with kHighBit; used below so that a signed compare can be applied to unsigned lanes.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE ssign (__m128i a) noexcept                              { return _mm_xor_si128 (a, vconst (kHighBit)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE load (const uint64_t* a) noexcept                       { return _mm_load_si128 ((const __m128i*) a); }
+    static forcedinline void     JUCE_VECTOR_CALLTYPE store (__m128i v, uint64_t* p) noexcept                 { _mm_store_si128 ((__m128i*) p, v); }
+
+    // Lane-wise arithmetic and bitwise operations.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE add (__m128i a, __m128i b) noexcept                     { return _mm_add_epi64 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE sub (__m128i a, __m128i b) noexcept                     { return _mm_sub_epi64 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_and (__m128i a, __m128i b) noexcept                 { return _mm_and_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_or  (__m128i a, __m128i b) noexcept                 { return _mm_or_si128  (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_xor (__m128i a, __m128i b) noexcept                 { return _mm_xor_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_andnot (__m128i a, __m128i b) noexcept              { return _mm_andnot_si128 (a, b); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE bit_not (__m128i a) noexcept                            { return _mm_andnot_si128 (a, vconst (kAllBitsSet)); }
+
+    // min/max are built from a comparison mask: pick a where the mask is set, b elsewhere.
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE min (__m128i a, __m128i b) noexcept                     { __m128i lt = greaterThan (b, a); return bit_or (bit_and (lt, a), bit_andnot (lt, b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE max (__m128i a, __m128i b) noexcept                     { __m128i gt = greaterThan (a, b); return bit_or (bit_and (gt, a), bit_andnot (gt, b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE greaterThanOrEqual (__m128i a, __m128i b) noexcept      { return bit_or (greaterThan (a, b), equal (a,b)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE multiplyAdd (__m128i a, __m128i b, __m128i c) noexcept  { return add (a, mul (b, c)); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE notEqual (__m128i a, __m128i b) noexcept                { return bit_not (equal (a, b)); }
+    static forcedinline bool     JUCE_VECTOR_CALLTYPE allEqual (__m128i a, __m128i b) noexcept                { return (_mm_movemask_epi8 (equal (a, b)) == 0xffff); }
+
+    // Operations that are delegated to the generic fallback implementation.
+    static forcedinline uint64_t JUCE_VECTOR_CALLTYPE get (__m128i v, size_t i) noexcept                      { return SIMDFallbackOps<uint64_t, __m128i>::get (v, i); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE set (__m128i v, size_t i, uint64_t s) noexcept          { return SIMDFallbackOps<uint64_t, __m128i>::set (v, i, s); }
+    static forcedinline uint64_t JUCE_VECTOR_CALLTYPE sum (__m128i a) noexcept                                { return SIMDFallbackOps<uint64_t, __m128i>::sum (a); }
+    static forcedinline __m128i  JUCE_VECTOR_CALLTYPE mul (__m128i a, __m128i b) noexcept                     { return SIMDFallbackOps<uint64_t, __m128i>::mul (a, b); }
+
+    /** Lane-wise 64-bit equality, returning an all-ones lane where a == b. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE equal (__m128i a, __m128i b) noexcept
+    {
+       #if defined(__SSE4_1__)
+        return _mm_cmpeq_epi64 (a, b);
+       #else
+        // Compare as 32-bit halves, then AND each half with its neighbour so that
+        // a 64-bit lane is all-ones only when both of its halves matched.
+        __m128i bitmask = _mm_cmpeq_epi32 (a, b);
+        bitmask = _mm_and_si128 (bitmask, _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 3, 0, 1)));
+        return _mm_shuffle_epi32 (bitmask, _MM_SHUFFLE (2, 2, 0, 0));
+       #endif
+    }
+
+    /** Lane-wise unsigned 64-bit a > b, returning an all-ones lane where true. */
+    static forcedinline __m128i JUCE_VECTOR_CALLTYPE greaterThan (__m128i a, __m128i b) noexcept
+    {
+        // _mm_cmpgt_epi64 (PCMPGTQ) is an SSE4.2 instruction, not SSE4.1, so it
+        // must only be used when the build actually targets SSE4.2. It is a signed
+        // compare, hence both operands are passed through ssign() first.
+       #if defined(__SSE4_2__)
+        return _mm_cmpgt_epi64 (ssign (a), ssign (b));
+       #else
+        return SIMDFallbackOps<uint64_t, __m128i>::greaterThan (a, b);
+       #endif
+    }
+};
+
+#endif
+
+#if JUCE_GCC && (__GNUC__ >= 6)
+ #pragma GCC diagnostic pop
+#endif
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_Bias.h
+++ b/modules/juce_dsp/processors/juce_Bias.h
@ -0,0 +1,158 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Adds a DC offset (voltage bias) to the audio samples.
+
+    This is a useful preprocessor for asymmetric waveshaping when a waveshaper is
+    bookended by a bias on input and a DC-offset removing high pass filter on output.
+
+    This is an extremely simple bias implementation that simply adds a value to a signal.
+    More complicated bias behaviours exist in real circuits - for your homework ;).
+
+    @tags{DSP}
+*/
+template <typename FloatType>
+class Bias
+{
+public:
+    Bias() noexcept {}
+
+    //==============================================================================
+    /** Sets the DC bias
+        @param newBias DC offset in range [-1, 1]
+    */
+    void setBias (FloatType newBias) noexcept
+    {
+        jassert (newBias >= static_cast<FloatType> (-1) && newBias <= static_cast<FloatType> (1));
+        bias.setValue (newBias);
+    }
+
+    //==============================================================================
+    /** Returns the DC bias
+        @return DC bias, which should be in the range [-1, 1]
+    */
+    FloatType getBias() const noexcept              { return bias.getTargetValue(); }
+
+    /** Sets the length of the ramp used for smoothing bias changes.
+
+        The smoother is only re-initialised when the duration actually changes.
+    */
+    void setRampDurationSeconds (double newDurationSeconds) noexcept
+    {
+        if (rampDurationSeconds != newDurationSeconds)
+        {
+            rampDurationSeconds = newDurationSeconds;
+            updateRamp();
+        }
+    }
+
+    /** Returns the length of the ramp used for smoothing bias changes. */
+    double getRampDurationSeconds() const noexcept  { return rampDurationSeconds; }
+
+    //==============================================================================
+    /** Called before processing starts.
+        Stores the sample rate from the spec and re-initialises the smoothing ramp.
+    */
+    void prepare (const ProcessSpec& spec) noexcept
+    {
+        sampleRate = spec.sampleRate;
+        updateRamp();
+    }
+
+    /** Re-initialises the smoothing ramp with the current sample rate and ramp duration. */
+    void reset() noexcept
+    {
+        bias.reset (sampleRate, rampDurationSeconds);
+    }
+
+    //==============================================================================
+    /** Returns the result of processing a single sample.
+
+        This pulls the next value from the bias smoother, which advances its
+        state, so this method cannot be const.
+    */
+    template <typename SampleType>
+    SampleType processSample (SampleType inputSample) noexcept
+    {
+        return inputSample + bias.getNextValue();
+    }
+
+    //==============================================================================
+    /** Processes the input and output buffers supplied in the processing context. */
+    template<typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        auto&& inBlock  = context.getInputBlock();
+        auto&& outBlock = context.getOutputBlock();
+
+        jassert (inBlock.getNumChannels() == outBlock.getNumChannels());
+        jassert (inBlock.getNumSamples() == outBlock.getNumSamples());
+
+        auto len         = inBlock.getNumSamples();
+        auto numChannels = inBlock.getNumChannels();
+
+        if (context.isBypassed)
+        {
+            // Keep the smoother moving while bypassed, and pass the audio through untouched.
+            bias.skip (static_cast<int> (len));
+
+            if (context.usesSeparateInputAndOutputBlocks())
+                outBlock.copy (inBlock);
+
+            return;
+        }
+
+        if (numChannels == 1)
+        {
+            auto* src = inBlock.getChannelPointer (0);
+            auto* dst = outBlock.getChannelPointer (0);
+
+            for (size_t i = 0; i < len; ++i)
+                dst[i] = src[i] + bias.getNextValue();
+        }
+        else
+        {
+            // The smoothed bias values are generated once into a stack buffer so
+            // that every channel receives exactly the same ramp.
+            // NOTE(review): alloca scales with the block length - very large blocks
+            // could exhaust the stack; confirm the expected maximum block size.
+            auto* biases = static_cast<FloatType*> (alloca (sizeof (FloatType) * len));
+
+            for (size_t i = 0; i < len; ++i)
+                biases[i] = bias.getNextValue();
+
+            for (size_t chan = 0; chan < numChannels; ++chan)
+                FloatVectorOperations::add (outBlock.getChannelPointer (chan),
+                                            inBlock.getChannelPointer (chan),
+                                            biases, static_cast<int> (len));
+        }
+    }
+
+
+private:
+    //==============================================================================
+    LinearSmoothedValue<FloatType> bias;
+    double sampleRate = 0, rampDurationSeconds = 0;
+
+    /** Re-initialises the smoother, but only once a valid sample rate has been set. */
+    void updateRamp() noexcept
+    {
+        if (sampleRate > 0)
+            bias.reset (sampleRate, rampDurationSeconds);
+    }
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_FIRFilter.cpp
+++ b/modules/juce_dsp/processors/juce_FIRFilter.cpp
@ -0,0 +1,162 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Evaluates the magnitude of the filter's frequency response at one frequency.
+
+    The response is the sum of coefs[n] * exp (-j * 2 * pi * frequency / theSampleRate)^n
+    over all coefficients; its absolute value is returned.
+*/
+template <typename NumericType>
+double FIR::Coefficients<NumericType>::Coefficients::getMagnitudeForFrequency (double frequency, double theSampleRate) const noexcept
+{
+    jassert (theSampleRate > 0.0);
+    jassert (frequency >= 0.0 && frequency <= theSampleRate * 0.5);
+
+    constexpr Complex<double> j (0, 1);
+
+    // Iterate over the real coefficient count rather than getFilterOrder():
+    // the order is size() - 1 as a size_t, which wraps around for an empty array.
+    const auto* coefs = coefficients.begin();
+    const auto numCoefs = static_cast<size_t> (coefficients.size());
+
+    Complex<double> numerator = 0.0, factor = 1.0;
+    Complex<double> jw = std::exp (-MathConstants<double>::twoPi * frequency * j / theSampleRate);
+
+    for (size_t n = 0; n < numCoefs; ++n)
+    {
+        numerator += static_cast<double> (coefs[n]) * factor;
+        factor *= jw;   // factor now holds jw^(n + 1)
+    }
+
+    return std::abs (numerator);
+}
+
+//==============================================================================
+/** Evaluates the magnitude of the filter's frequency response for an array of frequencies.
+
+    For each of the numSamples entries in frequencies, the matching entry of
+    magnitudes receives the absolute value of the response at that frequency.
+*/
+template <typename NumericType>
+void FIR::Coefficients<NumericType>::Coefficients::getMagnitudeForFrequencyArray (double* frequencies, double* magnitudes,
+                                                                        size_t numSamples, double theSampleRate) const noexcept
+{
+    jassert (theSampleRate > 0.0);
+
+    constexpr Complex<double> j (0, 1);
+
+    // Iterate over the real coefficient count rather than getFilterOrder():
+    // the order is size() - 1 as a size_t, which wraps around for an empty array.
+    const auto* coefs = coefficients.begin();
+    const auto numCoefs = static_cast<size_t> (coefficients.size());
+
+    for (size_t i = 0; i < numSamples; ++i)
+    {
+        jassert (frequencies[i] >= 0.0 && frequencies[i] <= theSampleRate * 0.5);
+
+        Complex<double> numerator = 0.0;
+        Complex<double> factor = 1.0;
+        Complex<double> jw = std::exp (-MathConstants<double>::twoPi * frequencies[i] * j / theSampleRate);
+
+        for (size_t n = 0; n < numCoefs; ++n)
+        {
+            numerator += static_cast<double> (coefs[n]) * factor;
+            factor *= jw;   // factor now holds jw^(n + 1)
+        }
+
+        magnitudes[i] = std::abs (numerator);
+    }
+}
+
+//==============================================================================
+/** Evaluates the phase of the filter's frequency response at one frequency.
+
+    The response is the sum of coefs[n] * exp (-j * 2 * pi * frequency / theSampleRate)^n
+    over all coefficients; its argument (as given by std::arg) is returned.
+*/
+template <typename NumericType>
+double FIR::Coefficients<NumericType>::Coefficients::getPhaseForFrequency (double frequency, double theSampleRate) const noexcept
+{
+    jassert (theSampleRate > 0.0);
+    jassert (frequency >= 0.0 && frequency <= theSampleRate * 0.5);
+
+    constexpr Complex<double> j (0, 1);
+
+    Complex<double> numerator = 0.0;
+    Complex<double> factor = 1.0;
+    Complex<double> jw = std::exp (-MathConstants<double>::twoPi * frequency * j / theSampleRate);
+
+    // Iterate over the real coefficient count rather than getFilterOrder():
+    // the order is size() - 1 as a size_t, which wraps around for an empty array.
+    const auto* coefs = coefficients.begin();
+    const auto numCoefs = static_cast<size_t> (coefficients.size());
+
+    for (size_t n = 0; n < numCoefs; ++n)
+    {
+        numerator += static_cast<double> (coefs[n]) * factor;
+        factor *= jw;   // factor now holds jw^(n + 1)
+    }
+
+    return std::arg (numerator);
+}
+
+//==============================================================================
+/** Evaluates the phase of the filter's frequency response for an array of frequencies.
+
+    For each of the numSamples entries in frequencies, the matching entry of
+    phases receives the argument (std::arg) of the response at that frequency.
+*/
+template <typename NumericType>
+void FIR::Coefficients<NumericType>::Coefficients::getPhaseForFrequencyArray (double* frequencies, double* phases,
+                                                                    size_t numSamples, double theSampleRate) const noexcept
+{
+    jassert (theSampleRate > 0.0);
+
+    constexpr Complex<double> j (0, 1);
+
+    // Iterate over the real coefficient count rather than getFilterOrder():
+    // the order is size() - 1 as a size_t, which wraps around for an empty array.
+    const auto* coefs = coefficients.begin();
+    const auto numCoefs = static_cast<size_t> (coefficients.size());
+
+    for (size_t i = 0; i < numSamples; ++i)
+    {
+        jassert (frequencies[i] >= 0.0 && frequencies[i] <= theSampleRate * 0.5);
+
+        Complex<double> numerator = 0.0, factor = 1.0;
+        Complex<double> jw = std::exp (-MathConstants<double>::twoPi * frequencies[i] * j / theSampleRate);
+
+        for (size_t n = 0; n < numCoefs; ++n)
+        {
+            numerator += static_cast<double> (coefs[n]) * factor;
+            factor *= jw;   // factor now holds jw^(n + 1)
+        }
+
+        phases[i] = std::arg (numerator);
+    }
+}
+
+//==============================================================================
+/** Scales every coefficient by 1 / (4 * sqrt (sum of the squared coefficients)).
+
+    If all the coefficients are zero they are left untouched, since the scale
+    factor would otherwise be infinite and turn every coefficient into NaN.
+*/
+template <typename NumericType>
+void FIR::Coefficients<NumericType>::Coefficients::normalise() noexcept
+{
+    auto magnitude = static_cast<NumericType> (0);
+
+    auto* coefs = coefficients.getRawDataPointer();
+    auto n = static_cast<size_t> (coefficients.size());
+
+    for (size_t i = 0; i < n; ++i)
+    {
+        auto c = coefs[i];
+        magnitude += c * c;
+    }
+
+    // An all-zero (or empty) coefficient set has nothing to normalise; dividing
+    // by zero here would produce inf, and 0 * inf would fill the array with NaNs.
+    if (magnitude == static_cast<NumericType> (0))
+        return;
+
+    auto magnitudeInv = 1 / (4 * std::sqrt (magnitude));
+
+    FloatVectorOperations::multiply (coefs, magnitudeInv, static_cast<int> (n));
+}
+
+//==============================================================================
+// Explicit instantiations of the coefficients struct for the float and double
+// numeric types, whose member functions are defined in this file.
+template struct FIR::Coefficients<float>;
+template struct FIR::Coefficients<double>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_FIRFilter.h
+++ b/modules/juce_dsp/processors/juce_FIRFilter.h
@ -0,0 +1,284 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Classes for FIR filter processing.
+*/
+namespace FIR
+{
+    template <typename NumericType>
+    struct Coefficients;
+
+    //==============================================================================
+    /**
+        A processing class that can perform FIR filtering on an audio signal, in the
+        time domain.
+
+        Using FIRFilter is fast enough for FIRCoefficients with a size lower than 128
+        samples. For longer filters, it might be more efficient to use the class
+        Convolution instead, which does the same processing in the frequency domain
+        thanks to FFT.
+
+        @see FIRFilter::Coefficients, Convolution, FFT
+
+        @tags{DSP}
+    */
+    template <typename SampleType>
+    class Filter
+    {
+    public:
+        /** The NumericType is the underlying primitive type used by the SampleType (which
+            could be either a primitive or vector)
+        */
+        using NumericType = typename SampleTypeHelpers::ElementType<SampleType>::Type;
+
+        //==============================================================================
+        /** This will create a filter which will produce silence. */
+        Filter() : coefficients (new Coefficients<NumericType>)                                     { reset(); }
+
+        /** Creates a filter with a given set of coefficients. */
+        Filter (Coefficients<NumericType>* coefficientsToUse)  : coefficients (coefficientsToUse)   { reset(); }
+
+        Filter (const Filter&) = default;
+        Filter (Filter&&) = default;
+        Filter& operator= (const Filter&) = default;
+        Filter& operator= (Filter&&) = default;
+
+        //==============================================================================
+        /** Prepare this filter for processing. */
+        inline void prepare (const ProcessSpec& spec) noexcept
+        {
+            // This class can only process mono signals. Use the ProcessorDuplicator class
+            // to apply this filter on a multi-channel audio stream.
+            jassert (spec.numChannels == 1);
+            ignoreUnused (spec);
+            reset();
+        }
+
+        /** Resets the filter's processing pipeline, ready to start a new stream of data.
+
+            Note that this clears the processing state (the delay line and the write
+            position), but the coefficients aren't changed. If no coefficients object
+            is set, this does nothing.
+        */
+        void reset()
+        {
+            if (coefficients != nullptr)
+            {
+                auto newSize = coefficients->getFilterOrder() + 1;
+
+                if (newSize != size)
+                {
+                    // One extra element leaves room for snapping the pointer to an aligned address.
+                    memory.malloc (1 + jmax (newSize, size, static_cast<size_t> (128)));
+
+                    fifo = snapPointerToAlignment (memory.getData(), sizeof (SampleType));
+                    size = newSize;
+                }
+
+                for (size_t i = 0; i < size; ++i)
+                    fifo[i] = SampleType {0};
+
+                // The delay line is now all zeros, so the write position can restart
+                // anywhere. It must restart inside [0, size): if the filter has just
+                // become shorter, a stale position >= size would make (m - p) wrap
+                // around in processSingleSample and read far outside the buffer.
+                pos = 0;
+            }
+        }
+
+        //==============================================================================
+        /** The coefficients of the FIR filter. It's up to the caller to ensure that
+            these coefficients are modified in a thread-safe way.
+
+            If you change the order of the coefficients then you must call reset after
+            modifying them.
+        */
+        typename Coefficients<NumericType>::Ptr coefficients;
+
+        //==============================================================================
+        /** Processes a block of samples.
+
+            When the context is bypassed, the input is copied to the output but is
+            still written into the delay line.
+        */
+        template <typename ProcessContext>
+        void process (const ProcessContext& context) noexcept
+        {
+            static_assert (std::is_same<typename ProcessContext::SampleType, SampleType>::value,
+                           "The sample-type of the FIR filter must match the sample-type supplied to this process callback");
+            check();
+
+            auto&& inputBlock  = context.getInputBlock();
+            auto&& outputBlock = context.getOutputBlock();
+
+            // This class can only process mono signals. Use the ProcessorDuplicator class
+            // to apply this filter on a multi-channel audio stream.
+            jassert (inputBlock.getNumChannels()  == 1);
+            jassert (outputBlock.getNumChannels() == 1);
+
+            auto numSamples = inputBlock.getNumSamples();
+            auto* src = inputBlock .getChannelPointer (0);
+            auto* dst = outputBlock.getChannelPointer (0);
+
+            auto* fir = coefficients->getRawCoefficients();
+            size_t p = pos;   // work on a local copy of the write position
+
+            if (context.isBypassed)
+            {
+                for (size_t i = 0; i < numSamples; ++i)
+                {
+                    fifo[p] = dst[i] = src[i];
+                    p = (p == 0 ? size - 1 : p - 1);   // step backwards, wrapping at the start
+                }
+            }
+            else
+            {
+                for (size_t i = 0; i < numSamples; ++i)
+                    dst[i] = processSingleSample (src[i], fifo, fir, size, p);
+            }
+
+            pos = p;
+        }
+
+
+        /** Processes a single sample, without any locking.
+            Use this if you need processing of a single value.
+        */
+        SampleType JUCE_VECTOR_CALLTYPE processSample (SampleType sample) noexcept
+        {
+            check();
+            return processSingleSample (sample, fifo, coefficients->getRawCoefficients(), size, pos);
+        }
+
+    private:
+        //==============================================================================
+        HeapBlock<SampleType> memory;   // owns the storage behind fifo
+        SampleType* fifo = nullptr;     // aligned pointer into memory, used as a circular delay line
+        size_t pos = 0, size = 0;       // current write position, and number of taps (order + 1)
+
+        //==============================================================================
+        /** Re-runs reset() if the number of coefficients no longer matches the delay line size. */
+        void check()
+        {
+            jassert (coefficients != nullptr);
+
+            if (size != (coefficients->getFilterOrder() + 1))
+                reset();
+        }
+
+        /** Writes one sample into the circular buffer and returns the filtered output.
+
+            @param sample  the new input sample
+            @param buf     the circular delay line, holding m elements
+            @param fir     the m filter coefficients
+            @param m       the number of taps
+            @param p       the write position, which must be less than m; it is moved
+                           back by one (wrapping to m - 1) before returning
+        */
+        static SampleType JUCE_VECTOR_CALLTYPE processSingleSample (SampleType sample, SampleType* buf,
+                                                                    const NumericType* fir, size_t m, size_t& p) noexcept
+        {
+            SampleType out (0);
+
+            buf[p] = sample;
+
+            // First part: from the write position up to the end of the buffer...
+            size_t k;
+            for (k = 0; k < m - p; ++k)
+                out += buf[(p + k)] * fir[k];
+
+            // ...second part: the wrapped-around samples at the start of the buffer.
+            for (size_t j = 0; j < p; ++j)
+                out += buf[j] * fir[j + k];
+
+            p = (p == 0 ? m - 1 : p - 1);
+
+            return out;
+        }
+
+
+        JUCE_LEAK_DETECTOR (Filter)
+    };
+
+    //==============================================================================
+    /**
+        A set of coefficients for use in an FIRFilter object.
+
+        @see FIRFilter
+
+        @tags{DSP}
+    */
+    template <typename NumericType>
+    struct Coefficients  : public ProcessorState
+    {
+        //==============================================================================
+        /** Builds a coefficient set holding a single default-constructed value. */
+        Coefficients()  : coefficients ({ NumericType() }) {}
+
+        /** Builds a coefficient set by resizing the array to the given number of elements. */
+        Coefficients (size_t size)    { coefficients.resize (static_cast<int> (size)); }
+
+        /** Builds a coefficient set by copying numSamples values from samples. */
+        Coefficients (const NumericType* samples, size_t numSamples)   : coefficients (samples, static_cast<int> (numSamples)) {}
+
+        Coefficients (const Coefficients&) = default;
+        Coefficients (Coefficients&&) = default;
+        Coefficients& operator= (const Coefficients&) = default;
+        Coefficients& operator= (Coefficients&&) = default;
+
+        /** A reference-counting pointer type for holding a Coefficients object. */
+        using Ptr = ReferenceCountedObjectPtr<Coefficients>;
+
+        //==============================================================================
+        /** Returns the filter order, i.e. the number of coefficients minus one. */
+        size_t getFilterOrder() const noexcept  { return static_cast<size_t> (coefficients.size()) - 1; }
+
+        /** Computes the magnitude of the frequency response at a single frequency,
+            for the given sample rate.
+        */
+        double getMagnitudeForFrequency (double frequency, double sampleRate) const noexcept;
+
+        /** Computes the magnitude of the frequency response for each of the numSamples
+            frequencies supplied, writing the results into the magnitudes array.
+        */
+        void getMagnitudeForFrequencyArray (double* frequencies, double* magnitudes,
+                                            size_t numSamples, double sampleRate) const noexcept;
+
+        /** Computes the phase of the frequency response at a single frequency,
+            for the given sample rate.
+        */
+        double getPhaseForFrequency (double frequency, double sampleRate) const noexcept;
+
+        /** Computes the phase of the frequency response for each of the numSamples
+            frequencies supplied, writing the results into the phases array.
+        */
+        void getPhaseForFrequencyArray (double* frequencies, double* phases,
+                                        size_t numSamples, double sampleRate) const noexcept;
+
+        /** Gives mutable access to the underlying coefficient data. */
+        NumericType* getRawCoefficients() noexcept              { return coefficients.getRawDataPointer(); }
+
+        /** Gives read-only access to the underlying coefficient data. */
+        const NumericType* getRawCoefficients() const noexcept  { return coefficients.begin(); }
+
+        //==============================================================================
+        /** Rescales the coefficients by a factor derived from the sum of their squares. */
+        void normalise() noexcept;
+
+        //==============================================================================
+        /** The coefficient storage itself.
+            Modify these values directly only if you really know what you're doing.
+        */
+        Array<NumericType> coefficients;
+    };
+}
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_FIRFilter_test.cpp
+++ b/modules/juce_dsp/processors/juce_FIRFilter_test.cpp
@ -0,0 +1,221 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Unit tests for FIR::Filter.
+
+    Random input is run through a filter built from random coefficients and the
+    output is compared against a naive reference FIR implementation. This is
+    repeated for several coefficient counts, several sample types and several ways
+    of feeding the filter (one large block, sample by sample, a block split into
+    chunks).
+*/
+class FIRFilterTest : public UnitTest
+{
+    /** Buffer helpers for scalar sample types. */
+    template <typename Type>
+    struct Helpers
+    {
+        /** Fills n elements of buffer with (2 * random.nextFloat()) - 1. */
+        static void fillRandom (Random& random, Type* buffer, size_t n)
+        {
+            for (size_t i = 0; i < n; ++i)
+                buffer[i] = (2.0f * random.nextFloat()) - 1.0f;
+        }
+
+        /** Returns true if the first n elements of a and b differ by no more than 1e-6.
+            A NaN difference (e.g. a NaN in either array) counts as a mismatch.
+        */
+        static bool checkArrayIsSimilar (Type* a, Type* b, size_t n) noexcept
+        {
+            for (size_t i = 0; i < n; ++i)
+                if (! (std::abs (a[i] - b[i]) <= 1e-6f))   // negated "<=" so that a NaN difference fails as well
+                    return false;
+
+            return true;
+        }
+    };
+
+   #if JUCE_USE_SIMD
+    /** Buffer helpers for SIMDRegister sample types: the buffer is reinterpreted as a
+        flat array of n * SIMDRegister<Type>::size() scalars and handed to the scalar helpers.
+    */
+    template <typename Type>
+    struct Helpers<SIMDRegister<Type>>
+    {
+        static void fillRandom (Random& random, SIMDRegister<Type>* buffer, size_t n)
+        {
+            Helpers<Type>::fillRandom (random, reinterpret_cast<Type*> (buffer), n * SIMDRegister<Type>::size());
+        }
+
+        static bool checkArrayIsSimilar (SIMDRegister<Type>* a, SIMDRegister<Type>* b, size_t n) noexcept
+        {
+            return Helpers<Type>::checkArrayIsSimilar (reinterpret_cast<Type*> (a),
+                                                       reinterpret_cast<Type*> (b),
+                                                       n * SIMDRegister<Type>::size());
+        }
+    };
+   #endif
+
+    /** Dispatches to the Helpers specialisation that matches Type. */
+    template <typename Type>
+    static void fillRandom (Random& random, Type* buffer, size_t n) { Helpers<Type>::fillRandom (random, buffer, n); }
+
+    /** Dispatches to the Helpers specialisation that matches Type. */
+    template <typename Type>
+    static bool checkArrayIsSimilar (Type* a, Type* b, size_t n) noexcept { return Helpers<Type>::checkArrayIsSimilar (a, b, n); }
+
+    //==============================================================================
+    /** Naive reference implementation of an FIR, used as the expected result.
+
+        Keeps the most recent numCoefficients input samples in a delay line (newest
+        sample at index 0) and, for each input sample, writes the sum of
+        buffer[j] * firCoefficients[j] to the output. With no coefficients the output
+        is simply cleared.
+    */
+    template <typename SampleType, typename NumericType>
+    static void reference (const NumericType* firCoefficients, size_t numCoefficients,
+                           const SampleType* input, SampleType* output, size_t n) noexcept
+    {
+        if (numCoefficients == 0)
+        {
+            zeromem (output, sizeof (SampleType) * n);
+            return;
+        }
+
+        // With SIMD enabled the scratch space is over-allocated by one register's worth of
+        // samples, so that the working pointer can be moved up to the next SIMD-aligned address.
+        HeapBlock<SampleType> scratchBuffer (numCoefficients
+                                            #if JUCE_USE_SIMD
+                                             + (SIMDRegister<NumericType>::SIMDRegisterSize / sizeof (SampleType))
+                                            #endif
+                                             );
+       #if JUCE_USE_SIMD
+        SampleType* buffer = reinterpret_cast<SampleType*> (SIMDRegister<NumericType>::getNextSIMDAlignedPtr (reinterpret_cast<NumericType*> (scratchBuffer.getData())));
+       #else
+        SampleType* buffer = scratchBuffer.getData();
+       #endif
+
+        zeromem (buffer, sizeof (SampleType) * numCoefficients);
+
+        for (size_t i = 0; i < n; ++i)
+        {
+            // shift the delay line by one sample (numCoefficients >= 1 here, so this cannot wrap)
+            for (size_t j = (numCoefficients - 1); j >= 1; --j)
+                buffer[j] = buffer[j-1];
+
+            buffer[0] = input[i];
+
+            SampleType sum (0);
+
+            for (size_t j = 0; j < numCoefficients; ++j)
+                sum += buffer[j] * firCoefficients[j];
+
+            output[i] = sum;
+        }
+    }
+
+    //==============================================================================
+    /** Drives the filter with a single non-replacing process() call covering all n samples. */
+    struct LargeBlockTest
+    {
+        template <typename FloatType>
+        static void run (FIR::Filter<FloatType>& filter, FloatType* src, FloatType* dst, size_t n)
+        {
+            AudioBlock<FloatType> input  (&src, 1, n);
+            AudioBlock<FloatType> output (&dst, 1, n);
+            ProcessContextNonReplacing<FloatType> context (input, output);
+
+            filter.process (context);
+        }
+    };
+
+    /** Drives the filter one sample at a time through processSample(). */
+    struct SampleBySampleTest
+    {
+        template <typename FloatType>
+        static void run (FIR::Filter<FloatType>& filter, FloatType* src, FloatType* dst, size_t n)
+        {
+            for (size_t i = 0; i < n; ++i)
+                dst[i] = filter.processSample (src[i]);
+        }
+    };
+
+    /** Drives the filter with consecutive chunks of at most n / 3 samples each. */
+    struct SplitBlockTest
+    {
+        template <typename FloatType>
+        static void run (FIR::Filter<FloatType>& filter, FloatType* input, FloatType* output, size_t n)
+        {
+            // Never use a zero-length chunk: with n < 3, n / 3 is 0 and the loop would not advance.
+            const size_t maxChunk = (n / 3 > 0) ? n / 3 : 1;
+
+            for (size_t i = 0; i < n;)
+            {
+                const auto len = jmin (n - i, maxChunk);
+                auto* src = input + i;
+                auto* dst = output + i;
+
+                AudioBlock<FloatType> inBlock  (&src, 1, len);
+                AudioBlock<FloatType> outBlock (&dst, 1, len);
+                ProcessContextNonReplacing<FloatType> context (inBlock, outBlock);
+
+                filter.process (context);
+
+                i += len;
+            }
+        }
+    };
+
+    //==============================================================================
+    /** Runs TheTest for one sample/coefficient type pair.
+
+        For each coefficient count in {1, 2, 4, 8, 12, 13, 25}, 813 random input
+        samples are filtered both by reference() and by FIR::Filter (driven through
+        TheTest::run), and the two outputs are expected to be similar. The Random
+        object uses a fixed seed, so the data is the same on every run.
+    */
+    template <typename TheTest, typename SampleType, typename NumericType>
+    void runTestForType()
+    {
+        Random random (8392829);
+
+        for (auto size : {1, 2, 4, 8, 12, 13, 25})
+        {
+            constexpr size_t n = 813;
+
+            // NOTE(review): these AudioBlocks presumably allocate their storage inside the
+            // HeapBlocks passed to them - confirm against AudioBlock's constructor.
+            HeapBlock<char> inputBuffer, outputBuffer, refBuffer;
+            AudioBlock<SampleType> input (inputBuffer, 1, n), output (outputBuffer, 1, n), ref (refBuffer, 1, n);
+            fillRandom (random, input.getChannelPointer (0), n);
+
+            HeapBlock<char> firBlock;
+            AudioBlock<NumericType> fir (firBlock, 1, static_cast<size_t> (size));
+            fillRandom (random, fir.getChannelPointer (0), static_cast<size_t> (size));
+
+            FIR::Filter<SampleType> filter (new FIR::Coefficients<NumericType> (fir.getChannelPointer (0), static_cast<size_t> (size)));
+            ProcessSpec spec {0.0, n, 1};
+            filter.prepare (spec);
+
+            reference<SampleType, NumericType> (fir.getChannelPointer (0), static_cast<size_t> (size),
+                                                input.getChannelPointer (0), ref.getChannelPointer (0), n);
+
+            TheTest::template run<SampleType> (filter, input.getChannelPointer (0), output.getChannelPointer (0), n);
+            expect (checkArrayIsSimilar (output.getChannelPointer (0), ref.getChannelPointer (0), n));
+        }
+    }
+
+    /** Begins a test named unitTestName and runs TheTest for float and double and,
+        when JUCE_USE_SIMD is enabled, for SIMDRegister<float> and SIMDRegister<double>.
+    */
+    template <typename TheTest>
+    void runTestForAllTypes (const char* unitTestName)
+    {
+        beginTest (unitTestName);
+
+        runTestForType<TheTest, float, float>();
+        runTestForType<TheTest, double, double>();
+       #if JUCE_USE_SIMD
+        runTestForType<TheTest, SIMDRegister<float>, float>();
+        runTestForType<TheTest, SIMDRegister<double>, double>();
+       #endif
+    }
+
+
+public:
+    FIRFilterTest() : UnitTest ("FIR Filter", "DSP") {}
+
+    /** Runs the three driving strategies, each for all sample types. */
+    void runTest() override
+    {
+        runTestForAllTypes<LargeBlockTest> ("Large Blocks");
+        runTestForAllTypes<SampleBySampleTest> ("Sample by Sample");
+        runTestForAllTypes<SplitBlockTest> ("Split Block");
+    }
+};
+
+// The single, file-local instance of the test.
+// NOTE(review): presumably constructing a UnitTest registers it with JUCE's test runner - confirm.
+static FIRFilterTest firFilterUnitTest;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_Gain.h
+++ b/modules/juce_dsp/processors/juce_Gain.h
@ -0,0 +1,147 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    A gain processor: multiplies audio by a smoothed gain factor, either one sample
+    at a time or for whole AudioBlocks.
+
+    @tags{DSP}
+*/
+template <typename FloatType>
+class Gain
+{
+public:
+    Gain() noexcept {}
+
+    //==============================================================================
+    /** Sets the target gain, given as a linear factor. */
+    void setGainLinear (FloatType newGain) noexcept             { gain.setValue (newGain); }
+
+    /** Sets the target gain, given in decibels. */
+    void setGainDecibels (FloatType newGainDecibels) noexcept
+    {
+        const auto linearGain = Decibels::decibelsToGain<FloatType> (newGainDecibels);
+        setGainLinear (linearGain);
+    }
+
+    /** Returns the target gain as a linear factor. */
+    FloatType getGainLinear() const noexcept                    { return gain.getTargetValue(); }
+
+    /** Returns the target gain converted to decibels. */
+    FloatType getGainDecibels() const noexcept
+    {
+        const auto linearGain = getGainLinear();
+        return Decibels::gainToDecibels<FloatType> (linearGain);
+    }
+
+    /** Changes the duration of the smoothing ramp. If the value actually changes,
+        the smoother is reset (see reset()).
+    */
+    void setRampDurationSeconds (double newDurationSeconds) noexcept
+    {
+        if (rampDurationSeconds == newDurationSeconds)
+            return;
+
+        rampDurationSeconds = newDurationSeconds;
+        reset();
+    }
+
+    /** Returns the duration of the smoothing ramp, in seconds. */
+    double getRampDurationSeconds() const noexcept              { return rampDurationSeconds; }
+
+    /** Returns true while the gain is still being interpolated towards its target. */
+    bool isSmoothing() const noexcept                           { return gain.isSmoothing(); }
+
+    //==============================================================================
+    /** Must be called before processing: stores the sample rate and resets the smoother. */
+    void prepare (const ProcessSpec& spec) noexcept
+    {
+        sampleRate = spec.sampleRate;
+        reset();
+    }
+
+    /** Resets the gain smoother using the current sample rate and ramp duration.
+        Does nothing until a positive sample rate has been set.
+    */
+    void reset() noexcept
+    {
+        if (sampleRate > 0)
+            gain.reset (sampleRate, rampDurationSeconds);
+    }
+
+    //==============================================================================
+    /** Applies the next smoothed gain value to a single sample and returns the result. */
+    template <typename SampleType>
+    SampleType JUCE_VECTOR_CALLTYPE processSample (SampleType s) noexcept
+    {
+        return s * gain.getNextValue();
+    }
+
+    /** Applies the gain to the context's input block, writing to its output block.
+
+        When the context is bypassed the smoother is still advanced by the block
+        length, and the input is copied to the output if the two blocks are separate.
+    */
+    template <typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        auto&& input  = context.getInputBlock();
+        auto&& output = context.getOutputBlock();
+
+        jassert (input.getNumChannels() == output.getNumChannels());
+        jassert (input.getNumSamples() == output.getNumSamples());
+
+        const auto numSamples   = input.getNumSamples();
+        const auto channelCount = input.getNumChannels();
+
+        if (context.isBypassed)
+        {
+            gain.skip (static_cast<int> (numSamples));
+
+            if (context.usesSeparateInputAndOutputBlocks())
+                output.copy (input);
+
+            return;
+        }
+
+        if (channelCount == 1)
+        {
+            // mono: apply the smoothed gain directly, sample by sample
+            auto* source = input.getChannelPointer (0);
+            auto* dest   = output.getChannelPointer (0);
+
+            for (size_t i = 0; i < numSamples; ++i)
+                dest[i] = source[i] * gain.getNextValue();
+
+            return;
+        }
+
+        // otherwise: render the gain ramp once into a stack buffer, then apply the
+        // same ramp to every channel
+        auto* ramp = static_cast<FloatType*> (alloca (sizeof (FloatType) * numSamples));
+
+        for (size_t i = 0; i < numSamples; ++i)
+            ramp[i] = gain.getNextValue();
+
+        for (size_t channel = 0; channel < channelCount; ++channel)
+            FloatVectorOperations::multiply (output.getChannelPointer (channel),
+                                             input.getChannelPointer (channel),
+                                             ramp, static_cast<int> (numSamples));
+    }
+
+private:
+    //==============================================================================
+    LinearSmoothedValue<FloatType> gain;
+    double sampleRate = 0, rampDurationSeconds = 0;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_IIRFilter.cpp
+++ b/modules/juce_dsp/processors/juce_IIRFilter.cpp
@ -0,0 +1,481 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Default constructor: fills the coefficient array with five value-initialised
+    entries (the same number the six-argument constructor stores).
+*/
+template <typename NumericType>
+IIR::Coefficients<NumericType>::Coefficients()
+    : coefficients ({ NumericType(), NumericType(), NumericType(), NumericType(), NumericType() })
+{
+}
+
+/** Creates first-order coefficients from b0, b1 (numerator) and a0, a1 (denominator).
+
+    Every value is scaled by 1 / a0, so only { b0, b1, a1 } are stored, in that
+    order. a0 is asserted to be non-zero.
+*/
+template <typename NumericType>
+IIR::Coefficients<NumericType>::Coefficients (NumericType b0, NumericType b1,
+                                              NumericType a0, NumericType a1)
+{
+    jassert (a0 != 0);
+
+    coefficients.clear();
+
+    const auto scale = static_cast<NumericType> (1) / a0;
+
+    coefficients.add (b0 * scale, b1 * scale, a1 * scale);
+}
+
+/** Creates second-order coefficients from b0..b2 (numerator) and a0..a2 (denominator).
+
+    Every value is scaled by 1 / a0, so only { b0, b1, b2, a1, a2 } are stored, in
+    that order. a0 is asserted to be non-zero.
+*/
+template <typename NumericType>
+IIR::Coefficients<NumericType>::Coefficients (NumericType b0, NumericType b1, NumericType b2,
+                                              NumericType a0, NumericType a1, NumericType a2)
+{
+    jassert (a0 != 0);
+
+    coefficients.clear();
+
+    const auto scale = static_cast<NumericType> (1) / a0;
+
+    coefficients.add (b0 * scale, b1 * scale, b2 * scale,
+                      a1 * scale, a2 * scale);
+}
+
+/** Creates third-order coefficients from b0..b3 (numerator) and a0..a3 (denominator).
+
+    Every value is scaled by 1 / a0, so only { b0, b1, b2, b3, a1, a2, a3 } are
+    stored, in that order. a0 is asserted to be non-zero.
+*/
+template <typename NumericType>
+IIR::Coefficients<NumericType>::Coefficients (NumericType b0, NumericType b1, NumericType b2, NumericType b3,
+                                              NumericType a0, NumericType a1, NumericType a2, NumericType a3)
+{
+    jassert (a0 != 0);
+
+    coefficients.clear();
+
+    const auto scale = static_cast<NumericType> (1) / a0;
+
+    coefficients.add (b0 * scale, b1 * scale, b2 * scale, b3 * scale,
+                      a1 * scale, a2 * scale, a3 * scale);
+}
+
+/** Builds the coefficients of a first-order low-pass filter.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the cutoff frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @returns a newly allocated Coefficients object built from
+             (b0, b1, a0, a1) = (n, n, n + 1, n - 1), with n = tan (pi * frequency / sampleRate)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeFirstOrderLowPass (double sampleRate,
+                                                                                                    NumericType frequency)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+
+    const auto n = std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+
+    return new Coefficients (n, n, n + 1, n - 1);
+}
+
+/** Builds the coefficients of a first-order high-pass filter.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the cutoff frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @returns a newly allocated Coefficients object built from
+             (b0, b1, a0, a1) = (1, -1, n + 1, n - 1), with n = tan (pi * frequency / sampleRate)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeFirstOrderHighPass (double sampleRate,
+                                                                                                     NumericType frequency)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+
+    const auto n = std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+
+    return new Coefficients (1, -1, n + 1, n - 1);
+}
+
+/** Builds the coefficients of a first-order all-pass filter.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the filter frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @returns a newly allocated Coefficients object built from
+             (b0, b1, a0, a1) = (n - 1, n + 1, n + 1, n - 1), with n = tan (pi * frequency / sampleRate)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeFirstOrderAllPass (double sampleRate,
+                                                                                                    NumericType frequency)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+
+    const auto n = std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+
+    return new Coefficients (n - 1, n + 1, n + 1, n - 1);
+}
+
+/** Convenience overload: forwards sampleRate and frequency unchanged to the
+    three-argument makeLowPass(), passing the inverseRootTwo constant (declared
+    outside this file section) as Q.
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeLowPass (double sampleRate,
+                                                                                          NumericType frequency)
+{
+    return makeLowPass (sampleRate, frequency, inverseRootTwo);
+}
+
+/** Builds the coefficients of a second-order low-pass filter with an explicit Q.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the cutoff frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @param Q            the Q factor; asserted to be positive
+    @returns a newly allocated Coefficients object (a0 is passed as 1)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeLowPass (double sampleRate,
+                                                                                          NumericType frequency,
+                                                                                          NumericType Q)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+    jassert (Q > 0.0);
+
+    // n = 1 / tan (pi * frequency / sampleRate); c1 is the common normalising factor
+    const auto n = 1 / std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+    const auto nSquared = n * n;
+    const auto invQ = 1 / Q;
+    const auto c1 = 1 / (1 + invQ * n + nSquared);
+
+    return new Coefficients (c1, c1 * 2, c1,
+                             1, c1 * 2 * (1 - nSquared),
+                             c1 * (1 - invQ * n + nSquared));
+}
+
+/** Convenience overload: forwards sampleRate and frequency unchanged to the
+    three-argument makeHighPass(), passing the inverseRootTwo constant (declared
+    outside this file section) as Q.
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeHighPass (double sampleRate,
+                                                                                           NumericType frequency)
+{
+    return makeHighPass (sampleRate, frequency, inverseRootTwo);
+}
+
+/** Builds the coefficients of a second-order high-pass filter with an explicit Q.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the cutoff frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @param Q            the Q factor; asserted to be positive
+    @returns a newly allocated Coefficients object (a0 is passed as 1)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeHighPass (double sampleRate,
+                                                                                           NumericType frequency,
+                                                                                           NumericType Q)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+    jassert (Q > 0.0);
+
+    // n = tan (pi * frequency / sampleRate); c1 is the common normalising factor
+    const auto n = std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+    const auto nSquared = n * n;
+    const auto invQ = 1 / Q;
+    const auto c1 = 1 / (1 + invQ * n + nSquared);
+
+    return new Coefficients (c1, c1 * -2, c1,
+                             1, c1 * 2 * (nSquared - 1),
+                             c1 * (1 - invQ * n + nSquared));
+}
+
+/** Convenience overload: forwards sampleRate and frequency unchanged to the
+    three-argument makeBandPass(), passing the inverseRootTwo constant (declared
+    outside this file section) as Q.
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeBandPass (double sampleRate,
+                                                                                           NumericType frequency)
+{
+    return makeBandPass (sampleRate, frequency, inverseRootTwo);
+}
+
+/** Builds the coefficients of a second-order band-pass filter with an explicit Q.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the filter frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @param Q            the Q factor; asserted to be positive
+    @returns a newly allocated Coefficients object (b1 is 0, a0 is passed as 1)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeBandPass (double sampleRate,
+                                                                                           NumericType frequency,
+                                                                                           NumericType Q)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+    jassert (Q > 0.0);
+
+    // n = 1 / tan (pi * frequency / sampleRate); c1 is the common normalising factor
+    const auto n = 1 / std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+    const auto nSquared = n * n;
+    const auto invQ = 1 / Q;
+    const auto c1 = 1 / (1 + invQ * n + nSquared);
+
+    return new Coefficients (c1 * n * invQ, 0,
+                            -c1 * n * invQ, 1,
+                             c1 * 2 * (1 - nSquared),
+                             c1 * (1 - invQ * n + nSquared));
+}
+
+/** Convenience overload: forwards sampleRate and frequency unchanged to the
+    three-argument makeNotch(), passing the inverseRootTwo constant (declared
+    outside this file section) as Q.
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeNotch (double sampleRate,
+                                                                                        NumericType frequency)
+{
+    return makeNotch (sampleRate, frequency, inverseRootTwo);
+}
+
+/** Builds the coefficients of a second-order notch filter with an explicit Q.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the filter frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @param Q            the Q factor; asserted to be positive
+    @returns a newly allocated Coefficients object (b2 equals b0, a1 equals b1,
+             a0 is passed as 1)
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeNotch (double sampleRate,
+                                                                                        NumericType frequency,
+                                                                                        NumericType Q)
+{
+    jassert (sampleRate > 0.0);
+    // The Nyquist bound is converted to NumericType (not float) so it isn't rounded when NumericType is double.
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+    jassert (Q > 0.0);
+
+    // n = 1 / tan (pi * frequency / sampleRate); c1 is the common normalising factor
+    const auto n = 1 / std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+    const auto nSquared = n * n;
+    const auto invQ = 1 / Q;
+    const auto c1 = 1 / (1 + n * invQ + nSquared);
+    const auto b0 = c1 * (1 + nSquared);
+    const auto b1 = 2 * c1 * (1 - nSquared);
+
+    return new Coefficients (b0, b1, b0, 1, b1, c1 * (1 - n * invQ + nSquared));
+}
+
+/** Convenience overload: forwards sampleRate and frequency unchanged to the
+    three-argument makeAllPass(), passing the inverseRootTwo constant (declared
+    outside this file section) as Q.
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeAllPass (double sampleRate,
+                                                                                          NumericType frequency)
+{
+    return makeAllPass (sampleRate, frequency, inverseRootTwo);
+}
+
+/** Builds the coefficients of a second-order all-pass filter with an explicit Q.
+
+    The numerator is the denominator with its coefficients in reverse order:
+    (b0, b1, b2) = (a2, a1, 1) and (a0, a1, a2) = (1, a1, a2).
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the filter frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2]
+    @param Q            the Q factor; asserted to be positive
+    @returns a newly allocated Coefficients object
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeAllPass (double sampleRate,
+                                                                                          NumericType frequency,
+                                                                                          NumericType Q)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= sampleRate * 0.5);
+    jassert (Q > 0);
+
+    const auto n = 1 / std::tan (MathConstants<NumericType>::pi * frequency / static_cast<NumericType> (sampleRate));
+    const auto n2 = n * n;
+    const auto oneOverQ = 1 / Q;
+    const auto norm = 1 / (1 + oneOverQ * n + n2);
+
+    const auto outer = norm * (1 - n * oneOverQ + n2);   // used as both b0 and a2
+    const auto inner = norm * 2 * (1 - n2);              // used as both b1 and a1
+
+    return new Coefficients (outer, inner, 1, 1, inner, outer);
+}
+
+/** Builds the coefficients of a second-order low-shelf filter.
+
+    @param sampleRate       the sample rate; asserted to be positive
+    @param cutOffFrequency  the shelf frequency, in the same units as sampleRate;
+                            asserted to be in the range (0, sampleRate / 2], and
+                            raised to at least 2 before use
+    @param Q                the Q factor; asserted to be positive
+    @param gainFactor       the shelf gain; its square root (floored at 0) is used as A
+    @returns a newly allocated Coefficients object
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeLowShelf (double sampleRate,
+                                                                                           NumericType cutOffFrequency,
+                                                                                           NumericType Q,
+                                                                                           NumericType gainFactor)
+{
+    jassert (sampleRate > 0.0);
+    jassert (cutOffFrequency > 0.0 && cutOffFrequency <= sampleRate * 0.5);
+    jassert (Q > 0.0);
+
+    const auto A = jmax (static_cast<NumericType> (0.0), std::sqrt (gainFactor));
+    const auto aMinusOne = A - 1;
+    const auto aPlusOne = A + 1;
+
+    const auto clampedFrequency = jmax (cutOffFrequency, static_cast<NumericType> (2.0));
+    const auto omega = (2 * MathConstants<NumericType>::pi * clampedFrequency) / static_cast<NumericType> (sampleRate);
+    const auto cosOmega = std::cos (omega);
+    const auto beta = std::sin (omega) * std::sqrt (A) / Q;
+    const auto aMinusOneCos = aMinusOne * cosOmega;
+
+    const auto b0 = A * (aPlusOne - aMinusOneCos + beta);
+    const auto b1 = A * 2 * (aMinusOne - aPlusOne * cosOmega);
+    const auto b2 = A * (aPlusOne - aMinusOneCos - beta);
+    const auto a0 = aPlusOne + aMinusOneCos + beta;
+    const auto a1 = -2 * (aMinusOne + aPlusOne * cosOmega);
+    const auto a2 = aPlusOne + aMinusOneCos - beta;
+
+    return new Coefficients (b0, b1, b2, a0, a1, a2);
+}
+
+/** Builds the coefficients of a second-order high-shelf filter.
+
+    @param sampleRate       the sample rate; asserted to be positive
+    @param cutOffFrequency  the shelf frequency, in the same units as sampleRate;
+                            asserted to be in the range (0, sampleRate / 2], and
+                            raised to at least 2 before use
+    @param Q                the Q factor; asserted to be positive
+    @param gainFactor       the shelf gain; its square root (floored at 0) is used as A
+    @returns a newly allocated Coefficients object
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makeHighShelf (double sampleRate,
+                                                                                            NumericType cutOffFrequency,
+                                                                                            NumericType Q,
+                                                                                            NumericType gainFactor)
+{
+    jassert (sampleRate > 0);
+    jassert (cutOffFrequency > 0 && cutOffFrequency <= static_cast<NumericType> (sampleRate * 0.5));
+    jassert (Q > 0);
+
+    const auto A = jmax (static_cast<NumericType> (0.0), std::sqrt (gainFactor));
+    const auto aMinusOne = A - 1;
+    const auto aPlusOne = A + 1;
+
+    const auto clampedFrequency = jmax (cutOffFrequency, static_cast<NumericType> (2.0));
+    const auto omega = (2 * MathConstants<NumericType>::pi * clampedFrequency) / static_cast<NumericType> (sampleRate);
+    const auto cosOmega = std::cos (omega);
+    const auto beta = std::sin (omega) * std::sqrt (A) / Q;
+    const auto aMinusOneCos = aMinusOne * cosOmega;
+
+    const auto b0 = A * (aPlusOne + aMinusOneCos + beta);
+    const auto b1 = A * -2 * (aMinusOne + aPlusOne * cosOmega);
+    const auto b2 = A * (aPlusOne + aMinusOneCos - beta);
+    const auto a0 = aPlusOne - aMinusOneCos + beta;
+    const auto a1 = 2 * (aMinusOne - aPlusOne * cosOmega);
+    const auto a2 = aPlusOne - aMinusOneCos - beta;
+
+    return new Coefficients (b0, b1, b2, a0, a1, a2);
+}
+
+/** Builds the coefficients of a second-order peak filter.
+
+    @param sampleRate   the sample rate; asserted to be positive
+    @param frequency    the centre frequency, in the same units as sampleRate;
+                        asserted to be in the range (0, sampleRate / 2], and raised
+                        to at least 2 before use
+    @param Q            the Q factor; asserted to be positive
+    @param gainFactor   the peak gain; asserted to be positive. Its square root
+                        (floored at 0) is used as A
+    @returns a newly allocated Coefficients object
+*/
+template <typename NumericType>
+typename IIR::Coefficients<NumericType>::Ptr IIR::Coefficients<NumericType>::makePeakFilter (double sampleRate,
+                                                                                             NumericType frequency,
+                                                                                             NumericType Q,
+                                                                                             NumericType gainFactor)
+{
+    jassert (sampleRate > 0);
+    jassert (frequency > 0 && frequency <= static_cast<NumericType> (sampleRate * 0.5));
+    jassert (Q > 0);
+    jassert (gainFactor > 0);
+
+    const auto A = jmax (static_cast<NumericType> (0.0), std::sqrt (gainFactor));
+
+    const auto clampedFrequency = jmax (frequency, static_cast<NumericType> (2.0));
+    const auto omega = (2 * MathConstants<NumericType>::pi * clampedFrequency) / static_cast<NumericType> (sampleRate);
+    const auto alpha = std::sin (omega) / (Q * 2);
+    const auto minusTwoCos = -2 * std::cos (omega);   // shared by b1 and a1
+
+    const auto boost = alpha * A;
+    const auto cut = alpha / A;
+
+    return new Coefficients (1 + boost, minusTwoCos, 1 - boost,
+                             1 + cut,   minusTwoCos, 1 - cut);
+}
+
+/** Returns the filter order, computed as (number of stored coefficients - 1) / 2
+    using unsigned integer arithmetic.
+*/
+template <typename NumericType>
+size_t IIR::Coefficients<NumericType>::getFilterOrder() const noexcept
+{
+    const auto numCoefficients = static_cast<size_t> (coefficients.size());
+
+    return (numCoefficients - 1) / 2;
+}
+
+/** Evaluates the magnitude of the filter's transfer function at one frequency.
+
+    With zInv = exp (-j * 2 * pi * frequency / sampleRate), the first (order + 1)
+    coefficients form the numerator polynomial in zInv, and the remaining `order`
+    coefficients are the denominator terms for zInv^1 ... zInv^order on top of an
+    implicit leading 1.
+
+    @param frequency    asserted to be in the range [0, sampleRate / 2]
+    @param sampleRate   the sample rate to evaluate the response at
+    @returns the absolute value of numerator / denominator
+*/
+template <typename NumericType>
+double IIR::Coefficients<NumericType>::getMagnitudeForFrequency (double frequency, double sampleRate) const noexcept
+{
+    jassert (frequency >= 0 && frequency <= sampleRate * 0.5);
+
+    constexpr Complex<double> j (0, 1);
+    const auto filterOrder = getFilterOrder();
+    const auto* c = coefficients.begin();
+
+    const Complex<double> zInv = std::exp (-MathConstants<double>::twoPi * frequency * j / sampleRate);
+
+    // numerator: sum of c[n] * zInv^n for n = 0 .. order
+    Complex<double> numerator = 0.0, zPower = 1.0;
+
+    for (size_t n = 0; n <= filterOrder; ++n)
+    {
+        numerator += static_cast<double> (c[n]) * zPower;
+        zPower *= zInv;
+    }
+
+    // denominator: 1 + sum of c[order + k] * zInv^k for k = 1 .. order
+    Complex<double> denominator = 1.0;
+    zPower = zInv;
+
+    for (size_t k = 1; k <= filterOrder; ++k)
+    {
+        denominator += static_cast<double> (c[filterOrder + k]) * zPower;
+        zPower *= zInv;
+    }
+
+    return std::abs (numerator / denominator);
+}
+
+/** Evaluates the magnitude of the filter's transfer function for an array of frequencies.
+
+    For each frequency, zInv = exp (-j * 2 * pi * frequency / sampleRate); the first
+    (order + 1) coefficients form the numerator polynomial in zInv, and the
+    remaining `order` coefficients are the denominator terms for zInv^1 ... zInv^order
+    on top of an implicit leading 1.
+
+    @param frequencies  numSamples input frequencies, each asserted to be in the
+                        range [0, sampleRate / 2]
+    @param magnitudes   receives numSamples results, |numerator / denominator|
+    @param numSamples   the number of entries in both arrays
+    @param sampleRate   the sample rate to evaluate the response at
+*/
+template <typename NumericType>
+void IIR::Coefficients<NumericType>::getMagnitudeForFrequencyArray (const double* frequencies, double* magnitudes,
+                                                                    size_t numSamples, double sampleRate) const noexcept
+{
+    constexpr Complex<double> j (0, 1);
+    const auto order = getFilterOrder();
+    const auto* coefs = coefficients.begin();
+
+    // (order is an unsigned size_t, so there is nothing to assert about its sign)
+
+    for (size_t i = 0; i < numSamples; ++i)
+    {
+        jassert (frequencies[i] >= 0 && frequencies[i] <= sampleRate * 0.5);
+
+        Complex<double> numerator = 0.0, denominator = 0.0, factor = 1.0;
+        Complex<double> jw = std::exp (-MathConstants<double>::twoPi * frequencies[i] * j / sampleRate);
+
+        // numerator: sum of coefs[n] * jw^n for n = 0 .. order
+        for (size_t n = 0; n <= order; ++n)
+        {
+            numerator += static_cast<double> (coefs[n]) * factor;
+            factor *= jw;
+        }
+
+        // denominator: implicit leading 1, then coefs[order + 1 .. 2 * order] times jw^1 .. jw^order
+        denominator = 1.0;
+        factor = jw;
+
+        for (size_t n = order + 1; n <= 2 * order; ++n)
+        {
+            denominator += static_cast<double> (coefs[n]) * factor;
+            factor *= jw;
+        }
+
+        magnitudes[i] = std::abs (numerator / denominator);
+    }
+}
+
+/** Evaluates the phase of the filter's transfer function at one frequency.
+
+    With zInv = exp (-j * 2 * pi * frequency / sampleRate), the first (order + 1)
+    coefficients form the numerator polynomial in zInv, and the remaining `order`
+    coefficients are the denominator terms for zInv^1 ... zInv^order on top of an
+    implicit leading 1.
+
+    @param frequency    asserted to be in the range [0, sampleRate / 2]
+    @param sampleRate   the sample rate to evaluate the response at
+    @returns the argument (std::arg) of numerator / denominator
+*/
+template <typename NumericType>
+double IIR::Coefficients<NumericType>::getPhaseForFrequency (double frequency, double sampleRate) const noexcept
+{
+    jassert (frequency >= 0 && frequency <= sampleRate * 0.5);
+
+    constexpr Complex<double> j (0, 1);
+    const auto filterOrder = getFilterOrder();
+    const auto* c = coefficients.begin();
+
+    const Complex<double> zInv = std::exp (-MathConstants<double>::twoPi * frequency * j / sampleRate);
+
+    // numerator: sum of c[n] * zInv^n for n = 0 .. order
+    Complex<double> numerator = 0.0, zPower = 1.0;
+
+    for (size_t n = 0; n <= filterOrder; ++n)
+    {
+        numerator += static_cast<double> (c[n]) * zPower;
+        zPower *= zInv;
+    }
+
+    // denominator: 1 + sum of c[order + k] * zInv^k for k = 1 .. order
+    Complex<double> denominator = 1.0;
+    zPower = zInv;
+
+    for (size_t k = 1; k <= filterOrder; ++k)
+    {
+        denominator += static_cast<double> (c[filterOrder + k]) * zPower;
+        zPower *= zInv;
+    }
+
+    return std::arg (numerator / denominator);
+}
+
+/** Evaluates the phase of the filter's transfer function for an array of frequencies.
+
+    For each frequency, zInv = exp (-j * 2 * pi * frequency / sampleRate); the first
+    (order + 1) coefficients form the numerator polynomial in zInv, and the
+    remaining `order` coefficients are the denominator terms for zInv^1 ... zInv^order
+    on top of an implicit leading 1.
+
+    @param frequencies  numSamples input frequencies, each asserted to be in the
+                        range [0, sampleRate / 2]
+    @param phases       receives numSamples results, std::arg (numerator / denominator)
+    @param numSamples   the number of entries in both arrays
+    @param sampleRate   the sample rate to evaluate the response at; asserted to be positive
+*/
+template <typename NumericType>
+void IIR::Coefficients<NumericType>::getPhaseForFrequencyArray (double* frequencies, double* phases,
+                                                                size_t numSamples, double sampleRate) const noexcept
+{
+    jassert (sampleRate > 0);
+
+    constexpr Complex<double> j (0, 1);
+    const auto order = getFilterOrder();
+    const auto* coefs = coefficients.begin();
+    auto invSampleRate = 1 / sampleRate;   // hoisted so the loop multiplies instead of dividing
+
+    // (order is an unsigned size_t, so there is nothing to assert about its sign)
+
+    for (size_t i = 0; i < numSamples; ++i)
+    {
+        jassert (frequencies[i] >= 0 && frequencies[i] <= sampleRate * 0.5);
+
+        Complex<double> numerator = 0.0, denominator = 0.0, factor = 1.0;
+        Complex<double> jw = std::exp (-MathConstants<double>::twoPi * frequencies[i] * j * invSampleRate);
+
+        // numerator: sum of coefs[n] * jw^n for n = 0 .. order
+        for (size_t n = 0; n <= order; ++n)
+        {
+            numerator += static_cast<double> (coefs[n]) * factor;
+            factor *= jw;
+        }
+
+        // denominator: implicit leading 1, then coefs[order + 1 .. 2 * order] times jw^1 .. jw^order
+        denominator = 1.0;
+        factor = jw;
+
+        for (size_t n = order + 1; n <= 2 * order; ++n)
+        {
+            denominator += static_cast<double> (coefs[n]) * factor;
+            factor *= jw;
+        }
+
+        phases[i] = std::arg (numerator / denominator);
+    }
+}
+
+// Explicit instantiations of the Coefficients template for float and double, so that the
+// member functions defined in this file are compiled for those two types.
+template struct IIR::Coefficients<float>;
+template struct IIR::Coefficients<double>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_IIRFilter.h
+++ b/modules/juce_dsp/processors/juce_IIRFilter.h
@ -0,0 +1,303 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Classes for IIR filter processing.
+*/
+namespace IIR
+{
+    // Forward declaration: the Filter class below refers to Coefficients before
+    // the struct is fully defined further down in this file.
+    template <typename NumericType>
+    struct Coefficients;
+
+    /**
+        A processing class that can perform IIR filtering on an audio signal, using
+        the Transposed Direct Form II digital structure.
+
+        If you need a lowpass, bandpass or highpass filter with fast modulation of
+        its cutoff frequency, you might use the class StateVariableFilter instead,
+        which is designed to prevent artefacts at parameter changes, instead of the
+        class Filter.
+
+        @see IIR::Coefficients, FilterAudioSource, StateVariableFilter
+
+        @tags{DSP}
+    */
+    template <typename SampleType>
+    class Filter
+    {
+    public:
+        /** The NumericType is the underlying primitive type used by the SampleType (which
+            could be either a primitive or vector)
+        */
+        using NumericType = typename SampleTypeHelpers::ElementType<SampleType>::Type;
+
+        //==============================================================================
+        /** Creates a filter.
+
+            Initially the filter is inactive, so will have no effect on samples that
+            you process with it. Assign a new object to the public coefficients member
+            (or modify the one it points to) to turn it into the type of filter needed.
+        */
+        Filter();
+
+        /** Creates a filter with a given set of coefficients.
+
+            The pointer is stored in the reference-counted coefficients member and is
+            dereferenced straight away by reset(), so it must not be null.
+        */
+        Filter (Coefficients<NumericType>* coefficientsToUse);
+
+        Filter (const Filter&) = default;
+        Filter (Filter&&) = default;
+        Filter& operator= (const Filter&) = default;
+        Filter& operator= (Filter&&) = default;
+
+        //==============================================================================
+        /** The coefficients of the IIR filter. It's up to the caller to ensure that
+            these coefficients are modified in a thread-safe way.
+
+            If you change the order of the coefficients then you must call reset after
+            modifying them.
+        */
+        typename Coefficients<NumericType>::Ptr coefficients;
+
+        //==============================================================================
+        /** Resets the filter's processing pipeline, ready to start a new stream of data.
+
+            Note that this clears the processing state, but the type of filter and
+            its coefficients aren't changed.
+        */
+        void reset()            { reset (SampleType {0}); }
+
+        /** Resets the filter's processing pipeline to a specific value.
+
+            Every state variable is set to resetToValue. If the order of the current
+            coefficients differs from the one last seen, the state storage is
+            reallocated first.
+
+            @see reset
+        */
+        void reset (SampleType resetToValue);
+
+        //==============================================================================
+        /** Called before processing starts. The spec is not used; this just calls reset(). */
+        void prepare (const ProcessSpec&) noexcept;
+
+        /** Processes a block of samples.
+
+            The context's input and output blocks must both be mono. When the context is
+            bypassed the input is copied to the output, but the filter state is still
+            updated as if the block had been filtered.
+        */
+        template <typename ProcessContext>
+        void process (const ProcessContext& context) noexcept
+        {
+            // the bypass flag is turned into a template argument so that the inner
+            // loops are compiled separately for the two cases
+            if (context.isBypassed)
+                processInternal<ProcessContext, true> (context);
+            else
+                processInternal<ProcessContext, false> (context);
+        }
+
+        /** Processes a single sample, without any locking.
+
+            Use this if you need processing of a single value.
+
+            Moreover, you might need the function snapToZero after a few calls to avoid
+            potential denormalisation issues.
+        */
+        SampleType JUCE_VECTOR_CALLTYPE processSample (SampleType sample) noexcept;
+
+        /** Ensure that the state variables are rounded to zero if the state
+            variables are denormals. This is only needed if you are doing
+            sample by sample processing.
+        */
+        void snapToZero() noexcept;
+
+    private:
+        //==============================================================================
+        /** Asserts that the coefficients pointer is non-null, and calls reset() if the
+            order of the coefficients no longer matches the cached order.
+        */
+        void check();
+
+        /** Processes a block of samples; isBypassed selects whether the output receives
+            the filtered signal or a copy of the input.
+        */
+        template <typename ProcessContext, bool isBypassed>
+        void processInternal (const ProcessContext& context) noexcept;
+
+        //==============================================================================
+        HeapBlock<SampleType> memory;   // owns the storage for the state variables
+        SampleType* state = nullptr;    // alignment-snapped pointer into memory, set by reset()
+        size_t order = 0;               // filter order that the state was last sized for
+
+        JUCE_LEAK_DETECTOR (Filter)
+    };
+
+
+    //==============================================================================
+    /** A set of coefficients for use in a Filter object.
+        @see IIR::Filter
+
+        @tags{DSP}
+    */
+    template <typename NumericType>
+    struct Coefficients  : public ProcessorState
+    {
+        /** Creates a null set of coefficients (which will produce silence). */
+        Coefficients();
+
+        /** Directly constructs an object from the raw coefficients.
+            Most people will want to use the static methods instead of this, but the
+            constructor is public to allow tinkerers to create their own custom filters!
+
+            This overload takes the two numerator (b) and two denominator (a) terms of
+            a first-order filter.
+        */
+        Coefficients (NumericType b0, NumericType b1,
+                      NumericType a0, NumericType a1);
+
+        /** Constructs an object from the raw coefficients of a second-order filter. */
+        Coefficients (NumericType b0, NumericType b1, NumericType b2,
+                      NumericType a0, NumericType a1, NumericType a2);
+
+        /** Constructs an object from the raw coefficients of a third-order filter. */
+        Coefficients (NumericType b0, NumericType b1, NumericType b2, NumericType b3,
+                      NumericType a0, NumericType a1, NumericType a2, NumericType a3);
+
+        Coefficients (const Coefficients&) = default;
+        Coefficients (Coefficients&&) = default;
+        Coefficients& operator= (const Coefficients&) = default;
+        Coefficients& operator= (Coefficients&&) = default;
+
+        /** The Coefficients structure is ref-counted, so this is a handy type that can be used
+            as a pointer to one.
+        */
+        using Ptr = ReferenceCountedObjectPtr<Coefficients>;
+
+        //==============================================================================
+        /** Returns the coefficients for a first order low-pass filter. */
+        static Ptr makeFirstOrderLowPass (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for a first order high-pass filter. */
+        static Ptr makeFirstOrderHighPass (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for a first order all-pass filter. */
+        static Ptr makeFirstOrderAllPass (double sampleRate, NumericType frequency);
+
+        //==============================================================================
+        /** Returns the coefficients for a low-pass filter. */
+        static Ptr makeLowPass (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for a low-pass filter with variable Q. */
+        static Ptr makeLowPass (double sampleRate, NumericType frequency, NumericType Q);
+
+        //==============================================================================
+        /** Returns the coefficients for a high-pass filter. */
+        static Ptr makeHighPass (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for a high-pass filter with variable Q. */
+        static Ptr makeHighPass (double sampleRate, NumericType frequency, NumericType Q);
+
+        //==============================================================================
+        /** Returns the coefficients for a band-pass filter. */
+        static Ptr makeBandPass (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for a band-pass filter with variable Q. */
+        static Ptr makeBandPass (double sampleRate, NumericType frequency, NumericType Q);
+
+        //==============================================================================
+        /** Returns the coefficients for a notch filter. */
+        static Ptr makeNotch (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for a notch filter with variable Q. */
+        static Ptr makeNotch (double sampleRate, NumericType frequency, NumericType Q);
+
+        //==============================================================================
+        /** Returns the coefficients for an all-pass filter. */
+        static Ptr makeAllPass (double sampleRate, NumericType frequency);
+
+        /** Returns the coefficients for an all-pass filter with variable Q. */
+        static Ptr makeAllPass (double sampleRate, NumericType frequency, NumericType Q);
+
+        //==============================================================================
+        /** Returns the coefficients for a low-pass shelf filter with variable Q and gain.
+
+            The gain is a scale factor that the low frequencies are multiplied by, so values
+            greater than 1.0 will boost the low frequencies, values less than 1.0 will
+            attenuate them.
+        */
+        static Ptr makeLowShelf (double sampleRate, NumericType cutOffFrequency,
+                                 NumericType Q, NumericType gainFactor);
+
+        /** Returns the coefficients for a high-pass shelf filter with variable Q and gain.
+
+            The gain is a scale factor that the high frequencies are multiplied by, so values
+            greater than 1.0 will boost the high frequencies, values less than 1.0 will
+            attenuate them.
+        */
+        static Ptr makeHighShelf (double sampleRate, NumericType cutOffFrequency,
+                                  NumericType Q, NumericType gainFactor);
+
+        /** Returns the coefficients for a peak filter centred around a
+            given frequency, with a variable Q and gain.
+
+            The gain is a scale factor that the centre frequencies are multiplied by, so
+            values greater than 1.0 will boost the centre frequencies, values less than
+            1.0 will attenuate them.
+        */
+        static Ptr makePeakFilter (double sampleRate, NumericType centreFrequency,
+                                   NumericType Q, NumericType gainFactor);
+
+        //==============================================================================
+        /** Returns the filter order associated with the coefficients */
+        size_t getFilterOrder() const noexcept;
+
+        /** Returns the magnitude frequency response of the filter for a given frequency
+            and sample rate
+        */
+        double getMagnitudeForFrequency (double frequency, double sampleRate) const noexcept;
+
+        /** Returns the magnitude frequency response of the filter for a given frequency array
+            and sample rate.
+        */
+        void getMagnitudeForFrequencyArray (const double* frequencies, double* magnitudes,
+                                            size_t numSamples, double sampleRate) const noexcept;
+
+        /** Returns the phase frequency response of the filter for a given frequency and
+            sample rate
+        */
+        double getPhaseForFrequency (double frequency, double sampleRate) const noexcept;
+
+        /** Returns the phase frequency response of the filter for a given frequency array
+            and sample rate.
+
+            Although the frequencies pointer is not const, the array is only read.
+            Both arrays must hold numSamples elements, and each frequency must lie
+            between 0 and half the sample rate.
+        */
+        void getPhaseForFrequencyArray (double* frequencies, double* phases,
+                                        size_t numSamples, double sampleRate) const noexcept;
+
+        /** Returns a raw data pointer to the coefficients. */
+        NumericType* getRawCoefficients() noexcept              { return coefficients.getRawDataPointer(); }
+
+        /** Returns a raw data pointer to the coefficients. */
+        const NumericType* getRawCoefficients() const noexcept  { return coefficients.begin(); }
+
+        //==============================================================================
+        /** The raw coefficients.
+            You should leave these numbers alone unless you really know what you're doing.
+
+            IIR::Filter reads this array as the numerator terms b0..bN followed by the
+            denominator terms a1..aN, where N is the filter order; a0 is not read from
+            the array (presumably the constructors normalise it away - confirm in the
+            implementation).
+        */
+        Array<NumericType> coefficients;
+
+    private:
+        // Unfortunately, std::sqrt is not marked as constexpr just yet in all compilers,
+        // so the value of 1 / sqrt (2) is spelled out as a literal.
+        static constexpr NumericType inverseRootTwo = static_cast<NumericType> (0.70710678118654752440L);
+    };
+
+} // namespace IIR
+} // namespace dsp
+} // namespace juce
+
+#include "juce_IIRFilter_Impl.h"
--- a/modules/juce_dsp/processors/juce_IIRFilter_Impl.h
+++ b/modules/juce_dsp/processors/juce_IIRFilter_Impl.h
@ -0,0 +1,233 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+namespace IIR
+{
+
+#ifndef DOXYGEN
+
+//==============================================================================
+/** Default constructor: starts from the first-order coefficient set
+    b0 = 1, b1 = 0, a0 = 1, a1 = 0 and then sets up the state with reset().
+*/
+template <typename SampleType>
+Filter<SampleType>::Filter()
+    : coefficients (new Coefficients<NumericType> (1, 0, 1, 0))
+{
+    reset();
+}
+
+/** Constructs a filter around the supplied coefficients object and then sets
+    up the state with reset().
+*/
+template <typename SampleType>
+Filter<SampleType>::Filter (Coefficients<NumericType>* coefficientsToUse)
+    : coefficients (coefficientsToUse)
+{
+    reset();
+}
+
+/** Makes sure the state storage matches the order of the current coefficients,
+    then writes resetToValue into every state variable.
+*/
+template <typename SampleType>
+void Filter<SampleType>::reset (SampleType resetToValue)
+{
+    const auto currentOrder = coefficients->getFilterOrder();
+
+    if (currentOrder != order)
+    {
+        // never allocate for less than a third-order filter; the extra element
+        // presumably leaves headroom for the alignment snap below
+        const auto numElements = jmax (order, currentOrder, static_cast<size_t> (3)) + 1;
+
+        memory.malloc (numElements);
+        state = snapPointerToAlignment (memory.getData(), sizeof (SampleType));
+        order = currentOrder;
+    }
+
+    auto* slot = state;
+
+    for (auto remaining = order; remaining > 0; --remaining)
+        *slot++ = resetToValue;
+}
+
+/** Called before processing starts: the spec is ignored and the state is reset. */
+template <typename SampleType>
+void Filter<SampleType>::prepare (const ProcessSpec&) noexcept
+{
+    reset();
+}
+
+
+/** Filters one mono block of samples from the context's input block into its
+    output block.
+
+    Orders 1, 2 and 3 have dedicated loops that keep the state variables in
+    locals; any other order goes through the general loop at the end. When
+    bypassed is true the output receives a copy of the input, but the state
+    variables are still updated from the filtered value.
+*/
+template <typename SampleType>
+template <typename ProcessContext, bool bypassed>
+void Filter<SampleType>::processInternal (const ProcessContext& context) noexcept
+{
+    static_assert (std::is_same<typename ProcessContext::SampleType, SampleType>::value,
+                   "The sample-type of the IIR filter must match the sample-type supplied to this process callback");
+    // make sure the state storage matches the order of the current coefficients
+    check();
+
+    auto&& inputBlock  = context.getInputBlock();
+    auto&& outputBlock = context.getOutputBlock();
+
+    // This class can only process mono signals. Use the ProcessorDuplicator class
+    // to apply this filter on a multi-channel audio stream.
+    jassert (inputBlock.getNumChannels()  == 1);
+    jassert (outputBlock.getNumChannels() == 1);
+
+    auto numSamples = inputBlock.getNumSamples();
+    auto* src = inputBlock .getChannelPointer (0);
+    auto* dst = outputBlock.getChannelPointer (0);
+    auto* coeffs = coefficients->getRawCoefficients();
+
+    // we need to copy this template parameter into a constexpr
+    // otherwise MSVC will moan that the ternary expressions below
+    // are constant conditional expressions
+    constexpr bool isBypassed = bypassed;
+
+    switch (order)
+    {
+        case 1:
+        {
+            // coefficient layout for order 1: b0, b1, a1
+            auto b0 = coeffs[0];
+            auto b1 = coeffs[1];
+            auto a1 = coeffs[2];
+
+            auto lv1 = state[0];
+
+            for (size_t i = 0; i < numSamples; ++i)
+            {
+                auto in = src[i];
+                auto out = in * b0 + lv1;
+
+                dst[i] = isBypassed ? in : out;
+
+                lv1 = (in * b1) - (out * a1);
+            }
+
+            // flush a denormal state value to zero before storing it back
+            util::snapToZero (lv1); state[0] = lv1;
+        }
+        break;
+
+        case 2:
+        {
+            // coefficient layout for order 2: b0, b1, b2, a1, a2
+            auto b0 = coeffs[0];
+            auto b1 = coeffs[1];
+            auto b2 = coeffs[2];
+            auto a1 = coeffs[3];
+            auto a2 = coeffs[4];
+
+            auto lv1 = state[0];
+            auto lv2 = state[1];
+
+            for (size_t i = 0; i < numSamples; ++i)
+            {
+                auto in = src[i];
+                auto out = (in * b0) + lv1;
+                dst[i] = isBypassed ? in : out;
+
+                // lv1 must be updated before lv2, because it reads lv2's previous value
+                lv1 = (in * b1) - (out * a1) + lv2;
+                lv2 = (in * b2) - (out * a2);
+            }
+
+            util::snapToZero (lv1); state[0] = lv1;
+            util::snapToZero (lv2); state[1] = lv2;
+        }
+        break;
+
+        case 3:
+        {
+            // coefficient layout for order 3: b0, b1, b2, b3, a1, a2, a3
+            auto b0 = coeffs[0];
+            auto b1 = coeffs[1];
+            auto b2 = coeffs[2];
+            auto b3 = coeffs[3];
+            auto a1 = coeffs[4];
+            auto a2 = coeffs[5];
+            auto a3 = coeffs[6];
+
+            auto lv1 = state[0];
+            auto lv2 = state[1];
+            auto lv3 = state[2];
+
+            for (size_t i = 0; i < numSamples; ++i)
+            {
+                auto in = src[i];
+                auto out = (in * b0) + lv1;
+                dst[i] = isBypassed ? in : out;
+
+                // each variable reads the previous value of the next one, so the
+                // updates have to run in this order
+                lv1 = (in * b1) - (out * a1) + lv2;
+                lv2 = (in * b2) - (out * a2) + lv3;
+                lv3 = (in * b3) - (out * a3);
+            }
+
+            util::snapToZero (lv1); state[0] = lv1;
+            util::snapToZero (lv2); state[1] = lv2;
+            util::snapToZero (lv3); state[2] = lv3;
+        }
+        break;
+
+        default:
+        {
+            // general case: the same recurrence, working directly on the state array.
+            // NOTE(review): "order - 1" wraps around if order is 0 - this path assumes
+            // an order of at least 1; confirm that callers can never get here with 0.
+            for (size_t i = 0; i < numSamples; ++i)
+            {
+                auto in = src[i];
+                auto out = (in * coeffs[0]) + state[0];
+                dst[i] = isBypassed ? in : out;
+
+                for (size_t j = 0; j < order - 1; ++j)
+                    state[j] = (in * coeffs[j + 1]) - (out * coeffs[order + j + 1]) + state[j + 1];
+
+                state[order - 1] = (in * coeffs[order]) - (out * coeffs[order * 2]);
+            }
+
+            snapToZero();
+        }
+    }
+}
+
+/** Runs a single sample through the filter using the general (any-order) update
+    of the state variables, and returns the filtered value.
+*/
+template <typename SampleType>
+SampleType JUCE_VECTOR_CALLTYPE Filter<SampleType>::processSample (SampleType sample) noexcept
+{
+    check();
+
+    auto* coeffs = coefficients->getRawCoefficients();
+    const auto lastIndex = order - 1;
+
+    auto output = (coeffs[0] * sample) + state[0];
+
+    // every state variable reads the previous value of the next one, so they
+    // have to be updated in ascending order
+    for (size_t i = 0; i < lastIndex; ++i)
+        state[i] = (coeffs[i + 1] * sample) - (coeffs[order + i + 1] * output) + state[i + 1];
+
+    state[lastIndex] = (coeffs[order] * sample) - (coeffs[order * 2] * output);
+
+    return output;
+}
+
+/** Applies util::snapToZero to each of the state variables in turn. */
+template <typename SampleType>
+void Filter<SampleType>::snapToZero() noexcept
+{
+    auto* const end = state + order;
+
+    for (auto* s = state; s != end; ++s)
+        util::snapToZero (*s);
+}
+
+/** Asserts that there is a coefficients object, and re-runs reset() when its
+    order differs from the order the state was last set up for.
+*/
+template <typename SampleType>
+void Filter<SampleType>::check()
+{
+    jassert (coefficients != nullptr);
+
+    const bool orderHasChanged = (coefficients->getFilterOrder() != order);
+
+    if (orderHasChanged)
+        reset();
+}
+
+#endif
+
+} // namespace IIR
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_LadderFilter.cpp
+++ b/modules/juce_dsp/processors/juce_LadderFilter.cpp
@ -0,0 +1,170 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+//==============================================================================
+/** Creates the filter with state storage for two channels and gives every
+    parameter a default value through the regular setters.
+
+    setMode() is called last; besides choosing the LPF12 response it also
+    calls reset(), which clears the state.
+*/
+template <typename Type>
+LadderFilter<Type>::LadderFilter()  : state (2)
+{
+    setSampleRate (Type (1000));    // intentionally setting unrealistic default
+                                    // sample rate to catch missing initialisation bugs
+    setResonance (Type (0));
+    setDrive (Type (1.2));
+    setMode (Mode::LPF12);
+}
+
+//==============================================================================
+/** Selects the filter response.
+
+    Sets the output mixing weights A and the compensation value comp for the
+    requested mode, scales the weights by a fixed output gain, stores the mode
+    and resets the filter state.
+
+    If newValue is not one of the Mode enumerators this asserts and leaves the
+    filter exactly as it was.
+*/
+template <typename Type>
+void LadderFilter<Type>::setMode (Mode newValue) noexcept
+{
+    switch (newValue)
+    {
+        case Mode::LPF12:   A = {{ Type (0), Type (0),  Type (1), Type (0),  Type (0) }}; comp = Type (0.5);  break;
+        case Mode::HPF12:   A = {{ Type (1), Type (-2), Type (1), Type (0),  Type (0) }}; comp = Type (0);    break;
+        case Mode::LPF24:   A = {{ Type (0), Type (0),  Type (0), Type (0),  Type (1) }}; comp = Type (0.5);  break;
+        case Mode::HPF24:   A = {{ Type (1), Type (-4), Type (6), Type (-4), Type (1) }}; comp = Type (0);    break;
+
+        default:
+            // Unknown mode: bail out instead of falling through, which would apply the
+            // output gain a second time to the weights left over from the previous mode
+            // and store an invalid mode.
+            jassertfalse;
+            return;
+    }
+
+    static constexpr auto outputGain = Type (1.2);
+
+    for (auto& a : A)
+        a *= outputGain;
+
+    mode = newValue;
+    reset();
+}
+
+//==============================================================================
+/** Initialises the filter from a ProcessSpec: takes over its sample rate and
+    channel count, then clears the state.
+*/
+template <typename Type>
+void LadderFilter<Type>::prepare (const juce::dsp::ProcessSpec& spec)
+{
+    const auto newSampleRate = static_cast<Type> (spec.sampleRate);
+
+    setSampleRate (newSampleRate);
+    setNumChannels (spec.numChannels);
+    reset();
+}
+
+//==============================================================================
+/** Zeroes the state of every channel and re-applies each smoother's own target
+    value to it.
+*/
+template <typename Type>
+void LadderFilter<Type>::reset() noexcept
+{
+    const auto zero = Type (0);
+
+    for (size_t channel = 0; channel < state.size(); ++channel)
+        state[channel].fill (zero);
+
+    // the second argument is passed as true - presumably this makes the smoother
+    // jump straight to its target instead of ramping; confirm in LinearSmoothedValue
+    const auto cutoffTarget = cutoffTransformSmoother.getTargetValue();
+    cutoffTransformSmoother.setValue (cutoffTarget, true);
+
+    const auto resonanceTarget = scaledResonanceSmoother.getTargetValue();
+    scaledResonanceSmoother.setValue (resonanceTarget, true);
+}
+
+//==============================================================================
+/** Stores a new cutoff frequency (which must be greater than zero) and pushes
+    it on to the cutoff smoother.
+*/
+template <typename Type>
+void LadderFilter<Type>::setCutoffFrequencyHz (Type newValue) noexcept
+{
+    jassert (Type (0) < newValue);
+
+    cutoffFreqHz = newValue;
+    updateCutoffFreq();
+}
+
+//==============================================================================
+/** Stores a new resonance amount (which must lie between 0 and 1 inclusive) and
+    pushes it on to the resonance smoother.
+*/
+template <typename Type>
+void LadderFilter<Type>::setResonance (Type newValue) noexcept
+{
+    jassert (Type (0) <= newValue && newValue <= Type (1));
+
+    resonance = newValue;
+    updateResonance();
+}
+
+//==============================================================================
+/** Stores a new drive amount (which must be at least 1) and recomputes the
+    values derived from it: gain, drive2 and gain2.
+*/
+template <typename Type>
+void LadderFilter<Type>::setDrive (Type newValue) noexcept
+{
+    jassert (Type (1) <= newValue);
+
+    // the same curve produces gain from drive and gain2 from drive2
+    const auto gainFor = [] (Type amount)
+    {
+        return std::pow (amount, Type (-2.642)) * Type (0.6103) + Type (0.3903);
+    };
+
+    drive  = newValue;
+    gain   = gainFor (drive);
+    drive2 = drive * Type (0.04) + Type (0.96);
+    gain2  = gainFor (drive2);
+}
+
+//==============================================================================
+/** Processes one sample for one channel and returns the filtered value.
+
+    Uses the most recently cached smoother values (see updateSmoothers()), and
+    reads and then overwrites the five state values of the given channel.
+
+    @param inputValue    the input sample
+    @param channelToUse  index into the per-channel state; it is not range-checked here
+*/
+template <typename Type>
+Type LadderFilter<Type>::processSample (Type inputValue, size_t channelToUse) noexcept
+{
+    auto& s = state[channelToUse];
+
+    // stage coefficients derived from the smoothed cutoff value;
+    // b0 + b1 equals g (up to the rounding of the two constants)
+    const auto a1 = cutoffTransformValue;
+    const auto g =  a1 * Type (-1) + Type (1);
+    const auto b0 = g * Type (0.76923076923);
+    const auto b1 = g * Type (0.23076923076);
+
+    // saturated, gain-scaled input, minus the resonance feedback which is taken
+    // from the last stage's previous output (s[4])
+    const auto dx = gain * saturationLUT (drive * inputValue);
+    const auto a = dx + scaledResonanceValue * Type (-4) * (gain2 * saturationLUT (drive2 * s[4]) - dx * comp);
+
+    // four cascaded stages: each one combines the previous input and previous
+    // output held in s with the new output of the stage before it
+    const auto b = b1 * s[0] + a1 * s[1] + b0 * a;
+    const auto c = b1 * s[1] + a1 * s[2] + b0 * b;
+    const auto d = b1 * s[2] + a1 * s[3] + b0 * c;
+    const auto e = b1 * s[3] + a1 * s[4] + b0 * d;
+
+    // the state is only overwritten once every stage has read its old values
+    s[0] = a;
+    s[1] = b;
+    s[2] = c;
+    s[3] = d;
+    s[4] = e;
+
+    // the output is a weighted mix of the five values; the weights are chosen by setMode()
+    return a * A[0] + b * A[1] + c * A[2] + d * A[3] + e * A[4];
+}
+
+//==============================================================================
+/** Fetches the next value from each parameter smoother and caches it for use
+    by processSample().
+*/
+template <typename Type>
+void LadderFilter<Type>::updateSmoothers() noexcept
+{
+    const auto nextCutoffTransform = cutoffTransformSmoother.getNextValue();
+    const auto nextScaledResonance = scaledResonanceSmoother.getNextValue();
+
+    cutoffTransformValue = nextCutoffTransform;
+    scaledResonanceValue = nextScaledResonance;
+}
+
+//==============================================================================
+/** Takes over a new sample rate (which must be greater than zero): recomputes
+    the cutoff scaling factor, resets both smoothers with a 0.05 second ramp
+    time, and refreshes the cutoff smoother's target.
+*/
+template <typename Type>
+void LadderFilter<Type>::setSampleRate (Type newValue) noexcept
+{
+    jassert (Type (0) < newValue);
+
+    static constexpr Type smootherRampTimeSec = Type (0.05);
+
+    const auto minusTwoPi = Type (-2.0 * juce::MathConstants<double>::pi);
+    cutoffFreqScaler = minusTwoPi / newValue;
+
+    cutoffTransformSmoother.reset (newValue, smootherRampTimeSec);
+    scaledResonanceSmoother.reset (newValue, smootherRampTimeSec);
+
+    // the scaling factor has changed, so the cutoff target must be recomputed
+    updateCutoffFreq();
+}
+
+//==============================================================================
+// Explicit instantiations of the ladder filter for float and double.
+template class LadderFilter<float>;
+template class LadderFilter<double>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_LadderFilter.h
+++ b/modules/juce_dsp/processors/juce_LadderFilter.h
@ -0,0 +1,144 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Multi-mode filter based on the Moog ladder filter.
+
+    @tags{DSP}
+*/
+template <typename Type>
+class LadderFilter
+{
+public:
+    /** The available filter responses. */
+    enum class Mode
+    {
+        LPF12,  // low-pass  12 dB/octave
+        HPF12,  // high-pass 12 dB/octave
+        LPF24,  // low-pass  24 dB/octave
+        HPF24   // high-pass 24 dB/octave
+    };
+
+    //==============================================================================
+    /** Creates an uninitialised filter. Call prepare() before first use. */
+    LadderFilter();
+
+    /** Enables or disables the filter. If disabled it will simply pass through the input signal. */
+    void setEnabled (bool newValue) noexcept    { enabled = newValue; }
+
+    /** Sets filter mode. This also resets the filter state. */
+    void setMode (Mode newValue) noexcept;
+
+    /** Initialises the filter. */
+    void prepare (const juce::dsp::ProcessSpec& spec);
+
+    /** Returns the current number of channels. */
+    size_t getNumChannels() const noexcept      { return state.size(); }
+
+    /** Resets the internal state variables of the filter. */
+    void reset() noexcept;
+
+    /** Sets the cutoff frequency of the filter.
+        @param newValue cutoff frequency in Hz */
+    void setCutoffFrequencyHz (Type newValue) noexcept;
+
+    /** Sets the resonance of the filter.
+        @param newValue a value between 0 and 1; higher values increase the resonance and can result in self oscillation! */
+    void setResonance (Type newValue) noexcept;
+
+    /** Sets the amount of saturation in the filter.
+        @param newValue saturation amount; it can be any number greater than or equal to one. Higher values result in more distortion.*/
+    void setDrive (Type newValue) noexcept;
+
+    //==============================================================================
+    /** Processes the context's input block into its output block.
+
+        The input and output blocks must have the same number of channels and
+        samples, and no more channels than getNumChannels(). If the filter is
+        disabled or the context is bypassed, the input is copied to the output
+        unchanged and the filter state is left untouched.
+    */
+    template <typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        const auto& inputBlock = context.getInputBlock();
+        auto& outputBlock = context.getOutputBlock();
+        const auto numChannels = outputBlock.getNumChannels();
+        const auto numSamples = outputBlock.getNumSamples();
+
+        jassert (inputBlock.getNumChannels() <= getNumChannels());
+        jassert (inputBlock.getNumChannels() == numChannels);
+        jassert (inputBlock.getNumSamples()  == numSamples);
+
+        if (! enabled || context.isBypassed)
+        {
+            outputBlock.copy (inputBlock);
+            return;
+        }
+
+        // sample-major loop: the smoothers advance once per sample, and every
+        // channel then uses the same smoothed values for that sample
+        for (size_t n = 0; n < numSamples; ++n)
+        {
+            updateSmoothers();
+
+            for (size_t ch = 0; ch < numChannels; ++ch)
+                outputBlock.getChannelPointer (ch)[n] = processSample (inputBlock.getChannelPointer (ch)[n], ch);
+        }
+    }
+
+protected:
+    //==============================================================================
+    /** Processes a single sample for the given channel, using the smoother values
+        cached by the last call to updateSmoothers(). */
+    Type processSample (Type inputValue, size_t channelToUse) noexcept;
+
+    /** Fetches the next value from each parameter smoother. */
+    void updateSmoothers() noexcept;
+
+private:
+    //==============================================================================
+    // drive/gain and their secondary versions are all set by setDrive();
+    // comp is set by setMode()
+    Type drive, drive2, gain, gain2, comp;
+
+    static constexpr size_t numStates = 5;
+    std::vector<std::array<Type, numStates>> state;   // one set of state values per channel
+    std::array<Type, numStates> A;                    // output mixing weights, set by setMode()
+
+    LinearSmoothedValue<Type> cutoffTransformSmoother;
+    LinearSmoothedValue<Type> scaledResonanceSmoother;
+    Type cutoffTransformValue;   // latest values pulled from the two smoothers above
+    Type scaledResonanceValue;
+
+    // table-based approximation of tanh, built over the range -5 to 5 with 128 points
+    LookupTableTransform<Type> saturationLUT { [] (Type x) { return std::tanh (x); }, Type (-5), Type (5), 128 };
+
+    Type cutoffFreqHz { Type (200) };
+    Type resonance;
+
+    Type cutoffFreqScaler;       // -2 * pi / sampleRate, set by setSampleRate()
+
+    Mode mode;
+    bool enabled = true;
+
+    //==============================================================================
+    void setSampleRate (Type newValue) noexcept;
+    void setNumChannels (size_t newValue)   { state.resize (newValue); }
+    void updateCutoffFreq() noexcept        { cutoffTransformSmoother.setValue (std::exp (cutoffFreqHz * cutoffFreqScaler)); }
+    void updateResonance() noexcept         { scaledResonanceSmoother.setValue (jmap (resonance, Type (0.1), Type (1.0))); }
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_Oscillator.h
+++ b/modules/juce_dsp/processors/juce_Oscillator.h
@ -0,0 +1,244 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Generates a signal based on a user-supplied function.
+
+    The function is treated as one period of a waveform defined over -pi..pi. Its
+    output is added to the incoming signal (see processSample() and process()).
+
+    @tags{DSP}
+*/
+template <typename SampleType>
+class Oscillator
+{
+public:
+    /** The NumericType is the underlying primitive type used by the SampleType (which
+        could be either a primitive or vector)
+    */
+    using NumericType = typename SampleTypeHelpers::ElementType<SampleType>::Type;
+
+    /** Creates an uninitialised oscillator. Call initialise before first use. */
+    Oscillator()
+    {}
+
+    /** Creates an oscillator with a periodic input function (-pi..pi).
+
+        If lookupTableNumPoints is not zero, then the function will be approximated
+        with a lookup table of that many points.
+    */
+    Oscillator (const std::function<NumericType (NumericType)>& function,
+                size_t lookupTableNumPoints = 0)
+    {
+        initialise (function, lookupTableNumPoints);
+    }
+
+    /** Returns true if the Oscillator has been initialised. */
+    bool isInitialised() const noexcept     { return static_cast<bool> (generator); }
+
+    /** Initialises the oscillator with a waveform.
+
+        @param function              the waveform, evaluated over -pi..pi
+        @param lookupTableNumPoints  if non-zero, the waveform is sampled into a lookup
+                                     table with this many points and the table is used
+                                     instead of calling the function directly
+    */
+    void initialise (const std::function<NumericType (NumericType)>& function,
+                     size_t lookupTableNumPoints = 0)
+    {
+        if (lookupTableNumPoints != 0)
+        {
+            auto* table = new LookupTableTransform<NumericType> (function,
+                                                                 -MathConstants<NumericType>::pi,
+                                                                 MathConstants<NumericType>::pi,
+                                                                 lookupTableNumPoints);
+
+            // The generator only holds a raw pointer to the table, so ownership is
+            // handed to lookupTable to keep the table alive alongside the generator.
+            lookupTable.reset (table);
+            generator = [table] (NumericType x) { return (*table) (x); };
+        }
+        else
+        {
+            generator = function;
+
+            // A table created by an earlier initialise() call is no longer referenced
+            // by the generator, so release it instead of keeping it allocated.
+            lookupTable.reset();
+        }
+    }
+
+    //==============================================================================
+    /** Sets the frequency of the oscillator.
+
+        Both arguments are forwarded unchanged to the frequency smoother's setValue().
+    */
+    void setFrequency (NumericType newFrequency, bool force = false) noexcept    { frequency.setValue (newFrequency, force); }
+
+    /** Returns the current (target) frequency of the oscillator. */
+    NumericType getFrequency() const noexcept                    { return frequency.getTargetValue(); }
+
+    //==============================================================================
+    /** Called before processing starts.
+
+        Stores the sample rate, sizes the internal ramp buffer to the maximum block
+        size and resets the oscillator.
+    */
+    void prepare (const ProcessSpec& spec) noexcept
+    {
+        sampleRate = static_cast<NumericType> (spec.sampleRate);
+        rampBuffer.resize (static_cast<int> (spec.maximumBlockSize));
+
+        reset();
+    }
+
+    /** Resets the internal state of the oscillator */
+    void reset() noexcept
+    {
+        phase.reset();
+
+        // The smoother is only re-initialised when a usable sample rate is known.
+        if (sampleRate > 0)
+            frequency.reset (sampleRate, 0.05);
+    }
+
+    //==============================================================================
+    /** Returns the result of processing a single sample: the input plus the next
+        value of the waveform.
+    */
+    SampleType JUCE_VECTOR_CALLTYPE processSample (SampleType input) noexcept
+    {
+        jassert (isInitialised());
+        auto increment = MathConstants<NumericType>::twoPi * frequency.getNextValue() / sampleRate;
+        return input + generator (phase.advance (increment) - MathConstants<NumericType>::pi);
+    }
+
+    /** Processes the input and output buffers supplied in the processing context.
+
+        The waveform is added to every channel that has a matching input channel and
+        written on its own to any remaining output channels. When the context is
+        bypassed the output is cleared, but the phase and the frequency smoother still
+        advance by the length of the block.
+    */
+    template <typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        jassert (isInitialised());
+        auto&& outBlock = context.getOutputBlock();
+        auto&& inBlock  = context.getInputBlock();
+
+        // the block must fit in the ramp buffer, which prepare() sizes to the
+        // maximum block size
+        jassert (outBlock.getNumSamples() <= static_cast<size_t> (rampBuffer.size()));
+
+        auto len           = outBlock.getNumSamples();
+        auto numChannels   = outBlock.getNumChannels();
+        auto inputChannels = inBlock.getNumChannels();
+        auto baseIncrement = MathConstants<NumericType>::twoPi / sampleRate;
+
+        if (context.isBypassed)
+            context.getOutputBlock().clear();
+
+        if (frequency.isSmoothing())
+        {
+            // The frequency changes from sample to sample, so the phase ramp is
+            // computed once into rampBuffer and then shared by all channels.
+            auto* buffer = rampBuffer.getRawDataPointer();
+
+            for (size_t i = 0; i < len; ++i)
+                buffer[i] = phase.advance (baseIncrement * frequency.getNextValue())
+                              - MathConstants<NumericType>::pi;
+
+            if (! context.isBypassed)
+            {
+                size_t ch;
+
+                if (context.usesSeparateInputAndOutputBlocks())
+                {
+                    for (ch = 0; ch < jmin (numChannels, inputChannels); ++ch)
+                    {
+                        auto* dst = outBlock.getChannelPointer (ch);
+                        auto* src = inBlock.getChannelPointer (ch);
+
+                        for (size_t i = 0; i < len; ++i)
+                            dst[i] = src[i] + generator (buffer[i]);
+                    }
+                }
+                else
+                {
+                    for (ch = 0; ch < jmin (numChannels, inputChannels); ++ch)
+                    {
+                        auto* dst = outBlock.getChannelPointer (ch);
+
+                        for (size_t i = 0; i < len; ++i)
+                            dst[i] += generator (buffer[i]);
+                    }
+                }
+
+                // output channels without a matching input receive the waveform alone
+                for (; ch < numChannels; ++ch)
+                {
+                    auto* dst = outBlock.getChannelPointer (ch);
+
+                    for (size_t i = 0; i < len; ++i)
+                        dst[i] = generator (buffer[i]);
+                }
+            }
+        }
+        else
+        {
+            // Constant frequency: every channel restarts from the same stored phase
+            // using a working copy, and the copy is written back once at the end.
+            auto freq = baseIncrement * frequency.getNextValue();
+            auto p = phase;
+
+            if (context.isBypassed)
+            {
+                frequency.skip (static_cast<int> (len));
+                p.advance (freq * static_cast<NumericType> (len));
+            }
+            else
+            {
+                size_t ch;
+
+                if (context.usesSeparateInputAndOutputBlocks())
+                {
+                    for (ch = 0; ch < jmin (numChannels, inputChannels); ++ch)
+                    {
+                        p = phase;
+                        auto* dst = outBlock.getChannelPointer (ch);
+                        auto* src = inBlock.getChannelPointer (ch);
+
+                        for (size_t i = 0; i < len; ++i)
+                            dst[i] = src[i] + generator (p.advance (freq) - MathConstants<NumericType>::pi);
+                    }
+                }
+                else
+                {
+                    for (ch = 0; ch < jmin (numChannels, inputChannels); ++ch)
+                    {
+                        p = phase;
+                        auto* dst = outBlock.getChannelPointer (ch);
+
+                        for (size_t i = 0; i < len; ++i)
+                            dst[i] += generator (p.advance (freq) - MathConstants<NumericType>::pi);
+                    }
+                }
+
+                // output channels without a matching input receive the waveform alone
+                for (; ch < numChannels; ++ch)
+                {
+                    p = phase;
+                    auto* dst = outBlock.getChannelPointer (ch);
+
+                    for (size_t i = 0; i < len; ++i)
+                        dst[i] = generator (p.advance (freq) - MathConstants<NumericType>::pi);
+                }
+            }
+
+            phase = p;
+        }
+    }
+
+private:
+    //==============================================================================
+    std::function<NumericType (NumericType)> generator;              // waveform actually called while processing
+    std::unique_ptr<LookupTableTransform<NumericType>> lookupTable;  // owns the table the generator may point at
+    Array<NumericType> rampBuffer;                                   // per-block phase values used while smoothing
+    LinearSmoothedValue<NumericType> frequency { static_cast<NumericType> (440.0) };
+    NumericType sampleRate = 48000.0;
+    Phase<NumericType> phase;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_Oversampling.cpp
+++ b/modules/juce_dsp/processors/juce_Oversampling.cpp
@ -0,0 +1,717 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/** Abstract base for the oversampling engines used internally by the
+    Oversampling class.
+
+    An engine owns an intermediate buffer sized for the oversampled signal.
+    Concrete engines implement processSamplesUp() and processSamplesDown().
+*/
+template <typename SampleType>
+class OversamplingEngine
+{
+public:
+    //===============================================================================
+    /** Stores the channel count and the oversampling factor of this engine. */
+    OversamplingEngine (size_t newNumChannels, size_t newFactor)
+        : factor (newFactor),
+          numChannels (newNumChannels)
+    {
+    }
+
+    virtual ~OversamplingEngine() = default;
+
+    //===============================================================================
+    /** Returns the latency of the engine, in samples. */
+    virtual SampleType getLatencyInSamples() = 0;
+
+    /** Returns the oversampling factor that was passed to the constructor. */
+    size_t getFactor()
+    {
+        return factor;
+    }
+
+    /** Sizes the intermediate buffer to numChannels channels of
+        (maximumNumberOfSamplesBeforeOversampling * factor) samples.
+    */
+    virtual void initProcessing (size_t maximumNumberOfSamplesBeforeOversampling)
+    {
+        const auto channelCount     = static_cast<int> (numChannels);
+        const auto oversampledCount = static_cast<int> (maximumNumberOfSamplesBeforeOversampling * factor);
+
+        buffer.setSize (channelCount, oversampledCount, false, false, true);
+    }
+
+    /** Clears the intermediate buffer. */
+    virtual void reset()
+    {
+        buffer.clear();
+    }
+
+    /** Returns a block referring to the first numSamples samples of the
+        intermediate buffer.
+    */
+    dsp::AudioBlock<SampleType> getProcessedSamples (size_t numSamples)
+    {
+        dsp::AudioBlock<SampleType> wholeBuffer (buffer);
+        return wholeBuffer.getSubBlock (0, numSamples);
+    }
+
+    /** Processes inputBlock towards the oversampled rate. */
+    virtual void processSamplesUp (dsp::AudioBlock<SampleType>& inputBlock) = 0;
+
+    /** Processes the oversampled signal back down into outputBlock. */
+    virtual void processSamplesDown (dsp::AudioBlock<SampleType>& outputBlock) = 0;
+
+protected:
+    //===============================================================================
+    AudioBuffer<SampleType> buffer;
+    size_t factor;
+    size_t numChannels;
+};
+
+
+//===============================================================================
+/** Pass-through oversampling engine with a factor of 1: the input is copied
+    into the intermediate buffer and copied back out again unchanged.
+*/
+template <typename SampleType>
+class OversamplingDummy : public OversamplingEngine<SampleType>
+{
+public:
+    //===============================================================================
+    OversamplingDummy (size_t numChannels)
+        : OversamplingEngine<SampleType> (numChannels, 1)
+    {
+    }
+
+    ~OversamplingDummy() = default;
+
+    //===============================================================================
+    /** This engine reports no latency. */
+    SampleType getLatencyInSamples() override
+    {
+        return 0.f;
+    }
+
+    /** Copies every channel of inputBlock to the start of the intermediate buffer. */
+    void processSamplesUp (dsp::AudioBlock<SampleType>& inputBlock) override
+    {
+        jassert (inputBlock.getNumChannels() <= static_cast<size_t> (this->buffer.getNumChannels()));
+        jassert (inputBlock.getNumSamples() * this->factor <= static_cast<size_t> (this->buffer.getNumSamples()));
+
+        const auto samplesToCopy = static_cast<int> (inputBlock.getNumSamples());
+
+        for (size_t ch = 0; ch < inputBlock.getNumChannels(); ++ch)
+            this->buffer.copyFrom (static_cast<int> (ch), 0, inputBlock.getChannelPointer (ch), samplesToCopy);
+    }
+
+    /** Copies the start of the intermediate buffer into outputBlock. */
+    void processSamplesDown (dsp::AudioBlock<SampleType>& outputBlock) override
+    {
+        jassert (outputBlock.getNumChannels() <= static_cast<size_t> (this->buffer.getNumChannels()));
+        jassert (outputBlock.getNumSamples() * this->factor <= static_cast<size_t> (this->buffer.getNumSamples()));
+
+        const auto stored = this->getProcessedSamples (outputBlock.getNumSamples());
+        outputBlock.copy (stored);
+    }
+
+private:
+    //===============================================================================
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (OversamplingDummy)
+};
+
+//===============================================================================
+/** Oversampling engine performing 2 times oversampling with half-band FIR
+    filters designed by the Filter Design FIR Equiripple method. The resulting
+    filter is linear phase and symmetric, and every second coefficient except
+    the middle one is zero, which the processing loops below take advantage of.
+*/
+template <typename SampleType>
+class Oversampling2TimesEquirippleFIR : public OversamplingEngine<SampleType>
+{
+public:
+    //===============================================================================
+    /** Designs separate filters for the up and down directions and allocates
+        the per-channel state they need.
+    */
+    Oversampling2TimesEquirippleFIR (size_t numChannels,
+                                     SampleType normalizedTransitionWidthUp,
+                                     SampleType stopbandAttenuationdBUp,
+                                     SampleType normalizedTransitionWidthDown,
+                                     SampleType stopbandAttenuationdBDown)
+        : OversamplingEngine<SampleType> (numChannels, 2)
+    {
+        using Design = dsp::FilterDesign<SampleType>;
+
+        coefficientsUp   = *Design::designFIRLowpassHalfBandEquirippleMethod (normalizedTransitionWidthUp, stopbandAttenuationdBUp);
+        coefficientsDown = *Design::designFIRLowpassHalfBandEquirippleMethod (normalizedTransitionWidthDown, stopbandAttenuationdBDown);
+
+        const auto channelCount = static_cast<int> (numChannels);
+
+        // upsampling history: one slot per filter tap
+        const auto tapsUp = coefficientsUp.getFilterOrder() + 1;
+        stateUp.setSize (channelCount, static_cast<int> (tapsUp));
+
+        // downsampling history, plus a small ring buffer of (taps / 4) + 1 entries
+        const auto tapsDown        = coefficientsDown.getFilterOrder() + 1;
+        const auto quarterTapsDown = (tapsDown / 2) / 2;
+
+        stateDown.setSize (channelCount, static_cast<int> (tapsDown));
+        stateDown2.setSize (channelCount, static_cast<int> (quarterTapsDown + 1));
+
+        position.resize (channelCount);
+    }
+
+    ~Oversampling2TimesEquirippleFIR() = default;
+
+    //===============================================================================
+    /** Returns half the sum of the orders of the up and down filters. */
+    SampleType getLatencyInSamples() override
+    {
+        const auto combinedOrder = coefficientsUp.getFilterOrder() + coefficientsDown.getFilterOrder();
+        return static_cast<SampleType> (combinedOrder) * 0.5f;
+    }
+
+    /** Clears the intermediate buffer, the filter histories and the ring-buffer
+        positions.
+    */
+    void reset() override
+    {
+        OversamplingEngine<SampleType>::reset();
+
+        stateUp.clear();
+        stateDown.clear();
+        stateDown2.clear();
+
+        position.fill (0);
+    }
+
+    /** Writes two filtered samples into the intermediate buffer for every sample
+        of inputBlock.
+    */
+    void processSamplesUp (dsp::AudioBlock<SampleType>& inputBlock) override
+    {
+        jassert (inputBlock.getNumChannels() <= static_cast<size_t> (this->buffer.getNumChannels()));
+        jassert (inputBlock.getNumSamples() * this->factor <= static_cast<size_t> (this->buffer.getNumSamples()));
+
+        auto* taps          = coefficientsUp.getRawCoefficients();
+        const auto numTaps  = coefficientsUp.getFilterOrder() + 1;
+        const auto halfTaps = numTaps / 2;
+        const auto numInput = inputBlock.getNumSamples();
+
+        for (size_t ch = 0; ch < inputBlock.getNumChannels(); ++ch)
+        {
+            auto* upsampled = this->buffer.getWritePointer (static_cast<int> (ch));
+            auto* history   = stateUp.getWritePointer (static_cast<int> (ch));
+            auto* source    = inputBlock.getChannelPointer (ch);
+
+            for (size_t i = 0; i < numInput; ++i)
+            {
+                // the newest input goes into the last history slot, scaled by 2
+                history[numTaps - 1] = 2 * source[i];
+
+                // symmetric convolution: mirrored taps share one coefficient, and
+                // only every second coefficient is visited
+                auto sum = static_cast<SampleType> (0.0);
+
+                for (size_t k = 0; k < halfTaps; k += 2)
+                    sum += (history[k] + history[numTaps - k - 1]) * taps[k];
+
+                // first output of the pair is the convolution, second is a single
+                // history sample scaled by the middle coefficient
+                const auto firstOut = i << 1;
+                upsampled[firstOut]     = sum;
+                upsampled[firstOut + 1] = history[halfTaps + 1] * taps[halfTaps];
+
+                // move the history along; only even-indexed slots are shifted
+                for (size_t k = 0; k < numTaps - 2; k += 2)
+                    history[k] = history[k + 2];
+            }
+        }
+    }
+
+    /** Produces one sample of outputBlock for every two samples held in the
+        intermediate buffer.
+    */
+    void processSamplesDown (dsp::AudioBlock<SampleType>& outputBlock) override
+    {
+        jassert (outputBlock.getNumChannels() <= static_cast<size_t> (this->buffer.getNumChannels()));
+        jassert (outputBlock.getNumSamples() * this->factor <= static_cast<size_t> (this->buffer.getNumSamples()));
+
+        auto* taps             = coefficientsDown.getRawCoefficients();
+        const auto numTaps     = coefficientsDown.getFilterOrder() + 1;
+        const auto halfTaps    = numTaps / 2;
+        const auto quarterTaps = halfTaps / 2;
+        const auto numOutput   = outputBlock.getNumSamples();
+
+        for (size_t ch = 0; ch < outputBlock.getNumChannels(); ++ch)
+        {
+            auto* oversampled = this->buffer.getWritePointer (static_cast<int> (ch));
+            auto* history     = stateDown.getWritePointer (static_cast<int> (ch));
+            auto* ring        = stateDown2.getWritePointer (static_cast<int> (ch));
+            auto* destination = outputBlock.getChannelPointer (ch);
+            auto ringPos      = position.getUnchecked (static_cast<int> (ch));
+
+            for (size_t i = 0; i < numOutput; ++i)
+            {
+                const auto firstIn = i << 1;
+
+                // the first sample of each pair feeds the convolution history
+                history[numTaps - 1] = oversampled[firstIn];
+
+                auto sum = static_cast<SampleType> (0.0);
+
+                for (size_t k = 0; k < halfTaps; k += 2)
+                    sum += (history[k] + history[numTaps - k - 1]) * taps[k];
+
+                // the second sample of each pair goes through the ring buffer and
+                // contributes via the middle coefficient when it is read back
+                sum += ring[ringPos] * taps[halfTaps];
+                ring[ringPos] = oversampled[firstIn + 1];
+
+                destination[i] = sum;
+
+                // move the whole history along by two slots
+                for (size_t k = 0; k < numTaps - 2; ++k)
+                    history[k] = history[k + 2];
+
+                // step the ring position backwards, wrapping from 0 to quarterTaps
+                if (ringPos == 0)
+                    ringPos = quarterTaps;
+                else
+                    ringPos = ringPos - 1;
+            }
+
+            position.setUnchecked (static_cast<int> (ch), ringPos);
+        }
+    }
+
+private:
+    //===============================================================================
+    dsp::FIR::Coefficients<SampleType> coefficientsUp, coefficientsDown;
+    AudioBuffer<SampleType> stateUp, stateDown, stateDown2;
+    Array<size_t> position;
+
+    //===============================================================================
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (Oversampling2TimesEquirippleFIR)
+};
+
+
+//===============================================================================
+/** Oversampling engine performing 2 times oversampling with filters from the
+    Filter Design IIR Polyphase Allpass Cascaded method. The resulting filter is
+    minimum phase, and the engine reports the resulting latency through
+    getLatencyInSamples().
+*/
+template <typename SampleType>
+class Oversampling2TimesPolyphaseIIR : public OversamplingEngine<SampleType>
+{
+public:
+    //===============================================================================
+    /** Designs the up and down filters, derives the total latency from them and
+        allocates the per-channel filter state.
+    */
+    Oversampling2TimesPolyphaseIIR (size_t numChannels,
+                                    SampleType normalizedTransitionWidthUp,
+                                    SampleType stopbandAttenuationdBUp,
+                                    SampleType normalizedTransitionWidthDown,
+                                    SampleType stopbandAttenuationdBDown)
+        : OversamplingEngine<SampleType> (numChannels, 2)
+    {
+        using Design = dsp::FilterDesign<SampleType>;
+
+        // The latency of each direction is taken from the phase of the equivalent
+        // IIR filter at a very low frequency, divided by that frequency in radians.
+        const double probeFrequency = 0.0001;
+        const double probeRadians   = probeFrequency * MathConstants<double>::twoPi;
+
+        auto structureUp = Design::designIIRLowpassHalfBandPolyphaseAllpassMethod (normalizedTransitionWidthUp, stopbandAttenuationdBUp);
+        dsp::IIR::Coefficients<SampleType> equivalentUp = getCoefficients (structureUp);
+        latency = static_cast<SampleType> (-equivalentUp.getPhaseForFrequency (probeFrequency, 1.0) / probeRadians);
+
+        auto structureDown = Design::designIIRLowpassHalfBandPolyphaseAllpassMethod (normalizedTransitionWidthDown, stopbandAttenuationdBDown);
+        dsp::IIR::Coefficients<SampleType> equivalentDown = getCoefficients (structureDown);
+        latency += static_cast<SampleType> (-equivalentDown.getPhaseForFrequency (probeFrequency, 1.0) / probeRadians);
+
+        // Only the first coefficient of each stage is kept. Direct-path stages come
+        // first, then the delayed-path stages from index 1 onwards (index 0 of the
+        // delayed path is skipped).
+        for (auto i = 0; i < structureUp.directPath.size(); ++i)
+            coefficientsUp.add (structureUp.directPath.getReference (i).coefficients[0]);
+
+        for (auto i = 1; i < structureUp.delayedPath.size(); ++i)
+            coefficientsUp.add (structureUp.delayedPath.getReference (i).coefficients[0]);
+
+        for (auto i = 0; i < structureDown.directPath.size(); ++i)
+            coefficientsDown.add (structureDown.directPath.getReference (i).coefficients[0]);
+
+        for (auto i = 1; i < structureDown.delayedPath.size(); ++i)
+            coefficientsDown.add (structureDown.delayedPath.getReference (i).coefficients[0]);
+
+        const auto channelCount = static_cast<int> (numChannels);
+
+        v1Up.setSize (channelCount, coefficientsUp.size());
+        v1Down.setSize (channelCount, coefficientsDown.size());
+        delayDown.resize (channelCount);
+    }
+
+    ~Oversampling2TimesPolyphaseIIR() = default;
+
+    //===============================================================================
+    /** Returns the latency computed in the constructor. */
+    SampleType getLatencyInSamples() override
+    {
+        return latency;
+    }
+
+    /** Clears the intermediate buffer, the allpass states and the one-sample
+        delays used when downsampling.
+    */
+    void reset() override
+    {
+        OversamplingEngine<SampleType>::reset();
+
+        v1Up.clear();
+        v1Down.clear();
+        delayDown.fill (0);
+    }
+
+    /** Writes two samples into the intermediate buffer for every sample of
+        inputBlock: the direct-path output followed by the delayed-path output.
+    */
+    void processSamplesUp (dsp::AudioBlock<SampleType>& inputBlock) override
+    {
+        jassert (inputBlock.getNumChannels() <= static_cast<size_t> (this->buffer.getNumChannels()));
+        jassert (inputBlock.getNumSamples() * this->factor <= static_cast<size_t> (this->buffer.getNumSamples()));
+
+        auto* alphas           = coefficientsUp.getRawDataPointer();
+        const auto stageCount  = coefficientsUp.size();
+        const auto directCount = stageCount - stageCount / 2;
+        const auto numInput    = inputBlock.getNumSamples();
+
+        for (size_t ch = 0; ch < inputBlock.getNumChannels(); ++ch)
+        {
+            auto* upsampled = this->buffer.getWritePointer (static_cast<int> (ch));
+            auto* states    = v1Up.getWritePointer (static_cast<int> (ch));
+            auto* source    = inputBlock.getChannelPointer (ch);
+
+            for (size_t i = 0; i < numInput; ++i)
+            {
+                const auto firstOut = i << 1;
+
+                // both paths are fed the same input sample
+                upsampled[firstOut]     = runAllpassStages (source[i], alphas, states, 0, directCount);
+                upsampled[firstOut + 1] = runAllpassStages (source[i], alphas, states, directCount, stageCount);
+            }
+        }
+
+        snapToZero (true);
+    }
+
+    /** Produces one sample of outputBlock for every two samples held in the
+        intermediate buffer, averaging the direct-path output with the previous
+        delayed-path output.
+    */
+    void processSamplesDown (dsp::AudioBlock<SampleType>& outputBlock) override
+    {
+        jassert (outputBlock.getNumChannels() <= static_cast<size_t> (this->buffer.getNumChannels()));
+        jassert (outputBlock.getNumSamples() * this->factor <= static_cast<size_t> (this->buffer.getNumSamples()));
+
+        auto* alphas           = coefficientsDown.getRawDataPointer();
+        const auto stageCount  = coefficientsDown.size();
+        const auto directCount = stageCount - stageCount / 2;
+        const auto numOutput   = outputBlock.getNumSamples();
+
+        for (size_t ch = 0; ch < outputBlock.getNumChannels(); ++ch)
+        {
+            auto* oversampled = this->buffer.getWritePointer (static_cast<int> (ch));
+            auto* states      = v1Down.getWritePointer (static_cast<int> (ch));
+            auto* destination = outputBlock.getChannelPointer (ch);
+            auto previous     = delayDown.getUnchecked (static_cast<int> (ch));
+
+            for (size_t i = 0; i < numOutput; ++i)
+            {
+                const auto firstIn = i << 1;
+
+                const auto directOut  = runAllpassStages (oversampled[firstIn], alphas, states, 0, directCount);
+                const auto delayedOut = runAllpassStages (oversampled[firstIn + 1], alphas, states, directCount, stageCount);
+
+                // the delayed path lags by one output sample
+                destination[i] = (previous + directOut) * static_cast<SampleType> (0.5);
+                previous = delayedOut;
+            }
+
+            delayDown.setUnchecked (static_cast<int> (ch), previous);
+        }
+
+        snapToZero (false);
+    }
+
+    /** Applies util::snapToZero to every allpass state of either the upsampling
+        (true) or the downsampling (false) filters.
+    */
+    void snapToZero (bool snapUpProcessing)
+    {
+        auto& stateBuffer     = snapUpProcessing ? v1Up : v1Down;
+        const auto stageCount = snapUpProcessing ? coefficientsUp.size() : coefficientsDown.size();
+
+        for (auto ch = 0; ch < this->buffer.getNumChannels(); ++ch)
+        {
+            auto* states = stateBuffer.getWritePointer (ch);
+
+            for (auto n = 0; n < stageCount; ++n)
+                util::snapToZero (states[n]);
+        }
+    }
+
+private:
+    //===============================================================================
+    /** Runs one sample through the first-order allpass stages firstStage up to
+        (but not including) endStage, updating their states, and returns the
+        output of the last stage.
+    */
+    static SampleType runAllpassStages (SampleType value, const SampleType* alphas, SampleType* states,
+                                        int firstStage, int endStage) noexcept
+    {
+        for (auto stage = firstStage; stage < endStage; ++stage)
+        {
+            const auto alpha  = alphas[stage];
+            const auto result = alpha * value + states[stage];
+            states[stage] = value - alpha * result;
+            value = result;
+        }
+
+        return value;
+    }
+
+    /** Multiplies the numerator and denominator polynomials of every filter in
+        path into the two running products.
+    */
+    template <typename PathType>
+    static void multiplyPathInto (PathType& path,
+                                  dsp::Polynomial<SampleType>& numeratorProduct,
+                                  dsp::Polynomial<SampleType>& denominatorProduct)
+    {
+        const auto one = static_cast<SampleType> (1.0);
+
+        for (auto n = 0; n < path.size(); ++n)
+        {
+            auto& section = path.getReference (n);
+            auto* c = section.getRawCoefficients();
+
+            if (section.getFilterOrder() == 1)
+            {
+                numeratorProduct   = numeratorProduct.getProductWith (dsp::Polynomial<SampleType> ({ c[0], c[1] }));
+                denominatorProduct = denominatorProduct.getProductWith (dsp::Polynomial<SampleType> ({ one, c[2] }));
+            }
+            else
+            {
+                numeratorProduct   = numeratorProduct.getProductWith (dsp::Polynomial<SampleType> ({ c[0], c[1], c[2] }));
+                denominatorProduct = denominatorProduct.getProductWith (dsp::Polynomial<SampleType> ({ one, c[3], c[4] }));
+            }
+        }
+    }
+
+    /** This function calculates the equivalent high order IIR filter of a given
+        polyphase cascaded allpass filters structure.
+
+        The two paths are combined as a sum of fractions: the numerator is
+        N1 * D2 + N2 * D1 and the denominator is D1 * D2, both normalised by the
+        denominator's first coefficient.
+    */
+    const dsp::IIR::Coefficients<SampleType> getCoefficients (typename dsp::FilterDesign<SampleType>::IIRPolyphaseAllpassStructure& structure) const
+    {
+        const auto one = static_cast<SampleType> (1.0);
+
+        dsp::Polynomial<SampleType> directNumerator    ({ one });
+        dsp::Polynomial<SampleType> directDenominator  ({ one });
+        dsp::Polynomial<SampleType> delayedNumerator   ({ one });
+        dsp::Polynomial<SampleType> delayedDenominator ({ one });
+
+        multiplyPathInto (structure.directPath,  directNumerator,  directDenominator);
+        multiplyPathInto (structure.delayedPath, delayedNumerator, delayedDenominator);
+
+        dsp::Polynomial<SampleType> crossDirect  = directNumerator.getProductWith (delayedDenominator);
+        dsp::Polynomial<SampleType> crossDelayed = delayedNumerator.getProductWith (directDenominator);
+        dsp::Polynomial<SampleType> numerator    = crossDirect.getSumWith (crossDelayed);
+        dsp::Polynomial<SampleType> denominator  = directDenominator.getProductWith (delayedDenominator);
+
+        dsp::IIR::Coefficients<SampleType> result;
+        result.coefficients.clear();
+
+        const auto normalisation = static_cast<SampleType> (1.0) / denominator[0];
+
+        // all numerator terms first, then the denominator terms after its first
+        for (auto i = 0; i <= numerator.getOrder(); ++i)
+            result.coefficients.add (numerator[i] * normalisation);
+
+        for (auto i = 1; i <= denominator.getOrder(); ++i)
+            result.coefficients.add (denominator[i] * normalisation);
+
+        return result;
+    }
+
+    //===============================================================================
+    Array<SampleType> coefficientsUp, coefficientsDown;
+    SampleType latency;
+
+    AudioBuffer<SampleType> v1Up, v1Down;
+    Array<SampleType> delayDown;
+
+    //===============================================================================
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (Oversampling2TimesPolyphaseIIR)
+};
+
+
+//===============================================================================
+/** Builds the chain of oversampling stages.
+
+    @param newNumChannels   number of channels handed to every stage
+    @param newFactor        the overall ratio is 2 ^ newFactor (expected range 0..4);
+                            0 creates a single OversamplingDummy stage
+    @param newType          filter design used for every 2x stage
+    @param newMaxQuality    selects the tighter transition widths / stop-band
+                            attenuations used below
+
+    If newType is not one of the two half-band filter types (for example the
+    numFilterTypes sentinel), the object falls back to a single OversamplingDummy
+    stage with an oversampling factor of 1, instead of leaving numStages
+    uninitialised and the engine list empty.
+*/
+template <typename SampleType>
+Oversampling<SampleType>::Oversampling (size_t newNumChannels, size_t newFactor, FilterType newType, bool newMaxQuality)
+{
+    // newFactor is unsigned, so only its upper bound can be violated.
+    jassert (newFactor <= 4 && newNumChannels > 0);
+
+    factorOversampling = static_cast<size_t> (1) << newFactor;
+    isMaximumQuality = newMaxQuality;
+    type = newType;
+    numChannels = newNumChannels;
+
+    const bool isKnownType = (type == FilterType::filterHalfBandPolyphaseIIR
+                               || type == FilterType::filterHalfBandFIREquiripple);
+
+    // Asking for real oversampling with an unknown filter type is a caller error.
+    jassert (newFactor == 0 || isKnownType);
+
+    if (newFactor == 0 || ! isKnownType)
+    {
+        // Keep the reported factor consistent with the single dummy stage.
+        factorOversampling = 1;
+        numStages = 1;
+        engines.add (new OversamplingDummy<SampleType> (numChannels));
+    }
+    else
+    {
+        numStages = newFactor;
+        const bool usePolyphaseIIR = (type == FilterType::filterHalfBandPolyphaseIIR);
+
+        for (size_t n = 0; n < numStages; n++)
+        {
+            // The first stage uses half the transition width of the later stages.
+            auto twUp   = (isMaximumQuality ? 0.10f : 0.12f) * (n == 0 ? 0.5f : 1.f);
+            auto twDown = (isMaximumQuality ? 0.12f : 0.15f) * (n == 0 ? 0.5f : 1.f);
+
+            // Only the upsampling start attenuation differs between the two designs.
+            auto gaindBStartUp   = usePolyphaseIIR ? (isMaximumQuality ? -75.f : -65.f)
+                                                   : (isMaximumQuality ? -90.f : -70.f);
+            auto gaindBStartDown = (isMaximumQuality ? -70.f : -60.f);
+            auto gaindBFactor    = (isMaximumQuality ? 10.f : 8.f);
+
+            // The attenuation values rise by gaindBFactor for each further stage.
+            auto attenuationUp   = gaindBStartUp   + gaindBFactor * n;
+            auto attenuationDown = gaindBStartDown + gaindBFactor * n;
+
+            if (usePolyphaseIIR)
+                engines.add (new Oversampling2TimesPolyphaseIIR<SampleType> (numChannels,
+                                                                             twUp, attenuationUp,
+                                                                             twDown, attenuationDown));
+            else
+                engines.add (new Oversampling2TimesEquirippleFIR<SampleType> (numChannels,
+                                                                              twUp, attenuationUp,
+                                                                              twDown, attenuationDown));
+        }
+    }
+}
+
+/** Destructor: empties the engine list, deleting every stage object it owns. */
+template <typename SampleType>
+Oversampling<SampleType>::~Oversampling()
+{
+    const bool deleteOwnedStages = true;
+    engines.clear (deleteOwnedStages);
+}
+
+//===============================================================================
+/** Sums the latency reported by each stage, dividing each stage's figure by the
+    product of the stage factors up to and including that stage.
+*/
+template <typename SampleType>
+SampleType Oversampling<SampleType>::getLatencyInSamples() noexcept
+{
+    SampleType totalLatency = static_cast<SampleType> (0);
+    size_t cumulativeFactor = 1;
+
+    for (size_t stage = 0; stage < numStages; ++stage)
+    {
+        auto* stageEngine = engines[static_cast<int> (stage)];
+
+        cumulativeFactor *= stageEngine->getFactor();
+        totalLatency += stageEngine->getLatencyInSamples() / static_cast<SampleType> (cumulativeFactor);
+    }
+
+    return totalLatency;
+}
+
+/** Returns the stored factorOversampling value, which the constructor sets to
+    1 << factor.
+*/
+template <typename SampleType>
+size_t Oversampling<SampleType>::getOversamplingFactor() noexcept
+{
+    return factorOversampling;
+}
+
+//===============================================================================
+/** Initialises every stage in order, giving each one the block size produced by
+    the previous stage, then marks the object as ready and resets it.
+*/
+template <typename SampleType>
+void Oversampling<SampleType>::initProcessing (size_t maximumNumberOfSamplesBeforeOversampling)
+{
+    jassert (! engines.isEmpty());
+
+    size_t samplesPerBlock = maximumNumberOfSamplesBeforeOversampling;
+
+    for (size_t stage = 0; stage < numStages; ++stage)
+    {
+        auto* stageEngine = engines[static_cast<int> (stage)];
+
+        stageEngine->initProcessing (samplesPerBlock);
+
+        // The next stage receives blocks enlarged by this stage's factor.
+        samplesPerBlock *= stageEngine->getFactor();
+    }
+
+    isReady = true;
+    reset();
+}
+
+/** Resets every stage; does nothing until initProcessing() has set isReady. */
+template <typename SampleType>
+void Oversampling<SampleType>::reset() noexcept
+{
+    jassert (! engines.isEmpty());
+
+    if (! isReady)
+        return;
+
+    for (auto* stageEngine : engines)
+        stageEngine->reset();
+}
+
+/** Runs the input through every stage's upsampler in turn and returns the block
+    produced by the last stage. Returns an empty block if initProcessing() has
+    not been called yet.
+*/
+template <typename SampleType>
+typename dsp::AudioBlock<SampleType> Oversampling<SampleType>::processSamplesUp (const dsp::AudioBlock<SampleType> &inputBlock) noexcept
+{
+    jassert (! engines.isEmpty());
+
+    if (! isReady)
+        return dsp::AudioBlock<SampleType>();
+
+    dsp::AudioBlock<SampleType> currentBlock = inputBlock;
+
+    for (size_t stage = 0; stage < numStages; ++stage)
+    {
+        auto* stageEngine = engines[static_cast<int> (stage)];
+
+        stageEngine->processSamplesUp (currentBlock);
+
+        // Fetch the stage's output, which is getFactor() times longer than its input.
+        const auto upsampledLength = currentBlock.getNumSamples() * stageEngine->getFactor();
+        currentBlock = stageEngine->getProcessedSamples (upsampledLength);
+    }
+
+    return currentBlock;
+}
+
+/** Walks the stages from last to first, downsampling each stage's data into the
+    processed-samples block of the stage before it; the first stage finally
+    writes into outputBlock. Does nothing until initProcessing() has been called.
+*/
+template <typename SampleType>
+void Oversampling<SampleType>::processSamplesDown (dsp::AudioBlock<SampleType> &outputBlock) noexcept
+{
+    jassert (! engines.isEmpty());
+
+    if (! isReady)
+        return;
+
+    const size_t lastStage = numStages - 1;
+
+    // Length of the block held by the second-to-last stage: the output length
+    // multiplied by the factor of every stage except the last.
+    auto blockLength = outputBlock.getNumSamples();
+
+    for (size_t stage = 0; stage < lastStage; ++stage)
+        blockLength *= engines[static_cast<int> (stage)]->getFactor();
+
+    for (size_t stage = lastStage; stage > 0; --stage)
+    {
+        auto* stageEngine   = engines[static_cast<int> (stage)];
+        auto* previousStage = engines[static_cast<int> (stage - 1)];
+
+        auto targetBlock = previousStage->getProcessedSamples (blockLength);
+        stageEngine->processSamplesDown (targetBlock);
+
+        blockLength /= stageEngine->getFactor();
+    }
+
+    engines[static_cast<int> (0)]->processSamplesDown (outputBlock);
+}
+
+// Explicit instantiations of Oversampling for the float and double sample types.
+template class Oversampling<float>;
+template class Oversampling<double>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_Oversampling.h
+++ b/modules/juce_dsp/processors/juce_Oversampling.h
@ -0,0 +1,150 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+template <typename NumericType>
+class OversamplingEngine;
+#endif
+
+//===============================================================================
+/**
+    A processing class performing multi-channel oversampling.
+
+    It can be configured to do 2 times, 4 times, 8 times or 16 times oversampling
+    using a multi-stage approach, with either polyphase allpass IIR filters or FIR
+    filters for the filtering, and it reports the latency added by the filter
+    stages.
+
+    The principle of oversampling is to increase the sample rate of a given
+    non-linear process, to prevent it from creating aliasing. Oversampling works
+    by upsampling the input signal N times, processing the upsampled signal
+    at the increased internal sample rate, and downsampling the result to get
+    back to the original processing sample rate.
+
+    Choose between FIR or IIR filtering depending on your needs in terms of
+    latency and phase distortion. With FIR filters, the phase is linear but the
+    latency is maximum. With IIR filtering, the phase is compromised around the
+    Nyquist frequency but the latency is minimum.
+
+    @see FilterDesign.
+
+    @tags{DSP}
+*/
+template <typename SampleType>
+class JUCE_API  Oversampling
+{
+public:
+    /** The type of filter that can be used for the oversampling processing. */
+    enum FilterType
+    {
+        filterHalfBandFIREquiripple = 0,
+        filterHalfBandPolyphaseIIR,
+        numFilterTypes
+    };
+
+    //===============================================================================
+    /**
+        Constructor of the oversampling class. All the processing parameters must be
+        provided at the creation of the oversampling object.
+
+        Note: you might want to create a class inheriting from Oversampling with a
+        different constructor if you need more control over what happens in the process.
+
+        @param numChannels      the number of channels to process with this object
+        @param factor           the processing will perform 2 ^ factor times oversampling
+        @param type             the type of filter design employed for filtering during
+                                oversampling
+        @param isMaxQuality     if the oversampling is done using the maximum quality,
+                                the filters will be more efficient, but the CPU load will
+                                increase as well
+    */
+    Oversampling (size_t numChannels, size_t factor, FilterType type, bool isMaxQuality = true);
+
+    /** Destructor. */
+    ~Oversampling();
+
+    //===============================================================================
+    /** Returns the latency in samples of the whole processing. Use this information
+        in your main processor to compensate for the additional latency introduced by
+        the oversampling, for example with a dry / wet functionality, and to report
+        the latency to the DAW.
+
+        Note: the latency might not be an integer, so you might need to round its value
+        or to compensate for it properly in your processing code.
+    */
+    SampleType getLatencyInSamples() noexcept;
+
+    /** Returns the current oversampling factor. */
+    size_t getOversamplingFactor() noexcept;
+
+    //===============================================================================
+    /** Must be called before any processing, to set the buffer sizes of the internal
+        buffers of the oversampling processing.
+    */
+    void initProcessing (size_t maximumNumberOfSamplesBeforeOversampling);
+
+    /** Resets the processing pipeline, ready to oversample a new stream of data. */
+    void reset() noexcept;
+
+    /** Must be called to perform the upsampling, prior to any oversampled processing.
+
+        Returns an AudioBlock referencing the oversampled input signal, which must be
+        used to perform the non-linear processing which needs the higher sample rate.
+        Don't forget to set the sample rate of that processing to N times the original
+        sample rate.
+    */
+    dsp::AudioBlock<SampleType> processSamplesUp (const dsp::AudioBlock<SampleType> &inputBlock) noexcept;
+
+    /** Must be called to perform the downsampling, after the upsampling and the
+        non-linear processing. The output signal is probably delayed by the internal
+        latency of the whole oversampling behaviour, so don't forget to take this
+        into account.
+    */
+    void processSamplesDown (dsp::AudioBlock<SampleType> &outputBlock) noexcept;
+
+private:
+    //===============================================================================
+    // Configuration values; none of these has a default member initialiser.
+    bool isMaximumQuality;
+    size_t factorOversampling, numStages;
+    FilterType type;
+    size_t numChannels;
+
+    //===============================================================================
+    // Starts out false.
+    bool isReady = false;
+
+    // The owned oversampling stage objects.
+    OwnedArray<OversamplingEngine<SampleType>> engines;
+
+    //===============================================================================
+    JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (Oversampling)
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_ProcessContext.h
+++ b/modules/juce_dsp/processors/juce_ProcessContext.h
@ -0,0 +1,170 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    This structure is passed into a DSP algorithm's prepare() method, and contains
+    information about various aspects of the context in which it can expect to be called.
+
+    Note that none of the members has a default initialiser, so every field should
+    be set explicitly before the structure is used.
+
+    @tags{DSP}
+*/
+struct ProcessSpec
+{
+    /** The sample rate that will be used for the data that is sent to the processor. */
+    double sampleRate;
+
+    /** The maximum number of samples that will be in the blocks sent to the process() method. */
+    uint32 maximumBlockSize;
+
+    /** The number of channels that the process() method will be expected to handle. */
+    uint32 numChannels;
+};
+
+//==============================================================================
+/**
+    This is a handy base class for the state of a processor (such as parameter values)
+    which is typically shared among several processors. This is useful for
+    multi-mono filters which share the same state among several mono processors.
+
+    @tags{DSP}
+*/
+struct ProcessorState  : public ReferenceCountedObject
+{
+    /** The ProcessorState structure is ref-counted, so this is a handy type that can be used
+        as a pointer to one.
+    */
+    using Ptr = ReferenceCountedObjectPtr<ProcessorState>;
+};
+
+//==============================================================================
+/**
+    Context object handed to an algorithm's process method for in-place processing.
+
+    A single block serves as both the input and the output, so getInputBlock()
+    and getOutputBlock() both refer to the same underlying block.
+
+    @see ProcessContextNonReplacing
+
+    @tags{DSP}
+*/
+template <typename ContextSampleType>
+struct ProcessContextReplacing
+{
+    /** The type of a single sample (which may be a vector if multichannel). */
+    using SampleType     = ContextSampleType;
+
+    /** The type of audio block that this context handles. */
+    using AudioBlockType = AudioBlock<SampleType>;
+
+    /** Creates a ProcessContextReplacing that refers to the given audio block.
+        Only a reference is stored, so the block must outlive this object.
+    */
+    ProcessContextReplacing (AudioBlockType& block) noexcept
+        : ioBlock (block)
+    {
+    }
+
+    ProcessContextReplacing (const ProcessContextReplacing&) = default;
+    ProcessContextReplacing (ProcessContextReplacing&&) = default;
+
+    /** Returns the audio block to use as the input to a process function. */
+    const AudioBlockType& getInputBlock() const noexcept
+    {
+        return ioBlock;
+    }
+
+    /** Returns the audio block to use as the output to a process function. */
+    AudioBlockType& getOutputBlock() const noexcept
+    {
+        // ioBlock is a reference to a non-const block, so no cast is needed here.
+        return ioBlock;
+    }
+
+    /** All process context classes define this constant method so that templated
+        code can determine whether the input and output blocks refer to the same
+        buffer, or to two different ones.
+    */
+    static constexpr bool usesSeparateInputAndOutputBlocks()
+    {
+        return false;
+    }
+
+    /** If set to true, then a processor's process() method is expected to do whatever
+        is appropriate for it to be in a bypassed state.
+    */
+    bool isBypassed = false;
+
+private:
+    AudioBlockType& ioBlock;
+};
+
+//==============================================================================
+/**
+    Context object handed to an algorithm's process method for out-of-place processing.
+
+    Two different blocks are used as the input and the output of the algorithm, so
+    the processor must read from the block returned by getInputBlock() and write
+    its results to the block returned by getOutputBlock().
+
+    @see ProcessContextReplacing
+
+    @tags{DSP}
+*/
+template <typename ContextSampleType>
+struct ProcessContextNonReplacing
+{
+    /** The type of a single sample (which may be a vector if multichannel). */
+    using SampleType     = ContextSampleType;
+
+    /** The type of audio block that this context handles. */
+    using AudioBlockType = AudioBlock<SampleType>;
+
+    /** Creates a ProcessContextNonReplacing that refers to the given input and output blocks.
+        Only references are stored, so both blocks must outlive this object.
+    */
+    ProcessContextNonReplacing (const AudioBlockType& input, AudioBlockType& output) noexcept
+        : inputBlock (input),
+          outputBlock (output)
+    {
+    }
+
+    ProcessContextNonReplacing (const ProcessContextNonReplacing&) = default;
+    ProcessContextNonReplacing (ProcessContextNonReplacing&&) = default;
+
+    /** Returns the audio block to use as the input to a process function. */
+    const AudioBlockType& getInputBlock() const noexcept
+    {
+        return inputBlock;
+    }
+
+    /** Returns the audio block to use as the output to a process function. */
+    AudioBlockType& getOutputBlock() const noexcept
+    {
+        // outputBlock is a reference to a non-const block, so no cast is needed here.
+        return outputBlock;
+    }
+
+    /** All process context classes define this constant method so that templated
+        code can determine whether the input and output blocks refer to the same
+        buffer, or to two different ones.
+    */
+    static constexpr bool usesSeparateInputAndOutputBlocks()
+    {
+        return true;
+    }
+
+    /** If set to true, then a processor's process() method is expected to do whatever
+        is appropriate for it to be in a bypassed state.
+    */
+    bool isBypassed = false;
+
+private:
+    const AudioBlockType& inputBlock;
+    AudioBlockType& outputBlock;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_ProcessorChain.h
+++ b/modules/juce_dsp/processors/juce_ProcessorChain.h
@ -0,0 +1,128 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+#ifndef DOXYGEN
+namespace ProcessorHelpers  // Internal helper classes used in building the ProcessorChain
+{
+    /** Recursive accessor for element number `arg`: each level forwards to
+        AccessHelper<arg - 1> on the `processors` member of the object it is given,
+        until the AccessHelper<0> specialisation ends the recursion.
+    */
+    template <int arg>
+    struct AccessHelper
+    {
+        /** Forwards to AccessHelper<arg - 1>::get() on a.processors. */
+        template <typename ProcessorType>
+        static auto& get (ProcessorType& a) noexcept                 { return AccessHelper<arg - 1>::get (a.processors); }
+
+        /** Forwards to AccessHelper<arg - 1>::setBypassed() on a.processors. */
+        template <typename ProcessorType>
+        static void setBypassed (ProcessorType& a, bool bypassed)    { AccessHelper<arg - 1>::setBypassed (a.processors, bypassed); }
+    };
+
+    /** Terminating case of the AccessHelper recursion: operates directly on the
+        object it is given.
+    */
+    template <>
+    struct AccessHelper<0>
+    {
+        /** Returns the result of a.getProcessor(). */
+        template <typename ProcessorType>
+        static auto& get (ProcessorType& a) noexcept                 { return a.getProcessor(); }
+
+        /** Writes the flag into a.isBypassed. */
+        template <typename ProcessorType>
+        static void setBypassed (ProcessorType& a, bool bypassed)    { a.isBypassed = bypassed; }
+    };
+
+    //==============================================================================
+    /** One link of a processor chain: owns a single Processor plus its bypass flag.
+
+        `isFirst` says whether this is the first link of the chain, and `Subclass`
+        is the type that `this` is cast to in getThis().
+    */
+    template <bool isFirst, typename Processor, typename Subclass>
+    struct ChainElement
+    {
+        /** Forwards the spec to the wrapped processor. */
+        void prepare (const ProcessSpec& spec)      { processor.prepare (spec); }
+
+        /** Resets the wrapped processor. */
+        void reset()                                { processor.reset(); }
+
+        /** Runs the wrapped processor on a copy of the context whose bypass flag
+            is set if either this element or the incoming context is bypassed.
+
+            For a context with separate input and output blocks, every element
+            except the first processes the output block in place.
+        */
+        template <typename ProcessContext>
+        void process (const ProcessContext& context) noexcept
+        {
+            const bool shouldBypass = (isBypassed || context.isBypassed);
+
+            if (! isFirst && context.usesSeparateInputAndOutputBlocks())
+            {
+                jassert (context.getOutputBlock().getNumChannels() == context.getInputBlock().getNumChannels());
+
+                ProcessContextReplacing<typename ProcessContext::SampleType> inPlaceContext (context.getOutputBlock());
+                inPlaceContext.isBypassed = shouldBypass;
+
+                processor.process (inPlaceContext);
+                return;
+            }
+
+            ProcessContext localContext (context);
+            localContext.isBypassed = shouldBypass;
+
+            processor.process (localContext);
+        }
+
+        bool isBypassed = false;
+        Processor processor;
+
+        /** Returns the wrapped processor. */
+        Processor& getProcessor() noexcept
+        {
+            return processor;
+        }
+
+        /** Returns this object cast to the Subclass type. */
+        Subclass& getThis() noexcept
+        {
+            return *static_cast<Subclass*> (this);
+        }
+
+        /** Returns the processor at position `arg`, counted from this element. */
+        template <int arg>
+        auto& get() noexcept
+        {
+            return AccessHelper<arg>::get (getThis());
+        }
+
+        /** Sets the bypass flag of the element at position `arg`, counted from this element. */
+        template <int arg>
+        void setBypassed (bool bypassed) noexcept
+        {
+            AccessHelper<arg>::setBypassed (getThis(), bypassed);
+        }
+    };
+
+    //==============================================================================
+    /** A chain of two or more processors: a ChainElement for the first processor,
+        plus a nested ChainBase (with isFirst = false) in the `processors` member
+        holding the remaining ones.
+    */
+    template <bool isFirst, typename FirstProcessor, typename... SubsequentProcessors>
+    struct ChainBase  : public ChainElement<isFirst, FirstProcessor, ChainBase<isFirst, FirstProcessor, SubsequentProcessors...>>
+    {
+        using Base = ChainElement<isFirst, FirstProcessor, ChainBase<isFirst, FirstProcessor, SubsequentProcessors...>>;
+
+        // Each call handles this element first, then forwards to the rest of the chain.
+        template <typename ProcessContext>
+        void process (const ProcessContext& context) noexcept  { Base::process (context); processors.process (context); }
+        void prepare (const ProcessSpec& spec)                 { Base::prepare (spec); processors.prepare (spec); }
+        void reset()                                           { Base::reset(); processors.reset(); }
+
+        // The remaining processors of the chain.
+        ChainBase<false, SubsequentProcessors...> processors;
+    };
+
+    /** End of the chain: a single processor, which is just a ChainElement with no
+        nested `processors` member.
+    */
+    template <bool isFirst, typename ProcessorType>
+    struct ChainBase<isFirst, ProcessorType>  : public ChainElement<isFirst, ProcessorType, ChainBase<isFirst, ProcessorType>> {};
+}
+#endif
+
+
+//==============================================================================
+/**
+    This variadically-templated class lets you join together any number of processor
+    classes into a single processor which will call process() on them all in sequence.
+
+    It is an alias for ProcessorHelpers::ChainBase with isFirst set to true.
+*/
+template <typename... Processors>
+using ProcessorChain = ProcessorHelpers::ChainBase<true, Processors...>;
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_ProcessorDuplicator.h
+++ b/modules/juce_dsp/processors/juce_ProcessorDuplicator.h
@ -0,0 +1,99 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Turns a mono processor class into a multi-channel one by keeping one instance
+    of it per channel and handing each instance a single-channel view of the
+    blocks being processed.
+
+    prepare() creates or removes instances so that their number matches the
+    channel count in the spec; process() then uses those instances. Every
+    instance is constructed with the shared `state` object.
+
+    @tags{DSP}
+*/
+template <typename MonoProcessorType, typename StateType>
+struct ProcessorDuplicator
+{
+    ProcessorDuplicator() : state (new StateType()) {}
+    ProcessorDuplicator (StateType* stateToUse) : state (stateToUse) {}
+    ProcessorDuplicator (const ProcessorDuplicator&) = default;
+    ProcessorDuplicator (ProcessorDuplicator&&) = default;
+
+    /** Adjusts the number of mono instances to spec.numChannels, then prepares
+        each of them with a copy of the spec whose numChannels is 1.
+    */
+    void prepare (const ProcessSpec& spec)
+    {
+        // Drop any instances beyond the requested channel count...
+        processors.removeRange (static_cast<int> (spec.numChannels), processors.size());
+
+        // ...and create any that are missing.
+        for (auto count = static_cast<size_t> (processors.size()); count < spec.numChannels; ++count)
+            processors.add (new MonoProcessorType (state));
+
+        ProcessSpec singleChannelSpec = spec;
+        singleChannelSpec.numChannels = 1;
+
+        for (auto* monoProcessor : processors)
+            monoProcessor->prepare (singleChannelSpec);
+    }
+
+    /** Resets every mono instance. */
+    void reset() noexcept
+    {
+        for (auto* monoProcessor : processors)
+            monoProcessor->reset();
+    }
+
+    /** Processes each channel with its own mono instance, covering as many
+        channels as the input and output blocks have in common.
+    */
+    template<typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        const auto numInputChannels  = context.getInputBlock().getNumChannels();
+        const auto numOutputChannels = context.getOutputBlock().getNumChannels();
+
+        jassert ((int) numInputChannels  <= processors.size());
+        jassert ((int) numOutputChannels <= processors.size());
+
+        const auto channelsToProcess = static_cast<size_t> (jmin (numInputChannels, numOutputChannels));
+
+        for (size_t chan = 0; chan < channelsToProcess; ++chan)
+            processors[static_cast<int> (chan)]->process (MonoProcessContext<ProcessContext> (context, chan));
+    }
+
+    typename StateType::Ptr state;
+
+private:
+    /** A copy of a multi-channel context whose block getters return only the
+        single-channel block for `channel`.
+    */
+    template <typename ProcessContext>
+    struct MonoProcessContext : public ProcessContext
+    {
+        MonoProcessContext (const ProcessContext& multiChannelContext, size_t channelToUse)
+            : ProcessContext (multiChannelContext),
+              channel (channelToUse)
+        {
+        }
+
+        size_t channel;
+
+        typename ProcessContext::AudioBlockType getInputBlock() const noexcept
+        {
+            return ProcessContext::getInputBlock().getSingleChannelBlock (channel);
+        }
+
+        typename ProcessContext::AudioBlockType getOutputBlock() const noexcept
+        {
+            return ProcessContext::getOutputBlock().getSingleChannelBlock (channel);
+        }
+    };
+
+    juce::OwnedArray<MonoProcessorType> processors;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_ProcessorWrapper.h
+++ b/modules/juce_dsp/processors/juce_ProcessorWrapper.h
@ -0,0 +1,82 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Acts as a polymorphic base class for processors.
+    This exposes the same set of methods that a processor must implement as virtual
+    methods, so that you can use the ProcessorWrapper class to wrap an instance of
+    a subclass, and then pass that around using ProcessorBase as a base class.
+
+    Note that process() is declared for ProcessContextReplacing<float> only.
+    @see ProcessorWrapper
+
+    @tags{DSP}
+*/
+struct ProcessorBase
+{
+    ProcessorBase() = default;
+    virtual ~ProcessorBase() = default;
+
+    /** Called with the processing spec; see ProcessSpec. */
+    virtual void prepare (const ProcessSpec&)  = 0;
+
+    /** Called to process a block in place, using a float replacing context. */
+    virtual void process (const ProcessContextReplacing<float>&) = 0;
+
+    /** Called to reset the processor. */
+    virtual void reset() = 0;
+};
+
+
+//==============================================================================
+/**
+    Wraps an instance of a given processor class, and exposes it through the
+    ProcessorBase interface. Each virtual method simply forwards to the method
+    of the same name on the wrapped `processor` member.
+    @see ProcessorBase
+
+    @tags{DSP}
+*/
+template <typename ProcessorType>
+struct ProcessorWrapper  : public ProcessorBase
+{
+    /** Forwards the spec to the wrapped processor's prepare(). */
+    void prepare (const ProcessSpec& spec) override
+    {
+        processor.prepare (spec);
+    }
+
+    /** Forwards the context to the wrapped processor's process(). */
+    void process (const ProcessContextReplacing<float>& context) override
+    {
+        processor.process (context);
+    }
+
+    /** Forwards to the wrapped processor's reset(). */
+    void reset() override
+    {
+        processor.reset();
+    }
+
+    /** The wrapped processor instance. */
+    ProcessorType processor;
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_Reverb.h
+++ b/modules/juce_dsp/processors/juce_Reverb.h
@ -0,0 +1,117 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Processor wrapper around juce::Reverb for easy integration into ProcessorChain.
+
+    The wrapper forwards parameter, sample-rate and reset calls to an internal
+    juce::Reverb, and adds an enable flag which, when cleared, makes process()
+    pass the input through unprocessed.
+
+    @tags{DSP}
+*/
+class Reverb
+{
+public:
+    //==============================================================================
+    /** Creates an uninitialised Reverb processor. Call prepare() before first use. */
+    Reverb() = default;
+
+    //==============================================================================
+    using Parameters = juce::Reverb::Parameters;
+
+    /** Returns the reverb's current parameters. */
+    const Parameters& getParameters() const noexcept    { return reverb.getParameters(); }
+
+    /** Applies a new set of parameters to the reverb.
+        Note that this doesn't attempt to lock the reverb, so if you call this in parallel with
+        the process method, you may get artifacts.
+    */
+    void setParameters (const Parameters& newParams)    { reverb.setParameters (newParams); }
+
+    /** Returns true if the reverb is enabled. */
+    bool isEnabled() const noexcept                     { return enabled; }
+
+    /** Enables/disables the reverb. */
+    void setEnabled (bool newValue) noexcept            { enabled = newValue; }
+
+    //==============================================================================
+    /** Initialises the reverb by passing the spec's sample rate on to it. */
+    void prepare (const juce::dsp::ProcessSpec& spec)
+    {
+        reverb.setSampleRate (spec.sampleRate);
+    }
+
+    /** Resets the reverb's internal state. */
+    void reset() noexcept
+    {
+        reverb.reset();
+    }
+
+    //==============================================================================
+    /** Applies the reverb to a mono or stereo buffer.
+
+        The input is first made available in the output block and the reverb is then
+        applied to the output block in place. If the reverb is disabled, or the
+        context is bypassed, the output is left as the unprocessed input.
+
+        Only mono-to-mono and stereo-to-stereo layouts are handled; any other channel
+        configuration triggers an assertion and leaves the output unprocessed.
+    */
+    template <typename ProcessContext>
+    void process (const ProcessContext& context) noexcept
+    {
+        const auto& inputBlock = context.getInputBlock();
+        auto& outputBlock = context.getOutputBlock();
+        const auto numInChannels = inputBlock.getNumChannels();
+        const auto numOutChannels = outputBlock.getNumChannels();
+        const auto numSamples = outputBlock.getNumSamples();
+
+        jassert (inputBlock.getNumSamples() == numSamples);
+
+        // When the context processes in place, the output block already holds the
+        // input, so copying would only be a redundant self-copy of the whole buffer.
+        if (context.usesSeparateInputAndOutputBlocks())
+            outputBlock.copy (inputBlock);
+
+        if (! enabled || context.isBypassed)
+            return;
+
+        if (numInChannels == 1 && numOutChannels == 1)
+        {
+            reverb.processMono (outputBlock.getChannelPointer (0), static_cast<int> (numSamples));
+        }
+        else if (numInChannels == 2 && numOutChannels == 2)
+        {
+            reverb.processStereo (outputBlock.getChannelPointer (0),
+                                  outputBlock.getChannelPointer (1),
+                                  static_cast<int> (numSamples));
+        }
+        else
+        {
+            jassertfalse;   // invalid channel configuration
+        }
+    }
+
+private:
+    //==============================================================================
+    juce::Reverb reverb;    // the wrapped reverb that does the actual processing
+    bool enabled = true;    // when false, process() leaves the signal unprocessed
+};
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_StateVariableFilter.h
+++ b/modules/juce_dsp/processors/juce_StateVariableFilter.h
@ -0,0 +1,238 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Classes for state variable filter processing.
+*/
+namespace StateVariableFilter
+{
+    // Forward declaration: Filter refers to Parameters, which is defined after it.
+    template <typename NumericType>
+    struct Parameters;
+
+    /**
+        An IIR filter that can perform low, band and high-pass filtering on an audio
+        signal, with 12 dB of attenuation / octave, using a TPT structure, designed
+        for fast modulation (see Vadim Zavalishin's documentation about TPT
+        structures for more information). Its behaviour is based on the analog
+        state variable filter circuit.
+
+        Note: the band-pass here is not the one in the RBJ CookBook; its gain can be
+        higher than 0 dB. For the classic 0 dB band-pass, multiply the result by R2.
+
+        @tags{DSP}
+    */
+    template <typename SampleType>
+    class Filter
+    {
+    public:
+        //==============================================================================
+        /** The NumericType is the underlying primitive type used by the SampleType (which
+            could be either a primitive or vector)
+        */
+        using NumericType = typename SampleTypeHelpers::ElementType<SampleType>::Type;
+
+        //==============================================================================
+        /** Creates a filter with default parameters. */
+        Filter()                            : parameters (new Parameters<NumericType>) { reset(); }
+
+        /** Creates a filter that uses the given parameters object.
+            The pointer is stored in the ref-counted `parameters` member.
+        */
+        Filter (Parameters<NumericType>* parametersToUse) : parameters (parametersToUse) { reset(); }
+
+        /** Creates a copy of another filter. */
+        Filter (const Filter&) = default;
+
+        /** Move constructor */
+        Filter (Filter&&) = default;
+
+        //==============================================================================
+        /** Initialization of the filter */
+        void prepare (const ProcessSpec&) noexcept     { reset(); }
+
+        /** Resets the filter's processing pipeline. */
+        void reset() noexcept                          { s1 = s2 = SampleType {0}; }
+
+        /** Ensure that the state variables are rounded to zero if the state
+            variables are denormals. This is only needed if you are doing
+            sample by sample processing.
+        */
+        void snapToZero() noexcept                     { util::snapToZero (s1); util::snapToZero (s2); }
+
+        //==============================================================================
+        /** The parameters of the state variable filter. It's up to the caller to ensure
+            that these parameters are modified in a thread-safe way. */
+        typename Parameters<NumericType>::Ptr parameters;
+
+        //==============================================================================
+        /** Filters the single channel held by the context.
+
+            The context's sample type must match the filter's SampleType. When the
+            context is bypassed the input is passed through to the output, but the
+            filter's internal state is still updated.
+        */
+        template <typename ProcessContext>
+        void process (const ProcessContext& context) noexcept
+        {
+            static_assert (std::is_same<typename ProcessContext::SampleType, SampleType>::value,
+                           "The sample-type of the filter must match the sample-type supplied to this process callback");
+
+            if (context.isBypassed)
+                processInternal<true, ProcessContext> (context);
+            else
+                processInternal<false, ProcessContext> (context);
+        }
+
+        /** Processes a single sample, without any locking or checking.
+            Use this if you need processing of a single value. */
+        SampleType JUCE_VECTOR_CALLTYPE processSample (SampleType sample) noexcept
+        {
+            switch (parameters->type)
+            {
+                case Parameters<NumericType>::Type::lowPass:  return processLoop<false, Parameters<NumericType>::Type::lowPass>  (sample, *parameters);
+                case Parameters<NumericType>::Type::bandPass: return processLoop<false, Parameters<NumericType>::Type::bandPass> (sample, *parameters);
+                case Parameters<NumericType>::Type::highPass: return processLoop<false, Parameters<NumericType>::Type::highPass> (sample, *parameters);
+                default: jassertfalse;
+            }
+
+            return SampleType{0};
+        }
+
+    private:
+        //==============================================================================
+        /** Advances the filter by one sample.
+
+            All three outputs are computed on every call: y[2] is the high-pass,
+            y[1] the band-pass and y[0] the low-pass output, which matches the
+            numeric order of Parameters::Type so the requested one can be picked by
+            index. The coefficients in `state` are only read.
+        */
+        template <bool isBypassed, typename Parameters<NumericType>::Type type>
+        SampleType JUCE_VECTOR_CALLTYPE processLoop (SampleType sample, const Parameters<NumericType>& state) noexcept
+        {
+            y[2] = (sample - s1 * state.R2 - s1 * state.g - s2) * state.h;
+
+            y[1] = y[2] * state.g + s1;
+            s1   = y[2] * state.g + y[1];
+
+            y[0] = y[1] * state.g + s2;
+            s2   = y[1] * state.g + y[0];
+
+            return isBypassed ? sample : y[static_cast<size_t> (type)];
+        }
+
+        /** Filters n samples from input into output using one snapshot of the parameters. */
+        template <bool isBypassed, typename Parameters<NumericType>::Type type>
+        void processBlock (const SampleType* input, SampleType* output, size_t n) noexcept
+        {
+            // Take a local copy of the coefficients for the whole block. The copy is
+            // never modified, so nothing is written back to the shared object.
+            const auto state = *parameters;
+
+            for (size_t i = 0; i < n; ++i)
+                output[i] = processLoop<isBypassed, type> (input[i], state);
+
+            snapToZero();
+        }
+
+        /** Checks the channel layout and dispatches to the processBlock
+            instantiation that matches the current filter type.
+        */
+        template <bool isBypassed, typename ProcessContext>
+        void processInternal (const ProcessContext& context) noexcept
+        {
+            auto&& inputBlock  = context.getInputBlock();
+            auto&& outputBlock = context.getOutputBlock();
+
+            // This class can only process mono signals. Use the ProcessorDuplicator class
+            // to apply this filter on a multi-channel audio stream.
+            jassert (inputBlock.getNumChannels()  == 1);
+            jassert (outputBlock.getNumChannels() == 1);
+
+            auto n = inputBlock.getNumSamples();
+            auto* src = inputBlock .getChannelPointer (0);
+            auto* dst = outputBlock.getChannelPointer (0);
+
+            // n samples are written to dst, so the output must be as long as the input.
+            jassert (outputBlock.getNumSamples() == n);
+
+            switch (parameters->type)
+            {
+                case Parameters<NumericType>::Type::lowPass:  processBlock<isBypassed, Parameters<NumericType>::Type::lowPass>  (src, dst, n); break;
+                case Parameters<NumericType>::Type::bandPass: processBlock<isBypassed, Parameters<NumericType>::Type::bandPass> (src, dst, n); break;
+                case Parameters<NumericType>::Type::highPass: processBlock<isBypassed, Parameters<NumericType>::Type::highPass> (src, dst, n); break;
+                default: jassertfalse;
+            }
+        }
+
+        //==============================================================================
+        // Scratch outputs of the last processLoop call, indexed by Parameters::Type.
+        // Value-initialised so that copying a filter that has not processed anything
+        // yet does not read indeterminate values.
+        std::array<SampleType, 3> y {};
+
+        // The two state variables of the filter; cleared by reset().
+        SampleType s1, s2;
+
+        //==============================================================================
+        JUCE_LEAK_DETECTOR (Filter)
+    };
+
+    //==============================================================================
+    /**
+        Structure used for the state variable filter parameters.
+
+        It holds the filter type together with the three coefficients (g, R2, h)
+        that setCutOffFrequency() derives from the sample rate, cutoff frequency
+        and resonance.
+
+        @tags{DSP}
+    */
+    template <typename NumericType>
+    struct Parameters  : public ProcessorState
+    {
+        //==============================================================================
+        enum class Type
+        {
+            lowPass,
+            bandPass,
+            highPass
+        };
+
+        //==============================================================================
+        /** The type of the IIR filter */
+        Type type = Type::lowPass;
+
+        /** Sets the cutoff frequency and resonance of the IIR filter.
+            Note : the bandwidth of the resonance increases with the value of the
+            parameter. To have a standard 12 dB/octave filter, the value must be set
+            at 1 / sqrt(2).
+
+            In debug builds this asserts that the sample rate and resonance are
+            positive and that the frequency lies in (0, sampleRate / 2].
+        */
+        void setCutOffFrequency (double sampleRate, NumericType frequency,
+                                 NumericType resonance = static_cast<NumericType> (1.0 / MathConstants<double>::sqrt2)) noexcept
+        {
+            jassert (sampleRate > 0);
+            jassert (resonance > NumericType (0));
+            jassert (frequency > NumericType (0) && frequency <= NumericType (sampleRate * 0.5));
+
+            g  = static_cast<NumericType> (std::tan (MathConstants<double>::pi * frequency / sampleRate));
+            R2 = static_cast<NumericType> (1.0 / resonance);
+            h  = static_cast<NumericType> (1.0 / (1.0 + R2 * g + g * g));
+        }
+
+        //==============================================================================
+        /** The Parameters structure is ref-counted, so this is a handy type that can be used
+            as a pointer to one.
+        */
+        using Ptr = ReferenceCountedObjectPtr<Parameters>;
+
+        //==============================================================================
+        Parameters() = default;
+
+        /** Copies the filter type and the coefficients of another Parameters object.
+            The ProcessorState base is default-constructed rather than copied.
+        */
+        Parameters (const Parameters& o) : type (o.type), g (o.g), R2 (o.R2), h (o.h) {}
+
+        /** Assigns the filter type and the coefficients of another Parameters object. */
+        Parameters& operator= (const Parameters& o) noexcept    { type = o.type; g = o.g; R2 = o.R2; h = o.h; return *this; }
+
+        //==============================================================================
+        // Default coefficients: a 200 Hz cutoff at a 44100 Hz sample rate, with
+        // R2 = sqrt(2), i.e. the default resonance of 1 / sqrt(2).
+        NumericType g   = static_cast<NumericType> (std::tan (MathConstants<double>::pi * 200.0 / 44100.0));
+        NumericType R2  = static_cast<NumericType> (MathConstants<double>::sqrt2);
+        NumericType h   = static_cast<NumericType> (1.0 / (1.0 + R2 * g + g * g));
+    };
+}
+
+} // namespace dsp
+} // namespace juce
--- a/modules/juce_dsp/processors/juce_WaveShaper.h
+++ b/modules/juce_dsp/processors/juce_WaveShaper.h
@ -0,0 +1,86 @@
+/*
+  ==============================================================================
+
+   This file is part of the JUCE library.
+   Copyright (c) 2017 - ROLI Ltd.
+
+   JUCE is an open source library subject to commercial or open-source
+   licensing.
+
+   By using JUCE, you agree to the terms of both the JUCE 5 End-User License
+   Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
+   27th April 2017).
+
+   End User License Agreement: www.juce.com/juce-5-licence
+   Privacy Policy: www.juce.com/juce-5-privacy-policy
+
+   Or: You may also use this code under the terms of the GPL v3 (see
+   www.gnu.org/licenses).
+
+   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
+   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
+   DISCLAIMED.
+
+  ==============================================================================
+*/
+
+namespace juce
+{
+namespace dsp
+{
+
+/**
+    Passes audio through a user-supplied transfer function, either one sample at
+    a time or over whole AudioBlocks.
+
+    The only data member is the function itself; prepare() and reset() do nothing.
+
+    @tags{DSP}
+*/
+template <typename FloatType, typename Function = FloatType (*) (FloatType)>
+struct WaveShaper
+{
+    /** The function that is applied to every sample. */
+    Function functionToUse;
+
+    //==============================================================================
+    /** Called before processing starts; there is nothing to set up. */
+    void prepare (const ProcessSpec&) noexcept   {}
+
+    /** There is nothing to reset. */
+    void reset() noexcept {}
+
+    //==============================================================================
+    /** Applies the function to one sample and returns the result. */
+    template <typename SampleType>
+    SampleType JUCE_VECTOR_CALLTYPE processSample (SampleType inputSample) const noexcept
+    {
+        return functionToUse (inputSample);
+    }
+
+    /** Runs the function over the context's input block, writing to its output block.
+        When the context is bypassed, the input is passed through unchanged.
+    */
+    template <typename ProcessContext>
+    void process (const ProcessContext& context) const noexcept
+    {
+        if (! context.isBypassed)
+        {
+            AudioBlock<FloatType>::process (context.getInputBlock(),
+                                            context.getOutputBlock(),
+                                            functionToUse);
+            return;
+        }
+
+        // Bypassed: a copy is only needed when the output is a different block.
+        if (context.usesSeparateInputAndOutputBlocks())
+            context.getOutputBlock().copy (context.getInputBlock());
+    }
+};
+
+//==============================================================================
+// Although clang supports C++17, their standard library still has no invoke_result
+// support. Remove the "|| JUCE_CLANG" once clang supports this properly!
+//
+// The WaveShaper's FloatType is deduced as the type returned when the functor is
+// called with a float argument. The traits need both the argument list and the
+// nested ::type: naming std::result_of<Functor> or std::invoke_result<Functor>
+// on their own would make the trait class itself the WaveShaper's sample type.
+#if (! JUCE_CXX17_IS_AVAILABLE) || JUCE_CLANG
+/** Creates a WaveShaper that applies the given function object to its samples. */
+template <typename Functor>
+static WaveShaper<typename std::result_of<Functor (float)>::type, Functor> CreateWaveShaper (Functor functionToUse)   { return {functionToUse}; }
+#else
+/** Creates a WaveShaper that applies the given function object to its samples. */
+template <typename Functor>
+static WaveShaper<typename std::invoke_result<Functor, float>::type, Functor> CreateWaveShaper (Functor functionToUse)   { return {functionToUse}; }
+#endif
+
+} // namespace dsp
+} // namespace juce