You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
222 lines
8.3 KiB
222 lines
8.3 KiB
/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
|
|
|
|
Based on original fortran 77 code from FFTPACKv4 from NETLIB,
|
|
authored by Dr Paul Swarztrauber of NCAR, in 1985.
|
|
|
|
As confirmed by the NCAR fftpack software curators, the following
|
|
FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
|
|
released under the same terms.
|
|
|
|
FFTPACK license:
|
|
|
|
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
|
|
|
|
Copyright (c) 2004 the University Corporation for Atmospheric
|
|
Research ("UCAR"). All rights reserved. Developed by NCAR's
|
|
Computational and Information Systems Laboratory, UCAR,
|
|
www.cisl.ucar.edu.
|
|
|
|
Redistribution and use of the Software in source and binary forms,
|
|
with or without modification, is permitted provided that the
|
|
following conditions are met:
|
|
|
|
- Neither the names of NCAR's Computational and Information Systems
|
|
Laboratory, the University Corporation for Atmospheric Research,
|
|
nor the names of its sponsors or contributors may be used to
|
|
endorse or promote products derived from this Software without
|
|
specific prior written permission.
|
|
|
|
- Redistributions of source code must retain the above copyright
|
|
notices, this list of conditions, and the disclaimer below.
|
|
|
|
- Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions, and the disclaimer below in the
|
|
documentation and/or other materials provided with the
|
|
distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
|
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
|
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
|
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
|
SOFTWARE.
|
|
*/
|
|
/*
|
|
NOTE: This file is adapted from Julien Pommier's original PFFFT,
|
|
which works on 32 bit floating point precision using SSE instructions,
|
|
to work with 64 bit floating point precision using AVX instructions.
|
|
Author: Dario Mambro @ https://github.com/unevens/pffft
|
|
*/
|
|
/*
|
|
PFFFT : a Pretty Fast FFT.
|
|
|
|
This is basically an adaptation of the single precision fftpack
|
|
(v4) as found on netlib taking advantage of SIMD instruction found
|
|
on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
|
|
|
|
For architectures where no SIMD instruction is available, the code
|
|
falls back to a scalar version.
|
|
|
|
Restrictions:
|
|
|
|
- 1D transforms only, with 64-bit double precision.
|
|
|
|
- supports only transforms for inputs of length N of the form
|
|
N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
|
|
144, 160, etc are all acceptable lengths). Performance is best for
|
|
128<=N<=8192.
|
|
|
|
- all (double*) pointers in the functions below are expected to
|
|
have an "simd-compatible" alignment, that is 32 bytes on x86 and
|
|
powerpc CPUs.
|
|
|
|
You can allocate such buffers with the functions
|
|
pffft_aligned_malloc / pffft_aligned_free (or with stuff like
|
|
posix_memalign..)
|
|
|
|
*/
|
|
|
|
#ifndef PFFFT_DOUBLE_H
|
|
#define PFFFT_DOUBLE_H
|
|
|
|
#include <stddef.h> /* for size_t */
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* opaque struct holding internal stuff (precomputed twiddle factors)
|
|
this struct can be shared by many threads as it contains only
|
|
read-only data.
|
|
*/
|
|
typedef struct PFFFTD_Setup PFFFTD_Setup;
|
|
|
|
#ifndef PFFFT_COMMON_ENUMS
|
|
#define PFFFT_COMMON_ENUMS
|
|
|
|
/* direction of the transform */
|
|
typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
|
|
|
|
/* type of transform */
|
|
typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
|
|
|
|
#endif
|
|
|
|
/*
|
|
prepare for performing transforms of size N -- the returned
|
|
PFFFTD_Setup structure is read-only so it can safely be shared by
|
|
multiple concurrent threads.
|
|
*/
|
|
PFFFTD_Setup *pffftd_new_setup(int N, pffft_transform_t transform);
|
|
void pffftd_destroy_setup(PFFFTD_Setup *);
|
|
/*
|
|
Perform a Fourier transform , The z-domain data is stored in the
|
|
most efficient order for transforming it back, or using it for
|
|
convolution. If you need to have its content sorted in the
|
|
"usual" way, that is as an array of interleaved complex numbers,
|
|
either use pffft_transform_ordered , or call pffft_zreorder after
|
|
the forward fft, and before the backward fft.
|
|
|
|
Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
|
|
Typically you will want to scale the backward transform by 1/N.
|
|
|
|
The 'work' pointer should point to an area of N (2*N for complex
|
|
fft) doubles, properly aligned. If 'work' is NULL, then stack will
|
|
be used instead (this is probably the best strategy for small
|
|
FFTs, say for N < 16384). Threads usually have a small stack, that
|
|
there's no sufficient amount of memory, usually leading to a crash!
|
|
Use the heap with pffft_aligned_malloc() in this case.
|
|
|
|
input and output may alias.
|
|
*/
|
|
void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);
|
|
|
|
/*
|
|
Similar to pffft_transform, but makes sure that the output is
|
|
ordered as expected (interleaved complex numbers). This is
|
|
similar to calling pffft_transform and then pffft_zreorder.
|
|
|
|
input and output may alias.
|
|
*/
|
|
void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);
|
|
|
|
/*
|
|
call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
|
|
PFFFT_FORWARD) if you want to have the frequency components in
|
|
the correct "canonical" order, as interleaved complex numbers.
|
|
|
|
(for real transforms, both 0-frequency and half frequency
|
|
components, which are real, are assembled in the first entry as
|
|
F(0)+i*F(n/2+1). Note that the original fftpack did place
|
|
F(n/2+1) at the end of the arrays).
|
|
|
|
input and output should not alias.
|
|
*/
|
|
void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffft_direction_t direction);
|
|
|
|
/*
|
|
Perform a multiplication of the frequency components of dft_a and
|
|
dft_b and accumulate them into dft_ab. The arrays should have
|
|
been obtained with pffft_transform(.., PFFFT_FORWARD) and should
|
|
*not* have been reordered with pffft_zreorder (otherwise just
|
|
perform the operation yourself as the dft coefs are stored as
|
|
interleaved complex numbers).
|
|
|
|
the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
|
|
|
|
The dft_a, dft_b and dft_ab pointers may alias.
|
|
*/
|
|
void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
|
|
|
|
/*
|
|
Perform a multiplication of the frequency components of dft_a and
|
|
dft_b and put result in dft_ab. The arrays should have
|
|
been obtained with pffft_transform(.., PFFFT_FORWARD) and should
|
|
*not* have been reordered with pffft_zreorder (otherwise just
|
|
perform the operation yourself as the dft coefs are stored as
|
|
interleaved complex numbers).
|
|
|
|
the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
|
|
|
|
The dft_a, dft_b and dft_ab pointers may alias.
|
|
*/
|
|
void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling);
|
|
|
|
/* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */
|
|
int pffftd_simd_size();
|
|
|
|
/* return string identifier of used architecture (AVX/..) */
|
|
const char * pffftd_simd_arch();
|
|
|
|
|
|
/* following functions are identical to the pffft_ functions */
|
|
|
|
/* simple helper to get minimum possible fft size */
|
|
int pffftd_min_fft_size(pffft_transform_t transform);
|
|
|
|
/* simple helper to determine next power of 2
|
|
- without inexact/rounding floating point operations
|
|
*/
|
|
int pffftd_next_power_of_two(int N);
|
|
|
|
/* simple helper to determine if power of 2 - returns bool */
|
|
int pffftd_is_power_of_two(int N);
|
|
|
|
/*
|
|
the double buffers must have the correct alignment (32-byte boundary
|
|
on intel and powerpc). This function may be used to obtain such
|
|
correctly aligned buffers.
|
|
*/
|
|
void *pffftd_aligned_malloc(size_t nb_bytes);
|
|
void pffftd_aligned_free(void *);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* PFFFT_DOUBLE_H */
|
|
|