fast_convert.h

00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * fast_convert.h - Quick ways to convert floating point numbers to integers
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2009 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  */
00025 
00026 #if !defined(_SPANDSP_FAST_CONVERT_H_)
00027 #define _SPANDSP_FAST_CONVERT_H_
00028 
00029 #if defined(__cplusplus)
00030 extern "C"
00031 {
00032 #endif
00033 
00034 /* The following code, to handle issues with lrint() and lrintf() on various
00035  * platforms, is adapted from similar code in libsndfile, which is:
00036  *
00037  * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
00038  *
00039  * This program is free software; you can redistribute it and/or modify
00040  * it under the terms of the GNU Lesser General Public License as published by
00041  * the Free Software Foundation; either version 2.1 of the License, or
00042  * (at your option) any later version.
00043  *
00044  * This program is distributed in the hope that it will be useful,
00045  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00046  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00047  * GNU Lesser General Public License for more details.
00048  */
00049 
00050 /*
00051  *    On Intel Pentium processors (especially PIII and probably P4), converting
00052  *    from float to int is very slow. To meet the C specs, the code produced by
00053  *    most C compilers targeting Pentium needs to change the FPU rounding mode
00054  *    before the float to int conversion is performed.
00055  *
00056  *    Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
00057  *    is this flushing of the pipeline which is so slow.
00058  *
00059  *    Fortunately the ISO C99 specification defines the functions lrint, lrintf,
00060  *    llrint and llrintf which fix this problem as a side effect.
00061  *
00062  *    On Unix-like systems, the configure process should have detected the
00063  *    presence of these functions. If they weren't found we have to replace them
00064  *    here with a standard C cast.
00065  */
00066 
00067 /*
00068  *    The C99 prototypes for these functions are as follows:
00069  *
00070  *        float rintf(float x);
00071  *        double rint(double x);
00072  *        long int lrintf(float x);
00073  *        long int lrint(double x);
00074  *        long long int llrintf(float x);
00075  *        long long int llrint(double x);
00076  *
00077  *    The presence of the required functions is detected during the configure
00078  *    process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
00079  *    the config file.
00080  */
00081 
00082 #if defined(__CYGWIN__)
00083 #if !defined(__cplusplus)  &&  (__GNUC__ < 4)
00084     /*
00085      *    CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
00086      *    they are slow and buggy:
00087      *        http://sourceware.org/ml/cygwin/2005-06/msg00153.html
00088      *        http://sourceware.org/ml/cygwin/2005-09/msg00047.html
00089      *    These replacement functions (pulled from the Public Domain MinGW
00090      *    math.h header) replace the native versions.
00091      */
/*
 * Replacement lrint() for old Cygwin tool chains: convert a double to a
 * long int with the x87 "fistpl" instruction.
 *
 * x        The value to convert. It is passed to the asm on top of the x87
 *          register stack ("t" constraint).
 * returns  The integer which fistpl stored, rounded using whatever rounding
 *          mode the FPU is currently set to.
 */
static __inline__ long int lrint(double x)
{
    long int converted;

    /* fistpl stores to the memory operand and pops the x87 stack, which is
       why "st" appears in the clobber list. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
00106 
/*
 * Replacement lrintf() for old Cygwin tool chains: convert a float to a
 * long int with the x87 "fistpl" instruction.
 *
 * x        The value to convert. It is passed to the asm on top of the x87
 *          register stack ("t" constraint).
 * returns  The integer which fistpl stored, rounded using whatever rounding
 *          mode the FPU is currently set to.
 */
static __inline__ long int lrintf(float x)
{
    long int converted;

    /* fistpl stores to the memory operand and pops the x87 stack, which is
       why "st" appears in the clobber list. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
00120 #endif
00121 
00122     /* The fastest way to convert is the equivalent of lrint() */
/*
 * Fast conversion of a double to a long int on Cygwin, using the x87
 * "fistpl" instruction.
 *
 * x        The value to convert. It is passed to the asm on top of the x87
 *          register stack ("t" constraint).
 * returns  The converted value, rounded using whatever rounding mode the FPU
 *          is currently set to. The result is limited to the 32 bit range
 *          fistpl can produce.
 */
static __inline__ long int lfastrint(double x)
{
    /* fistpl always writes exactly 32 bits. Store into an int, so the whole
       object is written even where long int is 64 bits wide (as it is on
       x86_64 Cygwin), and widen the value on return. */
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"      /* fistpl pops the x87 stack */
    );

    return (long int) retval;
}
00137 
/*
 * Fast conversion of a float to a long int on Cygwin, using the x87
 * "fistpl" instruction.
 *
 * x        The value to convert. It is passed to the asm on top of the x87
 *          register stack ("t" constraint).
 * returns  The converted value, rounded using whatever rounding mode the FPU
 *          is currently set to. The result is limited to the 32 bit range
 *          fistpl can produce.
 */
static __inline__ long int lfastrintf(float x)
{
    /* fistpl always writes exactly 32 bits. Store into an int, so the whole
       object is written even where long int is 64 bits wide (as it is on
       x86_64 Cygwin), and widen the value on return. */
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"      /* fistpl pops the x87 stack */
    );

    return (long int) retval;
}
00151 #elif defined(__GNUC__)  ||  (__SUNPRO_C >= 0x0590)
00152 
00153 #if defined(__i386__)
00154     /* These routines are guaranteed fast on an i386 machine. Using the built in
00155        lrint() and lrintf() should be similar, but they may not always be enabled.
00156        Sometimes, especially with "-O0", you might get slow calls to routines. */
/*
 * Fast conversion of a double to a long int on i386, using the x87 "fistpl"
 * instruction directly rather than depending on how lrint() gets compiled.
 *
 * x        The value to convert. It is passed to the asm on top of the x87
 *          register stack ("t" constraint).
 * returns  The integer which fistpl stored, rounded using whatever rounding
 *          mode the FPU is currently set to.
 */
static __inline__ long int lfastrint(double x)
{
    long int converted;

    /* fistpl stores to the memory operand and pops the x87 stack, which is
       why "st" appears in the clobber list. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
00171 
/*
 * Fast conversion of a float to a long int on i386, using the x87 "fistpl"
 * instruction directly rather than depending on how lrintf() gets compiled.
 *
 * x        The value to convert. It is passed to the asm on top of the x87
 *          register stack ("t" constraint).
 * returns  The integer which fistpl stored, rounded using whatever rounding
 *          mode the FPU is currently set to.
 */
static __inline__ long int lfastrintf(float x)
{
    long int converted;

    /* fistpl stores to the memory operand and pops the x87 stack, which is
       why "st" appears in the clobber list. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
00185 #elif defined(__x86_64__)
00186     /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
00187        double or float to an int. It looks like the design on the x86_64 took account
00188        of the default behaviour specified for C. */
/*
 * Fast conversion of a double to a long int on x86_64: a plain C cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, not rounding to nearest).
 */
static __inline__ long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00193 
/*
 * Fast conversion of a float to a long int on x86_64: a plain C cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, not rounding to nearest).
 */
static __inline__ long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00198 #elif defined(__ppc__)  ||   defined(__powerpc__)
00199     static __inline__ long int lfastrint(register double x)
00200     {
00201         int res[2];
00202 
00203         __asm__ __volatile__
00204         (
00205             "fctiw %1, %1\n\t"
00206             "stfd %1, %0"
00207             : "=m" (res)    /* Output */
00208             : "f" (x)       /* Input */
00209             : "memory"
00210         );
00211 
00212         return res[1];
00213     }
00214 
00215     static __inline__ long int lfastrintf(register float x)
00216     {
00217         int res[2];
00218 
00219         __asm__ __volatile__
00220         (
00221             "fctiw %1, %1\n\t"
00222             "stfd %1, %0"
00223             : "=m" (res)    /* Output */
00224             : "f" (x)       /* Input */
00225             : "memory"
00226         );
00227 
00228         return res[1];
00229     }
00230 #else
00231     /* Fallback routines, for unrecognised platforms */
/*
 * Fallback double to long int conversion for GNU-style compilers on
 * processors with no dedicated routine here: a plain C cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, not rounding to nearest).
 */
static __inline__ long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00236 
/*
 * Fallback float to long int conversion for GNU-style compilers on
 * processors with no dedicated routine here: a plain C cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, not rounding to nearest).
 */
static __inline__ long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00241 #endif
00242 
00243 #elif defined(_M_IX86)
00244     /* Visual Studio i386 */
00245     /*
00246      *    Win32 doesn't seem to have the lrint() and lrintf() functions.
00247      *    Therefore implement inline versions of these functions here.
00248      */
00249 
00250     __inline long int lrint(double x)
00251     {
00252         long int i;
00253 
00254         _asm
00255         {
00256             fld x
00257             fistp i
00258         };
00259         return i;
00260     }
00261 
00262     __inline long int lrintf(float x)
00263     {
00264         long int i;
00265 
00266         _asm
00267         {
00268             fld x
00269             fistp i
00270         };
00271         return i;
00272     }
00273 
00274     __inline float rintf(float flt)
00275     {
00276         _asm
00277         {       fld flt
00278                 frndint
00279         }
00280     }
00281 
00282     __inline double rint(double dbl)
00283     {
00284         _asm 
00285         {
00286             fld dbl
00287             frndint
00288         }
00289     }
00290 
00291     __inline long int lfastrint(double x)
00292     {
00293         long int i;
00294 
00295         _asm
00296         {
00297             fld x
00298             fistp i
00299         };
00300         return i;
00301     }
00302 
00303     __inline long int lfastrintf(float x)
00304     {
00305         long int i;
00306 
00307         _asm
00308         {
00309             fld x
00310             fistp i
00311         };
00312         return i;
00313     }
00314 #elif defined(_M_X64)
00315     /* Visual Studio x86_64 */
00316     /* x86_64 machines will do best with a simple assignment. */
00317 #include <intrin.h>
00318 
/*
 * Replacement lrint() for 64 bit Visual Studio, built on the SSE2 scalar
 * double to integer conversion.
 *
 * x        The value to convert.
 * returns  The result of _mm_cvtsd_si64x() on x, cast to long int.
 */
__inline long int lrint(double x)
{
    /* Use a scalar load. Only the single double at &x exists, so a 128 bit
       packed load (_mm_loadu_pd) would read 8 bytes beyond the argument. */
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
00323 
/*
 * Replacement lrintf() for 64 bit Visual Studio, built on the SSE scalar
 * float to integer conversion.
 *
 * x        The value to convert.
 * returns  The result of _mm_cvt_ss2si() on x.
 */
__inline long int lrintf(float x)
{
    /* Load x into the low element of an SSE register, then convert it. */
    __m128 loaded = _mm_load_ss((const float *) &x);

    return _mm_cvt_ss2si(loaded);
}
00328 
00329     __inline long int lfastrint(double x)
00330     {
00331         return (long int) (x);
00332     }
00333 
00334     __inline long int lfastrintf(float x)
00335     {
00336         return (long int) (x);
00337     }
00338 #elif defined(__MWERKS__)  &&  defined(macintosh)
00339     /* This MacOS 9 solution was provided by Stephane Letz */
00340 
00341     long int __inline__ lfastrint(register double x)
00342     {
00343         long int res[2];
00344 
00345         asm
00346         {
00347             fctiw x, x
00348             stfd x, res
00349         }
00350         return res[1];
00351     }
00352 
00353     long int __inline__ lfastrintf(register float x)
00354     {
00355         long int res[2];
00356 
00357         asm
00358         {
00359             fctiw x, x
00360             stfd x, res
00361         }
00362         return res[1];
00363     }
00364 #elif defined(__MACH__)  &&  defined(__APPLE__)  &&  (defined(__ppc__)  ||  defined(__powerpc__))
00365     /* For Apple Mac OS/X - do recent versions still need this? */
00366 
00367     static __inline__ long int lfastrint(register double x)
00368     {
00369         int res[2];
00370 
00371         __asm__ __volatile__
00372         (
00373             "fctiw %1, %1\n\t"
00374             "stfd %1, %0"
00375             : "=m" (res)    /* Output */
00376             : "f" (x)       /* Input */
00377             : "memory"
00378         );
00379 
00380         return res[1];
00381     }
00382 
00383     static __inline__ long int lfastrintf(register float x)
00384     {
00385         int res[2];
00386 
00387         __asm__ __volatile__
00388         (
00389             "fctiw %1, %1\n\t"
00390             "stfd %1, %0"
00391             : "=m" (res)    /* Output */
00392             : "f" (x)       /* Input */
00393             : "memory"
00394         );
00395 
00396         return res[1];
00397     }
00398 #else
00399     /* There is nothing else to do, but use a simple casting operation, instead of a real
00400        rint() type function. Since we are only trying to use rint() to speed up conversions,
00401        the accuracy issues related to changing the rounding scheme are of little concern
00402        to us. */
00403 
00404     #if !defined(__sgi)  &&  !defined(__sunos)  &&  !defined(__solaris)  &&  !defined(__sun)
00405         #warning "No usable lrint() and lrintf() functions available."
00406         #warning "Replacing these functions with a simple C cast."
00407     #endif
00408 
/*
 * Stand-in lrint() for platforms with no usable native version: a plain C
 * cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, unlike a real lrint()).
 */
static __inline__ long int lrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00413 
/*
 * Stand-in lrintf() for platforms with no usable native version: a plain C
 * cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, unlike a real lrintf()).
 */
static __inline__ long int lrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00418 
/*
 * Generic double to long int conversion, for platforms with no dedicated
 * routine here: a plain C cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, not rounding to nearest).
 */
static __inline__ long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00423 
/*
 * Generic float to long int conversion, for platforms with no dedicated
 * routine here: a plain C cast.
 *
 * x        The value to convert.
 * returns  x converted by the cast, i.e. with any fractional part discarded
 *          (truncation towards zero, not rounding to nearest).
 */
static __inline__ long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
00428 #endif
00429 
00430 #if defined(__cplusplus)
00431 }
00432 #endif
00433 
00434 #endif
00435 
00436 /*- End of file ------------------------------------------------------------*/