[Cfp-interest] FUNCTIONS: revised reduction functions text

Mon Jun 7 09:05:10 PDT 2010

Please check whether the following reflects the consensus of our last meeting:

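In the function names below, the suffix RN is formed as follows:
R = f or d for binary or decimal
N = number of bits
BUT for the standard C floating types:
RN = "f" for float
RN = "" for double
RN = "l" for long double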

binary types are _FloatN
decimal types are _DecimalN

/* 754 sum: reduc_sum = sum(p[i]) 			*/
_FloatN  reduc_sumRN( const _FloatN * p, size_t n ); 

/* 754 sumAbs: reduc_sumabs = sum(fabs(p[i])) 			*/
_FloatN  reduc_sumabsRN( const _FloatN * p, size_t n ); 

/* 754 sumSquare: reduc_sumsquare = sum(p[i]*p[i]) 			*/
_FloatN  reduc_sumsquareRN( const _FloatN * p, size_t n ); 

/* 754 dot: reduc_sumprod = sum(p[i]*q[i]) 			*/
_FloatN  reduc_sumprodRN( const _FloatN * restrict p, const _FloatN * restrict q, size_t n ); 

/* 754 scaledProd: scalbln(scaled_prod, *sf) = product(p[i]) 	*/
_FloatN scaled_prodRN( const _FloatN * restrict p, size_t n, long * restrict sf ); 

/* 754 scaledProdSum:	scalbln(scaled_prodsum, *sf) = product(p[i]+q[i]) */
_FloatN scaled_prodsumRN( const _FloatN * restrict p, const _FloatN * restrict q, size_t n, long * restrict sf ); 

/* 754 scaledProdDiff:	scalbln(scaled_proddiff, *sf) = product(p[i]-q[i]) */
_FloatN scaled_proddiffRN( const _FloatN * restrict p, const _FloatN * restrict q, size_t n, long * restrict sf ); 

/* Notes:
Exceptions are described in 754-2008.
Those rules for zero, inf, and NaN operands 
preclude the obvious simple loop implementations.
The intent was that a simple loop would trap on invalid exceptions
when they arose and would then start over with a more complicated loop that
would handle all exceptions correctly.

The functions listed above correspond to the recommendations of 754-2008.
Unscaled versions of sums were deemed suitable for most common uses.
The scaled versions of prod, prodsum, and proddiff were specified to 
facilitate computations like Clebsch-Gordan coefficients without
specifying 754-1985's counting mode for overflow and underflow exception
handling.

Implementations might go further to include the following to make a
complete orthogonal set: 
*/

/* not 754: scalbln(scaled_sum, *sf) = sum(p[i]) 		*/
_FloatN scaled_sumRN( const _FloatN * restrict p, size_t n, long * restrict sf ); 

/* not 754: scalbln(scaled_sumabs, *sf) = sum(fabs(p[i])) 		*/
_FloatN scaled_sumabsRN( const _FloatN * restrict p, size_t n, long * restrict sf ); 

/* not 754: scalbln(scaled_sumsquare, *sf) = sum(p[i]*p[i]) 		*/
_FloatN scaled_sumsquareRN( const _FloatN * restrict p, size_t n, long * restrict sf ); 

/* not 754: scalbln(scaled_sumprod, *sf) = sum(p[i]*q[i]) 		*/
_FloatN scaled_sumprodRN( const _FloatN * restrict p, const _FloatN * restrict q, size_t n, long * restrict sf ); 

/* not 754: reduc_prod = product(p[i]) 			*/
_FloatN  reduc_prodRN( const _FloatN * p, size_t n ); 

/* not 754: reduc_prodsum = product(p[i]+q[i]) 			*/
_FloatN  reduc_prodsumRN( const _FloatN * restrict p, const _FloatN * restrict q, size_t n ); 

/* not 754: reduc_proddiff = product(p[i]-q[i]) 			*/
_FloatN  reduc_proddiffRN( const _FloatN * restrict p, const _FloatN * restrict q, size_t n );