diff --git a/.github/.keepalive b/.github/.keepalive deleted file mode 100644 index 20ba148..0000000 --- a/.github/.keepalive +++ /dev/null @@ -1 +0,0 @@ -2024-09-01T02:34:49.555Z diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c77dca..d71fbfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,17 @@
-## Unreleased (2024-09-01) +## Unreleased (2024-09-18) + +
+ +### Features + +- [`f7edbe0`](https://github.com/stdlib-js/stdlib/commit/f7edbe044289b877586d0140467cb43cdfd18c19) - add C `ndarray` implementation for `blas/base/sdsdot` [(#2921)](https://github.com/stdlib-js/stdlib/pull/2921) + +
+ +
@@ -12,6 +22,7 @@
+- [`f7edbe0`](https://github.com/stdlib-js/stdlib/commit/f7edbe044289b877586d0140467cb43cdfd18c19) - **feat:** add C `ndarray` implementation for `blas/base/sdsdot` [(#2921)](https://github.com/stdlib-js/stdlib/pull/2921) _(by Aman Bhansali, Athan Reines)_ - [`2777e4b`](https://github.com/stdlib-js/stdlib/commit/2777e4be161869d09406e3b17947d24c64b47af2) - **bench:** resolve lint errors in benchmarks _(by Athan Reines)_
@@ -24,8 +35,9 @@ ### Contributors -A total of 1 person contributed to this release. Thank you to this contributor: +A total of 2 people contributed to this release. Thank you to the following contributors: +- Aman Bhansali - Athan Reines
diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 57d1184..147a89e 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -2,6 +2,7 @@ # # Contributors listed in alphabetical order. +Aayush Khanna <96649223+aayush0325@users.noreply.github.com> Adarsh Palaskar Aditya Sapra AgPriyanshu18 <113460573+AgPriyanshu18@users.noreply.github.com> @@ -26,17 +27,20 @@ EuniceSim142 <77243938+EuniceSim142@users.noreply.github.com> Frank Kovacs Golden Kumar <103646877+AuenKr@users.noreply.github.com> Gunj Joshi +HarshaNP <96897754+GittyHarsha@users.noreply.github.com> Harshita Kalani Hridyanshu <124202756+HRIDYANSHU054@users.noreply.github.com> Jaimin Godhani <112328542+Jai0401@users.noreply.github.com> James Gelok Jaysukh Makvana +Jenish Thapa <141203631+jenish-thapa@users.noreply.github.com> Jithin KS Joel Mathew Koshy Joey Reed Jordan Gallivan <115050475+Jordan-Gallivan@users.noreply.github.com> Joris Labie Justin Dennison +Kaif Mohd Karthik Prakash <116057817+skoriop@users.noreply.github.com> Khaldon Krishnendu Das <86651039+itskdhere@users.noreply.github.com> @@ -86,8 +90,10 @@ Stephannie Jiménez Gacha Suraj kumar <125961509+kumarsuraj212003@users.noreply.github.com> Tirtadwipa Manunggal Tudor Pagu <104032457+tudor-pagu@users.noreply.github.com> +Tufailahmed Bargir <142114244+Tufailahmed-Bargir@users.noreply.github.com> Utkarsh Utkarsh Raj +Vaibhav Patel <98279986+noobCoderVP@users.noreply.github.com> Varad Gupta Xiaochuan Ye Yernar Yergaziyev @@ -96,3 +102,4 @@ nishant-s7 <97207366+nishant-s7@users.noreply.github.com> orimiles5 <97595296+orimiles5@users.noreply.github.com> rainn <88160429+AmCodesLame@users.noreply.github.com> rei2hu +yaswanth <116426380+yaswanthkosuru@users.noreply.github.com> diff --git a/README.md b/README.md index 11dc1e8..e439ff7 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,140 @@ console.log( out ); + + +* * * + +
+ +## C APIs + + + +
+ +
+ + + + + +
+ +### Usage + +```c +#include "stdlib/blas/base/sdsdot.h" +``` + +#### c_sdsdot( N, scalar, \*X, strideX, \*Y, strideY ) + +Calculates the dot product of vectors `x` and `y` with extended accumulation. + +```c +const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; +const float y[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; + +float v = c_sdsdot( 5, 0.0f, x, 1, y, -1 ); +// returns 35.0f +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **scalar**: `[in] float` scalar constant to add to dot product. +- **X**: `[in] float*` first input array. +- **strideX**: `[in] CBLAS_INT` index increment for `X`. +- **Y**: `[in] float*` second input array. +- **strideY**: `[in] CBLAS_INT` index increment for `Y`. + +```c +float c_sdsdot( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ); +``` + +#### c_sdsdot_ndarray( N, scalar, \*X, strideX, offsetX, \*Y, strideY, offsetY ) + +Calculates the dot product of vectors `x` and `y` with extended accumulation using alternative indexing semantics. + +```c +const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; +const float y[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; + +float v = c_sdsdot_ndarray( 5, 0.0f, x, 1, 0, y, -1, 7 ); +// returns -80.0f +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **scalar**: `[in] float` scalar constant to add to dot product. +- **X**: `[in] float*` first input array. +- **strideX**: `[in] CBLAS_INT` index increment for `X`. +- **offsetX**: `[in] CBLAS_INT` starting index for `X`. +- **Y**: `[in] float*` second input array. +- **strideY**: `[in] CBLAS_INT` index increment for `Y`. +- **offsetY**: `[in] CBLAS_INT` starting index for `Y`. 
+ +```c +float c_sdsdot_ndarray( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); +``` + +
+ + + + + +
+ +
+ + + + + +
+ +### Examples + +```c +#include "stdlib/blas/base/sdsdot.h" +#include <stdio.h> + +int main( void ) { + // Create strided arrays: + const float x[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; + const float y[] = { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }; + + // Specify the number of indexed elements: + const int N = 8; + + // Specify strides: + const int strideX = 1; + const int strideY = -1; + + // Compute the dot product: + float d = c_sdsdot( N, 0.0f, x, strideX, y, strideY ); + + // Print the result: + printf( "dot product: %f\n", d ); + + // Compute the dot product: + d = c_sdsdot_ndarray( N, 0.0f, x, strideX, 0, y, strideY, 7 ); + + // Print the result: + printf( "dot product: %f\n", d ); +} +``` + +
+ + + +
+ + + * * *
diff --git a/benchmark/c/benchmark.length.c b/benchmark/c/benchmark.length.c index 8251209..565da1c 100644 --- a/benchmark/c/benchmark.length.c +++ b/benchmark/c/benchmark.length.c @@ -94,7 +94,7 @@ static float rand_float( void ) { * @param len array length * @return elapsed time in seconds */ -static double benchmark( int iterations, int len ) { +static double benchmark1( int iterations, int len ) { double elapsed; float x[ len ]; float y[ len ]; @@ -122,6 +122,41 @@ static double benchmark( int iterations, int len ) { return elapsed; } +/** +* Runs a benchmark. +* +* @param iterations number of iterations +* @param len array length +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int len ) { + double elapsed; + float x[ len ]; + float y[ len ]; + float z; + double t; + int i; + + for ( i = 0; i < len; i++ ) { + x[ i ] = ( rand_float()*20000.0f ) - 10000.0f; + y[ i ] = ( rand_float()*20000.0f ) - 10000.0f; + } + z = 0.0f; + t = tic(); + for ( i = 0; i < iterations; i++ ) { + z = c_sdsdot_ndarray( len, 0.0f, x, 1, 0, y, 1, 0 ); + if ( z != z ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( z != z ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + /** * Main execution sequence. 
*/ @@ -144,7 +179,14 @@ int main( void ) { for ( j = 0; j < REPEATS; j++ ) { count += 1; printf( "# c::%s:len=%d\n", NAME, len ); - elapsed = benchmark( iter, len ); + elapsed = benchmark1( iter, len ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:len=%d\n", NAME, len ); + elapsed = benchmark2( iter, len ); print_results( iter, elapsed ); printf( "ok %d benchmark finished\n", count ); } diff --git a/examples/c/example.c b/examples/c/example.c index 0238b0f..c46ff68 100644 --- a/examples/c/example.c +++ b/examples/c/example.c @@ -36,4 +36,10 @@ int main( void ) { // Print the result: printf( "dot product: %f\n", d ); + + // Compute the dot product: + d = c_sdsdot_ndarray( N, 0.0f, x, strideX, 0, y, strideY, 7 ); + + // Print the result: + printf( "dot product: %f\n", d ); } diff --git a/include/stdlib/blas/base/sdsdot.h b/include/stdlib/blas/base/sdsdot.h index d4832b5..96c7690 100644 --- a/include/stdlib/blas/base/sdsdot.h +++ b/include/stdlib/blas/base/sdsdot.h @@ -22,6 +22,8 @@ #ifndef SDSDOT_H #define SDSDOT_H +#include "stdlib/blas/base/shared.h" + /* * If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler. */ @@ -32,7 +34,12 @@ extern "C" { /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation. */ -float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ); +float API_SUFFIX(c_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ); + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation using alternative indexing semantics. 
+*/ +float API_SUFFIX(c_sdsdot_ndarray)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); #ifdef __cplusplus } diff --git a/include/stdlib/blas/base/sdsdot_cblas.h b/include/stdlib/blas/base/sdsdot_cblas.h index b2b0274..31a1546 100644 --- a/include/stdlib/blas/base/sdsdot_cblas.h +++ b/include/stdlib/blas/base/sdsdot_cblas.h @@ -22,6 +22,8 @@ #ifndef SDSDOT_CBLAS_H #define SDSDOT_CBLAS_H +#include "stdlib/blas/base/shared.h" + /* * If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler. */ @@ -32,7 +34,7 @@ extern "C" { /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation. */ -float cblas_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ); +float API_SUFFIX(cblas_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ); #ifdef __cplusplus } diff --git a/lib/ndarray.native.js b/lib/ndarray.native.js index 2b0aacc..d80a797 100644 --- a/lib/ndarray.native.js +++ b/lib/ndarray.native.js @@ -20,9 +20,7 @@ // MODULES // -var minViewBufferIndex = require( '@stdlib/strided-base-min-view-buffer-index' ); -var offsetView = require( '@stdlib/strided-base-offset-view' ); -var addon = require( './sdsdot.native.js' ); +var addon = require( './../src/addon.node' ); // MAIN // @@ -50,16 +48,7 @@ var addon = require( './sdsdot.native.js' ); * // returns -5.0 */ function sdsdot( N, scalar, x, strideX, offsetX, y, strideY, offsetY ) { - var viewX; - var viewY; - - offsetX = minViewBufferIndex( N, strideX, offsetX ); - offsetY = minViewBufferIndex( N, strideY, offsetY ); - - viewX = offsetView( x, offsetX ); - viewY = offsetView( y, offsetY ); - - return addon( N, scalar, viewX, strideX, viewY, 
strideY ); + return addon.ndarray( N, scalar, x, strideX, offsetX, y, strideY, offsetY ); } diff --git a/manifest.json b/manifest.json index b03a602..f4d1208 100644 --- a/manifest.json +++ b/manifest.json @@ -45,9 +45,11 @@ "libpath": [], "dependencies": [ "@stdlib/napi-export", + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-argv", - "@stdlib/napi-argv-int64", "@stdlib/napi-argv-float", + "@stdlib/napi-argv-int64", "@stdlib/napi-argv-strided-float32array", "@stdlib/napi-create-double" ] @@ -58,14 +60,18 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] }, { "task": "examples", @@ -73,14 +79,18 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] }, { @@ -101,9 +111,11 @@ "libpath": [], "dependencies": [ "@stdlib/napi-export", + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-argv", - "@stdlib/napi-argv-int64", "@stdlib/napi-argv-float", + "@stdlib/napi-argv-int64", "@stdlib/napi-argv-strided-float32array", "@stdlib/napi-create-double" ] @@ -124,7 +136,10 @@ "-lpthread" ], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" + ] }, { "task": "examples", @@ -142,7 +157,10 @@ "-lpthread" ], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" + ] }, { @@ -162,9 +180,11 @@ "libpath": [], "dependencies": [ "@stdlib/napi-export", + "@stdlib/blas-base-shared", + 
"@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-argv", - "@stdlib/napi-argv-int64", "@stdlib/napi-argv-float", + "@stdlib/napi-argv-int64", "@stdlib/napi-argv-strided-float32array", "@stdlib/napi-create-double" ] @@ -175,14 +195,18 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] }, { "task": "examples", @@ -190,14 +214,18 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] }, { @@ -217,9 +245,11 @@ "libpath": [], "dependencies": [ "@stdlib/napi-export", + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-argv", - "@stdlib/napi-argv-int64", "@stdlib/napi-argv-float", + "@stdlib/napi-argv-int64", "@stdlib/napi-argv-strided-float32array", "@stdlib/napi-create-double" ] @@ -239,7 +269,10 @@ "-lblas" ], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" + ] }, { "task": "examples", @@ -256,7 +289,10 @@ "-lblas" ], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" + ] }, { @@ -277,9 +313,11 @@ "libpath": [], "dependencies": [ "@stdlib/napi-export", + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index", "@stdlib/napi-argv", - "@stdlib/napi-argv-int64", "@stdlib/napi-argv-float", + "@stdlib/napi-argv-int64", "@stdlib/napi-argv-strided-float32array", "@stdlib/napi-create-double" ] @@ -300,7 +338,10 @@ "-lpthread" ], "libpath": [], - "dependencies": [] + "dependencies": [ + 
"@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" + ] }, { "task": "examples", @@ -318,7 +359,10 @@ "-lpthread" ], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-min-view-buffer-index" + ] }, { @@ -327,7 +371,8 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" @@ -336,9 +381,11 @@ "libpath": [], "dependencies": [ "@stdlib/napi-export", + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset", "@stdlib/napi-argv", - "@stdlib/napi-argv-int64", "@stdlib/napi-argv-float", + "@stdlib/napi-argv-int64", "@stdlib/napi-argv-strided-float32array", "@stdlib/napi-create-double" ] @@ -349,14 +396,18 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] }, { "task": "examples", @@ -364,14 +415,18 @@ "blas": "", "wasm": false, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] }, { @@ -380,14 +435,18 @@ "blas": "", "wasm": true, "src": [ - "./src/sdsdot.c" + "./src/sdsdot.c", + "./src/sdsdot_ndarray.c" ], "include": [ "./include" ], "libraries": [], "libpath": [], - "dependencies": [] + "dependencies": [ + "@stdlib/blas-base-shared", + "@stdlib/strided-base-stride2offset" + ] } ] } diff --git a/package.json b/package.json index 19f7242..b527201 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,7 @@ }, "dependencies": { "@stdlib/assert-is-error": "^0.2.2", + "@stdlib/blas-base-shared": "^0.1.0", "@stdlib/napi-argv": "^0.2.2", "@stdlib/napi-argv-float": "^0.2.2", 
"@stdlib/napi-argv-int64": "^0.2.2", @@ -49,6 +50,7 @@ "@stdlib/napi-create-double": "^0.0.2", "@stdlib/napi-export": "^0.2.2", "@stdlib/number-float64-base-to-float32": "^0.2.2", + "@stdlib/strided-base-min-view-buffer-index": "^0.2.2", "@stdlib/strided-base-stride2offset": "^0.1.0", "@stdlib/utils-define-nonenumerable-read-only-property": "^0.2.2", "@stdlib/utils-library-manifest": "^0.2.2", @@ -61,8 +63,6 @@ "@stdlib/math-base-special-pow": "^0.3.0", "@stdlib/random-array-discrete-uniform": "^0.2.1", "@stdlib/random-array-uniform": "^0.2.1", - "@stdlib/strided-base-min-view-buffer-index": "^0.2.2", - "@stdlib/strided-base-offset-view": "^0.2.2", "proxyquire": "^2.0.0", "tape": "git+https://github.com/kgryte/tape.git#fix/globby", "istanbul": "^0.4.1", diff --git a/src/addon.c b/src/addon.c index 637e437..597ffa2 100644 --- a/src/addon.c +++ b/src/addon.c @@ -17,6 +17,7 @@ */ #include "stdlib/blas/base/sdsdot.h" +#include "stdlib/blas/base/shared.h" #include "stdlib/napi/export.h" #include "stdlib/napi/argv.h" #include "stdlib/napi/argv_int64.h" @@ -40,8 +41,29 @@ static napi_value addon( napi_env env, napi_callback_info info ) { STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 5 ); STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 2 ); STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, Y, N, strideY, argv, 4 ); - STDLIB_NAPI_CREATE_DOUBLE( env, (double)c_sdsdot( N, scalar, X, strideX, Y, strideY ), v ); + STDLIB_NAPI_CREATE_DOUBLE( env, (double)API_SUFFIX(c_sdsdot)( N, scalar, X, strideX, Y, strideY ), v ); return v; } -STDLIB_NAPI_MODULE_EXPORT_FCN( addon ) +/** +* Receives JavaScript callback invocation data. 
+* +* @param env environment under which the function is invoked +* @param info callback data +* @return Node-API value +*/ +static napi_value addon_method( napi_env env, napi_callback_info info ) { + STDLIB_NAPI_ARGV( env, info, argv, argc, 8 ); + STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); + STDLIB_NAPI_ARGV_FLOAT( env, scalar, argv, 1 ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 3 ); + STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 4 ); + STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 6 ); + STDLIB_NAPI_ARGV_INT64( env, offsetY, argv, 7 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, Y, N, strideY, argv, 5 ); + STDLIB_NAPI_CREATE_DOUBLE( env, (double)API_SUFFIX(c_sdsdot_ndarray)( N, scalar, X, strideX, offsetX, Y, strideY, offsetY ), v ); + return v; +} + +STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method ) diff --git a/src/sdsdot.c b/src/sdsdot.c index ba3d601..1d745b1 100644 --- a/src/sdsdot.c +++ b/src/sdsdot.c @@ -22,6 +22,8 @@ * @see sdsdot */ #include "stdlib/blas/base/sdsdot.h" +#include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/stride2offset.h" /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation. @@ -34,50 +36,9 @@ * @param strideY Y stride length * @return dot product */ -float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ) { - double dot; - int ix; - int iy; - int m; - int i; - - dot = (double)scalar; - if ( N <= 0 ) { - return dot; - } - // If both strides are equal to `1`, use unrolled loops... - if ( strideX == 1 && strideY == 1 ) { - m = N % 5; - - // If we have a remainder, do a clean-up loop... 
- if ( m > 0 ) { - for ( i = 0; i < m; i++ ) { - dot += (double)X[ i ] * (double)Y[ i ]; - } - } - if ( N < 5 ) { - return dot; - } - for ( i = m; i < N; i += 5 ) { - dot += ( (double)X[i]*(double)Y[i] ) + ( (double)X[i+1]*(double)Y[i+1]) + ( (double)X[i+2]*(double)Y[i+2] ) + ( (double)X[i+3]*(double)Y[i+3] ) + ( (double)X[i+4]*(double)Y[i+4] ); - } - return dot; - } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } - for ( i = 0; i < N; i++ ) { - dot += (double)X[ ix ] * (double)Y[ iy ]; - ix += strideX; - iy += strideY; - } - return dot; +float API_SUFFIX(c_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ) { + CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX ); + CBLAS_INT oy = stdlib_strided_stride2offset( N, strideY ); + return API_SUFFIX(c_sdsdot_ndarray)( N, scalar, X, strideX, ox, Y, strideY, oy ); } diff --git a/src/sdsdot_cblas.c b/src/sdsdot_cblas.c index bd47331..8a8fdec 100644 --- a/src/sdsdot_cblas.c +++ b/src/sdsdot_cblas.c @@ -18,6 +18,8 @@ #include "stdlib/blas/base/sdsdot.h" #include "stdlib/blas/base/sdsdot_cblas.h" +#include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/min_view_buffer_index.h" /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation. 
@@ -30,6 +32,25 @@ * @param strideY Y stride length * @return dot product */ -float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ) { - return cblas_sdsdot( N, scalar, X, strideX, Y, strideY ); +float API_SUFFIX(c_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ) { + return API_SUFFIX(cblas_sdsdot)( N, scalar, X, strideX, Y, strideY ); +} + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation using alternative indexing semantics. +* +* @param N number of indexed elements +* @param scalar scalar constant added to the dot product +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product +*/ +float API_SUFFIX(c_sdsdot_ndarray)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + X += stdlib_strided_min_view_buffer_index( N, strideX, offsetX ); // adjust array pointer + Y += stdlib_strided_min_view_buffer_index( N, strideY, offsetY ); // adjust array pointer + return API_SUFFIX(cblas_sdsdot)( N, scalar, X, strideX, Y, strideY ); } diff --git a/src/sdsdot_f.c b/src/sdsdot_f.c index 8e5de1c..e99160e 100644 --- a/src/sdsdot_f.c +++ b/src/sdsdot_f.c @@ -23,6 +23,8 @@ */ #include "stdlib/blas/base/sdsdot.h" #include "stdlib/blas/base/sdsdot_fortran.h" +#include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/min_view_buffer_index.h" /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation. 
@@ -37,8 +39,32 @@ * @param strideY Y stride length * @return dot product */ -float c_sdsdot( const int N, const float scalar, const float *X, const int strideX, const float *Y, const int strideY ) { +float API_SUFFIX(c_sdsdot)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ) { float dot; sdsdotsub( &N, &scalar, X, &strideX, Y, &strideY, &dot ); return dot; } + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation using alternative indexing semantics. +* +* Arguments are passed by reference to a Fortran subroutine implementing `sdsdot`. +* +* @param N number of indexed elements +* @param scalar scalar constant added to the dot product +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product +*/ +float API_SUFFIX(c_sdsdot_ndarray)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + float dot; + + X += stdlib_strided_min_view_buffer_index( N, strideX, offsetX ); // adjust array pointer + Y += stdlib_strided_min_view_buffer_index( N, strideY, offsetY ); // adjust array pointer + sdsdotsub( &N, &scalar, X, &strideX, Y, &strideY, &dot ); + return dot; +} diff --git a/src/sdsdot_ndarray.c b/src/sdsdot_ndarray.c new file mode 100644 index 0000000..6aed77a --- /dev/null +++ b/src/sdsdot_ndarray.c @@ -0,0 +1,85 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2023 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/** +* Compute the dot product of two single-precision floating-point vectors with extended accumulation. +* +* @see sdsdot +*/ +#include "stdlib/blas/base/sdsdot.h" +#include "stdlib/blas/base/shared.h" + +static const CBLAS_INT M = 5; + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation using alternative indexing semantics. +* +* @param N number of indexed elements +* @param scalar scalar constant added to the dot product +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product +*/ +float API_SUFFIX(c_sdsdot_ndarray)( const CBLAS_INT N, const float scalar, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + CBLAS_INT ix; + CBLAS_INT iy; + CBLAS_INT m; + CBLAS_INT i; + double dot; + + dot = (double)scalar; + if ( N <= 0 ) { + return dot; + } + ix = offsetX; + iy = offsetY; + + // If both strides are equal to `1`, use unrolled loops... + if ( strideX == 1 && strideY == 1 ) { + m = N % M; + + // If we have a remainder, do a clean-up loop... 
+ if ( m > 0 ) { + for ( i = 0; i < m; i++ ) { + dot += (double)X[ ix ] * (double)Y[ iy ]; + ix += strideX; + iy += strideY; + } + } + if ( N < M ) { + return dot; + } + for ( i = m; i < N; i += M ) { + dot += ( (double)X[ ix ]*(double)Y[ iy ] ) + ( (double)X[ ix+1 ]*(double)Y[ iy+1 ]) + ( (double)X[ ix+2 ]*(double)Y[ iy+2 ] ) + ( (double)X[ ix+3 ]*(double)Y[ iy+3 ] ) + ( (double)X[ ix+4 ]*(double)Y[ iy+4 ] ); + ix += M; + iy += M; + } + return dot; + } + for ( i = 0; i < N; i++ ) { + dot += (double)X[ ix ] * (double)Y[ iy ]; + ix += strideX; + iy += strideY; + } + return dot; +} +