Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revisiting Keccak and Dilithium Implementations on ARMv7-M #338

Merged
merged 3 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions benchmarks.csv
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ cross-sha3-r-sdpg-3-fast (10 executions),ref,627948,625525,637639,43573841,43565
cross-sha3-r-sdpg-5-fast (10 executions),ref,1146280,1142409,1153794,93557878,93547167,93566329,59948216,59857434,60043852
dilithium2 (90 executions),clean,1873447,1838554,1903845,7846622,3321671,28761609,2062804,2062332,2063181
dilithium2 (100 executions),m4f,1427684,1390524,1466437,4219137,1813668,12587382,1417706,1417251,1418128
dilithium3 (90 executions),clean,3205542,3204354,3206592,12108503,5097440,50759276,3377010,3376729,3377395
dilithium3 (100 executions),m4f,2515970,2514894,2516922,5896583,2935265,23718896,2411234,2410948,2411551
dilithium3 (1000 executions),clean,3205551,3204090,3207411,12696585,5097364,74392293,3376992,3376581,3377393
dilithium3 (1000 executions),m4f,2515969,2514498,2517634,5884832,2917322,25268693,2411257,2410858,2411717
dilithium5 (90 executions),clean,5346066,5287239,5395626,15205929,7953360,49173429,5609664,5609137,5610119
dilithium5 (100 executions),m4f,4273211,4210308,4329697,8062110,4882708,18398575,4185407,4184878,4185954
falcon-1024 (10 executions),m4-ct,354880005,284902033,635131652,87741288,87506676,87922628,991320,982548,997219
Expand Down Expand Up @@ -341,8 +341,8 @@ cross-sha3-r-sdpg-3-fast,ref,71.7,68.2,68.7,,,,,,
cross-sha3-r-sdpg-5-fast,ref,71.1,66.1,66.8,,,,,,
dilithium2,clean,60.9,30.2,52.9,,,,,,
dilithium2,m4f,79.9,62.2,76.8,,,,,,
dilithium3,clean,64.7,33.8,56.8,,,,,,
dilithium3,m4f,82.3,57.9,79.4,,,,,,
dilithium3,clean,64.7,31.3,56.8,,,,,,
dilithium3,m4f,82.3,60.3,79.4,,,,,,
dilithium5,clean,67.0,38.4,61.1,,,,,,
dilithium5,m4f,83.4,63.5,81.7,,,,,,
falcon-1024,clean,6.5,0.3,23.7,,,,,,
Expand Down Expand Up @@ -491,7 +491,7 @@ cross-sha3-r-sdpg-5-fast,ref,18593,0,208,18801,,,,,
dilithium2,clean,8064,0,0,8064,,,,,
dilithium2,m4f,18596,0,0,18596,,,,,
dilithium3,clean,7580,0,0,7580,,,,,
dilithium3,m4f,20108,0,0,20108,,,,,
dilithium3,m4f,18588,0,0,18588,,,,,
dilithium5,clean,7808,0,0,7808,,,,,
dilithium5,m4f,18468,0,0,18468,,,,,
falcon-1024,clean,82647,0,0,82647,,,,,
Expand Down
10 changes: 5 additions & 5 deletions benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@
| cross-sha3-r-sdpg-5-fast (10 executions) | ref | AVG: 1,146,280 <br /> MIN: 1,142,409 <br /> MAX: 1,153,794 | AVG: 93,557,878 <br /> MIN: 93,547,167 <br /> MAX: 93,566,329 | AVG: 59,948,216 <br /> MIN: 59,857,434 <br /> MAX: 60,043,852 |
| dilithium2 (90 executions) | clean | AVG: 1,873,447 <br /> MIN: 1,838,554 <br /> MAX: 1,903,845 | AVG: 7,846,622 <br /> MIN: 3,321,671 <br /> MAX: 28,761,609 | AVG: 2,062,804 <br /> MIN: 2,062,332 <br /> MAX: 2,063,181 |
| dilithium2 (100 executions) | m4f | AVG: 1,427,684 <br /> MIN: 1,390,524 <br /> MAX: 1,466,437 | AVG: 4,219,137 <br /> MIN: 1,813,668 <br /> MAX: 12,587,382 | AVG: 1,417,706 <br /> MIN: 1,417,251 <br /> MAX: 1,418,128 |
| dilithium3 (90 executions) | clean | AVG: 3,205,542 <br /> MIN: 3,204,354 <br /> MAX: 3,206,592 | AVG: 12,108,503 <br /> MIN: 5,097,440 <br /> MAX: 50,759,276 | AVG: 3,377,010 <br /> MIN: 3,376,729 <br /> MAX: 3,377,395 |
| dilithium3 (100 executions) | m4f | AVG: 2,515,970 <br /> MIN: 2,514,894 <br /> MAX: 2,516,922 | AVG: 5,896,583 <br /> MIN: 2,935,265 <br /> MAX: 23,718,896 | AVG: 2,411,234 <br /> MIN: 2,410,948 <br /> MAX: 2,411,551 |
| dilithium3 (1000 executions) | clean | AVG: 3,205,551 <br /> MIN: 3,204,090 <br /> MAX: 3,207,411 | AVG: 12,696,585 <br /> MIN: 5,097,364 <br /> MAX: 74,392,293 | AVG: 3,376,992 <br /> MIN: 3,376,581 <br /> MAX: 3,377,393 |
| dilithium3 (1000 executions) | m4f | AVG: 2,515,969 <br /> MIN: 2,514,498 <br /> MAX: 2,517,634 | AVG: 5,884,832 <br /> MIN: 2,917,322 <br /> MAX: 25,268,693 | AVG: 2,411,257 <br /> MIN: 2,410,858 <br /> MAX: 2,411,717 |
| dilithium5 (90 executions) | clean | AVG: 5,346,066 <br /> MIN: 5,287,239 <br /> MAX: 5,395,626 | AVG: 15,205,929 <br /> MIN: 7,953,360 <br /> MAX: 49,173,429 | AVG: 5,609,664 <br /> MIN: 5,609,137 <br /> MAX: 5,610,119 |
| dilithium5 (100 executions) | m4f | AVG: 4,273,211 <br /> MIN: 4,210,308 <br /> MAX: 4,329,697 | AVG: 8,062,110 <br /> MIN: 4,882,708 <br /> MAX: 18,398,575 | AVG: 4,185,407 <br /> MIN: 4,184,878 <br /> MAX: 4,185,954 |
| falcon-1024 (10 executions) | m4-ct | AVG: 354,880,005 <br /> MIN: 284,902,033 <br /> MAX: 635,131,652 | AVG: 87,741,288 <br /> MIN: 87,506,676 <br /> MAX: 87,922,628 | AVG: 991,320 <br /> MIN: 982,548 <br /> MAX: 997,219 |
Expand Down Expand Up @@ -347,8 +347,8 @@
| cross-sha3-r-sdpg-5-fast | ref | 71.1% | 66.1% | 66.8% |
| dilithium2 | clean | 60.9% | 30.2% | 52.9% |
| dilithium2 | m4f | 79.9% | 62.2% | 76.8% |
| dilithium3 | clean | 64.7% | 33.8% | 56.8% |
| dilithium3 | m4f | 82.3% | 57.9% | 79.4% |
| dilithium3 | clean | 64.7% | 31.3% | 56.8% |
| dilithium3 | m4f | 82.3% | 60.3% | 79.4% |
| dilithium5 | clean | 67.0% | 38.4% | 61.1% |
| dilithium5 | m4f | 83.4% | 63.5% | 81.7% |
| falcon-1024 | clean | 6.5% | 0.3% | 23.7% |
Expand Down Expand Up @@ -499,7 +499,7 @@
| dilithium2 | clean | 8,064 | 0 | 0 | 8,064 |
| dilithium2 | m4f | 18,596 | 0 | 0 | 18,596 |
| dilithium3 | clean | 7,580 | 0 | 0 | 7,580 |
| dilithium3 | m4f | 20,108 | 0 | 0 | 20,108 |
| dilithium3 | m4f | 18,588 | 0 | 0 | 18,588 |
| dilithium5 | clean | 7,808 | 0 | 0 | 7,808 |
| dilithium5 | m4f | 18,468 | 0 | 0 | 18,468 |
| falcon-1024 | clean | 82,647 | 0 | 0 | 82,647 |
Expand Down
1 change: 0 additions & 1 deletion crypto_sign/dilithium3/m4f/macros_fnt.i

This file was deleted.

98 changes: 98 additions & 0 deletions crypto_sign/dilithium3/m4f/macros_smallntt.i
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Copyright (c) 2023 Junhao Huang (jhhuang_nuaa@126.com)
*
* Licensed under the Apache License, Version 2.0(the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MACROS_SMALLNTT_I
#define MACROS_SMALLNTT_I

// general macros
// Loads four 32-bit words from memory into a0..a3.
//   a          : base address register
//   a0..a3     : destination registers, written in this order
//   mem0..mem3 : offsets added to the base for each load
// The loads are issued strictly in order and each one re-reads the base
// register, so a destination that aliases the base must be the last one.
.macro load a, a0, a1, a2, a3, mem0, mem1, mem2, mem3
ldr.w \a0, [\a, \mem0]
ldr.w \a1, [\a, \mem1]
ldr.w \a2, [\a, \mem2]
ldr.w \a3, [\a, \mem3]
.endm

// Stores the four 32-bit registers a0..a3 to memory; counterpart of load.
//   a          : base address register (not modified)
//   a0..a3     : source registers, stored in this order
//   mem0..mem3 : offsets added to the base for each store
.macro store a, a0, a1, a2, a3, mem0, mem1, mem2, mem3
str.w \a0, [\a, \mem0]
str.w \a1, [\a, \mem1]
str.w \a2, [\a, \mem2]
str.w \a3, [\a, \mem3]
.endm

// Multiplies each signed 16-bit half of a by the 32-bit constant plantconst,
// reduces both products and leaves the two 16-bit results packed in a.
// The macro name suggests Plantard arithmetic; the value ranges it relies on
// are not visible in this file.
//   a          : in: two packed 16-bit values; out: two packed 16-bit results
//   tmp        : scratch register, clobbered
//   q          : modulus, read from the top halfword of the register
//   qa         : 32-bit addend used by both multiply-accumulates
//                (presumably a precomputed multiple of q - TODO confirm)
//   plantconst : 32-bit multiplier
.macro doubleplant a, tmp, q, qa, plantconst
// tmp = (plantconst * bottom16(a)) >> 16
smulwb \tmp, \plantconst, \a
// a = (plantconst * top16(a)) >> 16; the bottom half was consumed above
smulwt \a, \plantconst, \a
// tmp = bottom16(tmp) * top16(q) + qa; the result of interest is top16(tmp)
smlabt \tmp, \tmp, \q, \qa
// a = bottom16(a) * top16(q) + qa; the result of interest is top16(a)
smlabt \a, \a, \q, \qa
// repack: a = top16(a) in the top half, top16(tmp) in the bottom half
pkhtb \a, \a, \tmp, asr#16
.endm

// Barrett-style reduction (going by the macro name) applied to both signed
// 16-bit halves of a.
//   a            : in/out, two packed 16-bit values
//   tmp, tmp2    : scratch registers, clobbered
//   q            : modulus, read from the BOTTOM halfword of the register
//   barrettconst : multiplier, read from the bottom halfword of the register
// NOTE(review): the plant macros in this file read q from the top halfword,
// whereas this macro reads it from the bottom halfword - confirm that callers
// pass a register laid out accordingly.
.macro doublebarrett a, tmp, tmp2, q, barrettconst
// tmp = bottom16(a) * bottom16(barrettconst)
smulbb \tmp, \a, \barrettconst
// tmp2 = top16(a) * bottom16(barrettconst)
smultb \tmp2, \a, \barrettconst
// arithmetic shift by 26 turns each product into a quotient estimate
asr \tmp, \tmp, #26
asr \tmp2, \tmp2, #26
// multiply each quotient estimate by q (bottom halfwords only)
smulbb \tmp, \tmp, \q
smulbb \tmp2, \tmp2, \q
// tmp = bottom16(tmp) in the bottom half, bottom16(tmp2) in the top half
pkhbt \tmp, \tmp, \tmp2, lsl#16
// halfword-wise subtraction of the packed multiples of q from a
usub16 \a, \a, \tmp
.endm

// Reduction step on a single 32-bit value (Plantard-style, going by the name).
// The modulus q must be located in the top half of its register, because
// smlatt reads the top halfword of q.
//   q    : register holding the modulus in bits [31:16]
//   qa   : 32-bit addend for the multiply-accumulate
//          (presumably a precomputed multiple of q - TODO confirm)
//   qinv : 32-bit multiplier (presumably derived from the inverse of q -
//          TODO confirm)
//   tmp  : in: value to reduce; out: result in the top halfword
.macro plant_red q, qa, qinv, tmp
mul \tmp, \tmp, \qinv
// tmp now holds tmp*qinv mod 2^(2n); only its top halfword, that is the
// value divided by 2^n with n = 16, is consumed by the next instruction
smlatt \tmp, \tmp, \q, \qa
// tmp = top16(tmp) * top16(q) + qa; the result is in the top halfword
.endm

// Multiplies both packed signed 16-bit halves of a by the 32-bit constant
// twiddle and reduces each product, leaving the two 16-bit results packed
// in a.
//   a       : in: two packed 16-bit values; out: two packed 16-bit results
//   twiddle : 32-bit multiplier
//   tmp     : scratch register, clobbered
//   q       : modulus, read from the top halfword of the register
//   qa      : 32-bit addend used by both multiply-accumulates
// The instruction sequence is exactly the one emitted by doubleplant with
// twiddle in the role of plantconst, so the expansion is delegated to it.
.macro mul_twiddle_plant a, twiddle, tmp, q, qa
    doubleplant \a, \tmp, \q, \qa, \twiddle
.endm

// Butterfly on two packed pairs of 16-bit values: with t being the reduced
// product of a1 and twiddle (computed per halfword), produces
//   a0 = a0 + t   and   a1 = a0 - t   (halfword-wise, using the old a0).
//   a0, a1  : in/out, two packed 16-bit values each
//   twiddle : 32-bit multiplier
//   tmp     : scratch register, clobbered (holds t on exit)
//   q       : modulus, read from the top halfword of the register
//   qa      : 32-bit addend used by both multiply-accumulates
//             (presumably a precomputed multiple of q - TODO confirm)
.macro doublebutterfly_plant a0, a1, twiddle, tmp, q, qa
// tmp = (twiddle * bottom16(a1)) >> 16
smulwb \tmp, \twiddle, \a1
// a1 = (twiddle * top16(a1)) >> 16; a1 serves as scratch from here on
smulwt \a1, \twiddle, \a1
// tmp = bottom16(tmp) * top16(q) + qa
smlabt \tmp, \tmp, \q, \qa
// a1 = bottom16(a1) * top16(q) + qa
smlabt \a1, \a1, \q, \qa
// t: tmp = top16(a1) in the top half, top16(tmp) in the bottom half
pkhtb \tmp, \a1, \tmp, asr#16
// the difference must be formed first, while a0 still holds its input value
usub16 \a1, \a0, \tmp
uadd16 \a0, \a0, \tmp
.endm

// Performs two independent doublebutterfly_plant operations back to back:
// first on (a0, a1) with twiddle0, then on (a2, a3) with twiddle1.
//   a0..a3             : in/out, two packed 16-bit values each
//   twiddle0, twiddle1 : 32-bit multipliers for the first and second butterfly
//   tmp                : scratch register shared by both butterflies, clobbered
//   q, qa              : forwarded unchanged to doublebutterfly_plant
.macro two_doublebutterfly_plant a0, a1, a2, a3, twiddle0, twiddle1, tmp, q, qa
    // butterfly on the first pair
    doublebutterfly_plant a0=\a0, a1=\a1, twiddle=\twiddle0, tmp=\tmp, q=\q, qa=\qa
    // butterfly on the second pair
    doublebutterfly_plant a0=\a2, a1=\a3, twiddle=\twiddle1, tmp=\tmp, q=\q, qa=\qa
.endm

// Applies doubleplant to the eight registers a0..a7, in that order, using a
// multiplier that is loaded into plantconst by this macro.
// The multiplier is for the modulus 3329: (19 << 16) | 44984 = 1290168.
//   a0..a7     : in/out, two packed 16-bit values each
//   tmp        : scratch register, clobbered
//   q, qa      : forwarded unchanged to doubleplant
//   plantconst : register overwritten with the multiplier (output only)
.macro fullplant a0, a1, a2, a3, a4, a5, a6, a7, tmp, q, qa, plantconst
    // build the 32-bit multiplier from its two 16-bit halves
    movw \plantconst, #44984
    movt \plantconst, #19
    // one doubleplant per register, a0 first and a7 last
    .irp pair, \a0, \a1, \a2, \a3, \a4, \a5, \a6, \a7
        doubleplant \pair, \tmp, \q, \qa, \plantconst
    .endr
.endm

#endif
Loading
Loading