aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1
diff options
context:
space:
mode:
Diffstat (limited to 'linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1')
-rw-r--r--[-rwxr-xr-x]linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/distfiles10
-rw-r--r--[-rwxr-xr-x]linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul1.S226
-rw-r--r--[-rwxr-xr-x]linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul2.S230
-rw-r--r--[-rwxr-xr-x]linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul3.S250
-rw-r--r--[-rwxr-xr-x]linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/udiv-qrnnd.S180
5 files changed, 448 insertions, 448 deletions
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/distfiles b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/distfiles
index 565f24e..d68227a 100755..100644
--- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/distfiles
+++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/distfiles
@@ -1,5 +1,5 @@
1udiv-qrnnd.S 1udiv-qrnnd.S
2mpih-mul1.S 2mpih-mul1.S
3mpih-mul2.S 3mpih-mul2.S
4mpih-mul3.S 4mpih-mul3.S
5 5
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul1.S b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul1.S
index f847ece..93166e3 100755..100644
--- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul1.S
+++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul1.S
@@ -1,113 +1,113 @@
1/* hppa1.1 mul_1 -- Multiply a limb vector with a limb and store 1/* hppa1.1 mul_1 -- Multiply a limb vector with a limb and store
2 * the result in a second limb vector. 2 * the result in a second limb vector.
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1998, 4 * Copyright (C) 1992, 1993, 1994, 1998,
5 * 2001, 2002 Free Software Foundation, Inc. 5 * 2001, 2002 Free Software Foundation, Inc.
6 * 6 *
7 * This file is part of Libgcrypt. 7 * This file is part of Libgcrypt.
8 * 8 *
9 * Libgcrypt is free software; you can redistribute it and/or modify 9 * Libgcrypt is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as 10 * it under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of 11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version. 12 * the License, or (at your option) any later version.
13 * 13 *
14 * Libgcrypt is distributed in the hope that it will be useful, 14 * Libgcrypt is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details. 17 * GNU Lesser General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU Lesser General Public 19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program; if not, write to the Free Software 20 * License along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
22 * 22 *
23 * Note: This code is heavily based on the GNU MP Library. 23 * Note: This code is heavily based on the GNU MP Library.
24 * Actually it's the same code with only minor changes in the 24 * Actually it's the same code with only minor changes in the
25 * way the data is stored; this is to support the abstraction 25 * way the data is stored; this is to support the abstraction
26 * of an optional secure memory allocation which may be used 26 * of an optional secure memory allocation which may be used
27 * to avoid revealing of sensitive data due to paging etc. 27 * to avoid revealing of sensitive data due to paging etc.
28 */ 28 */
29 29
30 30
31/******************* 31/*******************
32 * mpi_limb_t 32 * mpi_limb_t
33 * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (r26) 33 * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (r26)
34 * mpi_ptr_t s1_ptr, (r25) 34 * mpi_ptr_t s1_ptr, (r25)
35 * mpi_size_t s1_size, (r24) 35 * mpi_size_t s1_size, (r24)
36 * mpi_limb_t s2_limb) (r23) 36 * mpi_limb_t s2_limb) (r23)
37 * 37 *
38 * 38 *
39 * 39 *
40 * This runs at 9 cycles/limb on a PA7000. With the used instructions, it can 40 * This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
41 * not become faster due to data cache contention after a store. On the 41 * not become faster due to data cache contention after a store. On the
42 * PA7100 it runs at 7 cycles/limb, and that can not be improved either, since 42 * PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
43 * only the xmpyu does not need the integer pipeline, so the only dual-issue 43 * only the xmpyu does not need the integer pipeline, so the only dual-issue
44 * we will get are addc+xmpyu. Unrolling would not help either CPU. 44 * we will get are addc+xmpyu. Unrolling would not help either CPU.
45 * 45 *
46 * We could use fldds to read two limbs at a time from the S1 array, and that 46 * We could use fldds to read two limbs at a time from the S1 array, and that
47 * could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and 47 * could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
48 * PA7100, respectively. We don't do that since it does not seem worth the 48 * PA7100, respectively. We don't do that since it does not seem worth the
49 * (alignment) troubles... 49 * (alignment) troubles...
50 * 50 *
51 * At least the PA7100 is rumored to be able to deal with cache-misses 51 * At least the PA7100 is rumored to be able to deal with cache-misses
52 * without stalling instruction issue. If this is true, and the cache is 52 * without stalling instruction issue. If this is true, and the cache is
53 * actually also lockup-free, we should use a deeper software pipeline, and 53 * actually also lockup-free, we should use a deeper software pipeline, and
54 * load from S1 very early! (The loads and stores to -12(sp) will surely be 54 * load from S1 very early! (The loads and stores to -12(sp) will surely be
55 * in the cache.) 55 * in the cache.)
56 */ 56 */
57 57
58 .code 58 .code
59 .export _gcry_mpih_mul_1 59 .export _gcry_mpih_mul_1
60 .label _gcry_mpih_mul_1 60 .label _gcry_mpih_mul_1
61 .proc 61 .proc
62 .callinfo frame=64,no_calls 62 .callinfo frame=64,no_calls
63 .entry 63 .entry
64 64
; _gcry_mpih_mul_1: multiply each limb read through s1_ptr (r25) by s2_limb
; (r23) and store the resulting limbs through res_ptr (r26).  s1_size (r24)
; is the limb count; the most significant limb of the result is left in r28.
;
; Open a 64-byte frame.  The doubleword at -16(%r30) is the scratch slot used
; to move values between the general and floating-point register files.
65 ldo 64(%r30),%r30 65 ldo 64(%r30),%r30
; Fetch the first source limb and advance s1_ptr by one limb.
66 fldws,ma 4(%r25),%fr5 66 fldws,ma 4(%r25),%fr5
67 stw %r23,-16(%r30) ; move s2_limb ... 67 stw %r23,-16(%r30) ; move s2_limb ...
; Count down; when only one limb was requested take the short path.  The
; fldws that follows sits in the branch delay slot and is needed on both
; paths, since L$just_one_limb reads fr4 as well.
68 addib,= -1,%r24,L$just_one_limb 68 addib,= -1,%r24,L$just_one_limb
69 fldws -16(%r30),%fr4 ; ... into fr4 69 fldws -16(%r30),%fr4 ; ... into fr4
70 add %r0,%r0,%r0 ; clear carry 70 add %r0,%r0,%r0 ; clear carry
; Pipeline prologue: start the first two products.  Each 64-bit product is
; spilled to the scratch slot so that its low word (-12) and high word (-16)
; can be reloaded into general registers.
71 xmpyu %fr4,%fr5,%fr6 71 xmpyu %fr4,%fr5,%fr6
72 fldws,ma 4(%r25),%fr7 72 fldws,ma 4(%r25),%fr7
73 fstds %fr6,-16(%r30) 73 fstds %fr6,-16(%r30)
74 xmpyu %fr4,%fr7,%fr8 74 xmpyu %fr4,%fr7,%fr8
75 ldw -12(%r30),%r19 ; least significant limb in product 75 ldw -12(%r30),%r19 ; least significant limb in product
76 ldw -16(%r30),%r28 76 ldw -16(%r30),%r28
77 77
78 fstds %fr8,-16(%r30) 78 fstds %fr8,-16(%r30)
; With exactly two limbs there is nothing left to loop over.  The delay-slot
; load of the second product's low word into r1 is used by L$end too.
79 addib,= -1,%r24,L$end 79 addib,= -1,%r24,L$end
80 ldw -12(%r30),%r1 80 ldw -12(%r30),%r1
81 81
82; Main loop 82; Main loop
; Loop state on entry to every pass:
;   r19 = result limb ready to be stored
;   r28 = high word of the previous product
;   r1  = low word of the product currently in the scratch slot
; Each pass stores r19, then forms the next result limb as r28 + r1 + carry.
83 .label L$loop 83 .label L$loop
84 fldws,ma 4(%r25),%fr5 84 fldws,ma 4(%r25),%fr5
85 stws,ma %r19,4(%r26) 85 stws,ma %r19,4(%r26)
86 addc %r28,%r1,%r19 86 addc %r28,%r1,%r19
87 xmpyu %fr4,%fr5,%fr6 87 xmpyu %fr4,%fr5,%fr6
; Pick up the high word of the product still in the scratch slot before the
; new product overwrites it.
88 ldw -16(%r30),%r28 88 ldw -16(%r30),%r28
89 fstds %fr6,-16(%r30) 89 fstds %fr6,-16(%r30)
90 addib,<> -1,%r24,L$loop 90 addib,<> -1,%r24,L$loop
91 ldw -12(%r30),%r1 91 ldw -12(%r30),%r1
92 92
; Pipeline drain: store the pending limb, combine the last high/low pair and
; store that, then fold the remaining carry into the high limb kept in r28.
93 .label L$end 93 .label L$end
94 stws,ma %r19,4(%r26) 94 stws,ma %r19,4(%r26)
95 addc %r28,%r1,%r19 95 addc %r28,%r1,%r19
96 ldw -16(%r30),%r28 96 ldw -16(%r30),%r28
97 stws,ma %r19,4(%r26) 97 stws,ma %r19,4(%r26)
98 addc %r0,%r28,%r28 98 addc %r0,%r28,%r28
; Return through r2; the frame is released in the branch delay slot.
99 bv 0(%r2) 99 bv 0(%r2)
100 ldo -64(%r30),%r30 100 ldo -64(%r30),%r30
101 101
; Single-limb case: one product only.  Its high word goes to r28 via the
; scratch slot, and its low word (fr6R) is stored straight to res_ptr[0]
; from the delay slot of the return branch.
102 .label L$just_one_limb 102 .label L$just_one_limb
103 xmpyu %fr4,%fr5,%fr6 103 xmpyu %fr4,%fr5,%fr6
104 fstds %fr6,-16(%r30) 104 fstds %fr6,-16(%r30)
105 ldw -16(%r30),%r28 105 ldw -16(%r30),%r28
106 ldo -64(%r30),%r30 106 ldo -64(%r30),%r30
107 bv 0(%r2) 107 bv 0(%r2)
108 fstws %fr6R,0(%r26) 108 fstws %fr6R,0(%r26)
109 109
110 .exit 110 .exit
111 .procend 111 .procend
112 112
113 113
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul2.S b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul2.S
index 0fdbbf2..84b9d4a 100755..100644
--- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul2.S
+++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul2.S
@@ -1,115 +1,115 @@
1/* hppa1.1 addmul_1 -- Multiply a limb vector with a limb and add 1/* hppa1.1 addmul_1 -- Multiply a limb vector with a limb and add
2 * the result to a second limb vector. 2 * the result to a second limb vector.
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1998, 4 * Copyright (C) 1992, 1993, 1994, 1998,
5 * 2001, 2002 Free Software Foundation, Inc. 5 * 2001, 2002 Free Software Foundation, Inc.
6 * 6 *
7 * This file is part of Libgcrypt. 7 * This file is part of Libgcrypt.
8 * 8 *
9 * Libgcrypt is free software; you can redistribute it and/or modify 9 * Libgcrypt is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as 10 * it under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of 11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version. 12 * the License, or (at your option) any later version.
13 * 13 *
14 * Libgcrypt is distributed in the hope that it will be useful, 14 * Libgcrypt is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details. 17 * GNU Lesser General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU Lesser General Public 19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program; if not, write to the Free Software 20 * License along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
22 * 22 *
23 * Note: This code is heavily based on the GNU MP Library. 23 * Note: This code is heavily based on the GNU MP Library.
24 * Actually it's the same code with only minor changes in the 24 * Actually it's the same code with only minor changes in the
25 * way the data is stored; this is to support the abstraction 25 * way the data is stored; this is to support the abstraction
26 * of an optional secure memory allocation which may be used 26 * of an optional secure memory allocation which may be used
27 * to avoid revealing of sensitive data due to paging etc. 27 * to avoid revealing of sensitive data due to paging etc.
28 */ 28 */
29 29
30 30
31/******************* 31/*******************
32 * mpi_limb_t 32 * mpi_limb_t
33 * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (r26) 33 * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (r26)
34 * mpi_ptr_t s1_ptr, (r25) 34 * mpi_ptr_t s1_ptr, (r25)
35 * mpi_size_t s1_size, (r24) 35 * mpi_size_t s1_size, (r24)
36 * mpi_limb_t s2_limb) (r23) 36 * mpi_limb_t s2_limb) (r23)
37 * 37 *
38 * This runs at 11 cycles/limb on a PA7000. With the used instructions, it 38 * This runs at 11 cycles/limb on a PA7000. With the used instructions, it
39 * can not become faster due to data cache contention after a store. On the 39 * can not become faster due to data cache contention after a store. On the
40 * PA7100 it runs at 10 cycles/limb, and that can not be improved either, 40 * PA7100 it runs at 10 cycles/limb, and that can not be improved either,
41 * since only the xmpyu does not need the integer pipeline, so the only 41 * since only the xmpyu does not need the integer pipeline, so the only
42 * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb 42 * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
43 * on the PA7100. 43 * on the PA7100.
44 * 44 *
45 * There are some ideas described in mpih-mul1.S that apply to this code too. 45 * There are some ideas described in mpih-mul1.S that apply to this code too.
46 */ 46 */
47 47
48 .code 48 .code
49 .export _gcry_mpih_addmul_1 49 .export _gcry_mpih_addmul_1
50 .label _gcry_mpih_addmul_1 50 .label _gcry_mpih_addmul_1
51 .proc 51 .proc
52 .callinfo frame=64,no_calls 52 .callinfo frame=64,no_calls
53 .entry 53 .entry
54 54
; _gcry_mpih_addmul_1: multiply each limb read through s1_ptr (r25) by
; s2_limb (r23) and add the products into the limbs at res_ptr (r26), in
; place.  s1_size (r24) is the limb count; the limb carried out of the top
; of the sum is left in r28.
;
; Open a 64-byte frame.  The doubleword at -16(%r30) is the scratch slot used
; to move values between the general and floating-point register files.
55 ldo 64(%r30),%r30 55 ldo 64(%r30),%r30
; Fetch the first source limb and advance s1_ptr by one limb.
56 fldws,ma 4(%r25),%fr5 56 fldws,ma 4(%r25),%fr5
57 stw %r23,-16(%r30) ; move s2_limb ... 57 stw %r23,-16(%r30) ; move s2_limb ...
; Count down; a one-limb request takes the short path.  The fldws in the
; delay slot is needed on both paths (L$just_one_limb reads fr4).
58 addib,= -1,%r24,L$just_one_limb 58 addib,= -1,%r24,L$just_one_limb
59 fldws -16(%r30),%fr4 ; ... into fr4 59 fldws -16(%r30),%fr4 ; ... into fr4
60 add %r0,%r0,%r0 ; clear carry 60 add %r0,%r0,%r0 ; clear carry
; Pipeline prologue: start the first two products and spill them through the
; scratch slot; -12 holds a product's low word and -16 its high word.
61 xmpyu %fr4,%fr5,%fr6 61 xmpyu %fr4,%fr5,%fr6
62 fldws,ma 4(%r25),%fr7 62 fldws,ma 4(%r25),%fr7
63 fstds %fr6,-16(%r30) 63 fstds %fr6,-16(%r30)
64 xmpyu %fr4,%fr7,%fr8 64 xmpyu %fr4,%fr7,%fr8
65 ldw -12(%r30),%r19 ; least significant limb in product 65 ldw -12(%r30),%r19 ; least significant limb in product
66 ldw -16(%r30),%r28 66 ldw -16(%r30),%r28
67 67
68 fstds %fr8,-16(%r30) 68 fstds %fr8,-16(%r30)
; With exactly two limbs skip the loop; r1 (loaded in the delay slot) is the
; low word of the second product and is used by L$end too.
69 addib,= -1,%r24,L$end 69 addib,= -1,%r24,L$end
70 ldw -12(%r30),%r1 70 ldw -12(%r30),%r1
71 71
72; Main loop 72; Main loop
; Loop state on entry to every pass:
;   r19 = product limb to be added to the next destination limb
;   r28 = high word of the previous product (carry already folded in)
;   r1  = low word of the product currently in the scratch slot
73 .label L$loop 73 .label L$loop
; Add the pending product limb to the destination limb and write it back.
74 ldws 0(%r26),%r29 74 ldws 0(%r26),%r29
75 fldws,ma 4(%r25),%fr5 75 fldws,ma 4(%r25),%fr5
76 add %r29,%r19,%r19 76 add %r29,%r19,%r19
77 stws,ma %r19,4(%r26) 77 stws,ma %r19,4(%r26)
; Next product limb = previous high word + current low word + the carry out
; of the destination addition above.
78 addc %r28,%r1,%r19 78 addc %r28,%r1,%r19
79 xmpyu %fr4,%fr5,%fr6 79 xmpyu %fr4,%fr5,%fr6
; Read the current product's high word before the slot is overwritten, then
; fold the carry from the addc above into it.
80 ldw -16(%r30),%r28 80 ldw -16(%r30),%r28
81 fstds %fr6,-16(%r30) 81 fstds %fr6,-16(%r30)
82 addc %r0,%r28,%r28 82 addc %r0,%r28,%r28
83 addib,<> -1,%r24,L$loop 83 addib,<> -1,%r24,L$loop
84 ldw -12(%r30),%r1 84 ldw -12(%r30),%r1
85 85
; Pipeline drain: the same add/store/addc steps for the last two destination
; limbs, without starting new products.  r28 ends up holding the final high
; word plus both outstanding carries.
86 .label L$end 86 .label L$end
87 ldw 0(%r26),%r29 87 ldw 0(%r26),%r29
88 add %r29,%r19,%r19 88 add %r29,%r19,%r19
89 stws,ma %r19,4(%r26) 89 stws,ma %r19,4(%r26)
90 addc %r28,%r1,%r19 90 addc %r28,%r1,%r19
91 ldw -16(%r30),%r28 91 ldw -16(%r30),%r28
92 ldws 0(%r26),%r29 92 ldws 0(%r26),%r29
93 addc %r0,%r28,%r28 93 addc %r0,%r28,%r28
94 add %r29,%r19,%r19 94 add %r29,%r19,%r19
95 stws,ma %r19,4(%r26) 95 stws,ma %r19,4(%r26)
96 addc %r0,%r28,%r28 96 addc %r0,%r28,%r28
; Return through r2; the frame is released in the branch delay slot.
97 bv 0(%r2) 97 bv 0(%r2)
98 ldo -64(%r30),%r30 98 ldo -64(%r30),%r30
99 99
; Single-limb case: res_ptr[0] += low word of the product; the returned limb
; in r28 is the product's high word plus the carry out of that addition.
100 .label L$just_one_limb 100 .label L$just_one_limb
101 xmpyu %fr4,%fr5,%fr6 101 xmpyu %fr4,%fr5,%fr6
102 ldw 0(%r26),%r29 102 ldw 0(%r26),%r29
103 fstds %fr6,-16(%r30) 103 fstds %fr6,-16(%r30)
104 ldw -12(%r30),%r1 104 ldw -12(%r30),%r1
105 ldw -16(%r30),%r28 105 ldw -16(%r30),%r28
106 add %r29,%r1,%r19 106 add %r29,%r1,%r19
107 stw %r19,0(%r26) 107 stw %r19,0(%r26)
108 addc %r0,%r28,%r28 108 addc %r0,%r28,%r28
109 bv 0(%r2) 109 bv 0(%r2)
110 ldo -64(%r30),%r30 110 ldo -64(%r30),%r30
111 111
112 .exit 112 .exit
113 .procend 113 .procend
114 114
115 115
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul3.S b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul3.S
index 7daf7f4..c4246e4 100755..100644
--- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul3.S
+++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/mpih-mul3.S
@@ -1,125 +1,125 @@
1/* hppa1.1 submul_1 -- Multiply a limb vector with a limb and subtract 1/* hppa1.1 submul_1 -- Multiply a limb vector with a limb and subtract
2 * the result from a second limb vector. 2 * the result from a second limb vector.
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1998, 4 * Copyright (C) 1992, 1993, 1994, 1998,
5 * 2001, 2002 Free Software Foundation, Inc. 5 * 2001, 2002 Free Software Foundation, Inc.
6 * 6 *
7 * This file is part of Libgcrypt. 7 * This file is part of Libgcrypt.
8 * 8 *
9 * Libgcrypt is free software; you can redistribute it and/or modify 9 * Libgcrypt is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as 10 * it under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of 11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version. 12 * the License, or (at your option) any later version.
13 * 13 *
14 * Libgcrypt is distributed in the hope that it will be useful, 14 * Libgcrypt is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details. 17 * GNU Lesser General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU Lesser General Public 19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program; if not, write to the Free Software 20 * License along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
22 * 22 *
23 * Note: This code is heavily based on the GNU MP Library. 23 * Note: This code is heavily based on the GNU MP Library.
24 * Actually it's the same code with only minor changes in the 24 * Actually it's the same code with only minor changes in the
25 * way the data is stored; this is to support the abstraction 25 * way the data is stored; this is to support the abstraction
26 * of an optional secure memory allocation which may be used 26 * of an optional secure memory allocation which may be used
27 * to avoid revealing of sensitive data due to paging etc. 27 * to avoid revealing of sensitive data due to paging etc.
28 */ 28 */
29 29
30 30
31/******************* 31/*******************
32 * mpi_limb_t 32 * mpi_limb_t
33 * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (r26) 33 * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (r26)
34 * mpi_ptr_t s1_ptr, (r25) 34 * mpi_ptr_t s1_ptr, (r25)
35 * mpi_size_t s1_size, (r24) 35 * mpi_size_t s1_size, (r24)
36 * mpi_limb_t s2_limb) (r23) 36 * mpi_limb_t s2_limb) (r23)
37 * 37 *
38 * 38 *
39 * This runs at 12 cycles/limb on a PA7000. With the used instructions, it 39 * This runs at 12 cycles/limb on a PA7000. With the used instructions, it
40 * can not become faster due to data cache contention after a store. On the 40 * can not become faster due to data cache contention after a store. On the
41 * PA7100 it runs at 11 cycles/limb, and that can not be improved either, 41 * PA7100 it runs at 11 cycles/limb, and that can not be improved either,
42 * since only the xmpyu does not need the integer pipeline, so the only 42 * since only the xmpyu does not need the integer pipeline, so the only
43 * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb 43 * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
44 * on the PA7100. 44 * on the PA7100.
45 * 45 *
46 * There are some ideas described in mpih-mul1.S that apply to this code too. 46 * There are some ideas described in mpih-mul1.S that apply to this code too.
47 * 47 *
48 * It seems possible to make this run as fast as addmul_1, if we use 48 * It seems possible to make this run as fast as addmul_1, if we use
49 * sub,>>= %r29,%r19,%r22 49 * sub,>>= %r29,%r19,%r22
50 * addi 1,%r28,%r28 50 * addi 1,%r28,%r28
51 * but that requires reworking the hairy software pipeline... 51 * but that requires reworking the hairy software pipeline...
52 */ 52 */
53 53
54 54
55 .code 55 .code
56 .export _gcry_mpih_submul_1 56 .export _gcry_mpih_submul_1
57 .label _gcry_mpih_submul_1 57 .label _gcry_mpih_submul_1
58 .proc 58 .proc
59 .callinfo frame=64,no_calls 59 .callinfo frame=64,no_calls
60 .entry 60 .entry
61 61
; _gcry_mpih_submul_1: multiply each limb read through s1_ptr (r25) by
; s2_limb (r23) and subtract the products from the limbs at res_ptr (r26),
; in place.  s1_size (r24) is the limb count; the limb borrowed out of the
; top of the difference is left in r28.
;
; Open a 64-byte frame.  The doubleword at -16(%r30) is the scratch slot used
; to move values between the general and floating-point register files.
62 ldo 64(%r30),%r30 62 ldo 64(%r30),%r30
; Fetch the first source limb and advance s1_ptr by one limb.
63 fldws,ma 4(%r25),%fr5 63 fldws,ma 4(%r25),%fr5
64 stw %r23,-16(%r30) ; move s2_limb ... 64 stw %r23,-16(%r30) ; move s2_limb ...
; Count down; a one-limb request takes the short path.  The fldws in the
; delay slot is needed on both paths (L$just_one_limb reads fr4).
65 addib,= -1,%r24,L$just_one_limb 65 addib,= -1,%r24,L$just_one_limb
66 fldws -16(%r30),%fr4 ; ... into fr4 66 fldws -16(%r30),%fr4 ; ... into fr4
67 add %r0,%r0,%r0 ; clear carry 67 add %r0,%r0,%r0 ; clear carry
; Pipeline prologue: start the first two products and spill them through the
; scratch slot; -12 holds a product's low word and -16 its high word.
68 xmpyu %fr4,%fr5,%fr6 68 xmpyu %fr4,%fr5,%fr6
69 fldws,ma 4(%r25),%fr7 69 fldws,ma 4(%r25),%fr7
70 fstds %fr6,-16(%r30) 70 fstds %fr6,-16(%r30)
71 xmpyu %fr4,%fr7,%fr8 71 xmpyu %fr4,%fr7,%fr8
72 ldw -12(%r30),%r19 ; least significant limb in product 72 ldw -12(%r30),%r19 ; least significant limb in product
73 ldw -16(%r30),%r28 73 ldw -16(%r30),%r28
74 74
75 fstds %fr8,-16(%r30) 75 fstds %fr8,-16(%r30)
; With exactly two limbs skip the loop; r1 (loaded in the delay slot) is the
; low word of the second product and is used by L$end too.
76 addib,= -1,%r24,L$end 76 addib,= -1,%r24,L$end
77 ldw -12(%r30),%r1 77 ldw -12(%r30),%r1
78 78
79; Main loop 79; Main loop
; Loop state on entry to every pass:
;   r19 = product limb to be subtracted from the next destination limb
;   r28 = high word of the previous product (carry already folded in)
;   r1  = low word of the product currently in the scratch slot
80 .label L$loop 80 .label L$loop
81 ldws 0(%r26),%r29 81 ldws 0(%r26),%r29
82 fldws,ma 4(%r25),%fr5 82 fldws,ma 4(%r25),%fr5
; r22 = destination limb - product limb.  The add into r0 discards its sum
; and exists only for its carry: r22 + r19 wraps exactly when the
; subtraction borrowed, so the borrow re-enters the addc chain as a carry.
83 sub %r29,%r19,%r22 83 sub %r29,%r19,%r22
84 add %r22,%r19,%r0 84 add %r22,%r19,%r0
85 stws,ma %r22,4(%r26) 85 stws,ma %r22,4(%r26)
; Next product limb = previous high word + current low word + that carry.
86 addc %r28,%r1,%r19 86 addc %r28,%r1,%r19
87 xmpyu %fr4,%fr5,%fr6 87 xmpyu %fr4,%fr5,%fr6
; Read the current product's high word before the slot is overwritten, then
; fold the carry from the addc above into it.
88 ldw -16(%r30),%r28 88 ldw -16(%r30),%r28
89 fstds %fr6,-16(%r30) 89 fstds %fr6,-16(%r30)
90 addc %r0,%r28,%r28 90 addc %r0,%r28,%r28
91 addib,<> -1,%r24,L$loop 91 addib,<> -1,%r24,L$loop
92 ldw -12(%r30),%r1 92 ldw -12(%r30),%r1
93 93
; Pipeline drain: the same sub/add/store/addc steps for the last two
; destination limbs, without starting new products.  r28 ends up holding the
; final high word plus the outstanding carries.
94 .label L$end 94 .label L$end
95 ldw 0(%r26),%r29 95 ldw 0(%r26),%r29
96 sub %r29,%r19,%r22 96 sub %r29,%r19,%r22
97 add %r22,%r19,%r0 97 add %r22,%r19,%r0
98 stws,ma %r22,4(%r26) 98 stws,ma %r22,4(%r26)
99 addc %r28,%r1,%r19 99 addc %r28,%r1,%r19
100 ldw -16(%r30),%r28 100 ldw -16(%r30),%r28
101 ldws 0(%r26),%r29 101 ldws 0(%r26),%r29
102 addc %r0,%r28,%r28 102 addc %r0,%r28,%r28
103 sub %r29,%r19,%r22 103 sub %r29,%r19,%r22
104 add %r22,%r19,%r0 104 add %r22,%r19,%r0
105 stws,ma %r22,4(%r26) 105 stws,ma %r22,4(%r26)
106 addc %r0,%r28,%r28 106 addc %r0,%r28,%r28
; Return through r2; the frame is released in the branch delay slot.
107 bv 0(%r2) 107 bv 0(%r2)
108 ldo -64(%r30),%r30 108 ldo -64(%r30),%r30
109 109
; Single-limb case: res_ptr[0] -= low word of the product; the returned limb
; in r28 is the product's high word plus the borrow from that subtraction
; (recovered as a carry by the add into r0).
110 .label L$just_one_limb 110 .label L$just_one_limb
111 xmpyu %fr4,%fr5,%fr6 111 xmpyu %fr4,%fr5,%fr6
112 ldw 0(%r26),%r29 112 ldw 0(%r26),%r29
113 fstds %fr6,-16(%r30) 113 fstds %fr6,-16(%r30)
114 ldw -12(%r30),%r1 114 ldw -12(%r30),%r1
115 ldw -16(%r30),%r28 115 ldw -16(%r30),%r28
116 sub %r29,%r1,%r22 116 sub %r29,%r1,%r22
117 add %r22,%r1,%r0 117 add %r22,%r1,%r0
118 stw %r22,0(%r26) 118 stw %r22,0(%r26)
119 addc %r0,%r28,%r28 119 addc %r0,%r28,%r28
120 bv 0(%r2) 120 bv 0(%r2)
121 ldo -64(%r30),%r30 121 ldo -64(%r30),%r30
122 122
123 .exit 123 .exit
124 .procend 124 .procend
125 125
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/udiv-qrnnd.S b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/udiv-qrnnd.S
index cdf987e..020c31e 100755..100644
--- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/udiv-qrnnd.S
+++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa1.1/udiv-qrnnd.S
@@ -1,90 +1,90 @@
1/* HP-PA __udiv_qrnnd division support, used from longlong.h. 1/* HP-PA __udiv_qrnnd division support, used from longlong.h.
2 * This version runs fast on PA 7000 and later. 2 * This version runs fast on PA 7000 and later.
3 * 3 *
4 * Copyright (C) 1993, 1994, 1998, 4 * Copyright (C) 1993, 1994, 1998,
5 * 2001, 2002, 2004 Free Software Foundation, Inc. 5 * 2001, 2002, 2004 Free Software Foundation, Inc.
6 * 6 *
7 * This file is part of Libgcrypt. 7 * This file is part of Libgcrypt.
8 * 8 *
9 * Libgcrypt is free software; you can redistribute it and/or modify 9 * Libgcrypt is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as 10 * it under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of 11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version. 12 * the License, or (at your option) any later version.
13 * 13 *
14 * Libgcrypt is distributed in the hope that it will be useful, 14 * Libgcrypt is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details. 17 * GNU Lesser General Public License for more details.
18 * 18 *
19 * You should have received a copy of the GNU Lesser General Public 19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program; if not, write to the Free Software 20 * License along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
22 * 22 *
23 * Note: This code is heavily based on the GNU MP Library. 23 * Note: This code is heavily based on the GNU MP Library.
24 * Actually it's the same code with only minor changes in the 24 * Actually it's the same code with only minor changes in the
25 * way the data is stored; this is to support the abstraction 25 * way the data is stored; this is to support the abstraction
26 * of an optional secure memory allocation which may be used 26 * of an optional secure memory allocation which may be used
27 * to avoid revealing of sensitive data due to paging etc. 27 * to avoid revealing of sensitive data due to paging etc.
28 */ 28 */
29 29
30 30
31 31
32/* INPUT PARAMETERS 32/* INPUT PARAMETERS
33 * rem_ptr gr26 33 * rem_ptr gr26
34 * n1 gr25 34 * n1 gr25
35 * n0 gr24 35 * n0 gr24
36 * d gr23 36 * d gr23
37 */ 37 */
38 38
; L$0000 is the IEEE double 2^64 (high word 0x43f00000, low word 0).  It is
; added below when the 64-bit numerator, converted as a signed value, came
; out negative.
39 .data 39 .data
40 .align 8 40 .align 8
41 .label L$0000 41 .label L$0000
42 .word 0x43f00000 42 .word 0x43f00000
43 .word 0x0 43 .word 0x0
44 .code 44 .code
45 .export __udiv_qrnnd 45 .export __udiv_qrnnd
46 .label __udiv_qrnnd 46 .label __udiv_qrnnd
47 .proc 47 .proc
48 .callinfo frame=64,no_calls 48 .callinfo frame=64,no_calls
49 .entry 49 .entry
; __udiv_qrnnd: divide the two-word value n1:n0 (r25:r24) by d (r23) using
; double-precision floating point.  The quotient is left in r28 and the
; remainder is stored through rem_ptr (r26).
;
; Open a 64-byte frame; the doubleword at -16(%r30) is the scratch slot used
; to move values between the general and floating-point register files.
50 ldo 64(%r30),%r30 50 ldo 64(%r30),%r30
51 51
52 stws %r25,-16(0,%r30) ; n_hi 52 stws %r25,-16(0,%r30) ; n_hi
53 stws %r24,-12(0,%r30) ; n_lo 53 stws %r24,-12(0,%r30) ; n_lo
; Keep a copy of r19 in the frame, then fetch the address of L$0000 into r1
; through the LT%/RT% relocation pair based on r19.
; NOTE(review): this looks like a position-independent linkage-table access
; with r19 as the table pointer -- confirm against the target ABI.
54 stw %r19,-32(%r30) 54 stw %r19,-32(%r30)
55 addil LT%L$0000,%r19 55 addil LT%L$0000,%r19
56 ldw RT%L$0000(%r1),%r1 56 ldw RT%L$0000(%r1),%r1
; fr5 = n1:n0 as a 64-bit integer; the low scratch word is then reused for d.
57 fldds -16(0,%r30),%fr5 57 fldds -16(0,%r30),%fr5
58 stws %r23,-12(0,%r30) 58 stws %r23,-12(0,%r30)
; Convert the numerator to double (delay slot, executed on both paths).  If
; n1 is non-negative as a signed word the conversion is already right;
; otherwise add 2^64 to undo the signed interpretation.
59 comib,<= 0,%r25,L$1 59 comib,<= 0,%r25,L$1
60 fcnvxf,dbl,dbl %fr5,%fr5 60 fcnvxf,dbl,dbl %fr5,%fr5
61 fldds 0(0,%r1),%fr4 61 fldds 0(0,%r1),%fr4
62 fadd,dbl %fr4,%fr5,%fr5 62 fadd,dbl %fr4,%fr5,%fr5
63 .label L$1 63 .label L$1
; Build the 64-bit integer 0:d in fr6 and convert it to double in fr4.
; NOTE(review): relies on %fr0 supplying zero as a copy source -- confirm
; against the PA-RISC FPU specification.
64 fcpy,sgl %fr0,%fr6L 64 fcpy,sgl %fr0,%fr6L
65 fldws -12(0,%r30),%fr6R 65 fldws -12(0,%r30),%fr6R
66 fcnvxf,dbl,dbl %fr6,%fr4 66 fcnvxf,dbl,dbl %fr6,%fr4
67 67
; Quotient estimate: n / d in double precision ...
68 fdiv,dbl %fr5,%fr4,%fr5 68 fdiv,dbl %fr5,%fr4,%fr5
69 69
; ... converted back to a 64-bit integer; its low word (fr4R) is q.
70 fcnvfx,dbl,dbl %fr5,%fr4 70 fcnvfx,dbl,dbl %fr5,%fr4
71 fstws %fr4R,-16(%r30) 71 fstws %fr4R,-16(%r30)
; q * d, computed while q itself is moved to r28 through the scratch slot.
72 xmpyu %fr4R,%fr6R,%fr6 72 xmpyu %fr4R,%fr6R,%fr6
73 ldws -16(%r30),%r28 73 ldws -16(%r30),%r28
74 fstds %fr6,-16(0,%r30) 74 fstds %fr6,-16(0,%r30)
75 ldws -12(0,%r30),%r21 75 ldws -12(0,%r30),%r21
76 ldws -16(0,%r30),%r20 76 ldws -16(0,%r30),%r20
; Two-word subtraction n - q*d: r22 = low word (remainder candidate),
; r1 = high word.
77 sub %r24,%r21,%r22 77 sub %r24,%r21,%r22
78 subb %r25,%r20,%r1 78 subb %r25,%r20,%r1
; A zero high word means the estimate is accepted as is.  The frame is
; released in the delay slot on both paths.
79 comib,= 0,%r1,L$2 79 comib,= 0,%r1,L$2
80 ldo -64(%r30),%r30 80 ldo -64(%r30),%r30
81 81
; Otherwise apply one correction step: add d back to the remainder and
; decrement the quotient.
82 add %r22,%r23,%r22 82 add %r22,%r23,%r22
83 ldo -1(%r28),%r28 83 ldo -1(%r28),%r28
84 .label L$2 84 .label L$2
; Return through r2; the remainder is stored to *rem_ptr in the delay slot.
85 bv 0(%r2) 85 bv 0(%r2)
86 stws %r22,0(0,%r26) 86 stws %r22,0(0,%r26)
87 87
88 .exit 88 .exit
89 .procend 89 .procend
90 90