diff options
Diffstat (limited to 'linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README')
-rw-r--r--[-rwxr-xr-x] | linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README | 168 |
1 files changed, 84 insertions, 84 deletions
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README index ae42a48..5a2d5fd 100755..100644 --- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README +++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README | |||
@@ -1,84 +1,84 @@ | |||
1 | This directory contains mpn functions for various HP PA-RISC chips. Code | 1 | This directory contains mpn functions for various HP PA-RISC chips. Code |
2 | that runs faster on the PA7100 and later implementations is in the pa7100 | 2 | that runs faster on the PA7100 and later implementations is in the pa7100 |
3 | directory. | 3 | directory. |
4 | 4 | ||
5 | RELEVANT OPTIMIZATION ISSUES | 5 | RELEVANT OPTIMIZATION ISSUES |
6 | 6 | ||
7 | Load and Store timing | 7 | Load and Store timing |
8 | 8 | ||
9 | On the PA7000 no memory instruction can issue during the two cycles after a store. | 9 | On the PA7000 no memory instruction can issue during the two cycles after a store. |
10 | For the PA7100, this is reduced to one cycle. | 10 | For the PA7100, this is reduced to one cycle. |
11 | 11 | ||
12 | The PA7100 has a lockup-free cache, so it helps to schedule loads and their | 12 | The PA7100 has a lockup-free cache, so it helps to schedule loads and their |
13 | dependent instructions really far from each other. | 13 | dependent instructions really far from each other. |
14 | 14 | ||
15 | STATUS | 15 | STATUS |
16 | 16 | ||
17 | 1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the | 17 | 1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the |
18 | instructions below (but some sw pipelining is needed to avoid the | 18 | instructions below (but some sw pipelining is needed to avoid the |
19 | xmpyu-fstds delay): | 19 | xmpyu-fstds delay): |
20 | 20 | ||
21 | fldds s1_ptr | 21 | fldds s1_ptr |
22 | 22 | ||
23 | xmpyu | 23 | xmpyu |
24 | fstds N(%r30) | 24 | fstds N(%r30) |
25 | xmpyu | 25 | xmpyu |
26 | fstds N(%r30) | 26 | fstds N(%r30) |
27 | 27 | ||
28 | ldws N(%r30) | 28 | ldws N(%r30) |
29 | ldws N(%r30) | 29 | ldws N(%r30) |
30 | ldws N(%r30) | 30 | ldws N(%r30) |
31 | ldws N(%r30) | 31 | ldws N(%r30) |
32 | 32 | ||
33 | addc | 33 | addc |
34 | stws res_ptr | 34 | stws res_ptr |
35 | addc | 35 | addc |
36 | stws res_ptr | 36 | stws res_ptr |
37 | 37 | ||
38 | addib Loop | 38 | addib Loop |
39 | 39 | ||
40 | 2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb | 40 | 2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb |
41 | (asymptotically) on the PA7100, using the instructions below. With proper | 41 | (asymptotically) on the PA7100, using the instructions below. With proper |
42 | sw pipelining and the unrolling level below, the speed becomes 8 | 42 | sw pipelining and the unrolling level below, the speed becomes 8 |
43 | cycles/limb. | 43 | cycles/limb. |
44 | 44 | ||
45 | fldds s1_ptr | 45 | fldds s1_ptr |
46 | fldds s1_ptr | 46 | fldds s1_ptr |
47 | 47 | ||
48 | xmpyu | 48 | xmpyu |
49 | fstds N(%r30) | 49 | fstds N(%r30) |
50 | xmpyu | 50 | xmpyu |
51 | fstds N(%r30) | 51 | fstds N(%r30) |
52 | xmpyu | 52 | xmpyu |
53 | fstds N(%r30) | 53 | fstds N(%r30) |
54 | xmpyu | 54 | xmpyu |
55 | fstds N(%r30) | 55 | fstds N(%r30) |
56 | 56 | ||
57 | ldws N(%r30) | 57 | ldws N(%r30) |
58 | ldws N(%r30) | 58 | ldws N(%r30) |
59 | ldws N(%r30) | 59 | ldws N(%r30) |
60 | ldws N(%r30) | 60 | ldws N(%r30) |
61 | ldws N(%r30) | 61 | ldws N(%r30) |
62 | ldws N(%r30) | 62 | ldws N(%r30) |
63 | ldws N(%r30) | 63 | ldws N(%r30) |
64 | ldws N(%r30) | 64 | ldws N(%r30) |
65 | addc | 65 | addc |
66 | addc | 66 | addc |
67 | addc | 67 | addc |
68 | addc | 68 | addc |
69 | addc %r0,%r0,cy-limb | 69 | addc %r0,%r0,cy-limb |
70 | 70 | ||
71 | ldws res_ptr | 71 | ldws res_ptr |
72 | ldws res_ptr | 72 | ldws res_ptr |
73 | ldws res_ptr | 73 | ldws res_ptr |
74 | ldws res_ptr | 74 | ldws res_ptr |
75 | add | 75 | add |
76 | stws res_ptr | 76 | stws res_ptr |
77 | addc | 77 | addc |
78 | stws res_ptr | 78 | stws res_ptr |
79 | addc | 79 | addc |
80 | stws res_ptr | 80 | stws res_ptr |
81 | addc | 81 | addc |
82 | stws res_ptr | 82 | stws res_ptr |
83 | 83 | ||
84 | addib | 84 | addib |