aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README
diff options
context:
space:
mode:
Diffstat (limited to 'linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README')
-rw-r--r--[-rwxr-xr-x]linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README168
1 files changed, 84 insertions, 84 deletions
diff --git a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README
index ae42a48..5a2d5fd 100755..100644
--- a/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README
+++ b/linden/indra/libgcrypt/libgcrypt-1.2.2/mpi/hppa/README
@@ -1,84 +1,84 @@
1This directory contains mpn functions for various HP PA-RISC chips. Code 1This directory contains mpn functions for various HP PA-RISC chips. Code
2that runs faster on the PA7100 and later implementations is in the pa7100 2that runs faster on the PA7100 and later implementations is in the pa7100
3directory. 3directory.
4 4
5RELEVANT OPTIMIZATION ISSUES 5RELEVANT OPTIMIZATION ISSUES
6 6
7 Load and Store timing 7 Load and Store timing
8 8
9On the PA7000 no memory instructions can issue in the two cycles after a store. 9On the PA7000 no memory instructions can issue in the two cycles after a store.
10For the PA7100, this is reduced to one cycle. 10For the PA7100, this is reduced to one cycle.
11 11
12The PA7100 has a lockup-free cache, so it helps to schedule loads and the 12The PA7100 has a lockup-free cache, so it helps to schedule loads and the
13dependent instruction really far from each other. 13dependent instruction really far from each other.
14 14
15STATUS 15STATUS
16 16
171. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the 171. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
18 instructions below (but some sw pipelining is needed to avoid the 18 instructions below (but some sw pipelining is needed to avoid the
19 xmpyu-fstds delay): 19 xmpyu-fstds delay):
20 20
21 fldds s1_ptr 21 fldds s1_ptr
22 22
23 xmpyu 23 xmpyu
24 fstds N(%r30) 24 fstds N(%r30)
25 xmpyu 25 xmpyu
26 fstds N(%r30) 26 fstds N(%r30)
27 27
28 ldws N(%r30) 28 ldws N(%r30)
29 ldws N(%r30) 29 ldws N(%r30)
30 ldws N(%r30) 30 ldws N(%r30)
31 ldws N(%r30) 31 ldws N(%r30)
32 32
33 addc 33 addc
34 stws res_ptr 34 stws res_ptr
35 addc 35 addc
36 stws res_ptr 36 stws res_ptr
37 37
38 addib Loop 38 addib Loop
39 39
402. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb 402. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
41 (asymptotically) on the PA7100, using the instructions below. With proper 41 (asymptotically) on the PA7100, using the instructions below. With proper
42 sw pipelining and the unrolling level below, the speed becomes 8 42 sw pipelining and the unrolling level below, the speed becomes 8
43 cycles/limb. 43 cycles/limb.
44 44
45 fldds s1_ptr 45 fldds s1_ptr
46 fldds s1_ptr 46 fldds s1_ptr
47 47
48 xmpyu 48 xmpyu
49 fstds N(%r30) 49 fstds N(%r30)
50 xmpyu 50 xmpyu
51 fstds N(%r30) 51 fstds N(%r30)
52 xmpyu 52 xmpyu
53 fstds N(%r30) 53 fstds N(%r30)
54 xmpyu 54 xmpyu
55 fstds N(%r30) 55 fstds N(%r30)
56 56
57 ldws N(%r30) 57 ldws N(%r30)
58 ldws N(%r30) 58 ldws N(%r30)
59 ldws N(%r30) 59 ldws N(%r30)
60 ldws N(%r30) 60 ldws N(%r30)
61 ldws N(%r30) 61 ldws N(%r30)
62 ldws N(%r30) 62 ldws N(%r30)
63 ldws N(%r30) 63 ldws N(%r30)
64 ldws N(%r30) 64 ldws N(%r30)
65 addc 65 addc
66 addc 66 addc
67 addc 67 addc
68 addc 68 addc
69 addc %r0,%r0,cy-limb 69 addc %r0,%r0,cy-limb
70 70
71 ldws res_ptr 71 ldws res_ptr
72 ldws res_ptr 72 ldws res_ptr
73 ldws res_ptr 73 ldws res_ptr
74 ldws res_ptr 74 ldws res_ptr
75 add 75 add
76 stws res_ptr 76 stws res_ptr
77 addc 77 addc
78 stws res_ptr 78 stws res_ptr
79 addc 79 addc
80 stws res_ptr 80 stws res_ptr
81 addc 81 addc
82 stws res_ptr 82 stws res_ptr
83 83
84 addib 84 addib