# salsa20_word_sparc version 20050327
# D. J. Bernstein
# Public domain.
#
# Overview
# --------
# salsa20_word_sparc(out, in):
#   1. loads sixteen 32-bit words from in+0 .. in+60 into x0..x15,
#   2. runs the loop at `mainloop` (i starts at 20 and drops by 2 per pass),
#   3. re-reads the same sixteen words from `in` into q0..q15, adds them to
#      x0..x15, and writes the sixteen 32-bit sums to out+0 .. out+60.
#
# Every update inside the loop has the shape
#       t  = xa + xb
#       zt = (uint32) t << k
#       t  = (uint32) t >> (32 - k)
#       t |= zt
#       xc ^= t
# i.e. xc ^= rotate-left-32(xa + xb, k), with (k, 32-k) drawn from
# (7,25), (9,23), (13,19), (18,14).
#
# Statements belonging to four independent add/rotate/xor chains are
# interleaved with each other; the order of statements is significant
# because temporaries (z*) are reused between chains.
# NOTE(review): the interleaving is presumably an instruction-scheduling
# choice for the target CPU - do not reorder without measuring.
#
# NOTE(review): all variables are declared int64 while the data is 32-bit.
# Additions can carry into the upper half of a register; this presumably is
# harmless because only `(uint32)` shifts and 32-bit stores consume the
# values - confirm against the qhasm SPARC machine description.

# Temporaries for the first half of the loop body (y*) and for the second
# half (u*).
register int64 y4
register int64 y8
register int64 y12
register int64 y0
register int64 u1
register int64 u2
register int64 u3
register int64 u0
register int64 y9
register int64 y13
register int64 y1
register int64 y5
register int64 u6
register int64 u7
register int64 u4
register int64 u5
register int64 y14
register int64 y2
register int64 y6
register int64 y10
register int64 u11
register int64 u8
register int64 u9
register int64 u10
register int64 y3
register int64 y7
register int64 y11
register int64 y15
register int64 u12
register int64 u13
register int64 u14
register int64 u15
# The sixteen working state words.
register int64 x0
register int64 x1
register int64 x2
register int64 x3
register int64 x4
register int64 x5
register int64 x6
register int64 x7
register int64 x8
register int64 x9
register int64 x10
register int64 x11
register int64 x12
register int64 x13
register int64 x14
register int64 x15
# Loop counter.
register int64 i
# Holders for the left-shifted half of each rotation.
register int64 z4
register int64 z14
register int64 z9
register int64 z3
register int64 z0
register int64 z1
register int64 z2
register int64 z5
register int64 z6
register int64 z7
register int64 z8
register int64 z10
register int64 z11
register int64 z12
register int64 z13
register int64 z15
# Copies of the original input words, used for the final addition.
register int64 q0
register int64 q1
register int64 q2
register int64 q3
register int64 q4
register int64 q5
register int64 q6
register int64 q7
register int64 q8
register int64 q9
register int64 q10
register int64 q11
register int64 q12
register int64 q13
register int64 q14
register int64 q15
# Function arguments: source and destination addresses.
register int64 in
register int64 out

enter salsa20_word_sparc
input out
input in

# Load the 16-word (64-byte) input block into the working state.
x0 = *(uint32 *) (in + 0)
x1 = *(uint32 *) (in + 4)
x2 = *(uint32 *) (in + 8)
x3 = *(uint32 *) (in + 12)
x4 = *(uint32 *) (in + 16)
x5 = *(uint32 *) (in + 20)
x6 = *(uint32 *) (in + 24)
x7 = *(uint32 *) (in + 28)
x8 = *(uint32 *) (in + 32)
x9 = *(uint32 *) (in + 36)
x10 = *(uint32 *) (in + 40)
x11 = *(uint32 *) (in + 44)
x12 = *(uint32 *) (in + 48)
x13 = *(uint32 *) (in + 52)
x14 = *(uint32 *) (in + 56)
x15 = *(uint32 *) (in + 60)

# NOTE(review): `store`/`load` presumably spill `in` and `out` to the stack
# so their registers are free during the loop; they are restored by the
# matching `load in` / `load out` after the loop - confirm qhasm semantics.
store in
store out

# i counts down 20, 18, ..., 2: ten passes through the loop body.
i = 20

mainloop:

# First half of the pass (y* temporaries). The four chains are
#   (x0,x4,x8,x12), (x5,x9,x13,x1), (x10,x14,x2,x6), (x15,x3,x7,x11),
# each doing, in order:
#   b ^= rotl(a + d, 7); c ^= rotl(b + a, 9);
#   d ^= rotl(c + b, 13); a ^= rotl(d + c, 18)
# which matches the column round of the Salsa20 specification when
# x0..x15 is read as a row-major 4x4 matrix.
y4 = x0 + x12
y9 = x5 + x1
z4 = (uint32) y4 << 7
y14 = x10 + x6
y4 = (uint32) y4 >> 25
y3 = x15 + x11
z9 = (uint32) y9 << 7
y4 |= z4
y9 = (uint32) y9 >> 25
x4 ^= y4
z14 = (uint32) y14 << 7
y9 |= z9
y14 = (uint32) y14 >> 25
y8 = x4 + x0
z3 = (uint32) y3 << 7
x9 ^= y9
y3 = (uint32) y3 >> 25
y14 |= z14
z8 = (uint32) y8 << 9
y13 = x9 + x5
y8 = (uint32) y8 >> 23
x14 ^= y14
y3 |= z3
y2 = x14 + x10
z13 = (uint32) y13 << 9
x3 ^= y3
y13 = (uint32) y13 >> 23
y8 |= z8
z2 = (uint32) y2 << 9
y7 = x3 + x15
y2 = (uint32) y2 >> 23
x8 ^= y8
y13 |= z13
y12 = x8 + x4
z7 = (uint32) y7 << 9
x13 ^= y13
y7 = (uint32) y7 >> 23
y2 |= z2
z12 = (uint32) y12 << 13
y1 = x13 + x9
y12 = (uint32) y12 >> 19
x2 ^= y2
y7 |= z7
y6 = x2 + x14
z1 = (uint32) y1 << 13
x7 ^= y7
y1 = (uint32) y1 >> 19
y12 |= z12
z6 = (uint32) y6 << 13
y11 = x7 + x3
y6 = (uint32) y6 >> 19
x12 ^= y12
y1 |= z1
y0 = x12 + x8
z11 = (uint32) y11 << 13
x1 ^= y1
y11 = (uint32) y11 >> 19
y6 |= z6
z0 = (uint32) y0 << 18
y5 = x1 + x13
y0 = (uint32) y0 >> 14
x6 ^= y6
y11 |= z11
y10 = x6 + x2
z5 = (uint32) y5 << 18
x11 ^= y11
y5 = (uint32) y5 >> 14
y0 |= z0
z10 = (uint32) y10 << 18
y15 = x11 + x7
y10 = (uint32) y10 >> 14
x0 ^= y0
y5 |= z5

# Second half of the pass (u* temporaries) starts here, overlapped with the
# last statements of the first half. The four chains are
#   (x0,x1,x2,x3), (x5,x6,x7,x4), (x10,x11,x8,x9), (x15,x12,x13,x14),
# using the same 7 / 9 / 13 / 18 rotation sequence; this matches the row
# round of the Salsa20 specification.
u1 = x0 + x3
z15 = (uint32) y15 << 18
x5 ^= y5
y15 = (uint32) y15 >> 14
y10 |= z10
z1 = (uint32) u1 << 7
u6 = x5 + x4
u1 = (uint32) u1 >> 25
x10 ^= y10
y15 |= z15
u11 = x10 + x9
z6 = (uint32) u6 << 7
x15 ^= y15
u6 = (uint32) u6 >> 25
u1 |= z1
z11 = (uint32) u11 << 7
u12 = x15 + x14
u11 = (uint32) u11 >> 25
u6 |= z6
x1 ^= u1
z12 = (uint32) u12 << 7
x6 ^= u6
u12 = (uint32) u12 >> 25
u11 |= z11
u2 = x1 + x0
u7 = x6 + x5
z2 = (uint32) u2 << 9
x11 ^= u11
u2 = (uint32) u2 >> 23
u12 |= z12
z7 = (uint32) u7 << 9
u8 = x11 + x10
u7 = (uint32) u7 >> 23
x12 ^= u12
z8 = (uint32) u8 << 9
u2 |= z2
u8 = (uint32) u8 >> 23
u13 = x12 + x15
u7 |= z7
x2 ^= u2
z13 = (uint32) u13 << 9
x7 ^= u7
u13 = (uint32) u13 >> 23
u8 |= z8
u3 = x2 + x1
u4 = x7 + x6
z3 = (uint32) u3 << 13
x8 ^= u8
z4 = (uint32) u4 << 13
u13 |= z13
u3 = (uint32) u3 >> 19
u9 = x8 + x11
u4 = (uint32) u4 >> 19
x13 ^= u13
z9 = (uint32) u9 << 13
u3 |= z3
u9 = (uint32) u9 >> 19
u14 = x13 + x12
u4 |= z4
x3 ^= u3
z14 = (uint32) u14 << 13
x4 ^= u4
u14 = (uint32) u14 >> 19
u9 |= z9
u0 = x3 + x2
u5 = x4 + x7
z0 = (uint32) u0 << 18
x9 ^= u9
z5 = (uint32) u5 << 18
u14 |= z14
u0 = (uint32) u0 >> 14
u10 = x9 + x8
u5 = (uint32) u5 >> 14
x14 ^= u14
z10 = (uint32) u10 << 18
u0 |= z0
u10 = (uint32) u10 >> 14
u15 = x14 + x13
u5 |= z5
x0 ^= u0
z15 = (uint32) u15 << 18
x5 ^= u5
u15 = (uint32) u15 >> 14

# Decrement the counter and record the condition flags; the branch that
# consumes them is four statements further down.
# NOTE(review): this relies on the intervening |= and ^= statements not
# modifying the condition flags - confirm against the qhasm SPARC backend.
flags i -= 2
u10 |= z10
u15 |= z15
x10 ^= u10
x15 ^= u15
# Repeat while the unsigned comparison recorded by `flags i -= 2` is
# "greater"; with i starting at 20 the body executes ten times.
goto mainloop if uint64 >

# Restore the argument pointers saved before the loop.
load in
load out

# Feed-forward: add the original input words to the transformed state and
# write the sixteen 32-bit results to `out`. Loads, adds and stores are
# interleaved; each x_n is stored only after its `x_n += q_n`.
q0 = *(uint32 *) (in + 0)
q1 = *(uint32 *) (in + 4)
q2 = *(uint32 *) (in + 8)
q3 = *(uint32 *) (in + 12)
x0 += q0
q4 = *(uint32 *) (in + 16)
x1 += q1
q5 = *(uint32 *) (in + 20)
x2 += q2
q6 = *(uint32 *) (in + 24)
x3 += q3
q7 = *(uint32 *) (in + 28)
x4 += q4
q8 = *(uint32 *) (in + 32)
x5 += q5
q9 = *(uint32 *) (in + 36)
x6 += q6
q10 = *(uint32 *) (in + 40)
x7 += q7
q11 = *(uint32 *) (in + 44)
x8 += q8
q12 = *(uint32 *) (in + 48)
x9 += q9
q13 = *(uint32 *) (in + 52)
x10 += q10
q14 = *(uint32 *) (in + 56)
x11 += q11
q15 = *(uint32 *) (in + 60)
x12 += q12
*(uint32 *) (out + 0) = x0
x13 += q13
*(uint32 *) (out + 4) = x1
x14 += q14
*(uint32 *) (out + 8) = x2
x15 += q15
*(uint32 *) (out + 12) = x3
*(uint32 *) (out + 16) = x4
*(uint32 *) (out + 20) = x5
*(uint32 *) (out + 24) = x6
*(uint32 *) (out + 28) = x7
*(uint32 *) (out + 32) = x8
*(uint32 *) (out + 36) = x9
*(uint32 *) (out + 40) = x10
*(uint32 *) (out + 44) = x11
*(uint32 *) (out + 48) = x12
*(uint32 *) (out + 52) = x13
*(uint32 *) (out + 56) = x14
*(uint32 *) (out + 60) = x15

leave