# salsa20_word_pii version 20050327
# D. J. Bernstein
# Public domain.
#
# Overview (qhasm source, one statement per line):
#
#   salsa20_word_pii(out, in)
#     in  : read as 16 consecutive 32-bit words at byte offsets 0..60
#     out : written as 16 consecutive 32-bit words at byte offsets 0..60
#
#   The 16 input words are copied into the working state x0..x15, the
#   main loop mixes that state with add / rotate / xor steps (rotation
#   counts 7, 9, 13, 18), and finally out[k] = x[k] + in[k] is stored
#   for k = 0..15.
#
#   The loop counter starts at 20 and is decreased by 4 per pass. One
#   pass of the loop body contains 64 word updates (every state word is
#   updated four times).
#
#   NOTE(review): the index pattern of the updates matches the
#   column-round / row-round structure of the Salsa20 core, which would
#   make this the 20-round variant -- confirm against the Salsa20
#   specification before relying on that.
#   NOTE(review): "pii" in the name, the mmx declarations and the
#   ebx/esi/edi/ebp saves suggest a 32-bit x86 target with MMX
#   (presumably Pentium II) -- confirm.

# Pointer arguments and the registers used to assemble the 16 output words.
register int32 out
register int32 out15
register int32 out14
register int32 out13
register int32 out12
register int32 in
register int32 out11
register int32 out10
register int32 out9
register int32 out8
register int32 out7
register int32 out6
register int32 out5
register int32 out4
register int32 out3
register int32 out2
register int32 out1
register int32 out0

# Loop counter.
register int32 i

# Two working register sets used alternately by the main loop:
# (a, b, c, d) with scratch e, and (r, s, t, u) with scratch v.
register int32 a
register int32 b
register int32 c
register int32 r
register int32 s
register int32 t
register int32 d
register int32 u
register int32 e
register int32 v

# Staging registers, used only to copy the input words into x0..x15.
register int32 in0
register int32 in1
register int32 in2
register int32 in3
register int32 in4
register int32 in5
register int32 in6
register int32 in7
register int32 in8
register int32 in9
register int32 in10
register int32 in11
register int32 in12
register int32 in13
register int32 in14
register int32 in15

# Working state x0..x15. Six words (x1, x2, x6, x7, x11, x12) are declared
# as mmx registers; the other ten are declared as temporary mem32 slots.
temporary mem32 x0
register mmx x1
register mmx x2
temporary mem32 x3
temporary mem32 x4
temporary mem32 x5
register mmx x6
register mmx x7
temporary mem32 x8
temporary mem32 x9
temporary mem32 x10
register mmx x11
register mmx x12
temporary mem32 x13
temporary mem32 x14
temporary mem32 x15

# Function entry; out and in are the two inputs.
enter salsa20_word_pii
input out
input in
load in

# Save the caller's ebx, esi, edi and ebp; they are reloaded just before leave.
store callerint ebx
store callerint esi
store callerint edi
store callerint ebp

# Copy the 16 input words into the working state. Each word goes through
# an int32 staging register; loads and state writes are interleaved.
in12 = *(uint32 *) (in + 48)
in0 = *(uint32 *) (in + 0)
in4 = *(uint32 *) (in + 16)
in8 = *(uint32 *) (in + 32)
x12 = in12
in1 = *(uint32 *) (in + 4)
x0 = in0
in5 = *(uint32 *) (in + 20)
x4 = in4
in9 = *(uint32 *) (in + 36)
x8 = in8
in13 = *(uint32 *) (in + 52)
x1 = in1
in6 = *(uint32 *) (in + 24)
x5 = in5
in10 = *(uint32 *) (in + 40)
x9 = in9
in14 = *(uint32 *) (in + 56)
x13 = in13
in2 = *(uint32 *) (in + 8)
x6 = in6
in11 = *(uint32 *) (in + 44)
x10 = in10
in15 = *(uint32 *) (in + 60)
x14 = in14
in3 = *(uint32 *) (in + 12)
x2 = in2
in7 = *(uint32 *) (in + 28)
x11 = in11
x15 = in15
x3 = in3
x7 = in7

# Loop counter: 20, decreased by 4 at the bottom of the loop.
i = 20

# Main loop.
#
# Notation used in the comments below: xN means the current value of
# state word N, whether it sits in its x slot or is still held in a
# working register; "<<<" is the rotation written "<<<=" in the code
# (left rotation in qhasm notation).
#
# Every update has the shape  target ^= (p + q) <<< k.  With the
# (a, b, c, d) set the four updates of a group are
#     c ^= (a + b) <<< 7      d ^= (b + c) <<< 9
#     a ^= (c + d) <<< 13     b ^= (a + d) <<< 18
# where c and then a are overwritten by the sum they feed, so each is
# stored before that happens. The (r, s, t, u) set does the same with
# t, u, r, s in the roles of c, d, a, b.
#
# Loads and stores for the neighbouring group are interleaved with the
# arithmetic. NOTE(review): this order looks hand-scheduled; reordering
# statements is not behaviour-neutral unless every register and slot
# dependency is rechecked.
mainloop:

# ---- first half of the pass ------------------------------------------
# Groups (x12, x0, x4, x8), (x1, x5, x9, x13), (x6, x10, x14, x2),
# (x11, x15, x3, x7).
a = x12
b = x0
c = x4
r = x1
# x4 ^= (x12 + x0) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x4 = c
d = x8
# x8 ^= (x0 + x4) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
s = x5
x8 = d
# x12 ^= (x4 + x8) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x9
x12 = a
# a := (x12 + x8) <<< 18, xored into x0 below
inplace a += d
inplace a <<<= 18
# x9 ^= (x1 + x5) <<< 7
v = r + s
inplace v <<<= 7
inplace t ^= v
x9 = t
u = x13
# x0 ^= (x12 + x8) <<< 18
inplace b ^= a
# x13 ^= (x5 + x9) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x13 = u
a = x6
x0 = b
b = x10
# x1 ^= (x9 + x13) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x14
x1 = r
# r := (x1 + x13) <<< 18, xored into x5 below
inplace r += u
inplace r <<<= 18
# x14 ^= (x6 + x10) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x14 = c
d = x2
# x5 ^= (x1 + x13) <<< 18
inplace s ^= r
# x2 ^= (x10 + x14) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
x2 = d
r = x11
x5 = s
s = x15
# x6 ^= (x14 + x2) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x3
x6 = a
# a := (x6 + x2) <<< 18, xored into x10 below
inplace a += d
inplace a <<<= 18
# x3 ^= (x11 + x15) <<< 7
# The result is not written to the x3 slot here; it is handed over in a
# register (a = t below) and x3 is stored after its next update.
v = r + s
inplace v <<<= 7
inplace t ^= v
u = x7
# x10 ^= (x6 + x2) <<< 18
inplace b ^= a
# x7 ^= (x15 + x3) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x7 = u
# Groups (x3, x0, x1, x2), (x4, x5, x6, x7), (x9, x10, x11, x8),
# (x14, x15, x12, x13) start here; a takes over the live x3 value.
a = t
x10 = b
b = x0
# x11 ^= (x3 + x7) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x1
x11 = r
# r := (x11 + x7) <<< 18, xored into x15 below
inplace r += u
inplace r <<<= 18
# x1 ^= (x3 + x0) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x1 = c
d = x2
# x15 ^= (x11 + x7) <<< 18
inplace s ^= r
# x2 ^= (x0 + x1) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
x2 = d
r = x4
x15 = s
s = x5
# x3 ^= (x1 + x2) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x6
x3 = a
# a := (x3 + x2) <<< 18, xored into x0 below
inplace a += d
inplace a <<<= 18
# x6 ^= (x4 + x5) <<< 7
v = r + s
inplace v <<<= 7
inplace t ^= v
x6 = t
u = x7
# x0 ^= (x3 + x2) <<< 18
inplace b ^= a
# x7 ^= (x5 + x6) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x7 = u
a = x9
x0 = b
b = x10
# x4 ^= (x6 + x7) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x11
x4 = r
# r := (x4 + x7) <<< 18, xored into x5 below
inplace r += u
inplace r <<<= 18
# x11 ^= (x9 + x10) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x11 = c
d = x8
# x5 ^= (x4 + x7) <<< 18
inplace s ^= r
# x8 ^= (x10 + x11) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
x8 = d
r = x14
x5 = s
s = x15
# x9 ^= (x11 + x8) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x12
x9 = a
# a := (x9 + x8) <<< 18, xored into x10 below
inplace a += d
inplace a <<<= 18
# x12 ^= (x14 + x15) <<< 7
# As with x3 above, the result stays in a register (a = t below) and x12
# is stored only after its next update in the second half.
v = r + s
inplace v <<<= 7
inplace t ^= v
u = x13
# x10 ^= (x9 + x8) <<< 18
inplace b ^= a
# x13 ^= (x15 + x12) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x13 = u

# ---- second half of the pass -----------------------------------------
# Same sequence of 32 updates again. It starts while the last group of
# the first half is still finishing; a takes over the live x12 value.
a = t
x10 = b
b = x0
# x14 ^= (x12 + x13) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x4
x14 = r
# r := (x14 + x13) <<< 18, xored into x15 below
inplace r += u
inplace r <<<= 18
# x4 ^= (x12 + x0) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x4 = c
# x15 ^= (x14 + x13) <<< 18
inplace s ^= r
d = x8
# x8 ^= (x0 + x4) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
r = x1
x15 = s
s = x5
x8 = d
# x12 ^= (x4 + x8) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x9
x12 = a
# a := (x12 + x8) <<< 18, xored into x0 below
inplace a += d
inplace a <<<= 18
# x9 ^= (x1 + x5) <<< 7
v = r + s
inplace v <<<= 7
inplace t ^= v
x9 = t
u = x13
# x0 ^= (x12 + x8) <<< 18
inplace b ^= a
# x13 ^= (x5 + x9) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x13 = u
a = x6
x0 = b
b = x10
# x1 ^= (x9 + x13) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x14
x1 = r
# r := (x1 + x13) <<< 18, xored into x5 below
inplace r += u
inplace r <<<= 18
# x14 ^= (x6 + x10) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x14 = c
d = x2
# x5 ^= (x1 + x13) <<< 18
inplace s ^= r
# x2 ^= (x10 + x14) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
x2 = d
r = x11
x5 = s
s = x15
# x6 ^= (x14 + x2) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x3
x6 = a
# a := (x6 + x2) <<< 18, xored into x10 below
inplace a += d
inplace a <<<= 18
# x3 ^= (x11 + x15) <<< 7  (kept in a register; see a = t below)
v = r + s
inplace v <<<= 7
inplace t ^= v
u = x7
# x10 ^= (x6 + x2) <<< 18
inplace b ^= a
# x7 ^= (x15 + x3) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x7 = u
# a takes over the live x3 value.
a = t
x10 = b
b = x0
# x11 ^= (x3 + x7) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x1
x11 = r
# r := (x11 + x7) <<< 18, xored into x15 below
inplace r += u
inplace r <<<= 18
# x1 ^= (x3 + x0) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x1 = c
d = x2
# x15 ^= (x11 + x7) <<< 18
inplace s ^= r
# x2 ^= (x0 + x1) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
x2 = d
r = x4
x15 = s
s = x5
# x3 ^= (x1 + x2) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x6
x3 = a
# a := (x3 + x2) <<< 18, xored into x0 below
inplace a += d
inplace a <<<= 18
# x6 ^= (x4 + x5) <<< 7
v = r + s
inplace v <<<= 7
inplace t ^= v
x6 = t
u = x7
# x0 ^= (x3 + x2) <<< 18
inplace b ^= a
# x7 ^= (x5 + x6) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x7 = u
a = x9
x0 = b
b = x10
# x4 ^= (x6 + x7) <<< 13
inplace t += u
inplace t <<<= 13
inplace r ^= t
c = x11
x4 = r
# r := (x4 + x7) <<< 18, xored into x5 below
inplace r += u
inplace r <<<= 18
# x11 ^= (x9 + x10) <<< 7
e = a + b
inplace e <<<= 7
inplace c ^= e
x11 = c
d = x8
# x5 ^= (x4 + x7) <<< 18
inplace s ^= r
# x8 ^= (x10 + x11) <<< 9
e = b + c
inplace e <<<= 9
inplace d ^= e
x8 = d
r = x14
x5 = s
s = x15
# x9 ^= (x11 + x8) <<< 13
inplace c += d
inplace c <<<= 13
inplace a ^= c
t = x12
x9 = a
# a := (x9 + x8) <<< 18, xored into x10 below
inplace a += d
inplace a <<<= 18
# x12 ^= (x14 + x15) <<< 7
# Unlike the first half, x12 is stored here, so every state word is back
# in its x slot when the pass ends.
v = r + s
inplace v <<<= 7
inplace t ^= v
x12 = t
u = x13
# x10 ^= (x9 + x8) <<< 18
inplace b ^= a
# x13 ^= (x15 + x12) <<< 9
v = s + t
inplace v <<<= 9
inplace u ^= v
x13 = u
# x14 ^= (x12 + x13) <<< 13
inplace t += u
x10 = b
inplace t <<<= 13
inplace r ^= t
x14 = r
# x15 ^= (x14 + x13) <<< 18
inplace r += u
inplace r <<<= 18
inplace s ^= r
x15 = s

# Count down by 4 and repeat while the counter is still above zero.
# Starting from 20 this gives 5 passes, assuming the conditional goto
# tests the flags left by the subtraction directly before it.
inplace i -= 4
goto mainloop if unsigned >

# Output: out[k] = x[k] + in[k] for k = 0..15, handled four words at a
# time. Stores of one group are interleaved with the state reads of the
# next group.
load in
load out
out0 = x0
out1 = x1
out2 = x2
out3 = x3
out0 += *(uint32 *) (in + 0)
out1 += *(uint32 *) (in + 4)
out2 += *(uint32 *) (in + 8)
out3 += *(uint32 *) (in + 12)
*(uint32 *) (out + 0) = out0
out4 = x4
*(uint32 *) (out + 4) = out1
out5 = x5
*(uint32 *) (out + 8) = out2
out6 = x6
*(uint32 *) (out + 12) = out3
out7 = x7
out4 += *(uint32 *) (in + 16)
out5 += *(uint32 *) (in + 20)
out6 += *(uint32 *) (in + 24)
out7 += *(uint32 *) (in + 28)
*(uint32 *) (out + 16) = out4
out8 = x8
*(uint32 *) (out + 20) = out5
out9 = x9
*(uint32 *) (out + 24) = out6
out10 = x10
*(uint32 *) (out + 28) = out7
out11 = x11
out8 += *(uint32 *) (in + 32)
out9 += *(uint32 *) (in + 36)
out10 += *(uint32 *) (in + 40)
out11 += *(uint32 *) (in + 44)
*(uint32 *) (out + 32) = out8
out12 = x12
*(uint32 *) (out + 36) = out9
out13 = x13
*(uint32 *) (out + 40) = out10
out14 = x14
*(uint32 *) (out + 44) = out11
out15 = x15
# x12 above was the last read of a state word declared as an mmx
# register, so the MMX state can be released here.
emms
out12 += *(uint32 *) (in + 48)
out13 += *(uint32 *) (in + 52)
out14 += *(uint32 *) (in + 56)
out15 += *(uint32 *) (in + 60)
# Final four stores, interleaved with reloading the caller's ebx, esi,
# edi and ebp that were saved at entry.
*(uint32 *) (out + 48) = out12
load callerint ebx
*(uint32 *) (out + 52) = out13
load callerint esi
*(uint32 *) (out + 56) = out14
load callerint edi
*(uint32 *) (out + 60) = out15
load callerint ebp
leave