From 86d80a7d0bb3e6a6064eff237c5a33e2e8ffbe42 Mon Sep 17 00:00:00 2001 From: Kurt Date: Thu, 7 Jan 2021 20:50:28 -0800 Subject: [PATCH] Minor perf improvement for advancing keys. Further improvements can be made if there's a way to have a union struct (4 u16s, lumped u64) that has a simple 4-bit diagonal mirror operation on it; that could be at least twice as fast. Benchmarking with optimizations shows about 80-90% of the prior time taken, so at least a 10% speed improvement. Rarely used, but it was fun to try to optimize a little more. Eliminate bounds checks by accessing/setting the highest element first, and index each element only twice instead of 6x. Eliminate u16 casts by leaving intermediates as int type (same result). Eliminate temp value caching and instead write directly to storage (no more _0123). The end result looks neat too: with the >> 0's removed, the shifts look like a diagonal, like the nibble rotation :D --- PKHeX.Core/Saves/Encryption/GeniusCrypto.cs | 26 +++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/PKHeX.Core/Saves/Encryption/GeniusCrypto.cs b/PKHeX.Core/Saves/Encryption/GeniusCrypto.cs index d95964091..eed13c701 100644 --- a/PKHeX.Core/Saves/Encryption/GeniusCrypto.cs +++ b/PKHeX.Core/Saves/Encryption/GeniusCrypto.cs @@ -55,20 +55,22 @@ public static void Encrypt(byte[] input, int start, int end, ushort[] keys, byte private static void AdvanceKeys(ushort[] keys) { - keys[0] += 0x43; - keys[1] += 0x29; - keys[2] += 0x17; - keys[3] += 0x13; + var k3 = keys[3] + 0x13; + var k2 = keys[2] + 0x17; + var k1 = keys[1] + 0x29; + var k0 = keys[0] + 0x43; - var _0 = (ushort)((keys[0] >> 00 & 0xf) | (keys[1] << 4 & 0xf0) | (keys[2] << 8 & 0xf00) | (keys[3] << 12 & 0xf000)); - var _1 = (ushort)((keys[0] >> 04 & 0xf) | (keys[1] << 0 & 0xf0) | (keys[2] << 4 & 0xf00) | (keys[3] << 08 & 0xf000)); - var _2 = (ushort)((keys[0] >> 08 & 0xf) | (keys[1] >> 4 & 0xf0) | (keys[2] >> 0 & 0xf00) | (keys[3] << 04 & 0xf000)); - var _3 = (ushort)((keys[0] >> 12 & 0xf) | (keys[1] >> 8 & 0xf0) | 
(keys[2] >> 4 & 0xf00) | (keys[3] << 00 & 0xf000)); + // Mirror the 4-bit groups across the anti-diagonal [ / ] after biasing each u16 (no overflow): + // 0123 FB73 + // 4567 EA62 + // 89AB becomes D951 + // CDEF C840 + // The intermediate values can stay as int because the bit-masks discard any overflow bits. - keys[0] = _0; - keys[1] = _1; - keys[2] = _2; - keys[3] = _3; + keys[3] = (ushort)((k0 >> 12 & 0xf) | (k1 >> 8 & 0xf0) | (k2 >> 4 & 0xf00) | (k3 & 0xf000)); + keys[2] = (ushort)((k0 >> 08 & 0xf) | (k1 >> 4 & 0xf0) | (k2 & 0xf00) | (k3 << 04 & 0xf000)); + keys[1] = (ushort)((k0 >> 04 & 0xf) | (k1 & 0xf0) | (k2 << 4 & 0xf00) | (k3 << 08 & 0xf000)); + keys[0] = (ushort)((k0 & 0xf) | (k1 << 4 & 0xf0) | (k2 << 8 & 0xf00) | (k3 << 12 & 0xf000)); } } }