diff options
author | Ian Moffett <ian@osmora.org> | 2024-02-16 22:19:00 -0500 |
---|---|---|
committer | Ian Moffett <ian@osmora.org> | 2024-02-16 22:19:00 -0500 |
commit | 204517801a33f1bafab3ce1e80f71f41aa5f61a6 (patch) | |
tree | 99637f94145ec3f0b046bfea81c5c8f3a3992a7a | |
parent | 08700c01fb0fbc8089a2f681a2f95de341c0675c (diff) |
Add AVX support
Signed-off-by: Ian Moffett <ian@osmora.org>
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | include/accel.h | 4 | ||||
-rw-r--r-- | include/info.h | 1 | ||||
-rw-r--r-- | src/avx_accel.S | 39 | ||||
-rw-r--r-- | src/main.c | 21 |
5 files changed, 65 insertions, 2 deletions
@@ -1,6 +1,6 @@ CFLAGS = -pedantic -Iinclude/ CFILES = src/main.c -ASMFILES = src/sse_accel.S +ASMFILES = src/sse_accel.S src/avx_accel.S CC = gcc bin/fobfuscate: $(CFILES) $(ASMFILES) diff --git a/include/accel.h b/include/accel.h index f66e3e1..d7bdc96 100644 --- a/include/accel.h +++ b/include/accel.h @@ -35,4 +35,8 @@ __attribute__((naked)) void accel_invert128(uint64_t addr); + +__attribute__((naked)) +void accel_invert256(uint64_t addr); + #endif /* ACCEL_H */ diff --git a/include/info.h b/include/info.h index c748baa..4c39266 100644 --- a/include/info.h +++ b/include/info.h @@ -35,6 +35,7 @@ struct cpu_info { uint8_t has_sse2 : 1; uint8_t has_sse3 : 1; + uint8_t has_avx : 1; }; #endif /* INFO_H */ diff --git a/src/avx_accel.S b/src/avx_accel.S new file mode 100644 index 0000000..9340a84 --- /dev/null +++ b/src/avx_accel.S @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Osmora nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +.section .text +.globl accel_invert256 + +accel_invert256: + vmovdqu (%rdi), %ymm1 // Load data into %ymm1 + vpcmpeqb %ymm0, %ymm0, %ymm0 // Set %ymm0 to all 1s + + vpxor %ymm1, %ymm0, %ymm0 // NOT %ymm1; result stored in %ymm0 + vmovdqu %ymm0, (%rax) // Writeback the result + retq @@ -110,6 +110,14 @@ is_sse2_supported(void) return (edx & (1 << 26)) != 0; } +static inline bool +is_avx_supported(void) +{ + uint32_t ecx, unused; + cpuid(0x0000001, unused, unused, ecx, unused); + return (ecx & (1 << 28)) != 0; +} + static void amd64_cpu_tests(struct cpu_info *info) { @@ -129,6 +137,11 @@ amd64_cpu_tests(struct cpu_info *info) info->has_sse2 = 1; } } + + if (is_avx_supported()) { + printf("[?]: AVX supported, may use as optimization\n"); + info->has_avx = 1; + } } #endif /* defined(__x86_64__) */ @@ -146,12 +159,15 @@ encrypt(const struct cpu_info *info, char *buf, size_t buf_size) if (info->has_sse2 || info->has_sse3) { step = 16; /* Start at 16 bytes (128 bits) */ } + if (info->has_avx) { + step = 32; + } #endif /* defined(__x86_64__) */ while (current_pos < buf_size) { /* Ensure we aren't over 16 bytes and a power of two */ if (step != 1) { - assert((step & 1) == 0 && step <= 16); + assert((step & 1) == 0 && step <= 32); } /* Ensure we don't cause any overflows */ @@ -160,6 +176,9 @@ encrypt(const struct cpu_info *info, char *buf, size_t buf_size) step >>= 1; switch (step) { + case 32: + accel_invert256((uintptr_t)buf + 
current_pos); + break; case 16: accel_invert128((uintptr_t)buf + current_pos); break; |