diff options
| author | Ian Moffett <ian@osmora.org> | 2024-02-16 22:19:00 -0500 | 
|---|---|---|
| committer | Ian Moffett <ian@osmora.org> | 2024-02-16 22:19:00 -0500 | 
| commit | 204517801a33f1bafab3ce1e80f71f41aa5f61a6 (patch) | |
| tree | 99637f94145ec3f0b046bfea81c5c8f3a3992a7a /src | |
| parent | 08700c01fb0fbc8089a2f681a2f95de341c0675c (diff) | |
Add AVX support
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/avx_accel.S | 39 | ||||
| -rw-r--r-- | src/main.c | 21 | 
2 files changed, 59 insertions, 1 deletions
| diff --git a/src/avx_accel.S b/src/avx_accel.S new file mode 100644 index 0000000..9340a84 --- /dev/null +++ b/src/avx_accel.S @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + *    this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the name of Osmora nor the names of its + *    contributors may be used to endorse or promote products derived from + *    this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +.section .text +.globl accel_invert256 + +accel_invert256: +    vmovdqu (%rdi), %ymm1           // Load data into %ymm1 +    vpcmpeqb %ymm0, %ymm0, %ymm0    // Set %ymm0 to all 1s + +    vpxor %ymm1, %ymm0, %ymm0       // NOT %ymm1; result stored in %ymm0 +    vmovdqu %ymm0, (%rdi)           // Write the result back to the buffer +    retq diff --git a/src/main.c b/src/main.c --- a/src/main.c +++ b/src/main.c @@ -110,6 +110,14 @@ is_sse2_supported(void)      return (edx & (1 << 26)) != 0;  } +static inline bool +is_avx_supported(void) +{ +    uint32_t ecx, unused; +    cpuid(0x0000001, unused, unused, ecx, unused); +    return (ecx & (1 << 28)) != 0; +} +  static void  amd64_cpu_tests(struct cpu_info *info)  { @@ -129,6 +137,11 @@ amd64_cpu_tests(struct cpu_info *info)              info->has_sse2 = 1;          }      } + +    if (is_avx_supported()) { +        printf("[?]: AVX supported, may use as optimization\n"); +        info->has_avx = 1; +    }  }  #endif  /* defined(__x86_64__) */ @@ -146,12 +159,15 @@ encrypt(const struct cpu_info *info, char *buf, size_t buf_size)      if (info->has_sse2 || info->has_sse3) {          step = 16;         /* Start at 16 bytes (128 bits) */      } +    if (info->has_avx) { +        step = 32; +    }  #endif  /* defined(__x86_64__) */      while (current_pos < buf_size) {          /* Ensure we aren't over 16 bytes and a power of two */          if (step != 1) { -            assert((step & 1) == 0 && step <= 16); +            assert((step & 1) == 0 && step <= 32);          }          /* Ensure we don't cause any overflows */ @@ -160,6 +176,9 @@ encrypt(const struct cpu_info *info, char *buf, size_t buf_size)              step >>= 1;          switch (step) { +        case 32: +            accel_invert256((uintptr_t)buf + current_pos); +            break;          case 16:              accel_invert128((uintptr_t)buf + current_pos);              break; | 
