diff options
Diffstat (limited to 'src/avx_accel.S')
-rw-r--r-- | src/avx_accel.S | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/src/avx_accel.S b/src/avx_accel.S new file mode 100644 index 0000000..9340a84 --- /dev/null +++ b/src/avx_accel.S @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Osmora nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +.section .text +.globl accel_invert256 + +accel_invert256: + vmovdqu (%rdi), %ymm1 // Load data into %ymm1 + vpcmpeqb %ymm0, %ymm0, %ymm0 // Set %ymm0 to all 1s + + vpxor %ymm1, %ymm0, %ymm0 // NOT %xmm1; result stored in %xmm0 + vmovdqu %ymm0, (%rax) // Writeback the result + retq |