228 lines
5.3 KiB
ArmAsm
228 lines
5.3 KiB
ArmAsm
//
|
|
// AngelCode Scripting Library
|
|
// Copyright (c) 2020-2021 Andreas Jonsson
|
|
//
|
|
// This software is provided 'as-is', without any express or implied
|
|
// warranty. In no event will the authors be held liable for any
|
|
// damages arising from the use of this software.
|
|
//
|
|
// Permission is granted to anyone to use this software for any
|
|
// purpose, including commercial applications, and to alter it and
|
|
// redistribute it freely, subject to the following restrictions:
|
|
//
|
|
// 1. The origin of this software must not be misrepresented; you
|
|
// must not claim that you wrote the original software. If you use
|
|
// this software in a product, an acknowledgment in the product
|
|
// documentation would be appreciated but is not required.
|
|
//
|
|
// 2. Altered source versions must be plainly marked as such, and
|
|
// must not be misrepresented as being the original software.
|
|
//
|
|
// 3. This notice may not be removed or altered from any source
|
|
// distribution.
|
|
//
|
|
// The original version of this library can be located at:
|
|
// http://www.angelcode.com/angelscript/
|
|
//
|
|
// Andreas Jonsson
|
|
// andreas@angelcode.com
|
|
//
|
|
|
|
|
|
// Assembly routines for the ARM64/AArch64 call convention used for Linux
|
|
// Written by Max Waine in July 2020, based on as_callfunc_arm_msvc.asm,
|
|
// with assistance & guidance provided by Sir Kane
|
|
|
|
// Compile with GCC/GAS
|
|
|
|
#if !defined(AS_MAX_PORTABILITY)
|
|
|
|
#if defined(__aarch64__)
|
|
|
|
.arch armv8-a
.text

// Exported entry points. Each symbol is made visible to the linker and
// tagged as a function, one symbol per pair of directives.

// Helpers that copy a floating-point aggregate out of d0..d3 / s0..s3
.global GetHFAReturnDouble
.type   GetHFAReturnDouble, %function
.global GetHFAReturnFloat
.type   GetHFAReturnFloat, %function

// Wrappers around CallARM64 that take one extra pointer argument
.global CallARM64Ret128
.type   CallARM64Ret128, %function
.global CallARM64RetInMemory
.type   CallARM64RetInMemory, %function

// The generic call trampoline and its aliases (all three labels share one body)
.global CallARM64Double
.type   CallARM64Double, %function
.global CallARM64Float
.type   CallARM64Float, %function
.global CallARM64
.type   CallARM64, %function
|
|
|
|
// GetHFAReturnDouble
//
// Copies up to four double registers (d0..d3) into memory.
//
// Presumed C prototype, mirroring GetHFAReturnFloat in this file
// (TODO confirm against the C++ declaration):
//     void GetHFAReturnDouble(asQWORD *out1, asQWORD *out2, asQWORD returnSize)
//
//   x0 = out1        receives d0 at offset 0
//   x1 = out2        receives d1 at offset 0
//   x2 = returnSize  number of bytes to store (a multiple of 8, at most 32)
//   clobbers x9
//
// The stores are laid out in reverse order directly in front of the end
// label. Branching N instructions before that label therefore stores exactly
// the first N registers. Each store is a 4 byte instruction that writes
// 8 bytes, hence the byte count is halved to obtain the branch distance.
//
// The byte count is taken from x2. The previous code used x1 for it, but x1
// is also the destination pointer of the d1 store, so the computed branch
// would have been derived from an address instead of a size.
//
// NOTE(review): d2 and d3 are written to out1+0x10 and out1+0x18 while d1
// goes to out2. Confirm that the buffer behind out1 is large enough, or that
// sizes above 16 never reach this routine.
.align 2
GetHFAReturnDouble:
    adr     x9, populateDoubles
    sub     x9, x9, x2, lsr 1       // x9 -= returnSize >> 1 (8 data bytes per 4 byte instruction)
    br      x9

    str     d3, [x0, #0x18]         // reached only when returnSize is 32
    str     d2, [x0, #0x10]         // reached when returnSize is 24 or more
    str     d1, [x1]                // reached when returnSize is 16 or more
    str     d0, [x0]                // reached when returnSize is 8 or more
populateDoubles:

    ret
|
|
|
|
// GetHFAReturnFloat
//
// Writes up to four single-precision registers (s0..s3) to memory.
//
//   x0 = first destination   s0 goes to offset 0, s1 to offset 4
//   x1 = second destination  s2 goes to offset 0, s3 to offset 4
//   x2 = returnSize          number of bytes to store
//   clobbers x9
//
// Every store below is one 4 byte instruction that writes 4 bytes, so the
// byte count can be used unchanged as the distance to step back from the
// end label: stepping back N bytes executes the stores for the first N/4
// registers and nothing else.
.p2align 2
GetHFAReturnFloat:
    adr     x9, .LhfaFloatStoresEnd
    sub     x9, x9, x2              // step back one instruction per float requested
    br      x9

    str     s3, [x1, #4]            // fourth float
    str     s2, [x1, #0]            // third float
    str     s1, [x0, #4]            // second float
    str     s0, [x0, #0]            // first float
.LhfaFloatStoresEnd:
    ret
|
|
|
|
|
|
//[returnType] CallARM64[type](
//    const asQWORD *gpRegArgs,    asQWORD numGPRegArgs,
//    const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
//    const asQWORD *stackArgs,    asQWORD numStackArgs,
//    asFUNCTION_t func
//)
//
// Generic call trampoline. Loads the argument registers and the outgoing
// stack area from three arrays and then calls func. CallARM64, CallARM64Float
// and CallARM64Double are aliases of the same code; whatever func leaves in
// its return registers is handed back unchanged.
//
// On entry:
//   x0 = gpRegArgs        x1 = numGPRegArgs    (loaded into x0..x7)
//   x2 = floatRegArgs     x3 = numFloatRegArgs (loaded into d0..d7)
//   x4 = stackArgs        x5 = numStackArgs    (8 byte slots)
//   x6 = func
// Scratch: x9, x10, x11, x12, x15. x20 is saved and restored.
// x8 is not touched before the call, so a value placed there by a wrapper
// reaches func.
//
// The register counts are not range checked; values above 8 would branch in
// front of the load tables.
//
// NOTE(review): stack slots are copied in pairs, so for an odd numStackArgs
// the last ldp reads one qword past the final element of stackArgs.
.align 2
CallARM64Double:
CallARM64Float:
CallARM64:
    .cfi_startproc
    stp     fp, lr, [sp,#-0x20]!
    str     x20, [sp,#0x10]
    .cfi_def_cfa_offset 0x20
    .cfi_offset 20, -0x10           // x20 lives at CFA-0x10 (sp+0x10 in this frame)
    .cfi_offset fp, -0x20
    .cfi_offset lr, -0x18
    mov     fp, sp
    .cfi_def_cfa_register 29        // sp moves below, so describe the CFA relative to fp

    mov     x20, #0                 // x20 = bytes reserved for stack arguments

    cbz     x5, stackArgsLoopEnd

    // Align count to 2, then multiply by 8, resulting in a size aligned to 16
    add     x20, x5,  #1
    lsl     x20, x20, #3
    and     x20, x20, #-0x10
    // Multiply count by 8
    lsl     x10, x5, #3

    // Reserve the outgoing argument area first and fill it through a scratch
    // pointer. The data must never sit below sp: there is no red zone, so a
    // signal delivered in between could overwrite it.
    sub     sp, sp, x20
    mov     x12, sp
stackArgsLoopStart:
    ldp     x9,x11, [x4],#16
    stp     x9,x11, [x12],#16
    subs    x10, x10, #16
    bgt     stackArgsLoopStart
stackArgsLoopEnd:

    // Calculate amount to jump forward, avoiding pointless instructions.
    // The loads are in reverse order so that entering N instructions before
    // the end label loads exactly d0..d(N-1).
    adr     x9, populateFloatRegisterArgsEnd
    sub     x9, x9, x3, lsl 2       // x9 -= numFloatRegArgs * 4
    br      x9

    ldr     d7, [x2, #0x38]
    ldr     d6, [x2, #0x30]
    ldr     d5, [x2, #0x28]
    ldr     d4, [x2, #0x20]
    ldr     d3, [x2, #0x18]
    ldr     d2, [x2, #0x10]
    ldr     d1, [x2, #0x08]
    ldr     d0, [x2]
populateFloatRegisterArgsEnd:

    // Move func out of x6 before the general purpose loads overwrite it
    mov     x15, x6
    // Calculate amount to jump forward, avoiding pointless instructions.
    // x0 is loaded last because it is also the base pointer of the array.
    adr     x9, populateGPRegisterArgsEnd
    sub     x9, x9, x1, lsl 2       // x9 -= numGPRegArgs * 4
    br      x9

    ldr     x7, [x0, #0x38]
    ldr     x6, [x0, #0x30]
    ldr     x5, [x0, #0x28]
    ldr     x4, [x0, #0x20]
    ldr     x3, [x0, #0x18]
    ldr     x2, [x0, #0x10]
    ldr     x1, [x0, #0x08]
    ldr     x0, [x0]
populateGPRegisterArgsEnd:

    // Actually call function. sp already points at the first stack argument
    // (or at the saved frame when there are none).
    blr     x15
    add     sp, sp, x20             // drop the outgoing argument area

    ldr     x20, [sp,#0x10]
    ldp     fp, lr, [sp],#0x20

    .cfi_restore lr
    .cfi_restore fp
    .cfi_restore 20
    .cfi_def_cfa 31, 0              // frame is gone: CFA is sp again
    ret
    .cfi_endproc
|
|
|
|
// CallARM64Ret128
//
// Same leading six arguments as CallARM64, followed by two more:
//   x6 = pointer that receives the second return register (x1) of the call
//   x7 = function to call
//
// The pointer is parked in x20, which CallARM64 saves and restores, and the
// function is shifted down into x6 where CallARM64 expects it. After the
// call, x1 is stored through the pointer and x0 is returned as it came back.
.align 2
CallARM64Ret128:
    .cfi_startproc
    stp     fp, lr, [sp,#-0x20]!
    str     x20, [sp,#0x10]
    .cfi_def_cfa_offset 0x20
    .cfi_offset 20, -0x10           // x20 lives at CFA-0x10 (sp+0x10 in this frame)
    .cfi_offset fp, -0x20
    .cfi_offset lr, -0x18
    mov     fp, sp

    mov     x20, x6                 // keep the output pointer across the call
    mov     x6, x7                  // function pointer goes where CallARM64 reads it
    mov     x7, #0
    bl      CallARM64

    str     x1, [x20]               // upper half of the 128 bit result

    ldr     x20, [sp,#0x10]
    ldp     fp, lr, [sp],#0x20

    .cfi_restore lr
    .cfi_restore fp
    .cfi_restore 20
    .cfi_def_cfa_offset 0
    ret
    .cfi_endproc
|
|
|
|
// CallARM64RetInMemory
//
// Same leading six arguments as CallARM64, followed by two more:
//   x6 = pointer to the memory that receives the return value
//   x7 = function to call
//
// The pointer is passed on to the called function in x8 and is also the
// return value of this routine (in x0).
//
// x8 is not preserved across calls, so the called function is free to change
// it. The pointer is therefore also kept in x20, which CallARM64 saves and
// restores, and x0 is loaded from that copy instead of from x8.
.align 2
CallARM64RetInMemory:
    .cfi_startproc
    stp     fp, lr, [sp,#-0x20]!
    str     x20, [sp,#0x10]
    .cfi_def_cfa_offset 0x20
    .cfi_offset 20, -0x10           // x20 lives at CFA-0x10 (sp+0x10 in this frame)
    .cfi_offset fp, -0x20
    .cfi_offset lr, -0x18
    mov     fp, sp

    mov     x20, x6                 // copy of the return pointer that survives the call
    mov     x8, x6                  // hand the return pointer to the called function
    mov     x6, x7                  // function pointer goes where CallARM64 reads it
    mov     x7, #0
    bl      CallARM64

    mov     x0, x20                 // return the pointer that was passed in

    ldr     x20, [sp,#0x10]
    ldp     fp, lr, [sp],#0x20

    .cfi_restore lr
    .cfi_restore fp
    .cfi_restore 20
    .cfi_def_cfa_offset 0
    ret
    .cfi_endproc
|
|
|
|
#endif /* __aarch64__ */
|
|
|
|
#endif /* !AS_MAX_PORTABILITY */
|