Angelscript/angelscript/source/as_callfunc_arm64_msvc.asm

206 lines
5.1 KiB
NASM

;
; AngelCode Scripting Library
; Copyright (c) 2020-2020 Andreas Jonsson
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any
; damages arising from the use of this software.
;
; Permission is granted to anyone to use this software for any
; purpose, including commercial applications, and to alter it and
; redistribute it freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you
; must not claim that you wrote the original software. If you use
; this software in a product, an acknowledgment in the product
; documentation would be appreciated but is not required.
;
; 2. Altered source versions must be plainly marked as such, and
; must not be misrepresented as being the original software.
;
; 3. This notice may not be removed or altered from any source
; distribution.
;
; The original version of this library can be located at:
; http://www.angelcode.com/angelscript/
;
; Andreas Jonsson
; andreas@angelcode.com
;
; Assembly routines for the ARM64/AArch64 call convention used for Windows 10 on ARM
; Written by Max Waine in July 2020, based on as_callfunc_arm_msvc.asm
; MSVC currently doesn't support inline assembly for the ARM64 platform,
; and if they're treating it like x64 /won't/ ever support inline assembly,
; so this separate file is needed.
; Compile with Microsoft ARM64 assembler (armasm64)
; http://msdn.microsoft.com/en-us/library/hh873190.aspx
AREA |.rdata|, DATA, READONLY
EXPORT GetHFAReturnDouble
EXPORT GetHFAReturnFloat
EXPORT CallARM64Ret128
EXPORT CallARM64RetInMemory
EXPORT CallARM64Double
EXPORT CallARM64Float
EXPORT CallARM64
AREA |.text|, CODE, ALIGN=2
ALIGN 4
GetHFAReturnDouble PROC
adr x9, |populateDoubles|
sub x9, x9, x1, lsr 1 ; x9 -= returnSize >> 1; (/2 because double is 2x instruction size)
br x9
str d3, [x0, #0x18]
str d2, [x0, #0x10]
str d1, [x1]
str d0, [x0]
|populateDoubles|
ret
ENDP ; GetHFAReturnDouble
ALIGN 4
GetHFAReturnFloat PROC
adr x9, |populateFloats|
sub x9, x9, x2 // x9 -= returnSize; (already 4 bytes per return)
br x9
str s3, [x1, #0x4]
str s2, [x1]
str s1, [x0, #0x4]
str s0, [x0]
|populateFloats|
ret
ENDP ; GetHFAReturnFloat
;[returnType] CallARM64[type](
; const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
; const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
; const asQWORD *stackArgs, asQWORD numStackArgs,
; asFUNCTION_t func
;)
ALIGN 4
CallARM64Double PROC
stp fp, lr, [sp,#-0x10]!
bl CallARM64
ldp fp, lr, [sp,#-0x10]!
ret
ENDP ; CallARM64Double
ALIGN 4
CallARM64Float PROC
stp fp, lr, [sp,#-0x10]!
bl CallARM64
ldp fp, lr, [sp,#-0x10]!
ret
ENDP ; CallARM64Float
ALIGN 4
CallARM64 PROC
stp fp, lr, [sp,#-0x20]!
str x20, [sp,#0x10]
mov x20, #0;
cbz x5, |stackArgsLoopEnd|
; Align count to 2, then multiply by 8, resulting in a size aligned to 16
add x20, x5, #1
lsl x20, x20, #3
and x20, x20, #-0x10
; Multiply count by 8
lsl x10, x5, #3
sub sp, sp, x20
|stackArgsLoopStart|
ldp x9,x11, [x4],#16
stp x9,x11, [sp],#16
subs x10, x10, #16
bgt |stackArgsLoopStart|
|stackArgsLoopEnd|
; Calculate amount to jump forward, avoiding pointless instructions
adr x9, |populateFloatRegisterArgsEnd|
sub x9, x9, x3, lsl 2 ; x9 -= numFloatRegArgs * 4
br x9
ldr d7, [x2, #0x38]
ldr d6, [x2, #0x30]
ldr d5, [x2, #0x28]
ldr d4, [x2, #0x20]
ldr d3, [x2, #0x18]
ldr d2, [x2, #0x10]
ldr d1, [x2, #0x08]
ldr d0, [x2]
|populateFloatRegisterArgsEnd|
mov x15, x6
; Calculate amount to jump forward, avoiding pointless instructions
adr x9, |populateGPRegisterArgsEnd|
sub x9, x9, x1, lsl 2 ; x9 -= numGPRegArgs * 4
br x9
ldr x7, [x0, #0x38]
ldr x6, [x0, #0x30]
ldr x5, [x0, #0x28]
ldr x4, [x0, #0x20]
ldr x3, [x0, #0x18]
ldr x2, [x0, #0x10]
ldr x1, [x0, #0x08]
ldr x0, [x0]
|populateGPRegisterArgsEnd|
; Actually call function
sub sp, sp, x20
blr x15
add sp, sp, x20
ldr x20, [sp,#0x10]
ldp fp, lr, [sp],#0x20
ret
ENDP ; CallARM64
ALIGN 4
CallARM64Ret128 PROC
stp fp, lr, [sp,#-0x20]!
str x20, [sp,#0x10]
mov fp, sp
mov x20, x6
mov x6, x7
mov x7, #0
bl CallARM64
str x1, [x20]
ldr x20, [sp,#0x10]
ldp fp, lr, [sp],#0x20
ret ; CallARM64Ret128
ALIGN 4
CallARM64RetInMemory PROC
stp fp, lr, [sp,#-0x10]!
mov fp, sp
mov x8, x6
mov x6, x7
mov x7, #0
bl CallARM64
mov x0, x8
ldp fp, lr, [sp],#0x10
ret ; CallARM64RetInMemory
END