diff --git a/aarch64-asm.S b/aarch64-asm.S
index 842b9e2..165c8ac 100644
--- a/aarch64-asm.S
+++ b/aarch64-asm.S
@@ -31,8 +31,7 @@
 
 .macro asm_function function_name
 .global \function_name
- .type \function_name,%function
-.func \function_name
+ .type \function_name,%function
 \function_name:
 DST .req x0
 SRC .req x1
@@ -54,7 +53,6 @@ asm_function aligned_block_copy_ldpstp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_aarch64
 0:
@@ -67,7 +65,6 @@ asm_function aligned_block_copy_ldpstp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64
 0:
@@ -82,7 +79,6 @@ asm_function aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64
 0:
@@ -96,7 +92,6 @@ asm_function aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64
 0:
@@ -111,7 +106,6 @@ asm_function aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64
 0:
@@ -125,7 +119,6 @@ asm_function aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stp_x_aarch64
 0:
@@ -137,7 +130,6 @@ asm_function aligned_block_fill_stp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stp_q_aarch64
 0:
@@ -147,7 +139,6 @@ asm_function aligned_block_fill_stp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stnp_x_aarch64
 0:
@@ -159,7 +150,6 @@ asm_function aligned_block_fill_stnp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stnp_q_aarch64
 0:
@@ -169,7 +159,6 @@ asm_function aligned_block_fill_stnp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ld1st1_aarch64
 0:
@@ -180,6 +169,5 @@ asm_function aligned_block_copy_ld1st1_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 #endif
diff --git a/arm-neon.S b/arm-neon.S
index 4db78ce..9631d82 100644
--- a/arm-neon.S
+++ b/arm-neon.S
@@ -32,7 +32,6 @@
 
 .macro asm_function function_name
 .global \function_name
-.func \function_name
 \function_name:
 DST .req r0
 SRC .req r1
@@ -66,7 +65,6 @@ asm_function aligned_block_read_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read_pf32_neon
@@ -97,7 +95,6 @@ asm_function aligned_block_read_pf32_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read_pf64_neon
@@ -127,7 +124,6 @@ asm_function aligned_block_read_pf64_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_neon
@@ -156,7 +152,6 @@ asm_function aligned_block_read2_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_pf32_neon
@@ -187,7 +182,6 @@ asm_function aligned_block_read2_pf32_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_pf64_neon
@@ -217,7 +211,6 @@ asm_function aligned_block_read2_pf64_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_neon
 0:
@@ -226,7 +219,6 @@ asm_function aligned_block_copy_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_neon
 vpush {d8-d15}
@@ -244,7 +236,6 @@ asm_function aligned_block_copy_unrolled_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_pf32_neon
 0:
@@ -254,7 +245,6 @@ asm_function aligned_block_copy_pf32_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_pf32_neon
 vpush {d8-d15}
@@ -280,7 +270,6 @@ asm_function aligned_block_copy_unrolled_pf32_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_pf64_neon
 0:
@@ -292,7 +281,6 @@ asm_function aligned_block_copy_pf64_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_pf64_neon
 vpush {d8-d15}
@@ -314,7 +302,6 @@ asm_function aligned_block_copy_unrolled_pf64_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_neon
 add SRC, SRC, SIZE
@@ -328,7 +315,6 @@ asm_function aligned_block_copy_backwards_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_pf32_neon
 add SRC, SRC, SIZE
@@ -343,7 +329,6 @@ asm_function aligned_block_copy_backwards_pf32_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_pf64_neon
 add SRC, SRC, SIZE
@@ -360,7 +345,6 @@ asm_function aligned_block_copy_backwards_pf64_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_fill_neon
 vld1.8 {d0, d1, d2, d3}, [SRC]!
@@ -370,7 +354,6 @@ asm_function aligned_block_fill_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_fill_backwards_neon
 add SRC, SRC, SIZE
@@ -383,7 +366,6 @@ asm_function aligned_block_fill_backwards_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 /* some code for older ARM processors */
 
@@ -398,7 +380,6 @@ asm_function aligned_block_fill_stm4_armv4
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_fill_stm8_armv4
 push {r4-r12, lr}
@@ -409,7 +390,6 @@ asm_function aligned_block_fill_stm8_armv4
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_fill_strd_armv5te
 push {r4-r12, lr}
@@ -426,7 +406,6 @@ asm_function aligned_block_fill_strd_armv5te
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_incr_armv5te
 push {r4-r12, lr}
@@ -442,7 +421,6 @@ asm_function aligned_block_copy_incr_armv5te
 stmia DST!, {r8-r11}
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_wrap_armv5te
 push {r4-r12, lr}
@@ -458,7 +436,6 @@ asm_function aligned_block_copy_wrap_armv5te
 stmia DST!, {r8-r11}
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_vfp
 push {r4-r12, lr}
@@ -470,6 +447,5 @@ asm_function aligned_block_copy_vfp
 bgt 0b
 vpop {d8-d15}
 pop {r4-r12, pc}
-.endfunc
 
 #endif
diff --git a/mips-32.S b/mips-32.S
index 17b2b7f..4f7ddae 100644
--- a/mips-32.S
+++ b/mips-32.S
@@ -32,7 +32,6 @@
 
 .macro asm_function function_name
 .global \function_name
 .type \function_name, @function
- .func \function_name
 \function_name:
 .endm
@@ -93,7 +92,7 @@ asm_function aligned_block_fill_pf32_mips32
 2:
 jr $ra
 nop
-.endfunc
+
 /*
  * void aligned_block_copy_pf32_mips32(int64_t *dst, int64_t *src, int size)
  */
@@ -178,6 +177,6 @@ asm_function aligned_block_copy_pf32_mips32
 lw $s7, 28($sp)
 jr $ra
 addi $sp, $sp, 32
-.endfunc
+
 
 #endif
diff --git a/x86-sse2.S b/x86-sse2.S
index d8840e4..409031b 100644
--- a/x86-sse2.S
+++ b/x86-sse2.S
@@ -30,7 +30,6 @@
 
 .macro asm_function_helper function_name
 .global \function_name
-.func \function_name
 \function_name:
 #ifdef __amd64__
 #ifdef _WIN64
@@ -90,7 +89,7 @@ asm_function aligned_block_copy_movsb
 pop3 edi esi ecx
 #endif
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_movsd
 0:
@@ -110,7 +109,7 @@ asm_function aligned_block_copy_movsd
 pop3 edi esi ecx
 #endif
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_sse2
 0:
@@ -127,7 +126,7 @@ asm_function aligned_block_copy_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_sse2
 0:
@@ -144,7 +143,7 @@ asm_function aligned_block_copy_nt_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_pf32_sse2
 0:
@@ -163,7 +162,7 @@ asm_function aligned_block_copy_pf32_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_pf32_sse2
 0:
@@ -182,7 +181,7 @@ asm_function aligned_block_copy_nt_pf32_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_pf64_sse2
 0:
@@ -200,7 +199,7 @@ asm_function aligned_block_copy_pf64_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_pf64_sse2
 0:
@@ -218,7 +217,7 @@ asm_function aligned_block_copy_nt_pf64_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_fill_sse2
 movdqa xmm0, [SRC + 0]
@@ -231,7 +230,7 @@ asm_function aligned_block_fill_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_fill_nt_sse2
 movdqa xmm0, [SRC + 0]
@@ -244,7 +243,7 @@ asm_function aligned_block_fill_nt_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 /*****************************************************************************/