diff --git a/aarch64-asm.S b/aarch64-asm.S
index 842b9e2..165c8ac 100644
--- a/aarch64-asm.S
+++ b/aarch64-asm.S
@@ -31,8 +31,7 @@
 
 .macro asm_function function_name
 .global \function_name
- .type \function_name,%function
-.func \function_name
+ .type \function_name,%function
 \function_name:
 DST .req x0
 SRC .req x1
@@ -54,7 +53,6 @@ asm_function aligned_block_copy_ldpstp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_aarch64
 0:
@@ -67,7 +65,6 @@ asm_function aligned_block_copy_ldpstp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64
 0:
@@ -82,7 +79,6 @@ asm_function aligned_block_copy_ldpstp_q_pf32_l2strm_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64
 0:
@@ -96,7 +92,6 @@ asm_function aligned_block_copy_ldpstp_q_pf64_l2strm_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64
 0:
@@ -111,7 +106,6 @@ asm_function aligned_block_copy_ldpstp_q_pf32_l1keep_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64
 0:
@@ -125,7 +119,6 @@ asm_function aligned_block_copy_ldpstp_q_pf64_l1keep_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stp_x_aarch64
 0:
@@ -137,7 +130,6 @@ asm_function aligned_block_fill_stp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stp_q_aarch64
 0:
@@ -147,7 +139,6 @@ asm_function aligned_block_fill_stp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stnp_x_aarch64
 0:
@@ -159,7 +150,6 @@ asm_function aligned_block_fill_stnp_x_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_fill_stnp_q_aarch64
 0:
@@ -169,7 +159,6 @@ asm_function aligned_block_fill_stnp_q_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 asm_function aligned_block_copy_ld1st1_aarch64
 0:
@@ -180,6 +169,5 @@ asm_function aligned_block_copy_ld1st1_aarch64
 subs SIZE, SIZE, #64
 bgt 0b
 ret
-.endfunc
 
 #endif
diff --git a/arm-neon.S b/arm-neon.S
index 4db78ce..9631d82 100644
--- a/arm-neon.S
+++ b/arm-neon.S
@@ -32,7 +32,6 @@
 
 .macro asm_function function_name
 .global \function_name
-.func \function_name
 \function_name:
 DST .req r0
 SRC .req r1
@@ -66,7 +65,6 @@ asm_function aligned_block_read_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read_pf32_neon
@@ -97,7 +95,6 @@ asm_function aligned_block_read_pf32_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read_pf64_neon
@@ -127,7 +124,6 @@ asm_function aligned_block_read_pf64_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_neon
@@ -156,7 +152,6 @@ asm_function aligned_block_read2_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_pf32_neon
@@ -187,7 +182,6 @@ asm_function aligned_block_read2_pf32_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 /* Actually this calculates a sum of 32-bit values */
 asm_function aligned_block_read2_pf64_neon
@@ -217,7 +211,6 @@ asm_function aligned_block_read2_pf64_neon
 vpadd.u32 d31, d31, d31
 vmov.u32 r0, d31[0]
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_neon
 0:
@@ -226,7 +219,6 @@ asm_function aligned_block_copy_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_neon
 vpush {d8-d15}
@@ -244,7 +236,6 @@ asm_function aligned_block_copy_unrolled_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_pf32_neon
 0:
@@ -254,7 +245,6 @@ asm_function aligned_block_copy_pf32_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_pf32_neon
 vpush {d8-d15}
@@ -280,7 +270,6 @@ asm_function aligned_block_copy_unrolled_pf32_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_pf64_neon
 0:
@@ -292,7 +281,6 @@ asm_function aligned_block_copy_pf64_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_unrolled_pf64_neon
 vpush {d8-d15}
@@ -314,7 +302,6 @@ asm_function aligned_block_copy_unrolled_pf64_neon
 bgt 0b
 vpop {d8-d15}
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_neon
 add SRC, SRC, SIZE
@@ -328,7 +315,6 @@ asm_function aligned_block_copy_backwards_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_pf32_neon
 add SRC, SRC, SIZE
@@ -343,7 +329,6 @@ asm_function aligned_block_copy_backwards_pf32_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_copy_backwards_pf64_neon
 add SRC, SRC, SIZE
@@ -360,7 +345,6 @@ asm_function aligned_block_copy_backwards_pf64_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_fill_neon
 vld1.8 {d0, d1, d2, d3}, [SRC]!
@@ -370,7 +354,6 @@ asm_function aligned_block_fill_neon
 subs SIZE, SIZE, #64
 bgt 0b
 bx lr
-.endfunc
 
 asm_function aligned_block_fill_backwards_neon
 add SRC, SRC, SIZE
@@ -383,7 +366,6 @@ asm_function aligned_block_fill_backwards_neon
 subs SIZE, SIZE, #32
 bgt 0b
 bx lr
-.endfunc
 
 /* some code for older ARM processors */
 
@@ -398,7 +380,6 @@ asm_function aligned_block_fill_stm4_armv4
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_fill_stm8_armv4
 push {r4-r12, lr}
@@ -409,7 +390,6 @@ asm_function aligned_block_fill_stm8_armv4
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_fill_strd_armv5te
 push {r4-r12, lr}
@@ -426,7 +406,6 @@ asm_function aligned_block_fill_strd_armv5te
 subs SIZE, SIZE, #64
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_incr_armv5te
 push {r4-r12, lr}
@@ -442,7 +421,6 @@ asm_function aligned_block_copy_incr_armv5te
 stmia DST!, {r8-r11}
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_wrap_armv5te
 push {r4-r12, lr}
@@ -458,7 +436,6 @@ asm_function aligned_block_copy_wrap_armv5te
 stmia DST!, {r8-r11}
 bgt 0b
 pop {r4-r12, pc}
-.endfunc
 
 asm_function aligned_block_copy_vfp
 push {r4-r12, lr}
@@ -470,6 +447,5 @@ asm_function aligned_block_copy_vfp
 bgt 0b
 vpop {d8-d15}
 pop {r4-r12, pc}
-.endfunc
 
 #endif
diff --git a/mips-32.S b/mips-32.S
index 17b2b7f..4f7ddae 100644
--- a/mips-32.S
+++ b/mips-32.S
@@ -32,7 +32,6 @@
 
 .macro asm_function function_name
 .global \function_name
 .type \function_name, @function
- .func \function_name
 \function_name:
 .endm
@@ -93,7 +92,7 @@ asm_function aligned_block_fill_pf32_mips32
 2:
 jr $ra
 nop
-.endfunc
+
 /*
  * void aligned_block_copy_pf32_mips32(int64_t *dst, int64_t *src, int size)
  */
@@ -178,6 +177,6 @@ asm_function aligned_block_copy_pf32_mips32
 lw $s7, 28($sp)
 jr $ra
 addi $sp, $sp, 32
-.endfunc
+
 
 #endif
diff --git a/x86-sse2.S b/x86-sse2.S
index d8840e4..409031b 100644
--- a/x86-sse2.S
+++ b/x86-sse2.S
@@ -30,7 +30,6 @@
 
 .macro asm_function_helper function_name
 .global \function_name
-.func \function_name
 \function_name:
 #ifdef __amd64__
 #ifdef _WIN64
@@ -90,7 +89,7 @@ asm_function aligned_block_copy_movsb
 pop3 edi esi ecx
 #endif
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_movsd
 0:
@@ -110,7 +109,7 @@ asm_function aligned_block_copy_movsd
 pop3 edi esi ecx
 #endif
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_sse2
 0:
@@ -127,7 +126,7 @@ asm_function aligned_block_copy_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_sse2
 0:
@@ -144,7 +143,7 @@ asm_function aligned_block_copy_nt_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_pf32_sse2
 0:
@@ -163,7 +162,7 @@ asm_function aligned_block_copy_pf32_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_pf32_sse2
 0:
@@ -182,7 +181,7 @@ asm_function aligned_block_copy_nt_pf32_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_pf64_sse2
 0:
@@ -200,7 +199,7 @@ asm_function aligned_block_copy_pf64_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_copy_nt_pf64_sse2
 0:
@@ -218,7 +217,7 @@ asm_function aligned_block_copy_nt_pf64_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_fill_sse2
 movdqa xmm0, [SRC + 0]
@@ -231,7 +230,7 @@ asm_function aligned_block_fill_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 asm_function aligned_block_fill_nt_sse2
 movdqa xmm0, [SRC + 0]
@@ -244,7 +243,7 @@ asm_function aligned_block_fill_nt_sse2
 sub SIZE, 64
 jg 0b
 ret
-.endfunc
+
 
 /*****************************************************************************/