[oe-commits] Khem Raj : gcc-4.4.4: Use CP15 register for TLS access on armv7-a.

git version control git at git.openembedded.org
Fri Jul 9 22:15:27 UTC 2010


Module: openembedded.git
Branch: org.openembedded.dev
Commit: 5e067562e2acfd3c3d3313494ad0495492738de3
URL:    http://gitweb.openembedded.net/?p=openembedded.git&a=commit;h=5e067562e2acfd3c3d3313494ad0495492738de3

Author: Khem Raj <raj.khem at gmail.com>
Date:   Fri Jul  9 15:14:45 2010 -0700

gcc-4.4.4: Use CP15 register for TLS access on armv7-a.

* ARMv7 was using -mtp=soft, whereas the CP15 register for TLS
  is available and should be used. This should improve the performance
  of TLS access.

Signed-off-by: Khem Raj <raj.khem at gmail.com>

---

 recipes/gcc/gcc-4.4.4.inc                          |    3 +-
 .../gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch |  217 ++++++++++++++++++++
 2 files changed, 219 insertions(+), 1 deletions(-)

diff --git a/recipes/gcc/gcc-4.4.4.inc b/recipes/gcc/gcc-4.4.4.inc
index 37c8cd0..d1987f9 100644
--- a/recipes/gcc/gcc-4.4.4.inc
+++ b/recipes/gcc/gcc-4.4.4.inc
@@ -8,7 +8,7 @@ LICENSE = "GPLv3"
 DEPENDS = "mpfr gmp"
 NATIVEDEPS = "mpfr-native gmp-native"
 
-INC_PR = "r2"
+INC_PR = "r3"
 
 FILESPATHPKG .= ":gcc-$PV"
 
@@ -27,6 +27,7 @@ SRC_URI = "${GNU_MIRROR}/gcc/gcc-${PV}/gcc-${PV}.tar.bz2;name=gcc444tarbz2 \
 	file://gcc-armv4-pass-fix-v4bx-to-ld.patch \
 	file://gcc-add-t-slibgcc-libgcc.patch \
 	file://gcc-4.3.3-fix-EXTRA_BUILD.patch \
+	file://gcc-arm-cp15-tpreg-for-TLS.patch \
 	"
 SRC_URI[gcc444tarbz2.md5sum] = "7ff5ce9e5f0b088ab48720bbd7203530"
 SRC_URI[gcc444tarbz2.sha256sum] = "e1c13696b45752ad3f652304fab5120a43a8a5c0f438d3bda78cf16b620c0c58"
diff --git a/recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch b/recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch
new file mode 100644
index 0000000..a3fbdce
--- /dev/null
+++ b/recipes/gcc/gcc-4.4.4/gcc-arm-cp15-tpreg-for-TLS.patch
@@ -0,0 +1,217 @@
+Backport the patch below from trunk. This lets gcc use the
+hard TLS register on ARMv7; so far it has been using soft access.
+This should help improve performance.
+
+2010-02-01  Richard Earnshaw  <rearnsha at arm.com>
+
+       * arm.c (FL_FOR_ARCH_7A): is also a superset of ARMv6K.
+       (arm_override_options): Allow automatic selection of the thread
+       pointer register if thumb2.
+       (legitimize_pic_address): Improve code sequences for Thumb2.
+       (arm_call_tls_get_addr): Likewise.
+       (legitimize_tls_address): Likewise.
+       * arm.md (pic_load_addr_arm): Delete.  Replace with ...
+       (pic_load_addr_32bit): ... this.  New named pattern.
+       * thumb2.md (pic_load_addr_thumb2): Delete.
+       (pic_load_dot_plus_four): Delete.
+       (tls_load_dot_plus_four): New named pattern.
+
+
+Index: gcc-4.4.4/gcc/config/arm/arm.c
+===================================================================
+--- gcc-4.4.4.orig/gcc/config/arm/arm.c	2010-02-18 05:13:03.000000000 -0800
++++ gcc-4.4.4/gcc/config/arm/arm.c	2010-07-09 15:07:03.829739455 -0700
+@@ -495,7 +495,7 @@ static int thumb_call_reg_needed;
+ #define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
+ #define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
+ #define FL_FOR_ARCH7	(FL_FOR_ARCH6T2 &~ FL_NOTM)
+-#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM)
++#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
+ #define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
+ #define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
+ 
+@@ -1549,7 +1549,7 @@ arm_override_options (void)
+   /* Use the cp15 method if it is available.  */
+   if (target_thread_pointer == TP_AUTO)
+     {
+-      if (arm_arch6k && !TARGET_THUMB)
++      if (arm_arch6k && !TARGET_THUMB1)
+ 	target_thread_pointer = TP_CP15;
+       else
+ 	target_thread_pointer = TP_SOFT;
+@@ -3634,10 +3634,8 @@ legitimize_pic_address (rtx orig, enum m
+       else
+ 	address = reg;
+ 
+-      if (TARGET_ARM)
+-	emit_insn (gen_pic_load_addr_arm (address, orig));
+-      else if (TARGET_THUMB2)
+-	emit_insn (gen_pic_load_addr_thumb2 (address, orig));
++      if (TARGET_32BIT)
++	emit_insn (gen_pic_load_addr_32bit (address, orig));
+       else /* TARGET_THUMB1 */
+ 	emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+ 
+@@ -3814,7 +3812,7 @@ arm_load_pic_register (unsigned long sav
+     {
+       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
+       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+-      emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
++      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
+ 
+       emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
+ 
+@@ -3837,29 +3835,13 @@ arm_load_pic_register (unsigned long sav
+ 				UNSPEC_GOTSYM_OFF);
+       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+ 
+-      if (TARGET_ARM)
+-	{
+-	  emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
+-	  emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
+-	}
+-      else if (TARGET_THUMB2)
++      if (TARGET_32BIT)
+ 	{
+-	  /* Thumb-2 only allows very limited access to the PC.  Calculate the
+-	     address in a temporary register.  */
+-	  if (arm_pic_register != INVALID_REGNUM)
+-	    {
+-	      pic_tmp = gen_rtx_REG (SImode,
+-				     thumb_find_work_register (saved_regs));
+-	    }
++	  emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
++	  if (TARGET_ARM)
++	    emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
+ 	  else
+-	    {
+-	      gcc_assert (can_create_pseudo_p ());
+-	      pic_tmp = gen_reg_rtx (Pmode);
+-	    }
+-
+-	  emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
+-	  emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
+-	  emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
++	    emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
+ 	}
+       else /* TARGET_THUMB1 */
+ 	{
+@@ -4499,14 +4481,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, r
+   if (TARGET_ARM)
+     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
+   else if (TARGET_THUMB2)
+-    {
+-      rtx tmp;
+-      /* Thumb-2 only allows very limited access to the PC.  Calculate
+-	 the address in a temporary register.  */
+-      tmp = gen_reg_rtx (SImode);
+-      emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
+-      emit_insn (gen_addsi3(reg, reg, tmp));
+-    }
++    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+   else /* TARGET_THUMB1 */
+     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ 
+@@ -4562,15 +4537,7 @@ legitimize_tls_address (rtx x, rtx reg)
+       if (TARGET_ARM)
+ 	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
+       else if (TARGET_THUMB2)
+-	{
+-	  rtx tmp;
+-	  /* Thumb-2 only allows very limited access to the PC.  Calculate
+-	     the address in a temporary register.  */
+-	  tmp = gen_reg_rtx (SImode);
+-	  emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
+-	  emit_insn (gen_addsi3(reg, reg, tmp));
+-	  emit_move_insn (reg, gen_const_mem (SImode, reg));
+-	}
++	emit_insn (gen_tls_load_dot_plus_four (reg, reg, labelno));
+       else
+ 	{
+ 	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+Index: gcc-4.4.4/gcc/config/arm/arm.md
+===================================================================
+--- gcc-4.4.4.orig/gcc/config/arm/arm.md	2010-02-18 05:13:03.000000000 -0800
++++ gcc-4.4.4/gcc/config/arm/arm.md	2010-07-09 15:07:03.833742490 -0700
+@@ -5091,14 +5091,17 @@
+ ;; the insn alone, and to force the minipool generation pass to then move
+ ;; the GOT symbol to memory.
+ 
+-(define_insn "pic_load_addr_arm"
++(define_insn "pic_load_addr_32bit"
+   [(set (match_operand:SI 0 "s_register_operand" "=r")
+ 	(unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
+-  "TARGET_ARM && flag_pic"
++  "TARGET_32BIT && flag_pic"
+   "ldr%?\\t%0, %1"
+   [(set_attr "type" "load1")
+-   (set (attr "pool_range")     (const_int 4096))
+-   (set (attr "neg_pool_range") (const_int 4084))]
++   (set_attr "pool_range" "4096")
++   (set (attr "neg_pool_range")
++	(if_then_else (eq_attr "is_thumb" "no")
++		      (const_int 4084)
++		      (const_int 0)))]
+ )
+ 
+ (define_insn "pic_load_addr_thumb1"
+@@ -5116,7 +5119,7 @@
+ 		    (const_int 4)
+ 		    (match_operand 2 "" "")]
+ 		   UNSPEC_PIC_BASE))]
+-  "TARGET_THUMB1"
++  "TARGET_THUMB"
+   "*
+   (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+ 				     INTVAL (operands[2]));
+Index: gcc-4.4.4/gcc/config/arm/thumb2.md
+===================================================================
+--- gcc-4.4.4.orig/gcc/config/arm/thumb2.md	2010-02-24 06:50:43.000000000 -0800
++++ gcc-4.4.4/gcc/config/arm/thumb2.md	2010-07-09 15:07:03.829739455 -0700
+@@ -243,37 +243,19 @@
+    (set_attr "neg_pool_range" "*,*,*,*,0,*")]
+ )
+ 
+-;; ??? We can probably do better with thumb2
+-(define_insn "pic_load_addr_thumb2"
+-  [(set (match_operand:SI 0 "s_register_operand" "=r")
+-	(unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
+-  "TARGET_THUMB2 && flag_pic"
+-  "ldr%?\\t%0, %1"
+-  [(set_attr "type" "load1")
+-   (set_attr "pool_range" "4096")
+-   (set_attr "neg_pool_range" "0")]
+-)
+-
+-;; Set reg to the address of this instruction plus four.  The low two
+-;; bits of the PC are always read as zero, so ensure the instructions is
+-;; word aligned.
+-(define_insn "pic_load_dot_plus_four"
+-  [(set (match_operand:SI 0 "register_operand" "=r")
+-	(unspec:SI [(const_int 4)
+-		    (match_operand 1 "" "")]
+-		   UNSPEC_PIC_BASE))]
++(define_insn "tls_load_dot_plus_four"
++  [(set (match_operand:SI 0 "register_operand" "=l,r")
++	(mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "+l,r")
++			    (const_int 4)
++			    (match_operand 2 "" "")]
++			   UNSPEC_PIC_BASE)))]
+   "TARGET_THUMB2"
+   "*
+-  assemble_align(BITS_PER_WORD);
+   (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+-			     INTVAL (operands[1]));
+-  /* We use adr because some buggy gas assemble add r8, pc, #0
+-     to add.w r8, pc, #0, not addw r8, pc, #0.  */
+-  asm_fprintf (asm_out_file, \"\\tadr\\t%r, %LLPIC%d + 4\\n\",
+-	       REGNO(operands[0]), (int)INTVAL (operands[1]));
+-  return \"\";
++			     INTVAL (operands[2]));
++  return \"add\\t%1, %|pc\;ldr%?\\t%0, [%1]\";
+   "
+-  [(set_attr "length" "6")]
++  [(set_attr "length" "4,6")]
+ )
+ 
+ ;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot





More information about the Openembedded-commits mailing list