patch-2.4.4 linux/arch/ia64/lib/strlen.S

Next file: linux/arch/ia64/lib/strlen_user.S
Previous file: linux/arch/ia64/lib/memset.S
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.3/linux/arch/ia64/lib/strlen.S linux/arch/ia64/lib/strlen.S
@@ -5,12 +5,12 @@
  *
  * Inputs:
  *	in0	address of string
- * 
- * Outputs: 
- * 	ret0	the number of characters in the string (0 if empty string)
- *         	does not count the \0
  *
- * Copyright (C) 1999 Hewlett-Packard Co
+ * Outputs:
+ *	ret0	the number of characters in the string (0 if empty string)
+ *	does not count the \0
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
  * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
  *
  * 09/24/99 S.Eranian add speculation recovery code
@@ -30,7 +30,7 @@
 //	  string may not be 8-byte aligned. In this case we load the 8byte
 //	  quantity which includes the start of the string and mask the unused
 //	  bytes with 0xff to avoid confusing czx.
-//	  We use speculative loads and software pipelining to hide memory 
+//	  We use speculative loads and software pipelining to hide memory
 //	  latency and do read ahead safely. This way we defer any exception.
 //
 //	  Because we don't want the kernel to be relying on particular
@@ -42,7 +42,7 @@
 //	  The fact that speculation may fail can be caused, for instance, by
 //	  the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
 //	  a NaT bit will be set if the translation is not present. The normal
-//	  load, on the other hand, will cause the translation to be inserted 
+//	  load, on the other hand, will cause the translation to be inserted
 //	  if the mapping exists.
 //
 //	  It should be noted that we execute recovery code only when we need
@@ -50,22 +50,22 @@
 //	  recovery code on pure read ahead data.
 //
 // Remarks:
-//	- the cmp r0,r0 is used as a fast way to initialize a predicate 
+//	- the cmp r0,r0 is used as a fast way to initialize a predicate
 //	  register to 1. This is required to make sure that we get the parallel
 //	  compare correct.
 //
 //	- we don't use the epilogue counter to exit the loop but we need to set
 //	  it to zero beforehand.
 //
-//	- after the loop we must test for Nat values because neither the 
+//	- after the loop we must test for Nat values because neither the
 //	  czx nor cmp instruction raise a NaT consumption fault. We must be
-//	  careful not to look too far for a Nat for which we don't care. 
+//	  careful not to look too far for a Nat for which we don't care.
 //	  For instance we don't need to look at a NaT in val2 if the zero byte
 //	  was in val1.
 //
 //	- Clearly performance tuning is required.
 //
-// 
+//
 //
 #define saved_pfs	r11
 #define	tmp		r10
@@ -78,15 +78,9 @@
 #define val1		r22
 #define val2		r23
 
-
-	.text
-	.psr abi64
-	.psr lsb
-	.lsb
-
 GLOBAL_ENTRY(strlen)
-	UNW(.prologue)
-	UNW(.save ar.pfs, saved_pfs)
+	.prologue
+	.save ar.pfs, saved_pfs
 	alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
 
 	.rotr v[2], w[2]	// declares our 4 aliases
@@ -94,11 +88,11 @@
 	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
 	mov orig=in0		// keep trackof initial byte address
 	dep src=0,in0,0,3	// src=8byte-aligned in0 address
-	UNW(.save pr, saved_pr)
+	.save pr, saved_pr
 	mov saved_pr=pr		// preserve predicates (rotation)
 	;;
 
-	UNW(.body)
+	.body
 
 	ld8 v[1]=[src],8	// must not speculate: can fail here
 	shl tmp=tmp,3		// multiply by 8bits/byte
@@ -115,8 +109,8 @@
 	or v[1]=v[1],mask	// now we have a safe initial byte pattern
 	;;
 1:
-	ld8.s v[0]=[src],8	// speculatively load next 
-	czx1.r val1=v[1]	// search 0 byte from right 
+	ld8.s v[0]=[src],8	// speculatively load next
+	czx1.r val1=v[1]	// search 0 byte from right
 	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
 	;;
 	ld8.s w[0]=[src],8	// speculatively load next to next
@@ -132,11 +126,7 @@
 	//	- there must be a better way of doing the test
 	//
 	cmp.eq  p8,p9=8,val1	// p6 = val1 had zero (disambiguate)
-#ifdef notyet
 	tnat.nz p6,p7=val1	// test NaT on val1
-#else
-	tnat.z p7,p6=val1	// test NaT on val1
-#endif
 (p6)	br.cond.spnt.few recover// jump to recovery if val1 is NaT
 	;;
 	//
@@ -154,7 +144,7 @@
 	sub tmp=8,val1		// which byte in word
 	mov pr=saved_pr,0xffffffffffff0000
 	;;
-	sub ret0=ret0,tmp	// adjust 
+	sub ret0=ret0,tmp	// adjust
 	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
 	br.ret.sptk.few rp	// end of normal execution
 
@@ -167,8 +157,8 @@
 	//
 	// IMPORTANT:
 	// Please note that in the case of strlen() as opposed to strlen_user()
-	// we don't use the exception mechanism, as this function is not 
-	// supposed to fail. If that happens it means we have a bug and the 
+	// we don't use the exception mechanism, as this function is not
+	// supposed to fail. If that happens it means we have a bug and the
 	// code will cause of kernel fault.
 	//
 	// XXX Fixme
@@ -187,7 +177,7 @@
 2:
 (p6)	ld8 val=[base],8	// will fail if unrecoverable fault
 	;;
-	czx1.r val1=val		// search 0 byte from right 
+	czx1.r val1=val		// search 0 byte from right
 	;;
 	cmp.eq p6,p0=8,val1	// val1==8 ?
 (p6)	br.wtop.dptk.few 2b	// loop until p6 == 0

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)