/* Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Implements the functions

   char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5])

   AND

   char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5])

   The algorithm is as follows:
   > if src and dst are both 8-byte aligned, copy doubleword by doubleword
     else
   > copy byte by byte when n is small; otherwise shift and merge source
     doublewords so that dst can be written with aligned doubleword stores.

   NUL bytes in a loaded doubleword are detected with the cmpb
   instruction.  */

/* The optimizations focus on the following:
   1. data alignment [gain from aligned memory access on read/write]
   2. POWER7 gains performance with loop unrolling/unwinding
      [gain by reduction of branch penalty].
   3. The final pad with null bytes is done by calling an optimized
      memset.  */

/* Symbol name to define.  A previously defined STPNCPY / STRNCPY macro
   overrides the default name of the selected variant.  */
#ifdef USE_AS_STPNCPY
# ifndef STPNCPY
#  define FUNC_NAME __stpncpy
# else
#  define FUNC_NAME STPNCPY
# endif
#else
# ifndef STRNCPY
#  define FUNC_NAME strncpy
# else
#  define FUNC_NAME STRNCPY
# endif
#endif  /* !USE_AS_STPNCPY  */

/* Size of the stack frame created around the call to MEMSET.  */
#define		FRAMESIZE	(FRAME_MIN_SIZE+16)

#ifndef MEMSET
/* For builds with no IFUNC support, local calls should be made to internal
   GLIBC symbol (created by libc_hidden_builtin_def).  */
# ifdef SHARED
#  define MEMSET_is_local
#  define MEMSET   __GI_memset
# else
#  define MEMSET   memset
# endif
#endif

/* Function entry.
   In:   r3 = dst, r4 = src, r5 = n.
   Uses: r18 = original dst (the strncpy return value), r9 = working dst
	 pointer.  The caller's r18 and r19 are stored below the stack
	 pointer before they are reused.  */
	.machine  power7
#ifdef MEMSET_is_local
ENTRY_TOCLESS (FUNC_NAME, 4)
#else
ENTRY (FUNC_NAME, 4)
#endif
	CALL_MCOUNT 3

	or r10, r3, r4		/* combine the address bits of dst and src  */
	rldicl. r8, r10, 0, 61	/* r8 = low 3 bits; zero iff both 8-byte aligned  */

	std r19, -8(r1)		/* save caller's r19  */
	std r18, -16(r1)	/* save caller's r18  */
	cfi_offset(r19, -8)
	cfi_offset(r18, -16)

	mr r9, r3		/* r9 = working dst pointer  */
	mr r18, r3		/* r18 = dst, the strncpy return value  */
	bne 0, L(unaligned)	/* a low bit is set: take the unaligned path  */

/* dst and src are both 8-byte aligned, and r8 is zero on every path that
   gets here.  r11 = n / 8 is the number of whole doublewords.  With more
   than three of them the first two are checked here and the unrolled loop
   is entered at L(dwordCopy); otherwise L(update1) takes over.  */
L(aligned):
	srdi r11, r5, 3		/* r11 = n / 8, the doubleword count  */
	cmpldi cr7, r11, 3	/* unroll only when count > 3  */
	ble 7, L(update1)

	ld r10, 0(r4)		/* load doubleword from src  */
	cmpb r8, r10, r8	/* look for a NUL byte in what was just read  */
	cmpdi cr7, r8, 0	/* zero result: no NUL, continue  */
	bne cr7, L(update3)

	std r10, 0(r3)		/* copy doubleword at offset=0  */
	ld r10, 8(r4)		/* load next doubleword from offset=8  */
	cmpb r8, r10, r8	/* look for a NUL byte in what was just read  */
	cmpdi cr7, r8, 0	/* zero result: no NUL, continue  */
	bne 7,L(HopBy8)

	addi r8, r11, -4
	mr r7, r3		/* r7 = dst base used by the unrolled loop  */
	srdi r8, r8, 2
	mr r6, r4		/* r6 = src base used by the unrolled loop  */
	addi r8, r8, 1		/* r8 = (count - 4) / 4 + 1 loop iterations  */
	li r12, 0		/* r12 = zero operand for cmpb in L(dwordCopy)  */
	mtctr r8
	b L(dwordCopy)

/* Unrolled aligned copy, four doublewords per CTR iteration.  r4 / r9 are
   the src / dst pointers and r6 / r7 hold their values from the start of
   the iteration.  r11 and r5 are the doubleword count and byte length at
   the start of the iteration; r0 and r19 are the values they will have
   once the iteration is complete.  A doubleword containing a NUL byte
   leaves the loop for L(dWordUnrollOFF) with pointers and counts adjusted
   to the doublewords already stored.  */
	.p2align 4
L(dWordUnroll):
	std r8, 16(r9)		/* copy dword at offset=16  */
	ld r8, 24(r4)		/* load dword,perform loop unrolling again  */
	cmpb r10, r8, r10
	cmpdi cr7, r10, 0
	bne cr7, L(HopBy24)

	std r8, 24(r7)		/* copy dword at offset=24  */
	addi r9, r9, 32
	addi r4, r4, 32
	bdz  L(leftDwords)	/* continue with loop on counter  */

	ld r3, 32(r6)		/* first dword of the next group of four  */
	cmpb r8, r3, r10
	cmpdi cr7, r8, 0
	bne cr7, L(update2)

	std r3, 32(r7)
	ld r10, 40(r6)		/* second dword of the next group of four  */
	cmpb r8, r10, r8
	cmpdi cr7, r8, 0
	bne cr7, L(HopBy40)

	mr r6, r4		/* update values  */
	mr r7, r9
	mr r11, r0
	mr r5, r19

L(dwordCopy):
	std r10, 8(r9)		/* copy dword at offset=8  */
	addi r19, r5, -32	/* r19 = length left after this iteration  */
	addi r0, r11, -4	/* r0 = dwords left after this iteration  */
	ld r8, 16(r4)
	cmpb r10, r8, r12
	cmpdi cr7, r10, 0
	beq cr7, L(dWordUnroll)

	/* NUL in the dword at offset=16: account for the two dwords already
	   stored and fall through to L(dWordUnrollOFF).  */
	addi r9, r9, 16		/* increment dst by 16  */
	addi r4, r4, 16		/* increment src by 16  */
	addi r5, r5, -16	/* decrement length 'n' by 16  */
	addi r0, r11, -2	/* decrement loop counter  */

/* Aligned copy without unrolling.  r0 = doublewords left, r4 / r9 =
   src / dst, r5 = bytes left.  One doubleword is copied per iteration
   until CTR runs out or a doubleword containing a NUL byte is loaded; in
   both cases the rest is handled by L(byte_by_byte), which follows the
   loop.  */
L(dWordUnrollOFF):
	ld r10, 0(r4)		/* load first dword  */
	li r8, 0		/* load mask  */
	cmpb r8, r10, r8
	cmpdi cr7, r8, 0
	bne cr7, L(byte_by_byte)
	mtctr r0
	li r7, 0		/* r7 = zero operand for cmpb inside the loop  */
	b L(CopyDword)

	.p2align 4
L(loadDWordandCompare):
	ld r10, 0(r4)
	cmpb r8, r10, r7
	cmpdi cr7, r8, 0
	bne cr7, L(byte_by_byte)

L(CopyDword):
	addi r9, r9, 8
	std r10, -8(r9)		/* store the dword; dst already advanced by 8  */
	addi r4, r4, 8
	addi r5, r5, -8
	bdnz L(loadDWordandCompare)

/* Byte copy, unrolled four bytes per CTR iteration.  r4 = src, r9 = dst at
   the start of this phase, r5 = bytes left at the start of this phase (it
   is not updated inside the loop), r19 = running dst pointer.  With three
   or fewer bytes left L(verifyByte) is used instead.  The loop is entered
   at L(firstByteUnroll), which follows L(bytes_unroll) and handles the
   first byte of every group.  */
L(byte_by_byte):
	cmpldi cr7, r5, 3
	ble cr7, L(verifyByte)
	srdi r10, r5, 2		/* r10 = r5 / 4 groups of four bytes  */
	mr r19, r9
	mtctr r10
	b L(firstByteUnroll)

	.p2align 4
L(bytes_unroll):
	lbz r10, 1(r4)		/* load byte from src  */
	cmpdi cr7, r10, 0	/* compare for NUL  */
	stb r10, 1(r19)		/* store byte to dst  */
	beq cr7, L(updtDestComputeN2ndByte)

	addi r4, r4, 4		/* advance src  */

	lbz r10, -2(r4)		/* perform loop unrolling for byte r/w  */
	cmpdi cr7, r10, 0
	stb r10, 2(r19)
	beq cr7, L(updtDestComputeN3rdByte)

	lbz r10, -1(r4)		/* perform loop unrolling for byte r/w  */
	addi r19, r19, 4
	cmpdi cr7, r10, 0
	stb r10, -1(r19)
	beq cr7, L(ComputeNByte)

	bdz L(update0)		/* all groups of four done without a NUL  */

/* First byte of each group of four in the unrolled byte copy.  When the
   byte is a NUL, r19 is advanced past it and execution falls through to
   the code that computes the zero-fill length.  */
L(firstByteUnroll):
	lbz r10, 0(r4)		/* perform loop unrolling for byte r/w  */
	cmpdi cr7, r10, 0	/* compare the byte just loaded for NUL  */
	stb r10, 0(r19)
	bne cr7, L(bytes_unroll)
	addi r19, r19, 1

/* A NUL has been stored and r19 points just past it.  r9 - r19 is minus
   the number of bytes written by the byte loop, so adding r5 (the length
   at the start of that loop) gives the number of bytes left to clear.  */
L(ComputeNByte):
	subf r9, r19, r9	/* compute 'n' bytes to fill  */
	add r8, r9, r5

/* Clear r8 bytes starting at r19 by calling MEMSET (skipped when r8 is
   zero), then return.  */
L(zeroFill):
	cmpdi cr7, r8, 0	/* compare if length is zero  */
	beq cr7, L(update3return)

	mflr r0			/* load link register LR to r0  */
	std r0, 16(r1)		/* store the link register  */
	stdu r1, -FRAMESIZE(r1)	/* create the stack frame  */
	cfi_adjust_cfa_offset(FRAMESIZE)
	cfi_offset(lr, 16)
	mr r3, r19		/* start of the area to clear  */
	li r4, 0		/* zero fill buffer  */
	mr r5, r8		/* how many bytes to fill buffer with  */
	bl MEMSET		/* call optimized memset  */
#ifndef MEMSET_is_local
	nop
#endif
	ld r0, FRAMESIZE+16(r1) /* read the saved link register  */
	addi r1, r1, FRAMESIZE	/* restore stack pointer  */
	cfi_adjust_cfa_offset(-FRAMESIZE)
	mtlr r0
	cfi_restore(lr)

L(update3return):
#ifdef USE_AS_STPNCPY
	addi r3, r19, -1	/* stpncpy: return r19 - 1  */
#endif

L(hop2return):
#ifndef USE_AS_STPNCPY
	mr r3, r18		/* strncpy: return the original dst  */
#endif
	ld r18, -16(r1)		/* restore callers save register, r18  */
	ld r19, -8(r1)		/* restore callers save register, r19  */
	blr			/* return  */

/* Tail of the byte copy: the last r5 & 3 bytes are copied one at a time.
   L(update0) is entered when the unrolled byte loop ran out of groups of
   four without meeting a NUL.  */
	.p2align 4
L(update0):
	mr r9, r19		/* continue at the running dst pointer  */

	.p2align 4
L(verifyByte):
	rldicl. r8, r5, 0, 62	/* r8 = r5 & 3  */
#ifdef USE_AS_STPNCPY
	mr r3, r9		/* stpncpy result when nothing is left to copy  */
#endif
	beq cr0, L(hop2return)
	mtctr r8
	addi r4, r4, -1		/* bias src for the lbzu below  */
	mr r19, r9
	b L(oneBYone)

	.p2align 4
L(proceed):
	bdz L(done)

L(oneBYone):
	lbzu r10, 1(r4)		/* copy byte  */
	addi r19, r19, 1
	addi r8, r8, -1		/* r8 = bytes left, used by L(zeroFill)  */
	cmpdi cr7, r10, 0
	stb r10, -1(r19)
	bne cr7, L(proceed)
	b L(zeroFill)

	.p2align 4
L(done):
#ifdef USE_AS_STPNCPY
	mr r3, r19		/* set the return value  */
#else
	mr r3, r18		/* set the return value  */
#endif
	ld r18, -16(r1)		/* restore callers save register, r18  */
	ld r19, -8(r1)		/* restore callers save register, r19  */
	blr			/* return  */

/* Out-of-line stubs used by the aligned copy and the unrolled byte loop.
   The doubleword stubs set r4 (src), r9 (dst), r5 (bytes left) and r0
   (doublewords left) as needed and continue at L(dWordUnrollOFF); the two
   byte-loop stubs compute the zero-fill length and go to L(zeroFill).  */
L(update1):
	mr r0, r11		/* three or fewer dwords: no unrolling  */
	mr r19, r5

	.p2align 4
L(leftDwords):
	cmpdi cr7, r0, 0	/* any whole dwords left?  */
	mr r5, r19
	bne cr7, L(dWordUnrollOFF)
	b L(byte_by_byte)

	.p2align 4
L(updtDestComputeN2ndByte):
	addi r19, r19, 2	/* update dst by 2  */
	subf r9, r19, r9	/* compute distance covered  */
	add r8, r9, r5		/* r8 = bytes left to zero-fill  */
	b L(zeroFill)

	.p2align 4
L(updtDestComputeN3rdByte):
	addi r19, r19, 3	/* update dst by 3  */
	subf r9, r19, r9	/* compute distance covered  */
	add r8, r9, r5		/* r8 = bytes left to zero-fill  */
	b L(zeroFill)

	.p2align 4
L(HopBy24):
	addi r9, r9, 24		/* increment dst by 24  */
	addi r4, r4, 24		/* increment src by 24  */
	addi r5, r5, -24	/* decrement length 'n' by 24  */
	addi r0, r11, -3	/* decrement loop counter  */
	b L(dWordUnrollOFF)

	.p2align 4
L(update2):
	mr r5, r19		/* r4, r9 and r0 are already up to date  */
	b L(dWordUnrollOFF)

	.p2align 4
L(HopBy40):
	addi r9, r7, 40		/* increment dst by 40  */
	addi r4, r6, 40		/* increment src by 40  */
	addi r5, r5, -40	/* decrement length 'n' by 40  */
	addi r0, r11, -5	/* decrement loop counter  */
	b L(dWordUnrollOFF)

L(update3):
	mr r0, r11		/* nothing copied yet: all dwords remain  */
	b L(dWordUnrollOFF)

L(HopBy8):
	addi r9, r3, 8		/* increment dst by 8  */
	addi r4, r4, 8		/* increment src by 8  */
	addi r5, r5, -8		/* decrement length 'n' by 8  */
	addi r0, r11, -1	/* decrement loop counter  */
	b L(dWordUnrollOFF)

/* At least one of dst and src is not 8-byte aligned.  For n <= 16 (signed
   compare) the byte loop is used directly.  Otherwise r7 = dst & 7 and
   r6 = src & 7.  When src is unaligned, the aligned doubleword that holds
   its first bytes is loaded and the bytes belonging to the string are
   checked for a NUL.  */
L(unaligned):
	cmpdi	r5, 16		/* Proceed byte by byte for n <= 16  */
	ble	L(byte_by_byte)
	rldicl	r7, r3, 0, 61	/* r7 = dst & 7  */
	rldicl	r6, r4, 0, 61	/* r6 = src & 7  */
	cmpdi	r6, 0	/* Check src alignment */
	beq	L(srcaligndstunalign)
	/* src is unaligned */
	rlwinm	r10, r4, 3,26,28	/* r10 = (src & 7) * 8, offset in bits  */
	clrrdi	r4, r4, 3	/* Align the addr to dw boundary */
	ld	r8, 0(r4)	/* Load doubleword from memory.  */
	li	r0, 0
	/* Discard bits not part of the string */
#ifdef __LITTLE_ENDIAN__
	srd	r7, r8, r10
#else
	sld	r7, r8, r10
#endif
	cmpb	r0, r7, r0	/* Compare each byte against null */
	/* Discard the matches produced by the zero bytes shifted in above */
#ifdef __LITTLE_ENDIAN__
	sld	r0, r0, r10
#else
	srd	r0, r0, r10
#endif
	cmpdi	r0, 0
	bne     L(bytebybyte)	/* if it has null, copy byte by byte */
	subfic	r6, r6, 8	/* r6 = string bytes held by this dw  */
	rlwinm	r12, r3, 3,26,28	/* Calculate padding in bits.  */
	rldicl	r9, r3, 0, 61	/* Calculate padding in bytes. */
	addi	r3, r3, -1	/* bias dst by -1 for the stbu stores  */

	cmpdi	r12, 0	/* check dest alignment */
	beq     L(srcunaligndstalign)

/* Both src and dst are unaligned.  r8 becomes the first source doubleword
   with the bytes in front of the string cleared; r10 / r12 = src / dst
   offset in bits, r9 = dst offset in bytes, r6 = string bytes held by r8,
   r11 = running bit shift that selects the next byte of r8.  When the dst
   offset is larger than the src offset, 8 - r9 bytes are stored so that
   dst becomes 8-byte aligned (using one word store when at least four
   bytes are needed); otherwise L(dst_align_small) is used.  */
#ifdef __LITTLE_ENDIAN__
	sld	r8, r7, r10
	mr	r11, r10
	addi	r11, r11, -8	/* Adjust byte pointer on loaded dw */
#else
	srd	r8, r7, r10
	subfic	r11, r10, 64
#endif
	/* dst alignment is greater than src alignment? */
	cmpd    cr7, r12, r10
	ble     cr7, L(dst_align_small)
	/* src alignment is less than dst */

	/* Calculate the dst alignment difference  */
	subfic	r7, r9, 8
	mtctr	r7

	/* Write until dst is aligned  */
	cmpdi	cr0, r7, 4
	blt     L(storebyte1)	/* less than 4, store byte by byte  */
	beq     L(equal1)	/* if its 4, store word  */
	addi	r0, r7, -4	/* greater than 4, so stb and stw  */
	mtctr	r0
L(storebyte1):
#ifdef __LITTLE_ENDIAN__
	addi	r11, r11, 8	/* Adjust byte pointer on loaded dw  */
#else
	addi	r11, r11, -8
#endif
	srd	r7, r8, r11
	stbu	r7, 1(r3)
	addi	r5, r5, -1
	bdnz    L(storebyte1)

	subfic	r7, r9, 8	/* Check the remaining bytes  */
	cmpdi	cr0, r7, 4
	blt     L(proceed1)

	.align 4
L(equal1):
#ifdef __LITTLE_ENDIAN__
	addi	r11, r11, 8	/* Adjust byte pointer on loaded dw  */
	srd	r7, r8, r11
#else
	subfic	r11, r11, 64
	sld	r7, r8, r11
	srdi	r7, r7, 32
#endif
	stw	r7, 1(r3)
	addi	r3, r3, 4
	addi	r5, r5, -4

L(proceed1):
	mr	r7, r8
	/* calculate the shifts for the bytes left over in r8  */
	subfic	r11, r10, 64
	subfic	r12, r12, 64
	subf	r12, r12, r11	/* r12 = dst offset - src offset, in bits  */
	subfic	r10, r12, 64	/* r10 = 64 - r12  */
	subfic	r9, r9, 8
	subf	r6, r9, r6	/* r6 = bytes of r8 not yet written  */
L(srcunaligndstalign):
	addi	r3, r3, 1	/* undo the -1 bias on dst  */
	subfic	r12, r10, 64	/* r12 = 64 - r10, shift for the next dw  */
	addi	r4, r4, 8
	li	r0,0
	b       L(storedouble)

/* The dst offset is not larger than the src offset.  The r6 string bytes
   of the first doubleword are stored one at a time, after which src
   continues at the next aligned doubleword.  If the two offsets were
   equal, the aligned path is taken from here; otherwise r6 and r7 are set
   to the new dst & 7 and execution falls through to the code for aligned
   src with unaligned dst.  */
	.align 4
L(dst_align_small):
	mtctr	r6
	/* Write until src is aligned  */
L(storebyte2):
#ifdef __LITTLE_ENDIAN__
	addi	r11, r11, 8	/* Adjust byte pointer on dw  */
#else
	addi	r11, r11, -8
#endif
	srd	r7, r8, r11
	stbu	r7, 1(r3)
	addi	r5, r5, -1
	bdnz    L(storebyte2)

	addi	r4, r4, 8	/* Increment src pointer  */
	addi	r3, r3, 1	/* Increment dst pointer  */
	mr	r9, r3
	li	r8, 0		/* L(aligned) expects r8 to be zero  */
	cmpd    cr7, r12, r10
	beq     cr7, L(aligned)
	rldicl	r6, r3, 0, 61	/* Recalculate padding */
	mr	r7, r6

/* src is 8-byte aligned, dst is not; r7 = dst & 7 on entry.  The source
   doubleword is loaded into r8 and, if it contains a NUL, the byte loop
   takes over.  Otherwise 8 - r7 bytes of it are stored so that dst becomes
   8-byte aligned (using one word store when at least four bytes are
   needed); the r6 = dst & 7 bytes left in r8 are merged with the next
   doubleword at L(storedouble).  */
L(srcaligndstunalign):
	mr	r9, r3
	mr	r6, r7
	ld	r8, 0(r4)
	subfic	r10, r7, 8	/* r10 = bytes to store until dst is aligned  */
	mr	r7, r8
	li	r0, 0	/* Check null */
	cmpb	r0, r8, r0
	cmpdi	r0, 0
	bne     L(byte_by_byte)	/* Do byte by byte if there is NULL  */
	rlwinm	r12, r3, 3,26,28	/* r12 = (dst & 7) * 8, offset in bits  */
	addi	r3, r3, -1	/* bias dst by -1 for the stbu stores  */
	/* write byte by byte until aligned  */
#ifdef __LITTLE_ENDIAN__
	li	r11, -8
#else
	li	r11, 64
#endif
	mtctr	r10
	cmpdi	cr0, r10, 4
	blt     L(storebyte)	/* less than 4, store byte by byte  */
	beq     L(equal)	/* exactly 4, store one word  */
	addi	r0, r10, -4	/* more than 4: bytes first, then a word  */
	mtctr	r0
L(storebyte):
#ifdef __LITTLE_ENDIAN__
	addi	r11, r11, 8	/* Adjust byte pointer on  dw  */
#else
	addi	r11, r11, -8
#endif
	srd	r7, r8, r11
	stbu	r7, 1(r3)
	addi	r5, r5, -1
	bdnz    L(storebyte)

	cmpdi	cr0, r10, 4
	blt     L(align)

	.align 4
L(equal):
#ifdef __LITTLE_ENDIAN__
	addi	r11, r11, 8
	srd	r7, r8, r11
#else
	subfic	r11, r11, 64
	sld	r7, r8, r11
	srdi	r7, r7, 32
#endif
	stw	r7, 1(r3)
	addi	r5, r5, -4
	addi	r3, r3, 4
L(align):
	addi	r3, r3, 1	/* undo the -1 bias on dst  */
	addi	r4, r4, 8	/* Increment src pointer  */
	subfic	r10, r12, 64	/* r10 = 64 - r12, shift for the bytes left in r8  */
	li	r0, 0
/* Main loop of the unaligned copy; the dst address is aligned to 8 here.
   r8 = previous source doubleword, of which r6 bytes are not yet written,
   r10 / r12 = bit shifts applied to r8 / to the next doubleword, r0 = 0.
   Each iteration loads the next doubleword, merges it with the bytes
   pending in r8 and stores the result with one std.  The loop is left
   for L(null1) when eight or fewer bytes remain and for L(null) when the
   doubleword just loaded contains a NUL.  */
L(storedouble):
	cmpdi	r5, 8
	ble	L(null1)
	ld	r7, 0(r4)	/* load next dw  */
	cmpb	r0, r7, r0
	cmpdi	r0, 0	/* check for null on each new dw  */
	bne     L(null)
#ifdef __LITTLE_ENDIAN__
	srd	r9, r8, r10	/* bytes from first dw  */
	sld	r11, r7, r12	/* bytes from second dw  */
#else
	sld	r9, r8, r10
	srd	r11, r7, r12
#endif
	or	r11, r9, r11	/* make as a single dw  */
	std	r11, 0(r3)	/* store as std on aligned addr  */
	mr	r8, r7		/* still few bytes left to be written  */
	addi	r3, r3, 8	/* increment dst addr  */
	addi	r4, r4, 8	/* increment src addr  */
	addi	r5, r5, -8
	b       L(storedouble)	/* Loop until NULL  */

/* We've hit the end of the string.  Do the rest byte-by-byte.
   r8 still holds r6 unwritten bytes of the previous doubleword.  They are
   stored first, using one word store when at least four are pending, and
   then the bytes of r7 follow at L(bytebybyte).  r10 is the running bit
   shift that selects the next byte of r8, and r3 is biased by -1 for the
   stbu stores.  */
	.align 4

L(null):
	addi	r3, r3, -1
	mr	r10, r12
	mtctr	r6
#ifdef __LITTLE_ENDIAN__
	subfic	r10, r10, 64
	addi	r10, r10, -8
#endif
	cmpdi	cr0, r5, 4
	blt	L(loop)
	cmpdi	cr0, r6, 4
	blt     L(loop)

	/* we can still use stw if leftover >= 4  */
#ifdef __LITTLE_ENDIAN__
	addi	r10, r10, 8
	srd	r11, r8, r10
#else
	subfic	r10, r10, 64
	sld	r11, r8, r10
	srdi	r11, r11, 32
#endif
	stw	r11, 1(r3)
	addi	r5, r5, -4
	addi	r3, r3, 4
	cmpdi	cr0, r5, 0
	beq	L(g1)
	cmpdi	cr0, r6, 4
	beq     L(bytebybyte1)
	addi	r10, r10, 32
#ifdef __LITTLE_ENDIAN__
	addi	r10, r10, -8
#else
	subfic	r10, r10, 64
#endif
	addi	r0, r6, -4	/* bytes of r8 left after the word store  */
	mtctr	r0
	/* remaining byte by byte part of first dw  */
L(loop):
#ifdef __LITTLE_ENDIAN__
	addi	r10, r10, 8
#else
	addi	r10, r10, -8
#endif
	srd	r0, r8, r10
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	cmpdi	cr0, r5, 0
	beq	L(g1)
	bdnz    L(loop)
L(bytebybyte1):
	addi	r3, r3, 1	/* undo the -1 bias on dst  */
/* Remaining byte by byte part of the second dw.
   The bytes of r7 are stored one at a time, in memory order, until a NUL
   byte has been stored (L(g2)) or r5 reaches zero (L(g1)).  r3 is biased
   by -8 so that the first stbu, with displacement 8, writes the next dst
   byte; the following ones advance by one.  The eighth byte needs no
   test: both exits are unconditional branches to L(g2).  */
L(bytebybyte):
	addi	r3, r3, -8
	addi	r4, r4, -1

#ifdef __LITTLE_ENDIAN__
	extrdi. r0, r7, 8, 56
	stbu	r7, 8(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 48
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 40
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 32
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 24
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 16
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 8
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi	r0, r7, 8, 0
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	b	L(g2)
#else
	extrdi. r0, r7, 8, 0
	stbu	r0, 8(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 8
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 16
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 24
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 32
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 40
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	extrdi. r0, r7, 8, 48
	stbu	r0, 1(r3)
	addi	r5, r5, -1
	beq	L(g2)
	cmpdi	r5, 0
	beq	L(g1)
	stbu	r7, 1(r3)
	addi	r5, r5, -1
	b	L(g2)
#endif
/* Exits of the unaligned copy.
   L(g1):    r5 reached zero before a NUL was stored.  For stpncpy r3 gets
	     an extra increment, so that the r19 - 1 computed at
	     L(update3return) is one past the last byte stored.
   L(g2):    r3 points at the last byte stored; the r5 bytes that follow
	     it are cleared through L(zeroFill).
   L(null1): eight or fewer bytes remain.  src is moved back over the r6
	     bytes of the previous doubleword that were not yet written,
	     and the byte loop finishes the copy.  */
L(g1):
#ifdef USE_AS_STPNCPY
	addi	r3, r3, 1
#endif
L(g2):
	addi	r3, r3, 1
	mr	r19, r3		/* r19 = start of the area to clear  */
	mr	r8, r5		/* r8 = number of bytes to clear  */
	b	L(zeroFill)
L(null1):
	mr	r9, r3
	subf	r4, r6, r4
	b	L(byte_by_byte)
END(FUNC_NAME)
#ifndef USE_AS_STPNCPY
libc_hidden_builtin_def (strncpy)
#endif
