/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	/* flush the core internal write buffer before invalidating the dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)
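
/*
 * Illustrative calling pattern for the two core-lock helpers (a sketch
 * only, written as C-like pseudocode inside this comment; the variable
 * names are made up and nothing here is assembled):
 *
 *	flags = _get_core_lock(&word);   // cli, spin on _corelock, flushinv
 *	...read/modify the 32bit word...
 *	_put_core_lock(flags, &word);    // release _corelock, sti(flags)
 */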

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, r2, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate the current core mask.
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set the bits of the other cores in the barrier mask.
	 * Don't change the current core's bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate the current core mask.
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear the current core's bit in the barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
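
/*
 * Sketch of the barrier protocol implemented by the two routines above,
 * as C-like pseudocode (illustration only; the real state is the
 * _barrier_mask word, always updated under the core lock):
 *
 *	mark:	barrier_mask = ~my_bit | (barrier_mask & my_bit);
 *	check:	if (barrier_mask & my_bit) {
 *			barrier_mask &= ~my_bit;
 *			resync_core_dcache();	// invalidate local D-cache
 *		}
 *
 * where my_bit is 1 << cpuid for the executing core.
 */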

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;
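
/*
 * Sketch of the CPU "fingerprint" scheme used by the two helpers above
 * (C-like pseudocode, illustration only).  The top nibble of the atomic
 * word records which cores last went through _end_lock_coherent; the
 * low 28 bits hold the lock value itself.  With my_bit = 1 << cpuid:
 *
 *	start:	others = (*p >> 28) & ~my_bit;	// other cores' marks
 *		*p &= 0x0fffffff;		// strip the fingerprints
 *		release _corelock;		// IRQs stay masked meanwhile
 *		if (others)
 *			resync_core_dcache();	// our D-cached copy may be stale
 *		sti(flags);
 *	end:	*p |= my_bit << 28;		// leave our fingerprint
 *		_put_core_lock(flags, p);
 */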

#endif /* __ARCH_SYNC_CORE_DCACHE */

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that the flush
	   behaves like a nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)
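
/*
 * Illustrative C-level usage (a sketch only; the wrapper name and its
 * location are assumptions made for this comment, not defined here):
 *
 *	static inline void my_arch_spin_lock(arch_spinlock_t *lock)
 *	{
 *		__raw_spin_lock_asm(&lock->lock);
 *	}
 */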

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)
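
/*
 * Worked example of the read-side counting above (values illustrative,
 * assuming the usual RW_LOCK_BIAS of 0x01000000): an unlocked rwlock
 * holds the bias; each reader decrements it by one, so three readers
 * leave 0x00fffffd.  A writer subtracts the whole bias, giving 0, so a
 * reader's decrement would go negative and is backed out again in
 * .Lrdlock_failed above.
 */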

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing under a reader lock should have no cache side
 * effects visible to the other core, so we just release the core
 * lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)
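
/*
 * Note on the nibble masking in the coherent case above: with
 * __ARCH_SYNC_CORE_DCACHE the high nibble of the lock word may carry
 * the CPU fingerprints left by _end_lock_coherent, so a free lock can
 * read, e.g., 0x21000000 rather than the bare bias (illustrative
 * values, assuming RW_LOCK_BIAS is 0x01000000).  Shifting left then
 * right by 4 discards that nibble before the comparison, so stale
 * fingerprints are not mistaken for outstanding readers.
 */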

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)
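
/*
 * Illustrative C-level usage (a sketch; the wrapper below is an
 * assumption about how callers use this routine, not part of this
 * file):
 *
 *	static inline int my_atomic_add_return(int i, atomic_t *v)
 *	{
 *		return __raw_atomic_update_asm(&v->counter, i);
 *	}
 */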

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits in a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush the core internal write buffer before invalidating the dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define	__do_xchg(src, dst) 		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
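
/*
 * For reference, __do_xchg([p1], [p1]) above expands to (roughly):
 *
 *	p1 = r0;
 *	r3 = r1;
 *	[--sp] = rets;
 *	call _get_core_lock;
 *	r2 = [p1];		// fetch the old value
 *	[p1] = r3;		// store the new value
 *	r3 = r2;
 *	r1 = p1;
 *	call _put_core_lock;
 *	r0 = r3;		// return the old value
 *	rets = [sp++];
 *	rts;
 */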

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define	__do_cmpxchg(src, dst) 		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
     1: r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
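
/*
 * The cmpxchg helpers above implement the usual compare-and-swap
 * semantics; as C-like pseudocode (illustration only):
 *
 *	old = *ptr;
 *	if (old == expected)
 *		*ptr = new;
 *	return old;
 *
 * The whole sequence runs with the core lock held and IRQs masked,
 * which is what makes it atomic across both cores.
 */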

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)
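
/*
 * Example: ___raw_bit_set_asm(ptr, 5) builds the mask 1 << 5 = 0x20 in
 * r1 and tail-jumps into ___raw_atomic_set_asm, which returns the old
 * 32bit word in r0.
 */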

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
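
/*
 * Example (illustrative): for bitnr 3, the call above returns the old
 * word in r0; ANDing it with 1 << 3 = 0x8 and normalizing to 0/1 gives
 * the bit's previous value, which is what the test-and-set family of
 * routines returns.
 */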

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word and return the old
 * bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	/* flush the core internal write buffer before invalidating the dcache */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)