2 * arch/ubicom32/lib/mem_ubicom32.c
5 * (C) Copyright 2009, Ubicom, Inc.
7 * This file is part of the Ubicom32 Linux Kernel Port.
9 * The Ubicom32 Linux Kernel Port is free software: you can redistribute
10 * it and/or modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation, either version 2 of the
12 * License, or (at your option) any later version.
14 * The Ubicom32 Linux Kernel Port is distributed in the hope that it
15 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with the Ubicom32 Linux Kernel Port. If not,
21 * see <http://www.gnu.org/licenses/>.
23 * Ubicom32 implementation derived from (with many thanks):
29 #include <linux/module.h>
30 #include <linux/types.h>
31 #include <linux/compiler.h>
/* Upper-case spelling of the unlikely() branch hint (presumably the one from <linux/compiler.h>). */
34 #define UNLIKELY unlikely
/*
 * memcpy()
 *	Copy n bytes from src to dest using the widest move that the relative
 *	alignment of the two pointers allows:
 *	  - dest and src share the same offset within a 4-byte word and
 *	    n > 6: 32-bit moves (move.4)
 *	  - dest and src share the same parity and n > 2: 16-bit moves (move.2)
 *	  - remaining case: byte moves (move.1)
 *
 *	NOTE(review): this excerpt does not show every line of the function
 *	(the declaration of m, the asm statement openers, the numeric local
 *	labels and the return statement are not visible), so the comments
 *	below are limited to what the visible lines establish.
 */
41 void *memcpy(void *dest, const void *src, size_t n)
/* Keep the caller's dest: the asm advances dest itself ("+a" operand); presumably this is the value returned. */
43 void *dest_ret = dest;
/* 32-bit path: both pointers have the same offset within a word, so aligning dest also aligns src. */
45 if (LIKELY((((addr_t)dest ^ (addr_t)src) & 3) == 0) && LIKELY(n > 6)) {
/* Take the 0..3 head bytes needed to bring dest up to a 4-byte boundary off n. */
47 n -= (4 - (addr_t)dest) & 0x03;
/* %2 is m, declared on a line not shown here; presumably the count of 32-bit moves -- TODO confirm. */
50 " sub.4 d15, #0, %2 \n\t" // set up for jump table
51 " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
52 " moveai a3, #%%hi(1f) \n\t"
53 " lea.1 a3, %%lo(1f)(a3) \n\t"
54 " lea.4 a3, (a3,d15) \n\t"
/* Head: copy 0..3 bytes according to dest's low two bits, then enter the move.4 sequence through a3. */
56 " bfextu d15, %0, #2 \n\t" // d15 = (dest & 3)
57 " jmpne.w.f 100f \n\t"
58 " calli a3, 0(a3) \n\t" // 4-byte alignment
60 "100: cmpi d15, #2 \n\t"
61 " jmpne.s.f 101f \n\t"
62 " move.2 (%0)2++, (%1)2++ \n\t"
63 " calli a3, 0(a3) \n\t" // 2-byte alignment
65 "101: move.1 (%0)1++, (%1)1++ \n\t"
66 " jmpgt.s.f 102f \n\t" // 3-byte alignment
67 " move.2 (%0)2++, (%1)2++ \n\t" // 1-byte alignment
68 "102: calli a3, 0(a3) \n\t"
/* Tail: entered from the "check n" test below when n & 3 is non-zero; a halfword is copied only if at least 2 bytes remain. */
70 "200: cmpi %3, #2 \n\t"
71 " jmplt.s.f 201f \n\t"
72 " move.2 (%0)2++, (%1)2++ \n\t"
74 "201: move.1 (%0)1++, (%1)1++ \n\t"
78 " movea (%0)4++, (%1)4++ \n\t"
81 " move.4 (%0)4++, (%1)4++ \n\t"
/* One pass through the unrolled sequence accounts for 32 units of m. */
83 " add.4 %2, #-32, %2 \n\t"
86 " and.4 %3, #3, %3 \n\t" // check n
87 " jmpne.w.f 200b \n\t"
89 : "+a" (dest), "+a" (src), "+d" (m), "+d" (n)
91 : "d15", "a3", "memory", "cc"
/* 16-bit path: both pointers have the same parity, so at most one head byte aligns both. */
97 if (LIKELY((((addr_t)dest ^ (addr_t)src) & 1) == 0) && LIKELY(n > 2)) {
/* Take the single head byte off n when dest is odd. */
99 n -= (addr_t)dest & 0x01;
102 " sub.4 d15, #0, %2 \n\t" // set up for jump table
103 " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
104 " moveai a3, #%%hi(1f) \n\t"
105 " lea.1 a3, %%lo(1f)(a3) \n\t"
106 " lea.4 a3, (a3,d15) \n\t"
108 " btst %0, #0 \n\t" // check bit 0
109 " jmpne.w.f 100f \n\t"
110 " calli a3, 0(a3) \n\t" // dest already 2-byte aligned
/* Odd dest: copy one byte first, then enter the move.2 sequence. */
112 "100: move.1 (%0)1++, (%1)1++ \n\t"
113 " calli a3, 0(a3) \n\t"
/* Tail: the single leftover byte when n is odd. */
115 "200: move.1 (%0)1++, (%1)1++ \n\t"
119 " move.2 (%0)2++, (%1)2++ \n\t"
121 " add.4 %2, #-32, %2 \n\t"
124 " and.4 %3, #1, %3 \n\t" // check n
125 " jmpne.w.f 200b \n\t"
128 : "+a" (dest), "+a" (src), "+d" (m), "+d" (n)
130 : "d15", "a3", "memory", "cc"
/* Byte path: no head or tail handling, n itself (%2) drives the jump table; the sequence is stepped 16 at a time. */
137 " sub.4 d15, #0, %2 \n\t"
139 " and.4 d15, #(16-1), d15 \n\t" // d15 = (-n) & (16 - 1)
140 " moveai a3, #%%hi(1f) \n\t"
141 " lea.1 a3, %%lo(1f)(a3) \n\t"
142 " lea.4 a3, (a3,d15) \n\t"
143 " calli a3, 0(a3) \n\t"
146 " move.1 (%0)1++, (%1)1++ \n\t"
148 " add.4 %2, #-16, %2 \n\t"
152 : "+a" (dest), "+a" (src), "+d" (n)
154 : "d15", "a3", "memory", "cc"
/*
 * memset()
 *	Fill n bytes starting at s with the byte value c.
 *
 *	Two asm sequences are visible: one that aligns s and then stores
 *	32-bit words (move.4) followed by a 0..3 byte tail, and one that
 *	stores single bytes (move.1).
 *
 *	NOTE(review): the test that selects between the two sequences, the
 *	declaration of m, the numeric local labels and the return statement
 *	are not visible in this excerpt.
 */
163 void *memset(void *s, int c, size_t n)
/* Take the 0..3 head bytes needed to bring s up to a 4-byte boundary off n. */
169 n -= (4 - (addr_t)s) & 0x03;
/* %2 is m, declared on a line not shown here; presumably the count of 32-bit stores -- TODO confirm. */
172 " sub.4 d15, #0, %2 \n\t" // set up for jump table
173 " and.4 d15, #(32-1), d15 \n\t" // d15 = (-m) & (32 - 1)
174 " shmrg.1 %1, %1, %1 \n\t"
175 " shmrg.2 %1, %1, %1 \n\t" // %1 = (c<<24)|(c<<16)|(c<<8)|c
176 " moveai a3, #%%hi(1f) \n\t"
177 " lea.1 a3, %%lo(1f)(a3) \n\t"
178 " lea.4 a3, (a3,d15) \n\t"
/* Head: store 0..3 bytes according to the low two bits of s, then enter the move.4 sequence through a3. */
180 " bfextu d15, %0, #2 \n\t" // d15 = (s & 3)
181 " jmpne.w.f 100f \n\t"
182 " calli a3, 0(a3) \n\t" // 4-byte alignment
184 "100: cmpi d15, #2 \n\t"
185 " jmpne.s.f 101f \n\t"
186 " move.2 (%0)2++, %1 \n\t"
187 " calli a3, 0(a3) \n\t" // 2-byte alignment
189 "101: move.1 (%0)1++, %1 \n\t"
190 " jmpgt.s.f 102f \n\t" // 3-byte alignment
191 " move.2 (%0)2++, %1 \n\t" // 1-byte alignment
192 "102: calli a3, 0(a3) \n\t"
/* Tail: entered from the n & 3 test below when bytes remain; a halfword is stored only if at least 2 bytes are left. */
194 "200: cmpi %3, #2 \n\t"
195 " jmplt.s.f 201f \n\t"
196 " move.2 (%0)2++, %1 \n\t"
198 "201: move.1 (%0)1++, %1 \n\t"
202 " movea (%0)4++, %1 \n\t"
205 " move.4 (%0)4++, %1 \n\t"
207 " add.4 %2, #-32, %2 \n\t"
210 " and.4 %3, #3, %3 \n\t" // n &= 3: leftover tail bytes
211 " jmpne.w.f 200b \n\t"
214 : "+a" (s), "+d" (c), "+d" (m), "+d" (n)
216 : "d15", "a3", "memory", "cc"
/* Byte sequence: n itself (%2) drives an 8-entry jump table of move.1 stores. */
223 " sub.4 d15, #0, %2 \n\t"
225 " and.4 d15, #(8-1), d15 \n\t" // d15 = (-%2) & (8 - 1)
226 " moveai a3, #%%hi(1f) \n\t"
227 " lea.1 a3, %%lo(1f)(a3) \n\t"
228 " lea.4 a3, (a3,d15) \n\t"
229 " calli a3, 0(a3) \n\t"
232 " move.1 (%0)1++, %1 \n\t"
236 : "+a" (s), "+d" (c), "+d" (n)
238 : "d15", "a3", "memory", "cc"
/*
 * memmove()
 *	Copy n bytes from src to dest.
 *
 *	Four asm sequences are visible: ascending and descending copies
 *	using 16-bit moves (taken when dest, src and n are all even), and
 *	ascending and descending copies using byte moves.
 *
 *	NOTE(review): the test that chooses between the ascending and
 *	descending variants is not visible in this excerpt (presumably a
 *	comparison of dest and src so that overlapping regions are copied
 *	safely -- TODO confirm).  tmp and s are locals declared on lines
 *	not shown; presumably working copies of dest and src.
 */
244 void *memmove(void *dest, const void *src, size_t n)
256 * Will perform 16-bit move if possible
/* All of dest, src and n must be even for the 16-bit variants. */
258 if (likely((((u32)dest | (u32)src | n) & 1) == 0)) {
/* 16-bit, ascending: post-increment addressing on both pointers. */
261 " sub.4 d15, #0, %2 \n\t" // set up for jump table
262 " and.4 d15, #(32-2), d15 \n\t" // d15 = (- count) & (32 - 2)
263 " moveai a3, #%%hi(1f) \n\t"
264 " lea.1 a3, %%lo(1f)(a3) \n\t"
265 " lea.2 a3, (a3,d15) \n\t"
266 " calli a3, 0(a3) \n\t"
269 " move.2 (%0)2++, (%1)2++ \n\t"
/* n counts bytes here: one pass through the unrolled sequence accounts for 32 of them. */
271 " add.4 %2, #-32, %2 \n\t"
274 : "+a" (tmp), "+a" (s), "+d" (n)
276 : "d15", "a3", "memory", "cc"
/* 16-bit, descending: pre-decrement addressing on both pointers. */
282 " sub.4 d15, #0, %2 \n\t" // set up for jump table
283 " and.4 d15, #(32-2), d15 \n\t" // d15 = (- count) & (32 - 2)
284 " moveai a3, #%%hi(1f) \n\t"
285 " lea.1 a3, %%lo(1f)(a3) \n\t"
286 " lea.2 a3, (a3,d15) \n\t"
287 " calli a3, 0(a3) \n\t"
290 " move.2 -2(%0)++, -2(%1)++ \n\t"
292 " add.4 %2, #-32, %2 \n\t"
295 : "+a" (tmp), "+a" (s), "+d" (n)
297 : "d15", "a3", "memory", "cc"
/* Byte moves, ascending: post-increment addressing, stepped 16 at a time. */
305 " sub.4 d15, #0, %2 \n\t" // set up for jump table
306 " and.4 d15, #(16-1), d15 \n\t" // d15 = (- count) & (16 - 1)
307 " moveai a3, #%%hi(1f) \n\t"
308 " lea.1 a3, %%lo(1f)(a3) \n\t"
309 " lea.4 a3, (a3,d15) \n\t"
310 " calli a3, 0(a3) \n\t"
313 " move.1 (%0)1++, (%1)1++ \n\t"
315 " add.4 %2, #-16, %2 \n\t"
317 : "+a" (tmp), "+a" (s), "+d" (n)
319 : "d15", "a3", "memory", "cc"
/* Byte moves, descending: pre-decrement addressing, stepped 16 at a time. */
325 " sub.4 d15, #0, %2 \n\t" // set up for jump table
326 " and.4 d15, #(16-1), d15 \n\t" // d15 = (- count) & (16 - 1)
327 " moveai a3, #%%hi(1f) \n\t"
328 " lea.1 a3, %%lo(1f)(a3) \n\t"
329 " lea.4 a3, (a3,d15) \n\t"
330 " calli a3, 0(a3) \n\t"
333 " move.1 -1(%0)++, -1(%1)++ \n\t"
335 " add.4 %2, #-16, %2 \n\t"
337 : "+a" (tmp), "+a" (s), "+d" (n)
339 : "d15", "a3", "memory", "cc"