[ubicom32]: move new files out from platform support patch
[openwrt.git] / target / linux / ubicom32 / files / arch / ubicom32 / lib / checksum.c
1 /*
2  * arch/ubicom32/lib/checksum.c
3  *   Optimized checksum utilities for IP.
4  *
5  * (C) Copyright 2009, Ubicom, Inc.
6  *
7  * This file is part of the Ubicom32 Linux Kernel Port.
8  *
9  * The Ubicom32 Linux Kernel Port is free software: you can redistribute
10  * it and/or modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation, either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * The Ubicom32 Linux Kernel Port is distributed in the hope that it
15  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
16  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
17  * the GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with the Ubicom32 Linux Kernel Port.  If not,
21  * see <http://www.gnu.org/licenses/>.
22  *
23  * Ubicom32 implementation derived from (with many thanks):
24  *   arch/m68knommu
25  *   arch/blackfin
26  *   arch/parisc
27  */
28 /*
29  * INET         An implementation of the TCP/IP protocol suite for the LINUX
30  *              operating system.  INET is implemented using the  BSD Socket
31  *              interface as the means of communication with the user level.
32  *
33  *              IP/TCP/UDP checksumming routines
34  *
35  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
36  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
37  *              Tom May, <ftom@netcom.com>
38  *              Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
39  *              Lots of code moved from tcp.c and ip.c; see those files
40  *              for more names.
41  *
42  * 03/02/96     Jes Sorensen, Andreas Schwab, Roman Hodek:
43  *              Fixed some nasty bugs, causing some horrible crashes.
44  *              A: At some points, the sum (%0) was used as
45  *              length-counter instead of the length counter
46  *              (%1). Thanks to Roman Hodek for pointing this out.
47  *              B: GCC seems to mess up if one uses too many
48  *              data-registers to hold input values and one tries to
49  *              specify d0 and d1 as scratch registers. Letting gcc choose these
50  *              registers itself solves the problem.
51  *
52  *              This program is free software; you can redistribute it and/or
53  *              modify it under the terms of the GNU General Public License
54  *              as published by the Free Software Foundation; either version
55  *              2 of the License, or (at your option) any later version.
56  */
57
58 /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most
59    of the assembly has to go. */
60
61 #include <linux/module.h>
62 #include <net/checksum.h>
63
64 static unsigned long do_csum(const unsigned char * buff, int len)
65 {
66         int count;
67         unsigned long result = 0;
68
69         /*
70          * The following optimized assembly code cannot handle data length less than 7 bytes!
71          */
72         if (likely(len >= 7)) {
73                 len -= (4 - (int)buff) & 3;
74                 count = len >> 2;
75                 asm (
76                 "       sub.4           d15, #0, %2             \n\t"   // set up for jump table
77                 "       and.4           d15, #(32-1), d15       \n\t"   // d15 = (-m) & (32 - 1)
78
79                 "       bfextu          d14, %0, #2             \n\t"   // test 2 LSB of buff
80                 "       jmpne.w.f       100f                    \n\t"
81                 "       add.4           %1, #0, %1              \n\t"   // clear C
82                 "       moveai          a3, #%%hi(1f)           \n\t"   // table jump
83                 "       lea.1           a3, %%lo(1f)(a3)        \n\t"
84                 "       lea.4           a3, (a3,d15)            \n\t"
85                 "       calli           a3, 0(a3)               \n\t"
86
87                 "100:   sub.4           %0, %0, d14             \n\t"
88                 "       sub.4           d14, #4, d14            \n\t"
89                 "       lsl.4           d14, d14, #3            \n\t"
90                 "       add.4           %1, #0, %1              \n\t"   // clear C
91                 "       moveai          a3, #%%hi(1f)           \n\t"   // table jump
92                 "       lea.1           a3, %%lo(1f)(a3)        \n\t"
93                 "       lea.4           a3, (a3,d15)            \n\t"
94                 "       bfextu          %1, (%0)4++, d14        \n\t"   // read first partial word
95                 "       calli           a3, 0(a3)               \n\t"
96 #if 1
97                 "200:   lsl.4           %3, %3, #3              \n\t"
98                 "       bfrvrs          d15, (%0), #0           \n\t"   // read last word (partial)
99                 "       bfextu          d15, d15, %3            \n\t"
100                 "       bfrvrs          d15, d15, #0            \n\t"
101                 "       add.4           %1, d15, %1             \n\t"
102                 "       addc            %1, #0, %1              \n\t"   // sample C again
103                 "       jmpt.w.t        2f                      \n\t"
104 #else
105                 "200:   move.1          d15, 0(%0)              \n\t"
106                 "       lsl.4           d15, d15, #8            \n\t"
107                 "       add.4           %1, d15, %1             \n\t"
108                 "       addc            %1, #0, %1              \n\t"   // sample C again
109                 "       add.4           %3, #-1, %3             \n\t"
110                 "       jmpeq.w.t       2f                      \n\t"
111
112                 "       move.1          d15, 1(%0)              \n\t"
113                 "       add.4           %1, d15, %1             \n\t"
114                 "       addc            %1, #0, %1              \n\t"   // sample C again
115                 "       add.4           %3, #-1, %3             \n\t"
116                 "       jmpeq.w.t       2f                      \n\t"
117
118                 "       move.1          d15, 2(%0)              \n\t"
119                 "       lsl.4           d15, d15, #8            \n\t"
120                 "       add.4           %1, d15, %1             \n\t"
121                 "       addc            %1, #0, %1              \n\t"   // sample C again
122                 "       jmpt.w.t        2f                      \n\t"
123 #endif
124 #if defined(IP7000) || defined(IP7000_REV2)
125                 "300:   swapb.2         %1, %1                  \n\t"
126 #else
127                 "300:   shmrg.2         %1, %1, %1              \n\t"
128                 "       lsr.4           %1, %1, #8              \n\t"
129                 "       bfextu          %1, %1, #16             \n\t"
130 #endif
131                 "       jmpt.w.t        3f                      \n\t"
132
133                 "1:     add.4           %1, (%0)4++, %1         \n\t"   // first add without C
134                 "       .rept           31                      \n\t"
135                 "       addc            %1, (%0)4++, %1         \n\t"
136                 "       .endr                                   \n\t"
137                 "       addc            %1, #0, %1              \n\t"   // sample C again
138                 "       add.4           %2, #-32, %2            \n\t"
139                 "       jmpgt.w.t       1b                      \n\t"
140
141                 "       and.4           %3, #3, %3              \n\t"   // check n
142                 "       jmpne.w.f       200b                    \n\t"
143
144                 "2:     .rept           2                       \n\t"
145                 "       lsr.4           d15, %1, #16            \n\t"
146                 "       bfextu          %1, %1, #16             \n\t"
147                 "       add.4           %1, d15, %1             \n\t"
148                 "       .endr                                   \n\t"
149                 "       btst            d14, #3                 \n\t"   // start from odd address (<< 3)?
150                 "       jmpne.w.f       300b                    \n\t"
151                 "3:                                             \n\t"
152
153                         : "+a"(buff), "+d"(result), "+d"(count), "+d"(len)
154                         :
155                         : "d15", "d14", "a3", "cc"
156                 );
157
158                 return result;
159         }
160
161         /*
162          * handle a few bytes and fold result into 16-bit
163          */
164         while (len-- > 0) {
165                 result += (*buff++ << 8);
166                 if (len) {
167                         result += *buff++;
168                         len--;
169                 }
170         }
171         asm (
172         "       .rept           2                       \n\t"
173         "       lsr.4           d15, %0, #16            \n\t"
174         "       bfextu          %0, %0, #16             \n\t"
175         "       add.4           %0, d15, %0             \n\t"
176         "       .endr                                   \n\t"
177                 : "+d" (result)
178                 :
179                 : "d15", "cc"
180         );
181
182         return result;
183 }
184
185 /*
186  *      This is a version of ip_compute_csum() optimized for IP headers,
187  *      which always checksum on 4 octet boundaries.
188  */
189 __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
190 {
191         return (__force __sum16)~do_csum(iph,ihl*4);
192 }
193
194 /*
195  * computes the checksum of a memory block at buff, length len,
196  * and adds in "sum" (32-bit)
197  *
198  * returns a 32-bit number suitable for feeding into itself
199  * or csum_tcpudp_magic
200  *
201  * this function must be called with even lengths, except
202  * for the last fragment, which may be odd
203  *
204  * it's best to have buff aligned on a 32-bit boundary
205  */
206 __wsum csum_partial(const void *buff, int len, __wsum sum)
207 {
208         unsigned int result = do_csum(buff, len);
209
210         /* add in old sum, and carry.. */
211         result += (__force u32)sum;
212         if ((__force u32)sum > result)
213                 result += 1;
214         return (__force __wsum)result;
215 }
216
217 EXPORT_SYMBOL(csum_partial);
218
219 /*
220  * this routine is used for miscellaneous IP-like checksums, mainly
221  * in icmp.c
222  */
223 __sum16 ip_compute_csum(const void *buff, int len)
224 {
225         return (__force __sum16)~do_csum(buff,len);
226 }
227
228 /*
229  * copy from fs while checksumming, otherwise like csum_partial
230  */
231
232 __wsum
233 csum_partial_copy_from_user(const void __user *src, void *dst,
234                             int len, __wsum sum, int *csum_err)
235 {
236         if (csum_err) *csum_err = 0;
237         memcpy(dst, (__force const void *)src, len);
238         return csum_partial(dst, len, sum);
239 }
240
241 /*
242  * copy from ds while checksumming, otherwise like csum_partial
243  */
244
245 __wsum
246 csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
247 {
248         memcpy(dst, src, len);
249         return csum_partial(dst, len, sum);
250 }