target/linux/ubicom32/files/arch/ubicom32/lib/checksum.c

   1 /*
   2  * arch/ubicom32/lib/checksum.c
   3  *   Optimized checksum utilities for IP.
   4  *
   5  * (C) Copyright 2009, Ubicom, Inc.
   6  *
   7  * This file is part of the Ubicom32 Linux Kernel Port.
   8  *
   9  * The Ubicom32 Linux Kernel Port is free software: you can redistribute
  10  * it and/or modify it under the terms of the GNU General Public License
  11  * as published by the Free Software Foundation, either version 2 of the
  12  * License, or (at your option) any later version.
  13  *
  14  * The Ubicom32 Linux Kernel Port is distributed in the hope that it
  15  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
  16  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  17  * the GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with the Ubicom32 Linux Kernel Port.  If not,
  21  * see <http://www.gnu.org/licenses/>.
  22  *
  23  * Ubicom32 implementation derived from (with many thanks):
  24  *   arch/m68knommu
  25  *   arch/blackfin
  26  *   arch/parisc
  27  */
  28 /*
  29  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  30  *              operating system.  INET is implemented using the  BSD Socket
  31  *              interface as the means of communication with the user level.
  32  *
  33  *              IP/TCP/UDP checksumming routines
  34  *
  35  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  36  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  37  *              Tom May, <ftom@netcom.com>
  38  *              Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
  39  *              Lots of code moved from tcp.c and ip.c; see those files
  40  *              for more names.
  41  *
  42  * 03/02/96     Jes Sorensen, Andreas Schwab, Roman Hodek:
  43  *              Fixed some nasty bugs, causing some horrible crashes.
  44  *              A: At some points, the sum (%0) was used as
  45  *              length-counter instead of the length counter
  46  *              (%1). Thanks to Roman Hodek for pointing this out.
  47  *              B: GCC seems to mess up if one uses too many
  48  *              data-registers to hold input values and one tries to
  49  *              specify d0 and d1 as scratch registers. Letting gcc choose these
  50  *              registers itself solves the problem.
  51  *
  52  *              This program is free software; you can redistribute it and/or
  53  *              modify it under the terms of the GNU General Public License
  54  *              as published by the Free Software Foundation; either version
  55  *              2 of the License, or (at your option) any later version.
  56  */
  57
  58 /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most
  59    of the assembly has to go. */
  60
  61 #include <linux/module.h>
  62 #include <net/checksum.h>
  63
  64 static unsigned long do_csum(const unsigned char * buff, int len)
  65 {
  66         int count;
  67         unsigned long result = 0;
  68
  69         /*
  70          * The following optimized assembly code cannot handle data length less than 7 bytes!
  71          */
  72         if (likely(len >= 7)) {
  73                 len -= (4 - (int)buff) & 3;
  74                 count = len >> 2;
  75                 asm (
  76                 "       sub.4           d15, #0, %2             \n\t"   // set up for jump table
  77                 "       and.4           d15, #(32-1), d15       \n\t"   // d15 = (-m) & (32 - 1)
  78
  79                 "       bfextu          d14, %0, #2             \n\t"   // test 2 LSB of buff
  80                 "       jmpne.w.f       100f                    \n\t"
  81                 "       add.4           %1, #0, %1              \n\t"   // clear C
  82                 "       moveai          a3, #%%hi(1f)           \n\t"   // table jump
  83                 "       lea.1           a3, %%lo(1f)(a3)        \n\t"
  84                 "       lea.4           a3, (a3,d15)            \n\t"
  85                 "       calli           a3, 0(a3)               \n\t"
  86
  87                 "100:   sub.4           %0, %0, d14             \n\t"
  88                 "       sub.4           d14, #4, d14            \n\t"
  89                 "       lsl.4           d14, d14, #3            \n\t"
  90                 "       add.4           %1, #0, %1              \n\t"   // clear C
  91                 "       moveai          a3, #%%hi(1f)           \n\t"   // table jump
  92                 "       lea.1           a3, %%lo(1f)(a3)        \n\t"
  93                 "       lea.4           a3, (a3,d15)            \n\t"
  94                 "       bfextu          %1, (%0)4++, d14        \n\t"   // read first partial word
  95                 "       calli           a3, 0(a3)               \n\t"
  96 #if 1
  97                 "200:   lsl.4           %3, %3, #3              \n\t"
  98                 "       bfrvrs          d15, (%0), #0           \n\t"   // read last word (partial)
  99                 "       bfextu          d15, d15, %3            \n\t"
 100                 "       bfrvrs          d15, d15, #0            \n\t"
 101                 "       add.4           %1, d15, %1             \n\t"
 102                 "       addc            %1, #0, %1              \n\t"   // sample C again
 103                 "       jmpt.w.t        2f                      \n\t"
 104 #else
 105                 "200:   move.1          d15, 0(%0)              \n\t"
 106                 "       lsl.4           d15, d15, #8            \n\t"
 107                 "       add.4           %1, d15, %1             \n\t"
 108                 "       addc            %1, #0, %1              \n\t"   // sample C again
 109                 "       add.4           %3, #-1, %3             \n\t"
 110                 "       jmpeq.w.t       2f                      \n\t"
 111
 112                 "       move.1          d15, 1(%0)              \n\t"
 113                 "       add.4           %1, d15, %1             \n\t"
 114                 "       addc            %1, #0, %1              \n\t"   // sample C again
 115                 "       add.4           %3, #-1, %3             \n\t"
 116                 "       jmpeq.w.t       2f                      \n\t"
 117
 118                 "       move.1          d15, 2(%0)              \n\t"
 119                 "       lsl.4           d15, d15, #8            \n\t"
 120                 "       add.4           %1, d15, %1             \n\t"
 121                 "       addc            %1, #0, %1              \n\t"   // sample C again
 122                 "       jmpt.w.t        2f                      \n\t"
 123 #endif
 124 #if defined(IP7000) || defined(IP7000_REV2)
 125                 "300:   swapb.2         %1, %1                  \n\t"
 126 #else
 127                 "300:   shmrg.2         %1, %1, %1              \n\t"
 128                 "       lsr.4           %1, %1, #8              \n\t"
 129                 "       bfextu          %1, %1, #16             \n\t"
 130 #endif
 131                 "       jmpt.w.t        3f                      \n\t"
 132
 133                 "1:     add.4           %1, (%0)4++, %1         \n\t"   // first add without C
 134                 "       .rept           31                      \n\t"
 135                 "       addc            %1, (%0)4++, %1         \n\t"
 136                 "       .endr                                   \n\t"
 137                 "       addc            %1, #0, %1              \n\t"   // sample C again
 138                 "       add.4           %2, #-32, %2            \n\t"
 139                 "       jmpgt.w.t       1b                      \n\t"
 140
 141                 "       and.4           %3, #3, %3              \n\t"   // check n
 142                 "       jmpne.w.f       200b                    \n\t"
 143
 144                 "2:     .rept           2                       \n\t"
 145                 "       lsr.4           d15, %1, #16            \n\t"
 146                 "       bfextu          %1, %1, #16             \n\t"
 147                 "       add.4           %1, d15, %1             \n\t"
 148                 "       .endr                                   \n\t"
 149                 "       btst            d14, #3                 \n\t"   // start from odd address (<< 3)?
 150                 "       jmpne.w.f       300b                    \n\t"
 151                 "3:                                             \n\t"
 152
 153                         : "+a"(buff), "+d"(result), "+d"(count), "+d"(len)
 154                         :
 155                         : "d15", "d14", "a3", "cc"
 156                 );
 157
 158                 return result;
 159         }
 160
 161         /*
 162          * handle a few bytes and fold result into 16-bit
 163          */
 164         while (len-- > 0) {
 165                 result += (*buff++ << 8);
 166                 if (len) {
 167                         result += *buff++;
 168                         len--;
 169                 }
 170         }
 171         asm (
 172         "       .rept           2                       \n\t"
 173         "       lsr.4           d15, %0, #16            \n\t"
 174         "       bfextu          %0, %0, #16             \n\t"
 175         "       add.4           %0, d15, %0             \n\t"
 176         "       .endr                                   \n\t"
 177                 : "+d" (result)
 178                 :
 179                 : "d15", "cc"
 180         );
 181
 182         return result;
 183 }
 184
 185 /* IP header checksum: complemented do_csum() over the ihl 32-bit words at iph. */
 186 __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 187 {
 188         /* ihl is scaled by 4 to give do_csum() its length in bytes. */
 189         const unsigned long folded = do_csum(iph, ihl << 2);
 190
 191         return (__force __sum16)~folded;
 192 }
 193
 194 /*
 195  * Checksum the len bytes at buff with do_csum() and add the result to
 196  * the 32-bit running value "sum" using an end-around carry.
 197  *
 198  * The value returned is a 32-bit number that can be passed back in as
 199  * "sum" or handed to csum_tcpudp_magic.
 200  *
 201  * Callers must use even lengths for every fragment except the last,
 202  * which may be odd.
 203  *
 204  * Keeping buff aligned on a 32-bit boundary is the preferred case.
 205  */
 206 __wsum csum_partial(const void *buff, int len, __wsum sum)
 207 {
 208         const unsigned int addend = (__force u32)sum;
 209         unsigned int total = do_csum(buff, len);
 210
 211         total += addend;
 212         /* A wrapped 32-bit add leaves total below addend: put the carry back. */
 213         if (total < addend)
 214                 total++;
 215         return (__force __wsum)total;
 216 }
 217 EXPORT_SYMBOL(csum_partial);
 218
 219 /* General IP-style checksum of the len bytes at buff (used mainly by icmp.c). */
 220 __sum16 ip_compute_csum(const void *buff, int len)
 221 {
 222         const unsigned long folded = do_csum(buff, len);
 223
 224         /* The value handed back is the complement of the do_csum() result. */
 225         return (__force __sum16)~folded;
 226 }
 227
 228 /*
 229  * Copy len bytes from the user pointer src to dst with a plain memcpy(),
 230  * then checksum the copy via csum_partial(); *csum_err, if given, is zeroed.
 231  */
 232 __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
 233                                    int len, __wsum sum, int *csum_err)
 234 {
 235         if (csum_err)
 236                 *csum_err = 0;
 237         memcpy(dst, (__force const void *)src, len);
 238         return csum_partial(dst, len, sum);
 239 }
 240
 241 /*
 242  * Copy len bytes from src to dst with memcpy(), then return the
 243  * csum_partial() of the copied data accumulated onto "sum".
 244  */
 245 __wsum csum_partial_copy_nocheck(const void *src, void *dst,
 246                                  int len, __wsum sum)
 247 {
 248         memcpy(dst, src, len);
 249         return csum_partial(dst, len, sum);
 250 }