2 * LuCI Template - Utility functions
4 * Copyright (C) 2010 Jo-Philipp Wich <xm@subsignal.org>
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 #include "template_utils.h"
21 /* initialize a buffer object
 *
 * Allocates a struct template_buffer with malloc() and then a separate
 * data area of buf->size bytes for it.
 *
 * NOTE(review): this view of the function contains no braces, no NULL check
 * on buf, no assignment of buf->size and no return statements - confirm the
 * initial capacity and the failure handling against the complete file. */
22 static struct template_buffer * buf_init(void)
24 struct template_buffer *buf;
/* allocate the buffer descriptor itself */
26 buf = (struct template_buffer *)malloc(sizeof(struct template_buffer));
/* allocate the data area; buf->size is read here, so it must have been
 * set before this line */
32 buf->data = (unsigned char *)malloc(buf->size);
/* only when the data area was obtained: let the write pointer start at
 * the beginning of the data area */
34 if (buf->data != NULL)
36 buf->dptr = buf->data;
/* Enlarge the data area of the given buffer object by 1024 bytes using
 * realloc().
 *
 * NOTE(review): the declaration that receives the realloc() result, the
 * check of that result, the update of buf->size and the return statements
 * are not part of this view. buf_putchar() treats a zero return value as
 * failure - confirm the exact return values against the complete file. */
49 static int buf_grow(struct template_buffer *buf)
/* remember the write position as an offset, because realloc() may move
 * the data area and invalidate buf->dptr */
51 unsigned int off = (buf->dptr - buf->data);
53 (unsigned char *)realloc(buf->data, buf->size + 1024);
/* re-create the write pointer relative to the (possibly moved) data area */
58 buf->dptr = data + off;
67 /* put one char into buffer object
 *
 * buf - buffer object to write to
 * c   - byte to store
 *
 * NOTE(review): only the capacity check is visible in this view; the store
 * of c, the update of the fill counter and the return statements are not
 * shown. Callers in this file treat a zero return value as failure. */
68 static int buf_putchar(struct template_buffer *buf, unsigned char c)
/* the buffer is grown when fill + 1 would reach size, i.e. one spare byte
 * is always kept free (presumably for a terminating NUL - confirm); the
 * condition is true only if growing was necessary and buf_grow() returned
 * zero */
70 if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf) )
80 /* append data to buffer
 *
 * buf - buffer object to write to
 * s   - pointer to the bytes to copy
 * len - number of bytes to copy from s
 *
 * NOTE(review): the body of the grow loop, the bookkeeping after the copy
 * and the return statements are not part of this view. Callers in this file
 * treat a zero return value as failure. */
81 static int buf_append(struct template_buffer *buf, unsigned char *s, int len)
/* repeat as long as len more bytes plus one spare byte do not fit into the
 * current capacity */
83 while ((buf->fill + len + 1) >= buf->size)
/* copy the bytes to the current write position */
89 memcpy(buf->dptr, s, len);
98 /* destroy buffer object and return pointer to data
 *
 * NOTE(review): only the line that saves buf->data is visible in this view;
 * how buf itself is released and what exactly is returned is not shown.
 * Presumably the returned data area has to be released by the caller -
 * confirm against the complete file. */
99 static char * buf_destroy(struct template_buffer *buf)
/* keep the pointer to the data area in a local variable so that it does not
 * depend on the buffer object any more */
101 unsigned char *data = buf->data;
/*
 * Classify a byte as the lead byte of a UTF-8 sequence and report the
 * total length of that sequence in bytes (lead byte included).
 *
 *   110xxxxx -> 2      11110xxx -> 4      1111110x -> 6
 *   1110xxxx -> 3      111110xx -> 5
 *
 * Five and six byte forms are recognised so that the caller can consume
 * and reject them as a unit.
 *
 * Returns the sequence length (2..6) for a multi byte lead byte and 1 for
 * every other byte (ASCII, stray continuation bytes, 0xFE, 0xFF).
 */
static inline int mb_num_chars(unsigned char c)
{
	if ((c & 0xE0) == 0xC0)
		return 2;
	else if ((c & 0xF0) == 0xE0)
		return 3;
	else if ((c & 0xF8) == 0xF0)
		return 4;
	else if ((c & 0xFC) == 0xF8)
		return 5;
	else if ((c & 0xFE) == 0xFC)
		return 6;

	/* not a multi byte lead byte */
	return 1;
}
/*
 * Test whether the given byte is a valid UTF-8 continuation byte.
 *
 * Continuation bytes have the bit pattern 10xxxxxx, which is exactly the
 * range 0x80..0xBF.
 *
 * Returns 1 if c is a continuation byte, 0 otherwise.
 */
static inline int mb_is_cont(unsigned char c)
{
	return ((c & 0xC0) == 0x80);
}
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is the shortest possible representation of its code point,
 * i.e. that it is not an "overlong" encoding.
 *
 * s - pointer to the lead byte; for n >= 2 at least two bytes are read
 * n - total length of the sequence in bytes (2..6)
 *
 * A sequence is overlong only when ALL payload bits that distinguish it
 * from the next shorter form are zero. For three and more bytes this means
 * the lead byte carries no payload bits AND the high payload bits of the
 * second byte are zero. A sequence with a zero lead payload but a non-zero
 * second byte (e.g. E0 A0 80 = U+0800), or the other way round
 * (e.g. E1 80 80 = U+1000), is perfectly valid and must be accepted.
 *
 * Returns 1 if the sequence is in shortest form (or n is outside 2..6),
 * 0 if it is overlong.
 */
static inline int mb_is_shortest(unsigned char *s, int n)
{
	switch (n)
	{
		case 2:
			/* overlong: 1100000x (10xxxxxx) */
			return ((s[0] & 0x1E) != 0);

		case 3:
			/* overlong: 11100000 100xxxxx (10xxxxxx) */
			return !((s[0] == 0xE0) && ((s[1] & 0xE0) == 0x80));

		case 4:
			/* overlong: 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
			return !((s[0] == 0xF0) && ((s[1] & 0xF0) == 0x80));

		case 5:
			/* overlong: 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
			return !((s[0] == 0xF8) && ((s[1] & 0xF8) == 0x80));

		case 6:
			/* overlong: 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx) */
			return !((s[0] == 0xFC) && ((s[1] & 0xFC) == 0x80));

		default:
			/* single bytes have no shorter representation */
			return 1;
	}
}
/*
 * Test whether the byte sequence at the given pointer with the given
 * length encodes a UTF-16 surrogate (U+D800..U+DFFF).
 *
 * s - pointer to the lead byte; the second byte is only read when n is 3
 *     and the lead byte is 0xED
 * n - total length of the sequence in bytes
 *
 * Surrogates are the three byte sequences ED A0..BF xx.
 *
 * Returns 1 for a surrogate, 0 otherwise.
 */
static inline int mb_is_surrogate(unsigned char *s, int n)
{
	return ((n == 3) && (s[0] == 0xED) && (s[1] >= 0xA0) && (s[1] <= 0xBF));
}
/*
 * Test whether the byte sequence at the given pointer with the given
 * length is one of the illegal code points U+FFFE or U+FFFF.
 *
 * s - pointer to the lead byte; further bytes are only read when n is 3
 *     and the preceding bytes already matched
 * n - total length of the sequence in bytes
 *
 * The two code points are encoded as EF BF BE and EF BF BF.
 *
 * Returns 1 for U+FFFE / U+FFFF, 0 otherwise.
 */
static inline int mb_is_illegal(unsigned char *s, int n)
{
	return ((n == 3) && (s[0] == 0xEF) && (s[1] == 0xBF) &&
			(s[2] >= 0xBE) && (s[2] <= 0xBF));
}
177 /* scan given source string, validate UTF-8 sequence and store result
178 * in given buffer object
 *
 * s   - address of the read pointer; the scan starts at *s
 * l   - number of bytes available at *s
 * buf - buffer object that receives the validated output
 *
 * Bytes 0x01..0x7F are copied unchanged, well-formed multi byte sequences
 * are copied as a whole and everything else is replaced by '?'.
 *
 * NOTE(review): braces, return statements and the statements that advance
 * ptr and write back *s are not part of this view. Both callers in this file
 * treat a zero return value as failure; one of them keeps the non-zero
 * result - confirm the exact meaning against the complete file. */
179 static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
181 unsigned char *ptr = *s;
182 unsigned int o = 0, v, n;
/* NOTE(review): the loop header below is commented out, so in the visible
 * code o keeps its initial value 0 */
184 //for (o = 0; o < l; o++)
186 /* ascii byte without null */
187 if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
189 if (!buf_putchar(buf, *ptr++))
195 /* multi byte sequence; n is the value mb_num_chars() reports for the
 * lead byte */
196 else if ((n = mb_num_chars(*ptr)) > 1)
198 /* count valid chars: the loop has an empty body, v stops at the first
 * offset that exceeds n, lies outside the l available bytes or does not
 * hold a continuation byte.
 * NOTE(review): (o+v) is unsigned and l is int, so the comparison is done
 * in unsigned arithmetic */
199 for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
205 /* five and six byte sequences are always invalid */
206 if (!buf_putchar(buf, '?'))
212 /* if the number of valid continuation bytes matches the
213 * expected number and if the sequence is legal, copy
214 * the bytes to the destination buffer */
215 if ((v == n) && mb_is_shortest(ptr, n) &&
216 !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
219 if (!buf_append(buf, ptr, n))
223 /* the found sequence is illegal, skip it */
226 /* invalid sequence */
227 if (!buf_putchar(buf, '?'))
234 /* advance beyond the last found valid continuation char */
239 /* invalid byte (0x00) */
242 if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD (U+FFFD encoded as UTF-8) */
254 /* sanitize given string and replace all invalid UTF-8 sequences with "?"
 *
 * s - input bytes
 * l - number of input bytes
 *
 * The input is fed through _validate_utf8() into a freshly created buffer
 * object and the result of buf_destroy() on that buffer is returned.
 *
 * NOTE(review): the check of the buf_init() result, the loop around the
 * _validate_utf8() call and the failure path are not part of this view.
 * Presumably the returned string has to be released by the caller -
 * confirm against the complete file. */
255 char * sanitize_utf8(const char *s, unsigned int l)
257 struct template_buffer *buf = buf_init();
/* the const qualifier is cast away because _validate_utf8() takes a
 * non-const pointer */
258 unsigned char *ptr = (unsigned char *)s;
/* a zero result of _validate_utf8() is handled separately */
263 if (!_validate_utf8(&ptr, l, buf))
270 return buf_destroy(buf);
273 /* Sanitize given string and strip all invalid XML bytes
274 * Validate UTF-8 sequences
275 * Escape XML control chars
 *
 * s - input bytes
 * l - number of input bytes
 *
 * Each input byte is classified in turn: bytes that are invalid in XML,
 * bytes that are escaped as a numeric character reference, other bytes up
 * to 0x7F which are copied unchanged, and everything else which is handed
 * to _validate_utf8(). The result of buf_destroy() on the output buffer is
 * returned.
 *
 * NOTE(review): the declarations of o, v, esq and esl, the check of the
 * buf_init() result, the remainder of both multi line conditions and all
 * failure paths are not part of this view. Presumably the returned string
 * has to be released by the caller - confirm against the complete file. */
276 char * sanitize_pcdata(const char *s, unsigned int l)
278 struct template_buffer *buf = buf_init();
279 unsigned char *ptr = (unsigned char *)s;
287 for (o = 0; o < l; o++)
289 /* Invalid XML bytes: 0x00..0x08, 0x0B..0x0C, 0x0E..0x1F and whatever
 * the continuation of this condition adds.
 * NOTE(review): *ptr is an unsigned char, so (*ptr >= 0x00) is always
 * true */
290 if (((*ptr >= 0x00) && (*ptr <= 0x08)) ||
291 ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
292 ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
/* bytes that are escaped: 0x26 ('&') and whatever the continuation of this
 * condition adds */
299 else if ((*ptr == 0x26) ||
/* format the byte as a decimal numeric character reference; esl receives
 * the snprintf() result */
305 esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
307 if (!buf_append(buf, (unsigned char *)esq, esl))
/* remaining bytes up to 0x7F are copied unchanged.
 * NOTE(review): the buf_putchar() result is not checked here, unlike at
 * the other call sites in this file */
314 else if (*ptr <= 0x7F)
316 buf_putchar(buf, *ptr++);
319 /* multi byte sequence: hand the rest of the input (l - o bytes) to
 * _validate_utf8(); a zero result is handled separately */
322 if (!(v = _validate_utf8(&ptr, l - o, buf)))
329 return buf_destroy(buf);