libmcrypt: moved to github
[packages.git] / lang / php5 / patches / 102-debian_patches_use_embedded_timezonedb.patch
1
2 Add support for use of the system timezone database, rather
3 than embedding a copy.  Discussed upstream but was not desired.
4
5 History:
6 r9: fix another compile error without --with-system-tzdata configured
7 r8: fix compile error without --with-system-tzdata configured
8 r7: improve check for valid timezone id to exclude directories
9 r6: fix fd leak in r5, fix country code/BC flag use in
10     timezone_identifiers_list() using system db,
11     fix use of PECL timezonedb to override system db
12 r5: reverts addition of "System/Localtime" fake tzname.
13     updated for 5.3.0, parses zone.tab to pick up mapping between
14     timezone name, country code and long/lat coords
15 r4: added "System/Localtime" tzname which uses /etc/localtime
16 r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
17 r2: add filesystem trawl to set up name alias index
18 r1: initial revision
19
20 --- a/ext/date/lib/parse_tz.c
21 +++ b/ext/date/lib/parse_tz.c
22 @@ -20,6 +20,16 @@
23  
24  #include "timelib.h"
25  
26 +#ifdef HAVE_SYSTEM_TZDATA
27 +#include <sys/mman.h>
28 +#include <sys/stat.h>
29 +#include <limits.h>
30 +#include <fcntl.h>
31 +#include <unistd.h>
32 +
33 +#include "php_scandir.h"
34 +#endif
35 +
36  #include <stdio.h>
37  
38  #ifdef HAVE_LOCALE_H
39 @@ -31,7 +41,12 @@
40  #else
41  #include <strings.h>
42  #endif
43 +
44 +#ifndef HAVE_SYSTEM_TZDATA
45  #include "timezonedb.h"
46 +#endif
47 +
48 +#include <ctype.h>
49  
50  #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
51  # if defined(__LITTLE_ENDIAN__)
52 @@ -51,9 +66,14 @@
53  
54  static void read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
55  {
56 -       /* skip ID */
57 -       *tzf += 4;
58 -       
59 +        if (memcmp(*tzf, "TZif", 4) == 0) { /* compare the file data, not the pointer variable: system tzfile(5) has no PHP preamble */
60 +                *tzf += 20; /* 4 magic + 1 version + 15 reserved bytes, per tzfile(5) */
61 +                return;
62 +        }
63 +        
64 +        /* skip ID */
65 +        *tzf += 4;
66 +                
67         /* read BC flag */
68         tz->bc = (**tzf == '\1');
69         *tzf += 1;
70 @@ -256,7 +276,397 @@ void timelib_dump_tzinfo(timelib_tzinfo
71         }
72  }
73  
74 -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
75 +#ifdef HAVE_SYSTEM_TZDATA
76 +
77 +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
78 +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
79 +#else
80 +#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
81 +#endif
82 +
83 +/* System timezone database pointer. */
84 +static const timelib_tzdb *timezonedb_system = NULL;
85 +
86 +/* Hash table entry for the cache of the zone.tab mapping table. */
87 +struct location_info {
88 +        char code[2];
89 +        double latitude, longitude;
90 +        char name[64];
91 +        char *comment;
92 +        struct location_info *next;
93 +};
94 +
95 +/* Cache of zone.tab. */
96 +static struct location_info **system_location_table;
97 +
98 +/* Size of the zone.tab hash table; a random-ish prime big enough to
99 + * prevent too many collisions. */
100 +#define LOCINFO_HASH_SIZE (1021)
101 +
102 +static uint32_t tz_hash(const char *str)
103 +{
104 +    /* Shift/XOR string hash seeded with 5381, reduced to a bucket index. */
105 +    uint32_t acc = 5381;
106 +    const unsigned char *cur;
107 +
108 +    for (cur = (const unsigned char *)str; *cur != '\0'; cur++) {
109 +        acc = (acc << 5) ^ acc ^ *cur;
110 +    }
111 +
112 +    return acc % LOCINFO_HASH_SIZE;
113 +}
114 +
115 +/* Parse one signed ISO-6709 coordinate ([+-]digits) as used in zone.tab.
116 + * Returns a pointer just past the digits consumed, or NULL on parse error.
117 + * On success, writes the value in decimal degrees to *result. */
118 +static char *parse_iso6709(char *p, double *result)
119 +{
120 +    double v, sign;
121 +    char *pend;
122 +    size_t len;
123 +
124 +    if (*p == '+')
125 +        sign = 1.0;
126 +    else if (*p == '-')
127 +        sign = -1.0;
128 +    else
129 +        return NULL;
130 +
131 +    p++;
132 +    for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
133 +        ;;
134 +
135 +    /* The encoding used by zone.tab has no decimal point, so the
136 +     * number of digits determines the format:
137 +     *
138 +     * 4 = DDMM
139 +     * 5 = DDDMM
140 +     * 6 = DDMMSS
141 +     * 7 = DDDMMSS
142 +     */
143 +    len = pend - p;
144 +    if (len < 4 || len > 7) {
145 +        return NULL;
146 +    }
147 +
148 +    /* p => [D]DD (whole degrees; a third digit only for lengths 5 and 7) */
149 +    v = (p[0] - '0') * 10.0 + (p[1] - '0');
150 +    p += 2;
151 +    if (len == 5 || len == 7)
152 +        v = v * 10.0 + (*p++ - '0');
153 +    /* p => MM[SS] (minutes, added as a fraction of a degree) */
154 +    v += (10.0 * (p[0] - '0')
155 +          + p[1] - '0') / 60.0;
156 +    p += 2;
157 +    /* p => [SS] (seconds, present only for lengths 6 and 7) */
158 +    if (len > 5) {
159 +        v += (10.0 * (p[0] - '0')
160 +              + p[1] - '0') / 3600.0;
161 +        p += 2;
162 +    }
163 +
164 +    /* Round to five decimal places, not because it's a good idea,
165 +     * but because the builtin data uses rounded data, so match
166 +     * that. */
167 +    *result = round(v * sign * 100000.0) / 100000.0;
168 +
169 +    return p;
170 +}
171 +
172 +/* This function parses the zone.tab file to build up the mapping of
173 + * timezone to country code and geographic location, and returns a
174 + * hash table, or NULL if zone.tab cannot be opened or the table cannot
175 + * be allocated.  The hash table is indexed by tz_hash(timezone-name);
176 + * each bucket is a singly linked list of malloc()ed location_info.
177 + */
178 +static struct location_info **create_location_table(void)
179 +{
180 +    struct location_info **li, *i;
181 +    char zone_tab[PATH_MAX];
182 +    char line[512];
183 +    FILE *fp;
184 +
185 +    snprintf(zone_tab, sizeof zone_tab, "%s", ZONEINFO_PREFIX "/zone.tab");
186 +
187 +    fp = fopen(zone_tab, "r");
188 +    if (!fp) {
189 +        return NULL;
190 +    }
191 +
192 +    li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
193 +    if (!li) { fclose(fp); return NULL; }
194 +    while (fgets(line, sizeof line, fp)) {
195 +        char *p = line, *code, *name, *comment;
196 +        uint32_t hash;
197 +        double latitude, longitude;
198 +
199 +        while (isspace((unsigned char)*p))
200 +            p++;
201 +
202 +        if (*p == '#' || *p == '\0' || *p == '\n')
203 +            continue;
204 +
205 +        if (!isalpha((unsigned char)p[0]) || !isalpha((unsigned char)p[1]) || p[2] != '\t')
206 +            continue;
207 +
208 +        /* code => AA */
209 +        code = p;
210 +        p[2] = 0;
211 +        p += 3;
212 +
213 +        /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
214 +        p = parse_iso6709(p, &latitude);
215 +        if (!p) {
216 +            continue;
217 +        }
218 +        p = parse_iso6709(p, &longitude);
219 +        if (!p) {
220 +            continue;
221 +        }
222 +
223 +        if (*p != '\t') {
224 +            continue;
225 +        }
226 +
227 +        /* name = string */
228 +        name = ++p;
229 +        while (*p != '\t' && *p && *p != '\n')
230 +            p++;
231 +
232 +        if (*p) *p++ = '\0'; /* at end of buffer, stay on the NUL so the comment is "" */
233 +
234 +        /* comment = string */
235 +        comment = p;
236 +        while (*p != '\t' && *p && *p != '\n')
237 +            p++;
238 +
239 +        if (*p == '\n' || *p == '\t')
240 +            *p = '\0';
241 +
242 +        hash = tz_hash(name);
243 +        if ((i = malloc(sizeof *i)) == NULL) break; /* out of memory: keep what was parsed */
244 +        memcpy(i->code, code, 2);
245 +        snprintf(i->name, sizeof i->name, "%s", name); /* always NUL-terminated */
246 +        i->comment = strdup(comment);
247 +        i->longitude = longitude;
248 +        i->latitude = latitude;
249 +        i->next = li[hash];
250 +        li[hash] = i;
251 +        /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */
252 +    }
253 +
254 +    fclose(fp);
255 +
256 +    return li;
257 +}
258 +
259 +/* Look up a timezone name (compared case-insensitively) in the hash
260 + * table 'li'.  Returns the entry, or NULL if 'li' is NULL or no match. */
261 +const struct location_info *find_zone_info(struct location_info **li, 
262 +                                           const char *name)
263 +{
264 +    const uint32_t bucket = tz_hash(name);
265 +    const struct location_info *node;
266 +
267 +    if (li == NULL) {
268 +        return NULL;
269 +    }
270 +
271 +    node = li[bucket];
272 +    while (node != NULL && strcasecmp(node->name, name) != 0) {
273 +        node = node->next;
274 +    }
275 +
276 +    return node;
277 +}
278 +
279 +/* Directory-scan filter: returns 0 for ".", "..", "posix", "posixrules",
280 + * "right" and any name containing ".tab"; returns 1 for everything else. */
281 +static int index_filter(const struct dirent *ent)
282 +{
283 +       const char *n = ent->d_name;
284 +       int skip = !strcmp(n, ".") || !strcmp(n, "..")
285 +               || !strcmp(n, "posix") || !strcmp(n, "posixrules")
286 +               || !strcmp(n, "right");
287 +
288 +       return !skip && strstr(n, ".tab") == NULL;
289 +}
290 +
291 +static int sysdbcmp(const void *first, const void *second)
292 +{
293 +        const timelib_tzdb_index_entry *lhs = first;
294 +        const timelib_tzdb_index_entry *rhs = second;
295 +        return strcmp(lhs->id, rhs->id); /* order index entries by id, bytewise */
296 +}
297 +
298 +
299 +/* Fill db->index and db->index_size with one entry per non-directory found under ZONEINFO_PREFIX, sorted by id. */
300 +static void create_zone_index(timelib_tzdb *db)
301 +{
302 +       size_t dirstack_size,  dirstack_top;
303 +       size_t index_size, index_next;
304 +       timelib_tzdb_index_entry *db_index;
305 +       char **dirstack;
306 +
307 +       /* LIFO stack of directories still to scan; each slot is a strdup()ed
308 +        * directory name relative to the zoneinfo prefix ("" is the root). */
309 +       dirstack_size = 32;
310 +       dirstack = malloc(dirstack_size * sizeof *dirstack);
311 +       dirstack_top = 1;
312 +       dirstack[0] = strdup("");
313 +       
314 +       /* Index array; doubled whenever it fills up. */
315 +       index_size = 64;
316 +       db_index = malloc(index_size * sizeof *db_index);
317 +       index_next = 0;
318 +
319 +       do {
320 +               struct dirent **ents;
321 +               char name[PATH_MAX], *top;
322 +               int count;
323 +
324 +               /* Pop the top stack entry and scan that directory (entries pass through index_filter). */
325 +               top = dirstack[--dirstack_top];
326 +               snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
327 +
328 +               count = php_scandir(name, &ents, index_filter, php_alphasort);
329 +
330 +               while (count > 0) {
331 +                       struct stat st;
332 +                       const char *leaf = ents[count - 1]->d_name;
333 +
334 +                       snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s", 
335 +                                top, leaf);
336 +                       
337 +                       if (strlen(name) && stat(name, &st) == 0) {
338 +                               /* Rebuild 'name' as the path relative to the zoneinfo prefix. */
339 +                               const char *root = top;
340 +
341 +                               if (root[0] == '/') root++;
342 +
343 +                               snprintf(name, sizeof name, "%s%s%s", root, 
344 +                                        *root ? "/": "", leaf);
345 +
346 +                               if (S_ISDIR(st.st_mode)) {
347 +                                       if (dirstack_top == dirstack_size) {
348 +                                               dirstack_size *= 2;
349 +                                               dirstack = realloc(dirstack, 
350 +                                                                  dirstack_size * sizeof *dirstack);
351 +                                       }
352 +                                       dirstack[dirstack_top++] = strdup(name);
353 +                               }
354 +                               else {
355 +                                       if (index_next == index_size) {
356 +                                               index_size *= 2;
357 +                                               db_index = realloc(db_index,
358 +                                                                  index_size * sizeof *db_index);
359 +                                       }
360 +
361 +                                       db_index[index_next++].id = strdup(name);
362 +                               }
363 +                       }
364 +
365 +                       free(ents[--count]);
366 +               }
367 +               
368 +               if (count != -1) free(ents); /* NOTE(review): presumably php_scandir() returns -1 without allocating ents on failure - confirm */
369 +               free(top);
370 +       } while (dirstack_top);
371 +
372 +        qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
373 +
374 +       db->index = db_index;
375 +       db->index_size = index_next;
376 +
377 +       free(dirstack);
378 +}
379 +
380 +#define FAKE_HEADER "1234\0??\1??"
381 +#define FAKE_UTC_POS (7 - 4)
382 +
383 +/* Create a fake data segment for 'sysdb': FAKE_HEADER followed by 3 bytes (BC flag + country code) per zone found in 'info'; sets each index entry's pos and sysdb->data. */
384 +static void fake_data_segment(timelib_tzdb *sysdb,
385 +                              struct location_info **info)
386 +{
387 +        size_t n;
388 +        char *data, *p;
389 +        
390 +        data = malloc(3 * sysdb->index_size + sizeof(FAKE_HEADER) - 1); /* the header is 10 bytes, not 7 */
391 +
392 +        p = (char *)memcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1) + (sizeof(FAKE_HEADER) - 1); /* portable replacement for GNU mempcpy() */
393 +
394 +        for (n = 0; n < sysdb->index_size; n++) {
395 +                const struct location_info *li;
396 +                timelib_tzdb_index_entry *ent;
397 +
398 +                ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
399 +
400 +                /* UTC always uses the fixed record inside the header. */
401 +                if (strcmp(ent->id, "UTC") == 0) {
402 +                        ent->pos = FAKE_UTC_POS;
403 +                        continue;
404 +                }
405 +
406 +                li = find_zone_info(info, ent->id);
407 +                if (li) {
408 +                        /* If found, append the BC byte and the
409 +                         * country code; set the position for this
410 +                         * section of timezone data.  */
411 +                        ent->pos = (p - data) - 4;
412 +                        *p++ = '\1';
413 +                        *p++ = li->code[0];
414 +                        *p++ = li->code[1];
415 +                }
416 +                else {
417 +                        /* If not found, the timezone data can
418 +                         * point at the header. */
419 +                        ent->pos = 0;
420 +                }
421 +        }
422 +        
423 +        sysdb->data = (unsigned char *)data;
424 +}
425 +
426 +/* Returns 1 if *st describes a probably-valid tzfile (a regular file larger than 20 bytes), else 0. */
427 +static int is_valid_tzfile(const struct stat *st)
428 +{
429 +       if (!S_ISREG(st->st_mode)) return 0;
430 +       return st->st_size > 20;
431 +}
432 +
433 +/* Return the read-only mmap()ed file ZONEINFO_PREFIX/<timezone>, or NULL if the name is empty or contains "..", or the file cannot be opened, is not a valid tzfile, or cannot be mapped.
434 + * On success, the length of the mapped data is placed in *length. */
435 +static char *map_tzfile(const char *timezone, size_t *length)
436 +{
437 +       char fname[PATH_MAX];
438 +       struct stat st;
439 +       char *p;
440 +       int fd;
441 +       
442 +       if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
443 +               return NULL;
444 +       }
445 +
446 +       snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
447 +       
448 +       fd = open(fname, O_RDONLY);
449 +       if (fd == -1) {
450 +               return NULL;
451 +       } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
452 +               close(fd);
453 +               return NULL;
454 +       }
455 +
456 +       *length = st.st_size;
457 +       p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
458 +       close(fd); /* the descriptor is not needed once the mapping exists (or has failed) */
459 +       
460 +       return p != MAP_FAILED ? p : NULL;
461 +}
462 +
463 +#endif
464 +
465 +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
466  {
467         int left = 0, right = tzdb->index_size - 1;
468  #ifdef HAVE_SETLOCALE
469 @@ -295,36 +705,128 @@ static int seek_to_tz_position(const uns
470         return 0;
471  }
472  
473 +static int seek_to_tz_position(const unsigned char **tzf, char *timezone, 
474 +                              char **map, size_t *maplen,
475 +                              const timelib_tzdb *tzdb)
476 +{ /* System db: returns 1 with *tzf and *map pointing at the mmap()ed zone file, or 0 if it cannot be mapped.  Any other db: defers to inmem_seek_to_tz_position() and leaves *map and *maplen untouched. */
477 +#ifdef HAVE_SYSTEM_TZDATA
478 +       if (tzdb == timezonedb_system) {
479 +               char *orig;
480 +
481 +               orig = map_tzfile(timezone, maplen);
482 +               if (orig == NULL) {
483 +                       return 0;
484 +               }
485 +               
486 +               (*tzf) = (unsigned char *)orig ;
487 +               *map = orig; /* handed back so the caller can munmap() it */
488 +                
489 +                return 1;
490 +       }
491 +       else
492 +#endif
493 +       {
494 +               return inmem_seek_to_tz_position(tzf, timezone, tzdb);
495 +       }
496 +}
497 +
498  const timelib_tzdb *timelib_builtin_db(void)
499  {
500 +#ifdef HAVE_SYSTEM_TZDATA
501 +       if (timezonedb_system == NULL) { /* first call: build the system database and cache it */
502 +               timelib_tzdb *tmp = malloc(sizeof *tmp); /* NOTE(review): the malloc() result is not checked */
503 +
504 +               tmp->version = "0.system";
505 +               tmp->data = NULL; /* replaced by fake_data_segment() below */
506 +               create_zone_index(tmp);
507 +               system_location_table = create_location_table(); /* NULL if zone.tab cannot be opened */
508 +                fake_data_segment(tmp, system_location_table);
509 +               timezonedb_system = tmp;
510 +       }
511 +
512 +                       
513 +       return timezonedb_system;
514 +#else
515         return &timezonedb_builtin;
516 +#endif
517  }
518  
519  const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
520  {
521 +#ifdef HAVE_SYSTEM_TZDATA
522 +       *count = timelib_builtin_db()->index_size; /* go through the accessor: timezonedb_system is NULL until it has built the db */
523 +       return timelib_builtin_db()->index; /* second call returns the cached db */
524 +#else
525         *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
526         return timezonedb_idx_builtin;
527 +#endif
528  }
529  
530  int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
531  {
532         const unsigned char *tzf;
533 -       return (seek_to_tz_position(&tzf, timezone, tzdb));
534 +
535 +#ifdef HAVE_SYSTEM_TZDATA
536 +        if (tzdb == timezonedb_system) { /* system db: validate by stat()ing the zone file rather than mapping it */
537 +            char fname[PATH_MAX];
538 +            struct stat st;
539 +
540 +            if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { /* reject empty names and any name containing ".." */
541 +               return 0;
542 +            }
543 +            
544 +            snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
545 +            
546 +            return stat(fname, &st) == 0 && is_valid_tzfile(&st); /* directories and files of 20 bytes or fewer are not valid ids */
547 +        }
548 +#endif
549 +
550 +       return (inmem_seek_to_tz_position(&tzf, timezone, tzdb)); /* any other db: result of the in-memory lookup */
551  }
552  
553  timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
554  {
555         const unsigned char *tzf;
556 +       char *memmap = NULL;
557 +       size_t maplen;
558         timelib_tzinfo *tmp;
559  
560 -       if (seek_to_tz_position(&tzf, timezone, tzdb)) {
561 +       if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
562                 tmp = timelib_tzinfo_ctor(timezone);
563  
564                 read_preamble(&tzf, tmp);
565                 read_header(&tzf, tmp);
566                 read_transistions(&tzf, tmp);
567                 read_types(&tzf, tmp);
568 -               read_location(&tzf, tmp);
569 +
570 +#ifdef HAVE_SYSTEM_TZDATA
571 +               if (memmap) {
572 +                       const struct location_info *li;
573 +
574 +                       /* TZif-style - grok the location info from the system database,
575 +                        * if possible. */
576 +
577 +                       if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
578 +                               tmp->location.comments = strdup(li->comment);
579 +                                strncpy(tmp->location.country_code, li->code, 2);
580 +                               tmp->location.longitude = li->longitude;
581 +                               tmp->location.latitude = li->latitude;
582 +                               tmp->bc = 1;
583 +                       }
584 +                       else {
585 +                               strcpy(tmp->location.country_code, "??");
586 +                               tmp->bc = 0;
587 +                               tmp->location.comments = strdup("");
588 +                       }
589 +
590 +                       /* Now done with the mmap segment - discard it. */
591 +                       munmap(memmap, maplen);
592 +               } else
593 +#endif
594 +               {
595 +                       /* PHP-style - use the embedded info. */
596 +                       read_location(&tzf, tmp);
597 +               }
598         } else {
599                 tmp = NULL;
600         }
601 --- a/ext/date/lib/timelib.m4
602 +++ b/ext/date/lib/timelib.m4
603 @@ -78,3 +78,17 @@ stdlib.h
604  
605  dnl Check for strtoll, atoll
606  AC_CHECK_FUNCS(strtoll atoll strftime)
607 +
608 +PHP_ARG_WITH(system-tzdata, for use of system timezone data,
609 +[  --with-system-tzdata[=DIR]      to specify use of system timezone data],
610 +no, no)
611 +
612 +if test "$PHP_SYSTEM_TZDATA" != "no"; then
613 +   AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
614 +
615 +   if test "$PHP_SYSTEM_TZDATA" != "yes"; then
616 +      AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
617 +                         [Define for location of system timezone data])
618 +   fi
619 +fi
620 +