Changeset 5376a95


Ignore:
Timestamp:
Dec 28, 2007, 5:04:34 PM (13 years ago)
Author:
Alejandro R. Sedeño <asedeno@mit.edu>
Branches:
master, barnowl_perlaim, debian, release-1.4, release-1.5, release-1.6, release-1.7, release-1.8, release-1.9
Children:
6201646
Parents:
925e122
Message:
First pass at incoming zephyr -> UTF-8 sanitizing.
This only operates on incoming data so far.
We still need to clean outgoing data -- the plan is to attempt conversion
to ISO-8859-1, and use that if it works.
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • fmtext.c

    r9866c3a r5376a95  
    235 235 char *owl_fmtext_print_plain(owl_fmtext *f)
    236 236 {
    237   char *r, *s, *p;
    238   r = owl_malloc(f->bufflen);
    239   r[0] = '\0';
    240   s = f->textbuff;
    241   /* Find next possible format character. */
    242   p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
    243   while(p) {
    244     /* If it's a format character, copy up to it, and skip all
    245        immediately following format characters. */
    246     if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
    247       strncat(r, s, p-s);
    248       p = g_utf8_next_char(p);
    249       while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
    250         p = g_utf8_next_char(p);
    251       }
    252       s = p;
    253       p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
    254     }
    255     else {
    256       p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
    257     }
    258   }
    259   if (s) strcat(r,s);
    260   return(r);
     237  return owl_strip_format_chars(f->textbuff);
    261 238 }
    262 239
  • functions.c

    r28ee32b r5376a95  
    1605 1605           sprintf(buff, "  Field %i   : ", i+1);
    1606 1606          
    1607           ptr=owl_zephyr_get_field(n, i+1);
     1607          ptr=owl_zephyr_get_field_as_utf8(n, i+1);
    1608 1608           len=strlen(ptr);
    1609 1609           if (len<30) {
  • message.c

    r9866c3a r5376a95  
    79 79     owl_list_append_element(&(m->attributes), pair);
    80 80   }
    81   owl_pair_set_value(pair, owl_strdup(attrvalue));
     81  owl_pair_set_value(pair, owl_validate_or_convert(attrvalue, -1));
    82 82 }
    83 83
  • perlconfig.c

    rad15610 r5376a95  
    55 55     j=owl_zephyr_get_num_fields(owl_message_get_notice(m));
    56 56     for (i=0; i<j; i++) {
    57       ptr=owl_zephyr_get_field(owl_message_get_notice(m), i+1);
     57      ptr=owl_zephyr_get_field_as_utf8(owl_message_get_notice(m), i+1);
    58 58       av_push(av_zfields, newSVpvn(ptr, strlen(ptr)));
    59 59       owl_free(ptr);
  • util.c

    r28ee32b r5376a95  
    753 753 }
    754 754
    755 char * owl_get_datadir() {
    756     char * datadir = getenv("BARNOWL_DATA_DIR");
    757     if(datadir != NULL)
    758         return strchr(datadir, '=') + 1;
    759     return DATADIR;
     755char * owl_get_datadir()
     756{
     757  char * datadir = getenv("BARNOWL_DATA_DIR");
     758  if(datadir != NULL)
     759    return strchr(datadir, '=') + 1;
     760  return DATADIR;
     761}
     762
     763/* Strips format characters from a valid utf-8 string. Returns the
     764   empty string if 'in' does not validate. */
     765char * owl_strip_format_chars(char *in)
     766{
     767  char *r;
     768  if (g_utf8_validate(in, -1, NULL)) {
     769    char *s, *p;
     770    r = owl_malloc(strlen(in)+1);
     771    r[0] = '\0';
     772    s = in;
     773    p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
     774    while(p) {
     775      /* If it's a format character, copy up to it, and skip all
     776         immediately following format characters. */
     777      if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
     778        strncat(r, s, p-s);
     779        p = g_utf8_next_char(p);
     780        while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
     781          p = g_utf8_next_char(p);
     782        }
     783        s = p;
     784        p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
     785      }
     786      else {
     787        p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
     788      }
     789    }
     790    if (s) strcat(r,s);
     791  }
     792  else {
     793    r = owl_strdup("");
     794  }
     795  return r;
     796}
     797
     798/* If in is not UTF-8, convert from ISO-8859-1. We may want to allow
     799 * the caller to specify an alternative in the future. We also strip
     800 * out characters in Unicode Plane 16, as we use that plane internally
     801 * for formatting.
     802 */
     803char * owl_validate_or_convert(char *in, int len)
     804{
     805  if (g_utf8_validate(in, len , NULL)) {
     806    return owl_strip_format_chars(in);
     807  }
     808  else {
     809    return g_convert(in, len,
     810                     "UTF-8", "ISO-8859-1",
     811                     NULL, NULL, NULL);
     812  }
    760 813 }
    761 814
  • zephyr.c

    r50e29e3 r5376a95  
    354 354   return(owl_strdup(""));
    355 355 }
     356
     357char *owl_zephyr_get_field_as_utf8(ZNotice_t *n, int j)
     358{
     359  int i, count, save;
     360
     361  /* If there's no message here, just run along now */
     362  if (n->z_message_len == 0)
     363    return(owl_strdup(""));
     364
     365  count=save=0;
     366  for (i = 0; i < n->z_message_len; i++) {
     367    if (n->z_message[i]=='\0') {
     368      count++;
     369      if (count == j) {
     370        /* just found the end of the field we're looking for */
     371        return(owl_validate_or_convert(n->z_message + save, -1));
     372      } else {
     373        save = i + 1;
     374      }
     375    }
     376  }
     377  /* catch the last field, which might not be null terminated */
     378  if (count == j - 1) {
     379    return owl_validate_or_convert(n->z_message + save, n->z_message_len - save);
     380  }
     381
     382  return(owl_strdup(""));
     383}
    356 384 #else
    357 385 char *owl_zephyr_get_field(void *n, int j)
    358 386 {
    359 387   return(owl_strdup(""));
     388}
     389char *owl_zephyr_get_field_as_utf8(ZNotice_t *n, int j)
     390{
     391  return owl_zephyr_get_field(n, j);
    360 392 }
    361 393 #endif
Note: See TracChangeset for help on using the changeset viewer.