Changeset 5376a95 for util.c


Ignore:
Timestamp:
Dec 28, 2007, 5:04:34 PM (14 years ago)
Author:
Alejandro R. Sedeño <asedeno@mit.edu>
Branches:
master, barnowl_perlaim, debian, release-1.4, release-1.5, release-1.6, release-1.7, release-1.8, release-1.9
Children:
6201646
Parents:
925e122
Message:
First pass at incoming zephyr -> UTF-8 sanitizing.
This only operates on incoming data so far.
We still need to clean outgoing data -- the plan is to attempt conversion
to ISO-8859-1, and use that if it works.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • util.c

    r28ee32b r5376a95  
    753753}
    754754
    755 char * owl_get_datadir() {
    756     char * datadir = getenv("BARNOWL_DATA_DIR");
    757     if(datadir != NULL)
    758         return strchr(datadir, '=') + 1;
    759     return DATADIR;
     755char * owl_get_datadir()
     756{
     757  char * datadir = getenv("BARNOWL_DATA_DIR");
     758  if(datadir != NULL)
     759    return strchr(datadir, '=') + 1;
     760  return DATADIR;
     761}
     762
     763/* Strips format characters from a valid utf-8 string. Returns the
     764   empty string if 'in' does not validate. */
     765char * owl_strip_format_chars(char *in)
     766{
     767  char *r;
     768  if (g_utf8_validate(in, -1, NULL)) {
     769    char *s, *p;
     770    r = owl_malloc(strlen(in)+1);
     771    r[0] = '\0';
     772    s = in;
     773    p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
     774    while(p) {
     775      /* If it's a format character, copy up to it, and skip all
     776         immediately following format characters. */
     777      if (_owl_fmtext_is_format_char(g_utf8_get_char(p))) {
     778        strncat(r, s, p-s);
     779        p = g_utf8_next_char(p);
     780        while (p && _owl_fmtext_is_format_char(g_utf8_get_char(p))) {
     781          p = g_utf8_next_char(p);
     782        }
     783        s = p;
     784        p = strchr(s, OWL_FMTEXT_UC_STARTBYTE_UTF8);
     785      }
     786      else {
     787        p = strchr(p+1, OWL_FMTEXT_UC_STARTBYTE_UTF8);
     788      }
     789    }
     790    if (s) strcat(r,s);
     791  }
     792  else {
     793    r = owl_strdup("");
     794  }
     795  return r;
     796}
     797
     798/* If in is not UTF-8, convert from ISO-8859-1. We may want to allow
     799 * the caller to specify an alternative in the future. We also strip
     800 * out characters in Unicode Plane 16, as we use that plane internally
     801 * for formatting.
     802 */
     803char * owl_validate_or_convert(char *in, int len)
     804{
     805  if (g_utf8_validate(in, len , NULL)) {
     806    return owl_strip_format_chars(in);
     807  }
     808  else {
     809    return g_convert(in, len,
     810                     "UTF-8", "ISO-8859-1",
     811                     NULL, NULL, NULL);
     812  }
    760813}
    761814
Note: See TracChangeset for help on using the changeset viewer.