Changeset cc27237


Ignore:
Timestamp:
Jan 7, 2014, 6:02:25 PM (8 years ago)
Author:
Jason Gross <jgross@mit.edu>
Children:
611236e
Parents:
4b9c3b9
git-author:
Jason Gross <jgross@mit.edu> (01/01/14 20:59:51)
git-committer:
Jason Gross <jgross@mit.edu> (01/07/14 18:02:25)
Message:
Use g_utf8_casefold and g_utf8_normalize

We define a convenience function compat_casefold in util.c for reuse in
filters.
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • perl/lib/BarnOwl/Message/Zephyr.pm

    r50a3240 rcc27237  
    99
    1010use base qw( BarnOwl::Message );
    11 use Unicode::Normalize qw( NFKC );
    1211
    1312sub strip_realm {
     
    2221    my ($user, $realm) = split(/@/,$principal);
    2322    return $realm;
     23}
     24
     25sub casefold_principal {
     26    my $principal = shift;
     27    # split the principal right after the final @, without eating any
     28    # characters; this way, we always get at least '@' in $user
     29    my ($user, $realm) = split(/(?<=@)(?=[^@]+$)/, $principal);
     30    return lc($user) . uc($realm);
    2431}
    2532
     
    261268            my $realm = '';
    262269            $realm .= '@' . $m->realm if $m->realm ne BarnOwl::zephyr_getrealm();
    263             return (lc(NFKC($m->class)) . $realm);
     270            return (BarnOwl::compat_casefold($m->class) . uc($realm));
    264271        }
    265272    } else {
    266273        push @filenames, $m->recipient;
    267274    }
    268     return map { lc(NFKC(BarnOwl::zephyr_smartstrip_user(strip_realm($_)))) } @filenames;
     275    return map { casefold_principal(BarnOwl::zephyr_smartstrip_user(strip_realm($_))) } @filenames;
    269276}
    270277
  • perl/modules/Jabber/lib/BarnOwl/Message/Jabber.pm

    rdca6255 rcc27237  
    1515
    1616use base qw( BarnOwl::Message );
    17 use Unicode::Normalize qw( NFKC );
    1817
    1918sub jtype { shift->{jtype} };
     
    174173
    175174sub log_filenames {
    176     return map { lc(NFKC($_)) } BarnOwl::Message::log_filenames(@_);
     175    return map { BarnOwl::compat_casefold($_) } BarnOwl::Message::log_filenames(@_);
    177176}
    178177
  • perlglue.xs

    r6e764aa rcc27237  
    385385                RETVAL
    386386
     387const utf8 *
     388compat_casefold(in)
     389        const char * in
     390        PREINIT:
     391                char *rv;
     392        CODE:
     393                rv = owl_util_compat_casefold(in);
     394                RETVAL = rv;
     395        OUTPUT:
     396                RETVAL
     397        CLEANUP:
     398                g_free(rv);
     399
    387400
    388401MODULE = BarnOwl                PACKAGE = BarnOwl::Zephyr
  • util.c

    r7b89e8c rcc27237  
    640640}
    641641
     642CALLER_OWN char *owl_util_compat_casefold(const char *str)
     643{
     644  /*
     645   * Quoting Anders Kaseorg at https://github.com/barnowl/barnowl/pull/54#issuecomment-31452543:
     646   *
     647   * The Unicode specification calls this compatibility caseless matching, and
     648   * the correct transformation actually has five calls:
     649   * NFKC(toCasefold(NFKD(toCasefold(NFD(string))))). Zephyr’s current
     650   * implementation incorrectly omits the innermost NFD, but that difference
     651   * only matters for characters including U+0345 ◌ͅ COMBINING GREEK
     652   * YPOGEGRAMMENI. I think we should just write the correct version and get
     653   * Zephyr fixed.
     654   *
     655   * Neither of these operations should be called toNFKC_Casefold, because that
     656   * has slightly different behavior regarding Default_Ignorable_Code_Point. I
     657   * propose compat_casefold. And I guess if Jabber wants it too, we should
     658   * move it to util.c.
     659   */
     660  char *tmp0 = g_utf8_normalize(str, -1, G_NORMALIZE_NFD);
     661  char *tmp1 = g_utf8_casefold(tmp0, -1);
     662  char *tmp2 = g_utf8_normalize(tmp1, -1, G_NORMALIZE_NFKD);
     663  char *tmp3 = g_utf8_casefold(tmp2, -1);
     664  char *out = g_utf8_normalize(tmp3, -1, G_NORMALIZE_NFKC);
     665  g_free(tmp0);
     666  g_free(tmp1);
     667  g_free(tmp2);
     668  g_free(tmp3);
     669
     670  return out;
     671}
     672
    642673/* This is based on _extract() and _isCJ() from perl's Text::WrapI18N */
    643674int owl_util_can_break_after(gunichar c)
Note: See TracChangeset for help on using the changeset viewer.