Changeset d2ba33c


Ignore:
Timestamp:
Aug 16, 2017, 12:53:41 PM (7 years ago)
Author:
Jason Gross <jasongross9@gmail.com>
Branches:
master
Children:
5dee79a
Parents:
47225c9
git-author:
Jason Gross <jgross@mit.edu> (01/01/14 20:59:51)
git-committer:
Jason Gross <jasongross9@gmail.com> (08/16/17 12:53:41)
Message:
Use g_utf8_casefold and g_utf8_normalize

We define a convenience function compat_casefold in util.c for reuse in
filters.
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • perl/lib/BarnOwl/Message/Zephyr.pm

    rdce72c1 rd2ba33c  
    99
    1010use base qw( BarnOwl::Message );
    11 use Unicode::Normalize qw( NFKC );
    1211
    1312sub strip_realm {
     
    2221    my ($user, $realm) = split(/@/,$principal);
    2322    return $realm;
     23}
     24
     25sub casefold_principal {
     26    my $principal = shift;
     27    # split the principal right after the final @, without eating any
     28    # characters; this way, we always get at least '@' in $user
     29    my ($user, $realm) = split(/(?<=@)(?=[^@]+$)/, $principal);
     30    return lc($user) . uc($realm);
    2431}
    2532
     
    261268            my $realm = '';
    262269            $realm .= '@' . $m->realm if $m->realm ne BarnOwl::zephyr_getrealm();
    263             return (lc(NFKC($m->class)) . $realm);
     270            return (BarnOwl::compat_casefold($m->class) . uc($realm));
    264271        }
    265272    } else {
    266273        push @filenames, $m->recipient;
    267274    }
    268     return map { lc(NFKC(BarnOwl::zephyr_smartstrip_user(strip_realm($_)))) } @filenames;
     275    return map { casefold_principal(BarnOwl::zephyr_smartstrip_user(strip_realm($_))) } @filenames;
    269276}
    270277
  • perl/modules/Jabber/lib/BarnOwl/Message/Jabber.pm

    reea7bed4 rd2ba33c  
    1515
    1616use base qw( BarnOwl::Message );
    17 use Unicode::Normalize qw( NFKC );
    1817
    1918sub jtype { shift->{jtype} };
     
    174173
    175174sub log_filenames {
    176     return map { lc(NFKC($_)) } BarnOwl::Message::log_filenames(@_);
     175    return map { BarnOwl::compat_casefold($_) } BarnOwl::Message::log_filenames(@_);
    177176}
    178177
  • perlglue.xs

    r5093c6f rd2ba33c  
    413413                RETVAL
    414414
     415const utf8 *
     416compat_casefold(in)
     417        const char * in
     418        PREINIT:
     419                char *rv;
     420        CODE:
     421                rv = owl_util_compat_casefold(in);
     422                RETVAL = rv;
     423        OUTPUT:
     424                RETVAL
     425        CLEANUP:
     426                g_free(rv);
     427
    415428
    416429MODULE = BarnOwl                PACKAGE = BarnOwl::Zephyr
  • util.c

    rcba6b9c rd2ba33c  
    643643}
    644644
     645CALLER_OWN char *owl_util_compat_casefold(const char *str)
     646{
     647  /*
     648   * Quoting Anders Kaseorg at https://github.com/barnowl/barnowl/pull/54#issuecomment-31452543:
     649   *
     650   * The Unicode specification calls this compatibility caseless matching, and
     651   * the correct transformation actually has five calls:
     652   * NFKC(toCasefold(NFKD(toCasefold(NFD(string))))) Zephyr’s current
     653   * implementation incorrectly omits the innermost NFD, but that difference
     654   * only matters for characters including U+0345 ◌ͅ COMBINING GREEK
     655   * YPOGEGRAMMENI. I think we should just write the correct version and get
     656   * Zephyr fixed.
     657   *
     658   * Neither of these operations should be called toNFKC_Casefold, because that
     659   * has slightly different behavior regarding Default_Ignorable_Code_Point. I
     660   * propose compat_casefold. And I guess if Jabber wants it too, we should
     661   * move it to util.c.
     662   */
     663  char *tmp0 = g_utf8_normalize(str, -1, G_NORMALIZE_NFD);
     664  char *tmp1 = g_utf8_casefold(tmp0, -1);
     665  char *tmp2 = g_utf8_normalize(tmp1, -1, G_NORMALIZE_NFKD);
     666  char *tmp3 = g_utf8_casefold(tmp2, -1);
     667  char *out = g_utf8_normalize(tmp3, -1, G_NORMALIZE_NFKC);
     668  g_free(tmp0);
     669  g_free(tmp1);
     670  g_free(tmp2);
     671  g_free(tmp3);
     672
     673  return out;
     674}
     675
    645676/* This is based on _extract() and _isCJ() from perl's Text::WrapI18N */
    646677int owl_util_can_break_after(gunichar c)
Note: See TracChangeset for help on using the changeset viewer.