source: perl/modules/Facebook/lib/BarnOwl/Module/Facebook/Handle.pm @ a8e1fcf

Last change on this file since a8e1fcf was a8e1fcf, checked in by Edward Z. Yang <ezyang@mit.edu>, 13 years ago
Make topic detection more robust. Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
  • Property mode set to 100644
File size: 14.2 KB
Line 
1use warnings;
2use strict;
3
4=head1 NAME
5
6BarnOwl::Module::Facebook::Handle
7
8=head1 DESCRIPTION
9
10Contains everything needed to send and receive messages from Facebook
11
12=cut
13
14package BarnOwl::Module::Facebook::Handle;
15
16use Facebook::Graph;
17
18use List::Util qw(reduce);
19
# Topic keyword extraction.  Use Lingua::EN::Keywords when it can be
# loaded; otherwise install a crude stand-in under the same name so the
# rest of the module can call keywords() unconditionally.
if (eval { require Lingua::EN::Keywords; 1 }) {
    *keywords = \&Lingua::EN::Keywords::keywords;
} else {
    *keywords = sub {
        # stupidly pick the longest one, and only return one.
        my $sentence = shift;

        # A post may have neither a name nor a message; treat that as
        # "no keywords" instead of warning about an undefined value.
        return () unless defined $sentence;

        # Punctuation becomes whitespace so it never sticks to a word.
        $sentence =~ s/[[:punct:]]+/ /g;
        my @words = split(' ', lc($sentence));
        return () unless @words;

        # Strict '>' means that on a tie the later word wins.
        return (reduce { length($a) > length($b) ? $a : $b } @words);
    };
}
33
34use JSON;
35use Date::Parse;
36use POSIX;
37
38use Scalar::Util qw(weaken);
39
40use BarnOwl;
41use BarnOwl::Message::Facebook;
42
43our $app_id = 235537266461636; # for application 'barnowl'
44
45# Unfortunately, Facebook does not offer a comment stream, in the same
46# way we can get a post stream using the news feed.  This makes it a bit
47# difficult to de-duplicate comments we have already seen.  We use a
48# simple heuristic to fix this: we check if the comment's time is dated
49# from before our last update, and don't re-post if it's dated before.
50# Be somewhat forgiving, since it's better to duplicate a post than to
51# drop one.  Furthermore, we must use Facebook's idea of time, since the
52# server BarnOwl is running on may be desynchronized.  So we need to
53# utilize Facebook's idea of time, not ours.  We do this by looking at
54# all of the timestamps we see while processing an update, and take the
55# latest one and increment it by one second.
56#
57# What properties do we get with this setup?
58#
59#   - We get comment updates only for the latest N posts on a news feed.
60#   Any later ones, you have to use Facebook's usual mechanisms (e.g.
61#   email notifications).
62#
63#   - Processing a poll is relatively expensive, since we have to
64#   iterate over N new posts.  It might be worthwhile polling for new
65#   comments less frequently than polling for new posts.
66
# Abort with an error: clear the Facebook client stored on the object
# and die with a "[Facebook] Error: ..." message.  The trailing newline
# keeps Perl from appending file/line information to the message.
sub fail {
    my ($self, $msg) = @_;

    $self->{facebook} = undef;
    die "[Facebook] Error: $msg\n";
}
73
# Constructor.  Takes the class name and a configuration hash reference
# (facebook_do_auth reads its 'token' entry), sets up the handle's
# state, creates the Facebook::Graph client and immediately attempts to
# authenticate.  Returns the new object.
sub new {
    my ($class, $cfg) = @_;

    # How far back (in seconds) the very first poll reaches: two days.
    my $initial_backlog = 60 * 60 * 24 * 2;

    my $self = bless {
        cfg      => $cfg,
        facebook => undef,

        # Ideally this should be done using Facebook realtime updates,
        # but we can't assume that the BarnOwl lives on a publicly
        # addressable server (XXX maybe we can setup an option for this.)
        last_friend_poll => 0,
        friend_timer     => undef,

        # Initialized with our 'time', but will be synced to Facebook
        # soon enough. (Subtractive amount is just to preseed with some
        # values.)
        last_poll => time() - $initial_backlog,
        timer     => undef,

        # Message polling not implemented yet
        #'last_message_poll' => time,
        #'message_timer' => undef,

        # yeah yeah, inelegant, I know.  You can try using
        # $fb->authorize, but at time of writing (1.0300) they didn't support
        # the response_type parameter.
        # 'login_url' => 'https://www.facebook.com/dialog/oauth?client_id=235537266461636&scope=read_stream,read_mailbox,publish_stream,offline_access&redirect_uri=http://www.facebook.com/connect/login_success.html&response_type=token',
        # minified to fit in most terminal windows.
        login_url => 'http://goo.gl/yA42G',

        logged_in => 0,

        # would need another hash for topic de-dup
        topics => {},

        # deduplicated map of names to user ids
        friends => {},
    }, $class;

    $self->{facebook} = Facebook::Graph->new( app_id => $app_id );
    $self->facebook_do_auth;

    return $self;
}
121
122=head2 sleep N
123
124Stop polling Facebook for N seconds.
125
126=cut
127
# Cancel every running poll timer and schedule fresh ones that first
# fire after $delay seconds.  Calling this with a delay of 0 therefore
# triggers an immediate poll.
sub sleep {
    my ($self, $delay) = @_;

    # Stop any existing timers.  (message_timer is never created at the
    # moment, so its slot is normally empty -- XXX message polling is
    # not implemented yet.)
    for my $slot (qw(friend_timer timer message_timer)) {
        my $running = $self->{$slot};
        next unless defined $running;
        $running->stop;
        $self->{$slot} = undef;
    }

    # The callbacks below must not keep the handle alive: the handle owns
    # the timers, so a strong reference here would form a cycle.
    my $weak = $self;
    weaken($weak);

    my $once_a_day = 60 * 60 * 24;
    $self->{friend_timer} = BarnOwl::Timer->new({
        name     => "Facebook friend poll",
        after    => $delay,
        interval => $once_a_day,
        cb       => sub { $weak and $weak->poll_friends },
    });

    my $feed_interval = 90;
    $self->{timer} = BarnOwl::Timer->new({
        name     => "Facebook poll",
        after    => $delay,
        interval => $feed_interval,
        cb       => sub { $weak and $weak->poll_facebook },
    });
    # XXX implement message polling
}
165
# Refresh $self->{friends}, the map from display name to Facebook user
# id, from the 'me/friends' endpoint.
#
# Does nothing unless the 'facebook:poll' variable is 'on' and we are
# logged in.  If the fetch throws, a warning is emitted and the existing
# map is left untouched.  On success the map is rebuilt from scratch and
# last_friend_poll is set to the current time.
sub poll_friends {
    my $self = shift;

    # An unset variable counts as "off" rather than warning in 'eq'.
    my $polling = BarnOwl::getvar('facebook:poll');
    return unless defined $polling && $polling eq 'on';
    return unless $self->{logged_in};

    my $friends;
    my $fetched = eval { $friends = $self->{facebook}->fetch('me/friends'); 1 };
    if (!$fetched) {
        warn "Poll failed $@";
        return;
    }

    $self->{last_friend_poll} = time;
    $self->{friends} = {};

    # Tolerate a response without a 'data' list.
    for my $friend ( @{ $friends->{data} || [] } ) {
        my $name = $friend->{name};

        # An entry without a name cannot be looked up by name; skip it
        # instead of filing it under the empty string.
        next unless defined $name;

        my $key = $name;
        # 'exists' rather than 'defined': a name is taken even when the
        # id recorded for it happens to be undef.
        if (exists $self->{friends}{$key}) {
            # XXX We should try a little harder here, rather than just
            # tacking on a number.  Ideally, we should be able to
            # calculate some extra piece of information that the user
            # needs to disambiguate between the two users.  An old
            # version of Facebook used to disambiguate with your primary
            # network (so you might have Edward Yang (MIT) and Edward
            # Yang (Cambridge)), the idea being that users in the same
            # network would probably have already disambiguated
            # themselves with middle names or nicknames.  We no longer
            # get network information, since Facebook axed that
            # information, but the Education/Work fields may still be
            # a reasonable approximation (but which one do you pick?!
            # The most recent one.)  Since getting this information
            # involves extra queries, there are also caching and
            # efficiency concerns.
            #   We may want a facility for users to specify custom
            # aliases for Facebook users, which are added into this
            # hash.  See also username support.
            warn "Duplicate friend name " . $name;

            # First free "Name 2", "Name 3", ... slot.
            my $i = 2;
            $i++ while exists $self->{friends}{"$name $i"};
            $key = "$name $i";
        }
        $self->{friends}{$key} = $friend->{id};
    }

    # XXX We should also have support for usernames, and not just real
    # names. However, since this data is not returned by the friends
    # query, it would require a rather expensive set of queries. We
    # might try to preserve old data, but all-in-all it's a bit
    # complicated, so we don't bother.
    return;
}
217
# Fetch the news feed and queue a BarnOwl message for every post and
# comment that is new since the previous poll.
#
# Does nothing unless the 'facebook:poll' variable is 'on' and we are
# logged in.  If the fetch throws, a warning is emitted and no state is
# changed (in particular the topic table survives).  On success:
#   - $self->{topics} is rebuilt so that it only holds the posts in this
#     feed snapshot, reusing topics already chosen for known posts;
#   - $self->{last_poll} advances to one second past the newest
#     updated_time seen, i.e. it follows the timestamps in the feed
#     rather than the local clock.
sub poll_facebook {
    my $self = shift;

    #return unless ( time - $self->{last_poll} ) >= 60;
    # An unset variable counts as "off" rather than warning in 'eq'.
    my $polling = BarnOwl::getvar('facebook:poll');
    return unless defined $polling && $polling eq 'on';
    return unless $self->{logged_in};

    #BarnOwl::message("Polling Facebook...");

    # XXX Oh no! This blocks the user interface.  Not good.
    # Ideally, we should have some worker thread for polling facebook.
    # But BarnOwl is probably not thread-safe >_<

    my $updates;
    my $fetched = eval {
        $updates = $self->{facebook}
             ->query
             ->from("my_news")
             # Not using this, because we want to pick up comment
             # updates. We need to manually de-dup, though.
             # ->where_since( "@" . $self->{last_poll} )
             ->limit_results( 200 )
             ->request()
             ->as_hashref();
        1;
    };
    if (!$fetched) {
        warn "Poll failed $@";
        return;
    }

    # Only start a fresh topic table once we actually have data; doing
    # it before the fetch would wipe the table on every failed poll.
    my $old_topics = $self->{topics} || {};
    $self->{topics} = {};

    # Parse a timestamp, yielding undef (without warnings) when the
    # field is missing.
    my $parse_time = sub {
        my $stamp = shift;
        return defined $stamp ? str2time($stamp) : undef;
    };

    my $new_last_poll = $self->{last_poll};
    # Walk the feed in reverse of the order the API returned it.
    for my $post ( reverse @{ $updates->{data} || [] } ) {
        my $type = defined $post->{type} ? $post->{type} : '';

        # No app invites, thanks! (XXX make configurable)
        next if $type eq 'link' && $post->{application};

        # XXX Filtering out interest groups for now
        # A more reasonable strategy may be to show their
        # posts, but not the comments.
        next if defined $post->{from}{category};

        # XXX Need to somehow access Facebook's user hiding
        # mechanism

        # There can be multiple recipients! Strange! Pick the first one.
        my $name    = $post->{to}{data}[0]{name} || $post->{from}{name};
        my $name_id = $post->{to}{data}[0]{id} || $post->{from}{id};
        my $post_id = $post->{id};

        if (defined $old_topics->{$post_id}) {
            # Keep the topic stable for posts we have already seen.
            $self->{topics}->{$post_id} = $old_topics->{$post_id};
        } else {
            # Derive a topic from the post's title, or failing that its
            # text; posts with neither fall back to 'personal'.
            my $text     = $post->{name} || $post->{message};
            my @keywords = defined $text ? keywords($text) : ();
            my $topic    = $keywords[0] || 'personal';
            $topic =~ s/ /-/g;
            $self->{topics}->{$post_id} = $topic;
        }

        # Only handle post if it's new
        my $created_time = $parse_time->($post->{created_time});
        if (defined $created_time && $created_time >= $self->{last_poll}) {
            # XXX indexing is fragile
            my $msg = BarnOwl::Message->new(
                type      => 'Facebook',
                sender    => $post->{from}{name},
                sender_id => $post->{from}{id},
                name      => $name,
                name_id   => $name_id,
                direction => 'in',
                body      => $self->format_body($post),
                post_id   => $post_id,
                topic     => $self->get_topic($post_id),
                time      => asctime(localtime $created_time),
                # XXX The intent is to get the 'Comment' link, which also
                # serves as a canonical link to the post.  The {name}
                # field should equal 'Comment'.
                zsig      => $post->{actions}[0]{link},
               );
            BarnOwl::queue_message($msg);
        }

        # A post without a usable updated_time can neither have its
        # comments dated nor move the poll marker forward.
        my $updated_time = $parse_time->($post->{updated_time});
        next unless defined $updated_time;

        # This will have funky interleaving of times (they'll all be
        # sorted linearly), but since we don't expect too many updates between
        # polls this is pretty acceptable.
        if ($updated_time >= $self->{last_poll} && defined $post->{comments}{data}) {
            for my $comment ( @{$post->{comments}{data}} ) {
                my $comment_time = $parse_time->($comment->{created_time});
                # Skip comments we already showed (or cannot date).
                next if !defined $comment_time
                     || $comment_time < $self->{last_poll};

                my $msg = BarnOwl::Message->new(
                    type      => 'Facebook',
                    sender    => $comment->{from}{name},
                    sender_id => $comment->{from}{id},
                    name      => $name,
                    name_id   => $name_id,
                    direction => 'in',
                    body      => $comment->{message},
                    post_id   => $post_id,
                    topic     => $self->get_topic($post_id),
                    time      => asctime(localtime $comment_time),
                   );
                BarnOwl::queue_message($msg);
            }
        }

        # Track the newest timestamp seen, plus one second, so the next
        # poll does not re-deliver what we just handled.
        if ($updated_time + 1 > $new_last_poll) {
            $new_last_poll = $updated_time + 1;
        }
    }
    # old_topics gets GC'd

    $self->{last_poll} = $new_last_poll;
}
337
# Render the body text shown for a news-feed post.
#
#   status            -> the message, as is
#   link/video/photo  -> "NAME (CAPTION)\nLINK", followed by the
#                        description and the message (each separated by a
#                        blank line) when present
#   anything else     -> "(unknown post type TYPE)"
#
# Missing type/name/link fields are rendered as empty strings, so that
# e.g. a photo without a title does not trigger "uninitialized" warnings.
sub format_body {
    my ($self, $post) = @_;

    my $type = defined $post->{type} ? $post->{type} : '';

    # XXX implement optional URL minification
    if ($type eq 'status') {
        return $post->{message};
    }

    if ($type eq 'link' || $type eq 'video' || $type eq 'photo') {
        my $name = defined $post->{name} ? $post->{name} : '';
        my $link = defined $post->{link} ? $post->{link} : '';
        return $name
          . ($post->{caption} ? " (" . $post->{caption} . ")\n" : "\n")
          . $link
          . ($post->{description} ? "\n\n" . $post->{description} : "")
          . ($post->{message} ? "\n\n" . $post->{message} : "");
    }

    return "(unknown post type " . $type . ")";
}
356
# Publish $msg as a post.  When $user is given the post goes to that
# user -- a name found in the friends map is translated to its id,
# anything else is passed through unchanged; without $user add_post is
# called with no target.  Afterwards the poll timers are restarted with
# no delay.
#
# If there is no Facebook client or we are not logged in, an admin
# message is shown and nothing is posted.
sub facebook {
    my ($self, $user, $msg) = @_;

    unless (defined $self->{facebook} && $self->{logged_in}) {
        BarnOwl::admin_message('Facebook', 'You are not currently logged into Facebook.');
        return;
    }

    my $post;
    if (defined $user) {
        my $target = $self->{friends}{$user} || $user;
        $post = $self->{facebook}->add_post( $target );
    } else {
        $post = $self->{facebook}->add_post;
    }
    $post->set_message( $msg )->publish;

    $self->sleep(0);
}
375
# Publish $msg as a comment on the post identified by $post_id, then
# restart the poll timers with no delay.
#
# Like facebook(), this refuses to act (showing an admin message
# instead) when there is no Facebook client or we are not logged in;
# otherwise a cleared client would make the method call below die.
sub facebook_comment {
    my ($self, $post_id, $msg) = @_;

    if (!defined $self->{facebook} || !$self->{logged_in}) {
        BarnOwl::admin_message('Facebook', 'You are not currently logged into Facebook.');
        return;
    }

    $self->{facebook}->add_comment( $post_id )->set_message( $msg )->publish;
    $self->sleep(0);
}
385
# Extract the access token from the URL the user was redirected to
# after logging in, store it in the configuration and try to
# authenticate with it.  On success the configuration is shown as JSON
# so the user can save it.
#
# If $url is undefined or carries no access_token parameter, the stored
# token is cleared, so facebook_do_auth shows the login instructions.
sub facebook_auth {
    my ($self, $url) = @_;

    # http://www.facebook.com/connect/login_success.html#access_token=TOKEN&expires_in=0
    # Only trust $1 when this very match succeeded: after a failed match
    # it still holds whatever an earlier, unrelated match captured.
    my $token;
    if (defined $url && $url =~ /access_token=([^&]+)/) { # XXX Ew regex
        $token = $1;
    }

    $self->{cfg}->{token} = $token;
    if ($self->facebook_do_auth) {
        my $raw_cfg = to_json($self->{cfg});
        BarnOwl::admin_message('Facebook', "Add this as the contents of your ~/.owl/facebook file:\n$raw_cfg");
    }
}
399
# Try to authenticate with the token stored in the configuration.
#
# Returns 0 after showing login instructions when no token is stored or
# when a test request for 'me' throws.  Otherwise announces the account
# name, marks the handle as logged in, starts polling and returns 1.
sub facebook_do_auth {
    my $self = shift;

    # Built on demand so that login_url is only read on the paths that
    # actually display it.
    my $how_to_login = sub {
        return "Login to Facebook at " . $self->{login_url}
            . "\nand run command ':facebook-auth URL' with the URL you are redirected to.";
    };

    unless (defined $self->{cfg}->{token}) {
        BarnOwl::admin_message('Facebook', $how_to_login->());
        return 0;
    }

    $self->{facebook}->access_token($self->{cfg}->{token});

    # Do a quick check to see if things are working
    my $result = eval { $self->{facebook}->fetch('me'); };
    if ($@) {
        BarnOwl::admin_message('Facebook', "Failed to authenticate! " . $how_to_login->());
        return 0;
    }

    my $name = $result->{'name'};
    BarnOwl::admin_message('Facebook', "Successfully logged in to Facebook as $name!");
    $self->{logged_in} = 1;
    $self->sleep(0); # start polling
    return 1;
}
422
# Look up the topic recorded for a post id.  Unknown posts, and posts
# whose recorded topic is false, get the default topic 'personal'.
sub get_topic {
    my ($self, $post_id) = @_;

    my $topic = $self->{topics}->{$post_id};
    return $topic ? $topic : 'personal';
}
430
4311;
Note: See TracBrowser for help on using the repository browser.