# CheckLinks.pl # Movable Type plugin tags for identifying non-working links within entries # by Kevin Shay # http://www.staggernation.com/mtplugins/ # last modified May 18, 2005 package MT::Plugin::CheckLinks; use strict; use vars qw( $lwp $VERSION ); $VERSION = '1.3'; use MT; use MT::Template::Context; eval{ require MT::Plugin }; unless ($@) { my $plugin = { name => "CheckLinks $VERSION", description => 'Identify non-working links within your entries.', doc_link => 'http://www.staggernation.com/mtplugins/CheckLinks' }; MT->add_plugin(new MT::Plugin($plugin)); } MT::Template::Context->add_container_tag('CheckLinks' => sub{&_hdlr_check_links;}); MT::Template::Context->add_container_tag('CheckLinksHere' => sub{&_hdlr_check_links_here;}); MT::Template::Context->add_conditional_tag('IfBadLinks' => sub{&_hdlr_if_bad_links;}); MT::Template::Context->add_conditional_tag('IfNoBadLinks' => sub{&_hdlr_if_no_bad_links;}); MT::Template::Context->add_container_tag('BadLinks' => sub{&_hdlr_bad_links;}); MT::Template::Context->add_tag('BadLinkURL' => sub{&_hdlr_bad_link_url;}); MT::Template::Context->add_tag('BadLinkStatus' => sub{&_hdlr_bad_link_status;}); MT::Template::Context->add_tag('BadLinkText' => sub{&_hdlr_bad_link_text;}); # keep track of whether we've loaded LWP::UserAgent $lwp = 0; sub _hdlr_check_links { my ($ctx, $args, $cond) = @_; $lwp ||= load_lwp(); # set up the useragent object my $ua = LWP::UserAgent->new; # some sites (eg. Google) won't return anything unless sent a useragent # name, but don't care what it is $ua->agent('MTCheckLinks'); if ($args->{'proxy'}) { $ua->proxy('http' => $args->{'proxy'}); if ($args->{'no_proxy'}) { $ua->no_proxy(split(/\|/, $args->{'no_proxy'})); } } my $params = {}; $params->{'ua'} = $ua; $params->{'local_site'} = site_from_url($ctx->stash('blog')->site_url); local $ctx->{__stash}{'mtchecklinks_params'} = $params; local $ctx->{__stash}{'mtchecklinks_badlinks'} = []; local $ctx->{__stash}{'mtchecklinks_goodlinks'} = {}; defined(my $text = $ctx->stash('builder')->build($ctx, $ctx->stash('tokens'), $cond)) || return $ctx->error($ctx->errstr); return $text; } sub _hdlr_check_links_here { my ($ctx, $args, $cond) = @_; defined (my $params = $ctx->stash('mtchecklinks_params')) || return $ctx->error('Not called from within MTCheckLinks container'); my $badlinks = $ctx->stash('mtchecklinks_badlinks'); my $goodlinks = $ctx->stash('mtchecklinks_goodlinks'); defined(my $text = $ctx->stash('builder')->build($ctx, $ctx->stash('tokens'), $cond)) || return $ctx->error($ctx->errstr); my $show = $args->{'show'} ? 1 : 0; my $include_local = $args->{'include_local'} ? 1 : 0; while ($text =~ m#]*)>(.*?)#ig) { my $attrs = $1; my $content = $2; my $url; if ($attrs =~ m#href="([^"]+)"#i) { $url = $1; } else { next; } if (!site_from_url($url)) { next unless ($include_local && ($url =~ m#^/#)); $url = $params->{'local_site'} . $url; } # don't check this URL if we've already gotten it successfully if (!$goodlinks->{$url}) { my $request = HTTP::Request->new('HEAD', $url); my $response = $params->{'ua'}->request($request); if ($response->is_success) { $goodlinks->{$url} = 1; } else { my $entry_id; if (my $e = $ctx->stash('entry')) { $entry_id = $e->id; } push(@{$badlinks}, { 'url' => $url, 'status' => $response->status_line, 'text', $content, 'entry_id', $entry_id}); } } } $ctx->stash('mtchecklinks_badlinks', $badlinks); $ctx->stash('mtchecklinks_goodlinks', $goodlinks); return ($show ? $text : ''); } sub _hdlr_if_bad_links { my ($ctx, $args) = @_; return has_bad_links($ctx, $args, 1); } sub _hdlr_if_no_bad_links { my ($ctx, $args) = @_; return has_bad_links($ctx, $args, 0); } sub has_bad_links { my ($ctx, $args, $want_bad_links) = @_; defined (my $params = $ctx->stash('mtchecklinks_params')) || return $ctx->error('Not called from within MTCheckLinks container'); my $badlinks = $ctx->stash('mtchecklinks_badlinks'); return '' if (!@{$badlinks} && $want_bad_links); my %include = $args->{'status'} ? map { $_ => 1 } split(/,/, $args->{'status'}) : (); my %omit = $args->{'omit_status'} ? map { $_ => 1 } split(/,/, $args->{'omit_status'}) : (); my $has_bad_links = 0; if (!%include && !%omit) { $has_bad_links = @{$badlinks} ? 1 : 0; } else { for (map { substr($_->{'status'}, 0, 3) } @{$badlinks}) { next if ($omit{$_}); next if (%include && !$include{$_}); $has_bad_links = 1; last; } } return ($has_bad_links == $want_bad_links) ? 1 : 0; } sub _hdlr_bad_links { my ($ctx, $args, $cond) = @_; defined (my $params = $ctx->stash('mtchecklinks_params')) || return $ctx->error('Not called from within MTCheckLinks container'); my $badlinks = $ctx->stash('mtchecklinks_badlinks'); my $builder = $ctx->stash('builder'); my $tokens = $ctx->stash('tokens'); my %include = $args->{'status'} ? map { $_ => 1 } split(/,/, $args->{'status'}) : (); my %omit = $args->{'omit_status'} ? map { $_ => 1 } split(/,/, $args->{'omit_status'}) : (); if (my $sort_by = $args->{'sort_by'}) { if ($args->{'sort_order'} && ($args->{'sort_order'} eq 'descend')) { @{$badlinks} = sort { $b->{$sort_by} cmp $a->{$sort_by} } @{$badlinks}; } else { @{$badlinks} = sort { $a->{$sort_by} cmp $b->{$sort_by} } @{$badlinks}; } } require MT::Entry if ($args->{'load_entries'}); my %urls_seen = (); my $text = ''; for my $link (@{$badlinks}) { next if ($args->{'skip_dupes'} && $urls_seen{$link->{'url'}}); $urls_seen{$link->{'url'}} = 1; local $ctx->{__stash}{'entry'}; if ($args->{'load_entries'} && $link->{'entry_id'}) { $ctx->{__stash}{'entry'} = MT::Entry->load($link->{'entry_id'}); } local $ctx->{__stash}{'mtchecklinks_link'} = $link; defined(my $item_text = $builder->build($ctx, $tokens, $cond)) || return $ctx->error($ctx->errstr); $text .= $item_text; } return $text; } sub _hdlr_bad_link_url { my ($ctx, $args) = @_; return link_element($ctx, $args, 'url'); } sub _hdlr_bad_link_status { my ($ctx, $args) = @_; return link_element($ctx, $args, 'status'); } sub _hdlr_bad_link_text { my ($ctx, $args) = @_; return link_element($ctx, $args, 'text'); } sub link_element { my ($ctx, $args, $element) = @_; defined (my $params = $ctx->stash('mtchecklinks_params')) || return $ctx->error('Not called from within MTCheckLinks container'); defined (my $link = $ctx->stash('mtchecklinks_link')) || return $ctx->error('Not called from within MTBadLinks container'); return $link->{$element}; } sub load_lwp { require LWP::UserAgent; return 1; } sub site_from_url { my ($url) = @_; my $site = ''; if ($url =~ m!^(http://([^/:]+))!i) { $site = lc($1); } return $site; } 1;