Knowledge

User:XLinkBot/Code/LinkParser.pl

Source 📝

#!/usr/bin/perl
#
# LinkParser - worker process that extracts newly added links from wiki edits.
#
# Usage: LinkParser.pl <server_port>
#
# The script detaches from its parent, reads key=value settings from the file
# "linkwatcher-config" in the current directory, waits four seconds and then
# connects to a TCP server on 127.0.0.1:<server_port>.  It sends "REQUEST",
# and for every "EDIT ..." line it receives it fetches the diff, collects the
# http:// links that were added but not also removed, and reports them back in
# a "PARSED ..." line before requesting the next edit.  After more than 50
# edits have been handled the process exits.
#
# NOTE(review): this copy of the script went through a lossy text extraction.
# Three runtime strings below look truncated (the EDIT pattern, the link
# pattern and the PARSED message).  They are reproduced exactly as found and
# marked individually; restore them from the authoritative copy before use.

use strict;
use warnings;

use POE qw(Component::Client::TCP);
use HTML::Entities;
use LWP::UserAgent;
use perlwikipedia;

# Detach: the parent exits immediately, the child carries on.
fork and exit;

my $editor      = Perlwikipedia->new("LinkParser", "LinkParser");
my $diffFetcher = LWP::UserAgent->new;
$diffFetcher->agent("LinkParser/2.0");

my %settings;
$settings{'debug'} = 0;

print "Reading config file...\n" if $settings{'debug'};
open(my $config_fh, '<', 'linkwatcher-config')
    or die "Can't open LinkWatcher config: $!";
while (my $line = <$config_fh>) {
    next if $line =~ /^#/;                 # comment line
    if ($line =~ /(.+?)=(.+)/) {
        $settings{$1} = $2;
    }
}
close($config_fh);

# 'prefixes' is a |-separated list; it is only reported in debug output here.
my @prefixes = defined $settings{'prefixes'}
    ? split(/\|/, $settings{'prefixes'})
    : ();
print "Prefixes: " . join(" - ", @prefixes) . "\n" if $settings{'debug'};
print "done\n" if $settings{'debug'};

my $server_port = shift @ARGV;
sleep 4;

POE::Component::Client::TCP->new(
    RemoteAddress => '127.0.0.1',
    RemotePort    => $server_port,
    ServerInput   => \&server_input,
    Connected     => \&connected,
);

# Number of EDIT lines handled so far; server_input exits once it passes 50.
my $number_of_edits = 0;

POE::Kernel->run();
exit 0;

# server_input - POE ServerInput handler, called once per line from the server.
#
# Handles two kinds of input:
#   EDIT ...  parse the edit, report added links (if any), request the next one
#   NOEDIT    wait a second, then request again
# Any other input is ignored.
sub server_input {
    # POE passes handler arguments at fixed offsets that must be read through
    # the exported constants; a plain positional unpack of @_ picks up the
    # wrong values.
    my ($kernel, $heap, $input) = @_[KERNEL, HEAP, ARG0];

    # NOTE(review): this pattern has two capture groups but five captures are
    # read below, so $3..$5 (and therefore $diffurl) are always undefined as
    # written.  It appears truncated - restore the original pattern.
    if ($input =~ m{EDIT \\] \\] (http:\/\/.+) (.+)}) {
        my ($pagename, $lang, $username, $diffurl, $size) = ($1, $2, $3, $4, $5);
        $number_of_edits++;

        if ($diffurl) {
            my ($addedTotal, $removedTotal) = _fetch_changed_text($diffurl, $pagename);

            decode_entities($addedTotal);
            decode_entities($removedTotal);

            # NOTE(review): as written this only matches the literal text
            # "http://+"; the pattern appears truncated (probably a character
            # class was lost) - restore the original pattern.
            my @addedlinks   = $addedTotal   =~ m{(http://\+)}sgi;
            my @removedlinks = $removedTotal =~ m{(http://\+)}sgi;

            my @really_added_links;
            if (@addedlinks) {
                if (@removedlinks) {
                    print "----\nDIFF $diffurl " . join(" ", @addedlinks) . " - "
                        . join(" ", @removedlinks) . "\n----\n"
                        if $settings{'debug'};
                }
                # Keep every added link that was not also removed in this diff
                # (order and duplicates of the added list are preserved).
                my %was_removed = map { $_ => 1 } @removedlinks;
                @really_added_links = grep { !$was_removed{$_} } @addedlinks;
                print "DIFF $diffurl " . join(" ", @really_added_links) . "\n----\n"
                    if $settings{'debug'};
            }

            if (@really_added_links) {
                # NOTE(review): the "]" fragments suggest bracketed fields were
                # lost from this message - restore the original format.
                my $message = "PARSED ] $diffurl $size ] |"
                    . join(" ", @really_added_links) . "|";
                $heap->{server}->put($message);
            }
        }
        $heap->{server}->put("REQUEST");
    }
    elsif ($input =~ m{NOEDIT}) {
        sleep 1;    # nothing to do yet; note that this blocks the event loop
        $heap->{server}->put("REQUEST");
    }

    if ($number_of_edits > 50) {
        # "shutdown" is the client session's own event, so it is yielded to
        # the current session rather than posted without an event name.
        $kernel->yield("shutdown");
        exit 0;
    }
}

# _fetch_changed_text - return (added_text, removed_text) for one edit.
#
# For an index.php diff URL the rendered diff is downloaded and the added and
# deleted table cells are joined, stripped of change-highlight markup and
# lowercased.  For any other URL the whole current page text (lowercased) is
# treated as added and nothing as removed.
sub _fetch_changed_text {
    my ($diffurl, $pagename) = @_;

    if ($diffurl !~ m/index\.php/) {
        return (lc($editor->get_text($pagename)), "");
    }

    my $diffUrl     = "$diffurl&diffonly=1&action=render";
    my $diffContent = $diffFetcher->get($diffUrl)->content;
    print "$diffContent\n" if $settings{'debug'};

    my @addedPre   = $diffContent =~ m/<td class=.diff-addedline.><div>(.*?)<\/div><\/td>/sg;
    my @removedPre = $diffContent =~ m/<td class=.diff-deletedline.><div>(.*?)<\/div><\/td>/sg;

    my $addedTotal   = lc(_strip_diff_markup(join(' ', @addedPre)));
    my $removedTotal = lc(_strip_diff_markup(join(' ', @removedPre)));
    print "Added data: $addedTotal\n" if $settings{'debug'};

    return ($addedTotal, $removedTotal);
}

# _strip_diff_markup - remove the <span>/<ins> change-highlight tags from text.
#
# The closing </ins> is matched case-insensitively for both sides; the
# original did so for the removed side only.
sub _strip_diff_markup {
    my ($text) = @_;
    my @markup = (
        qr/<span class=.diffchange diffchange-inline.>/,
        qr/<span class=.diffchange.>/,
        qr/<\/span>/,
        qr/<ins class=.diffchange diffchange-inline.>/,
        qr/<ins class=.diffchange.>/,
        qr/<\/ins>/i,
    );
    for my $pattern (@markup) {
        $text =~ s/$pattern//g;
    }
    return $text;
}

# connected - POE Connected handler: ask the server for the first edit.
sub connected {
    my $heap = $_[HEAP];
    $heap->{server}->put("REQUEST");
}

# request_edit - ask the server for another edit.
# Not registered as a handler anywhere in this script.
sub request_edit {
    my $heap = $_[HEAP];
    $heap->{server}->put("REQUEST");
}

Index

User:XLinkBot
Code

Text is available under the Creative Commons Attribution-ShareAlike License. Additional terms may apply.