User:AnomieBOT/source/tasks/PERTableUpdater.pm
Appearance
| Per WP:BOTUSERSPACE, any bot or automated editing process that affects only the operator's or their own userspace, and which are not otherwise disruptive, may be run without prior approval. |
package tasks::PERTableUpdater;
=pod
=begin metadata
Bot: AnomieBOT
Task: PERTableUpdater
BRFA: N/A
Status: Begun 2011-12-04
Created: 2011-12-01
Update [[User:AnomieBOT/PERTable]], [[User:AnomieBOT/TPERTable]],
[[User:AnomieBOT/EPERTable]], [[User:AnomieBOT/SPERTable]],
[[User:AnomieBOT/COIREQTable]], and [[User:AnomieBOT/PREQTable]].
=end metadata
=cut
use utf8;
use strict;
use AnomieBOT::Task qw/:time bunchlist/;
use URI::Escape;
use HTML::Entities;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
my %protact=(
'modify' => 'Modified',
'protect' => 'Protected',
'unprotect' => 'Unprotected',
);
sub new {
my $class=shift;
my $self=$class->SUPER::new();
bless $self, $class;
return $self;
}
=pod
=for info
Per [[WP:BOTUSERSPACE]], any bot or automated editing process that affects only
the operator's or their own userspace, and which are not otherwise disruptive,
may be run without prior approval.
=cut
sub approved {
return 999;
}
sub run {
my ($self, $api)=@_;
my $res;
$api->task('PERTableUpdater', 0, 10, qw/d::Sections d::Timestamp d::Talk d::IWNS/);
my $screwup=' Errors? [[User:'.$api->user.'/shutoff/PERTableUpdater]]';
# Upgrade
if(exists($api->store->{"pages0"})){
$api->store->{"PER pages"}=$api->store->{"pages0"};
delete $api->store->{"pages0"};
}
if(exists($api->store->{"pages1"})){
$api->store->{"SPER pages"}=$api->store->{"pages1"};
delete $api->store->{"pages1"};
}
if(($api->store->{"ver"}//1) < 2){
for my $tag (qw/PER TPER SPER/) {
next unless exists($api->store->{"$tag pages"});
my %old = %{$api->store->{"$tag pages"}};
my %new = ();
while(my ($k,$v) = each %old) {
$new{$k}{$v->{'talk'}} = $v if exists($v->{'talk'});
}
$api->store->{"$tag pages"} = \%new;
}
$api->store->{"ver"} = 2;
}
# Flush warnings daily
my $ts = time;
$ts -= $ts % 86400;
if ( ($api->store->{'warnedBadRegexDate'} // 0) < $ts) {
$api->store->{'warnedBadRegex'} = {};
$api->store->{'warnedBadRegexDate'} = $ts;
}
# First, load MediaWiki:Titleblacklist to catch pages protected by
# that mechanism
my %tb=();
my @sources=(
[ 1, 'Global title blacklist', 'meta:Title blacklist' ],
[ 2, 'Title blacklist', 'MediaWiki:Titleblacklist' ],
[ 3, undef, 'MediaWiki:Titlewhitelist' ],
);
for my $source (@sources) {
my ($i, $name, $page) = @$source;
my $tb;
if ( $page=~/^meta:(.+)$/ ) {
$tb=$api->copy( wikibase => 'https://meta.wikimedia.org/w/', assert => 'user' )->rawpage( $1 );
} else {
$tb=$api->rawpage($page);
}
if($tb->{'code'} ne 'success'){
$api->warn("Failed to load $page: ".$tb->{'error'}."\n");
return 60;
}
my $ln = 0;
for my $line (split /\r?\n/, $tb->{'content'}){
$ln++;
my $re=$line;
my %opts=();
$re=~s/^\s*([^#]*?)\s*(?:#.*)?$/$1/;
if($re=~s/\s*<([^<>]*)>$//){
my $opts=$1;
$opts=~s/^\s+|\s+$//g;
for my $opt (split /\s*\|\s*/, $opts){
if($opt=~/^([^=]*?)\s*=\s*(.+)$/){
$opts{lc($1)}=$2;
} else {
$opts{lc($opt)}=1;
}
}
}
$re=~s/_/ /g;
$re="(?-i:$re)" if($opts{'casesensitive'}//0);
$re=~s!(\{\{\s*ns\s*:\s*(.+?)\s*\}\})! replace_ns($api,$2) // $1 !ge;
# Try to escape left-braces that aren't quantifiers or parameters to escapes
my $tmp = $re;
$re=~s#(?<!\\[a-zA-Z])(?<!\\)\{(?!\d+(?:,\d*)?})#\\{#g;
$self->warnBadRegex( $api, "$page:$ln: Escaped left-braces in regex (old): $tmp" ) if $tmp ne $re;
$self->warnBadRegex( $api, "$page:$ln: Escaped left-braces in regex (new): $re" ) if $tmp ne $re;
# Validate each line, in case someone screws up the blacklist page
eval {
no warnings;
qr/^(?:$re)$/si;
};
if ( $@ ) {
$self->warnBadRegex( $api, "$page:$ln: Ignoring bad regex '$re': $@\n");
next;
}
# Log non-fatal warnings too.
eval {
use warnings FATAL => 'all';
qr/^(?:$re)$/si;
};
if ( $@ ) {
$self->warnBadRegex( $api, "$page:$ln: Warning: $@\n");
}
# Let's just hope no one ever uses {{int:}} here...
$tb{$re}={
i => $i,
source => $name ? "[[$page|$name]]" : undef,
line => $line,
opts => \%opts
} unless(($opts{'moveonly'}//0) || ($opts{'newaccountonly'}//0));
}
}
# Fields are:
# 0: Namespaces to color "attention" instead of "normal"
# 1: "Tag", also used in the name of the subpage the table is put on
# 2: Category name, no prefix
# 3: Type of request, i.e. "$type edit requests"
# 4: URL fragment for request links
# 5: NID component of the urn links
# 6: Regex for parsing the NSS component
# 7: List of color-classes to apply based on page protection level:
# 0: unprotected
# 1: semi-protected
# 2: semi-protected via title blacklist
# 3: extended-confirmed protected
# 4: template protected
# 5: User JSON page
# 6: User CSS/JS page
# 7: fully protected
# 8: "fully" protected via title blacklist
# 9: cascading protection
# 10: MediaWiki-namespace page
# 11: MediaWiki-namespace CSS/JS page
# 8: Extra table columns.
my $urlre = qr/(?<pg>.+)$/;
my %knowncols = ( sts => 'Status', summ => 'Summary' );
my @data=(
[[10,828],'PER','Wikipedia fully protected edit requests','protected','editprotected','x-wp-editprotected',$urlre,[qw/error error error error error normal error normal caution caution caution error/],[]],
[[10,828],'TPER','Wikipedia template-protected edit requests','template-protected','edittemplateprotected','x-wp-edittemplateprotected',$urlre,[qw/error error error error normal error error error caution error error error/],[]],
[[10,828],'EPER','Wikipedia extended-confirmed-protected edit requests','extended-confirmed-protected','editextendedprotected','x-wp-editextendedprotected',$urlre,[qw/error error error normal error error error error error error error error/],[]],
[[10,828],'SPER','Wikipedia semi-protected edit requests','semi-protected','editsemiprotected','x-wp-editsemiprotected',$urlre,[qw/error normal caution error error error error error error error error error/],[]],
[[],'IPER','Wikipedia interface-protected edit requests','interface-protected','editinterfaceprotected','x-wp-editinterfaceprotected',$urlre,[qw/error error error error error error normal error error error error normal/],[]],
[[0],'COIREQ','Wikipedia conflict of interest edit requests','COI','requestedit','x-wp-requestedit2',qr{(?<sts>[^/]+)/(?<pg>[^/]+)/(?<summ>[^/]*)$},[qw/normal caution caution caution error error error error error error error error/],['sts','summ']],
[[],'PREQ','Wikipedia partial-block edit requests','partial block','editpartiallyblocked','x-wp-editpartiallyblocked',$urlre,[qw/normal caution caution caution error error error error error error error error/],[]],
);
my $starttime=time;
for my $data (@data){
my ($attentionns,$tag,$cat,$type,$tgt,$urn,$urnre,$colors,$cols)=@$data;
my $iter=$api->iterator(
generator => 'categorymembers',
gcmtitle => "Category:$cat",
gcmlimit => 'max',
prop => 'info|extlinks',
elprotocol => 'urn',
ellimit => 'max',
);
my %oldpages=%{$api->store->{"$tag pages"}//{}};
my %pages=();
while(my $p=$iter->next){
if(!$p->{'_ok_'}){
$api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
return 60;
}
next unless $p->{'ns'}&1;
my @pages = map {
if($_->{'*'}=~/^urn:$urn:$urnre/i) {
my $pp = {};
for my $k (keys %+) {
my $v = $+{$k};
$v = uri_unescape( $v );
$v = decode_entities( $v );
utf8::decode( $v );
$pp->{$k} = $v;
}
$pp;
} else {
();
}
} @{$p->{'extlinks'}//[]};
unless(@pages){
my $t=$p->{'title'};
if($p->{'ns'}==1){
$t=~s/^Talk://;
} else {
$t=~s/^([^:]+) talk:/$1:/;
}
push @pages, { pg => $t };
}
for my $t (@pages) {
my $tt = $p->{'title'};
my $pg = $t->{'pg'};
$pg =~ s/_/ /g;
$pages{$pg}{$tt}=($oldpages{$pg}{$tt} // {
title => $pg,
talk => $p->{'title'},
touched => ISO2timestamp($p->{'touched'}),
});
$pages{$pg}{$tt}{'extra'} = $t;
$pages{$pg}{$tt}{'reqisredir'} = defined( $p->{'redirect'} );
delete $pages{$pg}{$tt}{'color'};
delete $pages{$pg}{$tt}{'prottype'};
delete $pages{$pg}{$tt}{'reason'};
delete $pages{$pg}{$tt}{'logtitle'};
}
}
$api->store->{"$tag pages"}=\%pages;
$api->store->{"ver"} = 2;
if(%pages){
$iter=$api->iterator(
titles => bunchlist(500, keys %pages),
prop => 'info',
inprop => 'protection',
);
while(my $p=$iter->next){
if(!$p->{'_ok_'}){
$api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
return 60;
}
my $t=$p->{'title'};
my ($k,$pd) = each %{$pages{$t}}; # Get first
# Protection scoring "bitmap":
# 0x8000 = MediaWiki-namespace CSS/JS auto-protection
# 0x4000 = MediaWiki-namespace auto-protection
# 0x2000 = Cascading protection
# 0x1000 = Full protection
# 0x100 = User script auto-protection
# 0x80 = User JSON auto-protection
# 0x40 = Template-protection
# 0x20 = Extended-confirmed protection
# 0x10 = Semi-protection
# 0x08 = Directly-applied protection
# 0x02 = Title blacklist protection
# Highest score by int value "wins".
my $protscore=0;
$pd->{'prottype'}='Not protected';
$pd->{'reason'}='';
if($p->{'ns'}==8 && ($p->{'contentmodel'} eq 'javascript' || $t=~m!\.js$!)){
$pd->{'prottype'}='Site JS page';
$protscore=0xc000;
} elsif($p->{'ns'}==8 && ($p->{'contentmodel'} eq 'css' || $t=~m!\.css$!)){
$pd->{'prottype'}='Site CSS page';
$protscore=0xc000;
} elsif($p->{'ns'}==8){
$pd->{'prottype'}='MediaWiki page';
$protscore=0x4000;
} elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'javascript' || $t=~m!/.*\.js$!)){
$pd->{'prottype'}='User JS page';
$protscore=0x100;
} elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'css' || $t=~m!/.*\.css$!)){
$pd->{'prottype'}='User CSS page';
$protscore=0x100;
} elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'json' || $t=~m!/.*\.json$!)){
$pd->{'prottype'}='User JSON page';
$protscore=0x80;
}
my $tb = undef;
while(my ($re,$data)=each %tb){
next if $tb && $tb->{'i'} >= $data->{'i'};
next unless(exists($p->{'missing'}) || ($data->{'opts'}{'noedit'}//0));
next unless $t=~/^(?:$re)$/si;
$tb = $data;
}
if ( $tb && $tb->{'source'} ) {
my $sc=exists($tb->{'opts'}{'autoconfirmed'})?0x12:0x42;
next if $sc<$protscore;
$pd->{'prottype'}=$tb->{'source'};
my $line=$tb->{'line'};
$pd->{'reason'}=qq(Matching line: <syntaxhighlight lang="text" inline>$line</syntax).qq(highlight>);
$protscore=$sc;
}
my $expiry=undef;
my $pg=$t;
my $prottype = exists( $p->{'missing'} ) ? 'create' : 'edit';
for my $pp (@{$p->{'protection'}//[]}){
next unless $pp->{'type'} eq $prottype;
my $sc=0;
$sc|=0x1000 if $pp->{'level'} eq 'sysop';
$sc|=0x40 if $pp->{'level'} eq 'templateeditor';
$sc|=0x20 if $pp->{'level'} eq 'extendedconfirmed';
$sc|=0x10 if $pp->{'level'} eq 'autoconfirmed';
$sc|=exists($pp->{'source'})?0x2000:0x08;
$sc|=0x2000 if exists($pp->{'cascade'});
next if $sc<$protscore;
if(exists($pp->{'source'})){
$pg=$pp->{'source'};
$pd->{'prottype'}="Cascade-protected from [[:$pg]]";
} else {
$pg=$t;
$pd->{'prottype'}='Fully protected' if $pp->{'level'} eq 'sysop';
$pd->{'prottype'}='Template-protected' if $pp->{'level'} eq 'templateeditor';
$pd->{'prottype'}='Extended-confirmed protected' if $pp->{'level'} eq 'extendedconfirmed';
$pd->{'prottype'}='Semiprotected' if $pp->{'level'} eq 'autoconfirmed';
$pd->{'prottype'}.=' with cascading' if exists($pp->{'cascade'});
}
$pd->{'prottype'}.=strftime(', expires %F at %T UTC', gmtime ISO2timestamp($pp->{'expiry'})) if $pp->{'expiry'} ne 'infinity';
$protscore=$sc;
$pd->{'reason'}='';
}
$pd->{'color'}=$colors->[0];
if($protscore & 0x10){
$pd->{'color'}=$colors->[1];
$pd->{'color'}=$colors->[2] if($protscore & 0x02);
}
$pd->{'color'}=$colors->[3] if($protscore & 0x20);
if($protscore & 0x40){
$pd->{'color'}=$colors->[4] if($protscore & 0x40);
$pd->{'color'}=$colors->[8] if($protscore & 0x02);
}
$pd->{'color'}=$colors->[5] if($protscore & 0x80);
$pd->{'color'}=$colors->[6] if($protscore & 0x100);
$pd->{'color'}=$colors->[7] if($protscore & 0x1000);
$pd->{'color'}=$colors->[9] if($protscore & 0x2000);
$pd->{'color'}=$colors->[10] if($protscore & 0x4000);
$pd->{'color'}=$colors->[11] if($protscore & 0x8000);
$pd->{'color'}='attention' if($pd->{'color'} eq 'normal' && grep($p->{'ns'}==$_, @$attentionns));
if($pd->{'reason'} eq ''){
my $iter=$api->iterator(
list => 'logevents',
letype => 'protect',
letitle => $pg,
);
my $from='';
while(my $le=$iter->next){
if(!$le->{'_ok_'}){
$api->warn("Failed to retrieve protection log for $pg: ".$le->{'error'}."\n");
return 60;
}
if($le->{'action'} eq 'move_prot'){
$from="From [[:".$le->{'params'}{'oldtitle_title'}."]]: ";
$iter=$api->iterator(
list => 'logevents',
letype => 'protect',
letitle => $le->{'params'}{'oldtitle_title'},
lestart => $le->{'timestamp'},
);
next;
}
next unless exists($protact{$le->{'action'}});
$le->{'timestamp'}=~s/T.*//;
my $comment=$le->{'comment'};
$comment=~s/\s*\[[^]]*\](?: \(expires [^)]*\))?$//;
$pd->{'reason'}=$from.$protact{$le->{'action'}}.' by [[User:'.$le->{'user'}.'|'.$le->{'user'}.']] on '.$le->{'timestamp'}.': "'.esccomment($comment).'"';
last;
}
}
$pd->{'logtitle'}=$pg;
$pd->{'isredir'}=defined( $p->{'redirect'} );
# now fill in the rest
my @keys = qw/prottype reason color logtitle/;
while(my ($k, $pd2) = each %{$pages{$t}}) {
@{$pd2}{@keys} = @{$pd}{@keys};
}
}
}
# The formatting here is a little strange, for backwards compat
my @pages=map { values %$_ } values %pages;
my $txt = qq(<noinclude>{{User:AnomieBOT/PERTableHeader}}</noinclude>\n);
$txt.=qq(<div class="veblenbot-pertable">\n);
$txt.=qq(<templatestyles src="Template:Edit_fully-protected/color_legend/styles.css"/>\n);
$txt.=qq({| class="wikitable" style="padding:0em"\n);
$txt.=qq(|-\n);
my $ct=scalar @pages;
my $s=($ct==1?'':'s');
my $pg='User:AnomieBOT/'.$tag.'Table';
$txt.=qq(! <section begin="count" />$ct<section end="count" /> [[:Category:$cat|$type edit request$s]] {{v|$pg|h|style=float:right}}\n);
$txt.=qq(|-\n);
$txt.=qq(|\n);
$txt.=qq({| class="wikitable sortable" width=100% style="margin:0em"\n);
$txt.=qq(! Page\n);
$txt.=qq(! Tagged since\n);
for my $col (@$cols) {
$txt.=sprintf( "! %s\n", $knowncols{$col} );
}
$txt.=qq(! Protection level\n);
$txt.=qq(! class = "unsortable" | Last protection log entry\n);
for my $p (sort { my $x = $a->{'touched'} <=> $b->{'touched'}; $x = $a->{'title'} cmp $b->{'title'} if $x == 0; return $x; } @pages){
my $c=$p->{'color'};
my $t=$p->{'title'};
my $et=encodetitle($p->{'logtitle'});
my $tt=$p->{'talk'};
my $pt=$p->{'prottype'};
my $r=$p->{'reason'};
my $tl = $p->{'isredir'} ? "{{-r|1=$t}}" : "[[:$t]]";
my $ttl = $p->{'reqisredir'} ? "{{-r|1=$tt#$tgt|2=request}}" : "[[$tt#$tgt|request]]";
$txt.=qq(|- class="protectededit-legend-$c"\n);
$txt.=qq(| $tl ($ttl)\n);
$txt.=strftime("| %F %H:%M\n", gmtime $p->{'touched'});
for my $col (@$cols) {
$txt.=sprintf( "| %s\n", $p->{'extra'}{$col} // '' );
}
$txt.=qq(| $pt <span class="plainlinks">([//en.wikipedia.org/w/index.php?title=Special:Log&type=protect&page=$et log])</span>\n);
$txt.=qq(| $r\n);
}
$txt.=qq(|}\n);
$txt.=qq(|-\n);
$txt.=qq(|style="text-align:right;font-size:smaller"| Updated as needed. Last updated: <!--TS-->~~~~~<!--/TS-->\n);
$txt.=qq(|}</div>);
my $tok=$api->edittoken($pg, EditRedir=>1);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $pg: ".$tok->{'error'}."\n");
next;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'}//'';
$intxt=~s/^\s+|\s+$//;
$intxt=~s#<!--TS-->.*<!--/TS-->#<!--TS-->~~~~~<!--/TS-->#g;
if($intxt ne $txt){
$api->log("Updating $pg ($ct request$s)");
my $r=$api->edit($tok, $txt, "Update table ($ct request$s)", 0, 0);
if($r->{'code'} ne 'success'){
$api->warn("Write failed on $pg: ".$r->{'error'}."\n");
}
}
}
my $t=$starttime-time+300;
$t=0 if $t<0;
return $t;
}
sub warnBadRegex {
my ($self, $api, $msg) = @_;
$msg =~ s/\s+$//;
my $file = __FILE__;
$msg =~ s/ at \Q$file\E line \d+\.$//;
if ( ! defined( $api->{'noedit'} ) ) {
my $warned = $api->store->{'warnedBadRegex'};
return if exists( $warned->{$msg} );
$warned->{$msg} = 1;
$api->store->{'warnedBadRegex'} = $warned;
}
$api->warn( "$msg\n" );
}
sub esccomment {
my $c=shift;
$c=~s/{/{/g;
$c=~s/</</g;
$c=~s/>/>/g;
$c=~s/~/~/g;
$c=~s/\|\]\]/]]/g; # Pipe trick
$c=~s/\[\[\|/[[/g; # Reverse pipe trick
return $c;
}
sub encodetitle {
my $t=shift;
$t=~s/ /_/g;
$t=uri_escape_utf8($t, '^A-Za-z0-9_\-.:/~');
return $t;
}
sub replace_ns {
my ($api,$ns)=@_;
if($ns=~/^([+-]?[0-9]+)/){
$ns=int($1);
} else {
$ns=~s/_/ /g;
my %x=$api->namespace_map();
$ns=$x{$ns} // undef;
return undef unless defined($ns);
}
my %x=$api->namespace_reverse_map();
return $x{$ns} // undef;
}
1;