🇮🇷 Iran Proxy | https://www.wikipedia.org/wiki/User:AnomieBOT/source/tasks/PERTableUpdater.pm
Jump to content

User:AnomieBOT/source/tasks/PERTableUpdater.pm

From Wikipedia, the free encyclopedia
package tasks::PERTableUpdater;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    PERTableUpdater
BRFA:    N/A
Status:  Begun 2011-12-04
Created: 2011-12-01

Update [[User:AnomieBOT/PERTable]], [[User:AnomieBOT/TPERTable]],
[[User:AnomieBOT/EPERTable]], [[User:AnomieBOT/SPERTable]],
[[User:AnomieBOT/COIREQTable]], and [[User:AnomieBOT/PREQTable]].

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/:time bunchlist/;
use URI::Escape;
use HTML::Entities;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my %protact=(
    'modify'    => 'Modified',
    'protect'   => 'Protected',
    'unprotect' => 'Unprotected',
);

sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    bless $self, $class;
    return $self;
}

=pod

=for info
Per [[WP:BOTUSERSPACE]], any bot or automated editing process that affects only
the operator's or their own userspace, and which are not otherwise disruptive,
may be run without prior approval.

=cut

sub approved {
    return 999;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('PERTableUpdater', 0, 10, qw/d::Sections d::Timestamp d::Talk d::IWNS/);

    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/PERTableUpdater]]';

    # Upgrade
    if(exists($api->store->{"pages0"})){
        $api->store->{"PER pages"}=$api->store->{"pages0"};
        delete $api->store->{"pages0"};
    }
    if(exists($api->store->{"pages1"})){
        $api->store->{"SPER pages"}=$api->store->{"pages1"};
        delete $api->store->{"pages1"};
    }
    if(($api->store->{"ver"}//1) < 2){
        for my $tag (qw/PER TPER SPER/) {
            next unless exists($api->store->{"$tag pages"});
            my %old = %{$api->store->{"$tag pages"}};
            my %new = ();
            while(my ($k,$v) = each %old) {
                $new{$k}{$v->{'talk'}} = $v if exists($v->{'talk'});
            }
            $api->store->{"$tag pages"} = \%new;
        }
        $api->store->{"ver"} = 2;
    }

    # Flush warnings daily
    my $ts = time;
    $ts -= $ts % 86400;
    if ( ($api->store->{'warnedBadRegexDate'} // 0) < $ts) {
        $api->store->{'warnedBadRegex'} = {};
        $api->store->{'warnedBadRegexDate'} = $ts;
    }

    # First, load MediaWiki:Titleblacklist to catch pages protected by
    # that mechanism
    my %tb=();
    my @sources=(
        [ 1, 'Global title blacklist', 'meta:Title blacklist' ],
        [ 2, 'Title blacklist', 'MediaWiki:Titleblacklist' ],
        [ 3, undef, 'MediaWiki:Titlewhitelist' ],
    );
    for my $source (@sources) {
        my ($i, $name, $page) = @$source;
        my $tb;
        if ( $page=~/^meta:(.+)$/ ) {
            $tb=$api->copy( wikibase => 'https://meta.wikimedia.org/w/', assert => 'user' )->rawpage( $1 );
        } else {
            $tb=$api->rawpage($page);
        }
        if($tb->{'code'} ne 'success'){
            $api->warn("Failed to load $page: ".$tb->{'error'}."\n");
            return 60;
        }
        my $ln = 0;
        for my $line (split /\r?\n/, $tb->{'content'}){
            $ln++;
            my $re=$line;
            my %opts=();
            $re=~s/^\s*([^#]*?)\s*(?:#.*)?$/$1/;
            if($re=~s/\s*<([^<>]*)>$//){
                my $opts=$1;
                $opts=~s/^\s+|\s+$//g;
                for my $opt (split /\s*\|\s*/, $opts){
                    if($opt=~/^([^=]*?)\s*=\s*(.+)$/){
                        $opts{lc($1)}=$2;
                    } else {
                        $opts{lc($opt)}=1;
                    }
                }
            }
            $re=~s/_/ /g;
            $re="(?-i:$re)" if($opts{'casesensitive'}//0);

            $re=~s!(\{\{\s*ns\s*:\s*(.+?)\s*\}\})! replace_ns($api,$2) // $1 !ge;

            # Try to escape left-braces that aren't quantifiers or parameters to escapes
            my $tmp = $re;
            $re=~s#(?<!\\[a-zA-Z])(?<!\\)\{(?!\d+(?:,\d*)?})#\\{#g;
            $self->warnBadRegex( $api, "$page:$ln: Escaped left-braces in regex (old): $tmp" ) if $tmp ne $re;
            $self->warnBadRegex( $api, "$page:$ln: Escaped left-braces in regex (new): $re" ) if $tmp ne $re;

            # Validate each line, in case someone screws up the blacklist page
            eval {
                no warnings;
                qr/^(?:$re)$/si;
            };
            if ( $@ ) {
                $self->warnBadRegex( $api, "$page:$ln: Ignoring bad regex '$re': $@\n");
                next;
            }

            # Log non-fatal warnings too.
            eval {
                use warnings FATAL => 'all';
                qr/^(?:$re)$/si;
            };
            if ( $@ ) {
                $self->warnBadRegex( $api, "$page:$ln: Warning: $@\n");
            }

            # Let's just hope no one ever uses {{int:}} here...

            $tb{$re}={
                i => $i,
                source => $name ? "[[$page|$name]]" : undef,
                line => $line,
                opts => \%opts
            } unless(($opts{'moveonly'}//0) || ($opts{'newaccountonly'}//0));
        }
    }

    # Fields are:
    # 0: Namespaces to color "attention" instead of "normal"
    # 1: "Tag", also used in the name of the subpage the table is put on
    # 2: Category name, no prefix
    # 3: Type of request, i.e. "$type edit requests"
    # 4: URL fragment for request links
    # 5: NID component of the urn links
    # 6: Regex for parsing the NSS component
    # 7: List of color-classes to apply based on page protection level:
    #    0: unprotected
    #    1: semi-protected
    #    2: semi-protected via title blacklist
    #    3: extended-confirmed protected
    #    4: template protected
    #    5: User JSON page
    #    6: User CSS/JS page
    #    7: fully protected
    #    8: "fully" protected via title blacklist
    #    9: cascading protection
    #   10: MediaWiki-namespace page
    #   11: MediaWiki-namespace CSS/JS page
    # 8: Extra table columns.
    my $urlre = qr/(?<pg>.+)$/;
    my %knowncols = ( sts => 'Status', summ => 'Summary' );
    my @data=(
        [[10,828],'PER','Wikipedia fully protected edit requests','protected','editprotected','x-wp-editprotected',$urlre,[qw/error error error error error normal error normal caution caution caution error/],[]],
        [[10,828],'TPER','Wikipedia template-protected edit requests','template-protected','edittemplateprotected','x-wp-edittemplateprotected',$urlre,[qw/error error error error normal error error error caution error error error/],[]],
        [[10,828],'EPER','Wikipedia extended-confirmed-protected edit requests','extended-confirmed-protected','editextendedprotected','x-wp-editextendedprotected',$urlre,[qw/error error error normal error error error error error error error error/],[]],
        [[10,828],'SPER','Wikipedia semi-protected edit requests','semi-protected','editsemiprotected','x-wp-editsemiprotected',$urlre,[qw/error normal caution error error error error error error error error error/],[]],
        [[],'IPER','Wikipedia interface-protected edit requests','interface-protected','editinterfaceprotected','x-wp-editinterfaceprotected',$urlre,[qw/error error error error error error normal error error error error normal/],[]],
        [[0],'COIREQ','Wikipedia conflict of interest edit requests','COI','requestedit','x-wp-requestedit2',qr{(?<sts>[^/]+)/(?<pg>[^/]+)/(?<summ>[^/]*)$},[qw/normal caution caution caution error error error error error error error error/],['sts','summ']],
        [[],'PREQ','Wikipedia partial-block edit requests','partial block','editpartiallyblocked','x-wp-editpartiallyblocked',$urlre,[qw/normal caution caution caution error error error error error error error error/],[]],
    );
    my $starttime=time;
    for my $data (@data){
        my ($attentionns,$tag,$cat,$type,$tgt,$urn,$urnre,$colors,$cols)=@$data;
        my $iter=$api->iterator(
            generator      => 'categorymembers',
            gcmtitle       => "Category:$cat",
            gcmlimit       => 'max',
            prop           => 'info|extlinks',
            elprotocol     => 'urn',
            ellimit        => 'max',
        );
        my %oldpages=%{$api->store->{"$tag pages"}//{}};
        my %pages=();
        while(my $p=$iter->next){
            if(!$p->{'_ok_'}){
                $api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
                return 60;
            }
            next unless $p->{'ns'}&1;
            my @pages = map {
                if($_->{'*'}=~/^urn:$urn:$urnre/i) {
                    my $pp = {};
                    for my $k (keys %+) {
                        my $v = $+{$k};
                        $v = uri_unescape( $v );
                        $v = decode_entities( $v );
                        utf8::decode( $v );
                        $pp->{$k} = $v;
                    }
                    $pp;
                } else {
                    ();
                }
            } @{$p->{'extlinks'}//[]};
            unless(@pages){
                my $t=$p->{'title'};
                if($p->{'ns'}==1){
                    $t=~s/^Talk://;
                } else {
                    $t=~s/^([^:]+) talk:/$1:/;
                }
                push @pages, { pg => $t };
            }
            for my $t (@pages) {
                my $tt = $p->{'title'};
                my $pg = $t->{'pg'};
                $pg =~ s/_/ /g;
                $pages{$pg}{$tt}=($oldpages{$pg}{$tt} // {
                        title   => $pg,
                        talk    => $p->{'title'},
                        touched => ISO2timestamp($p->{'touched'}),
                    });
                $pages{$pg}{$tt}{'extra'} = $t;
                $pages{$pg}{$tt}{'reqisredir'} = defined( $p->{'redirect'} );
                delete $pages{$pg}{$tt}{'color'};
                delete $pages{$pg}{$tt}{'prottype'};
                delete $pages{$pg}{$tt}{'reason'};
                delete $pages{$pg}{$tt}{'logtitle'};
            }
        }
        $api->store->{"$tag pages"}=\%pages;
        $api->store->{"ver"} = 2;

        if(%pages){
            $iter=$api->iterator(
                titles => bunchlist(500, keys %pages),
                prop   => 'info',
                inprop => 'protection',
            );
            while(my $p=$iter->next){
                if(!$p->{'_ok_'}){
                    $api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
                    return 60;
                }
                my $t=$p->{'title'};

                my ($k,$pd) = each %{$pages{$t}}; # Get first

                # Protection scoring "bitmap":
                # 0x8000 = MediaWiki-namespace CSS/JS auto-protection
                # 0x4000 = MediaWiki-namespace auto-protection
                # 0x2000 = Cascading protection
                # 0x1000 = Full protection
                #  0x100 = User script auto-protection
                #   0x80 = User JSON auto-protection
                #   0x40 = Template-protection
                #   0x20 = Extended-confirmed protection
                #   0x10 = Semi-protection
                #   0x08 = Directly-applied protection
                #   0x02 = Title blacklist protection
                # Highest score by int value "wins".
                my $protscore=0;
                $pd->{'prottype'}='Not protected';
                $pd->{'reason'}='';
                if($p->{'ns'}==8 && ($p->{'contentmodel'} eq 'javascript' || $t=~m!\.js$!)){
                    $pd->{'prottype'}='Site JS page';
                    $protscore=0xc000;
                } elsif($p->{'ns'}==8 && ($p->{'contentmodel'} eq 'css' || $t=~m!\.css$!)){
                    $pd->{'prottype'}='Site CSS page';
                    $protscore=0xc000;
                } elsif($p->{'ns'}==8){
                    $pd->{'prottype'}='MediaWiki page';
                    $protscore=0x4000;
                } elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'javascript' || $t=~m!/.*\.js$!)){
                    $pd->{'prottype'}='User JS page';
                    $protscore=0x100;
                } elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'css' || $t=~m!/.*\.css$!)){
                    $pd->{'prottype'}='User CSS page';
                    $protscore=0x100;
                } elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'json' || $t=~m!/.*\.json$!)){
                    $pd->{'prottype'}='User JSON page';
                    $protscore=0x80;
                }

                my $tb = undef;
                while(my ($re,$data)=each %tb){
                    next if $tb && $tb->{'i'} >= $data->{'i'};
                    next unless(exists($p->{'missing'}) || ($data->{'opts'}{'noedit'}//0));
                    next unless $t=~/^(?:$re)$/si;
                    $tb = $data;
                }
                if ( $tb && $tb->{'source'} ) {
                    my $sc=exists($tb->{'opts'}{'autoconfirmed'})?0x12:0x42;
                    next if $sc<$protscore;
                    $pd->{'prottype'}=$tb->{'source'};
                    my $line=$tb->{'line'};
                    $pd->{'reason'}=qq(Matching line: <syntaxhighlight lang="text" inline>$line</syntax).qq(highlight>);
                    $protscore=$sc;
                }

                my $expiry=undef;
                my $pg=$t;
                my $prottype = exists( $p->{'missing'} ) ? 'create' : 'edit';
                for my $pp (@{$p->{'protection'}//[]}){
                    next unless $pp->{'type'} eq $prottype;
                    my $sc=0;
                    $sc|=0x1000 if $pp->{'level'} eq 'sysop';
                    $sc|=0x40 if $pp->{'level'} eq 'templateeditor';
                    $sc|=0x20 if $pp->{'level'} eq 'extendedconfirmed';
                    $sc|=0x10 if $pp->{'level'} eq 'autoconfirmed';
                    $sc|=exists($pp->{'source'})?0x2000:0x08;
                    $sc|=0x2000 if exists($pp->{'cascade'});
                    next if $sc<$protscore;
                    if(exists($pp->{'source'})){
                        $pg=$pp->{'source'};
                        $pd->{'prottype'}="Cascade-protected from [[:$pg]]";
                    } else {
                        $pg=$t;
                        $pd->{'prottype'}='Fully protected' if $pp->{'level'} eq 'sysop';
                        $pd->{'prottype'}='Template-protected' if $pp->{'level'} eq 'templateeditor';
                        $pd->{'prottype'}='Extended-confirmed protected' if $pp->{'level'} eq 'extendedconfirmed';
                        $pd->{'prottype'}='Semiprotected' if $pp->{'level'} eq 'autoconfirmed';
                        $pd->{'prottype'}.=' with cascading' if exists($pp->{'cascade'});
                    }
                    $pd->{'prottype'}.=strftime(', expires %F at %T UTC', gmtime ISO2timestamp($pp->{'expiry'})) if $pp->{'expiry'} ne 'infinity';
                    $protscore=$sc;
                    $pd->{'reason'}='';
                }

                $pd->{'color'}=$colors->[0];
                if($protscore & 0x10){
                    $pd->{'color'}=$colors->[1];
                    $pd->{'color'}=$colors->[2] if($protscore & 0x02);
                }
                $pd->{'color'}=$colors->[3] if($protscore & 0x20);
                if($protscore & 0x40){
                    $pd->{'color'}=$colors->[4] if($protscore & 0x40);
                    $pd->{'color'}=$colors->[8] if($protscore & 0x02);
                }
                $pd->{'color'}=$colors->[5] if($protscore & 0x80);
                $pd->{'color'}=$colors->[6] if($protscore & 0x100);
                $pd->{'color'}=$colors->[7] if($protscore & 0x1000);
                $pd->{'color'}=$colors->[9] if($protscore & 0x2000);
                $pd->{'color'}=$colors->[10] if($protscore & 0x4000);
                $pd->{'color'}=$colors->[11] if($protscore & 0x8000);
                $pd->{'color'}='attention' if($pd->{'color'} eq 'normal' && grep($p->{'ns'}==$_, @$attentionns));

                if($pd->{'reason'} eq ''){
                    my $iter=$api->iterator(
                        list    => 'logevents',
                        letype  => 'protect',
                        letitle => $pg,
                    );
                    my $from='';
                    while(my $le=$iter->next){
                        if(!$le->{'_ok_'}){
                            $api->warn("Failed to retrieve protection log for $pg: ".$le->{'error'}."\n");
                            return 60;
                        }
                        if($le->{'action'} eq 'move_prot'){
                            $from="From [[:".$le->{'params'}{'oldtitle_title'}."]]: ";
                            $iter=$api->iterator(
                                list    => 'logevents',
                                letype  => 'protect',
                                letitle => $le->{'params'}{'oldtitle_title'},
                                lestart => $le->{'timestamp'},
                            );
                            next;
                        }
                        next unless exists($protact{$le->{'action'}});
                        $le->{'timestamp'}=~s/T.*//;
                        my $comment=$le->{'comment'};
                        $comment=~s/\s*\[[^]]*\](?: \(expires [^)]*\))?$//;
                        $pd->{'reason'}=$from.$protact{$le->{'action'}}.' by [[User:'.$le->{'user'}.'|'.$le->{'user'}.']] on '.$le->{'timestamp'}.': "'.esccomment($comment).'"';
                        last;
                    }
                }
                $pd->{'logtitle'}=$pg;
                $pd->{'isredir'}=defined( $p->{'redirect'} );

                # now fill in the rest
                my @keys = qw/prottype reason color logtitle/;
                while(my ($k, $pd2) = each %{$pages{$t}}) {
                    @{$pd2}{@keys} = @{$pd}{@keys};
                }
            }
        }

        # The formatting here is a little strange, for backwards compat
        my @pages=map { values %$_ } values %pages;
        my $txt = qq(<noinclude>{{User:AnomieBOT/PERTableHeader}}</noinclude>\n);
        $txt.=qq(<div class="veblenbot-pertable">\n);
        $txt.=qq(<templatestyles src="Template:Edit_fully-protected/color_legend/styles.css"/>\n);
        $txt.=qq({| class="wikitable" style="padding:0em"\n);
        $txt.=qq(|-\n);
        my $ct=scalar @pages;
        my $s=($ct==1?'':'s');
        my $pg='User:AnomieBOT/'.$tag.'Table';
        $txt.=qq(! <section begin="count" />$ct<section end="count" /> [[:Category:$cat|$type edit request$s]] {{v|$pg|h|style=float:right}}\n);
        $txt.=qq(|-\n);
        $txt.=qq(|\n);
        $txt.=qq({| class="wikitable sortable" width=100% style="margin:0em"\n);
        $txt.=qq(! Page\n);
        $txt.=qq(! Tagged since\n);
        for my $col (@$cols) {
            $txt.=sprintf( "! %s\n", $knowncols{$col} );
        }
        $txt.=qq(! Protection level\n);
        $txt.=qq(! class = "unsortable" | Last protection log entry\n);
        for my $p (sort { my $x = $a->{'touched'} <=> $b->{'touched'}; $x = $a->{'title'} cmp $b->{'title'} if $x == 0; return $x; } @pages){
            my $c=$p->{'color'};
            my $t=$p->{'title'};
            my $et=encodetitle($p->{'logtitle'});
            my $tt=$p->{'talk'};
            my $pt=$p->{'prottype'};
            my $r=$p->{'reason'};
            my $tl = $p->{'isredir'} ? "{{-r|1=$t}}" : "[[:$t]]";
            my $ttl = $p->{'reqisredir'} ? "{{-r|1=$tt#$tgt|2=request}}" : "[[$tt#$tgt|request]]";
            $txt.=qq(|- class="protectededit-legend-$c"\n);
            $txt.=qq(| $tl ($ttl)\n);
            $txt.=strftime("| %F %H:%M\n", gmtime $p->{'touched'});
            for my $col (@$cols) {
                $txt.=sprintf( "| %s\n", $p->{'extra'}{$col} // '' );
            }
            $txt.=qq(| $pt <span class="plainlinks">([//en.wikipedia.org/w/index.php?title=Special:Log&type=protect&page=$et log])</span>\n);
            $txt.=qq(| $r\n);
        }
        $txt.=qq(|}\n);
        $txt.=qq(|-\n);
        $txt.=qq(|style="text-align:right;font-size:smaller"| Updated as needed. Last updated: <!--TS-->~~~~~<!--/TS-->\n);
        $txt.=qq(|}</div>);

        my $tok=$api->edittoken($pg, EditRedir=>1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $pg: ".$tok->{'error'}."\n");
            next;
        }
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'}//'';
        $intxt=~s/^\s+|\s+$//;
        $intxt=~s#<!--TS-->.*<!--/TS-->#<!--TS-->~~~~~<!--/TS-->#g;
        if($intxt ne $txt){
            $api->log("Updating $pg ($ct request$s)");
            my $r=$api->edit($tok, $txt, "Update table ($ct request$s)", 0, 0);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $pg: ".$r->{'error'}."\n");
            }
        }
    }

    my $t=$starttime-time+300;
    $t=0 if $t<0;
    return $t;
}

sub warnBadRegex {
    my ($self, $api, $msg) = @_;

    $msg =~ s/\s+$//;
    my $file = __FILE__;
    $msg =~ s/ at \Q$file\E line \d+\.$//;

    if ( ! defined( $api->{'noedit'} ) ) {
        my $warned = $api->store->{'warnedBadRegex'};
        return if exists( $warned->{$msg} );
        $warned->{$msg} = 1;
        $api->store->{'warnedBadRegex'} = $warned;
    }

    $api->warn( "$msg\n" );
}

sub esccomment {
    my $c=shift;
    $c=~s/{/&#x7b;/g;
    $c=~s/</&lt;/g;
    $c=~s/>/&gt;/g;
    $c=~s/~/&#x7e;/g;
    $c=~s/\|\]\]/]]/g; # Pipe trick
    $c=~s/\[\[\|/[[/g; # Reverse pipe trick
    return $c;
}

sub encodetitle {
    my $t=shift;
    $t=~s/ /_/g;
    $t=uri_escape_utf8($t, '^A-Za-z0-9_\-.:/~');
    return $t;
}

sub replace_ns {
    my ($api,$ns)=@_;

    if($ns=~/^([+-]?[0-9]+)/){
        $ns=int($1);
    } else {
        $ns=~s/_/ /g;
        my %x=$api->namespace_map();
        $ns=$x{$ns} // undef;
        return undef unless defined($ns);
    }
    my %x=$api->namespace_reverse_map();
    return $x{$ns} // undef;
}

1;