User:Pppery/Cfdw.py
Appearance
Cfdw.py is a script to remove completed entries from WP:Categories for discussion/Working. It removes entries if all backlinks are one of:
- A talk page
- One of a long list of pages hardcoded to not count (or subpages of a similar list of pages)
- An article alerts page.
- A user CSD log or XfD log
- A page transcluding {{historical}}
- A WikiProject or user page that transcludes either a subpage of Wikipedia:Version 1.0 Editorial Team (like WP:WikiProject Spain/Assessment), a WP:Article alerts page (like WP:WikiProject Chemistry), or a User:AlexNewArtBot page
Entries may be added to or removed from this ignore list as applicable to the specific run.
Current code:
import pywikibot
# Site handle for the English Wikipedia; the Page and Category objects
# created in this script are all built from it.
site = pywikibot.Site("en","wikipedia")
# Title prefixes. get_backlinks() skips every backlink whose title starts
# with one of these, so each entry covers the named page and its subpages.
ignore = {
    "Wikipedia:Database reports",
    "Wikipedia:Categories for discussion",
    "User:Tim.landscheidt/Sandbox/User categories",
    "Wikipedia:Version 1.0 Editorial Team",
    "User:SDZeroBot/sandbox/CC",
    "User:SDZeroBot/Category cycles",
    "User:Random832/WantedCats",
    "User:Wbm1058/Category history merges",
    "User:ClueBot III/Detailed Indices",
    "User:ClueBot III/Master Detailed Indices",
    "Wikipedia:Articles for creation/Categories",
    "Wikipedia:Articles for creation/Redirects and categories",
    "Wikipedia:WikiProject English Language/Alerts",
    "User:Qwerfjkl/preservedCategories",
    "Wikipedia:AutoWikiBrowser/Tasks",
    "User:AlexNewArtBot",
    "Wikipedia:Categories for deletion",
    "Wikipedia:WikiProject Stub sorting/Proposals/Archive",
    "Wikipedia:Articles for deletion",
    "Wikipedia:Help desk/Archives",
}
# Exact page titles. get_backlinks() skips a backlink only when its full
# title is a member of this set (subpages are not covered).
ignore2 = {
    "User:RussBot/category redirect log",
    "User:RevelationDirect/sandbox/CFDgeneral",
    "Wikipedia:WikiProject Estonia/publicwatchlist",
    "User:JAAqqO/List of WikiProject Finland articles",
    "User:RevelationDirect/sandbox",
    "Wikipedia:WikiProject Stub sorting/Stub type sizes",
    "Wikipedia:WikiProject Stub sorting/Stub type sizes/data",
    "User:Qwerfjkl/sandbox/51",
    "User:Qwerfjkl/sandbox/47",
    "Wikipedia:WikiProject Rugby league/Watch All",
    "Wikipedia:WikiProject Rugby league/Other List",
    "Wikipedia:WikiProject Yorkshire/Watch All",
    "Wikipedia:WikiProject Yorkshire/Other List",
    "Wikipedia:WikiProject Wales/Watch All",
    "Wikipedia:WikiProject Wales/Other List",
    "User:Liz/Test",
    "User:Le Deluge/categories",
    "Template:WPRU Announcements",
    "User:Lugnuts/Categories I've Created",
    "User:R'n'B/Unused category redirects",
    "User:JAAqqO/List of WikiProject Finland articles (not tagged)",
    "Wikipedia:WikiProject Olympics/Draftified Early Olympian Stub Categories",
    "User:Bennylin/Lists/Categories",
    "User:Ahecht/Watchlist backup",
    "User:Aidan721/sandbox10",
    'User:Habst/"Female" and "Male" categories',
    "User:Amit6/sandbox02",
    "User:Mr. Guye/Sandbox 2",
    "User:Basilicofresco/Commons",
}
# Page object for {{Category redirect}}. can_remove() reports an existing
# category that does not use this template as "not processed yet".
cr = pywikibot.Page(site, "Template:Category redirect")
def get_backlinks(page):
    """Yield the backlinks of *page* that are not covered by any ignore rule.

    A backlink is skipped (not yielded) when any of the following holds:

    - it is a talk page;
    - its title contains "Article alerts", "XfD log" or "CSD log";
    - its title is listed in ``ignore2``;
    - its title starts with one of the prefixes in ``ignore``;
    - it transcludes ``Template:Historical``;
    - its title contains "WikiProject" or starts with "User:", and it
      transcludes a page whose title starts with
      "Wikipedia:Version 1.0 Editorial Team", ends with "Article alerts",
      or starts with "User:AlexNewArtBot/".
    """
    ignored_prefixes = tuple(ignore)
    title_markers = ("Article alerts", "XfD log", "CSD log")

    for backlink in page.backlinks():
        tit = backlink.title()

        # Cheap, title-only rules first.
        if backlink.isTalkPage():
            continue
        if any(marker in tit for marker in title_markers):
            continue
        if tit in ignore2:
            continue
        if tit.startswith(ignored_prefixes):
            continue

        # Only WikiProject and user pages are excused for transcluding logs.
        log_host = "WikiProject" in tit or tit.startswith("User:")

        for template in backlink.itertemplates():
            name = template.title()
            if name == "Template:Historical":
                break
            if log_host and (
                name.startswith("Wikipedia:Version 1.0 Editorial Team")
                or name.endswith("Article alerts")
                or name.startswith("User:AlexNewArtBot/")
            ):
                break
        else:
            # No transcluded template excused this page.
            yield backlink
def can_remove(cat):
    """Return True if the entry for category title *cat* may be removed.

    The entry is kept (False is returned) when:

    - the category page exists and does not use the ``cr`` template
      ({{Category redirect}}), reported as "not processed yet";
    - the category is not empty;
    - get_backlinks() yields at least one backlink for it.

    Every path returns an explicit bool; a short status message is printed
    for each decision.
    """
    page = pywikibot.Category(site, cat)

    if page.exists() and cr not in page.templates():
        print(page, "not processed yet")
        return False

    if not page.isEmptyCategory():
        print(page, "not empty")
        return False

    # Only the first relevant backlink matters, so stop at the first one.
    blocker = next(get_backlinks(page), None)
    if blocker is not None:
        print("Not removing", page, "due to", blocker)
        return False

    print("Removing", page)
    return True
def decode(line, first=True):
    """Extract the category titles mentioned on one line of wikitext.

    Two kinds of markup are recognised:

    - ``{{c|X}}``, ``{{lc|X}}``, ``{{cl|X}}``, ``{{clc|X}}`` (and the same
      names with a capitalised first letter), reported as ``"Category:X"``
      (a doubled "Category:Category:" prefix is collapsed);
    - ``[[:Category:X]]`` links, reported as ``":Category:X"`` (the leading
      colon is kept).

    Anything after a ``|`` inside the markup (a link label or an extra
    template parameter) is dropped from the reported title.  Markup that is
    never closed on the line is skipped instead of raising ``ValueError``.

    If a link points at one of the placeholder names ("Name of category",
    "xxxxx", "yyyyy") the whole line is ignored.

    Parameters:
        line: a single line of wikitext.
        first: when true, return only the title that occurs first on the
            line (or None); when false, return every match.

    Returns:
        With ``first`` true: the earliest title as a string, or None.
        With ``first`` false: a list of ``(index, title)`` tuples sorted by
        position in the line (empty when nothing matched or the line is
        ignored).
    """
    placeholders = (
        ":Category:Name of category",
        ":Category:xxxxx",
        ":Category:yyyyy",
    )
    cats = []

    for name in ("c", "lc", "cl", "clc"):
        for variant in (name, name.capitalize()):
            opener = "{{" + variant + "|"
            start = line.find(opener)
            while start != -1:
                end = line.find("}}", start)
                if end == -1:
                    # Unterminated template: no later occurrence can be
                    # terminated either, so stop looking for this opener.
                    break
                target = line[start + len(opener):end].split("|", 1)[0]
                cat = ("Category:" + target).replace(
                    "Category:Category:", "Category:"
                )
                cats.append((start, cat))
                start = line.find(opener, start + 1)

    link_opener = "[[:Category:"
    start = line.find(link_opener)
    while start != -1:
        end = line.find("]]", start)
        if end == -1:
            # Unterminated link: nothing usable from here on.
            break
        # Skip the "[[" but keep the leading colon of the link target.
        cat = line[start + 2:end].split("|", 1)[0]
        if cat in placeholders:
            # Template/example line: ignore it entirely.
            return None if first else []
        cats.append((start, cat))
        start = line.find(link_opener, start + 1)

    cats.sort()
    if first:
        return cats[0][1] if cats else None
    return cats
def main(page):
    """Remove completed entries from the wikitext of *page* and save it.

    Each line of the page text is passed to decode(); when a category is
    found and can_remove() approves it, the line is dropped.  The page is
    saved with page.put() only if at least one line was dropped; otherwise
    "Nothing to do" is printed.

    Lines are filtered by position into a new list rather than removed by
    value from the list being scanned, so the line that is dropped is always
    the one that was just examined and the pass stays linear.
    """
    lines = page.get().splitlines()

    kept = []
    for line in lines:
        cat = decode(line)
        if cat and can_remove(cat):
            continue
        kept.append(line)

    total = len(lines) - len(kept)
    if total:
        # Explicit login is left disabled, as in the original script.
        #site.login(user="Pppery")
        page.put("\n".join(kept), "Removing completed entries with no relevant backlinks (via [[User:Pppery/Cfdw.py]])")
    else:
        print("Nothing to do")
# Script entry point: process the CfD working page when run directly.
if __name__ == "__main__":
    working_page = pywikibot.Page(site, "Wikipedia:Categories for discussion/Working")
    main(working_page)