Usuari:KRLS/codi/treuCatInterwikisDinsCos.py
Aparença
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pywikibot, re
# Maintenance category processed by this bot, and the wikitext tag that puts
# a page into it.
_CATEGORY_NAME = u"Articles amb interviquis dins del text"
_CATEGORY_TAG = u"[[Categoria:Articles amb interviquis dins del text]]"

# Any wikilink whose target starts with a colon, e.g. [[:en:Foo]].
_COLON_LINK_RE = re.compile(r"(\[\[:.*?\]\])")
# A colon link whose prefix is one to three lowercase letters (":en:").
# Groups: 1 = whole link body, 2 = the ":xx:" prefix, 3 = the remainder.
_INTERWIKI_RE = re.compile(r"\[\[((:[a-z]{1,3}:)(.*?))\]\]")

# Interactive prompts (runtime strings, kept in Catalan as in the original).
_PROMPT_REF = "Vols eliminar la referència sencera? [y] = Yes, [n] = No, [s] = Skip article\n"
_PROMPT_LINK = "Vols eliminar només l'enllaç? [y] = Yes, [n] = No, [s] = Skip article\n"
_PROMPT_INTERWIKI = "Vols eliminar només l'interwiki? [y] = Yes, [n] = No, [s] = Skip article\n"
_PROMPT_SAVE = "Finalment deso? [y] = Yes, [n] = No\n"

# The script was written for Python 2 (raw_input); fall back to input() so
# the same prompts also work on Python 3.
try:
    _read_answer = raw_input  # Python 2
except NameError:
    _read_answer = input  # Python 3


def _find_colon_links(wikitext):
    """Return every ``[[:...]]`` link found in *wikitext*, in order."""
    return _COLON_LINK_RE.findall(wikitext)


def _find_enclosing_ref(wikitext, link):
    """Return ``(ref_text, ref_name)`` for the first ``<ref>`` containing *link*.

    ``ref_name`` is the text captured after ``name=`` or an empty string when
    the tag has no name.  Returns ``None`` when no ``<ref>...</ref>`` span
    containing *link* is found.
    """
    # NOTE(review): '.' does not cross newlines and nothing stops the match
    # from starting at an earlier <ref> on the same line; the caller always
    # shows the diff before applying, so the user can reject a bad match.
    pattern = u'(<ref(?: name=(.*?)>)?.*?{0}.*?</ref>)'.format(re.escape(link))
    found = re.search(pattern, wikitext)
    if found is None:
        return None
    return found.group(1), found.group(2) or u""


def _without_ref(wikitext, ref_text, ref_name):
    """Return *wikitext* with *ref_text* removed, plus reuses of a named ref."""
    shortened = wikitext.replace(ref_text, u"")
    if ref_name:
        # A named reference may be reused elsewhere as <ref name=.../>.
        shortened = shortened.replace(u'<ref name={0}/>'.format(ref_name), u"")
    return shortened


def _link_rewrites(link):
    """Return the ``(prompt, rewritten_link)`` options available for *link*.

    For a piped link ``[[:en:Foo|Bar]]`` the options are ``[[Bar]]`` (drop
    the target) and then ``[[Foo|Bar]]`` (drop only the prefix); for an
    unpiped link ``[[:en:Foo]]`` the only option is ``[[Foo]]``.  A link
    without a one-to-three lowercase letter prefix (for example
    ``[[:Categoria:Foo]]``) yields an empty list.
    """
    parsed = _INTERWIKI_RE.search(link)
    if parsed is None:
        # The original indexed the empty findall() result here and crashed
        # with IndexError on links such as [[:Categoria:Foo]].
        return []
    body, prefix = parsed.group(1), parsed.group(2)
    target, pipe, _label = body.partition(u"|")
    options = []
    if pipe:
        options.append((_PROMPT_LINK, link.replace(target + u"|", u"")))
    options.append((_PROMPT_INTERWIKI, link.replace(prefix, u"")))
    return options


def _ask(before, after, prompt):
    """Show the diff between *before* and *after* and return the typed answer."""
    pywikibot.showDiff(before, after)
    return _read_answer(prompt)


def _review_links(wikitext, links):
    """Interactively clean each of *links* out of *wikitext*; return the result.

    Answering ``s`` to any prompt stops reviewing the remaining links of the
    article; changes accepted up to that point are kept in the returned text.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation, so the nesting of the original prompts is ambiguous.  The
    # options are offered as a cascade (whole reference -> link target ->
    # interwiki prefix), which covers both plausible readings - confirm.
    current = wikitext
    for link in links:
        if link not in current:
            # Already removed by an earlier answer; nothing left to offer.
            continue
        scope = link  # span of text inside which the link gets rewritten
        enclosing = _find_enclosing_ref(current, link)
        if enclosing is not None:
            ref_text, ref_name = enclosing
            candidate = _without_ref(current, ref_text, ref_name)
            answer = _ask(current, candidate, _PROMPT_REF)
            if answer == 'y':
                current = candidate
                continue
            if answer == 's':
                return current
            # Keep the reference, but confine the link rewrite to it.
            scope = ref_text
        for prompt, rewritten in _link_rewrites(link):
            candidate = current.replace(scope, scope.replace(link, rewritten))
            answer = _ask(current, candidate, prompt)
            if answer == 'y':
                current = candidate
                break
            if answer == 's':
                return current
    return current


def main():
    """Walk the interwiki maintenance category and clean each article.

    Articles with no ``[[:...]]`` link left only lose the maintenance
    category tag.  For the others every such link is reviewed interactively
    and the page is saved only after a final confirmation.
    """
    site = pywikibot.Site("ca", "wikipedia")
    category = pywikibot.Category(site, _CATEGORY_NAME)
    for art in category.articles(recurse=True):
        # title is a method; printing it uncalled showed the bound method.
        pywikibot.output(art.title())
        text = art.text
        links = _find_colon_links(text)
        if not links:
            current = text.replace(_CATEGORY_TAG, u"")
            if current != text:
                pywikibot.showDiff(text, current)
                art.put(current, u'No té interwikis dins el text. Elimino la "Categoria:Articles amb interviquis dins del text". #QQ18')
            continue

        current = _review_links(text, links)
        if not _find_colon_links(current):
            # Every colon link is gone, so the maintenance tag can go too.
            current = current.replace(_CATEGORY_TAG, u"")
        if text != current:
            pywikibot.showDiff(text, current)
            if _read_answer(_PROMPT_SAVE) == 'y':
                art.put(current, u'Retiro interwikis dins el text. #QQ18')


# Original design notes (translated from Catalan):
#   - look for <ref></ref>
#   - look for <ref name=> XXX </ref> and then remove <ref name/>
#   - otherwise simply remove the word
# FIXME: ask whether the user wants to propose a new name, and substitute it.
# Start the bot only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()