Naudotojas:VP-bot/replacemetai.py
# -*- coding: utf-8 -*-
"""
This bot will make direct text replacements. It will retrieve information on
which pages might need changes either from an XML dump or a text file, or only
change a single page.

These command line parameters can be used to specify which pages to work on:

&params;

-xml              Retrieve information from a local XML dump (pages-articles
                  or pages-meta-current, see http://download.wikimedia.org).
                  Argument can also be given as "-xml:filename".

-page             Only edit a specific page.
                  Argument can also be given as "-page:pagetitle". You can
                  give this parameter multiple times to edit multiple pages.

Furthermore, the following command line parameters are supported:

-regex            Make replacements using regular expressions. If this argument
                  isn't given, the bot will make simple text replacements.

-nocase           Use case insensitive regular expressions.

-dotall           Make the dot match any character at all, including a newline.
                  Without this flag, '.' will match anything except a newline.

-multiline        '^' and '$' will now match begin and end of each line.

-xmlstart         (Only works with -xml) Skip all articles in the XML dump
                  before the one specified (may also be given as
                  -xmlstart:Article).

-addcat:cat_name  Adds "cat_name" category to every altered page.

-excepttitle:XYZ  Skip pages with titles that contain XYZ. If the -regex
                  argument is given, XYZ will be regarded as a regular
                  expression.

-requiretitle:XYZ Only do pages with titles that contain XYZ. If the -regex
                  argument is given, XYZ will be regarded as a regular
                  expression.

-excepttext:XYZ   Skip pages which contain the text XYZ. If the -regex
                  argument is given, XYZ will be regarded as a regular
                  expression.

-exceptinside:XYZ Skip occurrences of the to-be-replaced text which lie within
                  XYZ. If the -regex argument is given, XYZ will be regarded
                  as a regular expression.

-exceptinsidetag:XYZ Skip occurrences of the to-be-replaced text which lie
                  within an XYZ tag.

-summary:XYZ      Set the summary message text for the edit to XYZ, bypassing
                  the predefined message texts with original and replacements
                  inserted.

-sleep:123        If you use -fix you can check multiple regex at the same time
                  in every page. This can lead to a great waste of CPU because
                  the bot will check every regex without waiting using all the
                  resources. This will slow it down between a regex and another
                  in order not to waste too much CPU.

-fix:XYZ          Perform one of the predefined replacements tasks, which are
                  given in the dictionary 'fixes' defined inside the file
                  fixes.py.
                  The -regex and -nocase argument and given replacements will
                  be ignored if you use -fix.
                  Currently available predefined fixes are:
                  &fixes-help;

-always           Don't prompt you for each replacement

-recursive        Recurse replacement as long as possible. Be careful, this
                  might lead to an infinite loop.

-allowoverlap     When occurrences of the pattern overlap, replace all of them.
                  Be careful, this might lead to an infinite loop.

other:            First argument is the old text, second argument is the new
                  text. If the -regex argument is given, the first argument
                  will be regarded as a regular expression, and the second
                  argument might contain expressions like \\1 or \g<name>.

Examples:

If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
new syntax, e.g. {{Stub}}, download an XML dump file (pages-articles) from
http://download.wikimedia.org, then use this command:

    python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"

If you have a dump called foobar.xml and want to fix typos in articles, e.g.
Errror -> Error, use this:

    python replace.py -xml:foobar.xml "Errror" "Error" -namespace:0

If you have a page called 'John Doe' and want to fix the format of ISBNs, use:

    python replace.py -page:John_Doe -fix:isbn

This command will change 'referer' to 'referrer', but not in pages which talk
about HTTP, where the typo has become part of the standard:

    python replace.py referer referrer -file:typos.txt -excepttext:HTTP
"""
#
# (C) Daniel Herding & the Pywikipediabot Team, 2004-2008
#
# Distributed under the terms of the MIT license.
#
from __future__ import generators
import os, sys, codecs, re, time
import string
import encodings
from string import Template
import wikipedia, pagegenerators, catlib, config
import editarticle
import webbrowser
import metai
import biografijos, ltasmen, politikai, valdovai, sportininkai, dvasininkai, teisininkai, prezidentai
import pedagogai, karininkai, menininkai, mokslininkai

# Imports predefined replacements tasks from fixes.py
import fixes

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;':     pagegenerators.parameterHelp,
    '&fixes-help;': fixes.help,
}

__version__ = '$Id: replace.py 6412 2009-02-22 16:13:01Z nicdumz $'

# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
msg = {
    'ar':     u'%s روبوت : استبدال تلقائي للنص',
    'ca':     u'Robot: Reemplaçament automàtic de text %s',
    'cs':     u'Robot automaticky nahradil text: %s',
    'de':     u'Bot: Automatisierte Textersetzung %s',
    'el':     u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s',
    'en':     u'Robot: Automated text replacement %s',
    'es':     u'Robot: Reemplazo automático de texto %s',
    'fa':     u'ربات: تغییر خودکار متن %s',
    'fr':     u'Bot : Remplacement de texte automatisé %s',
    'he':     u'בוט: החלפת טקסט אוטומטית %s',
    'hu':     u'Robot: Automatikus szövegcsere %s',
    'ia':     u'Robot: Reimplaciamento automatic de texto %s',
    'id':     u'Bot: Penggantian teks otomatis %s',
    'is':     u'Vélmenni: breyti texta %s',
    'it':     u'Bot: Sostituzione automatica %s',
    'ja':     u'ロボットによる: 文字置き換え %s',
    'ka':     u'რობოტი: ტექსტის ავტომატური შეცვლა %s',
    'kk':     u'Бот: Мәтінді өздікті алмастырды: %s',
    'ksh':    u'Bot: hät outomatesch Täx jetuusch: %s',
    'lt':     u'robotas: Automatinis teksto keitimas %s',
    'nds':    u'Bot: Text automaatsch utwesselt: %s',
    'nds-nl': u'Bot: autematisch tekse vervungen %s',
    'nl':     u'Bot: automatisch tekst vervangen %s',
    'nn':     u'robot: automatisk teksterstatning: %s',
    'no':     u'robot: automatisk teksterstatning: %s',
    'pl':     u'Robot automatycznie zamienia tekst %s',
    'pt':     u'Bot: Mudança automática %s',
    'ru':     u'Робот: Автоматизированная замена текста %s',
    'sr':     u'Бот: Аутоматска замена текста %s',
    'sv':     u'Bot: Automatisk textersättning: %s',
    'zh':     u'機器人:執行文字代換作業 %s',
}


class XmlDumpReplacePageGenerator:
    """
    Iterator that will yield Pages that might contain text to replace.
    These pages will be retrieved from a local XML dump file.

    Arguments:
        * xmlFilename  - The dump's path, either absolute or relative
        * xmlStart     - Skip all articles in the dump before this one
        * replacements - A list of 2-tuples of original text (as a compiled
                         regular expression) and replacement text (as a
                         string).
        * exceptions   - A dictionary which defines when to ignore an
                         occurrence. See docu of the ReplaceRobot constructor
                         below.
""" def __init__(self, xmlFilename, xmlStart, replacements, exceptions): self.xmlFilename = xmlFilename self.replacements = replacements self.exceptions = exceptions self.xmlStart = xmlStart self.skipping = bool(xmlStart) self.excsInside = [] if self.exceptions.has_key('inside-tags'): self.excsInside += self.exceptions['inside-tags'] if self.exceptions.has_key('inside'): self.excsInside += self.exceptions['inside'] import xmlreader self.site = wikipedia.getSite() dump = xmlreader.XmlDump(self.xmlFilename) self.parser = dump.parse() def __iter__(self): try: for entry in self.parser: if self.skipping: if entry.title != self.xmlStart: continue self.skipping = False if not self.isTitleExcepted(entry.title) \ and not self.isTextExcepted(entry.text): new_text = entry.text for old, new in self.replacements: new_text = wikipedia.replaceExcept(new_text, old, new, self.excsInside, self.site) if new_text != entry.text: yield wikipedia.Page(self.site, entry.title) except KeyboardInterrupt: try: if not self.skipping: wikipedia.output( u'To resume, use "-xmlstart:%s" on the command line.' % entry.title) except NameError: pass def isTitleExcepted(self, title): if self.exceptions.has_key('title'): for exc in self.exceptions['title']: if exc.search(title): return True if self.exceptions.has_key('require-title'): for req in self.exceptions['require-title']: if not req.search(title): # if not all requirements are met: return True return False def isTextExcepted(self, text): if self.exceptions.has_key('text-contains'): for exc in self.exceptions['text-contains']: if exc.search(text): return True if self.exceptions.has_key('require-text'): ret = True for exc in self.exceptions['require-text']: if exc.search(text): return False return ret return False def file_exists(filename): if os.path.exists(filename): print "'%s' already exists." % filename return True return False class ReplaceRobot: """ A bot that can do text replacements. """ def __init__(self, generator, replacements, exceptions={}, acceptall=False, allowoverlap=False, recursive=False, addedCat=None, sleep=None, biogr=None, biogr1=None, zbiogr=None, upe=None): """ Arguments: * generator - A generator that yields Page objects. * replacements - A list of 2-tuples of original text (as a compiled regular expression) and replacement text (as a string). * exceptions - A dictionary which defines when not to change an occurence. See below. * acceptall - If True, the user won't be prompted before changes are made. * allowoverlap - If True, when matches overlap, all of them are replaced. * addedCat - If set to a value, add this category to every page touched. Structure of the exceptions dictionary: This dictionary can have these keys: title A list of regular expressions. All pages with titles that are matched by one of these regular expressions are skipped. text-contains A list of regular expressions. All pages with text that contains a part which is matched by one of these regular expressions are skipped. inside A list of regular expressions. All occurences are skipped which lie within a text region which is matched by one of these regular expressions. inside-tags A list of strings. These strings must be keys from the exceptionRegexes dictionary in wikipedia.replaceExcept(). 
""" self.generator = generator self.replacements = replacements self.exceptions = exceptions self.acceptall = acceptall self.allowoverlap = allowoverlap self.recursive = recursive self.biogr = biogr self.biogr1 = biogr1 self.zbiogr = zbiogr self.upe = upe self.page = None if addedCat: site = wikipedia.getSite() cat_ns = site.category_namespaces()[0] self.addedCat = wikipedia.Page(site, cat_ns + ':' + addedCat) self.sleep = sleep self.ltasmencat = ltasmen.ltasmen() self.valdovaicat = valdovai.valdovai() self.politikaicat = politikai.politikai() self.sportininkaicat = sportininkai.sportininkai() self.dvasininkaicat = dvasininkai.dvasininkai() self.teisininkaicat = teisininkai.teisininkai() self.karininkaicat = karininkai.karininkai() self.menininkaicat = menininkai.menininkai() self.mokslininkaicat = mokslininkai.mokslininkai() self.pedagogaicat = pedagogai.pedagogai() self.prezidentaicat = prezidentai.prezidentai() #_fnf = os.path.join('', "biografija.txt") #if not file_exists(_fnf): # self.f = codecs.open(_fnf, "w", "utf-8") def isTitleExcepted(self, title): """ Iff one of the exceptions applies for the given title, returns True. """ if self.exceptions.has_key('title'): for exc in self.exceptions['title']: if exc.search(title): return True if self.exceptions.has_key('require-title'): for req in self.exceptions['require-title']: if not req.search(title): return True return False def isTextExcepted(self, original_text): """ Iff one of the exceptions applies for the given page contents, returns True. """ if self.exceptions.has_key('text-contains'): for exc in self.exceptions['text-contains']: if exc.search(original_text): return True if self.exceptions.has_key('require-title'): for req in self.exceptions['require-title']: if not req.search(title): # if not all requirements are met: return True if self.exceptions.has_key('require-text'): ret = True for exc in self.exceptions['require-text']: if exc.search(original_text): return False return ret return False def doReplacements(self, original_text): """ Returns the text which is generated by applying all replacements to the given text. """ new_text = original_text exceptions = [] if self.exceptions.has_key('inside-tags'): exceptions += self.exceptions['inside-tags'] if self.exceptions.has_key('inside'): exceptions += self.exceptions['inside'] #sab = re.compile(u'\{\{(?:(?:I|i)nfolentelė )?(?:U|u)pė(\|(?P<params>[^\{\}\|\=]*?))\|') for old, new in self.replacements: if self.sleep != None: time.sleep(self.sleep) if self.upe: if type(old) is str or type(old) is unicode: old1 = re.compile(old) else: old1 = old index = 0 #match = old1.search(new_text, index) #wikipedia.output(u'match %s :' %match) temp = self.page.templatesWithParams() #wikipedia.output(u'Templates %s :' % temp) #matchsab = sab.search(new_text, index) #if match == None: #if matchsab != None: # wikipedia.output(u'Templates %s :' % temp) # choice = wikipedia.inputChoice( # u'Not Found. 
Nead customize?', # ["Quit"], # ['q'], 'q') #else: # return original_text*/ find = False #if match: #if matchsab: tparm = re.compile(u'\ *(?P<name>.*?)\ *\=\ *(?P<value>.*?)\r?\n') for templname, parms in temp: if (templname == u'Upė') or (templname == u'upė') or (templname == u'Infolentelupė upė'): parms2 = [] for parm in parms: match0 = tparm.search(parm, 0) parms2.append( match0.group('value')) name = parms2[0] pav = parms2[1] pava = parms2[2] ilg = parms2[3] nuol = parms2[4] #choice = wikipedia.inputChoice( # u'Found, but not nead customize?', # ["Quit"], # ['q'], 'q') if nuol.strip(' ') == u'-' or nuol.strip(' ') == u'–' or nuol.strip(' ') == u'?': nuol = '' deb = parms2[5] plot = parms2[6] ist = parms2[7] ziot = parms2[8] sal = parms2[9] if nuol != '': nuol = '' else: return original_text #to = Template(u'{{Upė|${name}|${pav}|${pava}|${ilg}|${nuol}|${deb}|${plot}|${ist}|${ziot}|${sal}}}\n') #old1 = to.substitute(name=name, # pav=pav, # pava=pava, # ilg=ilg, # nuol=nuol, # deb=deb, # plot=plot, # ist=ist, # ziot=ziot, # sal=sal, # ) # t1 = Template(new) new = t1.substitute(name=name, pav=pav, pava=pava, ilg=ilg, nuol=nuol, deb=deb, plot=plot, ist=ist, ziot=ziot, sal=sal, ) #wikipedia.output(u'New %s :' % new) find = True break if not find: choice = wikipedia.inputChoice( u'Found, but not nead customize?', ["Quit"], ['q'], 'q') if self.biogr1: old1 = re.compile(old, re.VERBOSE) index = 0 match = old1.search(new_text, index) if match: mens = { '1': u'sausio', '2': u'vasario', '3': u'kovo', '4': u'balandžio', '5': u'gegužės', '6': u'birželio', '7': u'liepos', '8': u'rugpjūčio', '9': u'rugsėjo', '10': u'spalio', '11': u'lapkričio', '12': u'gruodžio', } mend = { '1': u'Sausio', '2': u'Vasario', '3': u'Kovo', '4': u'Balandžio', '5': u'Gegužės', '6': u'Birželio', '7': u'Liepos', '8': u'Rugpjūčio', '9': u'Rugsėjo', '10': u'Spalio', '11': u'Lapkričio', '12': u'Gruodžio', } men = { u'sausio': '1', u'vasario': '2', u'kovo': '3', u'balandžio': '4', u'gegužės': '5', u'birželio': '6', u'liepos': '7', u'rugpjūčio': '8', u'rugsėjo': '9', u'spalio': '10', u'lapkričio': '11', u'gruodžio': '12', u'Sausio': '1', u'Vasario': '2', u'Kovo': '3', u'Balandžio': '4', u'Gegužės': '5', u'Birželio': '6', u'Liepos': '7', u'Rugpjūčio': '8', u'Rugsėjo': '9', u'Spalio': '10', u'Lapkričio': '11', u'Gruodžio': '12', } gmet = match.group('gmet') gmen = match.group('gmen') gd = match.group('gd') gv = match.group('gv') mmet = match.group('mmet') mmen = match.group('mmen') md = match.group('md') mv = match.group('mv') gmet2 = match.group('gmet2') gmen2 = match.group('gmen2') gd2 = match.group('gd2') gv2 = match.group('gv2') mmet2 = match.group('mmet2') mmen2 = match.group('mmen2') md2 = match.group('md2') mv2 = match.group('mv2') pav = match.group('pav') name2 = match.group('name2') name3 = match.group('name3') veikl = match.group('veikl') text = match.group('text') gmenn = '' mmenn = '' if gmet == None: if gmet2 != None: gmet = gmet2 else: gmet = '' if gmen == None: if gmen2 != None: gmen = gmen2 if gmen != None: gmenn = men[gmen] if gd == None: if gd2 != None: gd = gd2 else: gd = '' if gv == None: if gv2 != None: gv = gv2 else: gv = '' if mmet == None: if mmet2 != None: mmet = mmet2 else: mmet = '' if mmen == None: if mmen2 != None: mmen = mmen2 if mmen != None: mmenn = men[mmen] if md == None: if md2 != None: md = md2 else: md = '' if mv == None: if mv2 != None: mv = mv2 else: mv = '' if pav == None: pav = '' if veikl == None: veikl = '' if name2 == None: name2 = '' if name3 == None: name3 = '' if text == None: text = 
'' if mmet=='': text2=\ '* \'\'\'[['+mend[gmenn]+' '+gd+']]\'\'\' d. - [['+self.page.title()+'|'+name2+']], '+veikl+'\n'+\ '* [['+gmet+']] - [['+self.page.title()+'|'+name2+']], '+veikl+'\n' else: text2=\ '* \'\'\'[['+mend[gmenn]+' '+gd+']]\'\'\' d. - [['+self.page.title()+'|'+name2+']], '+veikl+' (m. [['+mmet+ur']])\n'+\ '* [['+gmet+']] - [['+self.page.title()+'|'+name2+']], '+veikl+' (m. [['+mmet+ur']])\n'+\ '* \'\'\'[['+mend[mmenn]+' '+md+']]\'\'\' d. - [['+self.page.title()+'|'+name2+']], '+veikl+' (g. [['+gmet+ur']])\n'+\ '* [['+mmet+']] - [['+self.page.title()+'|'+name2+']], '+veikl+' (g. [['+gmet+ur']])\n' t1 = Template(new) new = t1.substitute(name=self.page.title(), gmet=gmet, gmen=gmenn, gd=gd, gv=gv, mmet=mmet, mmen=mmenn, md=md, mv=mv, pav=pav, veikl=veikl, name2=name2, name3=name3, text=text, text2=text2, ) if self.biogr: if type(old) is str or type(old) is unicode: old1 = re.compile(old) else: old1 = old index = 0 ltOrPs = 'Pasaulyje' category = wikipedia.getCategoryLinks(original_text,wikipedia.getSite()) ltparm = re.compile(u'^Lietuvos') for cat in category: catName = cat.titleWithoutNamespace() wikipedia.output(u'category : %s' % catName) matchlt = ltparm.search(catName, 0) if matchlt: ltOrPs = 'Lietuvoje' temp = self.page.templatesWithParams() find = False tparm = re.compile(u'\s*(?P<name>.*?)\s*(?P<ly>\=)\s*(?P<reik>.*)\r?\n?$') parms2 = {'pav':'', 'g-data':'', 'm-data':'', 'g-vieta':'', 'm-vieta':''} for templname, parms in temp: if (templname == u'Biografija') or (templname == u'biografija'): #wikipedia.output(u'Parms : %s' % parms) for parm in parms: #wikipedia.output(u'Parm : %s' % parm) match0 = tparm.search(parm, 0) #wikipedia.output(u'match0.groups : %s' % match0.groupdict()) #wikipedia.output(u'match0 : %s' % match0.group('reik')) parms2[match0.group('name')] = match0.group('reik') for key in parms2: wikipedia.output(u'Parm key : %s = %s' % (key, parms2[key])) pav = parms2['pav'] tmpl = re.compile(u'\[\[(((i|I)mage)|((v|V)aizdas))\:(?P<pav>.*?)\|') match = tmpl.search(pav, 0) if match: pav = match.group('pav') wikipedia.output(u'Pav is : %s' % pav) gd = metai.Data(data = parms2['g-data']) #wikipedia.output(u'gdType is : %s, %s, %s' % (type(gd), type(gd._metai), gd._metai.get())) md = metai.Data(data = parms2['m-data']) gv = parms2['g-vieta'] mv = parms2['m-vieta'] #self.f.write('Page : %s \n' % self.page.title()) #self.f.write(u'Parm key : %s = %s \n' % (key, parms2[key])) #wikipedia.output(u'Parm is : %s' % parms2[key]) if gd._metai: gmet = gd._metai.get() else: gmet = None gadd = gd._papild gmen = None if gd._menDien: gmen = gd._menDien._menuo.get() gd = gd._menDien._diena.get() else: gmen = None gd = None #wikipedia.output(u'gmet is : %s' % gmet) #wikipedia.output(u'gmen is : %s' % gmen) #wikipedia.output(u'gd is : %s' % gd) if md._metai: mmet = md._metai.get() else: mmet = None madd = md._papild mmen = None if md._menDien: mmen = md._menDien._menuo.get() md = md._menDien._diena.get() else: mmen = None md = None #return original_text match = old1.search(new_text, 0) if match: name2 = match.group('name2') veikl = match.group('veikl') text = match.group('text') skliaust = match.group('skliaust') if name2 == None: name2 = '' if text == None: text = '' tparms = re.compile(u'^(?P<name3>.*?)(\[\[(?P<gmet2>\d*?)\]\](?:\s*(?:m\.\s*)?\[\[(?P<gmen2>.*?)\ (?P<gd2>\d*?)\]\](?:\s*(d\.\s*)?)?)?(?P<gv2>.*?)((((\-\s*(m\.\s*)?)|(–\s*(m\.\s*)?)|(-\s*(m\.\s*)?)|(—\s*(m\.\s*)?)))\[\[(?P<mmet2>\d*?)\]\](?:\s*(?:m\.\s*)?\[\[(?P<mmen2>.*?)\ 
(?P<md2>\d*?)\]\](?:\s*(d\.\s*)?)?)?(?P<mv2>.*?))?)?$') matchs = tparms.search(skliaust, 0) if matchs: gmet2 = matchs.group('gmet2') gmen2 = matchs.group('gmen2') gd2 = matchs.group('gd2') gv2 = matchs.group('gv2') mmet2 = matchs.group('mmet2') mmen2 = matchs.group('mmen2') md2 = matchs.group('md2') mv2 = matchs.group('mv2') name3 = matchs.group('name3') if name3 == None: name3 = '' if gmet == None: if gmet2 != None: gmet = gmet2 if gmen == None: if gmen2 != None: gmen = gmen2 if gd == None: if gd2 != None: gd = gd2 if gv == None: if gv2 != None: gv = gv2 if mmet == None: if mmet2 != None: mmet = mmet2 if mmen == None: if mmen2 != None: mmen = mmen2 if md == None: if md2 != None: md = md2 if mv == None: if mv2 != None: mv = mv2 if pav == None: pav = '' asm = metai.Asmuo(name = self.page.title(), nameShow = name2,\ gData = metai.Data(metai = gmet, menuo = gmen, diena = gd), gVieta = gv,\ mData = metai.Data(metai = mmet, menuo = mmen, diena = md), mVieta = mv,\ what = veikl) parttype = 'I' if gmet is not None: gPageMetai = metai.PageMetai(gmet) if gmen is None: gmeni = 'Nedatuoti' parttype = 'MNI' nextind = self.page.title() else: gmeni = metai.Menuo(gmen).get(vard = True, upper=True) parttype = 'I' nextind = int(gd) ind = [ ltOrPs, 'Gimtadieniai', gmeni, nextind, self.page.title()] gime = asm.getGimeMetai(full=False) wikipedia.output(u'Itraukiam %s: \n%s' % (ind, gime)) gPageMetai.add(metai.PageMetaiPart(self.page.title(), parttype, gime, _ind=ind), ind) gPageMetai.join() wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % gmet) oldMet = gPageMetai.old newMet = gPageMetai.new #wikipedia.setAction('Gimtadienis:'+gime[0:100]) while True: wikipedia.showDiff(oldMet, newMet) choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', "Quit"], ['y', 'N', 'e', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(newMet) # if user didn't press Cancel if as_edited and as_edited != newMet: newMet = as_edited if choice == 'q': break if choice == 'N': break if choice == 'n': break if choice == 'y': gPageMetai.page.put_async(newMet, watchArticle=True, comment='Gimtadienis:'+gime[0:100]) break if mmet is not None: mPageMetai = metai.PageMetai(mmet) if mmen is None: mmeni = 'Nedatuoti' parttype = 'MNI' nextind = self.page.title() else: mmeni = metai.Menuo(mmen).get(vard = True, upper=True) parttype = 'I' nextind = int(md) ind = [ ltOrPs, 'Mirtys', mmeni, nextind, self.page.title()] mire = asm.getMireMetai(full=False) wikipedia.output(u'Itraukiam %s: \n%s' % (ind, mire)) mPageMetai.add(metai.PageMetaiPart(self.page.title(), parttype, mire, _ind=ind), ind) mPageMetai.join() wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % mmet) oldMet = mPageMetai.old newMet = mPageMetai.new #wikipedia.setAction('Mirtys:'+mire[0:100]) while True: wikipedia.showDiff(oldMet, newMet) choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', "Quit"], ['y', 'N', 'e', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(newMet) # if user didn't press Cancel if as_edited and as_edited != newMet: newMet = as_edited if choice == 'q': break if choice == 'N': break if choice == 'n': break if choice == 'y': mPageMetai.page.put_async(newMet, watchArticle=True, comment='Mirtys:'+mire[0:100]) break if mmet is None: text2= asm.getGimeData() t1 = Template(new) new = t1.substitute(name=self.page.title(), gd=asm.getGime(aslist = True), gv=asm._gVieta, gadd = gadd, md='', 
mv='', madd = madd, pav=pav, veikl=asm._what, name2=asm._nameShow, name3=name3, text=text, text2=text2, ) else: text2= asm.getGimeData() + asm.getMireData() t1 = Template(new) new = t1.substitute(name=self.page.title(), gd=asm.getGime(aslist = True), gv=asm._gVieta, gadd = gadd, md=asm.getMire(aslist = True), mv=asm._mVieta, madd = madd, pav=pav, veikl=asm._what, name2=asm._nameShow, name3=name3, text=text, text2=text2, ) else: raise TypeError('Blogo formato skliaustai: %s' % skliaust) if self.zbiogr: fonas = '' if type(old) is str or type(old) is unicode: old1 = re.compile(old) else: old1 = old index = 0 ltOrPs = 'Pasaulyje' category = wikipedia.getCategoryLinks(original_text,wikipedia.getSite()) #ltparm = re.compile(u'^Lietuvos') for cat in category: catName = cat.titleWithoutNamespace() wikipedia.output(u'category : %s' % catName) #matchlt = ltparm.search(catName, 0) if catName in self.ltasmencat: ltOrPs = 'Lietuvoje' ## temp = self.page.templatesWithParams() ## find = False ## tparm = re.compile(u'\s*(?P<name>.*?)\s*(?P<ly>\=)\s*(?P<reik>.*)\r?\n?$') ## parms2 = {'pav':'', 'g-data':'', 'm-data':'', 'g-vieta':'', 'm-vieta':''} ## for templname, parms in temp: ## if (templname == u'Biografija') or (templname == u'biografija'): ## #wikipedia.output(u'Parms : %s' % parms) ## for parm in parms: ## #wikipedia.output(u'Parm : %s' % parm) ## match0 = tparm.search(parm, 0) ## #wikipedia.output(u'match0.groups : %s' % match0.groupdict()) ## #wikipedia.output(u'match0 : %s' % match0.group('reik')) ## parms2[match0.group('name')] = match0.group('reik') ## ## for key in parms2: ## wikipedia.output(u'Parm key : %s = %s' % (key, parms2[key])) ## pav = parms2['pav'] ## tmpl = re.compile(u'\[\[(((i|I)mage)|((v|V)aizdas))\:(?P<pav>.*?)\|') ## match = tmpl.search(pav, 0) ## if match: ## pav = match.group('pav') ## wikipedia.output(u'Pav is : %s' % pav) ## gd = metai.Data(data = parms2['g-data']) ## #wikipedia.output(u'gdType is : %s, %s, %s' % (type(gd), type(gd._metai), gd._metai.get())) ## md = metai.Data(data = parms2['m-data']) ## gv = parms2['g-vieta'] ## mv = parms2['m-vieta'] ## #self.f.write('Page : %s \n' % self.page.title()) ## #self.f.write(u'Parm key : %s = %s \n' % (key, parms2[key])) ## #wikipedia.output(u'Parm is : %s' % parms2[key]) ## if gd._metai: ## gmet = gd._metai.get() ## else: ## gmet = None ## gadd = gd._papild ## gmen = None ## if gd._menDien: ## gmen = gd._menDien._menuo.get() ## gd = gd._menDien._diena.get() ## else: ## gmen = None ## gd = None ## #wikipedia.output(u'gmet is : %s' % gmet) ## #wikipedia.output(u'gmen is : %s' % gmen) ## #wikipedia.output(u'gd is : %s' % gd) ## if md._metai: ## mmet = md._metai.get() ## else: ## mmet = None ## madd = md._papild ## mmen = None ## if md._menDien: ## mmen = md._menDien._menuo.get() ## md = md._menDien._diena.get() ## else: ## mmen = None ## md = None ## #return original_text break for cat in category: catName = cat.titleWithoutNamespace() wikipedia.output(u'category : %s' % catName) if catName in self.valdovaicat: fonas = 'reg' new = u'{{Valdovas\n' +\ u'| fonas = ${fonas}\n' +\ u'| titulas = \n' +\ u'| kiti titulai = \n' +\ u'| dinastija = \n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = 
${veikl}\n' +\ u'<!-- Pasikartojanti dalis iki 10 kartu. Pasikartoja, jei nurodytas titulas su atitinkamu numeriu. Nuo 2 yra paslepiami. -->\n' +\ u'| titulas0 = \n' +\ u'| fonas0 = ${fonas}\n' +\ u'| kelintas0 = \n' +\ u'| valdė0 = \n' +\ u'| karūnavimas0 = \n' +\ u'| pirmtakas0 = \n' +\ u'| įpėdinis0 = \n' +\ u'| žinomas = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.prezidentaicat: fonas = 'off' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| titulas = \n' +\ u'| kiti titulai = \n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'| partija = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'<!-- Pasikartojanti dalis iki 0-10, pasikartoja jei nurodytas titulas su atitinkamu numeriu. Nuo 2 yra paslepiami. -->\n' +\ u'| titulas0 = \n' +\ u'| fonas0 = ${fonas}\n' +\ u'| kelintas0 = \n' +\ u'| nuo0 = \n' +\ u'| iki0 = \n' +\ u'| vadovavo0 = \n' +\ u'| ėjo pareigas0 = \n' +\ u'| ankstesnis0 = \n' +\ u'| vėlesnis0 = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.politikaicat: fonas = 'off' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'| partija = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.dvasininkaicat: fonas = 'rel' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| titulas = \n' +\ u'| kiti titulai = \n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| religija = \n' +\ u'<!-- Šventasis -->\n' +\ u'<!-- | įšventintas = \n' +\ u'| palaimintas = \n' +\ u'| kanonizuotas = \n' +\ u'| kanonizavo = \n' +\ u'| šventė = \n' +\ u'| globėjas = -->\n' +\ u'<!--| 
popiežiaus tarnystės pradžia = \n' +\ u'| popiežiaus tarnystės pabaiga = -->\n' +\ u'<!-- Pasikartojanti dalis iki 0-10, pasikartoja jei nurodytas titulas su atitinkamu numeriu. Nuo 2 yra paslepiami. -->\n' +\ u'| titulas0 = \n' +\ u'| fonas0 = ${fonas}\n' +\ u'| kelintas0 = \n' +\ u'| ėjo pareigas0 = \n' +\ u'| ankstesnis0 = \n' +\ u'| vėlesnis0 = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.sportininkaicat: fonas = 'sports' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.teisininkaicat: fonas = 'legal' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.karininkaicat: fonas = 'mil' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.menininkaicat: fonas = 'culture' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| 
sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break elif catName in self.pedagogaicat: fonas = 'edu' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' break else: fonas = '' new = u'{{Žmogaus biografija\n' +\ u'| fonas = ${fonas}\n' +\ u'| vardas = ${names}\n' +\ u'| paveikslėlis = \n' +\ u'| paveikslėlio apibūdinimas = \n' +\ u'| paveikslėlio dydis = \n' +\ u'| gimimo data = ${gd}\n' +\ u'| gimimo vieta = ${gv}\n' +\ u'| mirties data = ${md}\n' +\ u'| mirties vieta = ${mv}\n' +\ u'| tautybė = \n' +\ u'| sutuoktinis = \n' +\ u'| tėvas = \n' +\ u'| motina = \n' +\ u'| vaikai = \n' +\ u'<!-- Veikla -->\n' +\ u'| veikla = ${veikl}\n' +\ u'| sritis = \n' +\ u'| įstaigos = \n' +\ u'| pareigos = \n' +\ u'<!-- Išsilavinimas -->\n' +\ u'| išsilavinimas = \n' +\ u'| alma_mater = \n' +\ u'| doktorantūros_vadovas = \n' +\ u'| studentai = \n' +\ u'| žinomas = \n' +\ u'| apdovanojimai = \n' +\ u'| parašas = \n' +\ u'| pastabos = \n' +\ u'}}\n' +\ u'\'\'\'\g<name2>\'\'\' (\g<skliaust>) – \g<veikl>\n${text2}' gmet = None gmen = None gd = None gv = None mmet = None mmen = None md = None mv = None match = old1.search(new_text, 0) if match: name2 = match.group('name2') veikl = match.group('veikl') text = match.group('text') skliaust = match.group('skliaust') if name2 == None: name2 = '' else: name2 = name2.strip(' \r\n\t') if text == None: text = '' tparms = re.compile(u'^(?P<name3>.*?)(((g\.)|(gimė))\s*)?(\[\[(?P<gmet2>\d*(\ m\.)?(\ pr\.\ m\.\ e\.)?)(\|\d*)?\]\](?:\s*)(?:m\.\s*)?(\[\[(?P<gmen2>[^\ ]*?)\ (?P<gd2>\d*?)\]\](?:\s*(d\.\s*)?)?)?(?P<gv2>.*?)(((((\-\s*)|(–\s*)|(-\s*)|(—\s*)|(−\s*)|(–\s*))(((m\.)|(mirė))\s*)?)|(†\s*))\[\[(?P<mmet2>\d*?(\ m\.)?(\ pr\.\ m\.\ e\.)?)\]\](?:\s*)(?:m\.\s*)?(\[\[(?P<mmen2>[^\ ]*?)\ (?P<md2>\d*?)\]\](?:\s*(d\.\s*)?)?)?(?P<mv2>.*?))?)?$') matchs = tparms.search(skliaust, 0) if matchs: gmet2 = matchs.group('gmet2') gmen2 = matchs.group('gmen2') gd2 = matchs.group('gd2') gv2 = matchs.group('gv2') mmet2 = matchs.group('mmet2') mmen2 = matchs.group('mmen2') md2 = matchs.group('md2') mv2 = matchs.group('mv2') name3 = matchs.group('name3') if name3 == None: name3 = '' else: name3 = name3.strip(' \r\n\t') if gmet == None: if gmet2 != None: gmet = gmet2 if gmen == None: if gmen2 != None: gmen = gmen2 if gd == None: if gd2 != None: gd = gd2 if gv == None: if gv2 != 
None: gv = gv2 if mmet == None: if mmet2 != None: mmet = mmet2 if mmen == None: if mmen2 != None: mmen = mmen2 if md == None: if md2 != None: md = md2 if mv == None: if mv2 != None: mv = mv2 ## if pav == None: ## pav = '' names = self.page.title() if names != name2: names += '<br/>' + name2 if name3 not in (u'', u'g.', u'gimė'): names += '<br/>' + name3 ltcats = [] ltcat = catlib asm = metai.Asmuo(name = self.page.title(), nameShow = name2,\ gData = metai.Data(metai = gmet, menuo = gmen, diena = gd), gVieta = gv,\ mData = metai.Data(metai = mmet, menuo = mmen, diena = md), mVieta = mv,\ what = veikl) parttype = 'I' if gmet is not None: gPageMetai = metai.PageMetai(gmet) if gmen is None: gmeni = 'Nedatuoti' parttype = 'MNI' nextind = self.page.title() else: gmeni = metai.Menuo(gmen).get(vard = True, upper=True) parttype = 'I' nextind = int(gd) ind = [ ltOrPs, 'Gimtadieniai', gmeni, nextind, self.page.title()] gime = asm.getGimeMetai(full=False) wikipedia.output(u'Itraukiam %s: \n%s' % (ind, gime)) gPageMetai.add(metai.PageMetaiPart(self.page.title(), parttype, gime, _ind=ind), ind) gPageMetai.join() wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % gmet) oldMet = gPageMetai.old newMet = gPageMetai.new #wikipedia.setAction('Gimtadienis:'+gime[0:100]) while True: wikipedia.showDiff(oldMet, newMet) choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', "Quit"], ['y', 'N', 'e', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(newMet) # if user didn't press Cancel if as_edited and as_edited != newMet: newMet = as_edited if choice == 'q': break if choice == 'N': break if choice == 'n': break if choice == 'y': gPageMetai.page.put_async(newMet, watchArticle=True, comment='Gimtadienis:'+gime[0:100]) break if mmet is not None: mPageMetai = metai.PageMetai(mmet) if mmen is None: mmeni = 'Nedatuoti' parttype = 'MNI' nextind = self.page.title() else: mmeni = metai.Menuo(mmen).get(vard = True, upper=True) parttype = 'I' nextind = int(md) ind = [ ltOrPs, 'Mirtys', mmeni, nextind, self.page.title()] mire = asm.getMireMetai(full=False) wikipedia.output(u'Itraukiam %s: \n%s' % (ind, mire)) mPageMetai.add(metai.PageMetaiPart(self.page.title(), parttype, mire, _ind=ind), ind) mPageMetai.join() wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % mmet) oldMet = mPageMetai.old newMet = mPageMetai.new #wikipedia.setAction('Mirtys:'+mire[0:100]) while True: wikipedia.showDiff(oldMet, newMet) choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', "Quit"], ['y', 'N', 'e', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(newMet) # if user didn't press Cancel if as_edited and as_edited != newMet: newMet = as_edited if choice == 'q': break if choice == 'N': break if choice == 'n': break if choice == 'y': mPageMetai.page.put_async(newMet, watchArticle=True, comment='Mirtys:'+mire[0:100]) break if mmet is None: text2= asm.getGimeData() if text2 is None: text2 = '' t1 = Template(new) new = t1.substitute(#name=self.page.title(), gd=asm.getGime(aslist = True), gv=asm._gVieta, #gadd = gadd, md='', mv='', #madd = madd, fonas=fonas, veikl=asm._what, names=names, #name3=name3, #text=text, text2=text2, ) else: text2= asm.getGimeData() + asm.getMireData() t1 = Template(new) new = t1.substitute(#name=self.page.title(), gd=asm.getGime(aslist = True), gv=asm._gVieta, #gadd = gadd, md=asm.getMire(aslist = True), mv=asm._mVieta, #madd = madd, fonas=fonas, 
veikl=asm._what, names=names, #name3=name3, #text=text, text2=text2, ) else: raise TypeError('Blogo formato skliaustai: %s' % skliaust) ## else: ## wikipedia.output(u'Nerasta : %s' % new_text) ## raise TypeError('Nerasta:') new_text = wikipedia.replaceExcept(new_text, old, new, exceptions, allowoverlap=self.allowoverlap) return new_text def run(self): """ Starts the robot. """ # Run the generator which will yield Pages which might need to be # changed. for self.page in self.generator: wikipedia.output(u'Page %s begin:' % self.page.title()) if self.isTitleExcepted(self.page.title()): wikipedia.output( u'Skipping %s because the title is on the exceptions list.' % self.page.aslink()) continue try: # Load the page's text from the wiki original_text = self.page.get(get_redirect=True) if not self.page.canBeEdited(): wikipedia.output(u"You can't edit page %s" % self.page.aslink()) continue except wikipedia.NoPage: wikipedia.output(u'Page %s not found' % self.page.aslink()) continue new_text = original_text while True: if self.isTextExcepted(new_text): wikipedia.output( u'Skipping %s because it contains text that is on the exceptions list.' % self.page.aslink()) break new_text = self.doReplacements(new_text) if new_text == original_text: wikipedia.output('No changes were necessary in %s' % self.page.aslink()) break if self.recursive: newest_text = self.doReplacements(new_text) while (newest_text!=new_text): new_text = newest_text newest_text = self.doReplacements(new_text) if hasattr(self, "addedCat"): cats = self.page.categories(nofollow_redirects=True) if self.addedCat not in cats: cats.append(self.addedCat) new_text = wikipedia.replaceCategoryLinks(new_text, cats) # Show the title of the page we're working on. # Highlight the title in purple. #wikipedia.setAction('{{biografija}} pakeitimas') while True: wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % self.page.title()) wikipedia.showDiff(original_text, new_text) if self.acceptall: break choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"], ['y', 'N', 'e', 'b', 'a', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(new_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'b': webbrowser.open("http://%s%s" % ( self.page.site().hostname(), self.page.site().nice_get_address(self.page.title()) )) wikipedia.input("Press Enter when finished in browser.") original_text = self.page.get(get_redirect=True, force=True) new_text = original_text continue if choice == 'q': return if choice == 'n': new_text = original_text break if choice == 'a': self.acceptall = True if choice == 'y': self.page.put_async(new_text, watchArticle=True, comment='{{biografija}} pakeitimas:'+self.page.title()) new_text = original_text break # choice must be 'N' break if self.acceptall and new_text != original_text: try: self.page.put(new_text, watchArticle=True) except wikipedia.EditConflict: wikipedia.output(u'Skipping %s because of edit conflict' % (self.page.title(),)) except wikipedia.SpamfilterError, e: wikipedia.output( u'Cannot change %s because of blacklist entry %s' % (self.page.title(), e.url)) except wikipedia.PageNotSaved, error: wikipedia.output(u'Error putting page: %s' % (error.args,)) except wikipedia.LockedPage: wikipedia.output(u'Skipping %s (locked page)' % (self.page.title(),)) def prepareRegexForMySQL(pattern): pattern = pattern.replace('\s', '[:space:]') pattern = 
pattern.replace('\d', '[:digit:]') pattern = pattern.replace('\w', '[:alnum:]') pattern = pattern.replace("'", "\\" + "'") #pattern = pattern.replace('\\', '\\\\') #for char in ['[', ']', "'"]: # pattern = pattern.replace(char, '\%s' % char) return pattern def main(*args): add_cat = None gen = None # summary message summary_commandline = None # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Don't edit pages which contain certain texts. exceptions = { 'title': [], 'text-contains': [], 'require-text': [], 'inside': [], 'inside-tags': [], 'require-title': [], # using a seperate requirements dict needs some } # major refactoring of code. # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fix = None fixCountryKas = None fixCountryKa = None fixCountryKo = None fixdalis = 'vks' fixform = '1a-ti-a-e' fixs1 = u'mir' fixs2 = u'mir\u0161t' fixs3 = u'mir' # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None useSql = False PageTitles = [] # will become True when the user presses a ('yes to all') or uses the # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False # Will become True if the user inputs the commandline parameter -dotall dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False # Do all hits when they overlap allowoverlap = False # Do not recurse replacement recursive = False # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() # Load default summary message. # BUG WARNING: This is probably incompatible with the -lang parameter. wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg)) # Between a regex and another (using -fix) sleep some time (not to waste # too much CPU sleep = None biogr = None upe = None #Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE if dotall: flags = flags | re.DOTALL if multiline: flags = flags | re.MULTILINE # Read commandline parameters. 
for arg in wikipedia.handleArgs(*args): if arg == '-regex': regex = True elif arg.startswith('-xmlstart'): if len(arg) == 9: xmlStart = wikipedia.input( u'Please enter the dumped article to start with:') else: xmlStart = arg[10:] elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = wikipedia.input( u'Please enter the XML dump\'s filename:') else: xmlFilename = arg[5:] elif arg =='-sql': useSql = True elif arg.startswith('-page'): if len(arg) == 5: PageTitles.append(wikipedia.input( u'Which page do you want to change?')) else: PageTitles.append(arg[6:]) elif arg.startswith('-excepttitle:'): exceptions['title'].append(arg[13:]) elif arg.startswith('-requiretitle:'): exceptions['require-title'].append(arg[14:]) elif arg.startswith('-excepttext:'): exceptions['text-contains'].append(arg[12:]) elif arg.startswith('-exceptinside:'): exceptions['inside'].append(arg[14:]) elif arg.startswith('-exceptinsidetag:'): exceptions['inside-tags'].append(arg[17:]) elif arg.startswith('-fix:'): fix = arg[5:] elif arg.startswith('-kas:'): fixCountryKas = arg[5:] elif arg.startswith('-ka:'): fixCountryKa = arg[4:] elif arg.startswith('-ko:'): fixCountryKo = arg[4:] elif arg.startswith('-sleep:'): sleep = float(arg[7:]) elif arg == '-always': acceptall = True elif arg == '-recursive': recursive = True elif arg == '-nocase': caseInsensitive = True elif arg == '-dotall': dotall = True elif arg == '-multiline': multiline = True elif arg.startswith('-addcat:'): add_cat = arg[len('addcat:'):] elif arg.startswith('-summary:'): wikipedia.setAction(arg[len('-summary:'):]) summary_commandline = True elif arg.startswith('-allowoverlap'): allowoverlap = True else: if not genFactory.handleArg(arg): commandline_replacements.append(arg) if fixCountryKa == 'zaid': fixCountryKa = u'\u017eaid' if fixCountryKo == 'CRC': fixCountryKo = u'Kosta Rikos' if fixCountryKo == 'TRI': fixCountryKo = u'Trinidado ir Tobago' if fixCountryKo == 'KOR': fixCountryKo = u'Piet\u0173 Kor\u0117jos' if fixCountryKo == 'KSA': fixCountryKo = u'Saudo Arabijos' if fixCountryKo == 'GUI': fixCountryKo = u'Gvin\u0117jos' if fixCountryKo == 'CMR': fixCountryKo = u'Kamer\u016bno' if fixCountryKo == 'CIV': fixCountryKo = u'Dramblio Kaulo Kranto' if fixCountryKo == 'SLO': fixCountryKo = u'Slov\u0117nijos' if fixCountryKo == 'SMR': fixCountryKo = 'San Marino' if fixCountryKo == 'LIE': fixCountryKo = u'Lichten\u0161teino' if fixCountryKo == 'BIH': fixCountryKo = 'Bosnijos ir Hercegovinos' if fixCountryKo == 'ARM': fixCountryKo = u'Arm\u0117nijos' if fixCountryKo == 'CZE': fixCountryKo = u'\u010cekijos' if fixCountryKo == 'FRA': fixCountryKo = u'Pranc\u016bzijos' if fixCountryKo == 'AZE': fixCountryKo = u'Azerbaid\u017eano' if fixCountryKo == 'RSA': fixCountryKo = u'Piet\u0173 Afrikos Respublikos' if fixCountryKo == 'CH': fixCountryKo = u'\u0160veicarijos' if fixCountryKo == 'FRO': fixCountryKo = u'Farer\u0173' elif fixCountryKo == 'SCO': fixCountryKo = u'\u0160kotijos' elif fixCountryKo == 'SWE': fixCountryKo = u'\u0160vedijos' elif fixCountryKo == 'NED': fixCountryKo = u'Nyderland\u0173' elif fixCountryKo == 'SR': fixCountryKo = u'Senov\u0117s Romos' elif fixCountryKo == 'UK': fixCountryKo = u'Jungtin\u0117s Karalyst\u0117s' if fix == 'upe': upe = True if fix == 'biogr': biogr = True if fix == 'stub': if fixCountryKas != None and fixCountryKa != None and fixCountryKo != None: fixes.stubCoutry(fixCountryKas,fixCountryKo,fixCountryKa) fix = fix + '-' + fixCountryKas elif fix == 'fut': if fixCountryKas != None and fixCountryKa != None and fixCountryKo != 
None: fixes.futCoutry(fixCountryKas,fixCountryKo,fixCountryKa) fix = fix + '-' + fixCountryKas elif fix == 'krep': if fixCountryKas != None and fixCountryKa != None and fixCountryKo != None: fixes.krepCoutry(fixCountryKas,fixCountryKo,fixCountryKa) fix = fix + '-' + fixCountryKas elif fix == 'ledr': if fixCountryKas != None and fixCountryKa != None and fixCountryKo != None: fixes.ledrCoutry(fixCountryKas,fixCountryKo,fixCountryKa) fix = fix + '-' + fixCountryKas if (len(commandline_replacements) % 2): raise wikipedia.Error, 'require even number of replacements.' elif (len(commandline_replacements) == 2 and fix == None): replacements.append((commandline_replacements[0], commandline_replacements[1])) if summary_commandline == None: wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) % (' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')')) elif (len(commandline_replacements) > 1): if (fix == None): for i in xrange (0, len(commandline_replacements), 2): replacements.append((commandline_replacements[i], commandline_replacements[i + 1])) if summary_commandline == None: pairs = [( commandline_replacements[i], commandline_replacements[i + 1] ) for i in range(0, len(commandline_replacements), 2)] replacementsDescription = '(%s)' % ', '.join( [('-' + pair[0] + ' +' + pair[1]) for pair in pairs]) wikipedia.setAction( wikipedia.translate(wikipedia.getSite(), msg ) % replacementsDescription) else: raise wikipedia.Error( 'Specifying -fix with replacements is undefined') elif fix == None: old = wikipedia.input(u'Please enter the text that should be replaced:') new = wikipedia.input(u'Please enter the new text:') change = '(-' + old + ' +' + new replacements.append((old, new)) while True: old = wikipedia.input( u'Please enter another text that should be replaced, or press Enter to start:') if old == '': change = change + ')' break new = wikipedia.input(u'Please enter the new text:') change = change + ' & -' + old + ' +' + new replacements.append((old, new)) if not summary_commandline == True: default_summary_message = wikipedia.translate(wikipedia.getSite(), msg) % change wikipedia.output(u'The summary message will default to: %s' % default_summary_message) summary_message = wikipedia.input( u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:') if summary_message == '': summary_message = default_summary_message wikipedia.setAction(summary_message) else: # Perform one of the predefined actions. 
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            wikipedia.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if fix.has_key('regex'):
            regex = fix['regex']
        if fix.has_key('msg'):
            wikipedia.setAction(
                wikipedia.translate(wikipedia.getSite(), fix['msg']))
        if fix.has_key('exceptions'):
            exceptions = fix['exceptions']
        if fix.has_key('nocase'):
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new

    for exceptionCategory in ['title', 'require-title', 'text-contains',
                              'inside', 'require-text']:
        if exceptions.has_key(exceptionCategory):
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)

    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        wikipedia.showHelp('replace')
        return
    if xmlFilename:
        # XML parsing can be quite slow, so use smaller batches and
        # longer lookahead.
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20,
                                                           lookahead=100)
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep,
                       zbiogr=biogr, upe=upe)
    bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()
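The sketch below is an editor's addition, not part of replacemetai.py: it illustrates how the ReplaceRobot class defined above can be driven directly from Python instead of through main(). It relies only on the modules this file already imports (wikipedia, pagegenerators, re) and on the local helper modules loaded at the top; the page title, search pattern, and exception entries are illustrative assumptions, and the function name _example_run is hypothetical. Wrapping it in a function that is never called keeps it inert if copied next to the script.

def _example_run():
    # Pages to work on; 'Vilnius' is only an illustrative title.
    site = wikipedia.getSite()
    pages = iter([wikipedia.Page(site, u'Vilnius')])
    gen = pagegenerators.PreloadingGenerator(pages, pageNumber=60)

    # 2-tuples of (compiled pattern, replacement string), in the same form
    # that main() builds before constructing the bot.
    replacements = [(re.compile(u'Errror', re.UNICODE), u'Error')]

    # Exceptions dictionary with the structure documented in
    # ReplaceRobot.__init__; 'comment' and 'nowiki' are assumed to be valid
    # keys of the exceptionRegexes used by wikipedia.replaceExcept().
    exceptions = {
        'title':         [],
        'text-contains': [re.compile(u'HTTP', re.UNICODE)],
        'inside':        [],
        'inside-tags':   ['comment', 'nowiki'],
        'require-title': [],
    }

    bot = ReplaceRobot(gen, replacements, exceptions, acceptall=False)
    bot.run()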