# User:Yinweichen/update names.py -- Wikipedia, the free encyclopedia
# -*- coding: utf-8  -*-

# Winston Yin 2014-12-19
# Reads a text file listing new names for minor planets and
# inserts them into the list pages on Wikipedia. The text file
# is made by copying text out of the MPC PDF.

import pywikibot

class color:
    """ANSI escape sequences used to tint the console messages."""
    end = '\x1b[0m'     # reset all attributes
    red = '\x1b[31m'    # red foreground
    cyan = '\x1b[36m'   # cyan foreground
    green = '\x1b[92m'  # bright green foreground

def add_name(page_text, number, name):
    """Insert a newly assigned name next to a minor planet's list entry.

    The entry is located by the text ``小行星<number>`` that is not followed
    by another digit, so ``12`` is never matched inside ``123`` or inside an
    unrelated number elsewhere on the page.

    * ``[[小行星<number>]]`` (already linked): ``name`` is inserted directly
      after the closing ``]]``.
    * bare ``小行星<number>``: it is replaced by ``[[小行星<number>]]<name>``.

    All offsets are computed with ``len()`` rather than hard-coded byte
    counts, so the result does not depend on whether the text is a UTF-8
    byte string or a text string. ``page_text`` and ``name`` must be of the
    same string type as the literals in this module.

    Args:
        page_text: wikitext of the list page.
        number: minor planet number.
        name: name to insert.

    Returns:
        The modified wikitext, or None when nothing was changed: either
        ``name`` already occurs somewhere in ``page_text``, or no entry in
        one of the two supported forms was found (a message asking for a
        manual change is printed in that case).
    """
    if name in page_text:
        print(color.red + 'Name ' + name + ' already exists. Skipping.' + color.end)
        return None

    target = '小行星' + str(number)
    start = page_text.find(target)
    # Skip hits that are only the leading digits of a longer number.
    while start >= 0 and page_text[start + len(target):start + len(target) + 1].isdigit():
        start = page_text.find(target, start + 1)
    end = start + len(target)

    if start >= 0:
        linked = start >= 2 and page_text[start - 2:start] == '[['
        if not linked:
            # Bare mention: wrap it in a wiki link and append the name.
            return page_text[:start] + '[[' + target + ']]' + name + page_text[end:]
        if page_text[end:end + 2] == ']]':
            insert_pos = end + 2  # just past "[[...]]"
            return page_text[:insert_pos] + name + page_text[insert_pos:]
        # Linked, but not a plain "[[...]]" link (e.g. piped): inserting
        # blindly would corrupt the markup, so fall through to the message.

    print(color.red + 'Change ' + str(number) + ' manually!' + color.end)
    return None

# Read in list of new names
# Text file format: (###) XXXXX = ******
def read_file(filename):
    """Parse the text file of newly named minor planets.

    Every non-blank line is expected to look like ``(###) XXXXX = ******``:
    the number in parentheses, the name, then ``=`` and the remainder.
    Whitespace around the name is stripped, so the amount of spacing around
    the name does not matter. If a line has no ``=``, everything after the
    closing parenthesis is taken as the name. Blank lines are skipped.

    Args:
        filename: path of the text file to read.

    Returns:
        A list of ``[number, name]`` pairs in file order, where ``number``
        is an int and ``name`` is the text between ``)`` and ``=``.

    Raises:
        ValueError: if the text between the first ``(`` and the first ``)``
            of a non-blank line is not an integer.
    """
    new_names = []
    # The context manager closes the file even when a line fails to parse.
    with open(filename, 'r') as text_file:
        for line in text_file:
            if not line.strip():
                continue  # e.g. a trailing empty line left over from copying
            num_start = line.find('(') + 1
            num_end = line.find(')')
            name_end = line.find('=')
            if name_end < 0:
                name_end = len(line)
            number = int(line[num_start:num_end])
            name = line[num_end + 1:name_end].strip()
            new_names.append([number, name])
    return new_names # [0]: number, [1]: name

# Compile changes to be made to each page
def compile_changes(new_names):
    """Work out the new wikitext of every list page that needs a change.

    Each entry is applied with ``add_name`` to the list page covering its
    block of 100 numbers (``小行星列表/<first>-<last>``). Every page is
    requested from the wiki at most once; later entries for the same page
    work on the text already held in memory, including changes made by
    earlier entries.

    Reads the module-level name ``site``, which must be bound before this
    function is called.

    Args:
        new_names: list of ``[number, name]`` entries.

    Returns:
        A list of ``[page name, new text]`` pairs, one per page on which at
        least one name was inserted, ordered by first successful insertion.
        Pages that do not exist are reported on the console and left out.
    """
    print('Adding text to articles'),
    texts = {}     # page name -> current text, or None if the page is missing
    changed = []   # names of pages with at least one insertion, in order
    for entry in new_names:
        number = entry[0]
        name = entry[1]
        index = (number - 1) // 100 * 100
        page_name = '小行星列表/' + str(index + 1) + '-' + str(index + 100)

        # Fetch each page only once, even if the first insertion on it is
        # skipped or the page turns out not to exist.
        if page_name not in texts:
            try:
                article = pywikibot.Page(site, unicode(page_name, 'utf_8'))
                texts[page_name] = article.get().encode('utf_8')
            except pywikibot.exceptions.NoPage:
                texts[page_name] = None
        if texts[page_name] is None:
            print(color.red + page_name + ' does not exist.' + color.end)
            continue

        new_text = add_name(texts[page_name], number, name)
        if new_text is not None:
            if texts[page_name] is not new_text and page_name not in changed:
                changed.append(page_name)
            texts[page_name] = new_text
    return [[page_name, texts[page_name]] for page_name in changed] # [0]: page name, [1]: new text

# Make those changes on wiki
def make_changes(new_pages):
    """Show each pending change as a diff and save the confirmed ones.

    For every page the current wiki text is fetched and diffed against the
    new text. Unless "all" was chosen earlier, the user is asked what to do:
    ``y`` saves this page and all remaining ones without asking again, ``n``
    skips this page, anything else (e.g. Enter) saves this page only.

    ``new_pages`` is not modified; the decoded title and text are kept in
    local variables so the same list can be passed in again.

    Reads the module-level name ``site``, which must be bound before this
    function is called.

    Args:
        new_pages: list of ``[page name, new text]`` pairs holding UTF-8
            encoded byte strings.

    Returns:
        The number of pages that were saved (total minus skipped).
    """
    skipped = 0
    dont_ask = False
    yn = ''
    for page in new_pages:
        title = unicode(page[0], 'utf_8')
        new_text = unicode(page[1], 'utf_8')
        article = pywikibot.Page(site, title)
        article_text = article.get()
        print(title)
        pywikibot.showDiff(article_text, new_text)
        if not dont_ask:
            yn = raw_input('All (y). Skip (n). Once (Enter). ')
        if yn == 'n':
            skipped += 1
            continue
        elif yn == 'y':
            dont_ask = True  # keep the 'y' answer for all remaining pages
        article.text = new_text
        article.save(u'機械人:根據MPC資料更新小行星名稱。')
    return len(new_pages) - skipped

# Run sequence
def run():
    """Run the whole update: read the names, compile the edits, save them.

    Reads ``new_names.txt`` from the current directory, connects to the
    Chinese Wikipedia and prints how many pages were updated.
    """
    # compile_changes() and make_changes() look up ``site`` as a module-level
    # name. Binding it only as a local variable here would leave them with a
    # NameError, so it is published as a global.
    global site
    site = pywikibot.Site('zh', 'wikipedia')
    new_names = read_file('new_names.txt')
    new_pages = compile_changes(new_names)
    counter = make_changes(new_pages)
    print(str(counter) + ' pages updated.')

# Entry point: the update sequence starts as soon as this file is executed.
# There is no ``__main__`` guard, so importing the file runs it as well.
run()