# User:WhitePhosphorus/python/fixtidyfont.py

import re
import sys
import argparse
from time import sleep
from src import botsite
from bs4 import BeautifulSoup


# Matches a <font ...> element whose whole content is a single wikilink:
#   <font attrs>[[link]]</font>   or   <font attrs>[[link|text]]</font>
# Named groups: prefix (opening tag), link, text (absent when the link has
# no pipe) and suffix (closing tag). Matching is case-insensitive.
wrong_tag_re = re.compile(
    r'(?P<prefix>\<\s*font\s*[^>]+\>)'
    r'\[\[(?P<link>.*?)(?:\|(?P<text>.*?))?\]\]'
    r'(?P<suffix>\<\s*/\s*font\s*\>)',
    re.IGNORECASE,
)

# A run of three to six lowercase hexadecimal digits.
hex_color_re = re.compile(r'[0-9a-f]{3,6}')

# Summary attached to every edit this script saves.
EDIT_SUMMARY = '[[User:WhitePhosphorus/python/fixtidyfont.py|机器人]]：修复[[Special:LintErrors/tidy-font-bug]]'

# CSS font-size value for each legacy size number; index 0 is a placeholder
# so that sizes 1..7 can be used directly as indexes.
SIZE_TO_STYLE = [
    None,
    'x-small',
    'small',
    'medium',
    'large',
    'x-large',
    'xx-large',
    '378%',
]

# Colour keywords that are passed through as they are.
VALID_COLORS = [
    'white', 'silver', 'gray', 'grey', 'black',
    'red', 'maroon', 'yellow', 'olive',
    'lime', 'green', 'aqua', 'teal',
    'blue', 'navy', 'fuchsia', 'purple',
]

# When true, rewritten text is printed instead of being saved.
DRY = False


def page_generator(pageid=None):
    """Yield every ``tidy-font-bug`` entry returned by the linterrors query.

    ``pageid`` is forwarded as the ``lntpageid`` request parameter; it is
    sent as ``None`` when the caller does not supply one.
    """
    request = {
        'action': 'query',
        'list': 'linterrors',
        'lntcategories': 'tidy-font-bug',
        'lntlimit': 'max',
        'lntpageid': pageid,
        # 'lntnamespace': '0',  (disabled; left here as an optional filter)
    }
    api = botsite.Site()
    for batch in api.api_get_long(request, 'query'):
        yield from batch['linterrors']


# size must be an integer, or the decimal part will be ignored
# size must lie in the range 1 to 7 (inclusive), or the nearer boundary will be applied
# size prefixed by '+' or '-' is equivalent to 3 plus or minus that number
def convert_size(size):
    """Translate a legacy ``<font size=...>`` value into a CSS font-size.

    The value is read the way HTML reads a legacy font size: an optional
    ``+`` or ``-`` sign followed by a run of ASCII digits. Anything after
    the digits (a decimal part, a unit, an exponent, ...) is ignored. A
    signed value is relative to the default size 3. The result is clamped
    to the range 1..7 and looked up in ``SIZE_TO_STYLE``.

    A value that does not start with such a number cannot be parsed; it is
    treated as the default size 3, so a string is always returned.
    """
    parsed = re.match(r'([+-]?)([0-9]+)', size.strip())
    if parsed is None:
        # Fall back to the default size itself. Applying the sign offset to
        # the fallback would turn an unparsable '+x' into size 6.
        return SIZE_TO_STYLE[3]
    sign, digits = parsed.groups()
    # Any number with two or more significant digits clamps to the same
    # boundary, so two digits are enough and arbitrarily long digit runs
    # never reach int().
    magnitude = int((digits.lstrip('0') or '0')[:2])
    if sign == '+':
        level = 3 + magnitude
    elif sign == '-':
        level = 3 - magnitude
    else:
        level = magnitude
    return SIZE_TO_STYLE[min(max(level, 1), 7)]


# <font color=x> accepts numeric font colors without the standard pound sign prefix
# <font color=x> accepts some nonstandard font color names which are not in HTML colors
# <font color=x> accepts 4-digit and 5-digit hexadecimal codes (which need 0 or 00 appended to the result)
def convert_color(color):
    """Translate a legacy ``<font color=...>`` value into a CSS colour.

    Returns the CSS value, or ``None`` when the value is neither one of
    ``VALID_COLORS`` nor a plain run of three to six hexadecimal digits
    (with or without a leading ``#``). Callers treat ``None`` as "leave
    this tag alone".
    """
    color = color.strip()
    if color.lower() in VALID_COLORS:
        return color

    has_hash = color.startswith('#')
    digits = color[1:] if has_hash else color
    # The whole value must consist of hex digits. Searching for a hex run
    # anywhere in the value accepted inputs such as 'deepskyblue' or
    # 'rgb(255,0,0)' and returned '#deepskyblue' / '#rgb(255,0,0)'.
    if not hex_color_re.fullmatch(digits.lower()):
        # cannot handle unknown colors
        return None

    if len(digits) == 3:
        if has_hash:
            return '#' + digits
        # Legacy colour parsing reads an unprefixed 'abc' as one digit per
        # channel (#0a0b0c), which differs from the CSS shorthand #abc.
        return '#' + ''.join('0' + digit for digit in digits)
    # 4- and 5-digit codes are right-padded with zeros to six digits.
    return '#' + digits.ljust(6, '0')


def handle_match(match):
    """Rewrite one ``wrong_tag_re`` match with the styling moved into the link.

    ``<font ...>[[link|text]]</font>`` becomes
    ``[[link|<span style="...">text</span>]]`` and
    ``<font ...>[[link]]</font>`` becomes
    ``[[link|<span style="...">link</span>]]``.

    Returns the replacement wikitext, or ``None`` when the tag cannot be
    converted (no ``font`` element found, an attribute outside the known
    set, or a colour that ``convert_color`` rejects).
    """
    soup = BeautifulSoup(match.group(0), "lxml")
    if not soup:
        return None
    font = soup.find('font')
    if not font:
        return None
    if set(font.attrs.keys()) - {'size', 'face', 'color', 'style', 'name', 'id'}:
        # cannot handle unknown attrs
        return None

    styles = []
    size = font.get('size', '').strip()
    if size:
        styles.append('font-size: ' + convert_size(size))
    face = font.get('face', '').strip()
    if face:
        styles.append('font-family: ' + face)
    color = font.get('color', '').strip()
    if color:
        css_color = convert_color(color)
        if not css_color:
            return None
        styles.append('color: ' + css_color)
    # Remove the legacy attributes even when they were empty, so that none
    # of them is carried over onto the <span>.
    for legacy in ('size', 'face', 'color'):
        if legacy in font.attrs:
            del font[legacy]

    existing = font.get('style', '').strip()
    if existing and styles:
        separator = ' ' if existing.endswith(';') else '; '
        merged = existing + separator + '; '.join(styles)
    else:
        # Nothing to merge: keep whichever side is present, without a
        # dangling separator.
        merged = existing or '; '.join(styles)
    if merged:
        font['style'] = merged
    elif 'style' in font.attrs:
        del font['style']

    font.name = 'span'

    link = match.group('link')
    label = match.group('text')
    if not label:
        # An unpiped [[:Foo]] is displayed without its leading colon.
        label = link[1:] if link.startswith(':') else link

    # Render only the tag shell and splice the label in verbatim. Assigning
    # the label through font.string makes BeautifulSoup entity-escape it on
    # output, which turns '&nbsp;' into '&amp;nbsp;' and '<b>' into '&lt;b&gt;'.
    font.clear()
    shell = str(font)
    closing = '</span>'
    if not shell.endswith(closing):
        return None
    opening = shell[:-len(closing)]
    return f"[[{link}|{opening}{label}{closing}]]"


def handle(title, locations):
    """Rewrite every reported font tag on one page and save the result.

    ``locations`` holds ``(start, end)`` offsets into the page text, one
    pair per lint error. The page is left untouched, with a warning, as
    soon as one reported span cannot be converted, so each page is edited
    completely or not at all. When ``DRY`` is set the new text is printed
    instead of being saved.
    """
    if not title or not locations:
        return
    site = botsite.Site()
    old_text = site.get_text_by_title(title)
    if not old_text:
        # NOTE(review): presumably a missing or empty page - confirm what
        # get_text_by_title returns in that case.
        return

    pieces = []
    cursor = 0
    # Process spans in text order regardless of the order they arrived in.
    for pos in sorted(locations):
        start, end = pos[0], pos[1]
        if start < cursor:
            print(f'Warning: skipping [[{title}]]: overlapping location {pos}')
            return
        tag = old_text[start:end]
        match = wrong_tag_re.search(tag)
        if not match:
            print(f'Warning: skipping unknown tag in [[{title}]] pos {pos}: {tag}')
            return
        rst = handle_match(match)
        if not rst:
            print(f'Warning: skipping a tag cannot handled in [[{title}]] pos {pos}: {tag}')
            return
        pieces.append(old_text[cursor:start])
        # Replace only what the pattern matched; any other text inside the
        # reported span is kept as it was.
        pieces.append(tag[:match.start()] + rst + tag[match.end():])
        cursor = end
    pieces.append(old_text[cursor:])
    new_text = ''.join(pieces)

    if DRY:
        print(new_text)
    else:
        site.edit(new_text, EDIT_SUMMARY, title=title, minor=True, bot=True)
        # Pause between saved edits.
        sleep(6)

def main():
    """Parse the command line, log in when a username is given, and fix pages.

    Without ``--username`` the script only runs in ``--dry`` mode; otherwise
    it prints a message and exits with status 1. Lint errors are read from
    ``page_generator`` and consecutive entries with the same title are
    handed to ``handle`` as one batch of locations.
    """
    global DRY
    import getpass

    parser = argparse.ArgumentParser()
    parser.add_argument('--username', help='your username')
    parser.add_argument('--password', help='your password (can be provided at runtime)')
    parser.add_argument('--dry', action='store_true', help='run in dry mode (just output results and will not edit)')
    parser.add_argument('--pageid', help='only apply changes on this page')
    args = parser.parse_args()

    DRY = args.dry
    if args.username:
        botsite.bot_name = args.username
        site = botsite.Site()
        site.client_login(pwd=args.password or getpass.getpass('Password: '))
    elif not DRY:
        print('Cannot edit if username not specified')
        # sys.exit works even when the interactive-only exit() helper is
        # not installed (e.g. python -S).
        sys.exit(1)

    title = ''
    locations = []
    for item in page_generator(pageid=args.pageid):
        # Only plain <font> errors that are not produced by a template.
        if item['templateInfo'] or item['params'] != {"name": "font"}:
            continue
        if item['title'] != title:
            # A new page starts: flush what was collected for the last one.
            handle(title, locations)
            title, locations = item['title'], []
        locations.append((item['location'][0], item['location'][1]))
    handle(title, locations)


if __name__ == '__main__':
    main()