#!/usr/bin/env python3

# Copyright (C) 2025  Niels Martignène <niels.martignene@protonmail.com>

# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the “Software”), to deal in
# the Software without restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
# Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import sys
import re
import argparse
import urllib.request
import os.path
from collections import namedtuple

# Base URL used when no `url` argument is given on the command line; the script
# appends '/ucd/' to it before fetching the data files.
DEFAULT_URL = 'https://unicode.org/Public/16.0.0'

# License notice formatted as `//` line comments; write_header() emits it
# verbatim at the very top of the generated file.
LICENSE_HEADER = """// Copyright (C) 2025  Niels Martignène <niels.martignene@protonmail.com>

// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the “Software”), to deal in
// the Software without restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
// Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE."""

# Return type of parse_properties_wcwidth(): compressed code point ranges for
# the `null` and `wide` sets.
WcWidthResult = namedtuple('WcWidthResult', 'null wide')
# Return type of parse_properties_xid(): compressed code point ranges for the
# `id_start` and `id_continue` sets.
XidResult = namedtuple('XidResult', 'id_start id_continue')

def parse_version(core):
    """Build a version label from the first two lines of *core*.

    Each of the two lines is trimmed of surrounding whitespace and `#`
    characters, then both are joined with ' -- '.

    Raises StopIteration if *core* has fewer than two lines.
    """
    trimmed = "\n\r \t#"
    header = iter(core.splitlines())

    title = next(header).strip(trimmed)
    detail = next(header).strip(trimmed)

    return f'{title} -- {detail}'

def parse_properties_wcwidth(core, asian):
    """Collect the `null` and `wide` code point sets used for width lookups.

    *core* and *asian* are the text contents of two property files, both read
    through parse_properties(). Selection is based on the first token of each
    record's comment field (presumably the general category abbreviation) and,
    for *asian*, on the record's second field.

    Returns a WcWidthResult whose `null` and `wide` members are the range lists
    produced by compress().
    """
    null_categories = ('Me', 'Mn', 'Mc', 'Cf', 'Zl', 'Zp')
    wide_classes = ('F', 'W')
    excluded_marks = ('Mn', 'Mc')

    null = []
    for codepoints, _fields, notes in parse_properties(core):
        if notes[0] in null_categories:
            null += codepoints

    wide = []
    for codepoints, fields, notes in parse_properties(asian):
        # Records tagged Mn/Mc are never counted as wide, whatever their class
        if fields[1] not in wide_classes:
            continue
        if notes[0] in excluded_marks:
            continue
        wide += codepoints

    return WcWidthResult(null = compress(null), wide = compress(wide))

def parse_properties_xid(core):
    """Collect the identifier start/continue code point sets from *core*.

    Records are read through parse_properties() and dispatched on their second
    field: 'ID_Start' records feed `id_start`, 'ID_Continue' records feed
    `id_continue`, everything else is ignored.

    Returns an XidResult whose members are the range lists produced by
    compress().
    """
    # NOTE(review): this matches ID_Start/ID_Continue, not XID_Start/XID_Continue,
    # even though the function and result are named "xid" -- confirm intended.
    buckets = {'ID_Start': [], 'ID_Continue': []}

    for codepoints, fields, _notes in parse_properties(core):
        bucket = buckets.get(fields[1])
        if bucket is not None:
            bucket.extend(codepoints)

    return XidResult(id_start = compress(buckets['ID_Start']),
                     id_continue = compress(buckets['ID_Continue']))

def parse_properties(text):
    """Iterate over the data records of a `;`/`#`-delimited property file.

    Blank lines, lines starting with `#`, and lines with fewer than three
    fields are skipped. For each remaining line, yields a tuple
    `(chars, parts, comments)` where:

    - `chars` is the list of code points described by the first field, which is
      either a single hex value or an inclusive `start..end` hex range;
    - `parts` is the list of stripped fields obtained by splitting on `;` and `#`;
    - `comments` is the third field split on single spaces.

    Raises ValueError if the first field is not valid hexadecimal.
    """
    separator = re.compile('[;#]')

    for raw in text.splitlines():
        entry = raw.strip()
        if entry == '' or entry.startswith('#'):
            continue

        parts = [field.strip() for field in separator.split(entry)]
        if len(parts) < 3:
            continue

        first, dots, last = parts[0].partition('..')
        if not dots:
            # Single code point: the range collapses to one value
            last = first
        chars = list(range(int(first, 16), int(last, 16) + 1))

        yield (chars, parts, parts[2].split(' '))

def compress(chars):
    """Merge a collection of integers into sorted, half-open ranges.

    Duplicates are dropped and the values sorted; every run of consecutive
    values becomes one `(start, end)` tuple where `end` is one past the last
    value of the run. An empty input yields an empty list.
    """
    if not chars:
        return []

    pending = iter(sorted(set(chars)))
    spans = []

    lo = next(pending)
    hi = lo + 1  # exclusive upper bound of the run being built

    for code in pending:
        if code != hi:
            # Gap found: close the current run and start a new one
            spans.append((lo, hi))
            lo = code
        hi = code + 1

    spans.append((lo, hi))
    return spans

def _write_table(f, name, ranges):
    """Write one `static const int32_t <name>[]` array definition to *f*.

    Each entry of *ranges* contributes two values (its items 0 and 1), written
    as uppercase hex literals padded to at least five digits. Five entries are
    written per line.
    """
    f.write(f'static const int32_t {name}[] = {{')

    last = len(ranges) - 1
    for i, span in enumerate(ranges):
        # Start a new indented line every five entries
        if i % 5 == 0:
            f.write('\n   ')
        # No trailing comma after the final entry
        sep = ',' if i < last else ''
        f.write(f' 0x{span[0]:05X}, 0x{span[1]:05X}{sep}')

    f.write('\n};\n')

def write_header(version, wcwidth, xid, f):
    """Write the generated C++ include file to the text stream *f*.

    The output consists of LICENSE_HEADER, an "autogenerated" notice carrying
    *version*, and a `namespace K` block holding four arrays: WcWidthNull and
    WcWidthWide (from *wcwidth*.null / *wcwidth*.wide), then XidStartTable and
    XidContinueTable (from *xid*.id_start / *xid*.id_continue).

    Each of those attributes must be a sized sequence of pairs of integers.
    """
    f.write(f"""{LICENSE_HEADER}

// This file is autogenerated by unicode_gen.py
// Version: {version}

namespace K {{

""")

    tables = [
        ('WcWidthNull', wcwidth.null),
        ('WcWidthWide', wcwidth.wide),
        ('XidStartTable', xid.id_start),
        ('XidContinueTable', xid.id_continue)
    ]
    for name, ranges in tables:
        _write_table(f, name, ranges)
        # Blank line after each table (and before the closing brace)
        f.write('\n')

    f.write('}\n')

# Script entry point: download the two data files, derive the tables and write
# 'unicode.inc' into the requested output directory.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description = 'Extract xid and wcwidth information from Unicode standard')
    parser.add_argument('url', metavar = 'url', type = str, nargs = '?', help = 'URL of Unicode standard', default = DEFAULT_URL)
    parser.add_argument('-O', '--output_dir', metavar = 'directory', type = str, help = 'output directory for generated files', default = '.')
    args = parser.parse_args()

    # Only trailing slashes need to go before appending the 'ucd/' directory
    url = args.url.rstrip('/') + '/ucd/'

    with urllib.request.urlopen(url + 'DerivedCoreProperties.txt') as f:
        core = f.read().decode('UTF-8')
    with urllib.request.urlopen(url + 'EastAsianWidth.txt') as f:
        asian = f.read().decode('UTF-8')

    # The version label comes from the first two lines of DerivedCoreProperties.txt
    version = parse_version(core)
    wcwidth = parse_properties_wcwidth(core, asian)
    xid = parse_properties_xid(core)

    # LICENSE_HEADER contains non-ASCII characters, so the encoding must not
    # depend on the platform locale (which may fail or produce non-UTF-8 output)
    with open(os.path.join(args.output_dir, 'unicode.inc'), 'w', encoding = 'utf-8') as f:
        write_header(version, wcwidth, xid, f)
