"""Japanese test data generation lookup tables and helper functions.

Provides constants and generators for creating Japanese-language test data
including fullwidth/halfwidth characters, Shift-JIS encoding edge cases,
wareki (Japanese era) dates, and encoding round-trip test data.
"""

from __future__ import annotations

import random
from datetime import date, timedelta

# ── Lookup table constants ──

FULLWIDTH_KATAKANA = "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲン"
FULLWIDTH_HIRAGANA = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをん"
FULLWIDTH_DIGITS = "０１２３４５６７８９"
FULLWIDTH_ALPHA = "ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺ"
HALFWIDTH_KATAKANA = "ｱｲｳｴｵｶｷｸｹｺｻｼｽｾｿﾀﾁﾂﾃﾄﾅﾆﾇﾈﾉﾊﾋﾌﾍﾎﾏﾐﾑﾒﾓﾔﾕﾖﾗﾘﾙﾚﾛﾜｦﾝ"

# Characters whose second Shift-JIS byte is 0x5C (ASCII backslash).
# Every entry must satisfy ch.encode("shift_jis")[1] == 0x5C.
SJIS_5C_PROBLEM = ["ソ", "噂", "能", "表", "予", "十"]
# Characters whose second Shift-JIS byte is 0x7C (ASCII vertical bar).
# Every entry must satisfy ch.encode("shift_jis")[1] == 0x7C.
SJIS_7C_PROBLEM = ["ポ", "芸", "鋼", "竹", "倒"]

# Era table, newest era first. Each row is:
#   (era name, Gregorian year of era year 1, first day of the era as wareki,
#    last day of the era as "YYYY/MM/DD", last day of the era as wareki)
# The two "last day" columns are None for the era that is still running.
WAREKI_BOUNDARIES = [
    ("令和", 2019, "R010501", None, None),
    ("平成", 1989, "H010108", "2019/04/30", "H310430"),
    ("昭和", 1926, "S011225", "1989/01/07", "S640107"),
    ("大正", 1912, "T010730", "1926/12/25", "T151225"),
    # NOTE(review): Meiji 1 predates Japan's adoption of the Gregorian
    # calendar, so sources disagree on its first day; 1868-01-25 (the
    # Gregorian date of lunar new year, Meiji 1) is used here — confirm this
    # matches the convention of the system under test.
    ("明治", 1868, "M010125", "1912/07/30", "M450730"),
]


# ── Helper: simulate COBOL PIC clause field for length ──


def _field_length(field: dict) -> int:
    """Get the storage length from a PIC field definition dict."""
    if "pic_info" in field and field["pic_info"]:
        pi = field["pic_info"]
        if pi.get("length", 0) > 0:
            return pi["length"]
        dig = pi.get("digits", 0)
        dec = pi.get("decimal", 0)
        return dig + dec
    if "length" in field:
        return field["length"]
    if "digits" in field:
        d = field["digits"]
        dec = field.get("decimal", 0)
        return d + dec
    return 10  # fallback


# ── Generation functions ──


def generate_fullwidth_text(field: dict) -> str:
    """Build a random fullwidth-katakana string sized for *field*.

    The number of characters is ``_field_length(field)``; when that is zero
    or negative, 10 characters are produced instead. Each character is drawn
    independently from ``FULLWIDTH_KATAKANA``.

    NOTE(review): this treats the field length as a character count, which
    fits a PIC N definition measured in characters — confirm against callers.
    """
    count = _field_length(field)
    if count <= 0:
        count = 10
    picked = [random.choice(FULLWIDTH_KATAKANA) for _ in range(count)]
    return "".join(picked)


def generate_halfwidth_katakana(field: dict) -> str:
    """Build a random halfwidth-katakana string sized for *field*.

    The number of characters is ``_field_length(field)``; when that is zero
    or negative, 10 characters are produced instead. Each character is drawn
    independently from ``HALFWIDTH_KATAKANA``. Halfwidth katakana occupy one
    byte each in Shift-JIS, so the character count equals the encoded byte
    count.
    """
    count = _field_length(field)
    if count <= 0:
        count = 10
    picked = [random.choice(HALFWIDTH_KATAKANA) for _ in range(count)]
    return "".join(picked)


def generate_sjis_5c_problem(field: dict) -> str:
    """Build a random string out of the ``SJIS_5C_PROBLEM`` characters.

    That list is meant to hold characters whose second Shift-JIS byte is
    0x5C (backslash), which byte-oriented code can mistake for an escape
    character.

    The result has ``_field_length(field)`` characters, or 6 when that length
    is zero or negative.
    """
    count = _field_length(field)
    if count <= 0:
        count = 6
    pool = list(SJIS_5C_PROBLEM)
    return "".join(random.choice(pool) for _ in range(count))


def generate_sjis_7c_problem(field: dict) -> str:
    """Build a random string out of the ``SJIS_7C_PROBLEM`` characters.

    That list is meant to hold characters whose second Shift-JIS byte is
    0x7C (pipe), which byte-oriented code can mistake for a field separator.

    The result has ``_field_length(field)`` characters, or 5 when that length
    is zero or negative.
    """
    count = _field_length(field)
    if count <= 0:
        count = 5
    pool = list(SJIS_7C_PROBLEM)
    return "".join(random.choice(pool) for _ in range(count))


def generate_wareki_date(wareki_type: str = "R") -> str:
    """Generate a random, valid Japanese era (wareki) date string.

    A calendar day is chosen uniformly from the real span of the requested
    era, so the result never names a year the era did not have (e.g. Taisho
    16) or a day before the era began (e.g. Heisei 1, January 1).

    Args:
        wareki_type: Era prefix letter:
            "R" = Reiwa (令和), "H" = Heisei (平成),
            "S" = Showa (昭和), "T" = Taisho (大正),
            "M" = Meiji (明治).
            Any other value is treated as "R".

    Returns:
        Wareki date string formatted as e.g. "R050101" (Reiwa 5, Jan 1):
        the era letter followed by a 2-digit era year, 2-digit month and
        2-digit day.
    """
    # (first day, last day) of each era; None marks the era still running.
    era_spans = {
        "R": (date(2019, 5, 1), None),
        "H": (date(1989, 1, 8), date(2019, 4, 30)),
        "S": (date(1926, 12, 25), date(1989, 1, 7)),
        "T": (date(1912, 7, 30), date(1926, 12, 25)),
        # NOTE(review): Meiji days before 1873 are handled as proleptic
        # Gregorian dates; the lunar calendar then in use is not modelled.
        "M": (date(1868, 1, 25), date(1912, 7, 30)),
    }
    if wareki_type not in era_spans:
        wareki_type = "R"

    first_day, last_day = era_spans[wareki_type]
    if last_day is None:
        # The current era is open-ended: stop at today. max() guards against
        # a system clock set earlier than the era start.
        last_day = max(date.today(), first_day)

    span_days = (last_day - first_day).days
    picked = first_day + timedelta(days=random.randint(0, span_days))
    # Era year 1 is the Gregorian year in which the era began.
    era_year = picked.year - first_day.year + 1
    return f"{wareki_type}{era_year:02d}{picked.month:02d}{picked.day:02d}"


def generate_wareki_boundary(era: str = "平成") -> tuple[str, str]:
    """Generate a pair of wareki date strings representing an era boundary.

    Args:
        era: Era name in Japanese: "令和", "平成", "昭和", "大正", "明治"

    Returns:
        Tuple of (end_date_of_previous_era, start_date_of_new_era),
        e.g. for "平成": ("S640107", "H010108")
    """
    boundaries = {name: (prev_end, new_start) for name, _, prev_end, _, new_start in WAREKI_BOUNDARIES if prev_end and new_start}
    if era not in boundaries:
        # Default to Heisei boundary
        era = "平成"
    return boundaries[era]


def generate_encoding_test_data(from_enc: str = "shift_jis", to_enc: str = "utf-8") -> tuple[bytes, bytes]:
    """Produce a fixed Japanese sample encoded in two encodings.

    The built-in sample (hiragana, katakana and kanji) is encoded with
    *from_enc*, decoded again with *from_enc*, and that decoded text is then
    encoded with *to_enc*. Every step uses ``errors="replace"``, so characters
    an encoding cannot represent are substituted rather than raising.

    Args:
        from_enc: Source encoding name (default: "shift_jis")
        to_enc: Target encoding name (default: "utf-8")

    Returns:
        Tuple of (source_bytes, target_bytes) for comparison.
    """
    sample = "あいうえおアイウエオ亜唖娃阿"
    as_source = sample.encode(from_enc, errors="replace")
    # Re-read the source bytes so that any substitution made while encoding
    # is carried over into the target bytes as well.
    round_tripped = as_source.decode(from_enc, errors="replace")
    return as_source, round_tripped.encode(to_enc, errors="replace")


def generate_encoding_test_data_bytes(text: str = None, from_enc: str = "shift_jis", to_enc: str = "utf-8") -> tuple[bytes, bytes]:
    """Generate encoding test data from explicit text.

    Args:
        text: Source text to encode; defaults to Japanese test phrase
        from_enc: Source encoding name
        to_enc: Target encoding name

    Returns:
        Tuple of (source_bytes, target_bytes)
    """
    if text is None:
        text = "あいうえおアイウエオ亜唖娃阿"
    source_bytes = text.encode(from_enc, errors="replace")
    decoded = source_bytes.decode(from_enc, errors="replace")
    target_bytes = decoded.encode(to_enc, errors="replace")
    return source_bytes, target_bytes


def select_data_type(field: dict) -> str:
    """Select the appropriate data type label for a field.

    Examines the field definition and returns a label indicating
    the kind of test data to generate.

    Args:
        field: Field definition dict. Its optional 'pic_info' entry is a dict
            that may contain 'type' and 'usage'; a missing, ``None`` or empty
            'pic_info' is treated as "no information".

    Returns:
        One of: "japanese", "numeric", "halfwidth". "halfwidth" is the
        result for alphanumeric/alphabetic fields and for anything that is
        not recognised.
    """
    # `or {}` also covers a 'pic_info' key that is present but None.
    pic = field.get("pic_info") or {}
    kind = pic.get("type", "unknown")
    usage = (pic.get("usage") or "").upper()

    # PIC N (national) or USAGE NATIONAL → Japanese fullwidth
    if kind == "national" or usage == "NATIONAL":
        return "japanese"

    # PIC 9 or any COMP-style usage (COMP, COMP-3, COMPUTATIONAL, ...) → numeric
    if kind in ("numeric", "numeric_edited", "numeric_float") or "COMP" in usage:
        return "numeric"

    # Everything else, including PIC X / PIC A, is a halfwidth candidate.
    return "halfwidth"