import pandas as pd
import xml.etree.ElementTree as ET
from xml.dom import minidom

# Language code used for the input/output file names and for the `language`
# attribute of the <TS> root element.
language = "en"
csv_file = f'output_{language}.csv'

# keep_default_na=False keeps empty cells as '' instead of NaN.
# dtype=str stops pandas from inferring numeric dtypes: a column whose cells all
# look numeric (e.g. a source text of "007" or "1.0") would otherwise be turned
# into numbers, changing the text and producing values ElementTree cannot
# serialize as element text.
df = pd.read_csv(csv_file, keep_default_na=False, dtype=str)
root = ET.Element('TS', version="2.1", language=language)


def write_message(context_elem, locations, key):
    """Append one <message> element to *context_elem*.

    Args:
        context_elem: ElementTree element that receives the new <message>.
        locations: Iterable of mappings with 'location' and 'line' entries.
            An entry whose 'location' is NaN or empty is skipped; the 'line'
            attribute is only written when the value is non-blank.
        key: ``(source, translation)`` pair.  NaN is treated as empty text and
            other non-string values are converted with ``str``.

    An empty translation is written as ``<translation type="unfinished">``.
    """
    # Normalise both texts up front so source and translation are handled the
    # same way: a NaN/number left in .text would make serialization fail.
    source = '' if pd.isna(key[0]) else str(key[0])
    translation = '' if pd.isna(key[1]) else str(key[1])

    message_elem = ET.SubElement(context_elem, 'message')
    for loc in locations:
        if pd.notna(loc['location']) and loc['location']:
            location_elem = ET.SubElement(message_elem, 'location')
            location_elem.set('filename', loc['location'])
            if pd.notna(loc['line']) and str(loc['line']).strip():
                location_elem.set('line', str(loc['line']))

    source_elem = ET.SubElement(message_elem, 'source')
    source_elem.text = source

    if translation == '':
        translation_elem = ET.SubElement(message_elem, 'translation', type="unfinished")
    else:
        translation_elem = ET.SubElement(message_elem, 'translation')
    translation_elem.text = translation


for context_name, group in df.groupby('context', sort=False):
    # One <context> element per distinct context value; sort=False keeps the
    # groups in the order they first appear in the CSV.
    context_elem = ET.SubElement(root, 'context')
    ET.SubElement(context_elem, 'name').text = context_name

    # Consecutive rows that share the same (source, translation) pair are
    # emitted as a single message with several locations.  Rows are expected
    # to already be sequential, as they were in the original .ts.
    current_key = None
    pending = []
    for _, row in group.iterrows():
        row_key = (row['source'], row['translation'])
        if pending and row_key != current_key:
            # The pair changed: flush the run collected so far.
            write_message(context_elem, pending, current_key)
            pending = []
        pending.append({'location': row['location'], 'line': row['line']})
        current_key = row_key

    # Flush the last run of rows in this context.
    if pending:
        write_message(context_elem, pending, current_key)

# Write XML
# Layout: the root element and its direct children both start at column 0,
# and everything below is indented four spaces per level.
xml_str = ET.tostring(root, encoding='utf-8').decode('utf-8')
parsed = minidom.parseString(xml_str)
ts_node = parsed.documentElement

with open(f'output_{language}.ts', 'w', encoding='utf-8') as f:
    f.write('<?xml version="1.0" encoding="utf-8"?>\n')
    f.write('<!DOCTYPE TS>\n')
    if not ts_node.childNodes:
        # Nothing to indent; emit the self-closing root element as-is.
        f.write(ts_node.toxml() + '\n')
    else:
        # A shallow clone has the attributes but no children, so it
        # serializes as '<TS .../>'; swap the '/>' for '>' to get the
        # opening tag with minidom's own attribute escaping.
        open_tag = ts_node.cloneNode(False).toxml()[:-2] + '>'
        f.write(open_tag + '\n')
        # Pretty-print each child on its own so it starts at column 0.
        # Stripping four spaces from every line of a whole-document
        # pretty-print would also strip leading spaces from continuation
        # lines of multi-line source/translation text.
        for child in ts_node.childNodes:
            f.write(child.toprettyxml(indent='    '))
        f.write(f'</{ts_node.tagName}>\n')

