import json
import sys
import os
import re
from datetime import datetime

from itertools import islice
from collections import deque
def sliding_window(iterable, n):
    """Yield overlapping tuples of ``n`` consecutive items from ``iterable``.

    Example: sliding_window('ABCDEFG', 4) → ABCD BCDE CDEF DEFG

    Nothing is yielded when the iterable holds fewer than ``n`` items.
    """
    source = iter(iterable)
    # Pre-fill with the first n - 1 items; each further item completes a window.
    buffer = deque(maxlen=n)
    buffer.extend(islice(source, n - 1))
    for item in source:
        # maxlen=n makes the deque drop its oldest item automatically.
        buffer.append(item)
        yield tuple(buffer)

# Configuration: three positional command-line arguments
INPUT_JSON = sys.argv[1]    # e.g. 'result.json'
OUTPUT_DIR = sys.argv[2]    # e.g. 'obsidian_notes'
CHANNEL_NAME = sys.argv[3]  # e.g. 'schroedinger_jokes'

# Create the output directory (no error if it already exists)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the exported data
with open(INPUT_JSON, encoding='utf-8') as f:
    data = json.load(f)

# Regular expression for links to this channel's posts;
# group 1 captures the numeric post id.
TG_LINK_PATTERN = re.compile(rf'https://t\.me/{re.escape(CHANNEL_NAME)}/(\d+)')

def convert_entities(entities):
    """Convert a list of text entities to a single Markdown string.

    Args:
        entities: iterable whose items are either plain strings (emitted
            as-is) or dicts with a ``'type'`` key (default ``'plain'``) and a
            ``'text'`` key (default ``''``). ``'pre'`` entities may carry
            ``'language'``, ``'text_link'`` entities must carry ``'href'``,
            and ``'custom_emoji'`` entities may carry ``'document_id'``.

    Returns:
        The concatenation of the rendered entities. Unknown entity types are
        rendered as their bare text.

    Raises:
        KeyError: if a ``'text_link'`` entity has no ``'href'``.
    """
    # Entity types rendered by wrapping the text in fixed delimiters.
    wrappers = {
        'bold': ('**', '**'),
        'italic': ('*', '*'),
        'strikethrough': ('~~', '~~'),
        'underline': ('<u>', '</u>'),
        'spoiler': ('<span class="spoiler">', '</span>'),
        'code': ('`', '`'),
    }

    parts = []
    for entity in entities:
        if isinstance(entity, str):
            parts.append(entity)
            continue

        text = entity.get('text', '')
        etype = entity.get('type', 'plain')

        if etype in wrappers:
            opening, closing = wrappers[etype]
            parts.append(f'{opening}{text}{closing}')
        elif etype == 'pre':
            lang = entity.get('language', '')
            parts.append(f'```{lang}\n{text}\n```')
        elif etype in ('link', 'text_link'):
            # Square brackets inside the label would break the Markdown link.
            label = text.replace('[', '(').replace(']', ')')
            target = text if etype == 'link' else entity['href']
            parts.append(f'[{label}]({target})')
        elif etype == 'mention':
            # The entity text normally already starts with '@'; only add the
            # prefix when it is missing so the output never reads '@@name'.
            parts.append(text if text.startswith('@') else f'@{text}')
        elif etype == 'custom_emoji':
            sticker_id = entity.get('document_id', '')
            parts.append(f'![[{sticker_id}]]')
        elif etype == 'blockquote':
            # Prefix every line, not just the first, so that a multi-line
            # quote stays inside one Markdown blockquote.
            parts.append('> ' + text.replace('\n', '\n> '))
        else:
            # 'plain', 'hashtag' and any unrecognised type: text unchanged.
            parts.append(text)

    return ''.join(parts)

def extract_text_content(msg):
    """Return the Markdown text of a message with channel links rewritten.

    The text comes from ``msg['text_entities']`` when that key is present,
    otherwise from ``msg['text']`` (either an entity list or a plain string),
    otherwise it is empty. Every URL matched by ``TG_LINK_PATTERN`` is then
    replaced with ``<CHANNEL_NAME>@<post id>``.
    """
    if 'text_entities' in msg:
        markdown = convert_entities(msg['text_entities'])
    elif 'text' not in msg:
        markdown = ''
    else:
        raw_text = msg['text']
        markdown = convert_entities(raw_text) if isinstance(raw_text, list) else raw_text

    def to_internal_link(match):
        # Group 1 is the numeric post id captured by TG_LINK_PATTERN.
        return f'{CHANNEL_NAME}@{match.group(1)}'

    # Replace channel links with internal ones
    return TG_LINK_PATTERN.sub(to_internal_link, markdown)


def truncate_title(original_title, max_length = 40):
    """Shorten a title to at most ``max_length`` characters.

    Surrounding whitespace is stripped first. A title that already fits is
    returned unchanged. Otherwise the title is cut, preferably at a word
    boundary, and ``'…'`` is appended; the ellipsis counts towards
    ``max_length``.

    Args:
        original_title: the title text.
        max_length: maximum length of the result, ellipsis included.

    Returns:
        The stripped and possibly truncated title. An over-long title with
        ``max_length`` below 1 yields ``''`` because not even the ellipsis
        fits.
    """
    title = original_title.strip()
    if len(title) <= max_length:
        return title
    if max_length < 1:
        return ''

    # Reserve one character for the ellipsis.
    head = title[:max_length - 1]
    # Back up to the previous space only when the cut falls inside a word;
    # if the next character is whitespace the last word is already complete.
    cut_inside_word = not title[max_length - 1].isspace()
    if cut_inside_word and ' ' in head:
        head = head.rsplit(' ', 1)[0]
    # Avoid whitespace directly before the ellipsis.
    return head.rstrip() + '…'

def _photo_lines(msg):
    """Return the Markdown lines embedding the photo referenced by ``msg['photo']``."""
    details = [
        f'width: {msg.get("width", "N/A")}',
        f'height: {msg.get("height", "N/A")}',
        f'size: {msg.get("photo_file_size", "N/A")} bytes',
    ]
    return [
        f'![[{msg["photo"]}]]',
        f'<!-- Photo: {", ".join(details)} -->\n',
    ]


def _file_lines(msg):
    """Return the Markdown lines for the file (audio, video, sticker, other) in ``msg['file']``."""
    file_path = msg['file']
    file_name = msg.get('file_name', 'File')
    media_type = msg.get('media_type', 'file')

    details = [f'size: {msg.get("file_size", "N/A")} bytes']

    if media_type == 'audio_file':
        duration = msg.get('duration_seconds', 'N/A')
        performer = msg.get('performer', '')
        # Named track_title so it cannot be confused with the note title.
        track_title = msg.get('title', '')
        details.append(f'duration: {duration} sec')
        caption = f'**Аудио**: {performer} - {track_title}'
    elif media_type == 'video_file':
        duration = msg.get('duration_seconds', 'N/A')
        width = msg.get('width', 'N/A')
        height = msg.get('height', 'N/A')
        details.extend([
            f'duration: {duration} sec',
            f'resolution: {width}x{height}',
        ])
        caption = f'**Видео**: {file_name}'
    elif media_type == 'sticker':
        emoji = msg.get('sticker_emoji', '')
        width = msg.get('width', 'N/A')
        height = msg.get('height', 'N/A')
        details.extend([
            f'emoji: {emoji}',
            f'resolution: {width}x{height}',
        ])
        caption = f'**Стикер**: {emoji}'
    else:
        caption = f'**Файл**: {file_name}'

    return [
        caption,
        f'[[{file_path}]]',
        f'<!-- {media_type}: {", ".join(details)} -->\n',
    ]


def _poll_lines(poll):
    """Return the Markdown lines for a poll: heading, voter total, one bullet per answer."""
    # Read the total once, with a default, so a missing key cannot raise
    # halfway through the poll.
    total_voters = poll.get('total_voters', 0)
    lines = [
        f'## Опрос: {poll["question"]}',
        f'*Всего проголосовало: {total_voters}*\n',
    ]
    for answer in poll['answers']:
        voters = answer['voters']
        # Guard against division by zero for polls nobody voted in.
        percent = voters / total_voters * 100 if total_voters > 0 else 0
        lines.append(f'- **{answer["text"]}**: {voters} ({percent:.1f}%)')
    lines.append('')
    return lines


def process_message(msg, prev_id=None, next_id=None):
    """Render one message as the Markdown content of a note.

    The note consists of YAML frontmatter (source URL, date, title, aliases
    and, when present, ``forwarded_from``), a level-1 heading, optional
    previous/next navigation links, the message text, and sections for an
    attached photo, file and poll.

    Args:
        msg: message dict; ``'id'`` and ``'date'`` are required.
        prev_id: id of the previous message, or a falsy value for no link.
        next_id: id of the next message, or a falsy value for no link.

    Returns:
        The note content as a single string.

    Raises:
        KeyError: if ``msg`` lacks ``'id'`` or ``'date'``, or a poll lacks
            ``'question'``/``'answers'`` or an answer lacks
            ``'voters'``/``'text'``.
    """
    msg_id = msg['id']

    text_content = extract_text_content(msg)

    # The title is the first non-empty line that does not start with '# '.
    # NOTE(review): the original comment said the space after `#` is there
    # "to distinguish from markdown headers" — confirm which kind of line
    # (hashtag-only or heading) is meant to be skipped here.
    stripped_lines = (line.strip() for line in text_content.split('\n'))
    title_candidates = [
        line for line in stripped_lines
        if line and not line.startswith('# ')
    ]
    if title_candidates:
        title = truncate_title(f'{msg_id}. {title_candidates[0]}', 80)
    else:
        title = f'{msg_id}. Untitled'

    # YAML frontmatter
    content_lines = [
        '---',
        f'source: "https://t.me/{CHANNEL_NAME}/{msg_id}"',
        f'date: "{msg["date"]}"',
        f'title: {json.dumps(title, ensure_ascii=False)}',
        f'aliases: {json.dumps([title], ensure_ascii=False)}',
    ]

    # Additional metadata
    if 'forwarded_from' in msg:
        # Escape like title/aliases so quotes or backslashes in the name
        # cannot break the YAML.
        forwarded = json.dumps(str(msg['forwarded_from']), ensure_ascii=False)
        content_lines.append(f'forwarded_from: {forwarded}')

    content_lines.append('---\n')

    # Heading
    content_lines.append(f'# {title}\n')

    # Navigation
    nav_links = []
    if prev_id:
        nav_links.append(f'[(←) предыдущая запись]({CHANNEL_NAME}@{prev_id})')
    if next_id:
        nav_links.append(f'[следующая запись (→)]({CHANNEL_NAME}@{next_id})')
    if nav_links:
        content_lines.append('  ;  '.join(nav_links) + '\n')

    content_lines.append(text_content + '\n')

    # Media attachments and polls
    if 'photo' in msg:
        content_lines.extend(_photo_lines(msg))
    if 'file' in msg:
        content_lines.extend(_file_lines(msg))
    if 'poll' in msg:
        content_lines.extend(_poll_lines(msg['poll']))

    # Separator after any attachment section
    if any(key in msg for key in ('photo', 'file', 'poll')):
        content_lines.append('---\n')

    return '\n'.join(content_lines)


# Process all messages.
# Service messages that carry no content are dropped BEFORE building the
# windows, so the previous/next links never point at a note that is not
# written.
content_keys = ('text', 'photo', 'file', 'poll')
exportable_messages = [
    msg for msg in data['messages']
    if msg.get('type') != 'service' or any(key in msg for key in content_keys)
]

# Placeholder neighbours give the first and last message a None prev/next id.
boundary = {'id': None}
saved_count = 0
for prev_msg, message, next_msg in sliding_window([boundary, *exportable_messages, boundary], 3):
    try:
        content = process_message(message, prev_id=prev_msg['id'], next_id=next_msg['id'])
        filename = f"{CHANNEL_NAME}@{message['id']}.md"
        filepath = os.path.join(OUTPUT_DIR, filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)

    except Exception as e:
        # Best effort: report the failing message and continue with the rest.
        print(f"Ошибка обработки сообщения {message.get('id')}: {str(e)}")
    else:
        saved_count += 1

# Report the number of notes actually written, not the size of the export.
print(f"Конвертация завершена! Сохранено {saved_count} сообщений в папку {OUTPUT_DIR}")
