#!/usr/bin/env python3
"""
阿里云 OSS 思源笔记读取器
"""

import oss2
import json
import os
from datetime import datetime

# Aliyun OSS connection settings.
#
# Credentials are taken from the environment (OSS_ACCESS_KEY_ID /
# OSS_ACCESS_KEY_SECRET) so that no secret is committed to source control.
# SECURITY: an AccessKey pair used to be hard-coded here; treat that pair as
# leaked and rotate it.
# When the variables are unset the values are empty strings, so requests made
# with this configuration will be rejected by the service instead of silently
# using an embedded key.
OSS_CONFIG = {
    # HTTPS by default so listings and note contents are not sent in clear text.
    'endpoint': os.environ.get('OSS_ENDPOINT', 'https://oss-cn-beijing.aliyuncs.com'),
    'bucket_name': os.environ.get('OSS_BUCKET_NAME', 'xkka-siyuan'),
    'access_key_id': os.environ.get('OSS_ACCESS_KEY_ID', ''),
    'access_key_secret': os.environ.get('OSS_ACCESS_KEY_SECRET', ''),
}

def _leading_block_preview(children, max_blocks=3, max_chars=80):
    """Return a short preview built from the first non-empty leading block.

    Only the first ``max_blocks`` entries of ``children`` are inspected. For
    each one, the payload stored under the key named by its ``type`` field is
    looked up and the ``plain_text`` of every ``rich_text`` fragment is
    concatenated. Entries that do not have that shape are skipped rather than
    raising, so one odd block does not hide the whole note.

    Returns the first ``max_chars`` characters followed by ``"..."``, or an
    empty string when none of the inspected blocks contains text.
    """
    for child in children[:max_blocks]:
        if not isinstance(child, dict):
            continue
        payload = child.get(child.get('type', ''), {})
        if not isinstance(payload, dict):
            continue
        fragments = payload.get('rich_text', [])
        if not isinstance(fragments, list):
            continue
        text = ''.join(
            frag.get('plain_text', '') for frag in fragments if isinstance(frag, dict)
        )
        if text.strip():
            return text[:max_chars] + "..."
    return ""


def read_from_oss():
    """List ``.sy`` notes in the configured OSS bucket and print a summary.

    For each of the prefixes ``siyuan/`` and ``data/`` every object key ending
    in ``.sy`` is collected. At most the first 20 files per prefix are
    downloaded, decoded as UTF-8 JSON, and printed as title, block count and a
    short text preview. A file that cannot be fetched or parsed is reported
    together with the reason and does not count toward the final total.

    Returns:
        None. All results are written to standard output.
    """
    # Set up the OSS client from the module-level configuration.
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    # Prefixes that are scanned for note files.
    prefixes = ['siyuan/', 'data/']

    print("="*70)
    print("📚 阿里云 OSS 思源笔记读取")
    print(f"Bucket: {OSS_CONFIG['bucket_name']}")
    print("="*70)

    total_files = 0

    for prefix in prefixes:
        files = [
            obj.key
            for obj in oss2.ObjectIterator(bucket, prefix=prefix)
            if obj.key.endswith('.sy')
        ]
        if not files:
            continue

        print(f"\n📁 路径: {prefix}")
        print(f"找到 {len(files)} 个 .sy 文件\n")

        # Only the first 20 files are fetched; the count above is the full total.
        for file_key in files[:20]:
            try:
                content = bucket.get_object(file_key).read()
                data = json.loads(content.decode('utf-8'))
                title = data.get('title', '无标题')
                children = data.get('children', [])
            except Exception as e:
                # Best-effort listing: report why this file failed and move on.
                print(f"❌ 读取失败: {file_key} ({type(e).__name__}: {e})")
                continue

            if not isinstance(children, list):
                children = []

            print(f"📄 {title} ({len(children)} 块)")

            preview = _leading_block_preview(children)
            if preview:
                print(f"   📝 {preview}")
            print()

            total_files += 1

    print("="*70)
    print(f"📊 共 {total_files} 篇笔记")
    print("="*70)

def download_all(prefix='siyuan/', local_path='/root/.openclaw/workspace/siyuan_oss/'):
    """Download every ``.sy`` object under ``prefix`` into ``local_path``.

    The target directory is created when missing. Each object is saved under
    its base name only, so the remote directory structure is not reproduced
    locally. A summary with the number of files and the target directory is
    printed at the end.

    Args:
        prefix: Key prefix in the bucket to scan.
        local_path: Local directory that receives the files.
    """
    os.makedirs(local_path, exist_ok=True)

    credentials = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(credentials, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    # Lazily filter the listing so listing and downloading stay interleaved.
    note_keys = (
        entry.key
        for entry in oss2.ObjectIterator(bucket, prefix=prefix)
        if entry.key.endswith('.sy')
    )

    downloaded = 0
    for key in note_keys:
        target = os.path.join(local_path, os.path.basename(key))
        bucket.download_file(key, target)
        downloaded += 1

    print(f"✅ 下载完成: {downloaded} 个文件")
    print(f"📁 保存到: {local_path}")

def _note_search_text(data):
    """Return ``(title, full_text)`` for a parsed note.

    ``full_text`` is the title, a space, and the space-joined text of every
    entry in ``children``. The text of an entry is the concatenated
    ``plain_text`` of the ``rich_text`` fragments found under the key named by
    the entry's ``type`` field. Entries without that shape contribute an empty
    string instead of raising.
    """
    title = data.get('title', '')
    if not isinstance(title, str):
        title = ''
    children = data.get('children', [])
    if not isinstance(children, list):
        children = []

    block_texts = []
    for child in children:
        text = ''
        if isinstance(child, dict):
            payload = child.get(child.get('type', ''), {})
            fragments = payload.get('rich_text', []) if isinstance(payload, dict) else []
            if isinstance(fragments, list):
                text = ''.join(
                    frag.get('plain_text', '')
                    for frag in fragments
                    if isinstance(frag, dict)
                )
        block_texts.append(text)

    return title, title + ' ' + ' '.join(block_texts)


def search_content(keyword, prefix='siyuan/'):
    """Search the text of ``.sy`` notes in the bucket for ``keyword``.

    Every object under ``prefix`` whose key ends in ``.sy`` is downloaded and
    parsed as UTF-8 JSON. The comparison is case-insensitive. For each match
    the title, the object key and a short excerpt around the first occurrence
    are printed, followed by the total number of matching notes. Notes that
    cannot be fetched or parsed are reported and skipped.

    Args:
        keyword: Text to look for.
        prefix: Key prefix in the bucket to scan.

    Returns:
        None. All results are written to standard output.
    """
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    print(f"\n🔍 搜索关键词: {keyword}")
    print("="*70)

    # Lower-case the keyword once instead of once per note.
    needle = keyword.lower()

    count = 0
    for obj in oss2.ObjectIterator(bucket, prefix=prefix):
        if not obj.key.endswith('.sy'):
            continue

        try:
            content = bucket.get_object(obj.key).read()
            data = json.loads(content.decode('utf-8'))
            title, full_text = _note_search_text(data)
        except Exception as e:
            # Best-effort search: say which note was skipped and why, so the
            # result list is not mistaken for a complete one.
            print(f"❌ 读取失败: {obj.key} ({type(e).__name__}: {e})")
            continue

        idx = full_text.lower().find(needle)
        if idx < 0:
            continue

        count += 1
        print(f"✅ {title}")
        print(f"   文件: {obj.key}")

        # Show the text surrounding the first occurrence.
        preview = full_text[max(0, idx-30):idx+50]
        print(f"   📝 ...{preview}...")
        print()

    print("="*70)
    print(f"📊 找到 {count} 条相关内容")

if __name__ == '__main__':
    import sys

    # Command-line dispatch: no argument lists notes, "download [prefix]"
    # saves them locally, "search <words...>" searches their text. Anything
    # else prints the usage text.
    cli_args = sys.argv[1:]

    if not cli_args:
        read_from_oss()
    elif cli_args[0] == 'download':
        download_all(cli_args[1] if len(cli_args) > 1 else 'siyuan/')
    elif cli_args[0] == 'search':
        search_words = cli_args[1:]
        if search_words:
            search_content(' '.join(search_words))
        else:
            print("用法: python oss_siyuan_reader.py search <关键词>")
    else:
        usage_lines = (
            "用法:",
            "  python oss_siyuan_reader.py           # 读取并显示",
            "  python oss_siyuan_reader.py download  # 下载到本地",
            "  python oss_siyuan_reader.py search <词>  # 搜索内容",
        )
        for usage_line in usage_lines:
            print(usage_line)