# server-configs/oss_siyuan_reader.py

#!/usr/bin/env python3
"""
阿里云 OSS 思源笔记读取器
"""

import oss2
import json
import os
from datetime import datetime

# Alibaba Cloud OSS connection settings.
#
# Credentials are taken from the environment (OSS_ACCESS_KEY_ID /
# OSS_ACCESS_KEY_SECRET) rather than being committed with the source.
# NOTE(review): the access key that used to be hard-coded here has been
# exposed in version control and should be revoked and rotated.
OSS_CONFIG = {
    # HTTPS by default so note contents are not transferred in clear text;
    # set OSS_ENDPOINT to point at another region or an internal endpoint.
    'endpoint': os.environ.get('OSS_ENDPOINT', 'https://oss-cn-beijing.aliyuncs.com'),
    'bucket_name': os.environ.get('OSS_BUCKET_NAME', 'xkka-siyuan'),
    # Empty strings when the variables are unset; requests will then be
    # rejected by OSS instead of silently using a baked-in key.
    'access_key_id': os.environ.get('OSS_ACCESS_KEY_ID', ''),
    'access_key_secret': os.environ.get('OSS_ACCESS_KEY_SECRET', ''),
}

def _first_text_preview(blocks, limit=80):
    """Return up to *limit* characters of the first non-blank block text.

    Each block is expected to be a dict whose ``type`` value names the key
    holding a ``rich_text`` list of fragments with a ``plain_text`` entry.
    Blocks that do not have that shape are skipped instead of raising.
    An ellipsis is appended only when the text was actually cut.
    Returns '' when no block yields non-blank text.
    """
    for block in blocks:
        if not isinstance(block, dict):
            continue
        payload = block.get(block.get('type', ''))
        if not isinstance(payload, dict):
            continue
        text = ''.join(
            fragment.get('plain_text') or ''
            for fragment in (payload.get('rich_text') or [])
            if isinstance(fragment, dict)
        )
        if text.strip():
            return text[:limit] + ('...' if len(text) > limit else '')
    return ''


def read_from_oss():
    """List the ``.sy`` notes in the OSS bucket and print a short summary of each.

    For each of the key prefixes ``siyuan/`` and ``data/`` every object whose
    key ends in ``.sy`` is collected.  The first 20 per prefix are fetched,
    decoded as UTF-8 JSON and summarised on stdout: title, number of child
    blocks, and a preview (at most 80 characters) of the first non-blank
    text found in the first three blocks.

    A note that cannot be fetched or parsed is reported together with the
    error and skipped; it is not counted and does not abort the run.
    Errors raised while listing the bucket are not caught here.

    Returns:
        None.  All results are printed.
    """
    # Set up the OSS client.
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    # SiYuan note files are looked for under these key prefixes.
    prefixes = ['siyuan/', 'data/']

    print("="*70)
    print("📚 阿里云 OSS 思源笔记读取")
    print(f"Bucket: {OSS_CONFIG['bucket_name']}")
    print("="*70)

    total_files = 0

    for prefix in prefixes:
        files = [
            obj.key
            for obj in oss2.ObjectIterator(bucket, prefix=prefix)
            if obj.key.endswith('.sy')
        ]
        if not files:
            continue

        print(f"\n📁 路径: {prefix}")
        print(f"找到 {len(files)} 个 .sy 文件\n")

        # Only the first 20 notes per prefix are downloaded and shown.
        for file_key in files[:20]:
            try:
                content = bucket.get_object(file_key).read()
                data = json.loads(content.decode('utf-8'))

                title = data.get('title', '无标题')
                children = data.get('children') or []
                block_count = len(children)
                preview = _first_text_preview(children[:3])
            except Exception as e:
                # Best-effort listing: one unreadable note must not stop the
                # others, but the reason is shown so it can be diagnosed.
                print(f"❌ 读取失败: {file_key} ({e})")
                continue

            print(f"📄 {title} ({block_count} 块)")
            if preview:
                print(f"   📝 {preview}")
            print()

            total_files += 1

    print("="*70)
    print(f"📊 共 {total_files} 篇笔记")
    print("="*70)

def download_all(prefix='siyuan/', local_path='/root/.openclaw/workspace/siyuan_oss/'):
    """Download every ``.sy`` object under *prefix* into *local_path*.

    The destination directory is created if it does not exist.  Each object
    is stored under its base name only, so the remote directory layout is
    flattened into a single local folder.  When finished, the number of
    downloaded files and the destination are printed.

    Args:
        prefix: Key prefix in the bucket to scan.
        local_path: Local directory that receives the files.
    """
    os.makedirs(local_path, exist_ok=True)

    credentials = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(credentials, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    downloaded = 0
    for remote in oss2.ObjectIterator(bucket, prefix=prefix):
        key = remote.key
        # Anything that is not a SiYuan note file is ignored.
        if not key.endswith('.sy'):
            continue
        target = os.path.join(local_path, os.path.basename(key))
        bucket.download_file(key, target)
        downloaded += 1

    print(f"✅ 下载完成: {downloaded} 个文件")
    print(f"📁 保存到: {local_path}")

def _note_search_text(title, blocks):
    """Build the searchable text of a note: title, a space, then block texts.

    The plain text of each block is the concatenation of the ``plain_text``
    entries in the ``rich_text`` list stored under the key named by the
    block's ``type``.  Blocks without that shape contribute an empty string.
    Block texts are joined with single spaces.
    """
    texts = []
    for block in blocks:
        payload = block.get(block.get('type', '')) if isinstance(block, dict) else None
        fragments = (payload.get('rich_text') or []) if isinstance(payload, dict) else []
        texts.append(''.join(
            fragment.get('plain_text') or ''
            for fragment in fragments
            if isinstance(fragment, dict)
        ))
    return title + ' ' + ' '.join(texts)


def search_content(keyword, prefix='siyuan/'):
    """Search the ``.sy`` notes under *prefix* for *keyword*, case-insensitively.

    Every matching note is printed with its title, its object key and a
    snippet around the first hit (30 characters before the hit position
    through 50 characters after it).  A note that cannot be fetched or
    parsed is reported with the error and skipped instead of being
    silently ignored.

    Args:
        keyword: Text to look for in the title and block texts.
        prefix: Key prefix in the bucket to scan.

    Returns:
        None.  Matches and the final hit count are printed.
    """
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    print(f"\n🔍 搜索关键词: {keyword}")
    print("="*70)

    # Lower-case the keyword once instead of once (or twice) per note.
    needle = keyword.lower()

    count = 0
    for obj in oss2.ObjectIterator(bucket, prefix=prefix):
        if not obj.key.endswith('.sy'):
            continue

        try:
            content = bucket.get_object(obj.key).read()
            data = json.loads(content.decode('utf-8'))

            title = data.get('title') or ''
            full_text = _note_search_text(title, data.get('children') or [])
        except Exception as e:
            # Best-effort search: keep going, but make the failure visible.
            print(f"❌ 读取失败: {obj.key} ({e})")
            continue

        # A single find() both tests for a match and locates it.
        idx = full_text.lower().find(needle)
        if idx < 0:
            continue

        count += 1
        print(f"✅ {title}")
        print(f"   文件: {obj.key}")

        # Show the text surrounding the first match.
        preview = full_text[max(0, idx-30):idx+50]
        print(f"   📝 ...{preview}...")
        print()

    print("="*70)
    print(f"📊 找到 {count} 条相关内容")

if __name__ == '__main__':
    import sys

    # Everything after the script name: optional sub-command plus its arguments.
    cli_args = sys.argv[1:]

    if not cli_args:
        # No sub-command: list the notes and show previews.
        read_from_oss()
    elif cli_args[0] == 'download':
        # An optional second argument overrides the default key prefix.
        download_all(cli_args[1] if len(cli_args) > 1 else 'siyuan/')
    elif cli_args[0] == 'search':
        if len(cli_args) > 1:
            # All remaining words form one space-separated keyword.
            search_content(' '.join(cli_args[1:]))
        else:
            print("用法: python oss_siyuan_reader.py search <关键词>")
    else:
        # Unknown sub-command: print the usage summary.
        print("用法:")
        print("  python oss_siyuan_reader.py           # 读取并显示")
        print("  python oss_siyuan_reader.py download  # 下载到本地")
        print("  python oss_siyuan_reader.py search <词>  # 搜索内容")