#!/usr/bin/env python3
"""Reader for SiYuan notes stored in an Alibaba Cloud OSS bucket.

Usage:
    python oss_siyuan_reader.py                    # list and preview notes
    python oss_siyuan_reader.py download [prefix]  # download notes locally
    python oss_siyuan_reader.py search <keyword>   # search note contents

Credentials are read from the environment variables ``OSS_ACCESS_KEY_ID``
and ``OSS_ACCESS_KEY_SECRET``. ``OSS_ENDPOINT`` and ``OSS_BUCKET_NAME`` may
be set to override the defaults below.
"""

import json
import os
import sys
from datetime import datetime  # noqa: F401  (kept: existing file-level import)

import oss2

# Alibaba Cloud OSS configuration.
#
# SECURITY: an access key pair used to be hard-coded in this dict. Any key
# that was ever committed to source control must be treated as leaked and
# rotated; the values now come from the environment only.
OSS_CONFIG = {
    # HTTPS so that note contents are not sent over the wire in clear text.
    'endpoint': os.environ.get(
        'OSS_ENDPOINT', 'https://oss-cn-beijing.aliyuncs.com'),
    'bucket_name': os.environ.get('OSS_BUCKET_NAME', 'xkka-siyuan'),
    'access_key_id': os.environ.get('OSS_ACCESS_KEY_ID', ''),
    'access_key_secret': os.environ.get('OSS_ACCESS_KEY_SECRET', ''),
}

# Object keys with this suffix are treated as note files.
NOTE_SUFFIX = '.sy'

SEPARATOR = "=" * 70


class ConfigError(RuntimeError):
    """Raised when the OSS credentials are missing from ``OSS_CONFIG``."""


def _get_bucket():
    """Build the ``oss2.Bucket`` described by ``OSS_CONFIG``.

    Raises:
        ConfigError: if the access key id or secret is empty.
    """
    key_id = OSS_CONFIG['access_key_id']
    key_secret = OSS_CONFIG['access_key_secret']
    if not key_id or not key_secret:
        raise ConfigError(
            "缺少 OSS 凭证: 请设置环境变量 "
            "OSS_ACCESS_KEY_ID 和 OSS_ACCESS_KEY_SECRET")
    auth = oss2.Auth(key_id, key_secret)
    return oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])


def _iter_note_keys(bucket, prefix):
    """Yield the key of every object under *prefix* ending in ``.sy``."""
    for obj in oss2.ObjectIterator(bucket, prefix=prefix):
        if obj.key.endswith(NOTE_SUFFIX):
            yield obj.key


def _load_note(bucket, key):
    """Fetch object *key* and return it parsed as UTF-8 encoded JSON."""
    raw = bucket.get_object(key).read()
    return json.loads(raw.decode('utf-8'))


def _block_text(block):
    """Return the concatenated ``plain_text`` of one child block.

    NOTE(review): this assumes each child stores its text under
    ``block[block['type']]['rich_text'][i]['plain_text']`` -- confirm that
    this matches the tool that wrote the ``.sy`` files.
    """
    payload = block.get(block.get('type', ''), {})
    return ''.join(
        part.get('plain_text', '') for part in payload.get('rich_text', []))


def read_from_oss(max_files=20):
    """List the notes in the bucket and print a short preview of each.

    The ``siyuan/`` and ``data/`` prefixes are scanned. For every prefix that
    contains ``.sy`` objects, up to *max_files* of them are downloaded and
    their title, child-block count and first non-empty text are printed.

    Args:
        max_files: Maximum number of notes to preview per prefix; ``None``
            previews all of them.

    Raises:
        ConfigError: if the OSS credentials are not configured.
    """
    bucket = _get_bucket()

    # Notes are expected under one of these prefixes.
    prefixes = ['siyuan/', 'data/']

    print(SEPARATOR)
    print("📚 阿里云 OSS 思源笔记读取")
    print(f"Bucket: {OSS_CONFIG['bucket_name']}")
    print(SEPARATOR)

    total_files = 0

    for prefix in prefixes:
        # Materialised because the total count is printed before the previews.
        files = list(_iter_note_keys(bucket, prefix))
        if not files:
            continue

        print(f"\n📁 路径: {prefix}")
        print(f"找到 {len(files)} 个 .sy 文件\n")

        for file_key in files[:max_files]:
            # Best effort: one unreadable note must not abort the listing.
            try:
                data = _load_note(bucket, file_key)
                title = data.get('title', '无标题')
                children = data.get('children', [])

                # Preview: first 80 characters of the first non-empty block
                # among the first three children.
                preview = ""
                for child in children[:3]:
                    text = _block_text(child)
                    if text.strip():
                        preview = text[:80] + "..."
                        break
            except Exception as exc:
                print(f"❌ 读取失败: {file_key} ({exc})")
                continue

            print(f"📄 {title} ({len(children)} 块)")
            if preview:
                print(f" 📝 {preview}")
            print()
            total_files += 1

    print(SEPARATOR)
    print(f"📊 共 {total_files} 篇笔记")
    print(SEPARATOR)


def download_all(prefix='siyuan/',
                 local_path='/root/.openclaw/workspace/siyuan_oss/'):
    """Download every ``.sy`` object under *prefix* into *local_path*.

    Files are stored flat, named after the last component of their key. If
    two keys share the same file name, the later one is stored under its
    full key with ``/`` replaced by ``_`` instead of overwriting the first.

    Args:
        prefix: Key prefix to download from.
        local_path: Destination directory; created if it does not exist.

    Raises:
        ConfigError: if the OSS credentials are not configured.
    """
    os.makedirs(local_path, exist_ok=True)
    bucket = _get_bucket()

    used_names = set()
    count = 0
    for key in _iter_note_keys(bucket, prefix):
        name = os.path.basename(key)
        if name in used_names:
            # Same file name in another directory: keep both files.
            name = key.replace('/', '_')
        used_names.add(name)

        bucket.download_file(key, os.path.join(local_path, name))
        count += 1

    print(f"✅ 下载完成: {count} 个文件")
    print(f"📁 保存到: {local_path}")


def search_content(keyword, prefix='siyuan/'):
    """Print every note under *prefix* whose text contains *keyword*.

    The match is a case-insensitive substring test against the note title
    followed by the text of all child blocks. Notes that cannot be read or
    parsed are reported on stderr and skipped.

    Args:
        keyword: Text to look for.
        prefix: Key prefix to search.

    Raises:
        ConfigError: if the OSS credentials are not configured.
    """
    bucket = _get_bucket()

    print(f"\n🔍 搜索关键词: {keyword}")
    print(SEPARATOR)

    needle = keyword.lower()
    count = 0

    for key in _iter_note_keys(bucket, prefix):
        # Best effort: skip unreadable notes, but say why.
        try:
            data = _load_note(bucket, key)
            title = data.get('title', '')
            children = data.get('children', [])
            full_text = title + ' ' + ' '.join(
                _block_text(child) for child in children)
        except Exception as exc:
            print(f"⚠️ 跳过 {key}: {exc}", file=sys.stderr)
            continue

        idx = full_text.lower().find(needle)
        if idx < 0:
            continue

        count += 1
        print(f"✅ {title}")
        print(f" 文件: {key}")

        # Show some context around the first match.
        preview = full_text[max(0, idx - 30):idx + 50]
        print(f" 📝 ...{preview}...")
        print()

    print(SEPARATOR)
    print(f"📊 找到 {count} 条相关内容")


def main(argv=None):
    """Dispatch the command line and return a process exit code.

    Args:
        argv: Arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the OSS credentials are not configured.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    try:
        if not args:
            read_from_oss()
        elif args[0] == 'download':
            download_all(args[1] if len(args) > 1 else 'siyuan/')
        elif args[0] == 'search':
            if len(args) > 1:
                search_content(' '.join(args[1:]))
            else:
                print("用法: python oss_siyuan_reader.py search <关键词>")
        else:
            print("用法:")
            print(" python oss_siyuan_reader.py # 读取并显示")
            print(" python oss_siyuan_reader.py download # 下载到本地")
            print(" python oss_siyuan_reader.py search <词> # 搜索内容")
    except ConfigError as exc:
        print(f"❌ {exc}", file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())