Files
server-configs/oss_siyuan_reader.py
2026-02-13 22:24:27 +08:00

163 lines
5.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""
阿里云 OSS 思源笔记读取器
"""
import oss2
import json
import os
from datetime import datetime
# Alibaba Cloud OSS configuration.
#
# Credentials are read from the environment instead of being stored in source
# control. Set OSS_ACCESS_KEY_ID and OSS_ACCESS_KEY_SECRET before running; when
# they are unset the values fall back to empty strings and OSS requests will be
# rejected by the service. OSS_ENDPOINT may override the default endpoint.
#
# NOTE(review): an access key pair was previously hard-coded here. Any key
# that has been committed should be treated as leaked and rotated.
OSS_CONFIG = {
    # HTTPS so that signed requests and note contents are not sent in clear text.
    'endpoint': os.environ.get('OSS_ENDPOINT', 'https://oss-cn-beijing.aliyuncs.com'),
    'bucket_name': 'xkka-siyuan',
    'access_key_id': os.environ.get('OSS_ACCESS_KEY_ID', ''),
    'access_key_secret': os.environ.get('OSS_ACCESS_KEY_SECRET', ''),
}
def _preview_text(children, max_blocks=3, max_chars=80):
    """Return a short preview built from the first non-blank block text.

    Only the first ``max_blocks`` entries of ``children`` are inspected. For
    each entry the text is the concatenation of the ``plain_text`` values
    found under ``child[child['type']]['rich_text']``. The first text that is
    not blank is cut to ``max_chars`` characters; an ellipsis is appended only
    when something was actually cut off. Returns ``""`` when no inspected
    block yields non-blank text.
    """
    for child in children[:max_blocks]:
        block_type = child.get('type', '')
        segments = child.get(block_type, {}).get('rich_text', [])
        text = ''.join(seg.get('plain_text', '') for seg in segments)
        if text.strip():
            return text[:max_chars] + ("..." if len(text) > max_chars else "")
    return ""


def read_from_oss():
    """List and preview the notes stored in the configured OSS bucket.

    Scans the ``siyuan/`` and ``data/`` prefixes for objects whose key ends in
    ``.sy``. For every prefix with at least one match, prints the number of
    matches, then fetches at most the first 20 objects, parses each one as
    UTF-8 JSON and prints its ``title``, the number of ``children`` and a
    short text preview. Objects that cannot be fetched or parsed are reported
    together with the error and skipped. The number of notes read
    successfully is printed at the end.

    NOTE(review): the JSON keys read here (``title``, ``children``,
    ``rich_text``, ``plain_text``) should be checked against the actual
    structure of the stored ``.sy`` documents -- TODO confirm.

    Returns:
        None. All output is written to stdout.
    """
    # Build the OSS client from the module-level configuration.
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    # Prefixes under which note files are looked up.
    prefixes = ['siyuan/', 'data/']

    print("="*70)
    print("📚 阿里云 OSS 思源笔记读取")
    print(f"Bucket: {OSS_CONFIG['bucket_name']}")
    print("="*70)

    total_files = 0
    for prefix in prefixes:
        files = [
            obj.key
            for obj in oss2.ObjectIterator(bucket, prefix=prefix)
            if obj.key.endswith('.sy')
        ]
        if not files:
            continue

        print(f"\n📁 路径: {prefix}")
        print(f"找到 {len(files)} 个 .sy 文件\n")

        # Only the first 20 matches are fetched and shown.
        for file_key in files[:20]:
            try:
                content = bucket.get_object(file_key).read()
                data = json.loads(content.decode('utf-8'))
                title = data.get('title', '无标题')
                children = data.get('children', [])
                print(f"📄 {title} ({len(children)} 块)")
                preview = _preview_text(children)
                if preview:
                    print(f" 📝 {preview}")
                print()
                total_files += 1
            except Exception as exc:
                # Best-effort listing: keep going, but say why this one failed.
                print(f"❌ 读取失败: {file_key} ({exc})")

    print("="*70)
    print(f"📊 共 {total_files} 篇笔记")
    print("="*70)
def download_all(prefix='siyuan/', local_path='/root/.openclaw/workspace/siyuan_oss/'):
    """Download every ``.sy`` object under ``prefix`` into ``local_path``.

    The target directory is created if it does not exist. Each object is
    saved under its base name only (the directory part of the object key is
    dropped), so two objects with the same file name in different OSS
    directories end up in the same local file and the later one wins.

    Args:
        prefix: OSS key prefix to scan.
        local_path: Local directory that receives the downloaded files.

    Returns:
        None. A summary (file count and target directory) is printed.
    """
    os.makedirs(local_path, exist_ok=True)
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    count = 0
    for obj in oss2.ObjectIterator(bucket, prefix=prefix):
        if not obj.key.endswith('.sy'):
            continue
        local_file = os.path.join(local_path, os.path.basename(obj.key))
        # oss2.Bucket exposes get_object_to_file() for saving an object to
        # disk; it has no download_file() method.
        bucket.get_object_to_file(obj.key, local_file)
        count += 1

    print(f"✅ 下载完成: {count} 个文件")
    print(f"📁 保存到: {local_path}")
def search_content(keyword):
    """Search the notes under ``siyuan/`` for ``keyword`` (case-insensitive).

    Every object whose key ends in ``.sy`` is fetched and parsed as UTF-8
    JSON. The searchable text is the ``title`` followed by the concatenated
    ``plain_text`` values of each child's ``rich_text`` list. For each
    matching note the title, the object key and a snippet around the first
    match (30 characters before the match start, 50 after it) are printed.
    Objects that cannot be fetched or parsed are reported and skipped. The
    number of matching notes is printed at the end.

    Args:
        keyword: Text to look for; compared after lower-casing both sides.

    Returns:
        None. All output is written to stdout.
    """
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    print(f"\n🔍 搜索关键词: {keyword}")
    print("="*70)

    # Lower-case the keyword once instead of once per note.
    needle = keyword.lower()
    count = 0
    for obj in oss2.ObjectIterator(bucket, prefix='siyuan/'):
        if not obj.key.endswith('.sy'):
            continue

        try:
            content = bucket.get_object(obj.key).read()
            data = json.loads(content.decode('utf-8'))
            title = data.get('title', '')
            children = data.get('children', [])
            block_texts = [
                ''.join(
                    seg.get('plain_text', '')
                    for seg in child.get(child.get('type', ''), {}).get('rich_text', [])
                )
                for child in children
            ]
            full_text = title + ' ' + ' '.join(block_texts)
        except Exception as exc:
            # Best-effort search: skip the note, but do not hide the failure.
            print(f"❌ 读取失败: {obj.key} ({exc})")
            continue

        # One find() both decides whether the note matches and locates the hit.
        idx = full_text.lower().find(needle)
        if idx < 0:
            continue

        count += 1
        print(f"{title}")
        print(f" 文件: {obj.key}")
        # Show the text surrounding the first match.
        preview = full_text[max(0, idx-30):idx+50]
        print(f" 📝 ...{preview}...")
        print()

    print("="*70)
    print(f"📊 找到 {count} 条相关内容")
if __name__ == '__main__':
    # Command-line dispatch:
    #   (no arguments)      -> list and preview notes
    #   download [prefix]   -> download notes, prefix defaults to 'siyuan/'
    #   search <words...>   -> search notes for the space-joined words
    #   anything else       -> print usage
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        read_from_oss()
    elif cli_args[0] == 'download':
        download_all(cli_args[1] if len(cli_args) > 1 else 'siyuan/')
    elif cli_args[0] == 'search':
        search_words = cli_args[1:]
        if search_words:
            search_content(' '.join(search_words))
        else:
            print("用法: python oss_siyuan_reader.py search <关键词>")
    else:
        for usage_line in (
            "用法:",
            " python oss_siyuan_reader.py # 读取并显示",
            " python oss_siyuan_reader.py download # 下载到本地",
            " python oss_siyuan_reader.py search <词> # 搜索内容",
        ):
            print(usage_line)