#!/usr/bin/env python3
"""
Alibaba Cloud OSS reader for SiYuan notes.
"""
|
|
|
|
import oss2
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
|
|
# Alibaba Cloud OSS configuration.
#
# Credentials are read from the environment instead of being hard-coded, so
# secrets never live in the source tree. Any key pair that was previously
# committed in this file should be treated as leaked and rotated.
#
# Environment variables:
#   OSS_ENDPOINT           - service endpoint (defaults to the Beijing region)
#   OSS_BUCKET_NAME        - bucket that holds the notes
#   OSS_ACCESS_KEY_ID      - access key ID (no default)
#   OSS_ACCESS_KEY_SECRET  - access key secret (no default)
OSS_CONFIG = {
    # HTTPS so note contents are not transferred in clear text.
    'endpoint': os.environ.get('OSS_ENDPOINT', 'https://oss-cn-beijing.aliyuncs.com'),
    'bucket_name': os.environ.get('OSS_BUCKET_NAME', 'xkka-siyuan'),
    # Empty strings when unset: importing the module still works, and the
    # first OSS request fails with an authentication error instead.
    'access_key_id': os.environ.get('OSS_ACCESS_KEY_ID', ''),
    'access_key_secret': os.environ.get('OSS_ACCESS_KEY_SECRET', ''),
}
|
|
|
|
def _summarize_note(data):
    """Return ``(title, block_count, preview)`` for one parsed note document.

    ``preview`` is the first 80 characters, followed by ``...``, of the first
    non-blank block among the first three children, or ``""`` when none of
    them carries text.
    """
    title = data.get('title', '无标题')
    children = data.get('children', [])

    preview = ""
    for child in children[:3]:
        # A block's payload is looked up under a key named after its own
        # ``type``; the text is the concatenation of the ``plain_text``
        # fragments of that payload's ``rich_text`` list.
        block_type = child.get('type', '')
        fragments = child.get(block_type, {}).get('rich_text', [])
        text = ''.join(fragment.get('plain_text', '') for fragment in fragments)
        if text.strip():
            preview = text[:80] + "..."
            break

    return title, len(children), preview


def read_from_oss(prefixes=('siyuan/', 'data/'), limit=20):
    """List ``.sy`` notes stored in OSS and print a short summary of each.

    For every prefix, all object keys ending in ``.sy`` are collected. Up to
    ``limit`` of them are then downloaded, decoded as UTF-8 JSON and printed
    as a title, a child-block count and a one-line text preview.

    Args:
        prefixes: Iterable of key prefixes to scan. Defaults to the two
            locations this script has always looked in.
        limit: Maximum number of notes to download and display per prefix.
            ``None`` removes the cap.

    Returns:
        None. All output goes to stdout.

    Raises:
        Errors raised while listing objects propagate to the caller. Failures
        on an individual note are reported on stdout and that note is skipped.
    """
    # Set up the OSS client.
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    print("="*70)
    print("📚 阿里云 OSS 思源笔记读取")
    print(f"Bucket: {OSS_CONFIG['bucket_name']}")
    print("="*70)

    total_files = 0

    for prefix in prefixes:
        # Collect every key first so the total can be reported even though
        # only the first ``limit`` notes are downloaded.
        files = [
            obj.key
            for obj in oss2.ObjectIterator(bucket, prefix=prefix)
            if obj.key.endswith('.sy')
        ]
        if not files:
            continue

        print(f"\n📁 路径: {prefix}")
        print(f"找到 {len(files)} 个 .sy 文件\n")

        for file_key in files[:limit]:
            try:
                raw = bucket.get_object(file_key).read()
                title, block_count, preview = _summarize_note(
                    json.loads(raw.decode('utf-8'))
                )
            except Exception as exc:
                # Best-effort per-file boundary: network, decoding and
                # unexpected-structure errors must not abort the listing, but
                # the reason is shown so the failure can be diagnosed.
                print(f"❌ 读取失败: {file_key} ({exc})")
                continue

            print(f"📄 {title} ({block_count} 块)")
            if preview:
                print(f" 📝 {preview}")
            print()

            total_files += 1

    print("="*70)
    # Counts only the notes that were actually read and displayed.
    print(f"📊 共 {total_files} 篇笔记")
    print("="*70)
|
|
|
|
def download_all(prefix='siyuan/', local_path='/root/.openclaw/workspace/siyuan_oss/'):
    """Download every ``.sy`` object under ``prefix`` into ``local_path``.

    The target directory is created if it does not exist. Each object is
    saved under its base name only, so the remote directory structure is
    flattened into ``local_path``.

    Args:
        prefix: Key prefix to scan in the bucket.
        local_path: Local directory that receives the downloaded files.

    Returns:
        None. A summary is printed to stdout when all downloads finish.
    """
    os.makedirs(local_path, exist_ok=True)

    credentials = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(credentials, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    # Lazy filter: listing and downloading stay interleaved, one key at a time.
    note_keys = (
        entry.key
        for entry in oss2.ObjectIterator(bucket, prefix=prefix)
        if entry.key.endswith('.sy')
    )

    count = 0
    for count, remote_key in enumerate(note_keys, start=1):
        destination = os.path.join(local_path, os.path.basename(remote_key))
        bucket.download_file(remote_key, destination)

    print(f"✅ 下载完成: {count} 个文件")
    print(f"📁 保存到: {local_path}")
|
|
|
|
def search_content(keyword, prefix='siyuan/'):
    """Search note titles and block text in OSS for ``keyword``.

    Every ``.sy`` object under ``prefix`` is downloaded and decoded as UTF-8
    JSON. Its title and the ``plain_text`` fragments of each child block's
    ``rich_text`` list are joined into one string, which is then searched
    case-insensitively. For each hit the title, the object key and a short
    context window around the first occurrence are printed.

    Args:
        keyword: Text to look for; matching ignores case.
        prefix: Key prefix to scan in the bucket. Defaults to ``'siyuan/'``,
            the location this function has always searched.

    Returns:
        None. All output goes to stdout.

    Raises:
        Errors raised while listing objects propagate to the caller. Notes
        that cannot be read or parsed are reported on stdout and skipped.
    """
    auth = oss2.Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
    bucket = oss2.Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])

    print(f"\n🔍 搜索关键词: {keyword}")
    print("="*70)

    # Lower-case the keyword once instead of once per note.
    needle = keyword.lower()
    count = 0

    for obj in oss2.ObjectIterator(bucket, prefix=prefix):
        if not obj.key.endswith('.sy'):
            continue

        try:
            raw = bucket.get_object(obj.key).read()
            data = json.loads(raw.decode('utf-8'))

            title = data.get('title', '')
            children = data.get('children', [])

            # A block's payload is looked up under a key named after its own
            # ``type``; its text is the joined ``plain_text`` fragments.
            block_texts = [
                ''.join(
                    fragment.get('plain_text', '')
                    for fragment in child.get(child.get('type', ''), {}).get('rich_text', [])
                )
                for child in children
            ]
            full_text = title + ' ' + ' '.join(block_texts)
        except Exception as exc:
            # Best-effort per-file boundary: one unreadable note must not stop
            # the search, but it is reported instead of silently vanishing
            # from the results.
            print(f"❌ 读取失败: {obj.key} ({exc})")
            continue

        # A single ``find`` answers both "does it match" and "where".
        idx = full_text.lower().find(needle)
        if idx < 0:
            continue

        count += 1
        print(f"✅ {title}")
        print(f" 文件: {obj.key}")

        # Show the match with 30 characters of leading and 50 of trailing context.
        preview = full_text[max(0, idx-30):idx+50]
        print(f" 📝 ...{preview}...")
        print()

    print("="*70)
    print(f"📊 找到 {count} 条相关内容")
|
|
|
|
if __name__ == '__main__':
    import sys

    # Command-line dispatch:
    #   (no arguments)        -> list notes and show previews
    #   download [prefix]     -> download notes to the local directory
    #   search <words...>     -> search note contents
    cli_args = sys.argv[1:]

    if not cli_args:
        read_from_oss()
    elif cli_args[0] == 'download':
        # An optional second argument overrides the default key prefix.
        download_all(cli_args[1] if len(cli_args) > 1 else 'siyuan/')
    elif cli_args[0] == 'search':
        if len(cli_args) > 1:
            # All remaining words form one space-separated keyword.
            search_content(' '.join(cli_args[1:]))
        else:
            print("用法: python oss_siyuan_reader.py search <关键词>")
    else:
        print("用法:")
        print(" python oss_siyuan_reader.py # 读取并显示")
        print(" python oss_siyuan_reader.py download # 下载到本地")
        print(" python oss_siyuan_reader.py search <词> # 搜索内容")
|