-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathservicer.py
46 lines (34 loc) · 1.38 KB
/
servicer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import re
from bs4 import BeautifulSoup
import json
# Convert a flomo HTML export (flomo/index.html) into a JSON list of memos
# (flomo/myMemos.json). Each memo becomes a dict with the keys
# 'time', 'tags', 'content' and 'filePath'.

# Selects <p> elements whose string contains a '#' that sits at the very
# start or directly after whitespace (i.e. something that looks like a tag).
_TAG_PARAGRAPH_RE = re.compile(
    r'(?<![^\W_])(?:(?<=\s)|(?<=^))#[\S]*?(?=[\s.,;!?]|[^\w\s]|$)')
# Pulls the individual "#tag" tokens out of a selected paragraph's text.
_TAG_TOKEN_RE = re.compile(r"#\w+")
# Placeholder written to the JSON when a field has no value. It is the
# *string* "None" (not null) to stay compatible with the existing output.
_MISSING = "None"


def _extract_tags(content_div):
    """Return every "#tag" token found in the tag-bearing paragraphs."""
    tags = []
    for paragraph in content_div.find_all('p', string=_TAG_PARAGRAPH_RE):
        tags.extend(_TAG_TOKEN_RE.findall(paragraph.text))
    return tags


def _parse_memo(memo):
    """Build the JSON-ready dict for one ``<div class="memo">`` element.

    Any of the ``time`` / ``content`` / ``files`` containers may be absent;
    the corresponding field then falls back to the "None" placeholder
    instead of aborting the whole export with an AttributeError.
    """
    time_div = memo.find('div', class_='time')
    content_div = memo.find('div', class_='content')
    files_div = memo.find('div', class_='files')

    if content_div is not None:
        tags = _extract_tags(content_div)
        paragraphs = content_div.find_all('p')
    else:
        tags = []
        paragraphs = []

    # src=True keeps only <img> elements that actually carry a src
    # attribute, so img['src'] below cannot raise KeyError.
    images = files_div.find_all('img', src=True) if files_div is not None else []

    # Key order matches the original output: time, tags, content, filePath.
    return {
        'time': time_div.text if time_div is not None else _MISSING,
        'tags': tags or _MISSING,
        'content': [p.text.strip() for p in paragraphs] or _MISSING,
        'filePath': [img['src'] for img in images] or _MISSING,
    }


# Read and parse the exported HTML.
with open('flomo/index.html', 'r', encoding='utf-8') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

memos = soup.find_all('div', class_='memo')
data = [_parse_memo(memo) for memo in memos]

# Write the result; ensure_ascii=False keeps non-ASCII text readable.
with open('flomo/myMemos.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
print("生成完毕 flomo/myMemos.json")