-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmake_backup_md.py
76 lines (56 loc) · 2.06 KB
/
make_backup_md.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import datetime
import csv
import requests
from bs4 import BeautifulSoup
from decouple import config
import re
import time
import markdownify
# --- Configuration and session setup ---------------------------------------
# All settings are read through decouple's config(): the Zendesk login and
# password, the base domain URL, and the language being backed up.
credentials = config("ZENDESK_LOGIN"), config("ZENDESK_PASSWORD")
session = requests.Session()
# The (login, password) tuple is attached to the session so every request
# made through it carries the same authentication.
session.auth = credentials
zendesk = config("ZENDESK_DOMAIN")
language = config("ZENDESK_LANGUAGE")

# Backups are written to backups/<today's date>/<language>_md_only.
date = datetime.date.today()
backup_path = os.path.join('backups', str(date), language) + '_md_only'
# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, without a separate (race-prone) existence check.
os.makedirs(backup_path, exist_ok=True)

# One (filename, title, author_id) tuple is appended per article saved.
log = []
# Get the articles, one page at a time, and save each as a Markdown file.
# The locale segment of the URL is filled in from ZENDESK_LANGUAGE so the
# articles fetched match the language the backup folder is named after.
endpoint = zendesk + (
    '/api/v2/help_center/{locale}/articles.json'
    '?sort_by=created_at&sort_order=asc'
).format(locale=language.lower())

# Make sure the destination exists once, before any file is written.
os.makedirs(backup_path, exist_ok=True)

# Each response supplies the URL of the following page in 'next_page';
# the loop stops as soon as that value is empty.
while endpoint:
    response = session.get(endpoint)
    if response.status_code != 200:
        print('Failed to retrieve articles with error {}'.format(
            response.status_code))
        # Stop with a non-zero status so a calling shell or scheduler can
        # tell the backup did not complete.
        raise SystemExit(1)
    data = response.json()

    for article in data['articles']:
        # Articles without a body have nothing to convert; skip them.
        if article['body'] is None:
            continue

        title = '# ' + article['title']
        # '/' would be read as a path separator, so it is swapped for '|';
        # the article id is appended to the title to form the file name.
        filename = article['title'].replace(
            '/', '|') + '_{id}.md'.format(id=article['id'])
        print("processing {}".format(filename))

        # Convert the article HTML to Markdown with '#'-style headings and
        # put the article title on top as a level-1 heading.
        article_body = article['body']
        markdown = markdownify.markdownify(
            article_body, heading_style="ATX")
        with open(os.path.join(backup_path, filename),
                  mode='w', encoding='utf-8') as f:
            f.write(title + '\n' + markdown)
        print('{id} copied!'.format(id=article['id']))

        log.append((filename, article['title'], article['author_id']))

    endpoint = data['next_page']
# Write a CSV index of the backup: a header row followed by one row per
# saved article (file name, title, author id) taken from `log`.
# newline='' leaves line endings to the csv module, as its documentation
# requires; without it, platforms that translate '\n' on write end up with
# an extra blank line after every row.
with open(os.path.join(backup_path, '_log.csv'), mode='wt',
          encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(('File', 'Title', 'Author ID'))
    writer.writerows(log)