-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmigrate_old_db_to_wordpress.py
191 lines (172 loc) · 7.6 KB
/
migrate_old_db_to_wordpress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# -*- coding: UTF-8 -*-
import csv
import datetime
from xml.sax.saxutils import escape
# Opening part of the generated WXR (WordPress eXtended RSS) document: the XML
# declaration, the explanatory comments, the <rss>/<channel> opening tags and
# the site-level metadata.  It is written once, before any <item> element.
#
# The literal starts directly with "<?xml": an XML declaration is only valid
# as the very first characters of a document, so no newline may precede it.
HEADER = """<?xml version="1.0" encoding="UTF-8" ?>
<!-- This is a WordPress eXtended RSS file generated by WordPress as an export of your site. -->
<!-- It contains information about your site's posts, pages, comments, categories, and other content. -->
<!-- You may use this file to transfer that content from one site to another. -->
<!-- This file is not intended to serve as a complete backup of your site. -->
<!-- To import this information into a WordPress site follow these steps: -->
<!-- 1. Log in to that site as an administrator. -->
<!-- 2. Go to Tools: Import in the WordPress admin panel. -->
<!-- 3. Install the "WordPress" importer from the list. -->
<!-- 4. Activate & Run Importer. -->
<!-- 5. Upload this file using the form provided on that page. -->
<!-- 6. You will first be asked to map the authors in this export file to users -->
<!-- on the site. For each author, you may choose to map to an -->
<!-- existing user on the site or to create a new user. -->
<!-- 7. WordPress will then import each of the posts, pages, comments, categories, etc. -->
<!-- contained in this file into your site. -->
<!-- generator="WordPress/4.8.1" created="2017-09-03 13:19" -->
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
<title>Роман Терен</title>
<link>http://wp.teren.org.ua</link>
<description>мизантроп, бумагомаратель, космополит</description>
<pubDate>Sun, 03 Sep 2017 13:19:07 +0000</pubDate>
<language>en-US</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:base_site_url>http://wp.teren.org.ua</wp:base_site_url>
<wp:base_blog_url>http://wp.teren.org.ua</wp:base_blog_url>
<wp:author><wp:author_id>1</wp:author_id><wp:author_login><![CDATA[teren]]></wp:author_login><wp:author_email><![CDATA[maxim.galushka@mail.ru]]></wp:author_email><wp:author_display_name><![CDATA[teren]]></wp:author_display_name><wp:author_first_name><![CDATA[]]></wp:author_first_name><wp:author_last_name><![CDATA[]]></wp:author_last_name></wp:author>
<generator>https://wordpress.org/?v=4.8.1</generator>
"""
# <item> template for a top-level (genre) page; its <wp:post_parent> is 0.
#
# str.format placeholders:
#   {top_page} - page title shown to readers
#   {token}    - URL path segment of the page; also used as its post_name
#                (slug) so that every genre page gets its own slug
#   {top_id}   - numeric post ID, referenced by child pages as their parent
MAIN = """
<item>
<title>{top_page}</title>
<link>http://wp.teren.org.ua/{token}/</link>
<pubDate>Sun, 03 Sep 2017 13:15:31 +0000</pubDate>
<dc:creator><![CDATA[teren]]></dc:creator>
<guid isPermaLink="false">http://wp.teren.org.ua/?page_id={top_id}</guid>
<description></description>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>{top_id}</wp:post_id>
<wp:post_date><![CDATA[2017-09-03 13:15:31]]></wp:post_date>
<wp:post_date_gmt><![CDATA[2017-09-03 13:15:31]]></wp:post_date_gmt>
<wp:comment_status><![CDATA[closed]]></wp:comment_status>
<wp:ping_status><![CDATA[closed]]></wp:ping_status>
<wp:post_name><![CDATA[{token}]]></wp:post_name>
<wp:status><![CDATA[publish]]></wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type><![CDATA[page]]></wp:post_type>
<wp:post_password><![CDATA[]]></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
</item>
"""
# <item> template for a per-year page nested under a genre page.
#
# str.format placeholders:
#   {year_title} - human-readable page title
#   {year}       - four-digit year; used in the link, the dates and as the
#                  post_name (slug), matching the last segment of <link>
#   {top_token}  - URL path segment of the parent genre page
#   {year_id}    - numeric post ID of this page
#   {top_id}     - numeric post ID of the parent genre page
#
# The optional day-of-week is omitted from <pubDate>: 3 September falls on a
# different weekday each year, so a fixed "Sun," would be wrong for most years.
YEAR = """
<item>
<title>{year_title}</title>
<link>http://wp.teren.org.ua/{top_token}/{year}</link>
<pubDate>03 Sep {year} 13:00:00 +0000</pubDate>
<dc:creator><![CDATA[teren]]></dc:creator>
<guid isPermaLink="false">http://wp.teren.org.ua/?page_id={year_id}</guid>
<description></description>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>{year_id}</wp:post_id>
<wp:post_date><![CDATA[{year}-09-03 13:15:31]]></wp:post_date>
<wp:post_date_gmt><![CDATA[{year}-09-03 13:15:31]]></wp:post_date_gmt>
<wp:comment_status><![CDATA[closed]]></wp:comment_status>
<wp:ping_status><![CDATA[closed]]></wp:ping_status>
<wp:post_name><![CDATA[{year}]]></wp:post_name>
<wp:status><![CDATA[publish]]></wp:status>
<wp:post_parent>{top_id}</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type><![CDATA[page]]></wp:post_type>
<wp:post_password><![CDATA[]]></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
</item>
"""
# Closing tags that finish the document: written once, after the last <item>.
FOOTER = (
    "\n"
    "</channel>\n"
    "</rss>\n"
)
# Example of the RFC 822 date format used in <pubDate>: Sun, 03 Sep 2017 13:18:45 +0000
# <item> template for a single migrated text, stored as a page under a year
# page.
#
# str.format placeholders:
#   {title}      - page title (inserted as element text, not CDATA)
#   {top_token}  - URL path segment of the genre page
#   {year}       - year path segment of the link
#   {post_token} - last path segment of the link; also the post_name (slug)
#   {post_date}  - date string placed in front of " 12:00:00" in the dates
#   {post_id}    - numeric post ID of this page
#   {parent_id}  - numeric post ID of the parent page
#   {content}    - body, placed inside a CDATA section
#   {subscript}  - trailing note appended to the body after two line breaks
#
# comment_status is "open": WordPress recognises "open" and "closed" only.
# NOTE(review): <pubDate> reuses {post_date}, so it is only RFC 822-shaped if
# the caller passes such a string - confirm whether the importer reads it.
ITEM = """
<item>
<title>{title}</title>
<link>http://wp.teren.org.ua/{top_token}/{year}/{post_token}</link>
<pubDate>{post_date} 12:00:00 +0000</pubDate>
<dc:creator><![CDATA[teren]]></dc:creator>
<guid isPermaLink="false">http://wp.teren.org.ua/?page_id={post_id}</guid>
<description></description>
<content:encoded><![CDATA[{content}<br/><br/>{subscript}]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>{post_id}</wp:post_id>
<wp:post_date><![CDATA[{post_date} 12:00:00]]></wp:post_date>
<wp:post_date_gmt><![CDATA[{post_date} 12:00:00]]></wp:post_date_gmt>
<wp:comment_status><![CDATA[open]]></wp:comment_status>
<wp:ping_status><![CDATA[closed]]></wp:ping_status>
<wp:post_name><![CDATA[{post_token}]]></wp:post_name>
<wp:status><![CDATA[publish]]></wp:status>
<wp:post_parent>{parent_id}</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type><![CDATA[page]]></wp:post_type>
<wp:post_password><![CDATA[]]></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
</item>
"""
# ---------------------------------------------------------------------------
# Migration driver.
#
# Reads the legacy texts exported as CSV and writes a WXR import file holding
# a three-level page tree:
#     genre page (MAIN)  ->  year page (YEAR)  ->  one page per CSV row (ITEM)
#
# CSV columns read below (0-based):
#     1 = genre id, 2 = title, 4 = URL token, 5 = date as DD/MM/YYYY,
#     6 = subscript, 8 = body text
#
# Post IDs are allocated from three separate counters: genre pages start at
# 100001, year pages at 10001 and text pages at 2.
# ---------------------------------------------------------------------------
INPUT_CSV = '/cygdrive/c/wamp/www/teren-website/rt_texts.csv'
OUTPUT_XML = '/cygdrive/c/wamp/www/teren-website/rt_texts_out_all.xml'

# Date used for rows whose date column cannot be parsed.
FALLBACK_DATE = datetime.datetime(2006, 1, 1)


def _cdata_safe(text):
    """Return *text* with every ``]]>`` split so it cannot close a CDATA section."""
    return text.replace(']]>', ']]]]><![CDATA[>')


# HEADER declares encoding="UTF-8", so the output is written as UTF-8
# explicitly instead of relying on the platform default.
# NOTE(review): the CSV export is assumed to be UTF-8 as well - confirm.
# newline='' is what the csv module expects so that quoted fields containing
# line breaks are parsed correctly.
with open(INPUT_CSV, 'r', encoding='utf-8', newline='') as infile, \
        open(OUTPUT_XML, 'w', encoding='utf-8') as outfile:
    outfile.write(HEADER)

    tops = {'poetry': 'Поэзия', 'prose': 'Проза', 'drama': 'Драматургия'}
    top_ids = {'poetry': 1, 'prose': 2, 'drama': 3}
    top_token_ids = {1: 'poetry', 2: 'prose', 3: 'drama'}

    main_id = 100000
    year_id = 10000
    post_id = 1
    year_ids = {}      # genre id -> {year (int) -> post ID of the year page}
    top_page_ids = {}  # genre id -> post ID of the genre page

    # Parent pages are written first so that every text page emitted later
    # refers to a parent that already appears earlier in the file.
    for top_token, top_title in tops.items():
        main_id += 1
        genre_id = top_ids[top_token]
        top_page_ids[genre_id] = main_id
        outfile.write(MAIN.format(
            top_page=top_title,
            token=top_token,
            top_id=main_id,
        ))
        genre_years = year_ids.setdefault(genre_id, {})
        for year in range(2003, 2018):
            year_id += 1
            outfile.write(YEAR.format(
                year_title='{year} - {title}'.format(year=year, title=top_title.lower()),
                year=year,
                year_id=year_id,
                top_token=top_token,
                top_id=main_id,
            ))
            genre_years[year] = year_id

    for row in csv.reader(infile):
        if not row:
            # csv.reader yields an empty list for a blank line.
            continue
        genre_id = int(row[1])
        try:
            parsed_date = datetime.datetime.strptime(row[5], "%d/%m/%Y")
        except ValueError:
            # Missing or malformed date: file the text under the fallback date.
            parsed_date = FALLBACK_DATE
        # A year without its own page falls back to the genre page rather
        # than to an ID that no generated page carries.
        parent_id = year_ids[genre_id].get(parsed_date.year, top_page_ids[genre_id])
        post_id += 1
        outfile.write(ITEM.format(
            # The title is plain element text, so XML metacharacters are escaped.
            title=escape(row[2]),
            top_token=top_token_ids[genre_id],
            year=parsed_date.strftime("%Y"),
            post_date=parsed_date.strftime("%Y-%m-%d"),
            content=_cdata_safe(row[8]),
            post_id=post_id,
            # Used both in <link> text and inside CDATA, so passed unchanged;
            # presumably a plain URL slug - verify against the data.
            post_token=row[4],
            parent_id=parent_id,
            subscript=_cdata_safe(row[6]),
        ))

    outfile.write(FOOTER)