-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain-mysql.py
89 lines (78 loc) · 3.42 KB
/
main-mysql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2017 Pier Alberto <pieralbertopierini@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
# Built on LinuxMint 18.3
from bs4 import BeautifulSoup
import urllib2
import MySQLdb
import sys
# Reload sys and encoding to prevent errors
reload(sys)
sys.setdefaultencoding('utf-8')
db = MySQLdb.connect(host="localhost", # your host
user="username", # username
passwd="password", # password
db="database_name") # name of the database
# Create a Cursor object to execute queries.
cursor = db.cursor()
wiki = "http://en.wikipedia.org/wiki/List_of_districts_of_Costa_Rica"
header = {'User-Agent': 'Mozilla/5.0'} #Needed to prevent 403 error on Wikipedia
req = urllib2.Request(wiki,headers=header)
page = urllib2.urlopen(req)
soup = BeautifulSoup(page)
province = ""
canton = ""
district = ""
table = soup.find("table", { "class" : "wikitable sortable" })
for row in table.findAll("tr"):
cells = row.findAll("td")
#For each "tr", assign each "td" to a variable.
if len(cells) == 4:
try:
# Insert Provincia
province = cells[0].find(text=True)
sql_province = "INSERT INTO Provincia (Provincia) SELECT * FROM (SELECT '%s') AS tmp WHERE NOT EXISTS (SELECT Provincia FROM Provincia WHERE Provincia = '%s') LIMIT 1" %(province, province)
# Execute the SQL command
cursor.execute(sql_province)
# Insert Canton
canton = cells[1].find(text=True)
search_provincia_id = "SELECT id FROM Provincia WHERE Provincia = '%s'" %(province)
cursor.execute(search_provincia_id)
data_provincia_id = cursor.fetchone()
provinciaid = data_provincia_id[0]
sql_canton = "INSERT INTO Cantón (Cantón, ProvinciaID) SELECT * FROM (SELECT '%s', '%s') AS tmp WHERE NOT EXISTS (SELECT Cantón FROM Cantón WHERE Cantón = '%s') LIMIT 1" %(canton, provinciaid, canton)
# Execute the sql command
cursor.execute(sql_canton)
# Insert District
district = cells[2].find(text=True)
search_canton_id = "SELECT id FROM Cantón WHERE Cantón = '%s'" %(canton)
cantonid = cursor.execute(search_canton_id)
data_canton_id = cursor.fetchone()
cantonid = data_canton_id[0]
sql_distrito = "INSERT INTO Distrito (Distrito, CantónID) VALUES ('%s', '%s')" %(district, cantonid)
# Execute the sql Coammand
cursor.execute(sql_distrito)
# Commit your changes in the database
db.commit()
except:
# Rollback in case there is any error
db.rollback()
# disconect from server
db.close()