Skip to content
This repository has been archived by the owner on Apr 20, 2021. It is now read-only.

Commit

Permalink
feat: Allow choosing whether logs are written
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhaoQi99 committed Oct 14, 2018
1 parent 35e6196 commit 0afacec
Show file tree
Hide file tree
Showing 10 changed files with 51 additions and 36 deletions.
3 changes: 2 additions & 1 deletion Pydoc/configs.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
<strong>EMAIL_PORT</strong> = 0<br>
<strong>EMAIL_SERVER</strong> = ''<br>
<strong>FROM_ADDR</strong> = ''<br>
<strong>LOG_ENABLED</strong> = True<br>
<strong>PASSWORD</strong> = ''<br>
<strong>SCHOOL_NAME</strong> = ''<br>
<strong>SPIDER_CONFIG</strong> = [{'coding': '', 'rule': '', 'subject_CN': '', 'subject_EN': '', 'url': '', 'url_main': ''}]<br>
<strong>SPIDER_CONFIG</strong> = [{'coding': '', 'rule': '', 'subject_CN': '', 'subject_EN': '', 'url': '', 'url_main': ''}, {'coding': '', 'rule': '', 'subject_CN': '', 'subject_EN': '', 'url': '', 'url_main': ''}]<br>
<strong>TWILIO_NUMBER</strong> = ''<br>
<strong>VERSION</strong> = ''</td></tr></table>
</body></html>
4 changes: 2 additions & 2 deletions Pydoc/send.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<font color="#ffffff" face="helvetica, arial"><big><strong>Functions</strong></big></font></td></tr>

<tr><td bgcolor="#eeaa77"><tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</tt></td><td>&nbsp;</td>
<td width="100%"><dl><dt><a name="-Send"><strong>Send</strong></a>(msgs, subject, send_number, to_addr_str, flag=1)</dt><dd><tt>向手机号码为send_number的人发送通知信息<br>
<td width="100%"><dl><dt><a name="-Send"><strong>Send</strong></a>(msgs, subject, send_number, to_addr_str, flag=True)</dt><dd><tt>向手机号码为send_number的人发送通知信息<br>
向to_addr_str中的邮箱地址发送主题为subject的通知信息<br>
支持是否写入日志记录的选择&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
Expand All @@ -40,7 +40,7 @@
&nbsp;&nbsp;&nbsp;&nbsp;send_number:&nbsp;短信接收者的手机号码<br>
&nbsp;&nbsp;&nbsp;&nbsp;to_addr_str:&nbsp;收件人的邮箱地址,多个邮箱地址之间应以','分割,类型为字符串<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;例如:'example@qq.com','example1@qq.com,example2@qq.com'<br>
&nbsp;&nbsp;&nbsp;&nbsp;flag:&nbsp;一个可选变量,用来决定是否在发送日志中记录此次发送信息,默认为1(记录)</tt></dd></dl>
&nbsp;&nbsp;&nbsp;&nbsp;flag:&nbsp;一个可选变量,用来决定是否在发送日志中记录此次发送信息,默认为True(记录)</tt></dd></dl>
<dl><dt><a name="-Send_email"><strong>Send_email</strong></a>(txt, to_addr_str, subject)</dt><dd><tt>向to_addr_str中的邮箱地址发送主题为subject,正文部分为txt的邮件<br>
支持多人同时发送<br>
Args:<br>
Expand Down
4 changes: 2 additions & 2 deletions Pydoc/spider.html
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
','师大主页暂无新通知!<br>
'<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;[['关于xxx的通知','2017-03-10','<a href="http://xxxx.com'],['关于xxx的通知','2017-03-10','http://xxxx.com">http://xxxx.com'],['关于xxx的通知','2017-03-10','http://xxxx.com</a>']]</tt></dd></dl>
<dl><dt><a name="-Spider"><strong>Spider</strong></a>(url, url_main, rule, subject_CN, subject_EN, coding, flag=1)</dt><dd><tt>爬取url的源码,并从中按照rule提供的正则表达式规则提取有用信息,并对数据进行处理,<br>
<dl><dt><a name="-Spider"><strong>Spider</strong></a>(url, url_main, rule, subject_CN, subject_EN, coding, flag=True)</dt><dd><tt>爬取url的源码,并从中按照rule提供的正则表达式规则提取有用信息,并对数据进行处理,<br>
生成通知提醒的内容,在subject_EN+'_log.md'文件中记录日志,<br>
返回检查更新的状态码,以及通知提醒的内容<br>
若无新通知,则通知提醒的内容为空<br>
Expand All @@ -73,7 +73,7 @@
&nbsp;&nbsp;&nbsp;&nbsp;rule:&nbsp;表示正则表达式规则的字符串,限制为三个分组,用于从源码中提取信息<br>
&nbsp;&nbsp;&nbsp;&nbsp;subject_CN:&nbsp;抓取的网站类型<br>
&nbsp;&nbsp;&nbsp;&nbsp;subject_EN:&nbsp;生成的日志文件的文件名前缀,数据文件的文件名,以及输出时显示在单条日志信息前的对日志类型的描述<br>
&nbsp;&nbsp;&nbsp;&nbsp;flag:&nbsp;一个可选变量,用来决定是否在日志中记录此次检查的结果,默认为1(记录)<br>
&nbsp;&nbsp;&nbsp;&nbsp;flag:&nbsp;一个可选变量,用来决定是否在日志中记录此次检查的结果,默认为True(记录)<br>
&nbsp;&nbsp;&nbsp;&nbsp;<br>
Returns:<br>
&nbsp;&nbsp;&nbsp;&nbsp;status:&nbsp;检查更新的状态码<br>
Expand Down
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
- [x] 通知更新邮件提醒
- [x] 多人同时进行提醒
- [x] 发送日志、更新检查日志
- [ ] 日志功能可选
- [x] 日志功能可选
- [ ] 每个用户要提醒的部门可选
- [ ] 数据库存储

Expand Down Expand Up @@ -58,6 +58,11 @@ AUTHOR_NAME = ''
AUTHOR_EMAIL = ''
```

### Log Config
是否将更新检查日志和发送日志写入日志文件,默认为True(写入)
```
LOG_ENABLED = True
```
### Spider Config
爬虫的相关配置,包括:部门类型(EN),部门类型(CN),"更多通知"页的链接,链接的公共部分,正则表达式,网页编码格式
subject_EN:数据文件的文件名
Expand Down Expand Up @@ -86,9 +91,11 @@ SPIDER_CONFIG = [
]
```
这里有一个爬虫配置的例子:
### Examples
#### Example
```python
SPIDER_CONFIG = [{'subject_EN':'snnu_index', 'subject_CN':'师大主页', 'url': 'http://www.snnu.edu.cn/tzgg.htm', 'url_main' : 'http://www.snnu.edu.cn/info/1085/',
SPIDER_CONFIG = [
{
'subject_EN':'snnu_index', 'subject_CN':'师大主页', 'url': 'http://www.snnu.edu.cn/tzgg.htm', 'url_main' : 'http://www.snnu.edu.cn/info/1085/',
'rule' : 'info/1085/(?P<link>\d+\.htm)" target="_blank">(?P<title>[\s\S]{5,100})((?P<date>\d*-\d*-\d*))','coding':'utf-8'},
{'subject_EN':'snnu_css', 'subject_CN':'计科院主页', 'url': 'http://ccs.snnu.edu.cn/tzgg.htm', 'url_main' : 'http://ccs.snnu.edu.cn/'
, 'rule' : '<a target="_blank" href="(?P<link>[^"]*)">(?P<title>[^( </)]*)[^"]*"[^>]*>(?P<date>\d*-\d*-\d*)','coding':'utf-8'},
Expand Down
3 changes: 3 additions & 0 deletions src/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
EMAIL_PORT = 0
EMAIL_SERVER = ''

# Log Config
LOG_ENABLED = True

# spider config
SPIDER_CONFIG = [
{
Expand Down
4 changes: 2 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ def main():
for dic in configs.SPIDER_CONFIG:
try:
status, new_data = Spider(dic['url'], dic['url_main'], dic['rule'], dic['subject_CN'],
dic['subject_EN'], dic['coding'])
dic['subject_EN'], dic['coding'],configs.LOG_ENABLED)
if status >= 1:
send.Send(new_data, dic['subject_CN'], send_number, to_addr_str)
send.Send(new_data, dic['subject_CN'], send_number, to_addr_str,configs.LOG_ENABLED)
except Exception as e:
print('Exception: ', e)
Error_log = '异常信息如下:\n' + format_exc() + '-' * 70 + '\n'
Expand Down
6 changes: 3 additions & 3 deletions src/send.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def Send_email(txt, to_addr_str, subject):
return log_send_email


def Send(msgs, subject, send_number, to_addr_str, flag=1):
def Send(msgs, subject, send_number, to_addr_str, flag=True):
'''
向手机号码为send_number的人发送通知信息
向to_addr_str中的邮箱地址发送主题为subject的通知信息
Expand All @@ -114,7 +114,7 @@ def Send(msgs, subject, send_number, to_addr_str, flag=1):
send_number: 短信接收者的手机号码
to_addr_str: 收件人的邮箱地址,多个邮箱地址之间应以','分割,类型为字符串
例如:'example@qq.com','example1@qq.com,example2@qq.com'
flag: 一个可选变量,用来决定是否在发送日志中记录此次发送信息,默认为1(记录)
flag: 一个可选变量,用来决定是否在发送日志中记录此次发送信息,默认为True(记录)
'''
temp = ''
log_send = []
Expand All @@ -130,5 +130,5 @@ def Send(msgs, subject, send_number, to_addr_str, flag=1):

log_send.append(log_send_sms)
log_send.append(log_send_email)
if(flag == 1):
if(flag == True):
Log_Write('Send', log_send)
6 changes: 3 additions & 3 deletions src/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def Log_generate(status, data, subject_CN):
return log_txt


def Spider(url, url_main, rule, subject_CN, subject_EN, coding, flag=1):
def Spider(url, url_main, rule, subject_CN, subject_EN, coding, flag=True):
'''
爬取url的源码,并从中按照rule提供的正则表达式规则提取有用信息,并对数据进行处理,
生成通知提醒的内容,在subject_EN+'_log.md'文件中记录日志,
Expand All @@ -139,7 +139,7 @@ def Spider(url, url_main, rule, subject_CN, subject_EN, coding, flag=1):
rule: 表示正则表达式规则的字符串,限制为三个分组,用于从源码中提取信息
subject_CN: 抓取的网站类型
subject_EN: 生成的日志文件的文件名前缀,数据文件的文件名,以及输出时显示在单条日志信息前的对日志类型的描述
flag: 一个可选变量,用来决定是否在日志中记录此次检查的结果,默认为1(记录)
flag: 一个可选变量,用来决定是否在日志中记录此次检查的结果,默认为True(记录)
Returns:
status: 检查更新的状态码
Expand All @@ -151,7 +151,7 @@ def Spider(url, url_main, rule, subject_CN, subject_EN, coding, flag=1):
status, new_data = Data_processing(subject_EN, data_use, url_main)

log_txt = Log_generate(status, new_data, subject_CN)
if flag == 1:
if flag == True:
tool.Log_Write(subject_EN, log_txt)
return status, new_data

41 changes: 22 additions & 19 deletions test/test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,45 +20,48 @@
EMAIL_PORT = 0
EMAIL_SERVER = ''

# Log Config
LOG_ENABLED = True

# spider config
SPIDER_CONFIG = [
{
'subject_EN':'snnu_index',
'subject_CN':'师大主页',
'url': 'http://www.snnu.edu.cn/tzgg.htm',
'subject_EN':'snnu_index',
'subject_CN':'师大主页',
'url': 'http://www.snnu.edu.cn/tzgg.htm',
'url_main' : 'http://www.snnu.edu.cn/info/1085/',
'rule' : 'info/1085/(?P<link>\d+\.htm)" target="_blank">(?P<title>[\s\S]{5,100})((?P<date>\d*-\d*-\d*))',
'rule' : 'info/1085/(?P<link>\d+\.htm)" target="_blank">(?P<title>[\s\S]{5,100})((?P<date>\d*-\d*-\d*))',
'coding':'utf-8'
},
{
'subject_EN':'snnu_css',
'subject_CN':'计科院主页',
'subject_EN':'snnu_css',
'subject_CN':'计科院主页',
'url': 'http://ccs.snnu.edu.cn/tzgg.htm', 'url_main' : 'http://ccs.snnu.edu.cn/',
'rule' : '<a target="_blank" href="(?P<link>[^"]*)">(?P<title>[^( </)]*)[^"]*"[^>]*>(?P<date>\d*-\d*-\d*)',
'rule' : '<a target="_blank" href="(?P<link>[^"]*)">(?P<title>[^( </)]*)[^"]*"[^>]*>(?P<date>\d*-\d*-\d*)',
'coding':'utf-8'
},
{
'subject_EN':'snnu_jwc',
'subject_CN':'教务处主页',
'url': 'http://jwc.snnu.edu.cn/news_more.xhm?lm=2',
'subject_EN':'snnu_jwc',
'subject_CN':'教务处主页',
'url': 'http://jwc.snnu.edu.cn/news_more.xhm?lm=2',
'url_main' : 'http://jwc.snnu.edu.cn/html/news_view.xhm?newsid=',
'rule' : 'newsid=(?P<link>\d*)" [^ ]* title="(?P<title>[^(">)]*)[^<]*[^(]*\((?P<date>\d*/\d*/\d*)',
'rule' : 'newsid=(?P<link>\d*)" [^ ]* title="(?P<title>[^(">)]*)[^<]*[^(]*\((?P<date>\d*/\d*/\d*)',
'coding':'gbk'
},
{
'subject_EN':'snnu_xsc',
'subject_CN':'学生处主页',
'url': 'http://www.xsc.snnu.edu.cn/Announcements.asp',
'subject_EN':'snnu_xsc',
'subject_CN':'学生处主页',
'url': 'http://www.xsc.snnu.edu.cn/Announcements.asp',
'url_main' : 'http://www.xsc.snnu.edu.cn/Announcements.asp?id=144&bh=',
'rule' : 'gk3">(?P<date>\d*-\d*-\d*)[^;]*;[^;]*;[^;]*;[^;]*;bh=(?P<link>\d*)[^>]*>(?P<title>[^</]*)',
'rule' : 'gk3">(?P<date>\d*-\d*-\d*)[^;]*;[^;]*;[^;]*;[^;]*;bh=(?P<link>\d*)[^>]*>(?P<title>[^</]*)',
'coding':'gbk'
},
{
'subject_EN':'snnu_lib',
'subject_CN':'图书馆主页',
'url': 'http://www.lib.snnu.edu.cn/action.do?webid=w-d-bggg-l',
'subject_EN':'snnu_lib',
'subject_CN':'图书馆主页',
'url': 'http://www.lib.snnu.edu.cn/action.do?webid=w-d-bggg-l',
'url_main' : 'http://www.lib.snnu.edu.cn/action.do?webid=w-l-showmsg&gtype=a&pid=',
'rule' : 'pid=(?P<link>\d*)[\s\S]{20,57}>(?P<title>[^<]*)</[af][\S\s]{18,70}(?P<date>\d{4}-\d*-\d*)',
'rule' : 'pid=(?P<link>\d*)[\s\S]{20,57}>(?P<title>[^<]*)</[af][\S\s]{18,70}(?P<date>\d{4}-\d*-\d*)',
'coding':'utf-8'
}
]
3 changes: 2 additions & 1 deletion test/test_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from traceback import format_exc
from tool import *
from test_configs import *
from configs import LOG_ENABLED


def test_re_group():
Expand Down Expand Up @@ -65,7 +66,7 @@ def test_Spider():
for dic in spider_list:
try:
status, data = Spider(dic['url'], dic['url_main'], dic['rule'], dic['subject_CN'],
dic['subject_EN'], dic['coding'])
dic['subject_EN'], dic['coding'],LOG_ENABLED)
print(status)
print(data)
except Exception as e:
Expand Down

0 comments on commit 0afacec

Please sign in to comment.