Skip to content

Commit

Permalink
add dailywire
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Chalstrey authored and Ed Chalstrey committed Jun 13, 2019
1 parent a306977 commit 22dcdab
Show file tree
Hide file tree
Showing 4 changed files with 1,304 additions and 1 deletion.
3 changes: 2 additions & 1 deletion misinformation/middlewares/jsloadbuttonmiddleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def __init__(self):
'//button[text()="Load More"]',
'//button[contains(@class, "show-more")]',
'//button[@phx-track-id="load more"]',
'//form[@class="gdpr-form"]/input[@class="btn"]'
'//form[@class="gdpr-form"]/input[@class="btn"]',
'//ul[contains(@class, "pager--infinite-scroll")]/li/a'
]

def first_load_button_xpath(self):
Expand Down
23 changes: 23 additions & 0 deletions site_configs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,29 @@ dailykos.com:
select_expression: '//div[@class="story-intro story-content"]'
match_rule: 'single'

# Crawl and extraction settings for dailywire.com.
dailywire.com:
  site_name: 'dailywire.com'
  start_url: 'https://www.dailywire.com/'
  crawl_strategy:
    method: 'index_page'
    index_page:
      url_must_contain: 'dailywire.com'
      # NOTE(review): start_url itself contains 'dailywire.com/' -- confirm
      # how the crawler applies this filter to index page URLs.
      url_must_not_contain: 'dailywire.com/'
      article_links: '//article/div[contains(@class, "media")]/a'
  article:
    # Only URLs containing '/news/' are treated as articles.
    url_must_contain: '/news/'
    byline:
      select_method: 'xpath'
      select_expression: '//div[@class="author-information"]/a/text()'
      match_rule: 'first'
    content:
      select_method: 'xpath'
      select_expression: '//div[@class="field-body"]'
      match_rule: 'single'
      # Elements matched by these XPaths are listed for removal from the
      # selected content (an embedded widget container and ad units).
      remove_expressions:
        - '//div[@id="powr_1"]'
        - '//div[contains(@class, "ad-unit")]'

dallasobserver.com:
site_name: 'dallasobserver.com'
start_url: 'https://www.dallasobserver.com/topic/politics-7019564'
Expand Down
Loading

0 comments on commit 22dcdab

Please sign in to comment.