Skip to content

Commit

Permalink
add dailywire
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Chalstrey authored and Ed Chalstrey committed Jun 13, 2019
1 parent 08772e7 commit a1452a1
Show file tree
Hide file tree
Showing 4 changed files with 1,303 additions and 0 deletions.
1 change: 1 addition & 0 deletions misinformation/middlewares/jsloadbuttonmiddleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(self):
'//button[@phx-track-id="load more"]',
'//form[@class="gdpr-form"]/input[@class="btn"]',
'//div[contains(@class, "load-btn")]/a',
'//ul[contains(@class, "pager--infinite-scroll")]/li/a'
]

def first_load_button_xpath(self):
Expand Down
23 changes: 23 additions & 0 deletions site_configs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,29 @@ dailykos.com:
select_expression: '//div[@class="story-intro story-content"]'
match_rule: 'single'

# Crawl configuration for dailywire.com: where to start, how to discover
# article links from the index page, and how to extract byline and content.
# NOTE(review): the nesting below is reconstructed. With every key at a single
# level, select_method / select_expression / match_rule / url_must_contain are
# duplicate keys and most parsers silently keep only the last value. Placing
# `article` as a sibling of `crawl_strategy` is an assumption — TODO confirm
# against the schema the crawler expects.
dailywire.com:
  site_name: 'dailywire.com'
  start_url: 'https://www.dailywire.com/'
  crawl_strategy:
    method: 'index_page'
    # Settings for the strategy named by `method` above.
    index_page:
      url_must_contain: 'dailywire.com'
      # NOTE(review): this value is also a substring of start_url; verify that
      # the exclusion is intended and does not filter out the index page itself.
      url_must_not_contain: 'dailywire.com/'
      # XPath selecting the anchors that link to articles.
      article_links: '//article/div[contains(@class, "media")]/a'
  article:
    url_must_contain: '/news/'
    byline:
      select_method: 'xpath'
      select_expression: '//div[@class="author-information"]/a/text()'
      match_rule: 'first'
    content:
      select_method: 'xpath'
      select_expression: '//div[@class="field-body"]'
      match_rule: 'single'
      # XPath expressions listed for removal; presumably applied to the
      # selected content node before it is stored — confirm in the crawler.
      remove_expressions:
        - '//div[@id="powr_1"]'
        - '//div[contains(@class, "ad-unit")]'

dallasobserver.com:
site_name: 'dallasobserver.com'
start_url: 'https://www.dallasobserver.com/topic/politics-7019564'
Expand Down
Loading

0 comments on commit a1452a1

Please sign in to comment.