Skip to content

Commit

Permalink
Merge pull request #208 from alan-turing-institute/website/191-abqjournal
Browse files Browse the repository at this point in the history

add abqjournal
  • Loading branch information
edwardchalstrey1 authored Jun 13, 2019
2 parents a306977 + 1e5a664 commit 08772e7
Show file tree
Hide file tree
Showing 6 changed files with 1,821 additions and 1 deletion.
3 changes: 2 additions & 1 deletion misinformation/middlewares/jsloadbuttonmiddleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def __init__(self):
'//button[text()="Load More"]',
'//button[contains(@class, "show-more")]',
'//button[@phx-track-id="load more"]',
'//form[@class="gdpr-form"]/input[@class="btn"]'
'//form[@class="gdpr-form"]/input[@class="btn"]',
'//div[contains(@class, "load-btn")]/a',
]

def first_load_button_xpath(self):
Expand Down
30 changes: 30 additions & 0 deletions site_configs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,36 @@ abcnews.go.com:
- '//div[contains(@class, "callout")]'
- '//figure[contains(@class, "e_image")]'

# Crawl configuration for abqjournal.com, starting from its politics category.
# NOTE(review): the nesting below was reconstructed from a view in which all
# leading whitespace had been stripped; confirm it matches the layout used by
# the neighbouring site entries in this file.
abqjournal.com:
  site_name: 'abqjournal.com'
  start_url: 'https://www.abqjournal.com/category/politics'
  crawl_strategy:
    method: 'index_page'
    index_page:
      # Substring identifying paginated index URLs
      # (presumably used to decide which index pages to follow; verify).
      url_must_contain: '/category/politics/page/'
      # XPath selecting the article anchors on each index page.
      article_links: '//div[@class="primary page-post-list"]/ul[@id="post-list"]/li/a'
  article:
    byline:
      select_method: 'xpath'
      # Author name is read from the page's <meta name="author"> tag.
      select_expression: '//meta[@name="author"]/@content'
      match_rule: 'single'
    publication_datetime:
      select_method: 'xpath'
      select_expression: '//header[@class="entry-title"]/section[@class="entry-meta"]/h6//text()'
      # The expression can yield several text nodes; the last one is used.
      match_rule: 'last'
      datetime_formats:
        # Square brackets escape the literal word "at"; tokens look like the
        # moment/Arrow convention (presumably parsed with Arrow; confirm).
        - 'dddd, MMMM Do, YYYY [at] h:mmA'
    content:
      select_method: 'xpath'
      select_expression: '//div[@class="entry"]'
      match_rule: 'single'
      # XPath expressions for elements listed for removal from the content:
      # adverts, author contact box, captions and other non-article markup.
      remove_expressions:
        - '//div[contains(@class, "advertisement")]'
        - '//div[contains(@id, "story-ad-0-skip-target")]'
        - '//div[contains(@id, "author-contact-bottom")]'
        - '//p[contains(@id, "g-trickster")]'
        - '//p[@class="wp-caption-text"]'

addictinginfo.com:
site_name: 'addictinginfo.com'
start_url: 'http://addictinginfo.com/category/news/'
Expand Down
Loading

0 comments on commit 08772e7

Please sign in to comment.