wsj: remove WSJ from supported platforms (#194)

* wsj: disable downloader command and url matching function
* readme: remove wsj from supported sites 😭
* workflows: remove WSJ tests from status check
thisisparker · Jul 7, 2024 · f17a2ae · f17a2ae
1 parent 1fd662c
commit f17a2ae
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 21 deletions.
diff --git a/.github/workflows/status-check-outlets.yml b/.github/workflows/status-check-outlets.yml
@@ -169,24 +169,6 @@ jobs:
       - name: Test Vox
         if: '!cancelled()'
         run: xword-dl vox
-      - name: Test WSJ latest
-        if: '!cancelled()'
-        env:
-          DATADOME_COOKIE: ${{ secrets.DATADOME_COOKIE }}
-        run: |
-          xword-dl wsj --settings '{"cookies": {"datadome":"'$DATADOME_COOKIE'"}}'
-      - name: Test WSJ by URL
-        if: '!cancelled()'
-        env:
-          DATADOME_COOKIE: ${{ secrets.DATADOME_COOKIE }}
-        run: |
-          xword-dl --settings '{"cookies": {"datadome":"'$DATADOME_COOKIE'"}}' "https://www.wsj.com/articles/carbon-neutral-saturday-crossword-january-15-11642193133"
-      - name: Test WSJ Friday contest
-        if: '!cancelled()'
-        env:
-          DATADOME_COOKIE: ${{ secrets.DATADOME_COOKIE }}
-        run: |
-          xword-dl --settings '{"cookies": {"datadome":"'$DATADOME_COOKIE'"}}' "https://www.wsj.com/articles/hitting-the-high-notes-friday-crossword-january-5-0f18d7c1"
       - name: Test Washington Post latest
         if: '!cancelled()'
         run: xword-dl wp

diff --git a/README.md b/README.md
@@ -31,7 +31,6 @@ Supported outlets:
 |*Universal*|`uni`|✔️|✔️||
 |*USA Today*|`usa`|✔️|✔️||
 |*Vox*|`vox`|✔️|||
-|*Wall Street Journal*|`wsj`|✔️||✔️|
 |*Washington Post*|`wp`|✔️|✔️||
 
 To download a puzzle, install `xword-dl` and run it on the command line.

diff --git a/xword_dl/downloader/wsjdownloader.py b/xword_dl/downloader/wsjdownloader.py
@@ -8,7 +8,11 @@
 from ..util import XWordDLException
 
 class WSJDownloader(BaseDownloader):
-    command = 'wsj'
+#   Disabling this downloader for now (2024-07-07) because anti-scraping tech
+#   is preventing it from working. Hopefully we'll find a workaround or a
+#   satisfactory mechanism for getting browser cookies in at runtime.
+#   Tracking issue: https://github.com/thisisparker/xword-dl/issues/178
+#   command = 'wsj'
     outlet = 'Wall Street Journal'
     outlet_prefix = 'WSJ'
 
@@ -17,7 +21,7 @@ def __init__(self, **kwargs):
 
     @staticmethod
     def matches_url(url_components):
-        return 'wsj.com' in url_components.netloc
+        return False # disabling, see above # 'wsj.com' in url_components.netloc
 
     def find_latest(self):
         url = "https://www.wsj.com/news/puzzle"