-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget-subcategories.user.js
149 lines (130 loc) · 5.24 KB
/
get-subcategories.user.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// ==UserScript==
// @name Get subcategories
// @namespace http://tampermonkey.net/
// @version 0.1
// @description Bring all articles from subcategories onto this category page
// @author Josh Parker
// @source https://github.com/joshparkerj/silly-internet-tricks/blob/main/wikipedia/get-subcategories.user.js
// @downloadURL https://gist.github.com/joshparkerj/4d8f488ecd0912680635483a8eb97bf6/raw/get-subcategories.user.js
// @updateURL https://gist.github.com/joshparkerj/4d8f488ecd0912680635483a8eb97bf6/raw/get-subcategories.meta.js
// @match https://en.wikipedia.org/wiki/Category:*
// @icon https://www.google.com/s2/favicons?domain=wikipedia.org
// @grant none
// ==/UserScript==
import getCategoryArea from './get-category-area';
(function subcategoryTreeUserScript() {
// Values pre-filled into the two number inputs.
const DEFAULT_DEPTH = 3;
const DEFAULT_MAX = 60;

/**
 * Builds the label shown on the button while it is still enabled.
 *
 * @param {number|string} depth - Layer count; "layer" is pluralised unless this is numerically 1.
 * @param {number|string} max - Subcategory limit, interpolated as-is.
 * @returns {string} The button label.
 */
const buttonText = (depth, max) => {
  const pluralSuffix = +depth === 1 ? '' : 's';
  return `get subcategory pages (up to ${depth} layer${pluralSuffix} deep; max ${max} subcategories)`;
};

// The button that starts the crawl; its label mirrors the current input values.
const getSubcategoryPagesButton = Object.assign(document.createElement('button'), {
  id: 'get-subcategory-pages',
  innerText: buttonText(DEFAULT_DEPTH, DEFAULT_MAX),
});

// Both controls are plain number inputs that differ only in id and starting value.
const createNumberInput = (id, value) => Object.assign(
  document.createElement('input'),
  { id, type: 'number', value },
);
const layersInput = createNumberInput('layers', DEFAULT_DEPTH);
const maxInput = createNumberInput('max', DEFAULT_MAX);

// Keep the button label in step with whatever the user has entered.
const changeListener = () => {
  getSubcategoryPagesButton.innerText = buttonText(layersInput.value, maxInput.value);
};
[layersInput, maxInput].forEach((input) => {
  input.addEventListener('change', changeListener);
});

// Running tally behind the progress label; bumped on every call to disabledButtonText.
let count = 0;

/**
 * Increments the tally and returns the progress label for the disabled button.
 *
 * @returns {string} Text of the form "<count> subcategories found".
 */
const disabledButtonText = () => {
  count += 1;
  return `${count} subcategories found`;
};

// State shared by the functions defined further down in this script.
const parser = new DOMParser(); // turns fetched HTML text into documents
const categoryArea = getCategoryArea(); // node from the imported helper; fetched listings are appended to it
let maxSubcategories; // remaining subcategory budget, assigned when the button is clicked
const alreadyRetrieved = new Set(); // URLs already requested, so they are not fetched twice
/**
 * Follows the "next page" links of a category's page listing and appends every
 * further page of the listing into `doc`'s own `#mw-pages > .mw-content-ltr`
 * container, each batch preceded by an `<hr>`.
 *
 * @param {Document} doc - Parsed category page; its listing container is extended in place.
 * @param {string} [docHref] - URL of the next listing page. When omitted, the first
 *   `#mw-pages > a` link in `doc` whose text contains "next" is used.
 * @returns {Promise<void>} Resolves once no further "next" link is found (immediately
 *   when there is none, or when `doc` has no listing container to extend). Rejects if
 *   a request fails or returns a non-OK status, so callers are not left waiting on a
 *   promise that never settles.
 */
const getAll = function getAll(doc, docHref) {
  // Returns the href of the first "next" link in a listing, or null when there is none.
  const findNextHref = (root) => {
    const nextLink = [...root.querySelectorAll('#mw-pages > a')]
      .find((a) => a.textContent.includes('next'));
    return nextLink ? nextLink.href : null;
  };

  const href = docHref || findNextHref(doc);
  if (!href) {
    return Promise.resolve();
  }

  // Same selector that is applied to the fetched pages below, so both sides agree
  // on what counts as the listing container.
  const docCategoryArea = doc.querySelector('#mw-pages > .mw-content-ltr');
  if (!docCategoryArea) {
    // Nowhere to put further pages; fetching them would be pointless.
    return Promise.resolve();
  }

  // Return the chain itself (no wrapping `new Promise`) so failures propagate.
  return fetch(href)
    .then((r) => {
      if (!r.ok) {
        throw new Error(`request for ${href} failed with status ${r.status}`);
      }
      return r.text();
    })
    .then((text) => parser.parseFromString(text, 'text/html'))
    .then((nextDoc) => {
      const nextDocCategoryArea = nextDoc.querySelector('#mw-pages > .mw-content-ltr');
      if (nextDocCategoryArea) {
        // insertAdjacentHTML appends without re-parsing what is already in the container.
        docCategoryArea.insertAdjacentHTML('beforeend', `<hr>${nextDocCategoryArea.innerHTML}`);
      }
      const nextDocHref = findNextHref(nextDoc);
      // Only recurse with an explicit href: calling getAll(doc) without one would
      // start over from doc's own "next" link and loop forever.
      return nextDocHref ? getAll(doc, nextDocHref) : undefined;
    });
};
/**
 * Fetches one subcategory page, appends its title and page listing to
 * `categoryArea`, then recurses into its subcategories.
 *
 * The shared `maxSubcategories` budget is only spent on subcategories that will
 * actually be fetched: links already in `alreadyRetrieved` are skipped before the
 * budget is applied, and nothing is deducted at the last layer.
 *
 * @param {string} url - Address of the subcategory page; nothing happens when falsy.
 * @param {number|string} depth - Remaining layers to descend, this page included;
 *   nothing happens when below 1. Numeric strings are accepted.
 * @param {string} parent - Title of the parent category, shown next to this one's title.
 * @returns {Promise} Settles once this category and all descendants started from it
 *   have been handled. It does not reject: a failure is logged with console.error
 *   and ends that branch only. The fulfilment value carries no information.
 */
const traverseTree = function traverseTree(url, depth, parent) {
  if (!url || depth < 1) {
    return Promise.resolve();
  }

  // Record the URL before the request is in flight so sibling branches skip it.
  alreadyRetrieved.add(url);

  return fetch(url)
    .then((r) => {
      if (!r.ok) {
        throw new Error(`request for ${url} failed with status ${r.status}`);
      }
      return r.text();
    })
    .then((text) => parser.parseFromString(text, 'text/html'))
    // Pull in any further pages of this category's listing before rendering it.
    .then((doc) => getAll(doc).then(() => doc))
    .then((doc) => {
      const title = doc.querySelector('h1#firstHeading').innerText.replace('Category:', '');

      categoryArea.appendChild(document.createElement('hr'));
      const subcategoryTitle = document.createElement('h3');
      subcategoryTitle.classList.add('subcategory-title');
      subcategoryTitle.textContent = `${title} (parent category: ${parent})`;
      categoryArea.appendChild(subcategoryTitle);

      // Markup from the fetched page is appended in place; `innerHTML +=` would
      // re-serialise and re-parse everything already shown in categoryArea.
      const docPages = doc.querySelector('#mw-pages > .mw-content-ltr');
      categoryArea.insertAdjacentHTML('beforeend', docPages ? docPages.innerHTML : '<p>no pages</p>');

      getSubcategoryPagesButton.innerText = disabledButtonText();

      const remainingDepth = depth - 1;
      if (remainingDepth < 1) {
        // Children would be rejected by the depth guard above, so do not charge
        // them against the budget.
        return undefined;
      }

      // Skip visited links first, then apply the budget to what is left.
      const children = [...doc.querySelectorAll('div#mw-subcategories ul > li a')]
        .map(({ href }) => href)
        .filter((href) => !alreadyRetrieved.has(href))
        .slice(0, Math.max(0, maxSubcategories));
      maxSubcategories -= children.length;

      return Promise.all(children.map((href) => traverseTree(href, remainingDepth, title)));
    })
    .catch((err) => {
      // One unreachable or malformed category should not abort the other branches.
      console.error(`get-subcategories: could not process ${url}`, err);
    });
};
// Attach the controls to the page's subcategory section, if it has one.
const subcategories = document.querySelector('div#mw-subcategories');
if (subcategories) {
  const subcategoryHrefs = [...subcategories.querySelectorAll('ul > li a')].map(({ href }) => href);

  // Reads a number input as a whole number that is never negative. The inputs
  // carry no `min`, so a negative value can be typed; passed on to
  // `slice(0, n)` it would mean "all but the last n" and defeat the limit.
  // Empty or unparseable input counts as 0.
  const readCount = (input) => Math.max(0, Math.trunc(Number(input.value)) || 0);

  getSubcategoryPagesButton.addEventListener('click', () => {
    // The crawl is one-shot: lock the controls as soon as it starts.
    getSubcategoryPagesButton.disabled = true;
    layersInput.disabled = true;
    maxInput.disabled = true;

    const depth = readCount(layersInput);
    maxSubcategories = readCount(maxInput);

    // The first layer is charged against the budget here; deeper layers are
    // charged inside traverseTree.
    const children = subcategoryHrefs.slice(0, maxSubcategories);
    maxSubcategories -= children.length;

    // Title of the page the script runs on, looked up once for all children.
    const parentTitle = document.querySelector('h1#firstHeading').innerText.replace('Category:', '');
    children.forEach((href) => {
      traverseTree(href, depth, parentTitle);
    });
  });

  // Only show the controls when there is at least one subcategory link to follow.
  if (subcategoryHrefs.length) {
    subcategories.appendChild(getSubcategoryPagesButton);
    subcategories.appendChild(layersInput);
    subcategories.appendChild(maxInput);
  }
}
}());