diff --git a/packages/preview/wordometer/README.md b/packages/preview/wordometer/README.md
new file mode 100644
index 000000000..c2014cbe6
--- /dev/null
+++ b/packages/preview/wordometer/README.md
@@ -0,0 +1,40 @@
+# `wordometer`
+
+[![Manual](https://img.shields.io/badge/docs-manual.pdf-green)](https://github.com/Jollywatt/typst-wordometer/raw/master/docs/manual.pdf)
+![Version](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fgithub.com%2FJollywatt%2Ftypst-wordometer%2Fraw%2Fmaster%2Ftypst.toml&query=package.version&label=version)
+
+
+
+Basic word counts and document statistics.
+
+
+## Basic usage
+
+```typ
+#import "@preview/wordometer:0.1.0": word-count, total-words
+
+#show: word-count
+
+In this document, there are #total-words words all up.
+
+#word-count(total => [
+  The number of words in this block is #total.words
+  and there are #total.characters letters.
+])
+```
+
+## Excluding elements by type or label
+
+```typ
+#show: word-count.with(exclude: ("heading", "strike"))
+
+= This Heading Doesn’t Count
+
+In this document #strike[(excluding me)], there are #total-words words all up.
+
+#word-count(total => [
+  One, two, three, four.
+  #[That was #total.words, excluding this sentence!] <no-wc>
+], exclude: <no-wc>)
+```
+
diff --git a/packages/preview/wordometer/docs/manual.pdf b/packages/preview/wordometer/docs/manual.pdf
new file mode 100644
index 000000000..f70c37865
Binary files /dev/null and b/packages/preview/wordometer/docs/manual.pdf differ
diff --git a/packages/preview/wordometer/docs/manual.typ b/packages/preview/wordometer/docs/manual.typ
new file mode 100644
index 000000000..5a6b10636
--- /dev/null
+++ b/packages/preview/wordometer/docs/manual.typ
@@ -0,0 +1,87 @@
+#import "@preview/tidy:0.1.0"
+#import "/src/lib.typ" as wordometer: *
+
+#set page(numbering: "1")
+#set par(justify: true)
+#show link: underline.with(stroke: blue.lighten(50%))
+
+#let VERSION = toml("/typst.toml").package.version
+
+#let show-module(path) = {
+  show heading.where(level: 3): it => {
+    align(center, line(length: 100%, stroke: black.lighten(70%)))
+    block(text(1.3em, raw(it.body.text + "()")))
+  }
+  tidy.show-module(
+    tidy.parse-module(
+      read(path),
+      scope: (wordometer: wordometer)
+    ),
+    show-outline: false,
+  )
+}
+
+
+
+#v(.2fr)
+
+#align(center)[
+  #stack(
+    spacing: 12pt,
+    text(2.7em, `wordometer`),
+  )
+
+  #v(30pt)
+
+  A small #link("https://typst.app/")[Typst] package for quick and easy in-document word counts.
+
+  #link("https://github.com/Jollywatt/typst-wordometer")[`github.com/Jollywatt/typst-wordometer`]
+
+  Version #VERSION
+]
+
+#set raw(lang: "typc")
+
+
+#v(1fr)
+
+#[
+  #show heading: pad.with(y: 10pt)
+
+  = Basic usage
+
+  ```typ
+  #import "@preview/wordometer:0.1.0": word-count, total-words
+
+  #show: word-count
+
+  In this document, there are #total-words words all up.
+
+  #word-count(total => [
+    The number of words in this block is #total.words
+    and there are #total.characters letters.
+  ])
+  ```
+
+  = Excluding elements by type or label
+
+  ```typ
+  #show: word-count.with(exclude: ("heading", "strike"))
+
+  = This Heading Doesn’t Count
+
+  In this document #strike[(excluding me)], there are #total-words words all up.
+
+  #word-count(total => [
+    One, two, three, four.
+    #[That was #total.words, excluding this sentence!] <no-wc>
+  ], exclude: <no-wc>)
+  ```
+]
+
+#v(1fr)
+
+#pagebreak()
+
+
+#show-module("/src/lib.typ")
\ No newline at end of file
diff --git a/packages/preview/wordometer/src/lib.typ b/packages/preview/wordometer/src/lib.typ
new file mode 100644
index 000000000..4b8d6483f
--- /dev/null
+++ b/packages/preview/wordometer/src/lib.typ
@@ -0,0 +1,265 @@
+// Key-wise sum of two dictionaries of numbers. A key missing from one side
+// counts as 0. Keys of `a` keep their order; keys only in `b` are appended.
+#let dictionary-sum(a, b) = {
+  let c = a
+  for (k, v) in b {
+    c.insert(k, c.at(k, default: 0) + v)
+  }
+  c
+}
+
+/// Get a basic word count from a string.
+///
+/// Returns a dictionary with keys:
+/// - `characters`: Number of non-whitespace characters, counted as grapheme
+///   clusters (so `é` or `’` is one character, not its UTF-8 byte length).
+/// - `words`: Number of words, defined by `regex("\b[\w'’]+\b")`.
+/// - `sentences`: Number of sentences, defined by `regex("\w+\s*[.?!]")`.
+///
+/// - string (string): Text to analyse.
+/// -> dictionary
+#let string-word-count(string) = (
+  // `str.len()` is the length in UTF-8 bytes, which over-counts every
+  // non-ASCII character; count grapheme clusters instead.
+  characters: string.replace(regex("\s+"), "").clusters().len(),
+  words: string.matches(regex("\b[\w'’]+\b")).len(),
+  sentences: string.matches(regex("\w+\s*[.?!]")).len(),
+)
+
+/// Simplify an array of content by concatenating adjacent text elements.
+///
+/// Doesn't preserve content exactly; `smartquote`s are replaced with `'` or
+/// `"`. This is used on `sequence` elements because it improves word counts for
+/// cases like "Digby's", which should count as one word.
+///
+/// For example, the content #rect[Qu'est-ce *que* c'est !?] is structured as:
+///
+/// #[Qu'est-ce *que* c'est !?].children
+///
+/// This function simplifies this to:
+///
+/// #wordometer.concat-adjacent-text([Qu'est-ce *que* c'est !?].children)
+///
+/// - children (array): Array of content to simplify.
+#let concat-adjacent-text(children) = {
+  if children.len() == 0 { return () }
+  let squashed = (children.at(0),)
+
+  // Plain-text stand-in for elements that may be merged into neighbouring
+  // text. Evaluates to `none` for every other element.
+  let as-text(el) = {
+    let fn = repr(el.func())
+    // Element names must be compared with `==`. `fn in "parbreak"` is a
+    // substring test on strings, so it also matched `par` (and likewise
+    // `line` and `page`), replacing those elements with newlines.
+    if fn == "text" { el.text }
+    else if fn == "space" { " " }
+    else if fn == "linebreak" { "\n" }
+    else if fn == "parbreak" { "\n\n" }
+    else if fn == "pagebreak" { "\n\n\n\n" }
+    else if fn == "smartquote" {
+      if el.double { "\"" } else { "'" }
+    }
+  }
+
+  // `last-text` is the text accumulated in the final entry of `squashed`, or
+  // `none` when that entry is not text-like.
+  let last-text = as-text(squashed.at(-1))
+  for child in children.slice(1) {
+    let this-text = as-text(child)
+    if last-text == none or this-text == none {
+      // Nothing to merge on at least one side: keep the child as its own entry.
+      squashed.push(child)
+      last-text = this-text
+    } else {
+      // Both sides are text-like: grow the final entry in place.
+      last-text += this-text
+      squashed.at(-1) = text(last-text)
+    }
+  }
+
+  squashed
+}
+
+// Element names that `map-tree()` skips by default.
+#let IGNORED_ELEMENTS = (
+  "display",
+  "equation",
+  "h",
+  "hide",
+  "image",
+  "line",
+  "linebreak",
+  "locate",
+  "metadata",
+  "pagebreak",
+  "parbreak",
+  "path",
+  "polygon",
+  "repeat",
+  "smartquote",
+  "space",
+  "update",
+  "v",
+)
+
+/// Traverse a content tree and apply a function to textual leaf nodes.
+///
+/// Descends into elements until reaching a textual element (`text` or `raw`)
+/// and calls `f` on the contained text, returning a (nested) array of all the
+/// return values.
+///
+/// - f (function): Unary function to pass text to.
+/// - content (content): Content element to traverse.
+/// - exclude (array): List of labels or element names to skip while traversing
+///   the tree. Default value includes equations and elements without child
+///   content or text:
+///   #wordometer.IGNORED_ELEMENTS.sorted().map(repr).map(raw).join([, ],
+///   last: [, and ]).
+///
+///   To exclude figures, but include figure captions, pass the name
+///   `"figure-body"` (which is not a real element). To include figure bodies,
+///   but exclude their captions, pass the name `"caption"`.
+#let map-tree(f, content, exclude: IGNORED_ELEMENTS) = {
+  // Absent optional children (e.g. the caption of a figure that has none)
+  // contribute nothing instead of failing on `none.func()`.
+  if content == none { return none }
+
+  let map-subtree = map-tree.with(f, exclude: exclude)
+
+  let fn = repr(content.func())
+  let fields = content.fields().keys()
+  let own-label = content.at("label", default: none)
+
+  if fn in exclude {
+    none
+
+  } else if own-label != none and own-label in exclude {
+    // Only labelled elements can be excluded by label.
+    none
+
+  } else if fn in ("text", "raw") {
+    f(content.text)
+
+  } else if "children" in fields {
+    let children = content.children
+
+    if fn == "sequence" {
+      // don't do this for, e.g., grid or stack elements
+      children = concat-adjacent-text(children)
+    }
+
+    children
+      .map(map-subtree)
+      .filter(x => x != none)
+
+  } else if fn == "figure" {
+    (
+      // "figure-body" is a pseudo element name that skips only the body.
+      if "figure-body" not in exclude { map-subtree(content.body) },
+      // The caption is optional, so look it up with a default.
+      map-subtree(content.at("caption", default: none)),
+    )
+      .filter(x => x != none)
+
+  } else if fn == "styled" {
+    map-subtree(content.child)
+
+  } else if "body" in fields {
+    map-subtree(content.body)
+
+  } else {
+    // Unknown element kind with no recognised children: fail loudly so it
+    // can be handled above or added to the exclusion list.
+    panic(fn, content.fields())
+
+  }
+
+}
+
+/// Get word count statistics of a content element.
+///
+/// Returns a results dictionary, not the content passed to it. (See
+/// `string-word-count()`).
+///
+/// - content (content): Content to count.
+/// - exclude (array): Content elements to exclude from word count (see
+///   `map-tree()`), in addition to `IGNORED_ELEMENTS`. A single element name
+///   or label is also accepted.
+/// - counter (fn): A function that accepts a string and returns a dictionary of
+///   counts.
+///
+///   For example, to count vowels, you might do:
+///
+///   ```typ
+///   #word-count-of([ABCDEFG], counter: s => (
+///     vowels: lower(s).matches(regex("[aeiou]")).len(),
+///   ))
+///   ```
+/// -> dictionary
+#let word-count-of(content, exclude: (), counter: string-word-count) = {
+  // Wrapping and flattening lets `exclude` be one item or an array of items.
+  let exclude-elements = IGNORED_ELEMENTS + (exclude,).flatten()
+
+  // `counter("")` supplies the zero value for every key of the result.
+  (map-tree(counter, content, exclude: exclude-elements),)
+    .filter(x => x != none)
+    .flatten()
+    .fold(counter(""), dictionary-sum)
+}
+
+/// Simultaneously take a word count of some content and insert it into that
+/// content.
+///
+/// It works by first passing in some dummy results to `fn`, performing a word
+/// count on the content returned, and finally returning the result of passing
+/// the word count results to `fn`. This happens once --- it doesn't keep
+/// looping until convergence or anything!
+///
+/// For example:
+/// ```typst
+/// #word-count-callback(stats => [There are #stats.words words])
+/// ```
+///
+/// - fn (function): A function accepting a dictionary and returning content to
+///   perform the word count on.
+/// - ..options ( ): Additional named arguments, forwarded to `word-count-of()`:
+///   - `exclude`: Content to exclude from word count (see `map-tree()`).
+///   - `counter`: Custom counting function; it is also used to build the dummy
+///     results.
+/// -> content
+#let word-count-callback(fn, ..options) = {
+  // Build the dummy results with the counter the real count will use, so `fn`
+  // can read a custom counter's keys during the preview pass.
+  let counter = options.named().at("counter", default: string-word-count)
+  let preview-content = [#fn(counter(""))]
+  let stats = word-count-of(preview-content, ..options)
+  fn(stats)
+}
+
+// Final values of the states written by `word-count-global()`.
+#let total-words = locate(loc => state("total-words").final(loc))
+#let total-characters = locate(loc => state("total-characters").final(loc))
+
+/// Get word count statistics of the given content and store the results in
+/// global state. Should only be used once in the document.
+///
+/// #set raw(lang: "typ")
+///
+/// The results are accessible anywhere in the document with `#total-words` and
+/// `#total-characters`, which are shortcuts for the final values of states of
+/// the same name (e.g., `#locate(loc => state("total-words").final(loc))`).
+///
+/// - content (content):
+///   Content to word count.
+/// - ..options ( ): Additional named arguments:
+///   - `exclude`: Content to exclude from word count (see `map-tree()`).
+/// -> content
+#let word-count-global(content, ..options) = {
+  let stats = word-count-of(content, ..options)
+  state("total-words").update(stats.words)
+  state("total-characters").update(stats.characters)
+  content
+}
+
+/// Perform a word count on content.
+///
+/// Master function which accepts content (calling `word-count-global()`) or a
+/// callback function (calling `word-count-callback()`).
+///
+/// - arg (content, fn):
+///   Can be:
+///   #set raw(lang: "typ")
+///   - `content`: The content is word counted and the totals become available
+///     through `#total-words` and `#total-characters`. This goes through a
+///     global state, so it should only be used once in a document (e.g., via a
+///     document show rule: `#show: word-count`).
+///   - `function`: A callback taking a dictionary of word count results and
+///     returning the content to be word counted. For example:
+///     ```typ
+///     #word-count(total => [This sentence contains #total.characters letters.])
+///     ```
+/// - ..options ( ): Additional named arguments:
+///   - `exclude`: Content to exclude from word count (see `map-tree()`).
+///
+/// -> content
+#let word-count(arg, ..options) = {
+  // A callback gets a scoped count; anything else is counted globally.
+  let handler = if type(arg) == function {
+    word-count-callback
+  } else {
+    word-count-global
+  }
+  handler(arg, ..options)
+}
\ No newline at end of file
diff --git a/packages/preview/wordometer/test/tests.pdf b/packages/preview/wordometer/test/tests.pdf
new file mode 100644
index 000000000..09a97c9c0
Binary files /dev/null and b/packages/preview/wordometer/test/tests.pdf differ
diff --git a/packages/preview/wordometer/test/tests.typ b/packages/preview/wordometer/test/tests.typ
new file mode 100644
index 000000000..4df330844
--- /dev/null
+++ b/packages/preview/wordometer/test/tests.typ
@@ -0,0 +1,123 @@
+#import "/src/lib.typ": *
+#set page(width: 15cm, height: auto)
+
+#show heading.where(level: 1): it => pagebreak(weak: true) + it + v(1em)
+
+= Basics
+
+#let el = [
+  One two _three_ four *five* six.
+
+  == Seven eight
+
+  #box[Nine #h(1fr) ten eleven $ sqrt(#[don’t mind me]) $ twelve.]
+
+  Thirteen #text(red)[fourteen]
+  - fifteen
+  - sixteen #box(rotate(-5deg)[seventeen])
+  - eighteen!
+]
+
+#rect(el)
+#word-count-of(el)
+
+= More basics
+
+#let el = [
+  #stack(
+    dir: ltr,
+    spacing: 1fr,
+    table(columns: 3, [one], [two], [three #super[four]], [#sub[five] six], [seven]),
+    rotate(180deg)[eight],
+    circle[nine ten],
+
+  )
+
+  #figure(circle(fill: red, [eleven]), caption: [twelve thirteen])
+]
+
+#rect(el)
+#word-count-of(el)
+#map-tree(x => x, el)
+
+= Punctuation
+
+#let el = [
+  "One *two*, three!" #text(red)[Four], five.
+  #rect[Six, *seven*, eight.]
+]
+
+#rect(el)
+
+Raw tree: #map-tree(x => x, el)
+
+Stats: #word-count-of(el)
+
+= Scoped counts
+
+#word-count-callback(stats => box(stroke: blue, inset: 1em)[
+  Guess what, this box contains #stats.words words!
+
+  Full statistics: #stats
+])
+
+#rect[
+  #show: word-count
+
+  One two three four. There are #total-words total words and #total-characters characters.
+
+]
+
+
+= Master function
+
+#word-count(totals => [
+  Hello, stats are in! #totals
+])
+
+#block(fill: orange.lighten(90%), inset: 1em)[
+  #show: word-count
+
+  One two three four. There are #total-words total words and #total-characters characters.
+
+]
+
+= Sentences
+
+#let el = [
+  Pour quoi ? Qu'est-ce que c'est !?
+
+  "I don't know anything."
+
+]
+
+#el
+#word-count-of(el)
+
+= Excluding elements by type
+
+#word-count(total => [
+  == Not me.
+  One, two, three. #strike[Not me, either.] Four.
+
+  #strike[Words: #total.words]
+], exclude: ("heading", "strike"))
+
+= Excluding elements by label
+
+// The label marks the heading that the count must skip.
+#word-count(total => [
+  === One two
+  Three, four.
+
+  === Not me! <no-wc>
+  Five, six.
+
+  #total
+], exclude: ("raw", <no-wc>))
+
+#line(length: 100%)
+
+// A single label (not wrapped in an array) is accepted as `exclude`.
+#word-count(total => [
+  One, two, three, four.
+  #[That was #total.words, not counting this sentence!] <no-wc>
+], exclude: <no-wc>)
\ No newline at end of file
diff --git a/packages/preview/wordometer/typst.toml b/packages/preview/wordometer/typst.toml
new file mode 100644
index 000000000..bcd10e3d4
--- /dev/null
+++ b/packages/preview/wordometer/typst.toml
@@ -0,0 +1,14 @@
+[package]
+name = "wordometer"
+version = "0.1.0"
+entrypoint = "src/lib.typ"
+authors = ["Joseph Wilson (Jollywatt)"]
+repository = "https://github.com/Jollywatt/typst-wordometer"
+license = "MIT"
+description = "Word counts and document statistics."
+exclude = ["README.md", "docs/", "test/"]
+keywords = [
+  "word",
+  "count",
+  "statistics",
+]