Add wordometer:0.1.0

typst · Jan 24, 2024 · 7a02099 · 7a02099
1 parent 84b58c8
commit 7a02099
Show file tree

Hide file tree

Showing 7 changed files with 529 additions and 0 deletions.
diff --git a/packages/preview/wordometer/README.md b/packages/preview/wordometer/README.md
@@ -0,0 +1,40 @@
+# `wordometer`
+
+[![Manual](https://img.shields.io/badge/docs-manual.pdf-green)](https://github.com/Jollywatt/typst-wordometer/raw/master/docs/manual.pdf)
+![Version](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fgithub.com%2FJollywatt%2Ftypst-wordometer%2Fraw%2Fmaster%2Ftypst.toml&query=package.version&label=version)
+
+
+
+Basic word counts and document statistics.
+
+
+## Basic usage
+
+```typ
+#import "@preview/wordometer:0.1.0": word-count, total-words
+
+#show: word-count
+
+In this document, there are #total-words words all up.
+
+#word-count(total => [
+  The number of words in this block is #total.words
+  and there are #total.characters letters.
+])
+```
+
+## Excluding elements by type or label
+
+```typ
+#show: word-count.with(exclude: ("heading", "strike"))
+
+= This Heading Doesn’t Count
+
+In this document #strike[(excluding me)], there are #total-words words all up.
+
+#word-count(total => [
+  One, two, three, four.
+  #[That was #total.words, excluding this sentence!] <no-wc>
+], exclude: <no-wc>)
+```
+
diff --git a/packages/preview/wordometer/docs/manual.pdf b/packages/preview/wordometer/docs/manual.pdf
diff --git a/packages/preview/wordometer/docs/manual.typ b/packages/preview/wordometer/docs/manual.typ
@@ -0,0 +1,87 @@
+#import "@preview/tidy:0.1.0"
+#import "/src/lib.typ" as wordometer: *
+
+// Number the pages with arabic numerals.
+#set page(numbering: "1")
+// Justify paragraph text.
+#set par(justify: true)
+// Underline links with a lightened blue stroke.
+#show link: underline.with(stroke: blue.lighten(50%))
+
+// Package version, read from the `package.version` entry of `/typst.toml`.
+#let VERSION = toml("/typst.toml").package.version
+
+// Render the API documentation of the module stored at `path` with `tidy`.
+//
+// Level-3 headings are restyled as a light horizontal rule followed by the
+// heading text shown as raw text with `()` appended. The module source is
+// read from `path`, parsed by `tidy.parse-module` with `wordometer` in scope,
+// and displayed without an outline.
+#let show-module(path) = {
+	let rule-stroke = black.lighten(70%)
+	show heading.where(level: 3): it => {
+		align(center, line(length: 100%, stroke: rule-stroke))
+		let signature = it.body.text + "()"
+		block(text(1.3em, raw(signature)))
+	}
+	let source = read(path)
+	let parsed = tidy.parse-module(source, scope: (wordometer: wordometer))
+	tidy.show-module(parsed, show-outline: false)
+}
+
+
+
+// Title page: fractional vertical space above and below the centred title block.
+#v(.2fr)
+
+#align(center)[
+	#stack(
+		spacing: 12pt,
+		text(2.7em, `wordometer`),
+	)
+
+	#v(30pt)
+
+	A small #link("https://typst.app/")[Typst] package for quick and easy in-document word counts.
+
+	#link("https://github.com/Jollywatt/typst-wordometer")[`github.com/Jollywatt/typst-wordometer`]
+
+	Version #VERSION
+]
+
+// From here on, raw text defaults to the "typc" language.
+#set raw(lang: "typc")
+
+
+#v(1fr)
+
+#[
+	#show heading: pad.with(y: 10pt)
+
+	= Basic usage
+
+	```typ
+	#import "@preview/wordometer:0.1.0": word-count, total-words
+
+	#show: word-count
+
+	In this document, there are #total-words words all up.
+
+	#word-count(total => [
+	  The number of words in this block is #total.words
+	  and there are #total.characters letters.
+	])
+	```
+
+	= Excluding elements by type or label
+
+	```typ
+	#show: word-count.with(exclude: ("heading", "strike"))
+
+	= This Heading Doesn’t Count
+
+	In this document #strike[(excluding me)], there are #total-words words all up.
+
+	#word-count(total => [
+	  One, two, three, four.
+	  #[That was #total.words, excluding this sentence!] <no-wc>
+	], exclude: <no-wc>)
+	```
+]
+
+#v(1fr)
+
+// Start the API reference on a new page.
+#pagebreak()
+
+
+// Render the documentation of `/src/lib.typ` using `show-module` defined above.
+#show-module("/src/lib.typ")
diff --git a/packages/preview/wordometer/src/lib.typ b/packages/preview/wordometer/src/lib.typ
@@ -0,0 +1,265 @@
+// Add two dictionaries key by key.
+//
+// The result has every key found in `a` or `b`; a key missing from one of
+// them contributes `0` to the sum for that key.
+#let dictionary-sum(a, b) = {
+  let totals = (:)
+  let all-keys = a.keys() + b.keys()
+  for key in all-keys {
+    let left = a.at(key, default: 0)
+    let right = b.at(key, default: 0)
+    totals.insert(key, left + right)
+  }
+  totals
+}
+
+/// Get a basic word count from a string.
+///
+/// Returns a dictionary with keys:
+/// - `characters`: Number of non-whitespace characters. Characters are counted
+///   as grapheme clusters, not UTF-8 bytes, so `é` or `’` count once.
+/// - `words`: Number of words, defined by `regex("\b[\w'’]+\b")`.
+/// - `sentences`: Number of sentences, defined by `regex("\w+\s*[.?!]")`.
+///
+/// - string (string): Text to count.
+/// -> dictionary
+#let string-word-count(string) = (
+  // `str.len()` is the length in bytes, which over-counts non-ASCII text, so
+  // count grapheme clusters instead.
+  characters: string.replace(regex("\s+"), "").clusters().len(),
+  words: string.matches(regex("\b[\w'’]+\b")).len(),
+  sentences: string.matches(regex("\w+\s*[.?!]")).len(),
+)
+
+/// Simplify an array of content by concatenating adjacent text elements.
+///
+/// Doesn't preserve content exactly; `smartquote`s are replaced with `'` or
+/// `"`, and `space`, `linebreak`, `parbreak` and `pagebreak` elements are
+/// replaced with whitespace text. This is used on `sequence` elements because
+/// it improves word counts for cases like "Digby's", which should count as one
+/// word.
+///
+/// Any other element (e.g., `strong`, `line`, `par`) is kept untouched and
+/// separates the runs of text on either side of it.
+///
+/// For example, the content #rect[Qu'est-ce *que* c'est !?] is structured as:
+///
+/// #[Qu'est-ce *que* c'est !?].children
+///
+/// This function simplifies this to:
+///
+/// #wordometer.concat-adjacent-text([Qu'est-ce *que* c'est !?].children)
+///
+/// - children (array): Array of content to simplify.
+/// -> array
+#let concat-adjacent-text(children) = {
+  if children.len() == 0 { return () }
+  let squashed = (children.at(0),)
+
+  // Textual equivalent of an element, or `none` if it is not text-like.
+  // Element names are compared with `==`: a substring test such as
+  // `fn in "linebreak"` would also match `line`, `par` and `page`.
+  let as-text(el) = {
+    let fn = repr(el.func())
+    if fn == "text" { el.text }
+    else if fn == "space" { " " }
+    else if fn == "linebreak" { "\n" }
+    else if fn == "parbreak" { "\n\n" }
+    else if fn == "pagebreak" { "\n\n\n\n" }
+    else if fn == "smartquote" {
+      if el.double { "\"" } else { "'" }
+    }
+    else { none }
+  }
+
+  // Text accumulated in the last squashed element (`none` if it isn't text).
+  let last-text = as-text(squashed.at(-1))
+  for child in children.slice(1) {
+    let this-text = as-text(child)
+    if last-text == none or this-text == none {
+      // Either side is not text-like: keep the child as its own element.
+      squashed.push(child)
+      last-text = this-text
+    } else {
+      // Both are text-like: merge into the last squashed element.
+      last-text = last-text + this-text
+      squashed.at(-1) = text(last-text)
+    }
+  }
+
+  squashed
+}
+
+// Names of elements that are skipped by default when traversing content
+// (used as the default `exclude` list of `map-tree()`).
+#let IGNORED_ELEMENTS = (
+  "display", "equation", "h", "hide", "image", "line",
+  "linebreak", "locate", "metadata", "pagebreak", "parbreak", "path",
+  "polygon", "repeat", "smartquote", "space", "update", "v",
+)
+
+/// Traverse a content tree and apply a function to textual leaf nodes.
+///
+/// Descends into elements until reaching a textual element (`text` or `raw`)
+/// and calls `f` on the contained text, returning a (nested) array of all the
+/// return values. Excluded elements yield `none`. An element that is not
+/// excluded and has no text, `children`, `child` or `body` causes a panic.
+///
+/// - f (function): Unary function to pass text to.
+/// - content (content): Content element to traverse.
+/// - exclude (array): List of labels or element names to skip while traversing
+///  the tree. Default value includes equations and elements without child
+///  content or text:
+///  #wordometer.IGNORED_ELEMENTS.sorted().map(repr).map(raw).join([, ],
+///  last: [, and ]).
+///
+///  To exclude figures, but include figure captions, pass the name
+///  `"figure-body"` (which is not a real element). To include figure bodies,
+///  but exclude their captions, pass the name `"caption"`.
+#let map-tree(f, content, exclude: IGNORED_ELEMENTS) = {
+  let map-subtree = map-tree.with(f, exclude: exclude)
+
+  let fn = repr(content.func())
+  let fields = content.fields().keys()
+
+  if fn in exclude {
+    none
+
+  } else if content.at("label", default: none) in exclude {
+    none
+
+  } else if fn in ("text", "raw") {
+    f(content.text)
+
+  } else if "children" in fields {
+    let children = content.children
+
+    if fn == "sequence" {
+      // don't do this for, e.g., grid or stack elements
+      children = concat-adjacent-text(children)
+    }
+
+    children
+      .map(map-subtree)
+      .filter(x => x != none)
+
+  } else if fn == "figure" {
+    // A figure may have no caption; only descend into it when present.
+    let caption = content.at("caption", default: none)
+    (
+      if "figure-body" not in exclude { map-subtree(content.body) },
+      if caption != none { map-subtree(caption) },
+    )
+      .filter(x => x != none)
+
+  } else if fn == "styled" {
+    map-subtree(content.child)
+
+  } else if "body" in fields {
+    map-subtree(content.body)
+
+  } else {
+    // Unknown leaf element: fail loudly, showing its name and fields.
+    panic(fn, content.fields())
+
+  }
+
+}
+
+/// Get word count statistics of a content element.
+///
+/// Returns a results dictionary, not the content passed to it. (See
+/// `string-word-count()`).
+///
+/// - content (content): Content to count.
+/// - exclude (array): Content elements to exclude from word count (see
+///    `map-tree()`). A single element name or label is also accepted. These
+///    are added to the default `IGNORED_ELEMENTS`.
+/// - counter (fn): A function that accepts a string and returns a dictionary of
+///  counts.
+///
+///  For example, to count vowels, you might do:
+///
+///  ```typ
+///  #word-count-of([ABCDEFG], counter: s => (
+///      vowels: lower(s).matches(regex("[aeiou]")).len(),
+///  ))
+///  ```
+/// -> dictionary
+#let word-count-of(content, exclude: (), counter: string-word-count) = {
+  // Wrapping and flattening lets `exclude` be a single item or an array.
+  let exclude-elements = IGNORED_ELEMENTS
+  exclude-elements += (exclude,).flatten()
+
+  // Flatten the nested per-leaf results and sum them key by key, starting
+  // from the counts of the empty string.
+  (map-tree(counter, content, exclude: exclude-elements),)
+    .filter(x => x != none)
+    .flatten()
+    .fold(counter(""), dictionary-sum)
+}
+
+/// Simultaneously take a word count of some content and insert it into that
+/// content.
+///
+/// It works by first passing in some dummy results to `fn`, performing a word
+/// count on the content returned, and finally returning the result of passing
+/// the word count results to `fn`. This happens once --- it doesn't keep
+/// looping until convergence or anything!
+///
+/// The dummy results are those of the empty string, computed with the same
+/// `counter` as the real count, so they have the same keys as the final
+/// results.
+///
+/// For example:
+/// ```typst
+/// #word-count-callback(stats => [There are #stats.words words])
+/// ```
+///
+/// - fn (function): A function accepting a dictionary and returning content to
+///  perform the word count on.
+/// - ..options ( ): Additional named arguments, passed to `word-count-of()`:
+///   - `exclude`: Content to exclude from word count (see `map-tree()`).
+///   - `counter`: Function used to count a string (see `word-count-of()`).
+/// -> content
+#let word-count-callback(fn, ..options) = {
+  // Use the caller's counter (if given) for the dummy pass as well.
+  let counter = options.named().at("counter", default: string-word-count)
+  let preview-content = [#fn(counter(""))]
+  let stats = word-count-of(preview-content, ..options)
+  fn(stats)
+}
+
+// Content showing the final value of the "total-words" state.
+#let total-words = locate(location => {
+  let words-state = state("total-words")
+  words-state.final(location)
+})
+// Content showing the final value of the "total-characters" state.
+#let total-characters = locate(location => {
+  let characters-state = state("total-characters")
+  characters-state.final(location)
+})
+
+/// Word count the given content and record the results in global state.
+/// Should only be used once in the document.
+///
+/// #set raw(lang: "typ")
+///
+/// The `words` and `characters` results are written to the states
+/// `"total-words"` and `"total-characters"`. Their final values can be shown
+/// anywhere in the document with `#total-words` and `#total-characters`
+/// (shortcuts for, e.g., `#locate(loc => state("total-words").final(loc))`).
+///
+/// - content (content):
+///   Content to word count.
+/// - ..options ( ): Additional named arguments:
+///   - `exclude`: Content to exclude from word count (see `map-tree()`).
+/// -> content
+#let word-count-global(content, ..options) = {
+  let results = word-count-of(content, ..options)
+  let record-words = state("total-words").update(results.words)
+  let record-characters = state("total-characters").update(results.characters)
+  // The state updates are placed in front of the unchanged content.
+  record-words + record-characters + content
+}
+
+/// Perform a word count on content.
+///
+/// Entry point that dispatches on the type of its argument: a function is
+/// handled by `word-count-callback()`, anything else by `word-count-global()`.
+///
+/// - arg (content, fn):
+///   Can be:
+///   #set raw(lang: "typ")
+///   - `content`: The content is word counted and the results are made
+///    available through `#total-words` and `#total-characters`. This uses a
+///    global state, so should only be used once in a document (e.g., via a
+///    document show rule: `#show: word-count`).
+///   - `function`: A callback function that accepts a dictionary of word count
+///    results and returns the content to be word counted. For example:
+///    ```typ
+///    #word-count(total => [This sentence contains #total.characters letters.])
+///    ```
+/// - ..options ( ): Additional named arguments:
+///   - `exclude`: Content to exclude from word count (see `map-tree()`).
+///
+/// -> content
+#let word-count(arg, ..options) = {
+  let handler = if type(arg) == function {
+    word-count-callback
+  } else {
+    word-count-global
+  }
+  handler(arg, ..options)
+}
diff --git a/packages/preview/wordometer/test/tests.pdf b/packages/preview/wordometer/test/tests.pdf