Skip to content

Commit

Permalink
Add wordometer:0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Jollywatt committed Jan 24, 2024
1 parent 84b58c8 commit 7a02099
Show file tree
Hide file tree
Showing 7 changed files with 529 additions and 0 deletions.
40 changes: 40 additions & 0 deletions packages/preview/wordometer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# `wordometer`

[![Manual](https://img.shields.io/badge/docs-manual.pdf-green)](https://github.com/Jollywatt/typst-wordometer/raw/master/docs/manual.pdf)
![Version](https://img.shields.io/badge/dynamic/toml?url=https%3A%2F%2Fgithub.com%2FJollywatt%2Ftypst-wordometer%2Fraw%2Fmaster%2Ftypst.toml&query=package.version&label=version)



Basic word counts and document statistics.


## Basic usage

```typ
#import "@preview/wordometer:0.1.0": word-count, total-words
#show: word-count
In this document, there are #total-words words all up.
#word-count(total => [
The number of words in this block is #total.words
and there are #total.characters letters.
])
```

## Excluding elements by type or label

```typ
#show: word-count.with(exclude: ("heading", "strike"))
= This Heading Doesn’t Count
In this document #strike[(excluding me)], there are #total-words words all up.
#word-count(total => [
One, two, three, four.
#[That was #total.words, excluding this sentence!] <no-wc>
], exclude: <no-wc>)
```

Binary file added packages/preview/wordometer/docs/manual.pdf
Binary file not shown.
87 changes: 87 additions & 0 deletions packages/preview/wordometer/docs/manual.typ
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#import "@preview/tidy:0.1.0"
#import "/src/lib.typ" as wordometer: *

// Page-wide styling for the manual: numbered pages, justified paragraphs,
// and links underlined with a lightened blue stroke.
#set page(numbering: "1")
#set par(justify: true)
#show link: underline.with(stroke: blue.lighten(50%))

// Package version, read from the `package.version` entry of `/typst.toml`
// so the title page does not hard-code it.
#let VERSION = toml("/typst.toml").package.version

// Render the documentation of the module at `path` using tidy.
//
// Level-3 headings are restyled: a full-width light rule followed by the
// heading text with `()` appended, set as raw text at 1.3em.
#let show-module(path) = {
  show heading.where(level: 3): it => {
    align(center, line(length: 100%, stroke: black.lighten(70%)))
    block(text(1.3em, raw(it.body.text + "()")))
  }

  // Parse first, then render; `wordometer` is put in scope so that the
  // examples embedded in doc comments can refer to it.
  let module = tidy.parse-module(
    read(path),
    scope: (wordometer: wordometer),
  )
  tidy.show-module(module, show-outline: false)
}



#v(.2fr)

#align(center)[
#stack(
spacing: 12pt,
text(2.7em, `wordometer`),
)

#v(30pt)

A small #link("https://typst.app/")[Typst] package for quick and easy in-document word counts.

#link("https://github.com/Jollywatt/typst-wordometer")[`github.com/Jollywatt/typst-wordometer`]

Version #VERSION
]

#set raw(lang: "typc")


#v(1fr)

#[
#show heading: pad.with(y: 10pt)

= Basic usage

```typ
#import "@preview/wordometer:0.1.0": word-count, total-words
#show: word-count
In this document, there are #total-words words all up.
#word-count(total => [
The number of words in this block is #total.words
and there are #total.characters letters.
])
```

= Excluding elements by type or label

```typ
#show: word-count.with(exclude: ("heading", "strike"))
= This Heading Doesn’t Count
In this document #strike[(excluding me)], there are #total-words words all up.
#word-count(total => [
One, two, three, four.
#[That was #total.words, excluding this sentence!] <no-wc>
], exclude: <no-wc>)
```
]

#v(1fr)

#pagebreak()


#show-module("/src/lib.typ")
265 changes: 265 additions & 0 deletions packages/preview/wordometer/src/lib.typ
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
// Add two dictionaries of numbers key by key.
//
// A key missing from one of the dictionaries counts as 0 there, so the result
// contains every key of `a` followed by any additional keys of `b`.
#let dictionary-sum(a, b) = {
  let all-keys = a.keys() + b.keys()
  all-keys.fold((:), (sums, key) => {
    // Keys present in both inputs are visited twice; the second visit simply
    // overwrites the entry with the same value.
    sums.insert(key, a.at(key, default: 0) + b.at(key, default: 0))
    sums
  })
}

/// Get a basic word count from a string.
///
/// Returns a dictionary with keys:
/// - `characters`: Number of non-whitespace characters (grapheme clusters, so
///   accented letters and other multi-byte characters count once each).
/// - `words`: Number of words, defined by `regex("\b[\w'’]+\b")`.
/// - `sentences`: Number of sentences, defined by `regex("\w+\s*[.?!]")`.
///
/// - string (string):
/// -> dictionary
#let string-word-count(string) = (
  // `str.len()` is the UTF-8 byte length, which over-counts non-ASCII text;
  // count grapheme clusters instead.
  characters: string.replace(regex("\s+"), "").clusters().len(),
  words: string.matches(regex("\b[\w'’]+\b")).len(),
  sentences: string.matches(regex("\w+\s*[.?!]")).len(),
)

/// Simplify an array of content by concatenating adjacent text elements.
///
/// Doesn't preserve content exactly; `smartquote`s are replaced with `'` or
/// `"`. This is used on `sequence` elements because it improves word counts for
/// cases like "Digby's", which should count as one word.
///
/// Only `text`, `space`, `linebreak`, `parbreak`, `pagebreak` and `smartquote`
/// elements are merged; every other element is passed through untouched and
/// ends the current run of text.
///
/// For example, the content #rect[Qu'est-ce *que* c'est !?] is structured as:
///
/// #[Qu'est-ce *que* c'est !?].children
///
/// This function simplifies this to:
///
/// #wordometer.concat-adjacent-text([Qu'est-ce *que* c'est !?].children)
///
/// - children (array): Array of content to simplify.
#let concat-adjacent-text(children) = {
  if children.len() == 0 { return () }
  let squashed = (children.at(0),)

  // Plain-text equivalent of an element, or `none` if it is not text-like.
  // Element names are compared with `==`: a substring test such as
  // `fn in "parbreak"` would also match `par`, `line` or `page` elements and
  // replace them (and everything inside them) with newlines.
  let as-text(el) = {
    let fn = repr(el.func())
    if fn == "text" { el.text }
    else if fn == "space" { " " }
    else if fn == "linebreak" { "\n" }
    else if fn == "parbreak" { "\n\n" }
    else if fn == "pagebreak" { "\n\n\n\n" }
    else if fn == "smartquote" {
      if el.double { "\"" } else { "'" }
    }
  }

  // `last-text` is the text accumulated in the final entry of `squashed`, or
  // `none` if that entry is not text-like.
  let last-text = as-text(squashed.at(-1))
  for child in children.slice(1) {
    let this-text = as-text(child)
    if last-text == none or this-text == none {
      // Nothing to merge with: start a new entry.
      squashed.push(child)
      last-text = this-text
    } else {
      // Extend the current run of text in place.
      last-text = last-text + this-text
      squashed.at(-1) = text(last-text)
    }
  }

  squashed
}

// Element names (as given by `repr(element.func())`) that are skipped by
// default when traversing content for a word count.
#let IGNORED_ELEMENTS = (
  "display", "equation", "h", "hide", "image", "line",
  "linebreak", "locate", "metadata", "pagebreak", "parbreak", "path",
  "polygon", "repeat", "smartquote", "space", "update", "v",
)

/// Traverse a content tree and apply a function to textual leaf nodes.
///
/// Descends into elements until reaching a textual element (`text` or `raw`)
/// and calls `f` on the contained text, returning a (nested) array of all the
/// return values. Excluded elements yield `none`.
///
/// - f (function): Unary function to pass text to.
/// - content (content): Content element to traverse.
/// - exclude (array): List of labels or element names to skip while traversing
///   the tree. Default value includes equations and elements without child
///   content or text:
///   #wordometer.IGNORED_ELEMENTS.sorted().map(repr).map(raw).join([, ],
///   last: [, and ]).
///
///   To exclude figures, but include figure captions, pass the name
///   `"figure-body"` (which is not a real element). To include figure bodies,
///   but exclude their captions, pass the name `"caption"`.
#let map-tree(f, content, exclude: IGNORED_ELEMENTS) = {
  // Recurse with the same function and exclusions.
  let map-subtree = map-tree.with(f, exclude: exclude)

  let fn = repr(content.func())
  let fields = content.fields().keys()

  if fn in exclude {
    // Excluded by element name.
    none

  } else if content.at("label", default: none) in exclude {
    // Excluded by label.
    none

  } else if fn in ("text", "raw") {
    f(content.text)

  } else if "children" in fields {
    let children = content.children

    if fn == "sequence" {
      // don't do this for, e.g., grid or stack elements
      children = concat-adjacent-text(children)
    }

    children
      .map(map-subtree)
      .filter(x => x != none)

  } else if fn == "figure" {
    // A figure may have no caption; only descend into it when present,
    // otherwise the recursive call would fail on `none`.
    let caption = content.at("caption", default: none)
    (
      if "figure-body" not in exclude { map-subtree(content.body) },
      if caption != none { map-subtree(caption) },
    )
      .filter(x => x != none)

  } else if fn == "styled" {
    map-subtree(content.child)

  } else if "body" in fields {
    map-subtree(content.body)

  } else {
    // Unknown element with no text, children or body: fail loudly so that it
    // can be added to the exclusions.
    panic(fn, content.fields())

  }

}

/// Get word count statistics of a content element.
///
/// Returns a results dictionary, not the content passed to it. (See
/// `string-word-count()`).
///
/// - content (content):
/// - exclude (array): Content elements to exclude from word count (see
///   `map-tree()`). A single label or element name is also accepted. These
///   are added to the default `IGNORED_ELEMENTS`.
/// - counter (fn): A function that accepts a string and returns a dictionary of
///   counts.
///
///   For example, to count vowels, you might do:
///
///   ```typ
///   #word-count-of([ABCDEFG], counter: s => (
///     vowels: lower(s).matches(regex("[aeiou]")).len(),
///   ))
///   ```
/// -> dictionary
#let word-count-of(content, exclude: (), counter: string-word-count) = {
  // Wrapping and flattening lets `exclude` be either one item or an array.
  // The default is an empty array rather than an empty dictionary, so that
  // nothing spurious is appended to the exclusions.
  let exclude-elements = IGNORED_ELEMENTS + (exclude,).flatten()

  (map-tree(counter, content, exclude: exclude-elements),)
    .filter(x => x != none)
    .flatten()
    // `counter("")` supplies the zero value for every key the counter reports.
    .fold(counter(""), dictionary-sum)
}

/// Simultaneously take a word count of some content and insert it into that
/// content.
///
/// It works by first passing in some dummy results to `fn`, performing a word
/// count on the content returned, and finally returning the result of passing
/// the word count results to `fn`. This happens once --- it doesn't keep
/// looping until convergence or anything!
///
/// The dummy results are those of the active counter applied to the empty
/// string, so a callback may use the keys of a custom `counter` as well.
///
/// For example:
/// ```typst
/// #word-count-callback(stats => [There are #stats.words words])
/// ```
///
/// - fn (function): A function accepting a dictionary and returning content to
///   perform the word count on.
/// - ..options ( ): Additional named arguments:
///   - `exclude`: Content to exclude from word count (see `map-tree()`).
///   - `counter`: Custom counting function (see `word-count-of()`).
/// -> content
#let word-count-callback(fn, ..options) = {
  // Use the same counter for the dummy pass as for the real one; otherwise
  // `fn` would be handed a dictionary lacking the custom counter's keys.
  let counter = options.named().at("counter", default: string-word-count)
  let preview-content = [#fn(counter(""))]
  let stats = word-count-of(preview-content, ..options)
  fn(stats)
}

// Content that shows the final value of the `total-words` state.
#let total-words = locate(loc => {
  let words = state("total-words")
  words.final(loc)
})
// Content that shows the final value of the `total-characters` state.
#let total-characters = locate(loc => {
  let characters = state("total-characters")
  characters.final(loc)
})

/// Word count the given content and record the totals in global state, then
/// return the content unchanged. Should only be used once in the document.
///
/// #set raw(lang: "typ")
///
/// The totals can be shown anywhere in the document with `#total-words` and
/// `#total-characters`, which are shortcuts for the final values of states of
/// the same name (e.g., `#locate(loc => state("total-words").final(loc))`)
///
/// - content (content):
///   Content to word count.
/// - ..options ( ): Additional named arguments:
///   - `exclude`: Content to exclude from word count (see `map-tree()`).
/// -> content
#let word-count-global(content, ..options) = {
  let totals = word-count-of(content, ..options)
  // Each call yields a state update, which is joined in front of the content.
  let record(name, value) = state(name).update(value)
  record("total-words", totals.words)
  record("total-characters", totals.characters)
  content
}

/// Perform a word count on content.
///
/// Entry point which dispatches on the type of its argument: a callback
/// function goes to `word-count-callback()`, anything else goes to
/// `word-count-global()`.
///
/// - arg (content, fn):
///   Can be:
///   #set raw(lang: "typ")
///   - `content`: A word count is performed for the content and the results are
///     accessible through `#total-words` and `#total-characters`. This uses a
///     global state, so should only be used once in a document (e.g., via a
///     document show rule: `#show: word-count`).
///   - `function`: A callback function accepting a dictionary of word count
///     results and returning content to be word counted. For example:
///     ```typ
///     #word-count(total => [This sentence contains #total.characters letters.])
///     ```
/// - ..options ( ): Additional named arguments:
///   - `exclude`: Content to exclude from word count (see `map-tree()`).
///
/// -> content
#let word-count(arg, ..options) = {
  let handler = if type(arg) == function {
    word-count-callback
  } else {
    word-count-global
  }
  handler(arg, ..options)
}
Binary file added packages/preview/wordometer/test/tests.pdf
Binary file not shown.
Loading

0 comments on commit 7a02099

Please sign in to comment.