-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_database.py
68 lines (57 loc) · 2.08 KB
/
build_database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pathlib
import sys
from datetime import timezone
import sqlite_utils
import git
# Absolute path of the directory that contains this script. It is passed to
# build_database() as the repository path when the file is run as a script.
root = pathlib.Path(__file__).parent.resolve()
# Template for the GitHub URL of one file on the `main` branch of a user's
# `til` repository; filled in with `user` and the file's repo-relative `path`.
URL_ROOT_FMT = "https://github.com/{user}/til/blob/main/{path}"
def created_changed_times(repo_path, ref="main"):
    """Collect first-seen and last-seen commit timestamps for every file.

    Args:
        repo_path: Location of the git repository to inspect.
        ref: Revision whose history is walked (defaults to ``"main"``).

    Returns:
        A dict keyed by file path as reported in each commit's stats. Each
        value is a dict with the keys ``created``, ``created_utc``,
        ``updated`` and ``updated_utc``, all ISO 8601 strings. The ``*_utc``
        variants are the same instants converted to UTC.
    """
    times_by_path = {}
    repo = git.Repo(repo_path, odbt=git.GitDB)

    # The history is walked in the reverse of the order iter_commits() yields
    # it (presumably making it oldest-first -- confirm against GitPython), so
    # the first sighting of a path supplies "created" and the last sighting
    # supplies "updated".
    history = list(repo.iter_commits(ref))
    history.reverse()

    for commit in history:
        when = commit.committed_datetime
        local_iso = when.isoformat()
        utc_iso = when.astimezone(timezone.utc).isoformat()
        for changed_path in commit.stats.files:
            entry = times_by_path.setdefault(
                changed_path,
                {"created": local_iso, "created_utc": utc_iso},
            )
            entry["updated"] = local_iso
            entry["updated_utc"] = utc_iso
    return times_by_path
def build_database(repo_path, user):
    """Build or refresh the ``tils.db`` SQLite database from Markdown files.

    Every ``<topic>/<name>.md`` file exactly one directory below *repo_path*
    is upserted as one row of the ``til`` table. The first line of the file,
    stripped of leading ``#`` characters, is the title; the remainder is the
    body. The git timestamps returned by ``created_changed_times`` are merged
    into each row, and full-text search is then configured on the ``title``
    and ``body`` columns.

    Args:
        repo_path: ``pathlib.Path`` of the git working tree. The Markdown
            files are read from it and the database is written to
            ``repo_path / "tils.db"``.
        user: GitHub username substituted into each entry's URL.

    Raises:
        KeyError: If a Markdown file has no entry in the timestamps returned
            by ``created_changed_times`` (for example, a file that was never
            committed).
    """
    all_times = created_changed_times(repo_path)
    db = sqlite_utils.Database(repo_path / "tils.db")
    table = db.table("til", pk="path")

    # Scan the directory we were asked to index (not the module-level script
    # directory) so the files, the git history and the database all refer to
    # the same tree.
    for filepath in repo_path.glob("*/*.md"):
        # Close the handle deterministically, and decode as UTF-8 rather than
        # whatever the platform locale happens to be.
        with filepath.open(encoding="utf-8") as fp:
            title = fp.readline().lstrip("#").strip()
            body = fp.read().strip()

        # Always use forward slashes: the path is split on "/", embedded in a
        # URL, and used as the key into the git timestamps.
        path = filepath.relative_to(repo_path).as_posix()

        record = {
            "path": path.replace("/", "_"),
            "slug": filepath.stem,
            "topic": path.split("/")[0],
            "title": title,
            "url": URL_ROOT_FMT.format(user=user, path=path),
            "body": body,
        }
        record.update(all_times[path])

        # One transaction per row; alter=True lets the table gain any columns
        # the record introduces.
        with db.conn:
            table.upsert(record, alter=True)

    # (Re)configure full-text search with Porter stemming, kept in sync by
    # triggers. replace=True is passed so an existing FTS setup is replaced.
    table.enable_fts(
        ["title", "body"], tokenize="porter", create_triggers=True, replace=True
    )
if __name__ == "__main__":
    # Script entry point. The single required argument is the GitHub username
    # used to build each entry's URL.
    if len(sys.argv) < 2:
        # Passing a string to sys.exit() prints it to stderr and exits with a
        # non-zero status. The message names this script, build_database.py.
        sys.exit("Usage: build_database.py <username>")
    build_database(root, sys.argv[1])