-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparsers.ts
111 lines (86 loc) · 2.57 KB
/
parsers.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import { parseDocument } from 'htmlparser2'
import type { NAICSCode } from './types.js'
import { getWSIBClassificationFromNAICSCode } from './wsibClassifications.js'
/**
 * Result of `parseValidityPeriod`.
 * Both properties are optional because the input may contain fewer than two
 * recognizable dates.
 */
interface ParseValidityPeriodReturn {
/** First date token found in the input, if any. */
start?: Date
/** Second date token found in the input, if any. */
end?: Date
}
/**
 * Returns the text held by the first child of the first node in an HTML
 * snippet, or the trimmed input itself when it does not start with a tag.
 *
 * @param rawHTMLString - Raw cell value; may be plain text or a small HTML
 *   fragment such as `<p>Some text</p>`. Nullish values are treated as `''`.
 * @returns The extracted text. An empty string is returned when the markup has
 *   no first node, the first node has no children, or its first child carries
 *   no string `data` (previously these cases threw or leaked `undefined`).
 */
export function stripHTML(rawHTMLString: string): string {
  // Minimal structural view of the parsed nodes; only the members read here.
  type NodeWithDataChild = {
    children?: ReadonlyArray<{ data?: unknown }>
  } | null

  const cleanString = (rawHTMLString ?? '').trim()

  // Anything that does not open with a tag is already plain text.
  if (!cleanString.startsWith('<')) {
    return cleanString
  }

  const firstNode = parseDocument(cleanString)
    .firstChild as unknown as NodeWithDataChild

  // Optional chaining guards empty documents, childless elements (`<br>`,
  // `<p></p>`) and non-element first nodes such as comments.
  const textData = firstNode?.children?.[0]?.data

  return typeof textData === 'string' ? textData : ''
}
/**
 * Extracts NAICS codes from an HTML fragment in which each top-level element
 * holds text of the form `CODE: Description`.
 *
 * Each recognized entry is enriched with whatever
 * `getWSIBClassificationFromNAICSCode` returns for its code, when that lookup
 * yields a value.
 *
 * @param rawHTMLString - HTML fragment to scan. Nullish values are treated as
 *   an empty fragment.
 * @returns One `NAICSCode` per top-level tag whose first child carries text
 *   containing a colon, in document order. Tags without such text (for example
 *   `<br>` or an element wrapping another element) are skipped instead of
 *   throwing.
 */
export function parseNAICS(rawHTMLString: string): NAICSCode[] {
  const naicsCodes: NAICSCode[] = []

  const rawNode = parseDocument((rawHTMLString ?? '').trim())

  for (const child of rawNode.childNodes) {
    if (child.type !== 'tag') {
      continue
    }

    // Structural view of the element; only `children[0].data` is read.
    const firstGrandchild = (
      child as unknown as { children?: ReadonlyArray<{ data?: unknown }> }
    ).children?.[0]

    const rawData = firstGrandchild?.data

    // Childless tags and tags whose first child is not a data node have no
    // text to parse.
    if (typeof rawData !== 'string') {
      continue
    }

    const rawText = rawData.trim()
    const separatorIndex = rawText.indexOf(':')

    if (separatorIndex === -1) {
      continue
    }

    const naicsCode: NAICSCode = {
      code: rawText.slice(0, separatorIndex).trim(),
      codeDescription: rawText.slice(separatorIndex + 1).trim()
    }

    const classification = getWSIBClassificationFromNAICSCode(naicsCode.code)

    if (classification !== undefined) {
      Object.assign(naicsCode, classification)
    }

    naicsCodes.push(naicsCode)
  }

  return naicsCodes
}
/**
 * Shape of a single date token such as `19-Feb-2020`: day digits, a
 * capitalized three-letter word, and a four-digit year. The month word is
 * validated separately against `validityPeriodMonthStrings`.
 */
const validityPeriodDateRegexp = /^\d+-[A-Z][a-z]{2}-\d{4}$/

/**
 * Accepted month abbreviations. The array index is the zero-based month
 * number handed to the `Date` constructor.
 */
const validityPeriodMonthStrings = [
  'Jan',
  'Feb',
  'Mar',
  'Apr',
  'May',
  'Jun',
  'Jul',
  'Aug',
  'Sep',
  'Oct',
  'Nov',
  'Dec'
]

/**
 * Converts a `D-Mon-YYYY` token into a `Date` at local midnight.
 *
 * @param rawDateString - Token that already matched
 *   `validityPeriodDateRegexp`.
 * @returns The parsed date, or `undefined` when the month word is not a known
 *   abbreviation. Without this check `indexOf` would yield -1 and `Date` would
 *   silently roll back to December of the previous year.
 */
function parseValidityPeriodDate(rawDateString: string): Date | undefined {
  const [dayString = '', monthString = '', yearString = ''] =
    rawDateString.split('-')

  const monthIndex = validityPeriodMonthStrings.indexOf(monthString)

  if (monthIndex === -1) {
    return undefined
  }

  return new Date(
    Number.parseInt(yearString, 10),
    monthIndex,
    Number.parseInt(dayString, 10)
  )
}

/**
 * Finds the first two date tokens (`D-Mon-YYYY`) in a validity period string
 * and returns them as the period's start and end.
 *
 * @param rawHTMLString - Text such as `19-Feb-2020 to 18-May-2020`. Tokens may
 *   be separated by any whitespace (spaces, tabs, newlines, non-breaking
 *   spaces). Nullish values are treated as `''`.
 * @returns An object whose `start` is the first valid date found and whose
 *   `end` is the second; either is omitted when not found. Tokens with an
 *   unrecognized month word are ignored.
 */
export function parseValidityPeriod(rawHTMLString: string): ParseValidityPeriodReturn {
  const validityPeriod: ParseValidityPeriodReturn = {}

  // Splitting on whitespace runs leaves no padding on the pieces, so no
  // per-piece trim is needed; empty edge pieces simply fail the pattern test.
  for (const validityPeriodPiece of (rawHTMLString ?? '').split(/\s+/)) {
    if (!validityPeriodDateRegexp.test(validityPeriodPiece)) {
      continue
    }

    const periodDate = parseValidityPeriodDate(validityPeriodPiece)

    if (periodDate === undefined) {
      continue
    }

    if (validityPeriod.start === undefined) {
      validityPeriod.start = periodDate
    } else {
      // Second date closes the period; anything after it is irrelevant.
      validityPeriod.end = periodDate
      break
    }
  }

  return validityPeriod
}