-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathumnlookup.hs
298 lines (250 loc) · 8.87 KB
/
umnlookup.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
{-# LANGUAGE OverloadedStrings #-}
import Options.Applicative
import Network.HTTP
import Data.String.Utils (strip, replace)
import Data.List (intersperse, partition)
import Data.Semigroup ((<>))
import Text.Printf (printf)
import System.Console.Terminal.Size
import System.IO
import Text.HTML.Onama
import Text.Parsec (parse, try, many1, ParsecT)
import Text.Regex (subRegex, mkRegex)
main :: IO ()
main = execParser opts >>= lookupPeople
where lookupPeople lookup = do
tags <- (drop 4)
<$> (dropWhile notFormClose)
<$> parseTags
<$> urlSlurp (lookupURL lookup)
case parse umnLookup "" tags of
Left error -> hPutStrLn stderr (show error)
Right (Left error) -> hPutStrLn stderr error
Right (Right people) -> stdoutIO
(printPeopleTTY people)
(printPeopleNoTTY people)
notFormClose (TagClose "form" _) = False
notFormClose _other = True
urlSlurp :: String -> IO String
urlSlurp x = getResponseBody =<< simpleHTTP (getRequest x)
-- Parsing our command-line options.
opts :: ParserInfo Lookup
opts = info (helper <*> lookupCLI)
( fullDesc
<> progDesc "Lookup people from UMN's student/faculty database. Outputs in a human-readable format at the command line, or to CSV if called as part of a pipeline."
<> header "umnlookup - look up university peoples"
)
data Lookup = Lookup
{ campus :: Campus
, role :: Role
, x500 :: Bool
, search :: String
}
deriving (Eq, Show)
lookupCLI :: Parser Lookup
lookupCLI =
Lookup
<$> option verifyCampus
( long "campus"
<> short 'c'
<> metavar "CAMPUS"
<> value AnyCampus
<> help "Restrict search to a specific campus. Choices: any, crookston, duluth, morris, rochester, twincities, other. Default: any"
)
<*> option verifyRole
( long "role"
<> short 'r'
<> metavar "ROLE"
<> value AnyRole
<> help "Restrict search to certain kinds of University people. Choices: any, staff, student, alumni, retired. Default: any"
)
<*> switch
( long "id-only"
<> short 'i'
<> help "Search only by x500. Default is to search as if the search term could be either a name or an x500."
)
<*> argument str (metavar "NAME/X500")
-- | Each Lookup maps to a URL which we request in order to get the actual people.
lookupURL :: Lookup -> String
lookupURL lookup =
"http://myaccount.umn.edu/lookup?SET_INSTITUTION=&" ++ vars lookup
where vars (Lookup campus role x500 search) =
let baseVars = zip ["campus", "role", "CN"]
[toParamString campus, toParamString role, search]
in urlEncodeVars $ ("type", if x500 then "Internet ID" else "name") : baseVars
data Campus
= AnyCampus
| Crookston
| Duluth
| Morris
| Rochester
| TwinCities
| OtherCampus
deriving (Eq, Show)
data Role
= AnyRole
| Staff
| Student
| Alumni
| Retired
deriving (Eq, Show)
verifyCampus :: ReadM Campus
verifyCampus = eitherReader $ \str ->
case str of
"any" -> Right AnyCampus
"crookston" -> Right Crookston
"duluth" -> Right Duluth
"morris" -> Right Morris
"rochester" -> Right Rochester
"twincities" -> Right TwinCities
"other" -> Right OtherCampus
_otherwise -> Left $ printf "invalid campus name: %s" str
verifyRole :: ReadM Role
verifyRole = eitherReader $ \str ->
case str of
"any" -> Right AnyRole
"staff" -> Right Staff
"student" -> Right Student
"alumni" -> Right Alumni
"retired" -> Right Retired
_otherwise -> Left $ printf "invalid role: %s" str
class HTMLParam param where
toParamString :: param -> String
instance HTMLParam Campus where
toParamString AnyCampus = "a"
toParamString Crookston = "c"
toParamString Duluth = "d"
toParamString Morris = "m"
toParamString Rochester = "r"
toParamString TwinCities = "t"
toParamString OtherCampus = "o"
instance HTMLParam Role where
toParamString AnyRole = "any"
toParamString Staff = "sta"
toParamString Student = "stu"
toParamString Alumni = "alu"
toParamString Retired = "ret"
-- Parsing our HTML into a list of people.
umnLookup :: Monad m => ParsecT [Tag String] u m LookupResult
umnLookup =
try singlePersonLookup
<|> try multiPersonLookup
<|> try errorLookup
type LookupResult = Either String [Person]
errorLookup :: Monad m => ParsecT [Tag String] u m LookupResult
errorLookup = do
error <- innerText <$> balancedTags "b"
return $ Left (strip error)
deassoc :: Eq a => a -> [(a, b)] -> ([(a, b)], Maybe b)
deassoc k l =
let (l', l'') = partition ((/= k) . fst) l
in (l', lookup k l'')
singlePersonLookup :: Monad m => ParsecT [Tag String] u m LookupResult
singlePersonLookup = do
optional $ tagOpen "img" >> tagOpen "br"
name <- innerText <$> balancedTags "h2"
fields <- tableFields
let (fields', email) = deassoc "Email Address" fields
case email of
Nothing -> fail "no email found"
Just e -> return $ Right [Person name e fields']
where tableFields = do
tagOpen "table"
fields <- many1 tableField
tagClose "table"
return fields
tableField = do
tagOpen "tr"
fieldName <- (replace ":" "") <$> innerText <$> balancedTags "th"
fieldValue <- strip <$> innerText <$> balancedTags "td"
tagClose "tr"
return (fieldName, fieldValue)
multiPersonLookup :: Monad m => ParsecT [Tag String] u m LookupResult
multiPersonLookup = do
tagOpen "table"
fieldNames <- fieldNames
people <- many1 fieldValues
tagClose "table"
case generatePeople fieldNames people of
Nothing -> fail "not a properly-formatted multi-result page (maybe the layout changed?)"
Just people -> return $ Right people
where fieldNames = do
tagOpen "tr"
f <- many1 $ (try $ strip <$> innerText <$> balancedTags "th")
tagClose "tr"
return f
fieldValues = do
tagOpen "tr"
f <- many1 $ (try $ strip
<$> (replace "\xA0" " ")
<$> innerText
<$> balancedTags "td")
tagClose "tr"
return f
generatePeople fieldNames people = mapM (generatePerson fieldNames) people
generatePerson fieldNames person =
let fields = zip fieldNames person
(fields', name) = deassoc "Name" fields
(fields'', email) = deassoc "Email" fields'
in case (name, email) of
(Just n, Just e) -> Just $ Person (normalize n) e fields''
_other -> Nothing
where normalize n = subRegex (mkRegex "(.+?), (.+?)") n "\\2 \\1"
-- Printing the people we've found to the terminal.
defaultWindow :: Integral a => Window a
defaultWindow = Window 24 80
-- | Produce an IO which calls the first IO if connected to a TTY, and
-- the second IO otherwise.
stdoutIO :: (Window Int -> IO a) -> IO a -> IO a
stdoutIO f noTTY = do
isTTY <- hIsTerminalDevice stdout
if isTTY
then (maybe defaultWindow id) <$> size >>= f
else noTTY
data Person = Person
{ name :: String
, email :: String
, fields :: [(String, String)]
}
putLn :: IO ()
putLn = putStrLn ""
printPeopleTTY :: [Person] -> Window Int -> IO ()
printPeopleTTY people window =
sequence_ $ intersperse (printf "\n* * * * *\n\n")
(fmap (printPersonTTY window) people)
filterMaybes :: [Maybe a] -> [a]
filterMaybes = fmap (maybe undefined id)
. (filter $ \obj -> case obj of { Nothing -> False; Just _x -> True} )
printPersonTTY :: Window Int -> Person -> IO ()
printPersonTTY window (Person name email fields) = do
putStrLn name
putStrLn email
putLn
sequence_ $ intersperse putLn (filterMaybes (fmap (printFieldTTY window) fields))
printFieldTTY :: Window Int -> (String, String) -> Maybe (IO ())
printFieldTTY _window (fieldName, value) =
if null value
then Nothing
else Just $ do printf "### %s ###\n" fieldName
putStrLn $ value
printPeopleNoTTY :: [Person] -> IO ()
printPeopleNoTTY = mapM_ printPersonNoTTY
printPersonNoTTY :: Person -> IO ()
printPersonNoTTY (Person name email fields) = do
printf "%s,%s" name email
putChar ','
printFieldNamesNoTTY $ fmap fst fields
putChar ','
printFieldValuesNoTTY $ fmap snd fields
putLn
printFieldNamesNoTTY :: [String] -> IO ()
printFieldNamesNoTTY = mapM_ putStr . intersperse ";" . fmap strip
printFieldValuesNoTTY :: [String] -> IO ()
printFieldValuesNoTTY strs = mapM_ putStr
(intersperse ";" $ fmap (escapeCSV . strip) strs)
escapeCSV :: String -> String
escapeCSV = mconcat . fmap escape
where escape '\n' = "\\n"
escape ';' = "\\;"
escape ',' = "\\,"
escape c = [c]