-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathagiletok.sh
executable file
·83 lines (66 loc) · 2.16 KB
/
agiletok.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env bash
#PBS -l walltime=12:00:00
#PBS -T allcores
#PBS -N agiletok
#PBS -q isi
# Wrap the agile tokenizer and additionally write a lowercased (LC) copy of
# its output.
#Set Script Name variable
#Quoted so that a script path containing spaces is handled as one argument.
SCRIPT=$(basename -- "${BASH_SOURCE[0]}")
SCRIPTDIR=$(dirname -- "$0")
#Set fonts for Help.
#tput writes an error to stderr when no usable terminal is available (for
#example TERM unset in a batch job). That noise is intentionally discarded;
#the variables are then simply empty and the help text is printed unstyled.
NORM=$(tput sgr0 2> /dev/null)
BOLD=$(tput bold 2> /dev/null)
REV=$(tput smso 2> /dev/null)
#Pick the sed binary: on macOS (Darwin) use GNU sed installed as "gsed",
#because the lowercasing step relies on the GNU-only \L replacement escape.
unamestr=$(uname)
sed="sed"
if [[ "$unamestr" == 'Darwin' ]]; then
  sed="gsed"
fi
#Defaults, overridable on the command line with -o, -t and -i respectively.
OUTFILE=/dev/stdout
TOKFILE=/dev/null
INFILE=/dev/stdin
#The tokenizer is looked up relative to the directory of the invoked script.
TOKENIZER="$SCRIPTDIR/agile_tokenizer/gale-eng-tok.sh"
#Help function
#######################################
# Print the usage text and terminate the script.
# Globals (read): SCRIPT, BOLD, NORM, REV, TOKFILE, OUTFILE, INFILE
#   The "Default is" values shown are whatever TOKFILE/OUTFILE/INFILE hold at
#   the moment HELP is called.
# Arguments: none
# Outputs:   the help text on stdout
# Exits:     always with status 1; HELP never returns to its caller
#######################################
function HELP {
  # printf '%s' prints the interpolated values verbatim. echo -e would also
  # expand backslash escapes that happen to occur inside the script name.
  printf '\n%s\n\n' "Help documentation for ${BOLD}${SCRIPT}.${NORM}"
  printf '%s\n\n' "${REV}Basic usage:${NORM} ${BOLD}$SCRIPT -i corpus -o tok.lc -t tok ${NORM}"
  printf '%s\n' "${REV}-t${NORM} --Destination for tokenized file. Default is ${BOLD}$TOKFILE${NORM}."
  printf '%s\n' "${REV}-o${NORM} --Destination for tokenized, lowercased file. Default is ${BOLD}$OUTFILE${NORM}."
  printf '%s\n' "${REV}-i${NORM} --Source for untokenized, truecased file. Default is ${BOLD}$INFILE${NORM}."
  printf '%s\n\n' "${REV}-h${NORM} --Displays this help message. No further functions are performed."
  exit 1
}
### Start getopts code ###
#Parse command line flags
#An option that takes an argument is followed by a ":" in the option string;
#"h" takes none. The leading ":" puts getopts into silent mode: it prints no
#diagnostics of its own and instead reports an unknown option as "?" and a
#missing option argument as ":" (the offending option letter is in OPTARG).
#Both cases are therefore handled explicitly below.
COMMANDLINE="$*" #all original arguments joined into a single string
while getopts :i:o:t:h FLAG; do
  case "$FLAG" in
    i) #set option "i": source file
      INFILE=$OPTARG
      ;;
    o) #set option "o": destination of the tokenized, lowercased text
      OUTFILE=$OPTARG
      ;;
    t) #set option "t": destination of the tokenized text
      TOKFILE=$OPTARG
      ;;
    h) #show help
      HELP
      ;;
    :) #option given without its required argument - report it and show help
      #Diagnostics go to stderr so they never mix with data written to stdout.
      printf '\n%s\n' "Option -${BOLD}$OPTARG${NORM} requires an argument." >&2
      HELP >&2
      ;;
    \?) #unrecognized option - show help
      printf '\n%s\n' "Option -${BOLD}$OPTARG${NORM} not allowed." >&2
      HELP >&2
      #If you just want to display a simple error message instead of the full
      #help, remove the 2 lines above and uncomment the 2 lines below.
      #printf '%s\n\n' "Use ${BOLD}$SCRIPT -h${NORM} to see the help documentation." >&2
      #exit 2
      ;;
  esac
done
shift $((OPTIND-1)) #Drop the parsed options; "$@" now holds only the remaining operands.
### End getopts code ###
# Run the tokenizer on INFILE, keep a copy of the tokenized text in TOKFILE
# (via tee) and write the lowercased version of that text to OUTFILE.
#  - All paths are quoted so that names containing spaces work.
#  - The tokenizer's stderr is deliberately discarded, as before.
#  - \L in the sed replacement is a GNU extension that lowercases the match.
#  - pipefail makes the pipeline report a failure of any stage, so the script
#    no longer claims success when the tokenizer, tee or sed failed.
set -o pipefail
"$TOKENIZER" < "$INFILE" 2> /dev/null \
  | tee -- "$TOKFILE" \
  | "$sed" -e 's/\(.*\)/\L\1/' > "$OUTFILE"
exit "$?"