forked from rdpeng/ExData_Plotting1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot1.R
60 lines (54 loc) · 2.54 KB
/
plot1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
###
### plot1: histogram of Global_active_power for the household power
### consumption data set, restricted to 1 and 2 February 2007.
###
### Side effects (all relative to the current working directory):
###   * if "household_power_consumption.txt" is missing, the zip archive is
###     downloaded (unless it is already present) and unzipped;
###   * "plot1.png" (480 x 480 pixels) is written.
###
### Takes no arguments.
###
### Returns the data frame that was plotted: the rows of the two selected
### days whose Global_active_power is not missing. `Date` is a Date, `Time`
### is a POSIXlt holding the full timestamp (date and time of day), and the
### seven measurement columns are numeric.
###
### Stops with an error if the data file cannot be obtained or if no rows
### fall in the selected range.
###
plot1 <- function() {
  # define some constants
  fileURL <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
  zipName <- "exdata_data_household_power_consumption.zip"
  fileName <- "household_power_consumption.txt"
  dateFormat <- "%d/%m/%Y"
  timeFormat <- "%H:%M:%S"
  startDate <- "1/2/2007"
  stopDate <- "2/2/2007"
  plotFile <- "plot1.png"

  # NOTE: no Sys.setlocale() here. All parsing below is purely numeric and the
  # histogram has no date labels, so the process-wide locale is left alone.

  # make sure the data set is available on disk
  if (!file.exists(fileName)) {
    if (!file.exists(zipName)) {
      download.file(fileURL, zipName, mode = "wb")
    }
    unzip(zipName)
    if (!file.exists(fileName)) {
      stop("'", fileName, "' was not found after unzipping '", zipName, "'",
           call. = FALSE)
    }
  }

  # Load the data set. Missing measurements are coded as "?", which
  # na.strings turns into NA; blank numeric fields are read as NA as well.
  # Date and Time are read as text and converted after filtering.
  raw <- read.table(
    fileName,
    sep = ";",
    header = TRUE,
    na.strings = "?",
    colClasses = c(
      "character",      # col 1 is the date
      "character",      # col 2 is the time
      rep("numeric", 7) # cols 3 to 9 are the measurements
    )
  )

  # Keep the two days of interest. %in% never yields NA, so a date that fails
  # to parse cannot leak an all-NA row into the selection.
  allDates <- as.Date(raw$Date, format = dateFormat)
  wantedDates <- as.Date(c(startDate, stopDate), format = dateFormat)
  # There are no NA's in this range of the original data set, but the check
  # is kept for the sake of completeness.
  keep <- allDates %in% wantedDates & !is.na(raw$Global_active_power)
  data <- raw[keep, ]
  if (nrow(data) == 0L) {
    stop("no observations between ", startDate, " and ", stopDate,
         " in '", fileName, "'", call. = FALSE)
  }

  # Parse date and time together: parsing the time alone would silently
  # attach the date on which the script happens to be run.
  data$Time <- strptime(paste(data$Date, data$Time),
                        format = paste(dateFormat, timeFormat))
  data$Date <- allDates[keep]

  # plot the chart
  # the reference is 504X504 while the one produced here is 480X480
  png(plotFile, width = 480, height = 480)
  pngDevice <- dev.cur()
  # close the device even when hist() fails, so the file handle is released
  on.exit(dev.off(pngDevice), add = TRUE)
  hist(data$Global_active_power, col = "red", main = "Global Active Power",
       xlab = "Global Active Power (kilowatts)")

  # return the data set as used for plotting
  data
}