Merge pull request #13 from JuliaGeo/unlimited

Support for unlimited dimensions
JuliaGeo · Feb 18, 2016 · 4317fb1 · 4317fb1
2 parents 4e65e28 + 4f920e0
commit 4317fb1
Show file tree

Hide file tree

Showing 5 changed files with 133 additions and 73 deletions.
diff --git a/doc/NetCDF.md b/doc/NetCDF.md
@@ -12,43 +12,43 @@ prints information on the variables, dimension and attributes contained in the f
 ## Reading data
 
     ncread(filename, varname, start=[1,1,...], count=[-1,-1,...])
-    
-reads the values of the variable varname from file filename. If only parts of the variable are to be read, you can provide optionally start and count, which enable you to read blocks of data. 
-start and count have the same length as the number of variable dimensions. start gives the initial index for each dimension, while count gives the number of indices to be read along each dimension. As a special case, setting a value in count to -1 will cause the function to read all values along this dimension. 
+
+reads the values of the variable varname from file filename. If only parts of the variable are to be read, you can provide optionally start and count, which enable you to read blocks of data.
+start and count have the same length as the number of variable dimensions. start gives the initial index for each dimension, while count gives the number of indices to be read along each dimension. As a special case, setting a value in count to -1 will cause the function to read all values along this dimension.
 
     ncread!(filename, varname, d, start=[1,1,...], count=[-1,-1,...])
-    
-is the mutating form of `ncread` which expects a pre-allocated array d, where the data are written to. In performance-critical situations you should always use this function for two reasons: First you can avoid unnecessary memory allocation if you read and process data in a loop and can reuse memory. The second point is that the mutating version is type-safe is contrast to the non-mutating version. `ncread` will return an array with a type depending on the data type of your netCDF variable, which means that type-inference can not work correctly and further operations on that array might be slow. `ncread!` will always try to convert the data to the array-type provided by the user and never change the type of `d`, so that the element type of the returned array is predictable and operations on the returned array run fast. 
+
+is the mutating form of `ncread` which expects a pre-allocated array d, where the data are written to. In performance-critical situations you should always use this function for two reasons: First you can avoid unnecessary memory allocation if you read and process data in a loop and can reuse memory. The second point is that the mutating version is type-safe in contrast to the non-mutating version. `ncread` will return an array with a type depending on the data type of your netCDF variable, which means that type-inference cannot work correctly and further operations on that array might be slow. `ncread!` will always try to convert the data to the array-type provided by the user and never change the type of `d`, so that the element type of the returned array is predictable and operations on the returned array run fast.
 
 ## Writing data
 
     ncwrite(data, filename, varname, start=start, count=count)
-    
-Writes the array data to the file. If no start argument is supplied, writing starts at index 1 in each dimension. 
-You can supply the argument start, a vector that has the same number as the number of variable dimensions, 
+
+Writes the array data to the file. If no start argument is supplied, writing starts at index 1 in each dimension.
+You can supply the argument start, a vector whose length equals the number of variable dimensions,
 that provides the indices where to start writing the data. As default the number of values written along each dimension
 equals the dimension of the input array. However, you can specify along which dimension the data will be written by
 setting a count argument, an integer vector indicating the number of values written along each dimension.
 
 ## Reading attributes
 
     ncgetatt(filename, varname, attname)
-    
-This reads an attribute from the specified file and variable. To read global attributes, set varname to "Global". 
+
+This reads an attribute from the specified file and variable. To read global attributes, set varname to "Global".
 
 ## Writing attributes
 
     ncputatt(filename, varname, attributes)
-    
-Here the filename is a string, varname the name of the variable the attribute is associated with. If varname is not a valid variable name, then a global attribute is created. 
+
+Here the filename is a string, varname the name of the variable the attribute is associated with. If varname is not a valid variable name, then a global attribute is created.
 
 ## Creating files
 
     nccreate(filename, varname, dimensions ..., atts=atts,gatts=gatts,compress=compress,t=t,mode=mode)
 
 This creates a variable in an existing netCDF file or creates a new file. Filename and varname are strings.  
-After that follows a list of dimensions. Each dimension entry starts with a dimension name(a String), and 
-may be followed by a dimension length, an array with dimension values or a Dict containing dimension attributes. 
+After that follows a list of dimensions. Each dimension entry starts with a dimension name (a String), and
+may be followed by a dimension length (can be Inf for unlimited dimensions), an array with dimension values or a Dict containing dimension attributes.
 Then the next dimension is entered and so on. Have a look at examples/high.jl for an example use.
 
 Possible optional arguments are:
@@ -61,57 +61,61 @@ Possible optional arguments are:
 ## Miscellaneous
 
     ncsync([ filename ])
-    
-Synchronizes the changes made to the file and writes changes to the disk. If the argument is omitted, all open files are synchronized. 
+
+Synchronizes the changes made to the file and writes changes to the disk. If the argument is omitted, all open files are synchronized.
 
     ncclose([ filename ])
-    
+
 Closes the file and writes changes to the disk. If argument is omitted, all open files are closed.   
 
 # Medium-level interface
 
 ## Getting information
 
     nc = netCDF.open(filename, mode=NC_NOWRITE, readdimvar=false)
-    
-this function returns an object of type NcVar, which contains all file metainformation and attributes. You can browse it, just type 
+
+This function returns an object of type NcFile, which contains all file meta-information and attributes. To browse it, just type
 
     names(nc)
-    
-to find out the fields of the type NcVar. Most of the other functions of the medium-level interface will use the NcFile object as their first argument. The optional argument mode determines the mode in which the files is opened(NC_NOWRITE or NC_WRITE). If you set readdimvar=true, then the dimension variables will be read when opening the file and added to the NcFIle object. 
+
+to find out the fields of the type NcFile. Most of the other functions of the medium-level interface will use the NcFile object as their first argument. The optional argument mode determines the mode in which the file is opened (NC_NOWRITE or NC_WRITE). If you set readdimvar=true, then the dimension variables will be read when opening the file and added to the NcFile object.
 
 ## Reading data
 
     netCDF.readvar(nc, varname, start=[1,1,...], count=[-1,-1,...])
-    
-This function returns an array of values read from the file. The first argument is of type NcFile and is the file handler of a previously opened netCDF file. varname is the variable name of the variable to be read. start and count are optional integer arrays of the same length as the number of variable dimensions, giving the starting indices and the number of steps to be read along each dimension. Setting values in the count vector to -1 will cause the function to read all indices of the respective dimension. If the start and count argument are omitted, the whole variable will be read. 
+
+This function returns an array of values read from the file. The first argument is of type NcFile and is the file handler of a previously opened netCDF file. varname is the variable name of the variable to be read. start and count are optional integer arrays of the same length as the number of variable dimensions, giving the starting indices and the number of steps to be read along each dimension. Setting values in the count vector to -1 will cause the function to read all indices of the respective dimension. If the start and count argument are omitted, the whole variable will be read.
 
 ## Writing data
 
     netCDF.putvar(nc, varname, vals, start=[1,1,...], count=[size(vals)...])
-
-This function writes the values from the array vals to a netCDF file. nc is a netCDF file handler of type NcFile, varname the variable name and vals an array with the same dimension as the variable in the netCDF file. The optional parameter start gives the first index in each dimension along which the writing should begin. It is assumed that the input array vals has the same number of dimensions as the and writing happens along these dimensions. However, you can specify the number of values to be written along each dimension by adding an optional count argument, which is a vector whose length equals the number of variable dimensions. 
 
-
+This function writes the values from the array vals to a netCDF file. nc is a netCDF file handler of type NcFile, varname the variable name and vals an array with the same dimension as the variable in the netCDF file. The optional parameter start gives the first index in each dimension along which the writing should begin. It is assumed that the input array vals has the same number of dimensions as the variable and writing happens along these dimensions. However, you can specify the number of values to be written along each dimension by adding an optional count argument, which is a vector whose length equals the number of variable dimensions.
+
+This function writes the values from the array vals to a netcdf file. nc is a netcdf file handler of type NcFile, varname the variable name and vals an array with the same dimension as the variable in the netcdf file. The optional parameter start gives the first index in each dimension along which the writing should begin. It is assumed that the input array vals has the same number of dimensions as the variable and writing happens along these dimensions. However, you can specify the number of values to be written along each dimension by adding an optional count argument, which is a vector whose length equals the number of variable dimensions.
+
+
 ## Creating files
 
 To create a netCDF file you first have to define the dimensions and variables that it is supposed to hold. As representations for netCDF dimensions and variables there are the predefined NcVar and NcDim types. An NcDim object is created by:
 
-    NcDim(dimname, dimlength, atts=Dict{Any,Any}(), values=[])
-    
-here dimname is the dimension name, dimlength is the dimension length. The optional argument values is a 1D array of values that are written to the dimension variable and the optional argument atts is a Dict holding pairs of attribute names and values. 
+    NcDim(dimname, dimlength, atts=Dict{Any,Any}(), values=[], unlimited=false)
+
+here dimname is the dimension name, dimlength is the dimension length. The optional argument values is a 1D array of values that are written to the dimension variable and the optional argument atts is a Dict holding pairs of attribute names and values. Setting `unlimited=true` creates an unlimited dimension.
 
 After defining the dimensions, you can create NcVar objects with
 
     NcVar(varname , dimlist; atts=Dict{Any,Any}(), t=Float64, compress=-1)
-
-Here *varname* is the name of the variable, *dimlist* an array of type NcDim holding the dimensions associated to the variable, varattributes is a Dict holding pairs of attribute names and values. *t* is the data type that should be used for storing the variable.  You can either specify a Julia type(Int16, Int32, Float32, Float64) which will be translated to(NC_SHORT, NC_INT, NC_FLOAT, NC_DOUBLE) or directly specify one of the latter list. You can also set the compression level of the variable by setting *compress* to a number in the range 1..9 This has only an effect in netCDF4 files. 
+
+Here *varname* is the name of the variable, *dimlist* an array of type NcDim holding the dimensions associated to the variable, *atts* is a Dict holding pairs of attribute names and values. *t* is the data type that should be used for storing the variable.  You can either specify a Julia type (Int16, Int32, Float32, Float64) which will be translated to (NC_SHORT, NC_INT, NC_FLOAT, NC_DOUBLE) or directly specify one of the latter. You can also set the compression level of the variable by setting *compress* to a number in the range 1..9. This only has an effect in netCDF4 files.
+
 
 Having defined the variables, the netCDF file can be created:
 
     netCDF.create(filename, varlist, gatts=Dict{Any,Any}(),mode=NC_NETCDF4)
-
-Here, filename is the name of the file to be created and varlist an array of NcVar holding the variables that should appear in the file. In the optional argument *gatts* you can specify a Dict containing global attributes and mode is the file type you want to create(NC_netCDF4, NC_CLASSIC_MODEL or NC_64BIT_OFFSET). 
+
+Here, filename is the name of the file to be created and varlist an array of NcVar holding the variables that should appear in the file. In the optional argument *gatts* you can specify a Dict containing global attributes and mode is the file type you want to create (NC_NETCDF4, NC_CLASSIC_MODEL or NC_64BIT_OFFSET).
+
 
 ## Miscellaneous
 
@@ -122,5 +126,5 @@ once you have finished reading, writing or editing your files you can close the
 If you just want to synchronize your changes to the disk, run
 
     netCDF.sync(nc)
-    
-where nc is a netCDF file handler. 
+
+where nc is a netCDF file handler.
diff --git a/src/NetCDF.jl b/src/NetCDF.jl
@@ -4,7 +4,7 @@ using Formatting
 using Base.Cartesian
 include("netcdf_c.jl")
 import Base.show
-export NcDim,NcVar,NcFile,ncread,ncread!,ncwrite,nccreate,ncsync,ncinfo,ncclose,ncputatt,NC_BYTE,NC_SHORT,NC_INT,NC_FLOAT,NC_DOUBLE,NC_STRING,ncgetatt,NC_NOWRITE,NC_WRITE,NC_CLOBBER,NC_NOCLOBBER,NC_CLASSIC_MODEL,NC_64BIT_OFFSET,NC_NETCDF4
+export NcDim,NcVar,NcFile,ncread,ncread!,ncwrite,nccreate,ncsync,ncinfo,ncclose,ncputatt,NC_BYTE,NC_SHORT,NC_INT,NC_FLOAT,NC_DOUBLE,NC_STRING,ncgetatt,NC_NOWRITE,NC_WRITE,NC_CLOBBER,NC_NOCLOBBER,NC_CLASSIC_MODEL,NC_64BIT_OFFSET,NC_NETCDF4,NC_UNLIMITED
 NC_VERBOSE=false
 #Some constants
 
@@ -45,29 +45,30 @@ type NcDim
   dimlen::UInt
   vals::AbstractArray
   atts::Dict
+  unlim::Bool
 end
 
 
 """
 
-    NcDim(name::String,dimlength::Integer;values::Union{AbstractArray,Number}=[],atts::Dict{Any,Any}=Dict{Any,Any}())`
+    NcDim(name::String,dimlength::Integer;values::Union{AbstractArray,Number}=[],atts::Dict{Any,Any}=Dict{Any,Any}(),unlimited=false)
 This constructor creates an NcDim object with the name `name` and length `dimlength`.
 """
-function NcDim(name::AbstractString,dimlength::Integer;values::Union{AbstractArray,Number}=[],atts::Dict=Dict{Any,Any}())
+function NcDim(name::AbstractString,dimlength::Integer;values::Union{AbstractArray,Number}=[],atts::Dict=Dict{Any,Any}(),unlimited=false)
     (length(values)>0 && length(values)!=dimlength) ? error("Dimension value vector must have the same length as dimlength!") : nothing
-    NcDim(-1,-1,-1,utf8(name),dimlength,values,atts)
+    NcDim(-1,-1,-1,utf8(name),dimlength,values,atts,unlimited)
 end
 
 
 """
-    NcDim(name::AbstractString,dimlength::Integer;values::Union{AbstractArray,Number}=[],atts::Dict{Any,Any}=Dict{Any,Any}())
+    NcDim(name::AbstractString,values::AbstractArray;atts::Dict=Dict{Any,Any}(),unlimited=false)
 This constructor creates an NcDim object with the name `name` and associated values `values`. Upon creation of the NetCDF file a
 dimension variable will be generated and the values be written to this variable. Optionally a Dict of attributes can be supplied.
 """
-NcDim(name::AbstractString,values::AbstractArray;atts::Dict=Dict{Any,Any}())=
-  NcDim(name,length(values),values=values,atts=atts)
-NcDim(name::AbstractString,values::AbstractArray,atts::Dict)=
-  NcDim(name,length(values),values=values,atts=atts)
+NcDim(name::AbstractString,values::AbstractArray;atts::Dict=Dict{Any,Any}(),unlimited=false)=
+  NcDim(name,length(values),values=values,atts=atts,unlimited=unlimited)
+NcDim(name::AbstractString,values::AbstractArray,atts::Dict;unlimited=false)=
+  NcDim(name,length(values),values=values,atts=atts,unlimited=unlimited)
 
 """
 The type `NcVar{T,N}` represents a NetCDF variable. It is a subtype of AbstractArray{T,N}, so normal indexing using `[]`
@@ -323,22 +324,23 @@ end
   N==length(I) || error("Dimension mismatch")
 
   quote
-    checkbounds(v,I...)
+
     @nexprs $N i->gstart[v.ndim+1-i]=firsti(I[i],v.dim[i].dimlen)
     @nexprs $N i->gcount[v.ndim+1-i]=counti(I[i],v.dim[i].dimlen)
+    checkboundsNC(v)
     p=1
     @nexprs $N i->p=p*gcount[v.ndim+1-i]
     length(val) != p && error(string("Size of output array ($(length(retvalsa))) does not equal number of elements to be read (",p,")!"))
     nc_put_vara_x(v.ncid,v.varid,gstart,gcount,val)
   end
 end
 
-function putvar{T,N}(v::NcVar{T,N},val::Any,I::Integer...)
+@generated function putvar{T,N}(v::NcVar{T,N},val::Any,I::Integer...)
 
     N==length(I) || error("Dimension mismatch")
     quote
-      checkbounds(v,I...)
       @nexprs $N i->gstart[v.ndim+1-i]=I[i]-1
+      @nall($N,d->((I[d]<=v.dim[d].dimlen && I[d]>0) || v.dim[d].unlim)) || throw(BoundsError(v,I)) 
       nc_put_var1_x(v.ncid,v.varid,gstart,val)
     end
 
@@ -362,6 +364,27 @@ function nc_put_var1_x(ncid::Integer,varid::Integer,start::Vector{UInt},val::Abs
   nc_put_var1_string(ncid,varid,start,val_p)
 end
 
+function Base.push!(v::NcVar,a::AbstractArray)
+    sold=size(v)
+    N=ndims(v)
+    iunlim=find(map(x->x.unlim,v.dim))
+    length(iunlim)==1 || error("You can only push to a NetCDF variable with one unlimited dimension")
+    st=fill(1,N);st[iunlim[1]]=sold[iunlim[1]]+1
+    co=fill(-1,N)
+    if ndims(v)==ndims(a)
+        co[iunlim[1]]=size(a,iunlim[1])
+    elseif ndims(v)==ndims(a)+1
+        co[iunlim[1]]=1
+    else
+        error("You can only push variables that have equal or one fewer dimension than the NetCDF Variable")
+    end
+    NetCDF.putvar(v,a,start=st,count=co)
+end
+
+function Base.push!{T}(v::NcVar{T,1},a::Number)
+    push!(v,collect(a))
+end
+
 
 "Synchronizes the changes made to the file and writes changes to the disk. If the argument is omitted, all open files are synchronized. "
 function ncsync()
@@ -504,7 +527,7 @@ function open(fil::AbstractString; mode::Integer=NC_NOWRITE, readdimvar::Bool=fa
   #Read dimensions
   for dimid = 0:ndim-1
     (name,dimlen)=nc_inq_dim(ncid,dimid)
-    ncf.dim[name]=NcDim(ncid,dimid,-1,name,dimlen,[],Dict{Any,Any}())
+    ncf.dim[name]=NcDim(ncid,dimid,-1,name,dimlen,[],Dict{Any,Any}(),dimid==nunlimdimid ? true : false)
   end
 
   #Read variable information
@@ -678,6 +701,7 @@ function nccreate(fil::AbstractString,varname::AbstractString,dims...;atts::Dict
             !isempty(d.vals) && ncwrite(d.vals,fil,d.name)
         end
     end
+    return v
 end
 
 #show{T<:Any,N}(io::IO,a::NcVar{T,N})=println(io,a.name)
@@ -697,7 +721,7 @@ function show(io::IO,nc::NcFile)
     println(io,tolen("Name",l2),tolen("Length",l1))
     println(hline)
   for d in nc.dim
-    println(io,tolen(d[2].name,l2),tolen(d[2].dimlen,l1))
+    println(io,tolen(d[2].name,l2),tolen(d[2].unlim ? string("UNLIMITED (" ,d[2].dimlen," currently)") : d[2].dimlen,l1))
   end
   l1=div(ncol,5)
   l2=2*l1