Skip to content

Commit

Permalink
Changed Gene structure and API
Browse files Browse the repository at this point in the history
  • Loading branch information
kdyrhage committed Dec 10, 2019
1 parent 5a19426 commit d70ea7f
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 120 deletions.
1 change: 1 addition & 0 deletions src/GenomicAnnotations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ using GZip

export Chromosome, Gene, AbstractGene, GeneDataView, Locus
export readgbk, sequence, iscomplement, addgene!, pushproperty!, printgbk
export feature, index, locus
export @genes

include("types.jl")
Expand Down
60 changes: 27 additions & 33 deletions src/genedataview.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
"""
    GeneDataView{G <: AbstractGene} <: AbstractArray{G, 1}

View of one property across a collection of genes.

# Fields
- `parent`: for each element of the view, the chromosome that holds the gene.
- `indices`: for each element of the view, the position of the gene within
  `parent[i].genes`.
- `property`: name of the property that is read or written through the view.

`parent` and `indices` are used pairwise (`parent[i].genes[indices[i]]`), so they are
expected to have the same length.
"""
struct GeneDataView{G <: AbstractGene} <: AbstractArray{G, 1}
    parent::Vector{Chromosome{G}}
    indices::Vector{UInt}
    property::Symbol
end


function Base.getproperty(gene::G, name::Symbol) where {G <: AbstractGene}
if name in (:parent, :index)
return getfield(gene, name)
elseif name in propertynames(gene)
return gene.parent.genedata[gene.index, name]
if name in propertynames(gene)
return parent(gene).genedata[index(gene), name]
else
return missing
end
Expand All @@ -22,46 +20,42 @@ end


"""
    getproperty(genes::AbstractArray{<:AbstractGene, 1}, name::Symbol)

Return a `GeneDataView` over property `name` for every gene in `genes`.

The view records, for each gene, its parent chromosome and its index within that
chromosome, so `genes` may mix genes from different chromosomes. No property values are
read here; they are looked up when the view is indexed.
"""
function Base.getproperty(
    genes::AbstractArray{G, 1}, name::Symbol
) where {G <: AbstractGene}
    # `index` is read with `getfield` so the lookup bypasses the overloaded
    # `getproperty` for genes, which only resolves annotation properties.
    return GeneDataView(parent.(genes), getfield.(genes, Ref(:index)), name)
end


"""
    setproperty!(gene::AbstractGene, name::Symbol, x)

Store `x` as property `name` of `gene` in the `genedata` table of its parent and
return `x`.

If the table has no column called `name`, a new column of element type
`Union{Missing, typeof(x)}` is created first, filled with `missing` for every row.
"""
function Base.setproperty!(gene::G, name::Symbol, x::T) where {G <: AbstractGene, T}
    genedata = parent(gene).genedata
    if !hasproperty(genedata, name)
        # First time this property is seen: all other genes get `missing`.
        genedata[!, name] = Vector{Union{Missing, T}}(missing, size(genedata, 1))
    end
    genedata[index(gene), name] = x
    return x
end


# AbstractArray interface for `GeneDataView`. Element `i` of a view is the value of
# `gv.property` for gene `gv.indices[i]` of chromosome `gv.parent[i]`.

Base.size(gv::GeneDataView) = size(gv.indices)

# Scalar read: look up the gene in its own chromosome, then read the property.
function Base.getindex(gv::GeneDataView, i::Int)
    return getproperty(gv.parent[i].genes[gv.indices[i]], gv.property)
end

# Array indexing subsets `parent` and `indices` together so they stay paired.
function Base.getindex(gv::GeneDataView, I::AbstractArray)
    return GeneDataView(gv.parent[I], gv.indices[I], gv.property)
end

# Scalar write: goes through `setproperty!` of the gene that element `i` refers to.
function Base.setindex!(gv::GeneDataView, v, i::Int)
    return Base.setproperty!(gv.parent[i].genes[gv.indices[i]], gv.property, v)
end

# `similar` and `copy` build a new view from the same `parent`, `indices` and `property`;
# the underlying gene data is not duplicated.
function Base.similar(gv::GeneDataView{G}) where {G <: AbstractGene}
    return GeneDataView(gv.parent, gv.indices, gv.property)
end

function Base.copy(gv::GeneDataView{G}) where {G <: AbstractGene}
    return GeneDataView(gv.parent, gv.indices, gv.property)
end

# `view` subsets `parent` and `indices` with the same selector `I`.
function Base.view(gv::GeneDataView{G}, I) where {G <: AbstractGene}
    return GeneDataView(gv.parent[I], gv.indices[I], gv.property)
end


"""
    fill!(gv::GeneDataView, x)

Set the property viewed by `gv` to `x` for every gene in the view and return `gv`.

Before anything is written, `x` is checked against the element type of the existing
column in each distinct parent chromosome, so an incompatible value raises an error
without leaving the view partially filled.
"""
function Base.fill!(gv::GeneDataView{G}, x) where {G <: AbstractGene}
    for chr in unique(gv.parent)
        if hasproperty(chr.genedata, gv.property)
            # Result is intentionally discarded; only the possible error matters.
            convert(eltype(chr.genedata[!, gv.property]), x)
        end
    end
    # Iterate over positions in the view, not over the stored gene indices.
    for i in eachindex(gv.indices)
        gv[i] = x
    end
    return gv
end
29 changes: 16 additions & 13 deletions src/readgbk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ end


"""
Parse a line encoding a genomic position, returning the feature as a `Symbol` and an instance of `Locus`.
"""
function parseposition(line::String)
feature, posstring = split(strip(line), r" +")
Expand All @@ -31,7 +31,7 @@ function parseposition(line::String)
push!(order, r[1]:r[2])
end
end
return feature, Locus(position, strand, complete_left, complete_right, order, join)
return Symbol(feature), Locus(position, strand, complete_left, complete_right, order, join)
end


Expand All @@ -49,8 +49,8 @@ end
Parse and return one chromosome entry, and the line number that it ends at.
"""
function parsechromosome(lines)
genes = Gene[]
function parsechromosome(lines, G::Type = Gene)
genes = G[]
iobuffer = IOBuffer()
isheader = true
isfooter = false
Expand All @@ -60,10 +60,10 @@ function parsechromosome(lines)
content = String("")
header = ""

feature = ""
feature = :source
locus = Locus()

chromosome = Chromosome()
chromosome = Chromosome{G}()

linecount = 0
for line in lines
Expand Down Expand Up @@ -120,8 +120,9 @@ function parsechromosome(lines)
else
(qualifier, content) = match(r"^ +/(\S+)=(\S+)$", line).captures
try
content = Meta.parse(content)
content isa Expr && throw(Meta.ParseError)
tmpcontent = Meta.parse(content)
tmpcontent isa Expr && throw(Meta.ParseError)
content = tmpcontent
catch
content = Symbol(content)
end
Expand All @@ -131,7 +132,9 @@ function parsechromosome(lines)
spanning = true
end

pushproperty!(chromosome.genes[end], Symbol(qualifier), content)
isempty(names(chromosome.genedata)) ?
(chromosome.genedata[!, Symbol(qualifier)] = Union{Missing, typeof(content)}[content]) :
pushproperty!(chromosome.genes[end], Symbol(qualifier), content)

else
# Qualifiers without a value assigned to them end up here
Expand All @@ -148,7 +151,7 @@ function parsechromosome(lines)
spanning = false
end
if eltype(chromosome.genedata[!, Symbol(qualifier)]).b <: AbstractArray
i = chromosome.genes[end].index
i = index(chromosome.genes[end])
chromosome.genedata[!, Symbol(qualifier)][end][end] = Base.getproperty(chromosome.genes[end], Symbol(qualifier))[end] * "\n" * content
else
Base.setproperty!(chromosome.genes[end], Symbol(qualifier), Base.getproperty(chromosome.genes[end], Symbol(qualifier)) * "\n" * content)
Expand Down Expand Up @@ -176,10 +179,10 @@ end
Parse GenBank-formatted file `filename`, returning a `Vector{Chromosome{G}}`. The gene type `G` defaults to `Gene`.
"""
function readgbk(filename)
function readgbk(filename, G::Type = Gene)
gz = filename[end-2:end] == ".gz"
finished = false
chrs = Chromosome[]
chrs = Chromosome{G}[]
if gz
f = GZip.open(filename)
else
Expand All @@ -191,7 +194,7 @@ function readgbk(filename)
if currentline >= length(lines)
break
end
i, chr = parsechromosome(lines[currentline:end])
i, chr = parsechromosome(lines[currentline:end], G)
currentline += i
push!(chrs, chr)
end
Expand Down
Loading

0 comments on commit d70ea7f

Please sign in to comment.