Skip to content

Commit

Permalink
Added support for the join operator, and more documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
kdyrhage committed Sep 25, 2019
1 parent 1a85291 commit c766efb
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"

[compat]
DataFrames = "0.19"
BioSequences = "2"
DataFrames = "0.19"
julia = "1"

[extras]
Expand Down
3 changes: 3 additions & 0 deletions ci_prep.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
using Pkg.Registry
# Add the General registry first, then the BioJulia registry.
for url in (
    "https://github.com/JuliaRegistries/General.git",
    "https://github.com/BioJulia/BioJuliaRegistry.git",
)
    Registry.add(Registry.RegistrySpec(url = url))
end
4 changes: 3 additions & 1 deletion src/macro.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ end
Iterate over and evaluate expressions in `exs` for all genes in `chr.genes`,
returning genes where all expressions evaluate to `true`. Any given symbol `s`
in the expression will be substituted for `gene.s`. The gene itself can be
accessed in the expression as `gene` (see example below).
accessed in the expression as `gene`.
# Examples
```julia
julia> chromosome = readgbk("example.gbk")
Chromosome 'example' (5028 bp) with 6 annotations
Expand Down
8 changes: 6 additions & 2 deletions src/readgbk.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Return the LOCUS entry of the header.
"""
function parseheader(header::String)
lines = split(header, "\n")
locus = split(lines[1], r" +")[2]
Expand All @@ -21,13 +24,14 @@ function parseposition(line::String)
complete_left = !occursin('<', posstring)
complete_right = !occursin('>', posstring)
order = Vector{UnitRange{Int}}()
if occursin("order", posstring)
join = occursin("join", posstring)
if join || occursin("order", posstring)
for m in eachmatch(r"\d+(\.\.|\^)\d+", posstring)
r = Meta.parse.(split(m.match, r"(\.\.|\^)"))
push!(order, r[1]:r[2])
end
end
return feature, Locus(position, strand, complete_left, complete_right, order)
return feature, Locus(position, strand, complete_left, complete_right, order, join)
end


Expand Down
33 changes: 26 additions & 7 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@ abstract type AbstractGene end

"""
Struct for storing information on genomic locations. `strand` can be '+', '-',
or '.' when the strand is irrelevant.
or '.' when the strand is irrelevant. `order` is used to store discontiguous
sequences, indicated in the GenBank file with the order() and join() operators.
"""
struct Locus
# Range of sequence positions covered by the feature.
position::UnitRange{Int}
# '+', '-', or '.' (strand irrelevant).
strand::Char
# `false` when the GenBank location string contains '<'.
complete_left::Bool
# `false` when the GenBank location string contains '>'.
complete_right::Bool
# Sub-ranges parsed from an order()/join() location; empty for plain locations.
order::Vector{UnitRange{Int}}
# `true` when the location used join(); `show` then prints join(...) instead of
# order(...).
join::Bool
end


Expand All @@ -20,10 +22,10 @@ end
Locus(position::UnitRange{Int}, strand::Char)
"""
Locus() = Locus(1:1, '.', true, true, UnitRange{Int}[])
Locus(position::UnitRange{Int}) = Locus(position, '.', true, true, UnitRange{Int}[])
Locus(position::UnitRange{Int}, strand::Char) = Locus(position, strand, true, true, UnitRange{Int}[])
Locus(position::UnitRange{Int}, strand::Char, complete_left, complete_right) = Locus(position, strand, complete_left, complete_right, UnitRange{Int}[])
Locus() = Locus(1:1)
# Each shorter form supplies the next default and delegates to the longer one;
# the defaults are strand '.', both ends complete, no sub-ranges, `join == false`.
Locus(position::UnitRange{Int}) = Locus(position, '.')
Locus(position::UnitRange{Int}, strand::Char) = Locus(position, strand, true, true)
Locus(position::UnitRange{Int}, strand::Char, complete_left, complete_right) =
    Locus(position, strand, complete_left, complete_right, UnitRange{Int}[], false)

# Convert a plain `UnitRange{Int}` to a `Locus` via the single-argument constructor.
function Base.convert(::Type{Locus}, x::UnitRange{Int})
    return Locus(x)
end
function Base.convert(::Type{Locus}, x::StepRange{Int, Int})
Expand All @@ -35,6 +37,13 @@ function Base.convert(::Type{Locus}, x::StepRange{Int, Int})
throw(DomainError(x, "`x` must have a step of 1 or -1"))
end


"""
Struct for storing annotations for a single chromosome, plasmid, contig, etc.
Contains five fields: `name`, `sequence`, `header`, `genes`, and `genedata`.
Annotations are stored as a `DataFrame` in `genedata`, but can be accessed
more easily through `genes` using the API provided in this module.
"""
mutable struct Chromosome{G <: AbstractGene}
name::String
sequence::LongDNASeq
Expand All @@ -59,7 +68,8 @@ end
"""
addgene!(chr::Chromosome, feature, locus; kw...)
Add gene to `chr`. `locus` can be a `Locus`, a UnitRange, or a StepRange.
Add gene to `chr`. `locus` can be a `Locus`, a UnitRange, or a StepRange (for
decreasing ranges, which will be annotated on the complementary strand).
"""
function addgene!(chr::Chromosome, feature, locus; kw...)
locus = convert(Locus, locus)
Expand Down Expand Up @@ -291,7 +301,11 @@ function Base.show(io::IO, locus::Locus)
locus.strand == '-' && (s *= "complement(")
!locus.complete_left && (s *= ">")
if length(locus.order) > 0
s *= "order(" * join([join((r.start, r.stop), "..") for r in locus.order], ",") * ")"
if locus.join
s *= "join(" * join([join((r.start, r.stop), "..") for r in locus.order], ",") * ")"
else
s *= "order(" * join([join((r.start, r.stop), "..") for r in locus.order], ",") * ")"
end
else
s *= join((locus.position.start, locus.position.stop), "..")
end
Expand Down Expand Up @@ -328,6 +342,11 @@ function formatsequence(sequence, io = IOBuffer)
end


"""
printgbk([io], chr)
Print `chr` in GenBank format.
"""
function printgbk(chrs::AbstractVector{C}) where {C <: Chromosome}
io = IOBuffer()
printgbk(io, chrs)
Expand Down
4 changes: 2 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using Test
s = " gene 1..1"
@test GenomicAnnotations.parseposition(s) == ("gene", Locus(1:1, '+'))
s = " gene complement(order(3300..4037,4047..4052))"
@test GenomicAnnotations.parseposition(s) == ("gene", Locus(3300:4052, '-', true, true, UnitRange{Int}[3300:4037, 4047:4052]))
@test GenomicAnnotations.parseposition(s) == ("gene", Locus(3300:4052, '-', true, true, UnitRange{Int}[3300:4037, 4047:4052], false))


chrs = readgbk("example.gbk")
Expand Down Expand Up @@ -71,7 +71,7 @@ using Test
end

@testset "Locus" begin
locus = Locus(1:1, '.', true, true, UnitRange{Int}[])
locus = Locus(1:1, '.', true, true, UnitRange{Int}[], false)
@test Locus() == locus
@test chr.genes[2].locus < chr.genes[4].locus
@test chr.genes[2].locus == chr.genes[2].locus
Expand Down

0 comments on commit c766efb

Please sign in to comment.