From b1c0866111aa15ddbc858e7d6051a5f2f7d5a792 Mon Sep 17 00:00:00 2001 From: Sameer Deshmukh Date: Sun, 5 Oct 2014 16:21:19 +0530 Subject: [PATCH] Vectors implemented and README update Implemented basic vector Created files Wrote tests for vector initialize Implemented basic vector enumerables Vectors done readme update readme update --- .rspec | 1 + Gemfile | 4 ++ Gemfile.lock | 35 ++++++++++++++ README.md | 18 ++++++++ daru.gemspec | 29 ++++++++++++ lib/daru.rb | 5 ++ lib/daru/dataframe.rb | 7 +++ lib/daru/vector.rb | 56 +++++++++++++++++++++++ lib/version.rb | 3 ++ spec/jruby/dataframe_spec.rb | 1 + spec/jruby/vector_spec.rb | 19 ++++++++ spec/mri/dataframe_spec.rb | 5 ++ spec/mri/vector_spec.rb | 89 ++++++++++++++++++++++++++++++++++++ spec/spec_helper.rb | 6 +++ 14 files changed, 278 insertions(+) create mode 100644 .rspec create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 daru.gemspec create mode 100644 lib/daru.rb create mode 100644 lib/daru/dataframe.rb create mode 100644 lib/daru/vector.rb create mode 100644 lib/version.rb create mode 100644 spec/jruby/dataframe_spec.rb create mode 100644 spec/jruby/vector_spec.rb create mode 100644 spec/mri/dataframe_spec.rb create mode 100644 spec/mri/vector_spec.rb create mode 100644 spec/spec_helper.rb diff --git a/.rspec b/.rspec new file mode 100644 index 000000000..5052887a0 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--color \ No newline at end of file diff --git a/Gemfile b/Gemfile new file mode 100644 index 000000000..e181c5eff --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +# Specify your gem's dependencies in daru.gemspec +gemspec \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 000000000..68ed0affa --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,35 @@ +PATH + remote: . 
+ specs: + daru (0.0.1) + +GEM + remote: https://rubygems.org/ + specs: + diff-lcs (1.2.5) + json (1.8.1) + nmatrix (0.1.0.rc5) + rdoc (~> 4.0, >= 4.0.1) + rdoc (4.1.2) + json (~> 1.4) + rspec (3.1.0) + rspec-core (~> 3.1.0) + rspec-expectations (~> 3.1.0) + rspec-mocks (~> 3.1.0) + rspec-core (3.1.5) + rspec-support (~> 3.1.0) + rspec-expectations (3.1.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.1.0) + rspec-mocks (3.1.2) + rspec-support (~> 3.1.0) + rspec-support (3.1.1) + +PLATFORMS + ruby + +DEPENDENCIES + bundler + daru! + nmatrix (~> 0.1.0.rc5) + rspec (~> 3.0) diff --git a/README.md b/README.md index cf79daf64..0ba5e41cf 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,21 @@ daru ==== Data Analysis in RUby + +## Introduction + +daru (Data Analysis in RUby) is a library for storage, analysis and manipulation of data. It aims to be the preferred data analysis library for Ruby. + +Development of daru was started to address the fragmentation of Dataframe-like classes which were created in many ruby gems as per their own needs. + +This creates a hurdle in using these gems together to solve a problem. For example, calculating something in [statsample](https://github.com/clbustos/statsample) and plotting the results in [Nyaplot](https://github.com/domitry/nyaplot). + +daru is heavily inspired by `Statsample::Dataset`, `Nyaplot::DataFrame` and the super-awesome pandas, a very mature solution in Python. + +## Data Structures + +daru employs several data structures for storing and manipulating data: +* Vector - A basic 1-D vector. +* DataFrame - A 2-D matrix-like structure which is internally composed of named `Vector` classes. + +daru data structures can be constructed by using several Ruby classes. These include `Array`, `Hash`, `Matrix`, [NMatrix](https://github.com/SciRuby/nmatrix) and [MDArray](https://github.com/rbotafogo/mdarray). daru brings a uniform API for handling and manipulating data represented in any of the above Ruby classes. 
require 'json'
require 'securerandom'

begin
  require 'matrix'
rescue LoadError
  # matrix stopped being a default gem in Ruby 3.1. When it is unavailable no
  # Matrix can be handed to Vector, so the Matrix branch is simply skipped.
end

# Data Analysis in RUby: container classes for storing and manipulating data.
module Daru
  VERSION = "0.0.1"

  # 2-D structure intended to be composed of named vectors.
  # Placeholder: the constructor accepts its arguments and does nothing yet.
  class DataFrame
    # @param source [Object] data to build the frame from (currently unused)
    # @param name [Object] label for the frame; defaults to a random UUID
    def initialize(source, name = SecureRandom.uuid)
    end
  end

  # A named 1-D collection backed by whatever array-like object it was built
  # from. Enumerable is mixed in, so max, min, map, inject, ... are available.
  class Vector
    include Enumerable

    # @return [Object] the label given at construction, or a generated UUID
    attr_reader :name

    # @return [Integer] size of the backing store, captured at construction
    attr_reader :size

    # Builds a vector from an Array, Range, Matrix, array-like object, or a
    # one-entry Hash of the form { name => data }.
    #
    # @param source [Object] the data, or a Hash whose first pair is name => data
    # @param name [Object, nil] label; ignored when source is a Hash
    # @raise [ArgumentError] when source is an empty Hash
    def initialize(source, name = nil)
      if source.is_a?(Hash)
        raise ArgumentError, 'cannot build a Vector from an empty Hash' if source.empty?

        # Only the first pair is used; its key always wins over +name+.
        key, data = source.first
        return initialize(data, key)
      end

      @name   = name || SecureRandom.uuid
      @vector = flatten_source?(source) ? source.to_a.flatten : source
      @size   = @vector.size
    end

    # Iterates over the elements by delegating to the backing store.
    def each(&block)
      @vector.each(&block)
    end

    # @param index [Object] anything the backing store accepts in #[]
    # @return [Object] the element(s) found at index
    def [](index)
      @vector[index]
    end

    # Serialises the elements as a JSON array.
    # Extra arguments are forwarded so that JSON.generate, which passes a
    # generator state to nested objects, can serialise a Vector as well.
    #
    # @return [String]
    def to_json(*args)
      to_a.to_json(*args)
    end

    # @return [Array] the elements as a plain Array
    def to_a
      @vector.to_a
    end

    # Renders the vector as a one-column HTML table headed by its name.
    # At most +threshold+ leading elements are shown; if more than one element
    # follows them, a single '...' row stands in for everything up to the
    # last element, which is always shown.
    #
    # @param threshold [Integer] number of leading elements to show
    # @return [String] the HTML markup
    def to_html(threshold = 15)
      values = to_a
      last   = values.size - 1

      rows = values.each_with_index.map do |el, i|
        next if i > threshold && i < last

        # Only print the ellipsis when elements are really being left out.
        cell = (i == threshold && i < last) ? '...' : el.to_s
        "<tr><td>#{cell}</td></tr>"
      end

      "<table><tr><th>#{@name}</th></tr>#{rows.compact.join}</table>"
    end

    private

    # Ranges and Matrix objects are expanded into flat Arrays; every other
    # source is stored untouched.
    def flatten_source?(source)
      source.is_a?(Range) || (defined?(::Matrix) && source.is_a?(::Matrix))
    end
  end
end
expect(vector[1]) .to eq(2) + expect(vector.name).to eq(:bakasur) + end + + it "creates a vector object with an NMatrix" do + vector = Daru::Vector.new(NMatrix.new([5], [1,2,3,4,5], + dtype: :int32), :scotty) + + expect(vector[1]) .to eq(2) + expect(vector.name).to eq(:scotty) + end + + it "creates a vector object with a Matrix" do + vector = Daru::Vector.new Matrix[[1,2,3,4,5]], :ravan + + expect(vector[1]) .to eq(2) + expect(vector.name).to eq(:ravan) + end + + it "creates a vector object with a Hash with different values" do + vector = Daru::Vector.new({orion: [1,2,3,4,5]}) + + expect(vector[1]) .to eq(2) + expect(vector.name).to eq(:orion) + + vector = Daru::Vector.new({ kirk: 1..5 }) + + expect(vector[1]) .to eq(2) + expect(vector.name).to eq(:kirk) + + vector = Daru::Vector.new({ spock: NMatrix.new([5], [1,2,3,4,5], + dtype: :int32) }) + + expect(vector[1]) .to eq(2) + expect(vector.name).to eq(:spock) + end + + it "auto assigns a name if not specified" do + earth = Daru::Vector.new 1..5 + organion = Daru::Vector.new 1..5 + + expect(earth.name == organion.name).to eq(false) + end + end + + context "tests for methods" do + before do + @anakin = Daru::Vector.new NMatrix.new([5], [1,2,3,4,5]), :anakin + @luke = Daru::Vector.new NMatrix.new([3], [3,4,5,6]) , :luke + end + + it "checks for an each block" do + sum = 0 + + @anakin.each{ |e| sum += e} + expect(sum).to eq(15) + end + + it "checks for inequality of vectors" do + expect(@anakin == @luke).to be(false) + end + + it "calculates maximum value" do + expect(@anakin.max).to eq(5) + end + + it "calculates minimmum value" do + expect(@anakin.min).to eq(1) + end + + it "delegates to the internal array storage" do + expect(@anakin.size).to eq(@anakin.to_a.size) + end + end +end \ No newline at end of file diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 000000000..73dfe64af --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,6 @@ +require 'rspec' +require 'nmatrix' + 
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) +$LOAD_PATH.unshift(File.dirname(__FILE__)) +require 'daru' \ No newline at end of file