require 'daru'

# NOTE(review): many statements below are bare expressions whose value is
# discarded when this file runs as a script; presumably they were written to
# be inspected in an interactive session (e.g. a notebook) -- confirm.

# --- Creating DataFrames ---------------------------------------------------

# From an Array of Arrays; :order names the vectors, :index names the rows.
df = Daru::DataFrame.new(
  [[1, 2, 3, 4], [1, 2, 3, 4]],
  order: [:a, :b],
  index: [:one, :two, :three, :four]
)

# From a Hash of Arrays, listing the vectors as :b then :a.
df = Daru::DataFrame.new({ a: [1, 2, 3, 4], b: [1, 2, 3, 4] }, order: [:b, :a])

# From Vectors whose indexes differ (v2 carries an :absent label v1 lacks).
v1 = Daru::Vector.new([1, 2, 3, 4, 5], index: [:a, :b, :c, :d, :e])
v2 = Daru::Vector.new([11, 22, 33, 44], index: [:b, :e, :a, :absent])
Daru::DataFrame.new({ v1: v1, v2: v2 })

# With clone: false, then report whether the DataFrame's vectors are the very
# same objects as the Vectors that were passed in.
v1 = Daru::Vector.new([1, 2, 3, 4, 5])
v2 = Daru::Vector.new([11, 22, 33, 44, 55])
df = Daru::DataFrame.new({ a: v1, b: v2 }, clone: false)
puts "equalness a : #{v1.object_id == df[:a].object_id}\nequalness b : #{v2.object_id == df[:b].object_id}"

# From an Array of rows.
Daru::DataFrame.rows(
  [
    [1, 11, 10, 'a'],
    [2, 22, 20, 4],
    [3, 33, 30, 'g'],
    [4, 44, 40, 3]
  ],
  order: [:a, :b, :c, :d]
)

# From row Vectors whose indexes differ from each other.
r1 = Daru::Vector.new([1, 2, 3, 4, 5], index: [:a, :b, :c, :d, :e])
r2 = Daru::Vector.new([11, 22, 33, 44, 55], index: [:a, :c, :e, :b, :odd])
Daru::DataFrame.rows([r1, r2], order: [:a, :b, :c, :d, :odd])

# From files on disk (paths are relative to the working directory).
Daru::DataFrame.from_csv('data/sales-funnel.csv')
df = Daru::DataFrame.from_excel('data/test_xls.xls')

# --- Accessing and modifying data ------------------------------------------

df = Daru::DataFrame.new(
  {
    a: [1, 2, 3, 4, 5, 6, 7],
    b: ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
    c: [11, 22, 33, 44, 55, 66, 77]
  },
  index: [:a, :b, :c, :d, :e, :f, :g]
)

# Vector access by name and by range of names.
df[:b]
df[:b..:c]

# Row access by label, by range of labels, and by integer.
df.row[:c]
df.row[:d..:f]
df.row[3]

df.head(3)

# Add vector :d as the product of vectors :a and :c.
df[:d] = df[:a] * df[:c]
df

# Remove vector :b, then assign a new :b from a Vector whose index is ordered
# differently and includes an :extra label.
df.delete_vector(:b)
df[:b] = Daru::Vector.new(
  ['a', 33, 'b', 'c', 'd', 88, 'e'],
  index: [:a, :c, :d, :b, :e, :f, :extra]
)
df

# Assign a row labelled :latest from a Vector indexed by vector names.
df.row[:latest] = Daru::Vector.new([10, 20, 30, 40], index: [:c, :b, :a, :d])
df

# Filter vectors.
# The `type` method returns either :numeric or :object. The :numeric type states
# that the Vector consists only of numerical data (combined with missing data).
# If the type happens to be :object, it contains non-numerical data like strings
# or symbols. Statistical operations will not be possible on Vectors of type :object.
# Select vectors that are numeric and whose median is below 50.
df.filter do |vector|
  vector.type == :numeric && vector.median < 50
end

# Filter rows
df.filter(:row) do |row|
  row[:a] + row[:d] < 100
end

df.transpose

# --- Arithmetic ------------------------------------------------------------

df + 10

# A second DataFrame of random integers (rand(100)) to add to df. Its vector
# names (:a, :f, :c) and row labels differ in part from df's.
df1 = Daru::DataFrame.new(
  {
    a: 7.times.map { rand(100) },
    f: 7.times.map { rand(100) },
    c: 7.times.map { rand(100) }
  },
  index: [:a, :b, :c, :d, :latest, :older, :f]
)
df1 + df

# --- Statistics ------------------------------------------------------------

df.mean
df.describe
df.cov
df.corr
puts df.summary

# --- Iteration -------------------------------------------------------------

# Iterate over vectors
e = []
df.each do |vector|
  e << "#{vector[:a]}#{vector[:latest]}"
end
puts e

# Iterate over rows
r = []
df.each(:row) do |row|
  r << row[:a] * row[:c]
end
puts r

# Map over vectors.
# The `only_numerics` method returns a DataFrame which contains vectors
# with only numerical values. Setting the `:clone` option to false will
# return the same Vector objects that are contained in the original DataFrame.
df.only_numerics(clone: false).map do |vector|
  vector.mean
end

# Map over rows.
# Calling `only_numerics` on a Daru::Vector will return a Vector with only numeric and
# missing data. Data marked as 'missing' is not considered during statistical computation.
# Mean of the numeric entries of every row.
df.map(:row) do |row|
  row.only_numerics.mean
end

# Recode vectors
df.only_numerics(clone: false).recode do |vector|
  vector[:a] = vector[:d] + vector[:c]
  vector[:b] = vector.mean + vector[:a]
  vector # <- return the vector to the block
end

# Recode rows
df.recode(:row) do |row|
  row[:a] = row[:c] - row[:d]
  row[:b] = row[:b].to_i if row[:b].is_a?(String)
  row
end

# Collect Vectors
df.collect do |vector|
  vector[:c] + vector[:f]
end

# Collect Rows
df.collect(:row) do |row|
  row[:a] + row[:d] - row[:c]
end

# The block refers to vectors by their bare names.
df.vector_by_calculation { a + c + d }

# --- Sorting ---------------------------------------------------------------

# Sort on several vectors with a direction per vector; :c is compared through
# a lambda that returns each value's size.
df = Daru::DataFrame.new(
  {
    a: ['g', 'g', 'g', 'sort', 'this'],
    b: [4, 4, 335, 32, 11],
    c: ['This', 'dataframe', 'is', 'for', 'sorting']
  }
)
df.sort([:a, :b, :c], ascending: [true, false, true], by: { c: ->(value) { value.size } })

df = Daru::DataFrame.new({ a: [1, 2, 1, 2, 3], b: [5, 4, 3, 2, 1] })
df.sort([:a, :b])

# Sorting a vector that contains nils.
df = Daru::DataFrame.new({ a: [-3, nil, -1, nil, 5], b: [4, 3, 2, 1, 4] })
df.sort([:a])

df = Daru::DataFrame.new(
  {
    a: [nil, -1, 1, nil, -1, 1],
    b: ['aaa', 'aa', nil, 'baaa', 'x', nil]
  }
)

# df.sort [:b], by: {b: lambda { |a| a.length } }
# This would give "NoMethodError: undefined method `length' for nil:NilClass"
# Instead you could do the following if you want the nils to be handled automatically
df.sort([:b], by: { b: ->(value) { value.length } }, handle_nils: true)

df = Daru::DataFrame.new(
  {
    a: [nil, -1, 1, nil, -1, 1],
    b: ['aaa', 'aa', nil, 'baaa', 'x', nil]
  }
)

# To print nils at the bottom one can use lambda { |a| a.nil? ? [1] : [0, a.length] }
df.sort([:b], by: { b: ->(value) { value.nil? ? [1] : [0, value.length] } }, handle_nils: true)