Skip to content

Commit

Permalink
To optimize the result in from_excel (#516)
Browse files Browse the repository at this point in the history
* To optimize the result in from_excel

Some files have a merged cell in row 1, so the old code returned only the first column.
I moved the hard-coded row index '0' into opts (as `row_id`) so the caller can choose which row supplies the headers.

* daru from_excel

* break up from_excel

* remove needless line

* fix break up

* fix break up

* add testcases

* add testcases
  • Loading branch information
weqopy authored May 30, 2020
1 parent 7446aa9 commit ba5992c
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 7 deletions.
22 changes: 15 additions & 7 deletions lib/daru/io/io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,12 @@ class << self
# Functions for loading/writing Excel files.

def from_excel path, opts={}
optional_gem 'spreadsheet', '~>1.1.1'
opts = {
worksheet_id: 0
worksheet_id: 0,
row_id: 0
}.merge opts

worksheet_id = opts[:worksheet_id]
book = Spreadsheet.open path
worksheet = book.worksheet worksheet_id
headers = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)

worksheet, headers = read_from_excel(path, opts)
df = Daru::DataFrame.new({})
headers.each_with_index do |h,i|
col = worksheet.column(i).to_a
Expand All @@ -59,6 +55,18 @@ def from_excel path, opts={}
df
end

# Opens an Excel workbook and returns the requested worksheet together with
# the column headers read from one of its rows.
#
# @param path [String] location of the workbook; passed to Spreadsheet.open
# @param opts [Hash] reader options
# @option opts [Object] :worksheet_id (0) identifier handed to the
#   workbook's #worksheet lookup
# @option opts [Integer] :row_id (0) index of the row whose cells become
#   the headers
# @return [Array] two elements: the worksheet and the headers (Symbols)
# @raise [ArgumentError] when the worksheet lookup yields nil
def read_from_excel path, opts={}
  optional_gem 'spreadsheet', '~>1.1.1'

  # Same defaults as from_excel, so this helper also works when it is
  # called directly with a partial (or no) options hash.
  worksheet_id = opts.fetch(:worksheet_id, 0)
  row_id = opts.fetch(:row_id, 0)

  worksheet = Spreadsheet.open(path).worksheet(worksheet_id)
  # Fail with a descriptive message instead of a NoMethodError on nil below.
  if worksheet.nil?
    raise ArgumentError,
      "No worksheet #{worksheet_id.inspect} found in #{path}"
  end

  # NOTE(review): recode_repeated presumably disambiguates duplicate header
  # names before they are turned into symbols - confirm in ArrayHelper.
  headers = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)

  [worksheet, headers]
end

def dataframe_write_excel dataframe, path, _opts={}
book = Spreadsheet::Workbook.new
sheet = book.create_worksheet
Expand Down
Binary file modified spec/fixtures/test_xls.xls
Binary file not shown.
Binary file added spec/fixtures/test_xls_2.xls
Binary file not shown.
32 changes: 32 additions & 0 deletions spec/io/io_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,38 @@
end
end

# Covers the :row_id option of Daru::DataFrame.from_excel using the
# spec/fixtures/test_xls_2.xls fixture.
context "#from_excel with row_id" do
  before do
    # Each expected column starts with the text 'id', 'name', ... followed
    # by six data values.
    id = Daru::Vector.new(['id', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    name = Daru::Vector.new(%w(name Alex Claude Peter Franz George Fernand))
    age = Daru::Vector.new(['age', 20.0, 23.0, 25.0, nil, 5.5, nil])
    city = Daru::Vector.new(['city', 'New York', 'London', 'London', 'Paris', 'Tome', nil])
    a1 = Daru::Vector.new(['a1', 'a,b', 'b,c', 'a', nil, 'a,b,c', nil])

    # Expected frame when no :row_id is passed: three columns.
    @expected_1 = Daru::DataFrame.new({:id2 => id, :name2 => name, :age2 => age}, order: [:id2, :name2, :age2])
    # Expected frame when row_id: 1 is passed: five columns.
    @expected_2 = Daru::DataFrame.new({
      :id => id, :name => name, :age => age, :city => city, :a1 => a1
    }, order: [:id, :name, :age, :city, :a1])
  end

  it "loads DataFrame from test_xls_2.xls" do
    df = Daru::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls'

    expect(df.nrows).to eq(7)
    expect(df.vectors.to_a).to eq([:id2, :name2, :age2])
    expect(df[:age2][6]).to be_nil
    # Actual value goes in expect(...) so failure output labels
    # "expected" and "got" correctly.
    expect(df).to eq(@expected_1)
  end

  it "loads DataFrame from test_xls_2.xls with row_id" do
    df = Daru::DataFrame.from_excel 'spec/fixtures/test_xls_2.xls', {row_id: 1}

    expect(df.nrows).to eq(7)
    expect(df.vectors.to_a).to eq([:id, :name, :age, :city, :a1])
    expect(df[:age][6]).to be_nil
    # Actual value goes in expect(...) so failure output labels
    # "expected" and "got" correctly.
    expect(df).to eq(@expected_2)
  end
end

context "#write_excel" do
before do
a = Daru::Vector.new(100.times.map { rand(100) })
Expand Down

0 comments on commit ba5992c

Please sign in to comment.