# Custom CSV converter: re-encodes a field from binary to UTF-8, replacing
# invalid or undefined bytes with an empty string.
CSV::Converters[:cencoding] = lambda do |string|
  string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
end

# Read the space-separated file at `path` row by row and store one vote per row.
CSV.foreach(path, :col_sep => " ", :converters => :cencoding) do |array|
  # Column index of each "Name:value" token to extract from the row.
  fields = { 'campaign' => 2, 'validity' => 3, 'choice' => 4 }
  field_values = fields.each_with_object({}) do |(name, index), hash|
    match = /#{name}:(\w+)/i.match(array[index])
    hash[name] = match[1].strip unless match.nil?
  end

  # Skip the row unless all three fields were found. The check runs against
  # `fields.keys` because a field that did not match is simply absent from
  # `field_values`; testing only the collected values would let incomplete
  # rows through.
  next unless fields.keys.all? { |name| field_values[name].present? }

  # Look the campaign up by name, creating it on first sight, then attach the vote.
  campaign = Campaign.find_or_create_by!(name: field_values['campaign'])
  campaign.votes.create!(validity: field_values['validity'], choice: field_values['choice'])
end
But I'm getting the following error:
arup@linux-wzza:~/Rails/tv_sms_voting> rake db:reset
#.........
#..........
rake aborted!
ArgumentError: invalid byte sequence in UTF-8
/home/arup/Rails/tv_sms_voting/db/seeds.rb:20:in `<top (required)>'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`block in load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:232:in
`load_dependency'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/railties-4.1.4/lib/rails/engine.rb:543:in
`load_seed'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/tasks/database_tasks.rb:184:in
`load_seed'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/railties/databases.rake:173:in
`block (2 levels) in <top (required)>'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/railties/databases.rake:132:in
`block (2 levels) in <top (required)>'
Tasks: TOP => db:setup => db:seed
(See full trace by running task with --trace)
arup@linux-wzza:~/Rails/tv_sms_voting>
···
================
How can I handle this error?
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
# Custom CSV converter: re-encodes a field from binary to UTF-8, replacing
# invalid or undefined bytes with an empty string.
CSV::Converters[:cencoding] = lambda do |string|
  string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
end

# Read the space-separated file at `path` row by row and store one vote per row.
CSV.foreach(path, :col_sep => " ", :converters => :cencoding) do |array|
  # Column index of each "Name:value" token to extract from the row.
  fields = { 'campaign' => 2, 'validity' => 3, 'choice' => 4 }
  field_values = fields.each_with_object({}) do |(name, index), hash|
    match = /#{name}:(\w+)/i.match(array[index])
    hash[name] = match[1].strip unless match.nil?
  end

  # Skip the row unless all three fields were found. The check runs against
  # `fields.keys` because a field that did not match is simply absent from
  # `field_values`; testing only the collected values would let incomplete
  # rows through.
  next unless fields.keys.all? { |name| field_values[name].present? }

  # Look the campaign up by name, creating it on first sight, then attach the vote.
  campaign = Campaign.find_or_create_by!(name: field_values['campaign'])
  campaign.votes.create!(validity: field_values['validity'], choice: field_values['choice'])
end
But I'm getting the following error:
arup@linux-wzza:~/Rails/tv_sms_voting> rake db:reset
#.........
#..........
rake aborted!
ArgumentError: invalid byte sequence in UTF-8
/home/arup/Rails/tv_sms_voting/db/seeds.rb:20:in `<top (required)>'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`block in load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:232:in
`load_dependency'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/railties-4.1.4/lib/rails/engine.rb:543:in
`load_seed'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/tasks/database_tasks.rb:184:in
`load_seed'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/railties/databases.rake:173:in
`block (2 levels) in <top (required)>'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/railties/databases.rake:132:in
`block (2 levels) in <top (required)>'
Tasks: TOP => db:setup => db:seed
(See full trace by running task with --trace)
arup@linux-wzza:~/Rails/tv_sms_voting>
================
How can I handle this error?
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
# Custom CSV converter: re-encodes a field from binary to UTF-8, replacing
# invalid or undefined bytes with an empty string.
CSV::Converters[:cencoding] = lambda do |string|
  string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
end

# Read the space-separated file at `path` row by row and store one vote per row.
CSV.foreach(path, :col_sep => " ", :converters => :cencoding) do |array|
  # Column index of each "Name:value" token to extract from the row.
  fields = { 'campaign' => 2, 'validity' => 3, 'choice' => 4 }
  field_values = fields.each_with_object({}) do |(name, index), hash|
    match = /#{name}:(\w+)/i.match(array[index])
    hash[name] = match[1].strip unless match.nil?
  end

  # Skip the row unless all three fields were found. The check runs against
  # `fields.keys` because a field that did not match is simply absent from
  # `field_values`; testing only the collected values would let incomplete
  # rows through.
  next unless fields.keys.all? { |name| field_values[name].present? }

  # Look the campaign up by name, creating it on first sight, then attach the vote.
  campaign = Campaign.find_or_create_by!(name: field_values['campaign'])
  campaign.votes.create!(validity: field_values['validity'], choice: field_values['choice'])
end
But I'm getting the following error:
arup@linux-wzza:~/Rails/tv_sms_voting> rake db:reset
#.........
#..........
rake aborted!
ArgumentError: invalid byte sequence in UTF-8
/home/arup/Rails/tv_sms_voting/db/seeds.rb:20:in `<top (required)>'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`block in load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:232:in
`load_dependency'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activesupport-4.1.4/lib/active_support/dependencies.rb:241:in
`load'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/railties-4.1.4/lib/rails/engine.rb:543:in
`load_seed'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/tasks/database_tasks.rb:184:in
`load_seed'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/railties/databases.rake:173:in
`block (2 levels) in <top (required)>'
/home/arup/.rvm/gems/ruby-2.1.2@active_record/gems/activerecord-4.1.4/lib/active_record/railties/databases.rake:132:in
`block (2 levels) in <top (required)>'
Tasks: TOP => db:setup => db:seed
(See full trace by running task with --trace)
arup@linux-wzza:~/Rails/tv_sms_voting>
================
How can I handle this error?
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
Thanks for your reply. I am wondering why my custom converter didn't work. I
don't want to extract all the data from each line; I only want the values of
the three fields I have shown.
···
On Wednesday, October 29, 2014 11:48:18 PM Abinoam Jr. wrote:
Hi Arup,
Your source file (votes.txt) is not "clean".
Line 5608 has an invalid character after "Shortcode:63334".
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
Hi Arup, the CSV converters are designed for casting individual fields content into different types. (Integer, Date, etc.)
Your problem happens before that, it can't split the file into fields when parsing it into CSV.
If you do your `string.encode!` before passing it into `CSV.parse`, then it will work. Note, you probably don't need to specify the 'binary' as a second parameter, it should just re-encode as UTF-8 with the correct replacements.
Perhaps you could try to use String#scrub to sanitize the string.
···
Em 30/10/2014 15:03, "Arup Rakshit" <aruprakshit@rocketmail.com> escreveu:
On Wednesday, October 29, 2014 11:48:18 PM Abinoam Jr. wrote:
> Hi Arup,
>
> Your source file (votes.txt) is not "clean".
>
> Line 5608 has an invalid character after "Shortcode:63334".
>
> VOTE 1168123287 Campaign:ssss_uk_02A Validity:during Choice:Antony
> CONN:MIG01XU MSISDN:00777779989999
> GUID:029DBA7C-26E7-4F82-BAE9-2DEC2C665F6B Shortcode:63334¡
>
>
> Abinoam Jr.
>
Thanks for your reply. I am thinking, why my custom converter didn't work.
I
don't want to extract all data from each line. I want only the 3 fields
values
as I have shown.
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place.
Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
Thanks, Andrew, for your reply. CSV::parse needs the data as a string. But the
log file I am parsing and loading into the DB is huge and varies in size.
Currently it has 45,000 lines. Would it be a good idea to read it as a
string using IO::read, fix the encoding, and then feed it to the CSV::parse
method?
···
On Friday, October 31, 2014 01:59:15 PM Andrew Vit wrote:
On 14-10-29 13:10, Arup Rakshit wrote:
> Hi,
>
> I am trying to read the
> file(http://wikisend.com/download/703976/votes.txt)
> with the below code :
>
> CSV::Converters[:cencoding] = lambda do |string|
>
> string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace,
>
> replace: '')
> end
Hi Arup, the CSV converters are designed for casting individual fields
content into different types. (Integer, Date, etc.)
Your problem happens before that, it can't split the file into fields
when parsing it into CSV.
If you do your `string.encode!` before passing it into `CSV.parse`, then
it will work. Note, you probably don't need to specify the 'binary' as a
second parameter, it should just re-encode as UTF-8 with the correct
replacements.
Andrew Vit
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
Thanks Abinoam. I finally used String#scrub method and it worked.
···
On Friday, October 31, 2014 05:55:38 PM Abinoam Jr. wrote:
Perhaps you could try to use String#scrub to sanitize the string.
#########################################
# Remove every existing campaign before importing.
Campaign.destroy_all

require 'csv'

path = '/home/arup/Pictures/ruby_test/votes.txt'
# Read the whole file and strip invalid byte sequences (String#scrub with an
# empty replacement) before handing the text to the CSV parser.
data = IO::read(path).scrub("")

CSV.parse(data, :col_sep => " ") do |array|
  # Column index of each "Name:value" token to extract from the row.
  fields = { 'campaign' => 2, 'validity' => 3, 'choice' => 4 }
  field_values = fields.each_with_object({}) do |(name, index), hash|
    match = /#{name}:(\w+)/i.match(array[index])
    hash[name] = match[1].strip unless match.nil?
  end

  begin
    # Look the campaign up by name, creating it on first sight, then attach the vote.
    campaign = Campaign.find_or_create_by!(name: field_values['campaign'])
    campaign.votes.create!(validity: field_values['validity'], choice: field_values['choice'])
  rescue ActiveRecord::RecordInvalid
    # Rows that fail model validation are skipped.
    next
  end
end
##########################################
The only pain is that I am reading and loading a huge file into memory, which
may not work in the near future.
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
Thanks Abinoam. I finally used String#scrub method and it worked.
Congrats!
CSV will have some problems with the invalid character.
But you don't need CSV for this very specific case.
Just use String#split
And take care to read one line at a time, so that it is memory
efficient. (Use File.foreach)
# Stream the file one line at a time instead of reading it all at once.
# FIELDS is not defined in this snippet; presumably it is the field name =>
# column index mapping (e.g. { 'campaign' => 2, ... }) — confirm where it is defined.
File.foreach(path) do |line|
  # Replace invalid byte sequences, then split on runs of whitespace.
  array = line.scrub.split(/\s+/)
  field_values = FIELDS.each_with_object({}) do |(name, index), hash|
    # "Campaign:ssss_uk_01B" => ["Campaign", "ssss_uk_01B"]. `to_s` keeps a
    # blank or short line (nil column) from raising NoMethodError.
    key_value = array[index].to_s.split(":")
    hash[name] = key_value[1] # "ssss_uk_01B", or nil when the column is missing
  end
end
···
#########################################
# Remove every existing campaign before importing.
Campaign.destroy_all

require 'csv'

path = '/home/arup/Pictures/ruby_test/votes.txt'
# Read the whole file and strip invalid byte sequences (String#scrub with an
# empty replacement) before handing the text to the CSV parser.
data = IO::read(path).scrub("")

CSV.parse(data, :col_sep => " ") do |array|
  # Column index of each "Name:value" token to extract from the row.
  fields = { 'campaign' => 2, 'validity' => 3, 'choice' => 4 }
  field_values = fields.each_with_object({}) do |(name, index), hash|
    match = /#{name}:(\w+)/i.match(array[index])
    hash[name] = match[1].strip unless match.nil?
  end

  begin
    # Look the campaign up by name, creating it on first sight, then attach the vote.
    campaign = Campaign.find_or_create_by!(name: field_values['campaign'])
    campaign.votes.create!(validity: field_values['validity'], choice: field_values['choice'])
  rescue ActiveRecord::RecordInvalid
    # Rows that fail model validation are skipped.
    next
  end
end
##########################################
The only pain is that I am reading and loading a huge file into memory, which
may not work in the near future.
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.
Ohh! IO::foreach works well, problem is with CSV::foreach. I was not aware of
the fact. Good to know.
···
On Sunday, November 02, 2014 12:56:04 PM Abinoam Jr. wrote:
> Thanks Abinoam. I finally used String#scrub method and it worked.
Congrats!
CSV will have some problems with the invalid character.
But you don't need CSV for this very specific case.
Just use String#split
And take care to read one line at a time, so it may be memory
effective. (Use File#foreach)
--
Regards,
Arup Rakshit
Debugging is twice as hard as writing the code in the first place. Therefore,
if you write the code as cleverly as possible, you are, by definition, not
smart enough to debug it.