From 802c527fcdef77879cd66a547e8b0a7f3f1a21f1 Mon Sep 17 00:00:00 2001
From: Samuel Vasko
Date: Sat, 25 Jan 2014 11:36:53 +0100
Subject: [PATCH] Small formatting fixes

---
Review notes (not part of the commit message; everything between the "---"
line and the first "diff --git" line is ignored when the patch is applied).

Summary of changes:
- lib/convert.rb: the bare `require 'eeepub'` becomes a script that builds an
  EPUB with EeePub.make and saves it as sample.epub. The identifier, uid,
  file paths and nav entries are still example placeholders.
- lib/fetch.rb, run: additionally calls cleanup_headings and removes the
  `id`, `lang` and `dir` attributes from content[0].
- lib/fetch.rb, cleanup_headings (new): strips '?' from the `id` of every
  h1-h6 element. The rewrite is guarded so that a heading without an `id`
  attribute is skipped instead of raising NoMethodError on nil.
- lib/fetch.rb, transform_code: replaces U+00A0 (non-breaking space) with a
  plain space in the text copied into the new <pre> and <code> nodes.

Open points:
- The bare `private` keyword does not change the visibility of methods
  defined with `def self.`; `private_class_method` would be required.
  NOTE(review): confirm the intended visibility before changing this, since
  the helpers are invoked with an explicit `self.` receiver.
- `starts_with` (context line in absolute_links) is not a core String method;
  core Ruby provides `start_with?`. TODO confirm whether a library supplies
  it. It is context only and is left untouched by this patch.
- Indentation in this patch was restored using conventional 2-space Ruby
  style; verify against the target files before applying.

 lib/convert.rb | 21 ++++++++++++++++++++-
 lib/fetch.rb   | 16 ++++++++++++++--
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/lib/convert.rb b/lib/convert.rb
index 4cad284..24033ed 100644
--- a/lib/convert.rb
+++ b/lib/convert.rb
@@ -1 +1,20 @@
-require 'eeepub'
\ No newline at end of file
+require 'eeepub'
+
+epub = EeePub.make do
+  title 'Vala Tutorial'
+  creator 'Maciej Piechotka'
+  publisher 'Samuel Vasko'
+  date '2014-01-25'
+  identifier 'http://example.com/book/foo', :scheme => 'URL'
+  uid 'http://example.com/book/foo'
+
+
+  files ['/path/to/foo.html', '/path/to/bar.html'] # or files [{'/path/to/foo.html' => 'dest/dir'}, {'/path/to/bar.html' => 'dest/dir'}]
+  nav [
+    {:label => '1. foo', :content => 'foo.html', :nav => [
+      {:label => '1.1 foo-1', :content => 'foo.html#foo-1'}
+    ]},
+    {:label => '1. bar', :content => 'bar.html'}
+  ]
+end
+epub.save('sample.epub')
\ No newline at end of file
diff --git a/lib/fetch.rb b/lib/fetch.rb
index 3865a77..36fcb0e 100644
--- a/lib/fetch.rb
+++ b/lib/fetch.rb
@@ -13,10 +13,15 @@ def self.run
     self.transform_code content
     self.absolute_links content
     self.cleanup_paragraphs content
+    self.cleanup_headings content
+
+    ['id', 'lang', 'dir'].each { |e| content[0].remove_attribute(e) }
 
     return content
   end
 
+  private
+
   # Get the goods, sorry for the global
   def self.get_content
     $doc = Nokogiri::HTML(self.download('https://wiki.gnome.org/Projects/Vala/Tutorial'))
@@ -27,6 +32,12 @@ def self.parse_toc content
     content.css('.table-of-contents > ol > li > ol')
   end
 
+  def self.cleanup_headings content
+    content.css('h1, h2, h3, h4, h5, h6').each do |h|
+      h.set_attribute('id', h.attr('id').gsub('?', '')) if h.attr('id')
+    end
+  end
+
   def self.absolute_links content
     content.css('a').each do |a|
       if a.attr('href').starts_with '/'
@@ -50,13 +61,14 @@ def self.remove_elements content
   def self.transform_code content
     content.css('.highlight').each do |el|
       pre = Nokogiri::XML::Node.new 'pre', $doc
-      pre.content = el.text
+      # cleaning non breaking spaces
+      pre.content = el.text.gsub("\u00A0", ' ')
       el.replace(pre)
     end
 
     content.css('tt.backtick').each do |el|
       code = Nokogiri::XML::Node.new 'code', $doc
-      code.content = el.text
+      code.content = el.text.gsub("\u00A0", ' ')
       el.replace(code)
     end
   end