# mkbok

#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

require 'optparse'
require 'fileutils'
require 'erb'
require 'yaml'
require 'pathname'

# Mix FileUtils into the top-level object so that its methods (cp, rm, cd,
# mkdir_p, mv, ...) can be called without a receiver in this script.
include FileUtils

# Command-line entry point.
#
# Builds the options hash from three layers (later layers win): built-in
# defaults, the optional ".mkbok.yml" in the current directory, and the
# command-line switches. It then dispatches to generate_project,
# generate_pdf and/or generate_ebook.
#
# Side effects: sets the globals $here, $root and $outDir from the current
# working directory; calls exit after generating a project skeleton.
#
# Raises ArgumentError when a switch is given an unsupported value.
def main
    options = {
        "build"=> "pdf",
        "lang" => "zh",
        "config"        => "latex/config.yml",
        "template"      => "latex/template.tex",
        "chapter-files" => "*/*.md",
        "appendix-files"=> "*appendix/*.md",
        # The key really is spelled "jeykll"; generate_pdf reads this spelling.
        "jeykll"        => false
    }
    config_file = File.join('.mkbok.yml')
    # File.exist? is used because File.exists? is gone from current Ruby.
    if File.exist?(config_file)
        config_options = YAML.load_file(config_file)
        # An empty YAML document does not load as a Hash; merge mappings only.
        options.merge!(config_options) if config_options.is_a?(Hash)
    end

    # Whole-string match, and "_" is allowed as the error message promises.
    valid_name = /\A[a-zA-Z0-9_]+\z/

    option_parser = OptionParser.new do |opts|
        executable_name = File.basename($PROGRAM_NAME)
        opts.banner = "make ebooks from markdown plain text\n" \
                      "        Usage: #{executable_name} [options]    \n" \
                      "        "
        # Create a switch
        opts.on("-b","--build FORMAT","build format:epub,pdf,html. seperated by ','") do |format|
            format.split(",").each do |fmt|
                unless ["pdf","epub","html"].include?(fmt)
                    raise ArgumentError,"FORMAT must be one of 'pdf,epub,html' format"
                end
            end
            options["build"] = format
        end
        opts.on("-d","--debug","debug") do
            options["debug"] = true
        end
        # Create a flag
        opts.on("-l","--lang LANG","language selection") do |lang|
            unless ["zh","en"].include?(lang)
                raise ArgumentError,"LANG must be in zh or en"
            end
            options["lang"] = lang
        end
        opts.on("-c","--config CONFIG","config file") do |config|
            unless File.exist?(config)
                raise ArgumentError,"config file \"#{config}\" doesn't exist"
            end
            options["config"] = config
        end
        opts.on("-t","--template template.tex","latex template file") do |template|
            unless File.exist?(template)
                raise ArgumentError,"template file \"#{template}\" doesn't exist"
            end
            options["template"] = template
        end
        opts.on("-n","--name book name","book name") do |name|
            unless name =~ valid_name
                raise ArgumentError,"name should be [a-zA-Z0-9_]"
            end
            options["name"] = name
        end
        opts.on("-g","--generate project","project name") do |name|
            unless name =~ valid_name
                raise ArgumentError,"name should be [a-zA-Z0-9_]"
            end
            options["command"] = "generate"
            options["name"] = name
        end
    end

    option_parser.parse!

    $here = Dir.getwd
    $root = File.join($here)
    $outDir = File.join($root, 'pdf')
    # Default the book name to the name of the current directory.
    options["name"] ||= File.basename(Dir.getwd)
    options["outputformat"] = options["build"].split(',')

    puts options.inspect if options["debug"]

    if options["command"] == "generate"
        generate_project(options["name"])
        exit
    end

    generate_pdf(options) if options["outputformat"].include?("pdf")

    # Whatever remains after removing "pdf" is handled by the ebook builder.
    options["outputformat"].delete("pdf")
    generate_ebook(options) unless options["outputformat"].empty?
end

# Temporarily exposes the thumbnail figures under short names while +block+
# runs.
#
# Every "#$root/figures/18333*.png" whose path matches
# 18333fig0<c>0?<n>-tn is copied to the same path with that part replaced by
# "<c>.<n>". The copies are removed again when the block finishes, whether
# it returns normally or raises. Returns the block's value.
#
# Files picked up by the glob that do not match the rename pattern are left
# alone. Renaming them would yield the source path itself, so the copy
# would fail and the cleanup would delete the original figure.
def figures(&block)
    pattern = /18333fig0(\d)0?(\d+)\-tn/
    copies = []
    begin
        Dir["#$root/figures/18333*.png"].each do |file|
            next unless file =~ pattern
            target = file.sub(pattern, '\1.\2')
            FileUtils.cp(file, target)
            copies << target
        end
        block.call
    ensure
        # Remove only what this call created; rm_f tolerates a copy that is
        # already gone.
        copies.each { |target| FileUtils.rm_f(target) }
    end
end

# Tells whether +command+ can be run.
#
# Returns +command+ itself when it names an executable file directly
# (absolute, or relative to the working directory). Otherwise returns true
# or false depending on whether some PATH entry contains an executable file
# named +command+, "+command+.exe" or "+command+.cmd".
#
# Directories are never treated as commands, and an unset PATH is treated
# as empty.
def command_exists?(command)
    runnable = lambda { |path| File.file?(path) && File.executable?(path) }
    return command if runnable.call(command)

    ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? do |dir|
        candidate = "#{dir}/#{command}"
        [candidate, "#{candidate}.exe", "#{candidate}.cmd"].any?(&runnable)
    end
end

# Small substitution DSL: evaluates +block+ with +string+ as self after
# giving that one object the shortcut +s+ for gsub!. Every `s pattern,
# replacement` inside the block therefore edits +string+ in place.
# Returns +string+.
def replace(string, &block)
    # Define the alias on the singleton class so only this string gets it.
    string.singleton_class.send(:alias_method, :s, :gsub!)
    string.instance_eval(&block)
    string
end

# Escapes characters that are special to LaTeX so that +string+ can be
# placed inside \texttt{...}. Returns a new string; +string+ is not
# modified.
#
#   \        -> {\textbackslash}
#   ~        -> {\textasciitilde}
#   & { }    -> \&  \{  \}
#   $ # _ ^ % -> the character preceded by a backslash and followed by {}
#
# All substitutions happen in a single pass, so braces and backslashes
# introduced by one replacement are never escaped a second time.
def verbatim_sanitize(string)
    escapes = {
        '\\' => '{\textbackslash}',
        '~'  => '{\textasciitilde}',
        '&'  => '\&',
        '{'  => '\{',
        '}'  => '\}'
    }
    string.gsub(/[\\~{}&\$\#_\^%]/) do |char|
        escapes.fetch(char) { "\\#{char}{}" }
    end
end

# Prepares markdown for pandoc. +string+ is edited in place and returned;
# +config+ is accepted for symmetry with post_pandoc but not used here.
#
# Rewrites applied, in order:
# * "##### title #####" / "#### title ####" become "PARASECTION: title" /
#   "SUBSUBSECTION: title" marker lines (post_pandoc turns them back into
#   LaTeX headings).
# * <div class="figures"> image wrappers become markdown images.
# * http/https URLs in backticks, or alone on a tab-indented line after a
#   blank line, become <url> autolinks.
# * "Insert 18333figNNNN.png" followed by a numbered caption line becomes
#   "FIG: caption".
def pre_pandoc(string, config)
    string.gsub!(/\#\#\#\#\# (.*?) \#\#\#\#\#/, 'PARASECTION: \1')
    # Pandoc discards #### subsubsections #### - this hack recovers them
    string.gsub!(/\#\#\#\# (.*?) \#\#\#\#/, 'SUBSUBSECTION: \1')

    # convert div figures to normal markdown format
    # http://johnmacfarlane.net/pandoc/README.html
    # The capture is non-greedy so it stops at the quote closing src="..."
    # even when further attributes follow.
    string.gsub!(/^<div class=\"figures\"> <img src=\"..\/figures\/(.*?)\".*<\/div>/, '![](figures/\1)\ ')

    # Turns URLs into clickable links
    string.gsub!(/\`(https?:\/\/[A-Za-z0-9\/\%\&\=\-\_\\\.]+)\`/, '<\1>')
    string.gsub!(/(\n\n)\t(https?:\/\/[A-Za-z0-9\/\%\&\=\-\_\\\.]+)\n([^\t]|\t\n)/) do
        match = Regexp.last_match
        # Group 3 is consumed by the match. When it is whitespace it is
        # covered by the blank line emitted here; a visible character is
        # put back so the next line does not lose its first letter.
        "#{match[1]}<#{match[2]}>#{match[1]}#{match[3].strip}"
    end

    # Process figures
    string.gsub!(/Insert\s18333fig\d+\.png\s*\n.*?\d{1,2}-\d{1,2}\. (.*)/, 'FIG: \1')
    string
end

# Adapts the LaTeX produced by pandoc to the book template.
#
# string  - LaTeX text; edited in place through the replace helper (each
#           `s` below is gsub! on it) and returned.
# config  - Hash; the keys 'fig', 'tab', 'prechap', 'postchap', 'dql' and
#           'dqr' are read. Presumably the per-language section of the
#           LaTeX config file -- verify against the caller.
# lang    - language code; "zh" enables the extra rules near the end.
# chapter - when anything other than true, headings are rewritten to their
#           starred (unnumbered) forms and a TOC entry is added.
#
# The rules are order-dependent: later patterns match text produced by
# earlier ones.
def post_pandoc(string, config, lang, chapter=true)
	replace(string) do
		space = /\s/

		# Reformat for the book documentclass as opposed to article
		# \section becomes \chap, then the first "sub" after a backslash is
		# dropped (\subsection -> \section, \subsubsection -> \subsection).
		s '\section', '\chap'
		s '\sub', '\\'
		# Turn the marker lines written by pre_pandoc back into headings.
		s /SUBSUBSECTION: (.*)/, '\subsubsection{\1}'
                s /PARASECTION: (.*)/, '\paragraph{\1}'

		# Enable proper cross-reference
		# Whitespace inside the configured labels is replaced by \s so that
		# any whitespace character in the text matches.
		s /#{config['fig'].gsub(space, '\s')}\s*(\d+)\-\-(\d+)/, '\imgref{\1.\2}'
		s /#{config['tab'].gsub(space, '\s')}\s*(\d+)\-\-(\d+)/, '\tabref{\1.\2}'
		s /#{config['prechap'].gsub(space, '\s')}\s*(\d+)(\s*)#{config['postchap'].gsub(space, '\s')}/, '\chapref{\1}\2'

		# Miscellaneous fixes
		s /FIG: (.*)/, '\img{\1}'
		s '\begin{enumerate}[1.]', '\begin{enumerate}'
		# A double hyphen between two word characters becomes a single one.
		s /(\w)--(\w)/, '\1-\2'
		# ``quoted'' text gets the configured left/right quotation marks.
		s /``(.*?)''/, "#{config['dql']}\\1#{config['dqr']}"

		# Typeset the maths in the book with TeX
		s '\verb!p = (n(n-1)/2) * (1/2^160))!', '$p = \frac{n(n-1)}{2} \times \frac{1}{2^{160}}$)'
		s '2\^{}80', '$2^{80}$'
		s /\sx\s10\\\^\{\}(\d+)/, '\e{\1}'

		# Convert inline-verbatims into \texttt (which is able to wrap)
		# The verbatim text is escaped with verbatim_sanitize.
		s /\\verb(\W)(.*?)\1/ do
			"{\\texttt{#{verbatim_sanitize($2)}}}"
		end

		# Ensure monospaced stuff is in a smaller font
		# NOTE(review): the \verb rule below uses the same match as the
		# conversion above, so it looks like it can no longer match anything
		# at this point -- confirm before relying on it.
		s /(\\verb(\W).*?\2)/, '{\footnotesize\1}'
		s /(\\begin\{verbatim\}.*?\\end\{verbatim\})/m, '{\footnotesize\1}'

		# Shaded verbatim block
		s /(\\begin\{verbatim\}.*?\\end\{verbatim\})/m, '\begin{shaded}\1\end{shaded}'
		
        if lang=="zh" 
            # http://www.devdaily.com/blog/post/latex/control-line-spacing-in-itemize-enumerate-tags
            # http://wiki.ctex.org/index.php/LaTeX/%E5%88%97%E8%A1%A8
            # set the space of itemsize
            #s /(\\begin\{itemize\})/m,'\begin{itemize}\setlength{\itemsep}{1pt}\setlength{\parskip}{0pt}\setlength{\parsep}{0pt}'
            s /(\\begin\{enumerate\})/m,'\begin{enumerate}\setlength{\itemsep}{1pt}\setlength{\parskip}{0pt}\setlength{\parsep}{0pt}'
            # hardcode for itemize to use * instead of dot, which is missed in some chinese fonts
            # and keep \item inside \enumerate env is not changed
            # \item -> \item[*]
            # solution is provided by Alexis, and it works under ruby 1.9+ only due to bug in 1.8.7
            # http://stackoverflow.com/questions/9115018/regular-expression-using-ruby-string-gsub-method-to-replace-multi-matches
            #if RUBY_VERSION >= "1.9"
            #    s /^\\item(?=((?!\\begin\{itemize\}).)*\\end\{itemize\})/m, '\\item[*]'
            #else
            #    s /(^\\item)/m,'\item[*]'
            #end
            
            # change the width to standard .6 width 
            # Every \includegraphics gets the option [width=\imgwidth].
            s /\\includegraphics/m, '\\includegraphics[width=\\imgwidth]'
        end
        
        # Non-chapter material: starred headings, plus an explicit
        # table-of-contents entry for each \chap.
        if chapter != true
            s /^\\chap\{(.*)\}/,'\chapter*{\1}'"\n"'\addcontentsline{toc}{chapter}{\1}'
            s /^\\section\{(.*)\}/,'\section*{\1}'
            s /^\\subsection\{(.*)\}/,'\subsection*{\1}'
        end
		
	end
end

# Returns +str+ without its first four and last two lines.
# Presumably these are the Jekyll front matter and a trailing footer --
# confirm against the source files this is used on.
#
# Inputs too short to have anything left between those lines yield an
# empty string.
def check_jekyll(str)
    # The slice is nil when there are fewer than four lines.
    (str.lines.to_a[4..-3] || []).join
end

# Reads +files+ in the given order and joins their contents with a blank
# line in between. Each path is echoed to stdout. When +strip_jekyll+ is
# truthy every file is first passed through check_jekyll.
def pdf_read_markdown(files, strip_jekyll)
    files.map do |file|
        puts "\t =>"+file
        content = File.read(file)
        strip_jekyll ? check_jekyll(content) : content
    end.join("\n\n")
end

# Runs +markdown+ through pre_pandoc, the pandoc command line +command+ and
# post_pandoc, and returns the resulting LaTeX. +chapter+ is forwarded to
# post_pandoc.
def pdf_pandoc_latex(command, markdown, config, lang, chapter)
    IO.popen(command, 'w+') do |pipe|
        pipe.write(pre_pandoc(markdown, config))
        pipe.close_write
        post_pandoc(pipe.read, config, lang, chapter)
    end
end

# Builds "<name>.<lang>.pdf" in $root for the language in options["lang"].
#
# options - Hash; the keys "config", "template", "lang", "name", "jeykll",
#           "debug", "preface-files", "chapter-files" and "appendix-files"
#           are read.
#
# Steps: load the LaTeX config (stored in $config) and the ERB template,
# convert preface / chapters / appendix markdown to LaTeX with pandoc,
# render latex/<lang>/main.tex, run xelatex three times and move the PDF to
# $root. Exits with status 1 when pandoc or xelatex is missing or when
# xelatex reports an error.
#
# The ERB template is rendered with this method's binding, so the local
# variable names used below (config, lang, preface, latex, appendix, ...)
# may be referenced by the template and must not be renamed.
def generate_pdf(options)
    $config = YAML.load_file(options["config"])
    template = ERB.new(File.read(options["template"]))
    languages = [options["lang"]]
    missing = ['pandoc', 'xelatex'].reject{|command| command_exists?(command)}
    unless missing.empty?
        puts "Missing dependencies: #{missing.join(', ')}."
        puts "Install these and try again."
        # A missing tool is a failure; report it through the exit status.
        exit 1
    end

    puts "Will generate pdf for the following languages using template #{options["template"]}:"
    puts "    #{languages}"
    puts

    # Stream the xelatex output for "ruby -d" as well as for the --debug
    # switch that the failure message below recommends.
    verbose = $DEBUG || options["debug"]

    # NOTE(review): recent pandoc releases may no longer accept --no-wrap
    # (superseded by --wrap=none) -- confirm against the installed version.
    pandoc = 'pandoc -p --no-wrap -f markdown -t latex'

    figures do
        languages.each do |lang|
            # Language-specific settings override the defaults; when the
            # merge is not possible the defaults are used alone.
            config = $config['default'].merge($config[lang]) rescue $config['default']

            puts "#{lang}:"

            prefacefiles = "#$root/#{lang}/#{options['preface-files']}"

            puts "\tParsing preface markdown... #{prefacefiles} "
            # Without a configured pattern the glob would be the language
            # directory itself, which cannot be read as a file.
            preface_sources = options['preface-files'] ? Dir[prefacefiles].sort : []
            prefacemarkdown = pdf_read_markdown(preface_sources, options["jeykll"])

            preface = pdf_pandoc_latex('pandoc --highlight-style=pygments -p --no-wrap -f markdown -t latex',
                                       prefacemarkdown, config, lang, false)

            chapterfiles = "#$root/#{lang}/#{options['chapter-files']}"

            puts "\n\tParsing main chapters markdown... #{chapterfiles} "
            # Order by the first number in the path, then by the path.
            # to_i reads "08" as 8 (Integer("08") raises because a leading
            # zero means octal) and gives 0 when there is no number.
            chapter_sources = if options['chapter-files']
                Dir[chapterfiles].sort_by { |file| [file[/\d+/].to_i, file] }
            else
                []
            end
            chaptersmarkdown = pdf_read_markdown(chapter_sources, options["jeykll"])

            latex = pdf_pandoc_latex(pandoc, chaptersmarkdown, config, lang, true)

            appendixfiles = "#$root/#{lang}/#{options['appendix-files']}"

            puts "\n\tParsing appendix markdown... #{appendixfiles}  "
            appendix_sources = options['appendix-files'] ? Dir[appendixfiles].sort : []
            appendixmarkdown = pdf_read_markdown(appendix_sources, options["jeykll"])

            appendix = pdf_pandoc_latex(pandoc, appendixmarkdown, config, lang, true)

            print "\n\tCreating main.tex for #{lang}... "
            dir = "latex/#{lang}"
            FileUtils.mkdir_p(dir)

            File.open("#{dir}/main.tex", 'w') do |file|
                file.write(template.result(binding))
            end
            puts "done"

            abort = false

            puts "\tRunning XeTeX:"
            FileUtils.cd(dir)
            begin
                # Three passes, as in the original script (presumably so that
                # cross-references and the table of contents settle).
                3.times do |i|
                    print "\t\tPass #{i + 1}... "
                    line = ""
                    IO.popen("xelatex -output-directory=\".\" \"./main.tex\" 2>&1") do |pipe|
                        if verbose
                            STDERR.print while pipe.gets rescue abort = true
                        else
                            while line = pipe.gets and not abort
                                # Lines that are not valid in their encoding
                                # cannot be matched against a regexp; skip them.
                                readable = !line.respond_to?(:valid_encoding?) || line.valid_encoding?
                                if readable and line =~ /^!\s/
                                    puts "failed with:\n\t\t\t#{line.strip}"
                                    puts "\tConsider running this again with --debug."
                                    abort = true
                                end
                            end
                        end
                    end
                    break if abort
                    puts "done"
                end
            ensure
                # Always return to the project root, even if xelatex could
                # not be run at all.
                FileUtils.cd($root)
            end

            if abort
                print "\tConvert error, exit !\n"
                exit 1
            end

            print "\tMoving output to #{options["name"]}.#{lang}.pdf... "
            FileUtils.mv("#{dir}/main.pdf", "#$root/#{options["name"]}.#{lang}.pdf")
            puts "done"
        end
    end
end

# Reads each of +files+, rewrites "Insert <image>.png" + "<figure_title>
# <caption>" pairs into markdown image syntax pointing at the "-tn"
# thumbnail, and returns everything concatenated, each file followed by a
# blank line. Each path is echoed to stdout.
def ebook_collect_markdown(files, figure_title)
    files.map do |input|
        puts "\tProcessing #{input}"
        content = File.read(input)
        content.gsub!(/Insert\s+(.*)(\.png)\s*\n?\s*#{figure_title}\s+(.*)/, '![\3](figures/\1-tn\2 "\3")')
        content + "\n\n"
    end.join
end

# Builds one ebook per language in options["lang"] (comma separated) and
# per format in options["outputformat"], leaving "<name>.<lang>.<format>"
# in $root.
#
# options - Hash; the keys "name", "lang", "outputformat", "preface-files",
#           "chapter-files" and "appendix-files" are read. The book name
#           falls back to the name of the current directory.
#
# The combined markdown is written to md/<lang>/<name>.<lang>.md and pandoc
# is run from that directory for every format. Exits with status 1 when
# pandoc is missing or fails.
def generate_ebook(options)
    # Honour the configured book name, as generate_pdf does.
    name = options["name"] || File.basename(Dir.getwd)

    missing = ['pandoc'].reject{|command| command_exists?(command)}
    unless missing.empty?
        puts "Missing dependencies: #{missing.join(', ')}."
        puts "Install these and try again."
        exit 1
    end

    puts " "
    puts "Will generate ebooks [#{options["outputformat"].join(',')}] for the following languages #{options["lang"]}"
    puts " "

    sections = [
        ["preface",  'preface-files'],
        ["chapter",  'chapter-files'],
        ["appendix", 'appendix-files']
    ]

    options["lang"].split(',').each do |lang|
        puts "convert content for '#{lang}' language"

        figure_title = lang == 'zh' ? '图' : 'Figure'

        book_content = sections.map do |label, key|
            puts "\n\tParsing #{label} markdown...  "
            # Without a configured pattern the glob would be the language
            # directory itself, which cannot be read as a file.
            files = options[key] ? Dir["#$root/#{lang}/#{options[key]}"].sort : []
            ebook_collect_markdown(files, figure_title)
        end.join

        print "\n\tCreating markdown for #{lang}... "
        dir = "md/#{lang}"
        FileUtils.mkdir_p(dir)

        File.open("#{dir}/#{name}.#{lang}.md", 'w') do |output|
            output.write(book_content)
        end

        puts "\tRunning pandoc:"
        puts "#$root"
        options["outputformat"].each do | format |
            output_name = "#{name}.#{lang}.#{format}"
            # Enter the markdown directory for this run only, so that every
            # format (not just the first) finds "<name>.<lang>.md".
            built = Dir.chdir(dir) do
                system('pandoc',
                   '--standalone',
                   '--toc',
                   '--epub-metadata', "#$root/epub/metadata.xml",
                   '--epub-stylesheet', "#$root/epub/book.css", # this doesn't work
                   '--output', output_name,
                   "#$root/title.#{lang}.txt", # little strange, if this put under epub, pandoc reports error
                   "#{name}.#{lang}.md")
            end
            unless built
                puts "pandoc failed to build #{output_name}"
                exit 1
            end
            FileUtils.mv("#{dir}/#{output_name}", "#$root/#{output_name}")
            puts("#{output_name} is generated")
        end
        FileUtils.cd($root)
    end
end

# http://stackoverflow.com/questions/5074327/most-appropriate-way-to-generate-directory-of-files-from-directory-of-template-f
#
# Creates the project directory +project+ from the template tree +source+
# (by default "../templates" next to this script).
#
# * Any existing +project+ directory is deleted first.
# * The template's directory structure is recreated, empty directories
#   included.
# * "*.erb" files are rendered with ERB and written next to their siblings
#   without the ".erb" suffix; all other files are copied unchanged.
#
# Templates are rendered with this method's binding, so they may refer to
# the local variables below (for example +project+); do not rename them.
def generate_project(project, source = File.dirname(__FILE__)+"/../templates")
    destination = project
    # NOTE(review): this removes an existing project directory without
    # asking -- confirm that this is intended.
    FileUtils.rmtree(destination)
    FileUtils.mkdir_p(destination)
    sourceroot = Pathname.new(source)
    sourcerealpath = sourceroot.cleanpath
    puts "generate project \"#{destination}\" from source \"#{sourcerealpath}\""
    Dir.glob(File.join(source, '**/*')).each do |path|
        pathname = Pathname.new(path)
        relative = pathname.relative_path_from(sourceroot)

        if File.directory?(pathname)
            # Create the directory itself so that empty ones survive too.
            FileUtils.mkdir_p(File.join(destination, relative.to_s))
            next
        end

        destdir = File.join(destination, relative.dirname.to_s)
        FileUtils.mkdir_p(destdir)
        if pathname.extname == '.erb'
            # Keep the rendered file in the same sub-directory as its template.
            target = File.join(destdir, pathname.basename.sub(/\.erb$/, '').to_s)
            File.open(target, 'w') do |file|
                file.puts(ERB.new(File.read(path)).result(binding))
            end
        else
            print pathname.cleanpath, " => ", destdir, "\n"
            FileUtils.cp(pathname, destdir)
        end
    end
end

# Run the command-line entry point as soon as this file is executed or loaded.
main