#!/bin/bash - 
#===============================================================================
#
#          FILE:  man2html-convertall.sh
# 
#         USAGE:  ./man2html-convertall.sh 
# 
#   DESCRIPTION:  Convert all system manpages to HTML into tmp folder
#                 
#                 Using an Amazon Kindle for reading ManPages is the motivation
#                 for this script.
# 
#       OPTIONS:  ---
#  REQUIREMENTS:  ---
#          BUGS:  ---
#         NOTES:  ---
#        AUTHOR: Roger Zauner (rdz), rogerx (dot) oss (at) gmail (dot) com 
#       LICENSE: GPL-2
#       CREATED: 08/07/2011 02:01:10 AM AKDT
#      REVISION:  ---
#===============================================================================

# Shell options for the whole script.
set -o nounset                              # Treat unset variables as an error
#set -o xtrace                              # Enable trace debugging


# FOR REFERENCE: Use cgi web-based man2html file:
# man-1.6f/man2html/scripts/cgi-bin/man/man2html (not related to system man
# package) contains some scripting with functions similar to this script.

# DEPENDENCIES: Make sure you have the following list of externally used programs
# installed on your system.  The _check_depends function will do its best to
# automatically check for you.
#   basename (coreutils)
#   bzcat (bzip2)
#   cut (coreutils)
#   find (findutils)
#   gunzip (gzip)
#   man2html (man)
#   mkdir (coreutils)
#   sed (sed)
#
#   Amazon's Kindlegen closed source binary can be used to convert HTML to AZW
#   (AKA MOBI format).
#   http://www.amazon.com/gp/feature.html?ie=UTF8&docId=1000234621

# BUGS
# You might encounter an HTML conversion error if a manpage's contents are a
# symbolic link pointing to another manpage.  Such as:
#
# $ bzcat /usr/share/man/man0p/msg.h.0p.bz2 | man2html ;\
# printf "\nreturn value is: %s\n" $?
#
# The above also demonstrates man2html not returning a value other than zero
# on error.
#
# Might see a few segfaults/core dumps on HTML conversion (man2html);
# pdfroff.1 is one file causing man2html to segfault.
# Again, since man2html doesn't return a value other than zero, this passes
# through checks in this script.

#
# VARIABLES
#

BASE_OUTPUT_DIR="/tmp"  # Base dir for writing html manpages to

DEBUG="1"               # Set to other than 0 (i.e. 1) for debugging
                        # (Uncomment xtrace above for even more output.)

MAN_PATH="/usr/share/man"       # The base path for your system's man pages.

#KINDLEGEN="YES"         # TODO: Create .mobi file instead of .html files
                         #       Or, create both .html & .mobi files???


# $0 with any leading directory stripped; declared read-only and exported.
declare -rx SCRIPT=${0##*/}     # Name of this script for debug output


#
# FUNCTIONS
#

# Debug gate: succeeds only while debugging is enabled (DEBUG other than 0).
#   Globals:   DEBUG (read)
#   Arguments: optional command text; the words are joined and run via eval
#   Returns:   1 when DEBUG is 0; otherwise the status of the eval, which is
#              0 when no arguments are given (the `_debug && cmd` usage)
_debug()
{
    # Debugging switched off: report failure so `_debug && ...` is skipped.
    if [[ $DEBUG == 0 ]]; then
        return 1
    fi

    # NOTE: eval re-parses its arguments as shell code; only pass trusted text.
    eval "$@"
}

# Abort the script when the command that ran immediately before this call
# (or the status passed as $1) did not succeed.
#   Globals:   SCRIPT (read)
#   Arguments: $1 - optional exit status to check; defaults to $? on entry
#   Outputs:   one diagnostic line on stderr naming the calling line
#   Returns:   0 when the status is zero; otherwise exits with that status
_check_return_val()
{
    # Must stay the first statement so $? still holds the caller's status.
    local ret_val=${1:-$?}

    if [[ ${ret_val} != 0 ]]; then
        # BASH_LINENO[0] is the line of the call site; LINENO would only
        # name a line inside this helper.  The whole message goes to stderr.
        printf '%s @ Line %s: Last command returned value other then zero. ret_val=%s -- aborting\n' \
            "${SCRIPT}" "${BASH_LINENO[0]}" "${ret_val}" >&2

        # A `break` issued inside a function cannot leave a loop of the
        # caller on bash >= 4.4, so exit to make the announced abort real.
        exit "${ret_val}"
    fi
}

# Verify that every external program this script runs can be found on PATH.
#   Globals:   SCRIPT (read)
#   Outputs:   progress messages on stdout while debugging; the error for a
#              missing program on stderr
#   Returns:   0 when all programs were found; exits with status 3 on the
#              first missing one
_check_depends()
{
    local program
    local -a program_list=(basename bzcat cut find gunzip man2html mkdir sed)

    _debug && printf "Checking depedencies...\n"

    for program in "${program_list[@]}"; do

        # type -P searches PATH only, so a function or alias of the same
        # name cannot hide a missing executable.
        if ! type -P "${program}" >/dev/null; then
            printf '%s:%s: the command %s is not available -- aborting\n' \
                "${SCRIPT}" "${LINENO}" "${program}" >&2

            exit 3
        fi

    done

    _debug && printf "Done checking dependecies.\n\n"

    # Without this the status of the debug gate above (1 when debugging is
    # off) would be reported as the result of a successful check.
    return 0
}

# Run Amazon's kindlegen on every .html file in the current directory.
#   Outputs:   kindlegen's stdout is discarded
#   Returns:   status of the last kindlegen run; 0 when there is no .html
#              file to convert
_html-to-mobi() # using kindlegen
{
    local file

    for file in ./*.html; do
        # An unmatched glob is left as the literal "./*.html"; skip it
        # instead of handing a non-existent name to kindlegen.
        [[ -e ${file} ]] || continue

        kindlegen "${file}" >/dev/null
    done
}


#
# MAIN
#

_check_depends

# Find man pages, but omit .keep and any other dot-named entries.  The paths
# are read NUL-delimited on fd 3 (stdin is left alone), so a name containing
# whitespace or glob characters is never split or expanded.
while IFS= read -r -d '' manpage_fullpath <&3
do
    # we just want the files and not folders.  filter all others out.
    [[ -f "${manpage_fullpath}" ]] || continue

    # keep subdivision of system numbered manpage folders: the folder name is
    # the first path component below MAN_PATH (man1, man3p, ...), so it no
    # longer depends on how many components MAN_PATH itself has.
    relative_path=${manpage_fullpath#"${MAN_PATH}"/}
    foldername=${relative_path%%/*}
    _debug && printf "\nfoldername=%s\n" "${foldername}"

    # (if folder doesn't exist, then create it.)
    output_dir="${BASE_OUTPUT_DIR}/${foldername}"
    if [[ ! -d "${output_dir}" ]] && ! mkdir -p -- "${output_dir}"; then
        printf '%s: cannot create %s -- skipping %s\n' \
            "${SCRIPT}" "${output_dir}" "${manpage_fullpath}" >&2
        continue
    fi

    # Remove the folder location from the file name.
    filename=${manpage_fullpath##*/}
    _debug && printf "filename=%s\n" "${filename}"
    _debug && printf "manpage_fullpath=%s\n" "${manpage_fullpath}"

    # Pick the decompressor according to file extension.  For now, just
    # gunzip & bzip2 are used.
    case "${filename}" in
        *.bz2)
            _debug && printf "This is a bzip2 compressed file.\n"
            decompress=(bzcat)
            page_name=${filename%.bz2}
            ;;

        *.gz)
            _debug && printf "This is a gunzip compressed file.\n"
            decompress=(gunzip --to-stdout)
            page_name=${filename%.gz}
            ;;

        *)
            printf "Unrecognized file extension for case command for"
            printf " filename %s\n" "${manpage_fullpath}"
            continue
            ;;
    esac

    # Strip only the trailing section suffix (".1", ".3p", ...).  Cutting at
    # the first dot would map e.g. systemd.unit.5 and systemd.service.5 to
    # the same systemd.html, each overwriting the other.
    base_filename=${page_name%.*}

    # Uncompress and convert to a HTML file.
    "${decompress[@]}" "${manpage_fullpath}" \
        | man2html > "${output_dir}/${base_filename}.html"
    pipeline_status=("${PIPESTATUS[@]}")

    # Report the decompressor's failure if it failed, else man2html's status.
    conversion_status=${pipeline_status[1]}
    if [[ ${pipeline_status[0]} != 0 ]]; then
        conversion_status=${pipeline_status[0]}
    fi

    if _debug; then
        # _check_return_val inspects $? on entry, so re-create the status of
        # the conversion right before the call; calling it after `_debug &&`
        # would only ever see the (successful) status of _debug.
        ( exit "${conversion_status}" )
        _check_return_val
    fi
done 3< <(find "${MAN_PATH}"/man* \( ! -iname ".*" \) -print0)

