#!/usr/bin/env bash

#
# Program:
#
#     eudora2unix.sh    (GNU/Bash shell script)
#
# Copyright and Author:
#
#     Copyright (C) 2002  Eric Maryniak
#
#     Eric Maryniak <e.maryniak@pobox.com>
#     WWW homepage: http://pobox.com/~e.maryniak/
#
# Version:
#
#     Last modification: 2002-01-29  (started: 2002-01-15).
#     Number of lines in file: ca. 490.
#
# License:
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
# 
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
# 
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Program Info:
#
#    Converts an entire Eudora directory to Linux/Unix (KMail) format.
#    It loops over a Eudora mailbox tree and copies and converts mailbox
#    files (*.mbx) to a Unix Mail directory with the companion Perl script:
#
#        eudora2unix.pl  - companion script (per-mailbox conversion)
#
#    Intelligently handles folders and subfolders and converts names to
#    KMail convention. Requires GNU bash and the GNU versions (or compatible)
#    of the programs find, egrep, mv, rm, chmod and tar.
#
#    Usage:
#
#        eudora2unix.sh /path/to/eudora-directory
#
#    The program expects:
#
#        argument 1 :  Eudora mailbox directory (with *.mbx files)
#
#    and makes:
#
#        ~/Mail     :  target mail directory (created)
#
#    Example:
#
#        eudora2unix /d-windows/eudora
#
#    Be sure to remove embedded spaces in folder and mailbox names first
#    (but the program warns about this and aborts if so).
#    The original Eudora directory is left intact.
#    All mailboxes are copied to $HOME/Mail first, and then processed.
#    The original mailbox file, e.g. 'myproj.mbx', is backed up by renaming
#    it to 'myproj.mbx.ORIG' (in ~/Mail).
#    The following files are created by eudora2unix.pl (in ~/Mail):
#
#        Program invocation     :  eudora2unix.sh /path/to/eudora-dir
#        Example Eudora mailbox :  /path/to/eudora-dir/myproj.mbx
#
#        eudora2unix.sh created :  myproj.mbx.ORIG  (original, for diff-ing)
#
#        eudora2unix.pl created :  myproj.mbx.OUT   (converted mailbox)
#                                  myproj.mbx.LOG   (informational log)
#                                  myproj.mbx.WARN  (warnings while processing)
#                                  myproj.mbx.ERR   (errors while processing)
#
#    The converted mailboxes are renamed by this script to 'myproj', thus the
#    eudora2unix.pl generated '.mbx.OUT' suffix is removed. KMail does not,
#    like many other Unix mail programs, require a certain suffix (extension).
#    Empty warning and error files are removed after processing.
#    The original mailbox in ~/Mail, e.g. 'myproj.mbx' which was renamed to
#    'myproj.mbx.ORIG' can be used for analysis with diff.
#    As said, the original mailboxes in the Eudora directory (*.mbx) are
#    left alone altogether. A working copy to ~/Mail is made first.
#
#    You should carefully inspect the remaining warning (*.mbx.WARN) and
#    error files (*.mbx.ERR) and compare original and converted mailbox, like
#    ("$ " is the Unix bash shell prompt):
#
#        $ ( cd $HOME/Mail && for f in `find . -type f -print | sort`; do
#                [ -f $f.mbx.WARN ] && diff -u $f.mbx.ORIG $f
#                [ -f $f.mbx.ERR  ] && diff -u $f.mbx.ORIG $f
#            done ) | more
#        $_
#
#    After inspection and perhaps fixing up, you can remove the log, warnings
#    and original mailbox files:
#
#        $ find $HOME/Mail -type f \
#              \( -name '*.mbx.ORIG' -o -name '*.mbx.LOG' -o \
#                 -name '*.mbx.WARN' -o -name '*.mbx.ERR' \) -exec rm -v {} \;
#        $_
#
#    Converting ~1 Gb mail, made up by 115 folders (.fol directories) with
#    1132 mailboxes (.mbx files) and 218663 messages, took about 30 minutes
#    on a Pentium II @500 Mhz (single cpu) with 256 Mb RAM (SuSE Linux 7.3,
#    kernel 2.4.10-4GB, reiserfs on an IDE disk).
#
# End of Program Info.
#

# Configuration.
# See variable $user_pre_script below (user specific script to run first).
# End of configuration.

#
# Basename of this program script (eat up $0 until last /), full name and
# directory name.
#
# basefn: script name without its directory part (used as message prefix).
basefn=${0##*/}
# fullfn: the invocation path exactly as given.
fullfn=$0
# dirnfn: directory part of the invocation path.  "$0" is quoted so that an
# installation path containing whitespace is passed to dirname as one word.
dirnfn=$(dirname -- "$0" 2>/dev/null)

#
# GNU/Linux or Unix Mail directory and Eudora directory (arg 1).
# Remove a trailing slash (eudoradir/ -> eudoradir) in the latter.
# Enforce the Eudora directory to be an absolute path (below).
# Relative paths (such as '.') will give problems, because this script
# does a few cd's (change directory) and must therefore be able to come
# back where it came from.
#
# Target mail directory (created later by this script).
MAILDIR="$HOME/Mail"
# Eudora directory as given in argument 1, minus one trailing slash.
# Parameter expansion is used instead of `echo $1 | sed`, so the argument is
# never word-split, glob-expanded or mistaken for an echo option.
EUDORADIR=${1%/}

# Path to eudora2unix.pl companion script.
# Look in $PATH first, then current directory, then this script's path,
# then give up.
# 1. $PATH lookup ('command -v' is the shell builtin; no external 'which').
eudora2unix_pl=$(command -v eudora2unix.pl 2>/dev/null)
# 2. Current directory, stored as an absolute path.
if [[ -z $eudora2unix_pl && -x ./eudora2unix.pl ]]; then
    eudora2unix_pl="$(pwd 2>/dev/null)/eudora2unix.pl"
fi
# 3. Directory of this script.  It is resolved to an absolute path because
#    the script changes directory before the converter is invoked, which
#    would invalidate a relative $dirnfn such as "bin".
if [[ -z $eudora2unix_pl && -x $dirnfn/eudora2unix.pl ]]; then
    eudora2unix_pl="$(CDPATH= cd -- "$dirnfn" 2>/dev/null && pwd)/eudora2unix.pl"
fi
# 4. Give up.
if [[ -z $eudora2unix_pl || ! -x $eudora2unix_pl ]]; then
    echo "$basefn: cannot locate executable \"eudora2unix.pl\""
    echo "$basefn: tried \$PATH, \".\" and \"$dirnfn\""
    exit 1
fi

#
# Help functions and aliases.
#
# exit_msg TEXT...
#   Report a fatal error on stdout, each line prefixed with $basefn, warn
#   that $MAILDIR may be left half-built, and terminate with status 1.
#   Reads the globals basefn and MAILDIR.
exit_msg() {
    printf '%s\n' \
        "$basefn: a fatal error occurred:" \
        "$basefn:    $*" \
        "$basefn: aborting; directory $MAILDIR (if present)" \
        "$basefn: probably has an inconsistent state and should be removed."
    exit 1
}
# gnu_check TEXT...
#   Report on stdout that a required GNU (or compatible) tool is missing or
#   misbehaving -- TEXT names the tool or option -- and terminate with
#   status 1.  Reads the global basefn.
gnu_check() {
    printf '%s\n' \
        "$basefn: a GNU or GNU-compatible program is missing or has problems:" \
        "$basefn:    $*" \
        "$basefn: aborting; please install it or ask your system admin to ---" \
        "$basefn: see http://www.gnu.org/ for a list of ftp mirrors."
    exit 1
}
# step
#   Debugging aid: print a "Continue: " prompt (no newline) and block until
#   one line has been read from stdin.  The line is stored in the global
#   variable 'a'.
step() {
    printf '%s' "Continue: "
    read a
}

#
# Usage.
#
# Exactly one, non-empty argument is required.
if [[ -z $EUDORADIR || $# -ne 1 ]]; then
    echo "$basefn: usage error; specify Eudora directory to be converted:"
    echo "$basefn:     $basefn eudora_directory_to_be_converted"
    exit 1
fi
# The path must be absolute, i.e. start with a slash.  Testing the string
# itself (instead of probing for a directory "./$EUDORADIR") avoids rejecting
# a valid absolute path when the current directory happens to contain the
# same sub-path, e.g. when the script is started from "/".
if [[ $EUDORADIR != /* ]]; then
    echo "$basefn: usage error; Eudora directory must be absolute, as in:"
    echo "$basefn:     $basefn $(pwd 2>/dev/null)/$EUDORADIR"
    exit 1
fi

#
# Some safety checks first.
#
echo

#
# Program availability (GNU).
# Do some simple, crude, but probably good enough testing for the
# availability of GNU or GNU compatible versions of find, egrep and others.
# Most GNU programs support a '-v' (verbose) flag, which we use extensively.
#

# The Gnu date check has been removed, because in eudora2unix.pl now the
# Perl built-in 'gmtime(time)' is used (returns UTC Zulu, formerly known
# as 'GMT', or +00:00 or zero-meridian date and time).
# Gnu's date has the handy (albeit non-ISO 8601 conformant) %z flag (not
# to be confused with %Z) that returns the timezone difference numerically.
#date '+%Y-%m-%dT%H:%M:%S%z' 1>/dev/null 2>/dev/null || \
#    gnu_check "date with %Y and %z format specifier"

# The probe files live in a private directory made by mktemp rather than
# under a predictable name directly in /tmp, so another user cannot
# pre-create or symlink them.  All expansions are quoted.
probe_dir=$(mktemp -d 2>/dev/null) || gnu_check "mktemp with -d option"
f="$probe_dir/${basefn}.EU.TMP"
fnew="$probe_dir/${basefn}.UU.TMP"
touch "$f" 2>/dev/null || gnu_check "touch $f"
echo "$f" 1>"$f" 2>/dev/null || gnu_check "echo $f > $f"
find "$probe_dir" -maxdepth 1 -type f -iname '*.EU.TMP' -print \
    1>/dev/null 2>/dev/null || \
    gnu_check "find with -maxdepth, -type and -iname option"
# The probe file holds its own name, so a non-matching -v pattern selects it.
egrep -iv "mismatch-test" "$f" 1>/dev/null 2>/dev/null || \
    gnu_check "egrep with -i and -v option"
chmod -v 0600 "$f" 1>/dev/null 2>/dev/null || gnu_check "chmod with -v option"
mv -v "$f" "$fnew" 1>/dev/null 2>/dev/null || gnu_check "mv with -v option"
rm -v    "$fnew" 1>/dev/null 2>/dev/null || gnu_check "rm with -v option"
# Best-effort removal of the (now empty) probe directory.
rmdir "$probe_dir" 2>/dev/null

#
# Unix and Eudora mailbox directory.
#
# Refuse to touch an existing mail directory.  $MAILDIR is quoted: unquoted,
# a value with whitespace breaks the test, and an empty value makes
# `[ -d ]` evaluate to true.
if [[ -d $MAILDIR ]]; then
    echo "$basefn: directory $MAILDIR already exists;"
    echo "$basefn: rename it, e.g. to ${MAILDIR}-old"
    echo "$basefn: then you can merge the old into the new mail directory"
    echo "$basefn: ($MAILDIR) after conversion."
    exit 1
fi

# The Eudora directory must exist and be searchable and readable.
# [[ ... ]] with || replaces the unquoted `[ ... -o ... ]` form, which fails
# with a syntax error as soon as the path contains whitespace.
if [[ ! -d $EUDORADIR || ! -x $EUDORADIR || ! -r $EUDORADIR ]]; then
    echo "$basefn: \"$EUDORADIR\" is not a (readable) directory, abort."
    exit 1
fi

cd "$EUDORADIR" || { echo "$basefn: cannot cd to $EUDORADIR, abort."; exit 1; }

# An inbox, in one of the usual spellings, identifies a Eudora directory.
if [[ ! -f In.mbx && ! -f in.mbx && ! -f IN.mbx && ! -f IN.MBX ]]; then
    echo "$basefn: this is not a Eudora directory; cannot find inbox, abort."
    exit 1
fi

# List every folder (*.fol) and mailbox (*.mbx) whose path contains a space
# and abort if there is any.  The tree is scanned once and the result reused
# for both the listing and the rename suggestions.  "$EUDORADIR" is quoted so
# find receives it as a single path; unquoted, a path with spaces made find
# fail and the check pass silently.
spaced_names=$( find "$EUDORADIR" \( -iname '*.fol' -o -iname '*.mbx' \) \
                    -print 2>/dev/null | grep " " )
if [[ -n $spaced_names ]]; then
    printf '%s\n' "$spaced_names"
    echo
    echo "$basefn: there are Eudora mailbox folder(s) and/or file(s)"
    echo "$basefn: with spaces in their names (see list above);"
    echo "$basefn: correct this first by renaming those files, e.g. by"
    echo "$basefn: replacing the space with a dash (-) or underscore (_):"
    echo
    # Show up to five of the offending names with the spaces replaced.
    printf '%s\n' "$spaced_names" | tail -5 | sed 's/ /-/g'
    echo
    exit 1
fi

# Not used: code to lowercase filenames:
#
#   for f in `find . -print|sort|grep "[A-Z]"`; do
#       fnew=`lowercase $f`
#       mv -v $f $fnew
#   done
#
# where 'lowercase' is this bash function (works for ISO 646 ("ASCII") only):
#
#   function lowercase () {
#     local a="$*"
#     a=${a//A/a} a=${a//B/b} a=${a//C/c} a=${a//D/d} a=${a//E/e} a=${a//F/f}
#     a=${a//G/g} a=${a//H/h} a=${a//I/i} a=${a//J/j} a=${a//K/k} a=${a//L/l}
#     a=${a//M/m} a=${a//N/n} a=${a//O/o} a=${a//P/p} a=${a//Q/q} a=${a//R/r}
#     a=${a//S/s} a=${a//T/t} a=${a//U/u} a=${a//V/v} a=${a//W/w} a=${a//X/x}
#     a=${a//Y/y} a=${a//Z/z}
#     echo $a
#   }

#
# Let's rock.
#
# Banner, then ask the user to confirm that a backup of the Eudora
# directory exists.  Any answer other than "y" or "Y" ends the run with
# exit status 0.
printf '%s\n' \
    "Quote:" \
    "" \
    "  Beware of bugs in the above code; I have only proved it correct," \
    "  not tried it." \
    "" \
    "      - Donald Knuth" \
    "" \
    "" \
    "Ok, the Eudora mailbox converter is about to start." \
    "In principle, the Eudora directory should be left unchanged..." \
    "But just in case, are you sure you have made a backup copy of the" \
    "Eudora directory:" \
    "    $EUDORADIR" \
    ""
printf '%s' "I have made a backup copy [y/N]: "
read answer
printf '\n'
case "$answer" in
    y|Y)
        printf '%s\n' \
            "Right, let's rock." \
            "Expects lots of output... which is ok." \
            "Program will stop if there is any error along the way." \
            "" \
            "Last chance to bail out."
        printf '%s' "Hit return to continue, control-C to stop: "
        # Second gate: waits for a line of input before starting.
        read a
        printf '\n'
        printf '%s\n' "Starting."
        sleep 3
        printf '\n'
        ;;
    *)
        printf '%s\n' "Ok, see you later, alligator ;-)" ""
        exit 0
        ;;
esac

# Create $MAILDIR.

# Quoted so that a home directory containing whitespace yields one directory,
# not several.
mkdir -v -- "$MAILDIR" || exit_msg "cannot create $MAILDIR"

#
# User-specific pre-actions first.
# Add your hook here.
# Note: script exit code is checked and must be 0, to continue.
#
user_pre_script="$HOME/bin/eric-eudora2unix-file-renames.sh"
#user_pre_script="$HOME/bin/UP2low-fname.sh"
# Run the hook only if it exists and is executable.  The path is quoted
# everywhere so that a $HOME containing whitespace does not break the test or
# the invocation.
if [[ -x $user_pre_script ]]; then
    echo "Pre-actions with script"
    echo "    $user_pre_script"
    echo "started."
    "$user_pre_script"
    ret=$?
    echo "Pre-actions with script"
    echo "    $user_pre_script"
    if [[ $ret -eq 0 ]]; then
        echo "finished successfully."
    else
        echo "finished UNsuccessfully."
        echo "Script did not exit with code 0. Abort."
        # Best effort: remove the mail directory again (only works if empty).
        rmdir -v "$MAILDIR" 2>/dev/null
        exit 1
    fi
fi

# Copy In, Out and Trash box.
# Skip 'drafts' and touch 'outbox'.
# Name conversions:
#
# ---------       ----------------
# Eudora     -->  KMail
# ---------       ----------------
# In.mbx          inbox
# Out.mbx         sent-mail
# Trash.mbx       trash
# n/a             outbox (touched)
# n/a             drafts (skipped)
# ---------       ----------------
# 
# For now, they are given a .mbx suffix (a la Eudora), so that they are picked
# up later generically along with the other regular mailboxes, where a backup
# copy (inbox.mbx.ORIG etc.) will also be made.
#
# copy_std_box TARGET NAME...
#   Copy every existing file among NAME... (spellings of one Eudora box,
#   relative to the current directory) to TARGET.  If several spellings
#   exist, the last one listed ends up in TARGET.  Aborts via exit_msg on a
#   failed copy.
copy_std_box() {
    local target=$1
    local name
    shift
    for name in "$@"; do
        if [[ -f $name ]]; then
            cp -v -- "$name" "$target" || \
                exit_msg "cannot copy $name to $target"
        fi
    done
}

cd "$EUDORADIR" || exit_msg "cannot cd to $EUDORADIR"
copy_std_box "$MAILDIR/inbox.mbx"     In.mbx    in.mbx    IN.mbx    IN.MBX
copy_std_box "$MAILDIR/sent-mail.mbx" Out.mbx   out.mbx   OUT.mbx   OUT.MBX
copy_std_box "$MAILDIR/trash.mbx"     Trash.mbx trash.mbx TRASH.mbx TRASH.MBX

# Create (empty) standard boxes if they were not present in Eudora.
for f in {inbox,sent-mail,trash,outbox}.mbx; do
    t="$MAILDIR/$f"
    if [[ ! -f $t ]]; then
        # Explicit if/else: with `touch && echo || exit_msg` a failing echo
        # would also trigger the fatal branch.
        if touch -- "$t"; then
            echo "touch: $t"
        else
            exit_msg "cannot touch $t"
        fi
    fi
done

# Copy other Eudora mailboxes in the top level mailbox folder.
cd "$EUDORADIR" || exit_msg "cannot cd to $EUDORADIR"
# Read find's output line by line instead of word-splitting it, and quote
# every use of the name.  In/Out/Trash (any letter case) are filtered out
# because they were already copied under their KMail names.  The loop is fed
# through process substitution, not a pipe, so that exit_msg terminates the
# script itself rather than a pipeline subshell.
while IFS= read -r f; do
    cp -pv -- "$f" "$MAILDIR/$f" || exit_msg "cannot copy $f"
done < <( find . -maxdepth 1 -type f -iname '*.mbx' -print | \
              grep -E -iv '/(in|out|trash)\.mbx$' )

# Copy Eudora folders (*.fol) in the top level Eudora directory recursively.
# Only copy the mailbox files (*.mbx) in those folders.
# In the next step, the subfolders and mailboxes (*.mbx) will be processed
# (DOS to Unix EOL conversion).
cd "$EUDORADIR" || exit_msg "cannot cd to $EUDORADIR"
# For each top-level *.fol folder, stream its mailbox files (and only those)
# through tar into $MAILDIR, keeping the relative directory layout.
while IFS= read -r d; do
    mbx_list=$( find "$d" -type f -iname '*.mbx' -print | sort )
    # A folder tree without any mailbox has nothing to copy; calling tar
    # with an empty file list would only produce errors.
    [[ -n $mbx_list ]] || continue
    # The names are handed to tar on stdin (-T -), one per line, so they are
    # neither word-split nor limited by the maximum command-line length.
    # 'cd ... &&' makes sure nothing is extracted if $MAILDIR cannot be
    # entered (otherwise tar would unpack into the Eudora directory itself).
    printf '%s\n' "$mbx_list" | tar -cf - -T - | \
        ( cd "$MAILDIR" && tar -xvf - ) || \
        exit_msg "cannot copy folder $d to $MAILDIR"
done < <( find . -maxdepth 1 -type d -iname '*.fol' -print | \
              sort                                       | \
              sed -e 's@^\./@@'                            )

# Process all Eudora mailbox files (*.mbx) with the per-mailbox converter
# 'eudora2unix.pl', the companion Perl script, that does DOS to Unix
# end-of-line conversion, too.
# Mailboxes include inbox.mbx, sent-mail.mbx and trash.mbx, and the mailboxes
# in the top folder.
# Rename the original .mbx mailbox file to .mbx.ORIG (backup copy).
# Move the converted mailbox, e.g. 'myproj.mbx.OUT', to 'myproj' which will
# be the KMail mailbox.
# A myproj.mbx.ORIG backup will be kept for later analysis (e.g. diff-ing).
# Remove the .mbx.WARN and .mbx.ERR file if they are empty, i.e. there were
# no warnings errors reported by 'eudora2unix.pl'.
# The non-empty ones are kept for later analysis (e.g. diff-ing against the
# .mbx.ORIG version).
echo
echo
# The mailbox list arrives on file descriptor 3 so that stdin stays
# untouched for the converter; every name is quoted.  'sort' has consumed the
# complete find output before the first iteration, so files created in the
# loop cannot be picked up by the running find.
while IFS= read -r f <&3; do
    "$eudora2unix_pl" "$f"
    # Converted mailbox name: strip the .mbx suffix, in any letter case.
    fnew=${f%.[mM][bB][xX]}
    mv -v -- "$f.OUT" "$fnew" || exit_msg "cannot move $f.OUT to $fnew"
    fbak="$f.ORIG"
    mv -v -- "$f" "$fbak" || exit_msg "cannot move $f to $fbak"
    # Drop empty warning/error reports.  A report that does not exist at all
    # is left alone instead of aborting on the failed rm.
    for report in "$f.WARN" "$f.ERR"; do
        if [[ -e $report && ! -s $report ]]; then
            rm -v -- "$report" || exit_msg "cannot delete $report"
        fi
    done
    echo
    echo
done 3< <( find "$MAILDIR" -type f -iname '*.mbx' -print | sort )
echo
echo

# We now have all Eudora mailboxes (*.mbx with .mbx extension removed)
# properly processed in Eudora folders (*.fol).
# KMail handles folders and subfolders specially.
# For a mail folder 'foobar', KMail uses a file 'foobar' and directory
# '.foobar.directory' (note the dot).
# Therefore, process all *.fol folders now, by creating an empty 'foobar'
# and renaming the folder to '.foobar.directory'.
# Recurse from 'leaf' to 'trunk' (-depth) and do _not_ sort!
# The complete leaf-to-trunk list is collected before anything is renamed,
# then read line by line (no word splitting) with every use quoted.
fol_dirs=$( find "$MAILDIR" -depth -type d -iname '*.fol' -print )
while IFS= read -r d; do
    [[ -n $d ]] || continue          # empty list yields one empty line
    ddirname=${d%/*}
    dbasname=${d##*/}
    # Create (actually rename/mv) target directory and touch file.
    # Remove '.fol' suffix (any letter case).
    dbasname_nofol=${dbasname%.[fF][oO][lL]}
    tdir="$ddirname/.${dbasname_nofol}.directory"
    tfil="$ddirname/${dbasname_nofol}"
    mv -v -- "$d" "$tdir" || exit_msg "cannot move $d to $tdir"
    # Explicit if/else: with `touch && echo || exit_msg` a failing echo
    # would also trigger the fatal branch.
    if touch -- "$tfil"; then
        echo "touch: $tfil"
    else
        exit_msg "cannot touch $tfil"
    fi
done < <( printf '%s\n' "$fol_dirs" )

# Finally fix permissions (your eyes only).

echo
# Directories become 0700 and files 0600.  '-exec ... {} +' is used instead
# of '-exec ... {} \;': with the '+' form find exits non-zero when a chmod
# invocation fails, so the '|| exit_msg' guard can actually fire, and chmod
# is started once per batch of names rather than once per file.
find "$MAILDIR" -type d -exec chmod -v 0700 {} + || \
    exit_msg "cannot chmod directories in $MAILDIR"
find "$MAILDIR" -type f -exec chmod -v 0600 {} + || \
    exit_msg "cannot chmod files in $MAILDIR"


# Hasta la vista, baby.
# Closing report: how to diff the mailboxes that produced warnings or
# errors, and how to clean up afterwards.  The here-document delimiter is
# quoted, so the text below is printed literally (nothing is expanded).
cat <<'END_OF_REPORT'

DONE.

Use the following command to analyze the mailboxes that had problems:

  (
    cd $HOME/Mail && for f in `find . -type f -print | sort`; do
        [ -f $f.mbx.WARN ] && diff -u $f.mbx.ORIG $f
        [ -f $f.mbx.ERR  ] && diff -u $f.mbx.ORIG $f
    echo
    done
  ) | less -eiMs +/"^\@\@"

You should first convert all *.mbx.ORIG files from DOS to Unix EOL
(End-Of-Line) convention, or else you will get a huge number of diff's.
Example:  dos2unix `find $HOME/Mail -name '*.mbx.ORIG' -print`

After analysis, remove these files with:

  find $HOME/Mail -type f \
      \( -name '*.mbx.ORIG' -o -name '*.mbx.LOG' -o \
         -name '*.mbx.WARN' -o -name '*.mbx.ERR' \) -exec rm -v {} \;

and fire up KMail (kmail &).  Good luck!

PROGRAM END.

END_OF_REPORT

# Every step above either succeeded or already terminated the script with a
# non-zero status, so reaching this line means the run completed.
exit 0

