Table of Contents

TWiki to DokuWiki Conversion

This is a description of how to migrate an existing TWiki datastore to DokuWiki-ready text files. I am in the process of migrating our 1000+ page TWiki site to DokuWiki and created the shell scripts below to assist in the transition.

Requirements

Capabilities

This script is able to translate the following items from TWiki Markup Language (TML) to DokuWiki:

How To

Source Code

migratenamespace.sh
#!/bin/bash
#Twiki to DokuWiki File Converter
#Developed by Thomas Vachon <vachon-dokuwiki ATgamelogicDOTcom>
#Licensed under the GPL: (http://www.gnu.org/licenses/gpl.txt)
#
# Usage: migratenamespace.sh TwikiNamespace
#
# Copies /var/lib/twiki/data/<TwikiNamespace> into the current directory,
# removes the ",v" and ".lock" files from the copy, lowercases every file
# name and the namespace directory itself, then runs ./twikiconverter on
# the resulting *.txt files.
#
# Run this from the data directory of the DokuWiki destination;
# ./twikiconverter is expected to live in that same directory.
# Exits non-zero on a usage error or when a directory step fails.

if [ -z "$1" ]; then
  echo "Usage: $0 TwikiNamespace" >&2
  exit 1
fi

#Copy the files to your present directory (run this in the data directory of the doku destination)
rsync -arv "/var/lib/twiki/data/$1" . || exit 1

# Stop if the directory cannot be entered: the rm and the rename loop
# below must never run against the wrong directory.
cd -- "$1" || exit 1

# -f: no error when a pattern matches nothing and no prompt on read-only
# files; the ./ prefix keeps odd file names from being read as options.
rm -f -- ./*,v ./*.lock

#Makes all the filenames lowercase (as it is a dokuwiki requirement)
for f in *; do
  g=$(printf '%s' "$f" | tr '[:upper:]' '[:lower:]')
  # Names that are already lowercase need no rename (mv would only
  # complain that source and target are the same file).
  [ "$f" = "$g" ] || mv -- "$f" "$g"
done

cd .. || exit 1

#make the namespace directory lowercase
namespace=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
if [ "$1" != "$namespace" ]; then
  mv -- "$1" "$namespace" || exit 1
fi

./twikiconverter "$namespace"/*.txt
twikiconverter
#!/bin/bash
#Twiki to DokuWiki Syntax Converter
#Developed by Ben Chapman <bkchapman atgmaildotcom>
#Supplemented by Thomas Vachon <vachon-dokuwiki ATgamelogicDOTcom>
#Licensed under the GPL: (http://www.gnu.org/licenses/gpl.txt)
#
# Usage: twikiconverter twikifilename.txt [more.txt ...]
#
# Rewrites every file named on the command line IN PLACE (perl -i is used
# without a backup suffix), translating TWiki markup to DokuWiki markup
# through a fixed sequence of regex substitutions.
#
# The substitutions are order-dependent: later steps rely on placeholders
# and on text produced by earlier ones, so do not reorder them.
#
# The file list is always passed as "$@" so that names containing
# whitespace or glob characters reach perl as single, unmodified arguments.

if [ -z "$1" ]; then
  echo "Usage: $0 twikifilename.txt or *.txt " >&2
  exit 1
fi

#Remove the ---+++++ !! (headed links)
perl -pi -e 's/\-\-\-\+\+\+\+\+ \!\!//g' "$@"

# Each carriage return is replaced by two newlines
perl -pi -e 's/\r/\n\n/g' "$@"

# Stripping Twiki %META information
# (drops whole lines that begin with %META:TOPIC, optionally indented)
perl -ni -e "print unless /^\s*%META:TOPIC*/;" "$@"

# Converting attachments
# Replace replaceme with the name of your namespace
perl -pi -e "s/\%META\:FILEATTACHMENT\{name\=\"/\{{replaceme\:/g" "$@"
perl -pi -e "s/\" attr\=\"\" comment\=\"/\|/g" "$@"
perl -pi -e 's/\" date\=\".+/}}/g' "$@"

# Converting monospaced text (every "=" in the file becomes '')
perl -pi -e "s/\=/''/g" "$@"

# Converting bold, monospaced text
# NOTE(review): the substitution above has already replaced every "=",
# so these two patterns appear unable to match anything -- confirm the
# intended order before relying on bold-monospace conversion.
perl -pi -e "s/^\=\=/**''/g" "$@"
perl -pi -e "s/$\=\=/''**/g" "$@"

# Adding DokuWiki syntax to the headings
# (wraps the heading; the leading ---+ markers are stripped in the next step)
perl -pi -e 's/(---\+\s.*|---\+\w.*)/====== $1 ======/g' "$@"
perl -pi -e 's/(---\+\+\s.*|---\+\+\w.*)/===== $1 =====/g' "$@"
perl -pi -e 's/(---\+\+\+\s.*|---\+\+\+\w.*)/==== $1 ====/g' "$@"
perl -pi -e 's/(---\+\+\+\+\s.*|---\+\+\+\+\w.*)/=== $1 ===/g' "$@"
perl -pi -e 's/(---\+\+\+\+\+\s.*|---\+\+\+\+\+\w.*)/== $1 ==/g' "$@"

# Removing TML from the headings (longest marker first)
perl -pi -e 's/(---\+\+\+\+\+\s|---\+\+\+\+\+)//g' "$@"
perl -pi -e 's/(---\+\+\+\+\s|---\+\+\+\+)//g' "$@"
perl -pi -e 's/(---\+\+\+\s|---\+\+\+)//g' "$@"
perl -pi -e 's/(---\+\+\s|---\+\+)//g' "$@"
perl -pi -e 's/(---\+\s|---\+)//g' "$@"

# Converting unordered lists
# I'm using %^ as a placeholder so the
# bold conversion below does not mess with
# our lists. Don't worry it gets fixed below.
perl -pi -e 's/\t\t\t\*/      \%\^/g' "$@"
perl -pi -e 's/\t\t\*/    \%\^/g' "$@"
perl -pi -e 's/\t\*/   \%\^/g' "$@"
perl -pi -e 's/            \*/        \%\^/g' "$@"
perl -pi -e 's/         \*/      \%\^/g' "$@"
perl -pi -e 's/      \*/    \%\^/g' "$@"
perl -pi -e 's/   \*/  \%\^/g' "$@"

# Commented out. See below
# Convert Ordered Lists v1
#perl -pi -e 's/\t\t\t\1/      \-/g' $*
#perl -pi -e 's/\t\t\1/    \-/g' $*
#perl -pi -e 's/\t\1/   \-/g' $*
#perl -pi -e 's/            \1/        \-/g' $*
#perl -pi -e 's/         \1/      \-/g' $*
#perl -pi -e 's/      \1/    \-/g' $*
#perl -pi -e 's/   \1/  \-/g' $*

# Inside a pattern, \1 refers back to a capturing group, and the v1
# patterns above contain none. The original author presumably meant a
# literal "1". Run against Perl 5.1, the v1 commands reportedly throw an
# error and do not convert the text properly; the v2 commands below
# match a literal "1" and throw no errors.
# Convert Ordered Lists v2
perl -pi -e 's/\t\t\t1/      \-/g' "$@"
perl -pi -e 's/\t\t1/    \-/g' "$@"
perl -pi -e 's/\t1/   \-/g' "$@"
perl -pi -e 's/            1/        \-/g' "$@"
perl -pi -e 's/         1/      \-/g' "$@"
perl -pi -e 's/      1/    \-/g' "$@"
perl -pi -e 's/   1/  \-/g' "$@"

# Converting bold-italic text
# NOTE(review): in the second pattern the "$" is written before the token
# rather than after it -- confirm the intended closing-marker match.
perl -pi -e 's/^\_{2}/\/\/\*\*/g' "$@"
perl -pi -e 's/$\_{2}/\*\*\/\//g' "$@"

# Converting italic text
# NOTE(review): same "$"-before-token form as above -- confirm.
perl -pi -e 's/(^\_{1})/\/\//g' "$@"
perl -pi -e 's/$\_{1}\s/\/\//g' "$@"

# Converting bold text
perl -pi -e 's/\*{1}| \*{1}/\*\*/g' "$@"

# Unordered lists are broken...fixing
# (turns the %^ placeholder back into the list bullet)
perl -pi -e 's/\%\^/\*/g' "$@"

# Converting TML <verbatim> to Doku <file>
# You could use <code> but I like the highlighting
# of <file> better...your choice
perl -pi -e 's/<verbatim>/<file>/g' "$@"
perl -pi -e 's/<\/verbatim>/<\/file>/g' "$@"
# Removing <nop>...no more CamelCase
perl -pi -e 's/<nop>//g' "$@"

# Converting <b></b> to Doku bold
perl -pi -e 's/<b>/\*\*/g' "$@"
perl -pi -e 's/<\/b>/\*\*/g' "$@"

# Converting <i></i> to Doku italic
perl -pi -e 's/<i>/\/\//g' "$@"
perl -pi -e 's/<\/i>/\/\//g' "$@"

# Converting <u></u> to Doku underline
perl -pi -e 's/<u>/__/g' "$@"
perl -pi -e 's/<\/u>/__/g' "$@"

# Converting table headings
perl -pi -e 's/\| \*\*/\^  /g' "$@"
perl -pi -e 's/\*\*  \^/  \^/g' "$@"
perl -pi -e 's/\*\*  \|/  \^/g' "$@"

#Remove !!
perl -pi -e 's/\!\!//g' "$@"

#Remove %TOC%
perl -pi -e 's/\%TOC\%//g' "$@"

#Fix named links
perl -pi -e 's/\]\[/\|/g' "$@"

#Remove unneeded META info and HTML 4.0 Traditional br's
perl -pi -e 's/\%META.*//g' "$@"
perl -pi -e 's/\<br\>/\n/g' "$@"

#Name pre-formatted text as code
perl -pi -e 's/\<pre\>/\<code\>/g' "$@"
perl -pi -e 's/\<\/pre\>/\<\/code\>/g' "$@"
perl -pi -e 's/\<PRE\>/\<code\>/g' "$@"
perl -pi -e 's/\<\/PRE\>/\<\/code\>/g' "$@"

#Fix Twiki's abuse of spaces in links
#Yes, I know this is ugly, but it works, feel free to clean it up
#(each pass joins the leading words that follow "[[" by dropping the
# single whitespace characters between them)
perl -pi -e 's/\[\[(\w+)\s(\w+)/\[\[\1\2/g' "$@"
perl -pi -e 's/\[\[(\w+)\s(\w+)\s(\w+)/\[\[\1\2\3/g' "$@"
perl -pi -e 's/\[\[(\w+)\s(\w+)\s(\w+)\s(\w+)/\[\[\1\2\3\4/g' "$@"
perl -pi -e 's/\[\[(\w+)\s(\w+)\s(\w+)\s(\w+)\s(\w+)/\[\[\1\2\3\4\5/g' "$@"
perl -pi -e 's/\[\[(\w+)\s(\w+)\s(\w+)\s(\w+)\s(\w+)\s(\w+)/\[\[\1\2\3\4\5\6/g' "$@"
perl -pi -e 's/\[\[(\w+)\s(\w+)\s(\w+)\s(\w+)\s(\w+)\s(\w+)\s(\w+)/\[\[\1\2\3\4\5\6\7/g' "$@"

exit 0

Notes

Ben Chapman

Thomas Vachon