#!/bin/bash
#
# all2text.sh - convert (almost) everything OpenOffice can read to plain text.
#
# JODConverter cannot convert file formats like ppt, pptx, xls, ods or xlsx to
# txt directly, so the conversion is performed in two steps:
#   1. <input> -> PDF, using JODConverter
#   2. PDF     -> txt, using the pdftotext command-line tool
#
# Usage: all2text.sh <input> <output>
#   <input>   an arbitrary file OpenOffice can read (with the correct file
#             extension!)
#   <output>  the file name the result should go to (txt as file extension)
#
# Exit status:
#   0  the text file was written
#   1  a conversion step (or the temporary-folder setup) failed
#   2  wrong usage (missing arguments)
#
# Adapt the settings below to your own needs.

# JODConverter command-line jar
JODCONVERTER_CMD=/opt/jodconverter/lib/jodconverter-cli-2.2.2.jar
# pdftotext binary (find out your path using 'command -v pdftotext')
PDF2TXT_CMD=/usr/bin/pdftotext
# your java binary
JAVA_CMD=/usr/bin/java
# Temporary folder for storing the intermediate PDF (path without trailing /).
# You need to have write access here; it is created if it does not exist.
TMP_FOLDER=/tmp/pdftmp

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Extra arguments beyond the first two are ignored, as before.
if (( $# < 2 )); then
  printf 'Usage: %s <input> <output>\n' "${0##*/}" >&2
  exit 2
fi

input_fullfile=$1
output_file=$2

echo "Input: $1"

[[ -r "$input_fullfile" ]] || die "Cannot read input file: $input_fullfile"

# Extract the input file name without directory and without its extension.
input_filename_w_ext=$(basename -- "$input_fullfile")
input_filename_wo_ext=${input_filename_w_ext%.*}

# Work in a private, uniquely named directory below TMP_FOLDER so that
# concurrent runs on equally named inputs cannot clobber each other's PDF.
mkdir -p -- "$TMP_FOLDER" || die "Cannot create temporary folder: $TMP_FOLDER"
work_dir=$(mktemp -d "$TMP_FOLDER/all2text.XXXXXX") \
  || die "Cannot create a working directory in: $TMP_FOLDER"

# Remove the intermediate PDF on every exit path, including failures.
cleanup() {
  rm -rf -- "${work_dir:?}"
}
trap cleanup EXIT

# The intermediate file keeps the .pdf extension, as in the original script.
tmpfile=$work_dir/$input_filename_wo_ext.pdf

# First conversion: input -> PDF.
"$JAVA_CMD" -jar "$JODCONVERTER_CMD" "$input_fullfile" "$tmpfile" \
  || die "Conversion to PDF failed for: $input_fullfile"

# Guard against a converter run that reported success without producing output.
[[ -s "$tmpfile" ]] || die "No PDF was produced for: $input_fullfile"

# Second conversion: PDF -> txt.
"$PDF2TXT_CMD" "$tmpfile" "$output_file" \
  || die "Conversion to text failed for: $input_fullfile"