/useful/trunk-1

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/useful/trunk-1
18 by Gustav Hartvigsson
* pdf2images.sh:
1
#!/usr/bin/env bash
2
####
3
# FILE NAME process_text_to_image.sh
4
#
5
# Changes
6
#
7
# 2018-09-03:
8
#   * added --no-header for when you want to use your own pandoc header
9
#
10
# 2018-09-22
11
#   * Fixed --no-header...
12
#     Seemed to have forgotten the "$" in front of the variable.
13
#
14
# 2021-01-13
15
#   * fixed up the if statements.
16
#
17
# 2024-07-10
18
#   * Added sanity check
19
#   * General logic fixes.
20
#   * Added --author argument
21
#   * Fixed help message formatting
22
####
23
24
####
# Script-wide settings. The command line parser overwrites most of them.
####

# Resolution passed to pdftoppm (as both -rx and -ry) when the PDF is
# turned into images.
__DPI=300

# Input text file and base name of the resulting image. ".png" is appended
# to the output name when the final image is copied into place.
__IN_FILE=
__OUT_FILE=big_image

# Switches; each one is set to "true" by its command line flag.
__PERSERVE_TMP=false
__INVERT_COLOURS=false
__NO_PANDOC_HEADER=false

# Directory the script was started from. The "tmp" work folder is created
# inside it and the final image is written next to it.
__CWD=$PWD

# Default pandoc metadata. It is written directly after an opening "---"
# without any separator, so the leading and trailing newline are part of
# the value on purpose.
__PANDOC_HEADER="
geometry: margin=0.5cm
papersize: a5
mainfont: DejaVu Serif
fontsize: 12pt
"

# Optional "title:" / "author:" metadata entries; left out when empty.
__TITLE=""
__AUTHOR=""

# Overall result of the tool sanity check; starts out as passing.
__SANITY=true
45
46
####
# Print one option entry of the help text.
#
# $1      - the line showing the flag(s)
# $2...   - description lines; each one is printed indented with one tab
#
# An empty line is printed after the entry.
####
function __usage_option () {
  printf '%s\n' "$1"
  shift
  printf '\t%s\n' "$@"
  echo ""
}

####
# Print the help text to stdout.
#
# Reads $__PANDOC_HEADER to show the default pandoc header.
#
# The location of the ImageMagick policy file is looked up by running
# "convert -list policy". The help text can be requested before the
# required tools have been checked, so the lookup is skipped (with a note
# in the output) when "convert" is not available instead of letting the
# shell print a "command not found" error.
####
function __usage () {
  printf '%s\n' \
    "process_text_to_image.sh - Takes one text file and converts it to a single" \
    "image using pandoc, xelatex, imagemagick, pdftoppm, pdfcrop" \
    "" \
    "!IMPORTANT! The folder \"./tmp/\" in the current working directory will be" \
    "            used as a temporary storage, and may be deleted, along with its" \
    "            contents!" \
    "" \
    "---------------------" \
    ""

  __usage_option '-h             --help' \
    'Print this help message'
  __usage_option '-i <file>      --input <file>' \
    'The file to use to convert to an image. '
  __usage_option '-o <file>      --output <file>' \
    'The image to output to. (Default=big_image.png)'
  __usage_option '-d <integer>   --dpi <integer>' \
    'Set the dpi of the intermediate image relative to an a5 paper.' \
    '(Default=300)'
  __usage_option '-p             --perserve' \
    'Do not delete the TMP folder.'
  __usage_option '--invert' \
    'Invert the colours of the final image.'
  __usage_option '-t "name"      --title "name"' \
    'Set the title on the title page.'
  __usage_option '-a "name"      --author "name"' \
    'Set an author to the title page.'

  # This entry embeds the multi-line default header, so it is printed by
  # hand instead of through __usage_option.
  printf '%s\n' '--no-header'
  printf '\t%s\n' 'Do not insert the pandoc header. (Default:'
  printf '%s\n' "$__PANDOC_HEADER" ')' ''

  printf '%s\n' \
    "---------------------" \
    "" \
    "If you are getting an error from convert that the height or width exceeds" \
    "some value, you may want to check the ImageMagick policy.xml file." \
    "" \
    "The path to ImageMagick policy file is:"

  if command -v convert > /dev/null 2>&1; then
    # -F: match the literal text ".xml", not "any character followed by xml".
    convert -list policy | grep -F -- '.xml'
  else
    echo "(\"convert\" was not found, so the path could not be looked up.)"
  fi

  printf '%s\n' \
    "" \
    "---------------------"
}
97
98
####
# Run a command with both stdout and stderr discarded.
#
# $@ - the command and its arguments. They are passed on exactly as given,
#      so arguments containing whitespace or glob characters stay intact.
#
# Returns the exit status of the command.
####
function __silent () {
  "$@" > /dev/null 2>&1
}
102
103
####
# Check that a single tool can be found.
#
# $1 - name of the tool to look up
#
# When the tool cannot be found a message is printed and the global
# __SANITY is set to "false". __SANITY is left untouched otherwise, so the
# result of several lookups accumulates.
####
function __find_tool () {
  if ! command -v "$1" > /dev/null 2>&1; then
    echo "    Can't find tool \"${1}\"."
    __SANITY=false
  fi
}

####
# Check that every external tool the script relies on is available.
#
# Resets __SANITY to "true", looks up each tool, and exits the script with
# status 1 when at least one lookup has cleared the flag.
####
function __sanity_check () {
  local tool

  __SANITY=true
  for tool in pandoc xelatex convert pdftoppm pdfcrop; do
    __find_tool "$tool"
  done

  if [[ "$__SANITY" != true ]]; then
    echo "Please install the missing tools."
    echo ""
    exit 1
  fi
}
126
127
####
# Print a message to stderr and exit the script with status 1.
#
# $* - the message
####
function __fail () {
  echo "$*" >&2
  exit 1
}

####
# Entry point: turn the input text file into one PNG image.
#
# $@ - the command line arguments; they are handed to __parse_args.
#
# Steps: parse the arguments, check the tools, build "tmp/text.txt" (the
# pandoc header followed by the text, or just the text with --no-header),
# render it to a PDF, crop it, rasterise the pages, append them into one
# image, optionally invert it, and copy it to "<cwd>/<output>.png".
#
# Every step is checked. When one fails the script stops with status 1 and
# leaves the "tmp" folder in place so the intermediate files can be
# inspected. On success "tmp" is removed unless __PERSERVE_TMP is "true".
####
function __main () {
  # FIXME: Split the functionality out of the main function.
  # FIXME: Use mkdtemp instead of the folder we are in.
  __parse_args "${@}"
  __sanity_check

  echo "__IN_FILE: $__IN_FILE"
  echo "__OUT_FILE: $__OUT_FILE"
  echo "CWD: $__CWD"
  echo "__DPI: $__DPI"

  local work_dir="$__CWD/tmp"
  local in_path
  local final_image

  # A relative input is looked up below the start directory; an absolute
  # one is used as it is.
  if [[ "$__IN_FILE" == /* ]]; then
    in_path="$__IN_FILE"
  else
    in_path="$__CWD/$__IN_FILE"
  fi

  if [[ -z "$__IN_FILE" || ! -e "$in_path" ]]; then
    echo "The provided <infile> does not exist."
    echo ""
    exit 1
  fi

  # First we create a temp folder.
  mkdir -p -- "$work_dir" \
    || __fail "Could not create the folder \"$work_dir\"."

  # Next we put our text into it, with the pandoc metadata block in front
  # of it unless that was turned off.
  if [[ $__NO_PANDOC_HEADER == false ]]; then
    # $__PANDOC_HEADER starts and ends with a newline, so no separators are
    # needed around it.
    {
      printf '%s' "---" "$__PANDOC_HEADER"
      if [[ -n "$__AUTHOR" ]]; then
        printf '%s\n' "author: ${__AUTHOR}"
      fi
      if [[ -n "$__TITLE" ]]; then
        printf '%s\n' "title: ${__TITLE}"
      fi
      printf '%s\n' "---"
      cat -- "$in_path"
    } > "$work_dir/text.txt" \
      || __fail "Could not write \"$work_dir/text.txt\"."
  else
    cp -- "$in_path" "$work_dir/text.txt" \
      || __fail "Could not copy \"$in_path\" to \"$work_dir/text.txt\"."
  fi

  cd "$work_dir" || __fail "Could not enter the folder \"$work_dir\"."

  # Now we use pandoc to convert it to a PDF.
  echo "Generating PDF"
  pandoc --pdf-engine=xelatex "$work_dir/text.txt" -o "$work_dir/text.pdf" \
    || __fail "pandoc could not generate the PDF."

  echo "Cropping PDF"
  pdfcrop --margins '10 5 10 5' "$work_dir/text.pdf" "$work_dir/text-croped.pdf" \
    || __fail "pdfcrop could not crop the PDF."

  # Convert it to images
  echo "Converting to images"
  pdftoppm "$work_dir/text-croped.pdf" "$work_dir/page" -png -rx "$__DPI" -ry "$__DPI" -gray \
    || __fail "pdftoppm could not convert the PDF to images."

  # Make the colour space greyscale and append the pages to each other.
  # The pattern is passed on quoted, so it is convert and not the shell
  # that is expected to expand it.
  convert -append -colorspace gray +matte -depth 8 "$work_dir/page-*.png" "$work_dir/big-page.png" \
    || __fail "convert could not append the page images."

  final_image="$work_dir/big-page.png"

  # If we invert the final image this is where we do it.
  if [[ $__INVERT_COLOURS == true ]]; then
    echo "Inverting colours"
    convert "$work_dir/big-page.png" -channel RGB -negate "$work_dir/big-page-inverted.png" \
      || __fail "convert could not invert the image."
    final_image="$work_dir/big-page-inverted.png"
  fi

  echo "Copying final image to $__CWD/$__OUT_FILE.png"
  cp -- "$final_image" "$__CWD/$__OUT_FILE.png" \
    || __fail "Could not copy the final image to \"$__CWD/$__OUT_FILE.png\"."

  ####
  # Cleanup of everything.
  ####
  if [[ $__PERSERVE_TMP == true ]]; then
    echo "Note: Not cleaning up!"
  else
    rm -r -- "$work_dir"
  fi
  echo "Done."
  echo ""
}
215
216
217
####
# Stop with an error when an option that takes a value is the last word on
# the command line.
#
# $@ - the arguments that are still to be parsed, starting with the option
####
function __require_value () {
  if [[ $# -lt 2 ]]; then
    echo "Option \"${1}\" requires a value." >&2
    echo "Try --help or -h." >&2
    exit 1
  fi
}

####
# Parse the command line and store the result in the script-wide settings.
#
# $@ - the command line arguments
#
# Sets __IN_FILE, __OUT_FILE, __TITLE, __AUTHOR, __DPI, __PERSERVE_TMP,
# __INVERT_COLOURS and __NO_PANDOC_HEADER according to the options given.
#
# Exits with status 1 when the first argument is missing or empty, when an
# argument is not recognised, or when an option is missing its value.
# -h/--help prints the help text and exits with status 0. "--" ends the
# parsing; whatever follows it is ignored.
####
function __parse_args () {
  if [[ -z "$1" ]]; then
    echo "Try --help or -h."
    exit 1
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -i|--input)
        __require_value "$@"
        __IN_FILE="$2"
        shift 2
      ;;
      -o|--output)
        __require_value "$@"
        __OUT_FILE="$2"
        shift 2
      ;;
      -t|--title)
        __require_value "$@"
        __TITLE="$2"
        shift 2
      ;;
      -a|--author)
        __require_value "$@"
        __AUTHOR="$2"
        shift 2
      ;;
      -d|--dpi)
        __require_value "$@"
        __DPI="$2"
        shift 2
      ;;
      -p|--perserve)
        __PERSERVE_TMP=true
        shift
      ;;
      --invert)
        __INVERT_COLOURS=true
        shift
      ;;
      --no-header)
        __NO_PANDOC_HEADER=true
        shift
      ;;
      -h|--help)
        __usage
        exit
      ;;
      # Has to come before the catch-all arm below, which would otherwise
      # match "--" first.
      --)
        shift
        break
      ;;
      *)
        echo "Unknown argument \"${1}\"."
        exit 1
      ;;
    esac
  done
}
281
282
# Run the script, forwarding every command line argument unchanged.
__main "$@"
283