/lenasys/trunk

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/lenasys/trunk

« back to all changes in this revision

Viewing changes to codeigniter/system/libraries/Typography.php

  • Committer: Gustav Hatvigsson
  • Date: 2013-04-11 09:20:09 UTC
  • mfrom: (19.1.5 lenasys)
  • Revision ID: gustav.hartvigsson@gmail.com-20130411092009-ylcqzqwcmjdglb17
Merged in implementation group one's team branch; it contains CodeIgniter
and the new admin panel.
20130411

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<?php  if ( ! defined('BASEPATH')) exit('No direct script access allowed');
 
2
/**
 
3
 * CodeIgniter
 
4
 *
 
5
 * An open source application development framework for PHP 5.1.6 or newer
 
6
 *
 
7
 * @package             CodeIgniter
 
8
 * @author              ExpressionEngine Dev Team
 
9
 * @copyright   Copyright (c) 2008 - 2011, EllisLab, Inc.
 
10
 * @license             http://codeigniter.com/user_guide/license.html
 
11
 * @link                http://codeigniter.com
 
12
 * @since               Version 1.0
 
13
 * @filesource
 
14
 */
 
15
 
 
16
// ------------------------------------------------------------------------
 
17
 
 
18
/**
 
19
 * Typography Class
 
20
 *
 
21
 *
 
22
 * @access              private
 
23
 * @category    Helpers
 
24
 * @author              ExpressionEngine Dev Team
 
25
 * @link                http://codeigniter.com/user_guide/helpers/
 
26
 */
 
27
class CI_Typography {

	/**
	 * Block level elements that should not be wrapped inside <p> tags.
	 * Used as a regex alternation.
	 *
	 * @var string
	 */
	public $block_elements = 'address|blockquote|div|dl|fieldset|form|h\d|hr|noscript|object|ol|p|pre|script|table|ul';

	/**
	 * Elements that should not have <p> and <br /> tags within them.
	 * Used as an (unanchored) regex alternation against the matched block name.
	 *
	 * @var string
	 */
	public $skip_elements = 'p|pre|ol|ul|dl|object|table|h\d';

	/**
	 * Tags we want the parser to completely ignore when splitting the string.
	 *
	 * @var string
	 */
	public $inline_elements = 'a|abbr|acronym|b|bdo|big|br|button|cite|code|del|dfn|em|i|img|ins|input|label|map|kbd|q|samp|select|small|span|strong|sub|sup|textarea|tt|var';

	/**
	 * Block level elements that require inner content to be within
	 * another block level element.
	 *
	 * @var array
	 */
	public $inner_block_required = array('blockquote');

	/**
	 * The last opening block element seen by auto_typography().
	 * Reset at the start of every auto_typography() call.
	 *
	 * @var string
	 */
	public $last_block_element = '';

	/**
	 * Whether or not to protect quotes within { curly braces }.
	 *
	 * @var bool
	 */
	public $protect_braced_quotes = FALSE;

	/**
	 * Auto Typography
	 *
	 * This function converts text, making it typographically correct:
	 *	- Converts double newlines into paragraphs.
	 *	- Converts single newlines into <br /> tags
	 *	- Converts single and double quotes into correctly facing curly quote entities.
	 *	- Converts three dots into ellipsis.
	 *	- Converts double dashes into em-dashes.
	 *	- Converts two spaces into entities
	 *
	 * @access	public
	 * @param	string	the text to format
	 * @param	bool	whether to reduce more than two consecutive newlines to two
	 * @return	string	the formatted markup ('' for empty input)
	 */
	public function auto_typography($str, $reduce_linebreaks = FALSE)
	{
		if ($str == '')
		{
			return '';
		}

		// Start every run from a clean slate. Without this, a block element
		// seen in a previous call (e.g. <blockquote>) would leak into
		// _format_newlines() and change how this string is wrapped.
		$this->last_block_element = '';

		// Standardize Newlines to make matching easier
		if (strpos($str, "\r") !== FALSE)
		{
			$str = str_replace(array("\r\n", "\r"), "\n", $str);
		}

		// Reduce line breaks.  If there are more than two consecutive linebreaks
		// we'll compress them down to a maximum of two since there's no benefit to more.
		if ($reduce_linebreaks === TRUE)
		{
			$str = preg_replace("/\n\n+/", "\n\n", $str);
		}

		// HTML comment tags don't conform to patterns of normal tags, so pull them
		// out separately (only if needed) and leave a {@HCn} placeholder behind.
		$html_comments = array();
		if (strpos($str, '<!--') !== FALSE)
		{
			if (preg_match_all("#(<!\-\-.*?\-\->)#s", $str, $matches))
			{
				for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
				{
					$html_comments[] = $matches[0][$i];
					$str = str_replace($matches[0][$i], '{@HC'.$i.'}', $str);
				}
			}
		}

		// Match and yank <pre> tags if they exist.  It's cheaper to do this separately since most content will
		// not contain <pre> tags, and it keeps the PCRE patterns below simpler and faster
		if (strpos($str, '<pre') !== FALSE)
		{
			$str = preg_replace_callback("#<pre.*?>.*?</pre>#si", array($this, '_protect_characters'), $str);
		}

		// Convert quotes within tags to temporary markers.
		$str = preg_replace_callback("#<.+?>#si", array($this, '_protect_characters'), $str);

		// Do the same with braces if necessary
		if ($this->protect_braced_quotes === TRUE)
		{
			$str = preg_replace_callback("#\{.+?\}#si", array($this, '_protect_characters'), $str);
		}

		// Convert "ignore" tags to temporary marker.  The parser splits out the string at every tag
		// it encounters.  Certain inline tags, like image tags, links, span tags, etc. will be
		// adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}
		$str = preg_replace("#<(/*)(".$this->inline_elements.")([ >])#i", "{@TAG}\\1\\2\\3", $str);

		// Split the string at every tag.  This expression creates an array with this prototype:
		//
		//	[array]
		//	{
		//		[0] = <opening tag>
		//		[1] = Content...
		//		[2] = <closing tag>
		//		Etc...
		//	}
		$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		// Build our finalized string.  We cycle through the array, skipping tags, and processing the contained text
		$str = '';
		$process = TRUE;
		$current_chunk = 0;
		$total_chunks = count($chunks);

		foreach ($chunks as $chunk)
		{
			$current_chunk++;

			// Are we dealing with a tag? If so, we'll skip the processing for this cycle.
			// We'll also set the "process" flag which allows us to skip <pre> tags and a few other things.
			if (preg_match("#<(/*)(".$this->block_elements.").*?>#", $chunk, $match))
			{
				if (preg_match("#".$this->skip_elements."#", $match[2]))
				{
					// Opening skip-element turns processing off, its closing tag turns it back on
					$process = ($match[1] == '/') ? TRUE : FALSE;
				}

				if ($match[1] == '')
				{
					$this->last_block_element = $match[2];
				}

				$str .= $chunk;
				continue;
			}

			if ($process === FALSE)
			{
				$str .= $chunk;
				continue;
			}

			// Force a newline to make sure end tags get processed by _format_newlines()
			if ($current_chunk == $total_chunks)
			{
				$chunk .= "\n";
			}

			// Convert Newlines into <p> and <br /> tags
			$str .= $this->_format_newlines($chunk);
		}

		// No opening block level tag?  Add it if needed.
		if ( ! preg_match("/^\s*<(?:".$this->block_elements.")/i", $str))
		{
			$str = preg_replace("/^(.*?)<(".$this->block_elements.")/i", '<p>$1</p><$2', $str);
		}

		// Convert quotes, ellipsis, em-dashes, non-breaking spaces, and ampersands
		$str = $this->format_characters($str);

		// Restore HTML comments
		for ($i = 0, $total = count($html_comments); $i < $total; $i++)
		{
			// Remove surrounding paragraph tags, but only if there's an opening paragraph tag
			// otherwise HTML comments at the ends of paragraphs will have the closing tag removed
			// if '<p>{@HC1}' then replace <p>{@HC1}</p> with the comment, else replace only {@HC1} with the comment.
			//
			// The comment text is user content, so "\" and "$" are escaped to stop
			// preg_replace() from treating sequences like $1 or \1 as backreferences.
			$str = preg_replace(
				'#(?(?=<p>\{@HC'.$i.'\})<p>\{@HC'.$i.'\}(\s*</p>)|\{@HC'.$i.'\})#s',
				addcslashes($html_comments[$i], '\\$'),
				$str
			);
		}

		// Final clean up
		$table = array(

						// If the user submitted their own paragraph tags within the text
						// we will retain them instead of using our tags.
						'/(<p[^>*?]>)<p>/'	=> '$1',

						// Reduce multiple instances of opening/closing paragraph tags to a single one
						'#(</p>)+#'			=> '</p>',
						'/(<p>\W*<p>)+/'	=> '<p>',

						// Clean up stray paragraph tags that appear before block level elements
						'#<p></p><('.$this->block_elements.')#'	=> '<$1',

						// Clean up stray non-breaking spaces preceding block elements
						'#(&nbsp;\s*)+<('.$this->block_elements.')#'	=> '  <$2',

						// Replace the temporary markers we added earlier
						'/\{@TAG\}/'		=> '<',
						'/\{@DQ\}/'			=> '"',
						'/\{@SQ\}/'			=> "'",
						'/\{@DD\}/'			=> '--',
						'/\{@NBS\}/'		=> '  ',

						// An unintended consequence of the _format_newlines function is that
						// some of the newlines get truncated, resulting in <p> tags
						// starting immediately after <block> tags on the same line.
						// This forces a newline after such occurrences, which looks much nicer.
						"/><p>\n/"			=> ">\n<p>",

						// Similarly, there might be cases where a closing </block> will follow
						// a closing </p> tag, so we'll correct it by adding a newline in between
						"#</p></#"			=> "</p>\n</"
						);

		// Do we need to reduce empty lines?
		if ($reduce_linebreaks === TRUE)
		{
			$table['#<p>\n*</p>#'] = '';
		}
		else
		{
			// If we have empty paragraph tags we add a non-breaking space
			// otherwise most browsers won't treat them as true paragraphs
			$table['#<p></p>#'] = '<p>&nbsp;</p>';
		}

		return preg_replace(array_keys($table), $table, $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Format Characters
	 *
	 * This function mainly converts double and single quotes
	 * to curly entities, but it also converts em-dashes,
	 * double spaces, and ampersands
	 *
	 * @access	public
	 * @param	string	the text to convert
	 * @return	string	the text with typographic entities applied
	 */
	public function format_characters($str)
	{
		// The rule table never changes, so it is built once and reused across calls
		static $table;

		if ( ! isset($table))
		{
			$table = array(
							// nested smart quotes, opening and closing
							// note that rules for grammar (English) allow only for two levels deep
							// and that single quotes are _supposed_ to always be on the outside
							// but we'll accommodate both
							// Note that in all cases, whitespace is the primary determining factor
							// on which direction to curl, with non-word characters like punctuation
							// being a secondary factor only after whitespace is addressed.
							'/\'"(\s|$)/'					=> '&#8217;&#8221;$1',
							'/(^|\s|<p>)\'"/'				=> '$1&#8216;&#8220;',
							'/\'"(\W)/'						=> '&#8217;&#8221;$1',
							'/(\W)\'"/'						=> '$1&#8216;&#8220;',
							'/"\'(\s|$)/'					=> '&#8221;&#8217;$1',
							'/(^|\s|<p>)"\'/'				=> '$1&#8220;&#8216;',
							'/"\'(\W)/'						=> '&#8221;&#8217;$1',
							'/(\W)"\'/'						=> '$1&#8220;&#8216;',

							// single quote smart quotes
							'/\'(\s|$)/'					=> '&#8217;$1',
							'/(^|\s|<p>)\'/'				=> '$1&#8216;',
							'/\'(\W)/'						=> '&#8217;$1',
							'/(\W)\'/'						=> '$1&#8216;',

							// double quote smart quotes
							'/"(\s|$)/'						=> '&#8221;$1',
							'/(^|\s|<p>)"/'					=> '$1&#8220;',
							'/"(\W)/'						=> '&#8221;$1',
							'/(\W)"/'						=> '$1&#8220;',

							// apostrophes
							"/(\w)'(\w)/"					=> '$1&#8217;$2',

							// Em dash and ellipses dots
							'/\s?\-\-\s?/'					=> '&#8212;',
							'/(\w)\.{3}/'					=> '$1&#8230;',

							// double space after sentences
							'/(\W)  /'						=> '$1&nbsp; ',

							// ampersands, if not a character entity
							'/&(?!#?[a-zA-Z0-9]{2,};)/'		=> '&amp;'
						);
		}

		// Rules are applied sequentially, in the order listed above
		return preg_replace(array_keys($table), $table, $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Format Newlines
	 *
	 * Converts newline characters into either <p> tags or <br />
	 *
	 * Called by auto_typography() for every text chunk; reads
	 * $last_block_element to decide whether newline-free text must
	 * still be wrapped in a paragraph.
	 *
	 * @access	public
	 * @param	string	a chunk of text between tags
	 * @return	string	the chunk with paragraph / line break markup
	 */
	public function _format_newlines($str)
	{
		if ($str == '')
		{
			return $str;
		}

		// Text without newlines is left alone unless the enclosing block
		// (e.g. blockquote) requires its content to sit inside another block
		if (strpos($str, "\n") === FALSE && ! in_array($this->last_block_element, $this->inner_block_required, TRUE))
		{
			return $str;
		}

		// Convert two consecutive newlines to paragraphs
		$str = str_replace("\n\n", "</p>\n\n<p>", $str);

		// Convert single newlines to <br /> tags
		$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

		// Wrap the whole enchilada in enclosing paragraphs
		if ($str != "\n")
		{
			// We trim off the right-side new line so that the closing </p> tag
			// will be positioned immediately following the string, matching
			// the behavior of the opening <p> tag
			$str = '<p>'.rtrim($str).'</p>';
		}

		// Remove empty paragraphs if they are on the first line, as this
		// is a potential unintended consequence of the previous code
		$str = preg_replace("/<p><\/p>(.*)/", "\\1", $str, 1);

		return $str;
	}

	// ------------------------------------------------------------------------

	/**
	 * Protect Characters
	 *
	 * Protects special characters from being formatted later
	 * We don't want quotes converted within tags so we'll temporarily convert them to {@DQ} and {@SQ}
	 * and we don't want double dashes converted to emdash entities, so they are marked with {@DD}
	 * likewise double spaces are converted to {@NBS} to prevent entity conversion
	 *
	 * Used as a preg_replace_callback() callback by auto_typography().
	 *
	 * @access	public
	 * @param	array	regex match array; only element 0 (the full match) is used
	 * @return	string	the matched text with markers substituted
	 */
	public function _protect_characters($match)
	{
		return str_replace(array("'",'"','--','  '), array('{@SQ}', '{@DQ}', '{@DD}', '{@NBS}'), $match[0]);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert newlines to HTML line breaks except within PRE tags
	 *
	 * The text is split around complete <pre ...>...</pre> blocks. The
	 * opening tag may carry attributes, and an unclosed <pre> protects
	 * everything up to the end of the string. nl2br() is applied only
	 * to the text outside those blocks.
	 *
	 * @access	public
	 * @param	string	the text to convert
	 * @return	string	the text with <br /> inserted outside <pre> blocks
	 */
	public function nl2br_except_pre($str)
	{
		// With PREG_SPLIT_DELIM_CAPTURE and a single capture group, even
		// indexes hold ordinary text and odd indexes hold the <pre> blocks.
		$segments = preg_split('#(<pre\b[^>]*>.*?(?:</pre\s*>|\z))#si', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

		if ($segments === FALSE)
		{
			// PCRE failure (e.g. backtrack limit): hand the text back untouched
			// rather than risk inserting breaks inside a <pre> block
			return $str;
		}

		$newstr = '';
		foreach ($segments as $index => $segment)
		{
			$newstr .= (($index % 2) === 0) ? nl2br($segment) : $segment;
		}

		return $newstr;
	}

}
 
407
// END Typography Class
 
408
 
 
409
/* End of file Typography.php */
 
410
/* Location: ./system/libraries/Typography.php */
 
 
b'\\ No newline at end of file'