HTML to Textile Converter


<?php
// -------------------------------------------------------------
// The following functions are used to detextile html, a process
// still in development.
// By Tim Koschützki
// Based on code from www.aquarionics.com
// -------------------------------------------------------------
/**
 * Converts a fragment of simple HTML into Textile markup ("detextile").
 *
 * Entry point is detextile(); the remaining methods are the individual
 * passes it runs. All methods stay public because the class has always
 * exposed them (PHP 4 style) and processTag() is used as a regex callback.
 */
class html2textile {

    /**
     * Convert HTML to Textile.
     *
     * Passes, in order: lowercase tag names, turn <br> into newlines,
     * collapse blank lines, convert <img> and the supported paired tags via
     * processTag(), replace typographic glyphs, convert list markup, drop the
     * leading "p. " markers, decode numeric entities and strip leftover tags
     * (only <pre> is kept).
     *
     * @param string $text HTML source
     * @return string Textile markup
     */
    public function detextile($text) {
        // Lowercase tag names. This used the /e (eval) modifier, which was
        // removed in PHP 7; a callback does the same job without eval.
        $text = preg_replace_callback(
            '/(<\/?)(\w+)([^>]*>)/',
            function ($m) {
                return $m[1] . strtolower($m[2]) . $m[3];
            },
            $text
        );
        $text = preg_replace("/<br[ \/]*>\s*/", "\n", $text);
        $text = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $text);
        // No stripslashes() here any more: it only existed to undo the quote
        // escaping that /e applied to back-references. Without /e it would
        // delete genuine backslashes from the input.

        $oktags = array(
            'p', 'ol', 'ul', 'li', 'i', 'b', 'em', 'strong', 'span', 'a', 'h[1-6]',
            'table', 'tr', 'td', 'u', 'del', 'sup', 'sub', 'blockquote',
            'center', 'code',
        );

        // (?![\w-]) ends the tag name, so that e.g. "u" no longer matches
        // "<ul>", "b" no longer matches "<blockquote>" and "p" no longer
        // matches "<pre>".
        $text = preg_replace_callback(
            "/\t*<(img)(?![\w-])\s*([^>]*)>/Usi",
            array($this, 'processTag'),
            $text
        );
        foreach ($oktags as $tag) {
            $text = preg_replace_callback(
                "/\t*<(" . $tag . ")(?![\w-])\s*([^>]*)>(.*)<\/\\1>/Usi",
                array($this, 'processTag'),
                $text
            );
        }

        $text = $this->detextile_process_glyphs($text);
        $text = $this->detextile_process_lists($text);
        $text = preg_replace('/^\t* *p\. /m', '', $text);

        // sci() emits "/#" and "#\" as stand-ins for the "<" and ">"
        // alignment markers so that strip_tags() cannot swallow them; they
        // are swapped back once the tags are gone.
        return str_replace(
            array("#\\", "/#"),
            array(">", "<"),
            strip_tags($this->decode_high($text), '<pre>')
        );
    }

    /**
     * Replace typographic glyphs with plain ASCII equivalents.
     *
     * Both the literal UTF-8 character and its decimal numeric entity are
     * handled. NOTE(review): the listing this was restored from shows only
     * the literal characters; the entity forms were added because the input
     * is HTML - confirm this matches the intended behaviour.
     *
     * @param string $text
     * @return string
     */
    public function detextile_process_glyphs($text) {
        $glyphs = array(
            '’' => '\'',  '&#8217;' => '\'',   // single closing
            '‘' => '\'',  '&#8216;' => '\'',   // single opening
            '”' => '"',   '&#8221;' => '"',    // double closing
            '“' => '"',   '&#8220;' => '"',    // double opening
            '—' => '--',  '&#8212;' => '--',   // em dash
            '–' => ' - ', '&#8211;' => ' - ',  // en dash
            '×' => 'x',   '&#215;'  => 'x',    // dimension sign
            '™' => '(TM)', '&#8482;' => '(TM)', // trademark
            '®' => '(R)', '&#174;'  => '(R)',  // registered
            '©' => '(C)', '&#169;'  => '(C)',  // copyright
            '…' => '...', '&#8230;' => '...',  // ellipsis
        );

        return str_replace(array_keys($glyphs), array_values($glyphs), $text);
    }

    /**
     * Convert <ol>/<ul> list markup into "# " / "* " item prefixes.
     *
     * The text is split on tags. The list wrapper tags are removed and each
     * opening <li> inside a list is replaced by the marker of the enclosing
     * list. Closing </li> tags are left in place; detextile() strips them
     * later with strip_tags().
     *
     * @param string $text
     * @return string
     */
    public function detextile_process_lists($text) {
        $list = false;
        $out  = array();

        $pieces = preg_split("/(<.*>)/U", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        foreach ($pieces as $line) {
            // The original patterns required a space after "<ol" and after
            // "<li" (inside <ul>), so attribute-less tags were missed.
            if ($list === false && preg_match('/<ol[\s>]/', $line)) {
                $line = '';
                $list = 'o';
            } elseif (preg_match('/<\/ol/', $line)) {
                $line = '';
                $list = false;
            } elseif ($list === false && preg_match('/<ul[\s>]/', $line)) {
                $line = '';
                $list = 'u';
            } elseif (preg_match('/<\/ul/', $line)) {
                $line = '';
                $list = false;
            } elseif ($list === 'o') {
                $line = preg_replace('/<li(?![\w-])[^>]*>/', '# ', $line);
            } elseif ($list === 'u') {
                $line = preg_replace('/<li(?![\w-])[^>]*>/', '* ', $line);
            }
            $out[] = $line;
        }

        return implode('', $out);
    }

    /**
     * preg_replace_callback() handler: turn one matched tag into Textile.
     *
     * @param array $matches [0] whole match, [1] tag name, [2] attribute
     *                       string, [3] inner content (absent for <img>)
     * @return string Textile for known tags, otherwise the match unchanged
     */
    public function processTag($matches) {
        $all     = $matches[0];
        $tag     = strtolower($matches[1]);
        $atts    = isset($matches[2]) ? $matches[2] : '';
        // <img> matches carry no content group.
        $content = isset($matches[3]) ? $matches[3] : '';

        // Drop the XHTML self-closing slash; otherwise the last attribute of
        // e.g. <img src="x"/> is not recognised by splat().
        $a = $this->splat(preg_replace('/\s*\/\s*$/', '', $atts));

        $phr = array(
            'em'     => '_',
            'i'      => '__',
            'b'      => '**',
            'strong' => '*',
            'cite'   => '??',
            'del'    => '-',
            'ins'    => '+',
            'u'      => '+',
            'sup'    => '^',
            'sub'    => '~',
            'span'   => '%',
            'code'   => '@',
        );
        $blk = array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6');

        if (isset($phr[$tag])) {
            return $phr[$tag] . $this->sci($a) . $content . $phr[$tag];
        }
        if ($tag == 'blockquote') {
            return 'bq.' . $this->sci($a) . ' ' . $content;
        }
        if ($tag == 'center') {
            return 'p=.' . $this->sci($a) . ' ' . $content;
        }
        if (in_array($tag, $blk, true)) {
            return $tag . $this->sci($a) . '. ' . $content;
        }
        if ($tag == 'a') {
            $t   = $this->filterAtts($a, array('href', 'title'));
            $out = '"' . $content;
            if (isset($t['title'])) {
                // Parentheses delimit the title, so inner ones become brackets.
                $out .= ' (' . str_replace(array('(', ')'), array('[', ']'), $t['title']) . ')';
            }
            $out .= '":' . (isset($t['href']) ? $t['href'] : '');
            return $out;
        }
        if ($tag == 'img') {
            $t   = $this->filterAtts($a, array('src', 'alt'));
            $out = '!' . (isset($t['src']) ? $t['src'] : '');
            if (isset($t['alt'])) {
                $out .= '(' . str_replace(array('(', ')'), array('[', ']'), $t['alt']) . ')';
            }
            $out .= '!';
            return $out;
        }

        return $all;
    }

// -------------------------------------------------------------

    /**
     * Pick the non-empty attributes whose name is in $ok.
     *
     * @param array $atts attribute list as returned by splat()
     * @param array $ok   lowercase attribute names to keep
     * @return array map of lowercase attribute name => value; empty if none
     */
    public function filterAtts($atts, $ok)
    {
        $out = array();
        foreach ($atts as $a) {
            $name = strtolower($a['name']);
            if (in_array($name, $ok) && $a['att'] != '') {
                $out[$name] = $a['att'];
            }
        }
        return $out;
    }

// -------------------------------------------------------------

    /**
     * Build the Textile modifier string for class, id, style, cite and align.
     *
     * NOTE(review): the "=" inside the class/id/style markers is kept exactly
     * as the original emitted it - confirm against the Textile dialect that
     * consumes this output.
     *
     * @param array $a attribute list as returned by splat()
     * @return string
     */
    public function sci($a)
    {
        $out = '';
        foreach ($a as $t) {
            switch (strtolower($t['name'])) {
                case 'class':
                    $out .= '(=' . $t['att'] . ')';
                    break;
                case 'id':
                    $out .= '[=' . $t['att'] . ']';
                    break;
                case 'style':
                    $out .= '{=' . $t['att'] . '}';
                    break;
                case 'cite':
                    $out .= ':' . $t['att'];
                    break;
                case 'align':
                    // "/#" and "#\" are placeholders that detextile() turns
                    // into "<" and ">" after strip_tags() has run.
                    if ($t['att'] == 'left') {
                        $out .= '/#';
                    } elseif ($t['att'] == 'right') {
                        $out .= '#\\';
                    } elseif ($t['att'] == 'center') {
                        $out .= '=';
                    } elseif ($t['att'] == 'justify') {
                        $out .= '/##\\';
                    }
                    break;
            }
        }
        return $out;
    }

// -------------------------------------------------------------

    /**
     * Parse an HTML attribute string into a list of attributes.
     *
     * Small state machine: mode 0 expects a name, mode 1 an "=" (or
     * whitespace, meaning a valueless attribute), mode 2 a value (double
     * quoted, single quoted or a bare word). Anything unparseable is skipped
     * up to and including the next run of whitespace.
     *
     * @param string $attr raw attribute text from inside a tag
     * @return array list of array('name' => ..., 'whole' => ..., 'att' => ...)
     */
    public function splat($attr)  // returns attributes as an array
    {
        $arr  = array();
        $atnm = '';
        $mode = 0;

        while (strlen($attr) != 0) {
            $ok = 0;
            switch ($mode) {
                case 0: // name
                    if (preg_match('/^([a-z]+)/i', $attr, $match)) {
                        $atnm = $match[1];
                        $ok = $mode = 1;
                        $attr = substr($attr, strlen($match[0]));
                    }
                    break;

                case 1: // =
                    if (preg_match('/^\s*=\s*/', $attr, $match)) {
                        $ok = 1;
                        $mode = 2;
                        $attr = substr($attr, strlen($match[0]));
                        break;
                    }
                    if (preg_match('/^\s+/', $attr, $match)) {
                        // Valueless attribute: the name doubles as the value.
                        $ok = 1;
                        $mode = 0;
                        $arr[] = array('name' => $atnm, 'whole' => $atnm, 'att' => $atnm);
                        $attr = substr($attr, strlen($match[0]));
                    }
                    break;

                case 2: // value
                    if (preg_match('/^("[^"]*")(\s+|$)/', $attr, $match)) {
                        $arr[] = array(
                            'name'  => $atnm,
                            'whole' => $atnm . '=' . $match[1],
                            'att'   => str_replace('"', '', $match[1]),
                        );
                        $ok = 1;
                        $mode = 0;
                        $attr = substr($attr, strlen($match[0]));
                        break;
                    }
                    if (preg_match("/^('[^']*')(\s+|$)/", $attr, $match)) {
                        $arr[] = array(
                            'name'  => $atnm,
                            'whole' => $atnm . '=' . $match[1],
                            'att'   => str_replace("'", '', $match[1]),
                        );
                        $ok = 1;
                        $mode = 0;
                        $attr = substr($attr, strlen($match[0]));
                        break;
                    }
                    if (preg_match("/^(\w+)(\s+|$)/", $attr, $match)) {
                        $arr[] = array(
                            'name'  => $atnm,
                            'whole' => $atnm . '="' . $match[1] . '"',
                            'att'   => $match[1],
                        );
                        $ok = 1;
                        $mode = 0;
                        $attr = substr($attr, strlen($match[0]));
                    }
                    break;
            }

            if ($ok == 0) {
                // Could not parse at this position: skip one token and
                // start over with a fresh name.
                $attr = preg_replace('/^\S*\s*/', '', $attr);
                $mode = 0;
            }
        }

        // The input ended right after a name: valueless trailing attribute.
        if ($mode == 1) {
            $arr[] = array('name' => $atnm, 'whole' => $atnm . '="' . $atnm . '"', 'att' => $atnm);
        }
        return $arr;
    }

// -------------------------------------------------------------

    /**
     * Conversion map for mb_decode_numericentity().
     *
     * Each row is (first code point, last code point, offset, mask); entities
     * whose code point falls in one of the ranges are decoded.
     *
     * @return array flat list of integers, four per range
     */
    public function cmap() {
        $f = 0xffff;

        $cmap = array(
            160,  255,  0, $f,
            402,  402,  0, $f,
            913,  929,  0, $f,
            931,  937,  0, $f,
            945,  969,  0, $f,
            977,  978,  0, $f,
            982,  982,  0, $f,
            8226, 8226, 0, $f,
            8230, 8230, 0, $f,
            8242, 8243, 0, $f,
            8254, 8254, 0, $f,
            8260, 8260, 0, $f,
            8465, 8465, 0, $f,
            8472, 8472, 0, $f,
            8476, 8476, 0, $f,
            8482, 8482, 0, $f,
            8501, 8501, 0, $f,
            8592, 8596, 0, $f,
            8629, 8629, 0, $f,
            8656, 8660, 0, $f,
            8704, 8704, 0, $f,
            8706, 8707, 0, $f,
            8709, 8709, 0, $f,
            8711, 8713, 0, $f,
            8715, 8715, 0, $f,
            8719, 8719, 0, $f,
            8721, 8722, 0, $f,
            8727, 8727, 0, $f,
            8730, 8730, 0, $f,
            8733, 8734, 0, $f,
            8736, 8736, 0, $f,
            8743, 8747, 0, $f,
            8756, 8756, 0, $f,
            8764, 8764, 0, $f,
            8773, 8773, 0, $f,
            8776, 8776, 0, $f,
            8800, 8801, 0, $f,
            8804, 8805, 0, $f,
            8834, 8836, 0, $f,
            8838, 8839, 0, $f,
            8853, 8853, 0, $f,
            8855, 8855, 0, $f,
            8869, 8869, 0, $f,
            8901, 8901, 0, $f,
            8968, 8971, 0, $f,
            9001, 9002, 0, $f,
            9674, 9674, 0, $f,
            9824, 9824, 0, $f,
            9827, 9827, 0, $f,
            9829, 9830, 0, $f,
            338,  339,  0, $f,
            352,  353,  0, $f,
            376,  376,  0, $f,
            710,  710,  0, $f,
            732,  732,  0, $f,
            8194, 8195, 0, $f,
            8201, 8201, 0, $f,
            8204, 8207, 0, $f,
            8211, 8212, 0, $f,
            8216, 8218, 0, $f,
            8218, 8218, 0, $f,
            8220, 8222, 0, $f,
            8224, 8225, 0, $f,
            8240, 8240, 0, $f,
            8249, 8250, 0, $f,
            8364, 8364, 0, $f,
        );

        return $cmap;
    }

// -------------------------------------------------------------

    /**
     * Decode numeric character references covered by cmap() to UTF-8.
     *
     * Uses mb_decode_numericentity() when the mbstring extension is loaded;
     * otherwise falls back to a regex pass over "&#NNN;" / "&#xHHH;"
     * references, so a missing extension is no longer a fatal error.
     *
     * @param string $text
     * @return string
     */
    public function decode_high($text) {
        $cmap = $this->cmap();

        if (function_exists('mb_decode_numericentity')) {
            return mb_decode_numericentity($text, $cmap, "UTF-8");
        }

        return preg_replace_callback(
            '/&#(x[0-9a-f]+|[0-9]+);/i',
            function ($m) use ($cmap) {
                $isHex = ($m[1][0] === 'x' || $m[1][0] === 'X');
                $code  = $isHex ? hexdec(substr($m[1], 1)) : (int) $m[1];
                for ($i = 0, $n = count($cmap); $i + 3 < $n; $i += 4) {
                    if ($code >= $cmap[$i] && $code <= $cmap[$i + 1]) {
                        return html_entity_decode('&#' . $code . ';', ENT_QUOTES, 'UTF-8');
                    }
                }
                return $m[0];
            },
            $text
        );
    }
}
?>

<html>
    <head>
        <title>Html to Textile Converter - Html2Textile</title>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
        <link rel="stylesheet" type="text/css" media="screen" href="https://txstyle.xyz/css/bootstrap3min.css" />
    </head>

    <body>

    <div class="container">
      <div class="row">
        <div class="col-md-10">

        <h1><b>HTML to Textile Converter</b></h1>
    <hr/>

        <?php
        // Page controller.
        //   no "source" parameter -> conversion form (and the result, after a POST)
        //   source=show           -> highlighted source of this script
        // Full "<?php" tags are used throughout: with short_open_tag disabled,
        // "<?" blocks are sent to the browser as plain text.

        // Everything echoed into markup is escaped first.
        $scriptName = htmlspecialchars(
            isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : '',
            ENT_QUOTES,
            'UTF-8'
        );
        $sourceMode = isset($_REQUEST['source']) ? $_REQUEST['source'] : null;

        // Defined up front so the template never reads an undefined variable.
        $html    = '';
        $textile = '';

        if ($sourceMode === null)
        {
            echo '<p><b>This will convert simple HTML data into Textile.</b></p>';
            echo '<p><b>Enter HTML data here:</b><br/><br/></p>';

            if (isset($_POST['html']) && is_string($_POST['html']))
            {
                $html = $_POST['html'];
                // Only undo magic quotes where that setting exists and is on;
                // an unconditional stripslashes() eats real backslashes.
                if (ini_get('magic_quotes_gpc'))
                {
                    $html = stripslashes($html);
                }
                $html2textile = new html2textile;
                $textile = $html2textile->detextile($html);
            }
        ?>

        <form action="<?php echo $scriptName; ?>" method="post">
        <p style="text-align:left;">
        <textarea name="html"
        style="width:100%; background-color:#FAFAE0; border:1px solid silver;"
        cols="80" rows="23"
        ><?php echo htmlspecialchars($html, ENT_QUOTES, 'UTF-8'); ?></textarea>
        </p>
        <p style="text-align:left;">
            <input name="html2textile"
               value="Convert to Textile!<?php if ($textile !== '') echo ' ↓'; ?>"
                   class="button"
                   style="font-size:120%; font-weight:bold;"
                   type="submit" />
        </p>
        </form>
        <?php
            if ($textile !== '')
            {
            ?>
                <textarea name="textile"
              style="width:100%; background-color:#FFFFFF; border:1px solid silver;"
              cols="80" rows="23"
                     ><?php echo htmlspecialchars($textile, ENT_QUOTES, 'UTF-8'); ?></textarea>
            <?php
            }
        ?>

<?php
        }
        elseif ($sourceMode === "show")
        {
?>
        <table cellspacing="0" width="90%" border="1" align="center" bordercolor="#000033" bgcolor="#E6F2FF">
            <tr>
                <td width="90%" align="left">
                    <?php show_source($_SERVER['SCRIPT_FILENAME']); ?>
                </td>
            </tr>
        </table>
<?php
        }
?>

        <p align="center">
        <small>© 2007 Treviño - PHP Script for converting HTML code to Textile<br>
        My part is released under GPL License, freely use, copy and redistribute it.<br>
<?php
if ($sourceMode === null)
    echo '<a href="' . $scriptName . '?source=show">Source code</a>';
elseif ($sourceMode === "show")
    echo '<a href="' . $scriptName . '">Hide source code</a>';
?>
        </small>
        </p>

        </div><!-- col-md-10-->
      </div><!-- row-->
    </div><!-- fluid-container-->

    </body>
</html>

© 2007 Treviño - PHP Script for converting HTML code to Textile
My part is released under the GPL License: freely use, copy and redistribute it.
Hide source code