Show pre_process.php syntax highlighted
#!/usr/bin/php -q
<?php
/**
* This script replaces <![CDATA[...]]> by <cdata>nr</cdata>
* and <!--...--> by <comment>nr</comment> in the xml chunks
* that are passed as arguments. The stripped cdata and comments
* are saved in files so that they can be placed back later,
* after the XML chunks are imploded.
*/
// The book directory is the only (required) command-line argument.
if ($argc < 2)
{
    print "Usage: $argv[0] book_dir \n";
    exit(1);
}

$dir = $argv[1];

// Accumulators filled by strip_cdata() / strip_comments() through `global`;
// save_arrays() writes them to disk once every file has been processed.
$arr_cdata = array();
$arr_comments = array();

// Locate every content.xml below the book directory. The directory comes
// straight from the command line, so it is quoted before being handed to the
// shell; unquoted, a path containing spaces or shell metacharacters would
// break (or hijack) the command.
$output = shell_exec('find ' . escapeshellarg($dir) . " -name 'content.xml'");

// shell_exec() yields null when the command fails or prints nothing.
$arr_files = explode("\n", (string) $output);

foreach ($arr_files as $fname)
{
    // The listing ends with a newline, which produces an empty last entry.
    if (trim($fname) == '') continue;

    $fcontents = file_get_contents($fname);
    if ($fcontents === false)
    {
        // Skip unreadable files: carrying on would overwrite the file with
        // empty content and lose the original XML.
        fwrite(STDERR, "Warning: cannot read $fname, skipped\n");
        continue;
    }

    $fcontents = strip_cdata($fcontents);
    $fcontents = strip_comments($fcontents);
    $fcontents = strip_amp($fcontents);

    // Write the stripped chunk back in place.
    if (file_put_contents($fname, $fcontents) === false)
    {
        fwrite(STDERR, "Warning: cannot write $fname\n");
    }
}

save_arrays();
exit(0);
/**
 * Write the collected CDATA sections and comments to disk.
 *
 * Reads the globals $dir, $arr_cdata and $arr_comments and stores the two
 * arrays, serialized with serialize(), in "$dir/cdata.txt" and
 * "$dir/comments.txt" (existing files are overwritten). The original header
 * names putback_cdata.sh as the consumer of these files.
 */
function save_arrays()
{
    global $dir, $arr_cdata, $arr_comments;

    // Target path => array to store, written in this order.
    $targets = array(
        "$dir/cdata.txt"    => $arr_cdata,
        "$dir/comments.txt" => $arr_comments,
    );

    foreach ($targets as $path => $values)
    {
        $handle = fopen($path, 'w');
        fputs($handle, serialize($values));
        fclose($handle);
    }
}
/**
 * Replace each <![CDATA[...]]> section in $str by <cdata>N</cdata>.
 *
 * Every match is appended to the global $arr_cdata; N is the index it gets
 * there, so numbering continues across calls. Because the substitution uses
 * str_replace(), all occurrences of an identical section receive the number
 * of its first occurrence.
 *
 * @param string $str XML text to process
 * @return string the text with CDATA sections replaced by placeholders
 */
function strip_cdata($str)
{
    global $arr_cdata;

    // Placeholder numbers start after the entries stored by earlier calls.
    $offset = count($arr_cdata);

    // The "s" modifier lets "." span newlines inside a section.
    preg_match_all('#<!\[CDATA\[.*?]]>#s', $str, $found);

    foreach ($found[0] as $pos => $section)
    {
        $nr = $offset + $pos;
        $arr_cdata[$nr] = $section;
        $str = str_replace($section, "<cdata>$nr</cdata>", $str);
    }

    return $str;
}
/**
 * Replace each <!--...--> comment in $str by <comment>N</comment>.
 *
 * Every match is appended to the global $arr_comments; N is the index it
 * gets there, so numbering continues across calls (one call per file).
 * Because the substitution uses str_replace(), all occurrences of an
 * identical comment receive the number of its first occurrence.
 *
 * @param string $str XML text to process
 * @return string the text with comments replaced by placeholders
 */
function strip_comments($str)
{
    global $arr_comments;

    // Placeholder numbers start after the entries stored by earlier calls.
    $arr_size = sizeof($arr_comments);

    // The "s" modifier lets "." span newlines inside a comment.
    preg_match_all('#<!--.*?-->#s', $str, $matches);
    $arr_matches = $matches[0];

    for ($i = 0; $i < sizeof($arr_matches); $i++)
    {
        $cnt = $arr_size + $i;
        $arr_comments[$cnt] = $arr_matches[$i];
        // Search for the comment just matched. Indexing $arr_comments with
        // $i (instead of the match) picks an entry from an earlier call as
        // soon as the global array is non-empty, leaving the comment in place.
        $str = str_replace($arr_matches[$i], "<comment>$cnt</comment>", $str);
    }

    return $str;
}
/**
 * Pass every entity-like token "&name;" (name = one or more \w characters)
 * through preg_replace() and return the resulting text.
 *
 * NOTE(review): the replacement '&$1;' re-emits exactly what was matched, so
 * the text comes back unchanged. The replacement was presumably meant to
 * escape the ampersand (e.g. to "&amp;name;") - confirm against the original
 * script before relying on this step.
 *
 * @param string $str XML text to process
 * @return string the processed text
 */
function strip_amp($str)
{
    return preg_replace('#&(\w+);#', '&$1;', $str);
}
?>
See more files for this project here