php采集后的处理
分类:热门新闻

<?php
/**
 * Post-processing of previously scraped pages: extracts the title and the
 * article body from every saved page and appends them to one text file.
 *
 * @name 采集后的处理.php
 * @date Sat Dec 22 02:07:45 CST 2007
 * @copyright 马永占(MyZ)
 * @author 马永占(MyZ)
 * @link
 */
// Works on files that were already downloaded. (Author's note: this was used
// to copy five books; the source site is deliberately not named.)
header('Content-Type:text/html;charset=utf8');

// This page carries two scripts that both declare writer(); the guard keeps
// the declarations from colliding if the page is ever run as a single file.
if (!function_exists('writer')) {
    /**
     * Append a string to a file, creating the file when it does not exist.
     *
     * @param string $content Data to append.
     * @param string $url     Path of the target file (opened in mode 'ab').
     * @return bool True when the data was written, false otherwise.
     */
    function writer($content, $url)
    {
        $fp = fopen($url, 'ab');
        if ($fp === false) {
            // Passing false on to fwrite()/fclose() is a fatal error on PHP 8.
            return false;
        }
        $written = fwrite($fp, $content);
        fclose($fp);
        return $written !== false;
    }
}

// Separator placed before and after each title line: a blank line followed by
// three dashed rules, each terminated by CRLF.
$rule = '------------------------------------------------';
$banner = "\r\n" . str_repeat($rule . "\r\n", 3);

// NOTE(review): the quotes around the attribute values were lost when this
// snippet was published, so the pattern accepts both quoted and unquoted
// attributes. Confirm against the saved pages.
$page_pattern = '/(<h1>)(.*?)(<\/h1>)(.*?)(<div class="?artibody"? id="?artibody"?>)(.*?)(<\/div>)/s';

// Merge the saved pages in one pass.
// NOTE(review): the original comment says "pages 1 to 136" but the loop
// condition is $i < 136, so page 136 is not processed. Confirm which is meant.
for ($i = 1; $i < 136; $i++) {
    $path = './myz/' . $i . '.shtml';
    if (!is_file($path)) {
        continue; // page was never downloaded
    }
    $str = file_get_contents($path);
    if ($str === false) {
        continue;
    }
    // $arr[2] is the text inside <h1>, $arr[6] the markup inside the body div.
    if (!preg_match($page_pattern, $str, $arr)) {
        continue; // unexpected layout: skip instead of writing an empty entry
    }
    // Paragraph tags become line breaks; linked spans are reduced to the
    // link text only.
    $body = preg_replace('/<p>|<\/p>/', "\r\n", $arr[6]);
    $body = preg_replace('/(<span[^>]+>.*?<a[^>]+>)(.*?)(<\/a><\/span>)/s', '$2', $body);
    $result = $banner . $i . '----------------马永占的目录编号:' . $arr[2] . $banner . $body;
    writer($result, './myz/all.txt');
}
?>

<?php
/**
 * Book scraper: downloads a book's index page, saves every chapter page it
 * links to, and appends each chapter's title and text to one combined file.
 *
 * @name 采集书.php
 * @date Sun Mar 01 22:48:02 CST 2009
 * @copyright 马永占(MyZ)
 * @author 马永占(MyZ)
 * @link
 */
// The response is declared as GB2312; an earlier revision declared utf8.
header('Content-Type:text/html;charset=gb2312');
// Report every error, notices included.
error_reporting(E_ALL);
date_default_timezone_set('Asia/Shanghai');
// No execution time limit: the run downloads one page per chapter.
set_time_limit(0);
/**
 * Append a string to a file, creating the file when it does not exist.
 *
 * @param string $content Data to append.
 * @param string $url     Path of the target file; opened in binary append
 *                        mode ('ab'), so existing content is preserved.
 * @return bool True when the data was written, false when the file could not
 *              be opened or the write failed.
 */
function writer($content,$url)
{
    $fp = fopen($url, 'ab');
    if ($fp === false) {
        // fopen() has already raised a warning. Passing false on to
        // fwrite()/fclose() would be a fatal TypeError on PHP 8.
        return false;
    }
    $written = fwrite($fp, $content);
    fclose($fp);
    return $written !== false;
}
$folder = '2'; // output folder, relative to the working directory
$book_base_url = 'xxxxxxxxxxxxxxxxxxxxx';
$book_url = 'yyyyyyyyyyyyy.html';

// NOTE(review): every HTML tag inside the original patterns was stripped when
// this script was published, which left the code unparseable. The patterns
// below are rebuilt from the capture-group layout that survived:
// (<h1>)(title)(</h1>)(...)(<content div>)(body)(</div>).
// The opening tag of the content div is a guess - confirm it against the
// markup of the site being scraped.
$chapter_pattern = '/(<h1>)(.*?)(<\/h1>)(.*?)(<div class="?artibody"? id="?artibody"?>)(.*?)(<\/div>)/s';

// writer() cannot create directories, so make sure the output folder exists.
if (!is_dir('./'.$folder) && !mkdir('./'.$folder, 0777, true)) {
    exit('Cannot create output folder ./'.$folder);
}

$main = file_get_contents($book_base_url.$book_url);
if ($main === false) {
    exit('Cannot download the chapter index '.$book_base_url.$book_url);
}

// Collect every distinct chapter link found on the index page. The dot before
// "html" is escaped so that it only matches a literal dot.
preg_match_all('/chapter_.*?\.html/', $main, $pages);
$pages = array_unique($pages[0]);

// Separator placed before and after each title line: a blank line followed by
// three dashed rules, each terminated by CRLF.
$rule = '------------------------------------------------';
$banner = "\r\n".str_repeat($rule."\r\n", 3);

foreach ($pages as $value) {
    $str = file_get_contents($book_base_url.$value);
    if ($str === false) {
        continue; // download failed: skip this chapter
    }
    // Keep a raw copy of the page. The downloaded string is parsed directly
    // instead of being read back from the file that was just written.
    writer($str, './'.$folder.'/'.$value.'.txt');
    // $arr[2] is the text inside <h1>, $arr[6] the markup inside the content div.
    if (!preg_match($chapter_pattern, $str, $arr)) {
        continue; // unexpected layout: skip instead of writing an empty entry
    }
    // Paragraph tags become line breaks; linked spans are reduced to the
    // link text only.
    $text = preg_replace('/<p>|<\/p>/', "\r\n", $arr[6]);
    $text = preg_replace('/(<span[^>]+>.*?<a[^>]+>)(.*?)(<\/a><\/span>)/s', '$2', $text);
    $result = $banner.'----------------'.$arr[2].$banner.$text;
    writer($result, './'.$folder.'/new.txt');
}
?>

本文由威尼斯在线注册平台发布于热门新闻,转载请注明出处:php采集后的处理

上一篇:array('网页特效',php$keys=array(array('网页特效' 下一篇:用户可接受的语言信息
猜你喜欢
热门排行
精彩图文