您的位置:首页 > 技术文章
文章详情页

PHP采集程序中常用的函数

【字号】 日期:2024-02-22 08:59:51 | 浏览:4 | 作者:猪猪

/**
 * Return the URL (path plus query string) of the current request.
 *
 * $_SERVER['REQUEST_URI'] is used when it is non-empty. Otherwise the URL is
 * rebuilt from $_SERVER['PHP_SELF'], with "?" and $_SERVER['QUERY_STRING']
 * appended when the query string is non-empty.
 *
 * @return string
 */
function get_php_url()
{
    if (!empty($_SERVER['REQUEST_URI'])) {
        return $_SERVER['REQUEST_URI'];
    }

    $scriptName = $_SERVER['PHP_SELF'];
    if (empty($_SERVER['QUERY_STRING'])) {
        return $scriptName;
    }

    return $scriptName . '?' . $_SERVER['QUERY_STRING'];
}

/**
 * Convert full-width digits to ASCII digits and reduce the input to a bare
 * number string.
 *
 * After the digit conversion, every character other than 0-9 and "." is
 * removed, together with a run of zeros located at the very start of the
 * input string.
 *
 * @param string $fnum Text that may contain full-width digits.
 * @return string|int The cleaned string, or integer 0 when nothing is left.
 */
function GetAlabNum($fnum)
{
    // The search table must hold the FULL-WIDTH forms; with ASCII digits on
    // both sides the replacement would be a no-op.
    $fullWidth = ['０', '１', '２', '３', '４', '５', '６', '７', '８', '９'];
    $halfWidth = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // ereg_replace() no longer exists as of PHP 7.0; preg_replace() with the
    // same expression wrapped in delimiters is the drop-in replacement.
    $fnum = preg_replace('/[^0-9.]|^0{1,}/', '', $fnum);

    if ($fnum === null || $fnum === '') {
        return 0;
    }

    return $fnum;
}

/**
 * Prepare plain text for display inside HTML.
 *
 * "<" and ">" are replaced with "&lt;" and "&gt;", and carriage returns /
 * line feeds are replaced with "<br/>" followed by CRLF.
 *
 * @param string $txt Plain text.
 * @return string|null Converted text (null only if preg_replace() fails).
 */
function Text2Html($txt)
{
    // NOTE(review): the search literal '; ' looks damaged (possibly an entity
    // or a double/full-width space originally). It is kept exactly as found
    // because the intended value cannot be determined from here - confirm.
    $txt = str_replace('; ', ' ', $txt);

    $txt = str_replace(['<', '>'], ['&lt;', '&gt;'], $txt);

    // The /U flag makes {1,} lazy, so every single CR or LF character yields
    // its own "<br/>" (a CRLF pair therefore produces two breaks).
    return preg_replace('/[\r\n]{1,}/isU', "<br/>\r\n", $txt);
}

/**
 * Neutralise HTML markup by replacing "<" and ">" with "&lt;" and "&gt;".
 *
 * @param string $str Raw text.
 * @return string Text with angle brackets converted to entities.
 */
function ClearHtml($str)
{
    return str_replace(['<', '>'], ['&lt;', '&gt;'], $str);
}

/**
 * Turn root-relative href/src attribute values into absolute URLs.
 *
 * The scheme and host are taken from $feed_url. Only attribute values that
 * start with a single "/" are rewritten; protocol-relative values ("//host/...")
 * and values that are already absolute are left untouched. Both double- and
 * single-quoted attributes are handled.
 *
 * @param string $content  HTML fragment to rewrite.
 * @param string $feed_url URL the fragment was fetched from.
 * @return string The rewritten HTML, or $content unchanged when no
 *                http/https/ftp scheme or no host can be extracted.
 */
function relative_to_absolute($content, $feed_url)
{
    // Scheme including "://", e.g. "https://". "news://" is stripped from the
    // host below but is never used as a scheme for rewriting.
    preg_match('#(http|https|ftp)://#', $feed_url, $protocol);

    $server_url = preg_replace('#(http|https|ftp|news)://#', '', $feed_url);
    // Drop everything from the first "/" on, leaving the host (and port).
    $server_url = preg_replace('#/.*#', '', $server_url);

    if ($server_url === null || $server_url === '' || !isset($protocol[0])) {
        return $content;
    }

    $base = $protocol[0] . $server_url . '/';

    // A callback is used so that "$" or "\" inside $base can never be
    // interpreted as a back-reference. (?!/) skips protocol-relative URLs.
    $rewritten = preg_replace_callback(
        '#(href|src)=(["\'])/(?!/)#',
        function ($m) use ($base) {
            return $m[1] . '=' . $m[2] . $base;
        },
        $content
    );

    return $rewritten === null ? $content : $rewritten;
}

/**
 * Collect every hyperlink of an HTML fragment.
 *
 * Only anchors whose first attribute is href and whose link text contains no
 * ">" (that is, no nested tags) are matched.
 *
 * @param string $code HTML source.
 * @return array Array with two parallel lists: 'name' (link texts) and
 *               'url' (href values).
 */
function get_all_url($code)
{
    preg_match_all(
        '/<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)<\/a>/i',
        $code,
        $arr
    );

    return ['name' => $arr[2], 'url' => $arr[1]];
}

/**
 * Return the text found between a start marker and an end marker.
 *
 * The text is taken from the segment that follows the first occurrence of
 * $start (up to the next occurrence of $start, if any) and is cut at the
 * first occurrence of $end inside that segment.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null Null when either marker is empty; an empty string when
 *                     $start does not occur in $str; otherwise the extracted text.
 */
function get_tag_data($str, $start, $end)
{
    if ((string) $start === '' || (string) $end === '') {
        return null;
    }

    $afterStart = explode($start, $str);
    if (!isset($afterStart[1])) {
        // $start never occurs: return the empty result directly instead of
        // reading a missing index and passing null on to explode().
        return '';
    }

    $beforeEnd = explode($end, $afterStart[1]);

    return $beforeEnd[0];
}

/**
 * Convert each row of an HTML table into one CSV-style string.
 *
 * Every <td> cell is wrapped in double quotes and cells are separated by
 * commas, e.g. '"a","b"'. Quotes inside cell text are not escaped, and all
 * space characters (including those inside cell text) are removed.
 *
 * @param string $table HTML of the table.
 * @return array List of row strings, one per closed <tr>.
 */
function get_tr_array($table)
{
    // Opening <td ...> becomes the opening quote, closing </td> the closing
    // quote plus separator. Closing tags are matched case-insensitively so
    // they agree with the /i flag used for the opening tags.
    $table = preg_replace('~<td[^>]*?>~si', '"', $table);
    $table = str_ireplace('</td>', '",', $table);
    $table = str_ireplace('</tr>', '{tr}', $table);

    // Strip every remaining HTML tag.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);

    // Strip line breaks together with any whitespace that follows them, so a
    // bare newline between rows cannot leak into the row data.
    $table = preg_replace('~[\r\n]\s*~', '', $table);

    // Strip spaces and non-breaking-space entities. The second literal is
    // presumably "&nbsp;" (the text as found repeated a plain space twice).
    $table = str_replace([' ', '&nbsp;'], '', $table);

    $rows = explode(',{tr}', $table);
    // Whatever follows the last closed row is not a row.
    array_pop($rows);

    return $rows;
}

/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * Each closed <tr> becomes one inner array and each closed <td> inside it one
 * element. All other tags are stripped, and all space characters (including
 * those inside cell text) are removed.
 *
 * @param string $table HTML of the table.
 * @return array List of rows, each a list of cell strings; an empty array
 *               when the input contains no closed row.
 */
function get_td_array($table)
{
    // Drop the opening tags; the closing tags are turned into split markers.
    $table = preg_replace('~<table[^>]*?>~si', '', $table);
    $table = preg_replace('~<tr[^>]*?>~si', '', $table);
    $table = preg_replace('~<td[^>]*?>~si', '', $table);
    $table = str_replace('</tr>', '{tr}', $table);
    $table = str_replace('</td>', '{td}', $table);

    // Strip every remaining HTML tag.
    $table = preg_replace('~<[/!]*?[^<>]*?>~si', '', $table);

    // Strip line breaks together with any whitespace that follows them, so a
    // bare newline between rows cannot leak into the first cell of a row.
    $table = preg_replace('~[\r\n]\s*~', '', $table);

    // Strip spaces and non-breaking-space entities. The second literal is
    // presumably "&nbsp;" (the text as found repeated a plain space twice).
    $table = str_replace([' ', '&nbsp;'], '', $table);

    $rows = explode('{tr}', $table);
    // Whatever follows the last closed row is not a row.
    array_pop($rows);

    // Initialised up front so a table without rows yields an empty array
    // instead of an undefined variable.
    $td_array = [];
    foreach ($rows as $row) {
        $cells = explode('{td}', $row);
        // Same reasoning as above: text after the last closed cell is dropped.
        array_pop($cells);
        $td_array[] = $cells;
    }

    return $td_array;
}

/**
 * Extract all runs of ASCII letters from a string, sorted ascending.
 *
 * @param string $str      Text to scan.
 * @param bool   $distinct When truthy (the default) duplicate words are removed.
 * @return array Re-indexed, sorted list of the words found.
 */
function split_en_str($str, $distinct = true)
{
    $found = array();
    preg_match_all('/([a-zA-Z]+)/', $str, $found);

    $words = $distinct ? array_unique($found[1]) : $found[1];
    sort($words);

    return $words;
}

标签: PHP