php采集例子
本例实现对http://www.nikewell88.com/的目录结构和商品采集
<?php
/**
 * Category / goods scraper for www.nikewell88.com, driven by meta-refresh redirects.
 *
 * Request parameters:
 *   page - 0 (or absent): pick the next pending category, read its page count and
 *          redirect to the last listing page of that category.
 *          N > 0: scrape listing page N of category `res`, then redirect to N - 1
 *          (or back to page=0 once page 1 has been processed).
 *   res  - cat_id of the category being scraped; only read when page > 0.
 *
 * A category is "pending" while ecs_category.cat_desc = '3'. It is marked
 * '3_finish' once the local goods count equals the remote total or is one short.
 *
 * NOTE(review): open() and cut() are not defined in this file; presumably they come
 * from fun.php. From their use here, open($host, $path) appears to fetch a page and
 * cut($text, $start, $end) to return the text between two markers - confirm there.
 * NOTE(review): $db_host, $db_user, $db_pass and $db_name are not assigned in this
 * file; presumably one of the required files sets them - confirm.
 */
require("fun.php");
require("../include/cls_mysql.php");

$host = 'www.nikewell88.com';
$db = new cls_mysql($db_host, $db_user, $db_pass, $db_name);

// Both parameters are only ever used as integers (SQL ids, page arithmetic, file
// names), so force them to integers: this removes the undefined-index notice on the
// first request and keeps request data out of the SQL text.
$page = isset($_GET['page']) ? intval($_GET['page']) : 0;
$res  = isset($_GET['res']) ? intval($_GET['res']) : 0;

if ($page <= 0) {
    // ---- Step 1: choose the next pending category and find out how many pages it has.
    $sql = "SELECT `cat_id` from `ecs_category` where `cat_desc`='3' order by cat_id desc ";
    $res = $db->getOne($sql);
    if (empty($res)) {
        // Nothing left to scrape: stop instead of fetching "/catalog_.html" and
        // refreshing forever.
        echo "No pending category left.</br>";
        exit;
    }
    $res = intval($res);
    echo "Adding cat:" . $res . "</br>";

    $source = "/catalog_" . $res . ".html";
    echo "path:" . $source . "</br>";
    $html = open($host, $source);

    // Remote totals scraped from the listing header / pager.
    $num   = cut($html, 'Display ', 'Per Page');
    $page_ = cut($html, 'style="color:#ff9900;">', '<b>');
    // The page count ends up in a redirect URL and in page arithmetic, so keep only
    // its integer value. A count of 0 (pager not found) redirects back to page=0.
    $page  = intval(cut($page_, '/', '<'));
    echo "page:" . $page . "</br>";
    echo "total of cat" . $num . "</br>";

    $sql = "SELECT COUNT(*) from `ecs_goods` where `cat_id`='" . $res . "'";
    $goodsnum = $db->getOne($sql);
    echo "total of cat now:" . $goodsnum . "</br>";

    // Only compare when the scraped total really is a number; arithmetic on arbitrary
    // scraped text is unreliable (and raises an error on PHP 8).
    $remote_total = trim((string) $num);
    $finished = false;
    if (is_numeric($remote_total)) {
        $remote_total = intval($remote_total);
        $finished = ($goodsnum == $remote_total || $goodsnum == ($remote_total - 1));
    }

    if ($finished) {
        // Category complete: flag it and move on to the next pending one.
        $sql = "UPDATE `ecs_category` SET `cat_desc` = '3_finish' WHERE `ecs_category`.`cat_id` =$res LIMIT 1 ;";
        $db->query($sql);
        echo '<meta http-equiv="refresh" content="1;url=?page=0">';
    } else {
        // Exactly one refresh tag is emitted; previously this one was also printed
        // after the "finished" redirect, giving the browser two conflicting targets.
        echo '<meta http-equiv="refresh" content="1;url=?page=' . $page . '&res=' . $res . '">';
    }
} else {
    // ---- Step 2: scrape one listing page of category $res.
    $source = "/catalog_" . $res . "_" . $page . ".html";
    echo $source . "</br>";
    $html = open($host, $source);
    $html = cut($html, 'class="INDEXGOODS_STYLE_2_Wrap">', '<script type="text/javascript">');

    // Captures: 1 = image number, 2 = alt text (goods name), 3 = filler, 4 = goods id.
    $tarray = array();
    preg_match_all('/small\/([0-9]+)\.jpg" alt="([^"]+)"([^I]+)ID:([0-9]+)<\/li>/', (string) $html, $tarray);

    // Walk the matches from last to first, as the original did.
    for ($j = count($tarray[1]) - 1; $j > -1; $j--) {
        $img        = $tarray[1][$j];            // digits only (regex-enforced)
        $goods_id   = intval($tarray[4][$j]);    // digits only (regex-enforced)
        // The name is free text from the remote page; escape it so an apostrophe
        // cannot break or alter the INSERT below.
        // NOTE(review): if cls_mysql offers a connection-aware escape method, prefer it.
        $goods_name = addslashes($tarray[2][$j]);
        // Plain slashes: the former "\/" sequences put literal backslashes into the
        // string, which only disappeared because MySQL drops unknown backslash escapes.
        $goods_img  = "images/2009/12/11/" . $img . ".jpg";

        $sql = "SELECT count(*) from `ecs_goods` where `goods_id`='$goods_id';";
        echo $sql . ";</br>";

        if ($db->getOne($sql) == 0) {
            // New product: insert it with fixed default values.
            $sql = "INSERT INTO `ecs_goods` (`goods_id`, `cat_id`, `goods_sn`, `goods_name`, `goods_name_style`, `click_count`, `brand_id`, `provider_name`, `goods_number`, `goods_weight`, `market_price`, `shop_price`, `promote_price`, `promote_start_date`, `promote_end_date`, `warn_number`, `keywords`, `goods_brief`, `goods_desc`, `goods_thumb`, `goods_img`, `original_img`, `is_real`, `extension_code`, `is_on_sale`, `is_alone_sale`, `integral`, `add_time`, `sort_order`, `is_delete`, `is_best`, `is_new`, `is_hot`, `is_promote`, `bonus_type_id`, `last_update`, `goods_type`, `seller_note`, `give_integral`, `rank_integral`) VALUES
('$goods_id', '$res', '', '$goods_name', '+', 11, 0, '', 1, 0.000, 0.00, 0.00, 0.00, 0, 0, 1, '', '', '', '$goods_img', '$goods_img', '$goods_img', 1, '', 1, 1, 0, 1260140079, 0, 0, 0, 0, 1, 0, 0, 1260141083, 0, '', -1, -1);";
            $db->query($sql);
        } else {
            // Known product: just (re)assign it to the current category.
            $sql = "UPDATE `ecs_goods` SET `cat_id` = '$res' WHERE `ecs_goods`.`goods_id` =$goods_id ;";
            $db->query($sql);
        }
    }

    // Step down to the previous page; after page 1, return to category selection.
    $p = $page - 1;
    if ($p > 0) {
        echo '<meta http-equiv="refresh" content="1;url=?page=' . $p . '&res=' . $res . '">';
    } else {
        echo '<meta http-equiv="refresh" content="1;url=?page=0">';
    }
}
?>
UqEf5L7NLBdlYXYSnRBjQP9g4Jbs
What a data of un-ambiguity and preserveness of precious experience concerning unpredicted feelings.