首页 > web > php采集例子

php采集例子

Dec 13th,2009 发表评论

本例实现对http://www.nikewell88.com/的目录结构和商品采集

<?php
/**
 * Incremental scraper for the category listings of www.nikewell88.com.
 *
 * The script advances one step per HTTP request and then re-invokes itself
 * through a <meta http-equiv="refresh"> tag:
 *
 *   ?page=0          Pick the pending category with the highest cat_id
 *                    (`cat_desc` = '3'), read its listing summary and either
 *                    mark it finished ('3_finish') or start walking its pages.
 *   ?page=N&res=ID   Scrape listing page N of category ID, insert or re-file
 *                    every product found, then continue with page N-1
 *                    (or go back to ?page=0 once page 1 has been processed).
 *
 * open() and cut() come from fun.php, which is not shown here; presumably
 * open($host, $path) returns the page body and cut($text, $start, $end)
 * returns the text between two markers -- TODO confirm against fun.php.
 * $db_host / $db_user / $db_pass / $db_name are expected to be defined by one
 * of the required files.
 */
require("fun.php");
require("../include/cls_mysql.php");

$host = 'www.nikewell88.com';
$db = new cls_mysql($db_host, $db_user, $db_pass, $db_name);

// Both parameters are only ever used as integers (page number, category id).
// Casting them here prevents notices on the first call and keeps untrusted
// request data out of the SQL statements and redirect URLs built below.
$page = isset($_GET['page']) ? intval($_GET['page']) : 0;
$res  = isset($_GET['res']) ? intval($_GET['res']) : 0;

if ($page == 0) {
    // Step 1: choose the next category that still has to be scraped.
    $sql = "SELECT `cat_id` from `ecs_category` where `cat_desc`='3' order by cat_id  desc ";
    $res = intval($db->getOne($sql));

    if ($res <= 0) {
        // Nothing pending: stop here instead of requesting "/catalog_.html"
        // and refreshing forever.
        echo "No pending category left.</br>";
    } else {
        echo "Adding cat:".$res."</br>";

        $source = "/catalog_".$res.".html";
        echo "path:".$source."</br>";
        $html = open($host, $source);

        // Item count and page count as printed on the category's first page.
        $num   = cut($html, 'Display ', 'Per Page');
        $page_ = cut($html, 'style="color:#ff9900;">', '<b>');
        // The page count comes from remote HTML; force it to an integer
        // before it is echoed into the redirect URL.
        $page  = intval(cut($page_, '/', '<'));
        echo "page:".$page."</br>";
        echo "total of cat".$num."</br>";

        $sql = "SELECT COUNT(*) from `ecs_goods` where `cat_id`='".$res."'";
        $goodsnum = $db->getOne($sql);
        echo "total of cat now:".$goodsnum."</br>";

        if ($goodsnum == $num || $goodsnum == ($num - 1)) {
            // Local count matches the remote one (tolerating one missing
            // item): flag the category as done and move on to the next one.
            $sql = "UPDATE `ecs_category` SET `cat_desc` = '3_finish' WHERE `ecs_category`.`cat_id` =$res LIMIT 1 ;";
            $db->query($sql);
            echo '<meta http-equiv="refresh" content="1;url=?page=0">';
        } else {
            // Start with the last listing page and walk backwards.
            // Only one refresh tag is emitted per response so the browser
            // never receives two conflicting redirect targets.
            echo '<meta http-equiv="refresh" content="1;url=?page='.$page.'&res='.$res.'">';
        }
    }
} else {
    // Step 2: scrape one listing page of the category given in ?res=.
    $source = "/catalog_".$res."_".$page.".html";
    echo $source."</br>";
    $html = open($host, $source);

    // Restrict matching to the product grid.
    $html = cut($html, 'class="INDEXGOODS_STYLE_2_Wrap">', '<script type="text/javascript">');

    // Captures: [1] thumbnail file number, [2] alt text (product name),
    //           [3] markup between the image and the id, [4] product id.
    preg_match_all('/small\/([0-9]+)\.jpg\" alt=\"([^"]+)\"([^I]+)ID:([0-9]+)<\/li>/', $html, $tarray);

    // Walk the matches from last to first, as the original script did.
    for ($j = count($tarray[1]) - 1; $j > -1; $j--) {
        $img      = $tarray[1][$j];
        $goods_id = $tarray[4][$j];
        // The name is scraped text and may contain quotes or backslashes;
        // escape it before embedding it in the INSERT statement.
        // NOTE(review): if cls_mysql offers a connection-aware escape method,
        // prefer it over addslashes().
        $goods_name = addslashes($tarray[2][$j]);
        // Plain forward slashes: the value no longer depends on MySQL
        // collapsing "\/" to "/" while parsing the string literal.
        $goods_img = 'images/2009/12/11/'.$img.'.jpg';

        $sql = "SELECT count(*) from `ecs_goods` where `goods_id`='$goods_id';";
        echo $sql.";</br>";

        if ($db->getOne($sql) == 0) {
            // Unknown product: create it with placeholder pricing/stock data.
            $sql = "INSERT INTO `ecs_goods` (`goods_id`, `cat_id`, `goods_sn`, `goods_name`, `goods_name_style`, `click_count`, `brand_id`, `provider_name`, `goods_number`, `goods_weight`, `market_price`, `shop_price`, `promote_price`, `promote_start_date`, `promote_end_date`, `warn_number`, `keywords`, `goods_brief`, `goods_desc`, `goods_thumb`, `goods_img`, `original_img`, `is_real`, `extension_code`, `is_on_sale`, `is_alone_sale`, `integral`, `add_time`, `sort_order`, `is_delete`, `is_best`, `is_new`, `is_hot`, `is_promote`, `bonus_type_id`, `last_update`, `goods_type`, `seller_note`, `give_integral`, `rank_integral`) VALUES
('$goods_id', '$res', '', '$goods_name', '+', 11, 0, '', 1, 0.000, 0.00, 0.00, 0.00, 0, 0, 1, '', '', '', '$goods_img', '$goods_img', '$goods_img', 1, '', 1, 1, 0, 1260140079, 0, 0, 0, 0, 1, 0, 0, 1260141083, 0, '', -1, -1);";
            $db->query($sql);
        } else {
            // Known product: just file it under the current category.
            $sql = "UPDATE `ecs_goods` SET `cat_id` = '$res' WHERE `ecs_goods`.`goods_id` =$goods_id ;";
            $db->query($sql);
        }
    }

    // Continue with the previous page, or return to category selection
    // once page 1 has been handled.
    $p = $page - 1;
    if ($p > 0) {
        echo '<meta http-equiv="refresh" content="1;url=?page='.$p.'&res='.$res.'">';
    } else {
        echo '<meta http-equiv="refresh" content="1;url=?page=0">';
    }
}

?>

声明: 本文采用 BY-NC-SA 协议进行授权. 转载请注明转自: php采集例子
  1. GlmlecyUlyyp7Kbw4YJkfEKxS | 2020年11月4日17:05 | #1

    UqEf5L7NLBdlYXYSnRBjQP9g4Jbs

  2. Xiaomitoto | 2021年5月14日20:07 | #2

    What a data of un-ambiguity and preserveness of precious experience concerning unpredicted feelings.

  1. 本文目前尚无任何 trackbacks 和 pingbacks.