新浪新闻小偷 1- <!doctype html>
- <html lang="zh-CN">
- <head>
- <!-- NOTE: "??????" is a placeholder; set it to the character encoding this file is saved in. -->
- <meta http-equiv="Content-Type" content="text/html; charset=??????">
- <!-- The page reloads itself every 60 seconds, which runs the scrape below again. -->
- <meta http-equiv="Refresh" content="60">
- <meta http-equiv="Pragma" content="no-cache">
- <title>新浪_新闻抓取程序</title>
- <style>.banner { text-align: center; }</style>
- </head>
- <body>
- <div class="banner"><img src="pic.gif" alt=""></div>
- <?php
- // Fetches a Sina news index page, cuts it into tag tokens and text tokens,
- // keeps the tokens found between the start marker and the end marker, and
- // inserts every headline that is not yet present in the MySQL table.
- // NOTE(review): the mysql_* extension used here was removed in PHP 7; running
- // this on a current PHP requires porting the database calls to mysqli or PDO.
- $host="127.0.0.1"; // MySQL host name
- $namesql="????"; // MySQL user name (placeholder, fill in)
- $passsql="????"; // MySQL password (placeholder, fill in)
- $lib="news"; // database name
- $table="news"; // table name
- $filename="http://news.sina.com.cn/news1000/index.shtml"; // news page to fetch
- $keytop="新闻开始"; // marker that opens the news section
- $keybottom="新闻结束"; // marker that closes the news section
- // Stop early with a message instead of continuing with a dead connection or stream.
- $conn=mysql_connect($host,$namesql,$passsql);
- if(!$conn){ die("Cannot connect to MySQL: ".mysql_error()); }
- if(!mysql_select_db($lib,$conn)){ die("Cannot select database: ".mysql_error($conn)); }
- $fp=fopen($filename,"r",1);
- if(!$fp){ mysql_close($conn); die("Cannot open ".$filename); }
- $text=array(); // tokens kept from the news section, in page order
- $flag=0; $add=""; $found=0; $end=0; $i=0; $word="";
- while(!$end){
-   // Read one token: either a complete tag "<...>" or the text run in front of the next "<".
-   while(!$flag){
-     $word=fgetc($fp);
-     // End of stream before the end marker: leave both loops instead of spinning forever.
-     if($word===false){ break 2; }
-     $add=$add.$word;
-     // A "<" after other characters ends a text run; it is cut off here and re-used below.
-     if($word=='<' and strlen($add)!=1){ $flag=1; $add=substr($add,0,-1); }
-     if($word=='>'){ $flag=1; }
-   }
-   if(strpos($add,$keytop)!==false){ $found=1; }
-   if(strpos($add,$keybottom)!==false){ $found=0; $end=1; }
-   // Inside the section keep plain text runs and tags that carry an href; drop all other tags.
-   $istag=(strpos($add,'<')!==false);
-   if($found==1 and (!$istag or strpos($add,'href')!==false)){ $text[$i]=$add; $i++; }
-   // Start the next token with the carried-over "<" when a text run was just closed.
-   if($word=='<' and $flag==1){ $add=$word; } else { $add=""; }
-   $flag=0;
- }
- fclose($fp);
- // Kept tokens are consumed in groups of five starting at index 1; offsets +1..+4
- // feed the TYPE, URL, TITLE and DATE columns. Incomplete trailing groups are skipped.
- $total=count($text);
- for($i=1;$i+4<$total;$i+=5){
-   // The values come from a remote page, so escape them before they enter SQL.
-   $type=mysql_real_escape_string($text[$i+1],$conn);
-   $url=mysql_real_escape_string($text[$i+2],$conn);
-   $title=mysql_real_escape_string($text[$i+3],$conn);
-   $date=mysql_real_escape_string($text[$i+4],$conn);
-   $ins="select 1 from $table where TITLE='$title' limit 1";
-   $list=mysql_query($ins,$conn);
-   // A failed lookup says nothing about duplicates, so do not insert blindly.
-   if($list===false){ continue; }
-   $count=mysql_num_rows($list);
-   mysql_free_result($list);
-   if($count==0){
-     $ins="insert into $table(TYPE,URL,TITLE,DATE) values ('$type','$url','$title','$date')";
-     mysql_query($ins,$conn);
-   }
- }
- mysql_close($conn);
- ?>
- </body>
- </html>
复制代码 2.- <?
- // Prints the part of the Sina news index page that follows the chunk containing
- // the start marker, up to and including the chunk containing the end marker.
- // A chunk is one fgets() read: a line, or at most 511 bytes of a longer line.
- $handle = fopen("http://news.sina.com.cn/news1000/","r");
- if($handle === false)
- {
-   // Without this check feof() would be called on a non-stream and the loop would never end.
-   die("Cannot open the news page");
- }
- $sign = 0; // 1 while inside the news section
- while(!feof($handle))
- {
-   $message = fgets($handle,512);
-   if($message === false)
-   {
-     break; // nothing more to read
-   }
-   if($sign == 1)
-   {
-     print($message);
-   }
-   // Byte-exact substring search; ereg() was removed in PHP 7 and the markers are literal text.
-   if(strpos($message,"新闻开始") !== false)
-   {
-     $sign = 1;
-   }
-   else if(strpos($message,"新闻结束") !== false)
-   {
-     $sign = 0;
-   }
- }
- fclose($handle);
- ?>
复制代码
|