|
- <?php
- /****/
- //Gary xu
- //1122557724@qq.com
- /****/
- namespace Xuyaoxiang;
-
/**
 * Minimal page scraper: downloads one URL with cURL and extracts a single
 * fragment of the HTML (title, description, ...) with a regular expression.
 *
 * Typical use: construct with the URL, then call get_content() with one of
 * the keys of $pattern_array.
 */
class Snoopy
{
    /**
     * Extraction patterns keyed by fragment name. Capture group 1 of each
     * pattern is the value that get_content() returns. The 'charset' entry is
     * also used internally by get_charset().
     */
    public $pattern_array = array(
        'title' => '/<title>(\s*.*)<\/title>/i',
        // "[dD]" rather than "[d|D]": inside a character class "|" is a literal
        // pipe. The capture is lazy so it stops at the first closing `" />`
        // instead of swallowing later tags on the same line.
        'description' => '/<meta +name="[dD]escription" +content="(.*?)" +\/>/',
        'charset' => '/charset="?([\w-]+)"?/i',
    );

    /** User-Agent header sent with the request (imitates a desktop browser). */
    public $user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36';

    /** Encoding the downloaded page is converted to before matching. */
    public $target_code = "utf-8";

    /** URL to download. */
    public $url;

    /** Raw response body, or false when the download failed. */
    public $data;

    /** Key of the pattern used by the most recent get_content() call. */
    public $pattern_key;

    /**
     * @param string $url URL of the page to download.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Registers a new extraction pattern or replaces an existing one.
     *
     * @param string $key Name used later with get_content().
     * @param string $val PCRE pattern; capture group 1 is the extracted value.
     */
    public function set_pattern($key, $val)
    {
        $this->pattern_array[$key] = $val;
    }

    /**
     * Downloads the page and returns the trimmed text captured by the pattern
     * registered under $pattern_key.
     *
     * @param string $pattern_key Key of $pattern_array to apply.
     * @return string|false The captured text, or false when the key is empty
     *                      or unknown, the download fails or is empty, or the
     *                      pattern does not match.
     */
    public function get_content($pattern_key)
    {
        $this->pattern_key = $pattern_key;

        // Reject unusable keys before spending a network round trip on them.
        if ((!is_string($pattern_key) && !is_int($pattern_key))
            || $pattern_key === ''
            || !isset($this->pattern_array[$pattern_key])
        ) {
            return false;
        }

        $this->curl_get_data();

        if (!is_string($this->data) || $this->data === '') {
            return false;
        }

        $this->check_charset($this->get_charset());

        $content = $this->get_key_content();

        // No match leaves the capture group undefined; report it as a failure
        // instead of trimming a missing value.
        if (!isset($content[1])) {
            return false;
        }

        return trim($content[1]);
    }

    /**
     * Downloads $this->url into $this->data. On failure $this->data is false.
     */
    public function curl_get_data()
    {
        $curl = curl_init();

        if ($curl === false) {
            $this->data = false;
            return;
        }

        // URL to fetch.
        curl_setopt($curl, CURLOPT_URL, $this->url);

        // Do not include the response headers in the returned body.
        curl_setopt($curl, CURLOPT_HEADER, 0);

        // Return the body as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

        curl_setopt($curl, CURLOPT_USERAGENT, $this->user_agent);

        // Perform the request.
        $this->data = curl_exec($curl);

        curl_close($curl);
    }

    /**
     * Converts $this->data from $page_charset to $this->target_code when the
     * two differ. The data is left untouched when the page charset is unknown
     * (empty), already equals the target, or is not supported by mbstring.
     *
     * @param string $page_charset Charset name detected in the page.
     */
    public function check_charset($page_charset)
    {
        if (!is_string($this->data) || !is_string($page_charset) || $page_charset === '') {
            return;
        }

        // Charset names are case-insensitive ("UTF-8" and "utf-8" are equal).
        if (strcasecmp($page_charset, $this->target_code) === 0) {
            return;
        }

        try {
            $converted = mb_convert_encoding($this->data, $this->target_code, $page_charset);
        } catch (\ValueError $e) {
            // PHP 8+ throws for an unknown encoding name; keep the raw data.
            return;
        }

        // Older PHP versions return false for an unknown encoding; do not
        // overwrite the downloaded page with that.
        if (is_string($converted)) {
            $this->data = $converted;
        }
    }

    /**
     * Applies the pattern selected by $this->pattern_key to $this->data.
     *
     * @return array The preg_match() match array; empty when the key is
     *               unknown, there is no data, or nothing matched.
     */
    public function get_key_content()
    {
        $content = array();
        $key = $this->pattern_key;

        if ((!is_string($key) && !is_int($key))
            || !isset($this->pattern_array[$key])
            || !is_string($this->data)
        ) {
            return $content;
        }

        preg_match($this->pattern_array[$key], $this->data, $content);

        return $content;
    }

    /**
     * Detects the charset declared in $this->data using the 'charset' pattern.
     *
     * @return string Lower-cased charset name, or '' when none is declared.
     */
    public function get_charset()
    {
        if (!isset($this->pattern_array['charset']) || !is_string($this->data)) {
            return '';
        }

        $reg_charset = array();

        if (preg_match($this->pattern_array['charset'], $this->data, $reg_charset) !== 1
            || !isset($reg_charset[1])
        ) {
            return '';
        }

        return strtolower($reg_charset[1]);
    }
}
-
-
// Demo: fetch the QQ home page and print its <title>.
header("Content-type:text/html;charset=utf-8");

$snoopy = new Snoopy("http://www.qq.com");

$title = $snoopy->get_content('title');

// get_content() returns false on failure; print nothing in that case.
if ($title !== false) {
    // The title comes from a remote page and is emitted as HTML, so escape it.
    echo htmlspecialchars((string) $title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
?>
复制代码 |
|