关于curl / curl_multi的一些实验

2018-06-22 05:09:39来源:未知 阅读 ()

新老客户大回馈,云服务器低至5折

好几天没写博客了。这里主要记录自己的学习过程,贴一些 curl / curl_multi_exec 的示例代码,留作备忘。

<?php
/**
 * Created by PhpStorm.
 * User: f3ngt1ng
 * Date: 2017/2/23
 * Time: 10:46
 */

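// Practising curl_multi_exec: a simple crawler that fetches URLs concurrently
// (curl_multi multiplexes transfers in one thread; it is not multi-threaded) and can go through proxies.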


/**
 * Fetch a single URL with cURL and return the response body.
 *
 * The transfer is limited to 10 seconds and sends a "Connection: close"
 * request header.
 *
 * @param string      $url   URL to request.
 * @param string|null $proxy Value for CURLOPT_PROXY (e.g. "host:port"); null leaves
 *                           the proxy option unset.
 * @param array       $auth  Proxy credentials, joined with ':' and passed to
 *                           CURLOPT_PROXYUSERPWD (e.g. array('user', 'pass'));
 *                           an empty array sends no proxy credentials.
 *
 * @return string|false The response body, or false when curl_exec() fails.
 */
function curl_crawl($url, $proxy = null, $auth = array()){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // Request headers belong in CURLOPT_HTTPHEADER. CURLOPT_HEADER is a boolean
    // toggle that prepends the *response* headers to the returned body, so
    // passing the header list there corrupted the result and never sent the header.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('Connection: close'));
    if ($proxy !== null) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
    }
    if (!empty($auth)) {
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, implode(':', $auth));
    }
    $content = curl_exec($ch);
    curl_close($ch);
    return $content;
}

/*//$proxy = '200.255.220.211:8080';
$url = 'http://demo.com:8080/2.23/server.php';
$content = curl_crawl($url, null); // pass a 'host:port' string instead of null to go through a proxy
echo $content;
*/
/**
 * Fetch several URLs concurrently with curl_multi and collect the response bodies.
 *
 * Transfers are only created when $threads is exactly 1 (one transfer per URL);
 * for any other value no request is made and the result is an empty array.
 * Each transfer follows up to 5 redirects. The collected results are printed
 * with var_dump() and also returned.
 *
 * @param array $url     URLs to fetch. Results are keyed by URL, so a URL that
 *                       appears more than once is fetched only once.
 * @param array $proxy   Candidate proxies (CURLOPT_PROXY values); every transfer
 *                       picks one at random. Empty means no proxy option is set.
 * @param array $auth    Proxy credentials, joined with ':' and passed to
 *                       CURLOPT_PROXYUSERPWD; an empty array sends none.
 * @param int   $threads Only the value 1 (one transfer per URL) is implemented.
 *
 * @return array Map of URL => body as returned by curl_multi_getcontent().
 */
function curl_multi_crawl($url = array(), $proxy = array(), $auth = array(), $threads = 1){
    $mul = curl_multi_init();
    $curl_handlers = array();
    $results = array();

    // One transfer per URL (the "single URL, many threads" mode is not implemented).
    if ($threads === 1) {
        foreach ($url as $target) {
            // Results are keyed by URL; a second handle for the same URL would
            // replace the first in the map and leave it attached but never collected.
            if (isset($curl_handlers[$target])) {
                continue;
            }
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $target);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
            if (!empty($proxy)) {
                // array_rand() works for any key layout, not only 0..n-1 lists.
                curl_setopt($ch, CURLOPT_PROXY, $proxy[array_rand($proxy)]);
            }
            if (!empty($auth)) {
                curl_setopt($ch, CURLOPT_PROXYUSERPWD, implode(':', $auth));
            }
            curl_multi_add_handle($mul, $ch);
            $curl_handlers[$target] = $ch;
        }
    }

    // Drive all transfers to completion.
    $active = 0;
    do {
        $mrc = curl_multi_exec($mul, $active);
        if ($active && $mrc === CURLM_OK) {
            // Wait for socket activity. curl_multi_select() may return -1 when it
            // cannot wait; sleep briefly and call curl_multi_exec() again instead
            // of spinning without making progress.
            if (curl_multi_select($mul) === -1) {
                usleep(1000);
            }
        }
    } while ($mrc === CURLM_CALL_MULTI_PERFORM || ($active && $mrc === CURLM_OK));

    foreach ($curl_handlers as $target => $handler) {
        $results[$target] = curl_multi_getcontent($handler);
        curl_multi_remove_handle($mul, $handler);
    }
    curl_multi_close($mul);
    var_dump($results);

    return $results;
}

/*
$proxy = array('127.0.0.1:8888');
$url = array('http://demo.com:8080/2.23/server.php', 'http://www.baidu.com');
curl_multi_crawl($url, $proxy);
*/

标签:

版权申明:本站文章部分自网络,如有侵权,请联系:west999com@outlook.com
特别注意:本站所有转载文章言论不代表本站观点,本站所提供的摄影照片,插画,设计作品,如需使用,请与原作者联系,版权归原作者所有

上一篇:PHP编辑器PhpStorm运行缓慢问题

下一篇:【夯实PHP基础】PHPUnit -- PHP测试框架