// Three alternative ways to fetch the body of a URL with a GET request.
// Each method stores the response body in $page.

// Method 1: the http:// stream wrapper via file_get_contents()
$page = file_get_contents('http://www.example.com/robots.txt');

// Method 2: the cURL extension
$c = curl_init('http://www.example.com/robots.txt');
// Have curl_exec() return the response body instead of printing it
curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
$page = curl_exec($c);
curl_close($c);

// Method 3: the PEAR HTTP_Request class
require_once 'HTTP/Request.php';
$r = new HTTP_Request('http://www.example.com/robots.txt');
$r->sendRequest();
$page = $r->getResponseBody();
// Three alternative ways to submit form data to a URL with a POST request.

// Method 1: file_get_contents() with an HTTP stream context
$url = 'http://www.example.com/submit.php';
// The submitted form data, encoded as query-string-style name=value pairs
$body = 'monkey=uncle&rhino=aunt';
$options = array(
    'method'  => 'POST',
    // Declare the body encoding explicitly; otherwise PHP raises a notice
    // and falls back to assuming this same content type.
    'header'  => 'Content-type: application/x-www-form-urlencoded',
    'content' => $body,
);
$context = stream_context_create(array('http' => $options));
print file_get_contents($url, false, $context);

// Method 2: the cURL extension
$url = 'http://www.example.com/submit.php';
$body = 'monkey=uncle&rhino=aunt';
$c = curl_init($url);
curl_setopt($c, CURLOPT_POST, true);
curl_setopt($c, CURLOPT_POSTFIELDS, $body);
// Have curl_exec() return the response body instead of printing it
curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
$page = curl_exec($c);
curl_close($c);

// Method 3: the PEAR HTTP_Request class
// require_once so the class is not redeclared if it was already loaded
require_once 'HTTP/Request.php';
$url = 'http://www.example.com/submit.php';
$r = new HTTP_Request($url);
$r->setMethod(HTTP_REQUEST_METHOD_POST);
$r->addPostData('monkey', 'uncle');
$r->addPostData('rhino', 'aunt');
$r->sendRequest();
$page = $r->getResponseBody();
// Sending cookies along with a request. Each method stores the response
// body in $page.

// Method 2: the cURL extension, passing the cookie string via CURLOPT_COOKIE
$c = curl_init('http://www.example.com/needs-cookies.php');
curl_setopt($c, CURLOPT_COOKIE, 'user=ellen; activity=swimming');
// Have curl_exec() return the response body instead of printing it
curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
$page = curl_exec($c);
curl_close($c);

// Method 3: the PEAR HTTP_Request class, setting the Cookie header by hand
// require_once so the class is not redeclared if it was already loaded
require_once 'HTTP/Request.php';
$r = new HTTP_Request('http://www.example.com/needs-cookies.php');
$r->addHeader('Cookie', 'user=ellen; activity=swimming');
$r->sendRequest();
$page = $r->getResponseBody();
通过修改请求头(header)中的信息,可以伪造 Referer 或 User-Agent 后再请求目标 URL。不少采用防盗链的网站会根据 Referer 中的来源信息来决定是否允许下载或访问资源。要理解这一点,需要具备一些 HTTP 头(header)方面的背景知识。
// Print the href of every link in linklist.html: repair the file into XML
// with Tidy, load it into a DOMDocument, and query it with XPath.
$doc = new DOMDocument();
$opts = array(
    'output-xml' => true,
    // Prevent DOMDocument from being confused about entities
    'numeric-entities' => true,
);
$doc->loadXML(tidy_repair_file('linklist.html', $opts));
$xpath = new DOMXPath($doc);
// Tell $xpath about the XHTML namespace
$xpath->registerNamespace('xhtml', 'http://www.w3.org/1999/xhtml');
foreach ($xpath->query('//xhtml:a/@href') as $node) {
    $link = $node->nodeValue;
    print $link . "\n";
}
// Print the URL of every link found in linklist.html.
// Skipped when the file cannot be read, so no bogus input reaches the extractor.
if (is_readable('linklist.html')) {
    $html = file_get_contents('linklist.html');
    $links = pc_link_extractor($html);
    foreach ($links as $link) {
        print $link[0] . "\n";
    }
}

/**
 * Extract the links from a chunk of HTML using a regular expression.
 *
 * @param string $html HTML source to scan.
 * @return array List of array(href, anchor text) pairs, one per <a> element
 *               matched, in document order; empty when nothing matches.
 */
function pc_link_extractor($html) {
    $links = array();
    // Group 1 is the href value (optionally quoted); group 2 is everything
    // between the opening tag and the closing </a>.
    preg_match_all(
        '/<a\s+.*?href=[\"\']?([^\"\' >]*)[\"\']?[^>]*>(.*?)<\/a>/i',
        $html,
        $matches,
        PREG_SET_ORDER
    );
    foreach ($matches as $match) {
        $links[] = array($match[1], $match[2]);
    }
    return $links;
}
/**
 * Convert plain text to HTML paragraphs.
 *
 * The text is HTML-escaped, split into paragraphs on blank lines, and each
 * paragraph is wrapped in <p>...</p>.
 *
 * NOTE(review): the loop body in the original was garbled by markup
 * stripping; only the paragraph wrapping survived. Confirm against the
 * original source whether other per-paragraph transforms were lost.
 *
 * @param string $s Plain text to convert.
 * @return string The paragraphs, each wrapped in <p> tags, joined by blank lines.
 */
function pc_text2html($s) {
    $s = htmlentities($s);
    // explode() replaces split(), which was removed in PHP 7; the
    // delimiter is a literal string, so no regex is needed.
    $grafs = explode("\n\n", $s);
    for ($i = 0, $j = count($grafs); $i < $j; $i++) {
        // Begin each paragraph with a new <p> element
        $grafs[$i] = '<p>' . $grafs[$i] . '</p>';
    }
    return implode("\n\n", $grafs);
}
