VideoUrlParser是一款基于PHP根据视频URL抓取视频信息的工具,支持优酷、土豆、酷六、56、乐视、搜狐、腾讯、新浪。
使用方法:
复制代码 代码如下:
require_once "VideoUrlParser.class.php";
$url = "http://v.youku.com/v_show/id_XMjkwMzc0Njg4.html";
$info = VideoUrlParser::parse($url);
print_r($info);
说明:引入该工具的php文件VideoUrlParser.class.php,$url变量后面的字符串为视频页的地址。parse()返回的$info是一个数组(解析失败时返回false),因此需要用print_r或var_dump输出,直接echo只会显示"Array"。
附:$info含有的几个值,分别是img(视频缩略图)、title(视频标题)、url(视频页地址)、swf(视频swf播放地址),以及object(可直接输出的播放器嵌入代码)。我只用到了img和swf地址,具体的可以根据自己的需要进行调整。
VideoUrlParser类源码:
复制代码 代码如下:
/**
 * VideoUrlParser
 *
 * Scrapes basic metadata (thumbnail, title, page URL and Flash player URL)
 * for a video page hosted on Youku, Tudou, Ku6, 56.com, Letv, Sina, Sohu or
 * Tencent Video.
 *
 * @version   1.2
 * @copyright 2005-2011 HDJ.ME
 * @author    Dijia Huang
 * @license   PHP Version 3.0
 *
 * Usage:
 *   require_once "VideoUrlParser.class.php";
 *   $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
 *   $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
 *   $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
 *   $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
 *   $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
 *   $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
 *   $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
 *
 *   foreach ($urls as $url) {
 *       $info = VideoUrlParser::parse($url);
 *       if ($info === false) {
 *           continue;
 *       }
 *       echo $info['title'], "\n";
 *       echo $info['object'], "\n";
 *   }
 */
class VideoUrlParser
{
    // Kept on a single line: a line break inside this value would split the
    // User-Agent request header in two.
    const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";
    const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";
    // Network timeout, in seconds, shared by all three fetch helpers.
    const TIMEOUT = 10;

    /**
     * Detect the hosting site from the URL and delegate to its parser.
     *
     * @param string $url          Address of the video page.
     * @param mixed  $createObject When truthy, an 'object' key holding Flash
     *                             embed markup for the 'swf' URL is added.
     * @static
     * @access public
     * @return array|false Array with the keys img, title, url, swf (and
     *                     object), or false when the URL is not supported or
     *                     the page could not be parsed.
     */
    public static function parse($url = '', $createObject = true)
    {
        if (!is_string($url) || $url === '') {
            return false;
        }
        if (!preg_match(self::CHECK_URL_VALID, strtolower($url), $matches)) {
            return false;
        }

        switch ($matches[1]) {
            case 'youku.com':
                $data = self::_parseYouku($url);
                break;
            case 'tudou.com':
                $data = self::_parseTudou($url);
                break;
            case 'ku6.com':
                $data = self::_parseKu6($url);
                break;
            case '56.com':
                $data = self::_parse56($url);
                break;
            case 'letv.com':
                $data = self::_parseLetv($url);
                break;
            case 'video.sina.com.cn':
                $data = self::_parseSina($url);
                break;
            case 'my.tv.sohu.com':
            case 'tv.sohu.com':
                $data = self::_parseSohu($url);
                break;
            case 'v.qq.com':
                $data = self::_parseQq($url);
                break;
            default:
                $data = false;
        }

        if ($data && $createObject) {
            // NOTE(review): the embed markup was stripped from the copy this
            // was restored from (the literal was empty); the attribute set
            // below is a reconstruction -- confirm against the upstream file.
            $swf = htmlspecialchars((string) self::_val($data, 'swf'), ENT_QUOTES, 'UTF-8');
            $data['object'] = '<embed src="' . $swf . '" quality="high" width="480" height="400"'
                . ' align="middle" allowNetworking="all" allowScriptAccess="always"'
                . ' type="application/x-shockwave-flash"></embed>';
        }

        return $data ? $data : false;
    }

    /**
     * Tencent Video (v.qq.com).
     *
     * "/play/" pages only carry a redirect, so the redirect target is
     * resolved first and then parsed like a regular page.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseQq($url)
    {
        if (preg_match("/\/play\//", $url)) {
            $html = self::_fget($url);
            // Capture only the target, without the leading "url=".
            if (!$html || !preg_match("/url=([^\"]+)/", $html, $matches)) {
                return false;
            }
            $url = $matches[1];
        }

        $vid = preg_match("/vid=([^_&#]+)/", $url, $matches) ? $matches[1] : '';

        $html = self::_fget($url);
        if (!$html || !preg_match("/flashvars\s=\s\"([^;]+)/s", $html, $matches)) {
            return false;
        }
        $query = $matches[1];

        if ($vid === '') {
            // Fall back to the default vid embedded in the page script.
            if (!preg_match("/vid\s?=\s?vid\s?\|\|\s?\"(\w+)\";/i", $html, $matches)) {
                return false;
            }
            $vid = $matches[1];
        }

        $query = str_replace('"+vid+"', $vid, $query);
        parse_str($query, $output);
        $cid = self::_val($output, 'cid');

        $data = array();
        $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
        $data['url'] = $url;
        $data['title'] = self::_val($output, 'title');
        $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?" . $query;
        return $data;
    }

    /**
     * Youku (youku.com).
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseYouku($url)
    {
        if (!preg_match("#id\_(\w+)#", $url, $matches)) {
            // Playlist pages do not carry the id in the URL; read it from the page.
            if (!preg_match("#v_playlist\/#", $url)) {
                return false;
            }
            $html = self::_fget($url);
            if (!$html || !preg_match("#videoId2\s*=\s*\'(\w+)\'#", $html, $matches)) {
                return false;
            }
        }
        $vid = $matches[1];

        $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$vid}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";
        $retval = self::_cget($link);
        if (!$retval) {
            return false;
        }

        $json = json_decode($retval, true);
        if (!isset($json['data'][0]) || !is_array($json['data'][0])) {
            return false;
        }
        $item = $json['data'][0];

        $data = array();
        $data['img'] = self::_val($item, 'logo');
        $data['title'] = self::_val($item, 'title');
        $data['url'] = $url;
        $data['swf'] = "http://player.youku.com/player.php/sid/{$vid}/v.swf";
        return $data;
    }

    /**
     * Tudou (tudou.com): single "view" pages and playlist pages.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseTudou($url)
    {
        if (!preg_match("#view/([-\w]+)/#", $url, $matches)) {
            return self::_parseTudouPlaylist($url);
        }
        $code = $matches[1];

        // The player URL answers with a redirect whose query string carries
        // the metadata, so only the Location header is of interest.
        $ret = self::_fsget("/v/{$code}/v.swf", "www.tudou.com");
        if ($ret && preg_match("#\r?\nLocation:\s*([^\r\n]+)#i", $ret, $mat)) {
            $params = array();
            $query = parse_url(urldecode(trim($mat[1])), PHP_URL_QUERY);
            if (is_string($query)) {
                parse_str($query, $params);
            }

            $data = array();
            $data['img'] = self::_val($params, 'snap_pic');
            $data['title'] = self::_val($params, 'title');
            $data['url'] = $url;
            $data['swf'] = "http://www.tudou.com/v/{$code}/v.swf";
            return $data;
        }
        return false;
    }

    /**
     * Tudou playlist page: locate the current item inside the page's
     * listData script variable.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseTudouPlaylist($url)
    {
        if (strpos($url, "/playlist/") === false) {
            return false;
        }
        $lowerurl = strtolower($url);
        if (strpos($lowerurl, 'iid=') !== false) {
            $quarr = explode("iid=", $lowerurl);
            if (empty($quarr[1])) {
                return false;
            }
        } elseif (preg_match("#p\/l(\d+).#", $lowerurl, $quarr)) {
            if (empty($quarr[1])) {
                return false;
            }
        }

        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        $html = self::_toUtf8($html);

        if (!preg_match("/lid_code\s=\slcode\s=\s[\'\"]([^\'\"]+)/s", $html, $matches)) {
            return false;
        }
        $icode = $matches[1];
        if (!preg_match("/iid\s=\s.*?\|\|\s(\d+)/sx", $html, $matches)) {
            return false;
        }
        $iid = $matches[1];

        $img = null;
        $title = null;
        if (preg_match("/listData\s=\s(\[\{.*\}\])/sx", $html, $matches)) {
            // Turn the JavaScript array literal into JSON: drop whitespace,
            // blank out non-literal values and quote the bare keys.
            $find = array("/\n/", '/\s/', "/:[^\d\"]\w+[^\,]*,/i", "/(\{|,)(\w+):/");
            $replace = array("", "", ':"",', '\\1"\\2":');
            $items = json_decode(preg_replace($find, $replace, $matches[1]));
            if (is_array($items) || is_object($items)) {
                foreach ($items as $val) {
                    // Only take metadata from the entry that really matches.
                    if (is_object($val) && isset($val->iid) && $val->iid == $iid) {
                        $img = isset($val->pic) ? $val->pic : null;
                        $title = isset($val->title) ? $val->title : null;
                        break;
                    }
                }
            }
        }

        $data = array();
        $data['img'] = $img;
        $data['title'] = $title;
        $data['url'] = $url;
        $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
        return $data;
    }

    /**
     * Ku6 (ku6.com): "show_<id>" pages use a JSON endpoint, "show/" pages
     * are scraped from the page script.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseKu6($url)
    {
        if (preg_match("/show\_/", $url)) {
            if (!preg_match("#/([-\w]+)\.html#", $url, $matches)) {
                return false;
            }
            $vid = $matches[1];
            // Separate variable so the returned 'url' stays the page address.
            $api = "http://v.ku6.com/fetchVideo4Player/{$vid}.html";
            $html = self::_fget($api);
            if (!$html) {
                return false;
            }
            $json = json_decode($html, true);
            if (!$json || !isset($json['data']) || !is_array($json['data'])) {
                return false;
            }

            $data = array();
            $data['img'] = self::_val($json['data'], 'picpath');
            $data['title'] = self::_val($json['data'], 't');
            $data['url'] = $url;
            $data['swf'] = "http://player.ku6.com/refer/{$vid}/v.swf";
            return $data;
        }

        if (preg_match("/show\//", $url)) {
            $html = self::_fget($url);
            if (!$html || !preg_match("/ObjectInfo\s?=\s?([^\n]*)};/si", $html, $matches)) {
                return false;
            }
            $str = $matches[1];

            $data = array();
            $data['img'] = preg_match("/cover\s?:\s?\"([^\"]+)\"/", $str, $matches) ? $matches[1] : null;

            $data['title'] = null;
            if (preg_match("/title\"?\s?:\s?\"([^\"]+)\"/", $str, $matches)) {
                // Round-trip through JSON to decode \uXXXX escapes.
                $json = json_decode("{\"title\":\"{$matches[1]}\"}", true);
                $data['title'] = isset($json['title']) ? $json['title'] : $matches[1];
            }

            $data['url'] = $url;

            $query = '';
            if (preg_match("/\"(vid=[^\"]+)\"\sname=\"flashVars\"/s", $html, $matches)) {
                // flashVars sits in an HTML attribute, so "&" is entity-encoded.
                $query = str_replace("&amp;", '&', $matches[1]);
            }
            if (!preg_match("/\/\/player\.ku6cdn\.com[^\"\']+/", $html, $matches)) {
                return false;
            }
            $data['swf'] = 'http:' . $matches[0] . '?' . $query;
            return $data;
        }

        return false;
    }

    /**
     * 56.com.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parse56($url)
    {
        if (!preg_match("#/v_(\w+)\.html#", $url, $matches)) {
            return false;
        }
        $vid = $matches[1];

        $retval = self::_cget("http://vxml.56.com/json/{$vid}/?src=out");
        if (!$retval) {
            return false;
        }
        $json = json_decode($retval, true);
        if (!isset($json['info']) || !is_array($json['info'])) {
            return false;
        }

        $data = array();
        $data['img'] = self::_val($json['info'], 'img');
        $data['title'] = self::_val($json['info'], 'Subject');
        $data['url'] = $url;
        $data['swf'] = "http://player.56.com/v_{$vid}.swf";
        return $data;
    }

    /**
     * Letv (letv.com).
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseLetv($url)
    {
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }

        // NOTE(review): the URL prefix of this pattern was stripped from the
        // copy this was restored from; it is reconstructed here as the page's
        // Weibo share link, whose query string is read for pic and title --
        // confirm against the upstream file.
        $params = array();
        if (preg_match("#http://v.t.sina.com.cn/([^'\"]*)#", $html, $matches)) {
            $query = parse_url(urldecode($matches[0]), PHP_URL_QUERY);
            if (is_string($query)) {
                parse_str($query, $params);
            }
        }

        if (!preg_match("#vplay/(\d+)#", $url, $matches)) {
            return false;
        }

        $data = array();
        $data['img'] = self::_val($params, 'pic');
        $data['title'] = self::_val($params, 'title');
        $data['url'] = $url;
        $data['swf'] = "http://www.letv.com/player/x{$matches[1]}.swf";
        return $data;
    }

    /**
     * Sohu TV (tv.sohu.com / my.tv.sohu.com): read the og:* meta tags.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseSohu($url)
    {
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        $html = self::_toUtf8($html);

        // Key the values by property name so the result does not depend on
        // the order of the meta tags in the page.
        $pattern = "/og:(title|image|videosrc)\"\scontent=\"([^\"]+)\"/s";
        if (!preg_match_all($pattern, $html, $found, PREG_SET_ORDER)) {
            return false;
        }
        $meta = array();
        foreach ($found as $hit) {
            if (!isset($meta[$hit[1]])) {
                $meta[$hit[1]] = $hit[2];
            }
        }

        $data = array();
        $data['img'] = self::_val($meta, 'image');
        $data['title'] = self::_val($meta, 'title');
        $data['url'] = $url;
        $data['swf'] = self::_val($meta, 'videosrc');
        return $data;
    }

    /**
     * Sina video (video.sina.com.cn).
     *
     * The returned 'url' is the normalised "<id>-<uid>.html" page address
     * that is actually fetched.
     *
     * @param string $url
     * @return array|false
     */
    private static function _parseSina($url)
    {
        if (!preg_match("/(\d+)(?:\-|\_)(\d+)/", $url, $matches)) {
            return false;
        }
        $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";

        $html = self::_fget($url);
        // NOTE(review): this pattern was truncated in the copy this was
        // restored from; reconstructed as "everything after `video :` up to
        // the last `}` before the next tag" -- confirm against upstream.
        if (!$html || !preg_match("/video\s?:\s?([^<]+)}/", $html, $matches)) {
            return false;
        }

        // Turn the JavaScript object literal into JSON.
        $find = array("/\n/", "/\s*/", "/\'/", "/\{([^:,]+):/", "/,([^:]+):/", "/:[^\d\"]\w+[^\,]*,/i");
        $replace = array('', '', '"', '{"\\1":', ',"\\1":', ':"",');
        $arr = json_decode(preg_replace($find, $replace, $matches[1]), true);
        if (!is_array($arr)) {
            return false;
        }

        $data = array();
        $data['img'] = self::_val($arr, 'pic');
        $data['title'] = self::_val($arr, 'title');
        $data['url'] = $url;
        $data['swf'] = self::_val($arr, 'swfOutsideUrl');
        return $data;
    }

    /**
     * Fetch a URL with file_get_contents.
     *
     * Only http(s) URLs are accepted so that a crafted input can never be
     * resolved as a local path or another stream wrapper.
     *
     * @param string $url
     * @return string|false Body (gunzipped when it is a gzip stream) or false.
     */
    private static function _fget($url = '')
    {
        if (!$url || !preg_match('#^https?://#i', $url)) {
            return false;
        }
        $context = stream_context_create(array('http' => array('timeout' => self::TIMEOUT)));
        $html = file_get_contents($url, false, $context);
        if ($html === false) {
            return false;
        }
        return self::_inflateIfGzip($html);
    }

    /**
     * Fetch a path over a raw socket and return the complete response,
     * status line and headers included (callers read the Location header).
     *
     * @param string $path
     * @param string $host
     * @param string $user_agent Defaults to self::USER_AGENT.
     * @return string|false
     */
    private static function _fsget($path = '/', $host = '', $user_agent = '')
    {
        if (!$path || !$host) {
            return false;
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        // "Connection: close" makes the server end the stream, otherwise the
        // read loop would wait for the keep-alive timeout.
        $out = implode("\r\n", array(
            "GET {$path} HTTP/1.1",
            "Host: {$host}",
            "User-Agent: {$user_agent}",
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language: zh-cn,zh;q=0.5",
            "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7",
            "Connection: close",
        )) . "\r\n\r\n";

        $fp = @fsockopen($host, 80, $errno, $errstr, self::TIMEOUT);
        if (!$fp) {
            return false;
        }
        stream_set_timeout($fp, self::TIMEOUT);
        if (!fputs($fp, $out)) {
            fclose($fp);
            return false;
        }

        $html = '';
        while (!feof($fp)) {
            $line = fgets($fp, 1024);
            if ($line === false) {
                // Read error or timeout: stop instead of spinning.
                break;
            }
            $html .= $line;
        }
        fclose($fp);

        return self::_inflateIfGzip($html);
    }

    /**
     * Fetch a URL with cURL, falling back to _fget() when the cURL
     * extension is not loaded.
     *
     * @param string $url
     * @param string $user_agent Defaults to self::USER_AGENT.
     * @return string|false
     */
    private static function _cget($url = '', $user_agent = '')
    {
        if (!$url) {
            return false;
        }
        if (!function_exists('curl_init')) {
            return self::_fget($url);
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        // Return the body instead of printing it (replaces output buffering).
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::TIMEOUT);
        curl_setopt($ch, CURLOPT_TIMEOUT, self::TIMEOUT);
        if (strlen($user_agent)) {
            curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        }
        $html = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;
        // The handle is released when $ch goes out of scope.
        unset($ch);

        if ($failed || !is_string($html) || !strlen($html)) {
            return false;
        }
        return self::_inflateIfGzip($html);
    }

    /**
     * Return the gunzipped form of $raw when it is a gzip stream, otherwise
     * $raw itself.
     *
     * @param string $raw
     * @return string
     */
    private static function _inflateIfGzip($raw)
    {
        $decoded = self::_gzdecode($raw);
        return $decoded ? $decoded : $raw;
    }

    /**
     * Convert a GB2312 page to UTF-8; the input is returned unchanged when
     * the conversion fails.
     *
     * @param string $html
     * @return string
     */
    private static function _toUtf8($html)
    {
        $converted = iconv("GB2312", "UTF-8//IGNORE", $html);
        return ($converted === false || $converted === '') ? $html : $converted;
    }

    /**
     * Null-safe array lookup.
     *
     * @param mixed  $source
     * @param string $key
     * @return mixed Value at $key, or null when $source is not an array or
     *               the key is not set.
     */
    private static function _val($source, $key)
    {
        return (is_array($source) && isset($source[$key])) ? $source[$key] : null;
    }

    /**
     * Decode a single-member gzip stream (RFC 1952).
     *
     * @param string $data
     * @return string|null|false Decoded payload; null when $data is not in
     *                           gzip format; false when the stream is
     *                           malformed or fails the size/CRC check.
     */
    private static function _gzdecode($data)
    {
        if (!is_string($data)) {
            return null;
        }
        $len = strlen($data);
        // 10-byte header + 8-byte trailer, and the gzip magic number.
        if ($len < 18 || strcmp(substr($data, 0, 2), "\x1f\x8b")) {
            return null; // Not GZIP format (See RFC 1952)
        }
        $method = ord($data[2]); // Compression method
        $flags = ord($data[3]);  // Flags
        if (($flags & 31) != $flags) {
            // Reserved bits are set -- NOT ALLOWED by RFC 1952
            return null;
        }

        // Fixed part: magic(2) method(1) flags(1) mtime(4) xfl(1) os(1).
        $headerlen = 10;

        if ($flags & 4) {
            // FEXTRA: 2-byte little-endian length, then that many bytes.
            if ($len - $headerlen - 2 < 8) {
                return false; // Invalid format
            }
            $extralen = unpack("v", substr($data, $headerlen, 2));
            $extralen = $extralen[1];
            if ($len - $headerlen - 2 - $extralen < 8) {
                return false; // Invalid format
            }
            $headerlen += 2 + $extralen;
        }

        if ($flags & 8) {
            // FNAME: NUL-terminated original file name.
            if ($len - $headerlen - 1 < 8) {
                return false; // Invalid format
            }
            $end = strpos($data, chr(0), $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false; // Invalid format
            }
            $headerlen = $end + 1;
        }

        if ($flags & 16) {
            // FCOMMENT: NUL-terminated comment.
            if ($len - $headerlen - 1 < 8) {
                return false; // Invalid format
            }
            $end = strpos($data, chr(0), $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false; // Invalid header format
            }
            $headerlen = $end + 1;
        }

        if ($flags & 1) {
            // FHCRC: low 16 bits of the CRC32 of the header so far.
            if ($len - $headerlen - 2 < 8) {
                return false; // Invalid format
            }
            $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
            $headercrc = unpack("v", substr($data, $headerlen, 2));
            $headercrc = $headercrc[1];
            if ($headercrc != $calccrc) {
                return false; // Bad header CRC
            }
            $headerlen += 2;
        }

        // Trailer: CRC32 and size (mod 2^32) of the uncompressed data.
        $datacrc = unpack("V", substr($data, -8, 4));
        $datacrc = $datacrc[1];
        $isize = unpack("V", substr($data, -4));
        $isize = $isize[1];

        $bodylen = $len - $headerlen - 8;
        if ($bodylen < 1) {
            return null;
        }
        if ($method !== 8 || !function_exists('gzinflate')) {
            // Deflate (8) is the only supported compression method.
            return false;
        }

        $inflated = gzinflate(substr($data, $headerlen, $bodylen));
        if ($inflated === false) {
            return false;
        }

        // Compare as unsigned decimal strings so that the sign of 32-bit
        // integers cannot cause a false mismatch.
        if (sprintf('%u', $isize) !== sprintf('%u', strlen($inflated))
            || sprintf('%u', $datacrc) !== sprintf('%u', crc32($inflated))
        ) {
            // Bad format! Length or CRC doesn't match!
            return false;
        }
        return $inflated;
    }
}

Outils d'IA chauds

Undresser.AI Undress
Application basée sur l'IA pour créer des photos de nu réalistes

AI Clothes Remover
Outil d'IA en ligne pour supprimer les vêtements des photos.

Undress AI Tool
Images de déshabillage gratuites

Clothoff.io
Dissolvant de vêtements AI

AI Hentai Generator
Générez AI Hentai gratuitement.

Article chaud

Outils chauds

Télécharger la version Mac de l'éditeur Atom
L'éditeur open source le plus populaire

ZendStudio 13.5.1 Mac
Puissant environnement de développement intégré PHP

DVWA
Damn Vulnerable Web App (DVWA) est une application Web PHP/MySQL très vulnérable. Ses principaux objectifs sont d'aider les professionnels de la sécurité à tester leurs compétences et leurs outils dans un environnement juridique, d'aider les développeurs Web à mieux comprendre le processus de sécurisation des applications Web et d'aider les enseignants/étudiants à enseigner/apprendre dans un environnement de classe. Application Web sécurité. L'objectif de DVWA est de mettre en pratique certaines des vulnérabilités Web les plus courantes via une interface simple et directe, avec différents degrés de difficulté. Veuillez noter que ce logiciel

Version Mac de WebStorm
Outils de développement JavaScript utiles

Navigateur d'examen sécurisé
Safe Exam Browser est un environnement de navigation sécurisé permettant de passer des examens en ligne en toute sécurité. Ce logiciel transforme n'importe quel ordinateur en poste de travail sécurisé. Il contrôle l'accès à n'importe quel utilitaire et empêche les étudiants d'utiliser des ressources non autorisées.