Heim >Backend-Entwicklung >PHP-Tutorial >php各种优酷,土豆,视频封装类

php各种优酷,土豆,视频封装类

WBOY
WBOYOriginal
2016-07-25 08:49:37 1112 Durchsuche
php各种视频采集类封装,优酷,土豆,腾讯视频、56、搜狐视频、酷六、新浪视频、乐视.......

九弄 jigwang.com
  1. /**
  2. * Video
  3. *
  4. * @package
  5. * @version 1.3
  6. * @copyright 2011 - 2013
  7. *
  8. * Usage
  9. * require_once "VideoUrlParser.class.php";
  10. * $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
  11. * $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
  12. * $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
  13. * $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
  14. * $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
  15. * $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
  16. * $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
  17. *
  18. * foreach($urls as $url){
  19. * $info = VideoUrlParser::parse($url);
  20. * //var_dump($info);
  21. * echo "{$info['title']}";
  22. * echo "<br />";
  23. * echo $info['object'];
  24. * echo "<br />";
  25. * }
  26. *
  27. *
  28. *
  29. * //优酷
  30. * http://v.youku.com/v_show/id_XMjU0NjY4OTEy.html
  31. *
  32. *
  33. * //酷六
  34. * http://v.ku6.com/special/show_3917484/x0BMXAbgZdQS6FqN.html
  35. *
  36. *
  37. * //土豆
  38. * http://www.tudou.com/playlist/p/a65929.html?iid=74905844
  39. *
  40. *
  41. * //56
  42. * http://www.56.com/u98/v_NTkyODY2NTU.html
  43. *
  44. *
  45. * //新浪播客
  46. * http://video.sina.com.cn/v/b/46909166-1290055681.html
  47. *
  48. *
  49. * //乐视
  50. * http://www.letv.com/ptv/vplay/1168109.html
  51. *
  52. */
  53. class VideoUrlparser
  54. {
  /**
   * User-Agent header value sent by the cURL and raw-socket fetchers.
   *
   * Must stay on one line: a line break inside the literal ends up inside the
   * HTTP header and splits it in two.
   */
  const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

  /**
   * Matches the host part of every supported site. Capture group 1 is the
   * value parse() switches on.
   */
  const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";
  /**
   * Resolve a video page URL to its metadata by dispatching to the matching
   * site-specific parser.
   *
   * @param string $url          Page URL on one of the hosts in CHECK_URL_VALID.
   * @param mixed  $createObject When truthy, an 'object' entry holding embed
   *                             markup for the player is added to the result.
   * @static
   * @access public
   * @return array|false The array built by the site parser ('img', 'title',
   *                     'url', 'swf'), plus 'object' when requested; false when
   *                     the host is unsupported or the site parser failed.
   */
  public static function parse($url='', $createObject=true){
      // Host detection is case-insensitive; the parsers still get the URL as given.
      if (!preg_match(self::CHECK_URL_VALID, strtolower($url), $matches)) {
          return false;
      }

      switch ($matches[1]) {
          case 'youku.com':
              $data = self::_parseYouku($url);
              break;
          case 'tudou.com':
              $data = self::_parseTudou($url);
              break;
          case 'ku6.com':
              $data = self::_parseKu6($url);
              break;
          case '56.com':
              $data = self::_parse56($url);
              break;
          case 'letv.com':
              $data = self::_parseLetv($url);
              break;
          case 'video.sina.com.cn':
              $data = self::_parseSina($url);
              break;
          // CHECK_URL_VALID can only yield these two sohu hosts; a bare
          // 'sohu.com' never reaches the switch, so that case was dropped.
          case 'my.tv.sohu.com':
          case 'tv.sohu.com':
              $data = self::_parseSohu($url);
              break;
          case 'v.qq.com':
              $data = self::_parseQq($url);
              break;
          default:
              $data = false;
      }

      if ($data && $createObject) {
          // NOTE(review): the published listing assigns an empty string here,
          // which looks like markup lost when the page was extracted. The
          // attribute set below is a reconstruction - confirm against the
          // original class before relying on the exact dimensions.
          $swf = isset($data['swf']) ? htmlspecialchars((string) $data['swf'], ENT_QUOTES, 'UTF-8') : '';
          $data['object'] = ($swf === '')
              ? ''
              : '<embed src="' . $swf . '" quality="high" width="480" height="400" align="middle" allowNetworking="all" allowScriptAccess="always" type="application/x-shockwave-flash"></embed>';
      }

      return $data;
  }
  /**
   * Tencent Video (v.qq.com).
   *
   * URL forms listed by the original author:
   *   http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
   *   http://v.qq.com/play/97abu74o4w3.html
   *   http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
   *   http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
   *   http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
   *
   * @param string $url Page URL.
   * @return array|false 'img', 'url', 'title' and 'swf', or false when the page
   *                     does not contain the expected player data.
   */
  private static function _parseQq($url){
      // "/play/" pages are followed through the "url=..." value found in their HTML.
      if (preg_match("/\/play\//", $url)) {
          $html = (string) self::_fget($url);
          // Capture only the target: the whole match would keep the "url=" prefix
          // and could not be fetched below.
          if (!preg_match("/url=([^\"]+)/", $html, $matches)) {
              return false;
          }
          $url = $matches[1];
      }

      // The id may already be in the query string ("vid=<id>_<n>").
      $vid = preg_match("/vid=([^\_]+)/", $url, $matches) ? $matches[1] : '';

      $html = (string) self::_fget($url);

      // Player query string as written into the page's "flashvars" assignment.
      if (!preg_match("/flashvars\s=\s\"([^;]+)/s", $html, $matches)) {
          return false;
      }
      $query = $matches[1];

      if (!$vid) {
          // Fall back to the default id in the page script: vid = vid || "<id>";
          if (!preg_match("/vid\s?=\s?vid\s?\|\|\s?\"(\w+)\";/i", $html, $matches)) {
              return false;
          }
          $vid = $matches[1];
      }

      // The flashvars value is a JS concatenation; splice the id into it.
      $query = str_replace('"+vid+"', $vid, $query);
      parse_str($query, $output);
      $cid = isset($output['cid']) ? $output['cid'] : '';

      $data = array();
      $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
      $data['url'] = $url;
      $data['title'] = isset($output['title']) ? $output['title'] : '';
      $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?".$query;
      return $data;
  }
  /**
   * Youku (youku.com).
   *
   * URL forms listed by the original author:
   *   http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
   *   http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
   *
   * @param string $url Video page ("id_<id>") or playlist ("v_playlist/") URL.
   * @return array|false 'img', 'title', 'url' and 'swf', or false when no video
   *                     id is found or the playlist API gives no usable answer.
   */
  private static function _parseYouku($url){
      if (!preg_match("#id\_(\w+)#", $url, $matches)) {
          // No id in the URL: only playlist pages are accepted, and the id is
          // read from the "videoId2" script variable in their HTML.
          if (!preg_match("#v_playlist\/#", $url)) {
              return false;
          }
          $html = (string) self::_fget($url);
          if (!preg_match("#videoId2\s*=\s*\'(\w+)\'#", $html, $matches)) {
              return false;
          }
      }
      $videoId = $matches[1];

      $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$videoId}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";
      $retval = self::_cget($link);
      if (!$retval) {
          return false;
      }

      // Reject anything that is not the expected {"data":[{...}]} shape
      // instead of reading undefined offsets.
      $json = json_decode($retval, true);
      if (!is_array($json) || !isset($json['data']) || !is_array($json['data'])
          || !isset($json['data'][0]) || !is_array($json['data'][0])) {
          return false;
      }
      $entry = $json['data'][0];

      $data = array();
      $data['img'] = isset($entry['logo']) ? $entry['logo'] : null;
      $data['title'] = isset($entry['title']) ? $entry['title'] : null;
      $data['url'] = $url;
      $data['swf'] = "http://player.youku.com/player.php/sid/{$videoId}/v.swf";
      return $data;
  }
  /**
   * Tudou (tudou.com).
   *
   * URL forms listed by the original author:
   *   http://www.tudou.com/programs/view/Wtt3FjiDxEE/
   *   http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
   *
   *   http://www.tudou.com/playlist/p/a65718.html?iid=74909603
   *   http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
   *
   * @param string $url Programme ("view/<code>/") or playlist ("/playlist/") page URL.
   * @return array|false 'img', 'title', 'url' and 'swf', or false when the URL
   *                     or the fetched data does not have the expected shape.
   */
  private static function _parseTudou($url){
      if (preg_match("#view/([-\w]+)/#", $url, $matches)) {
          // Single programme: request the player URL and read the metadata from
          // the query string of the "Location" header in the raw response.
          $host = "www.tudou.com";
          $path = "/v/{$matches[1]}/v.swf";
          $ret = self::_fsget($path, $host);
          if (!$ret || !preg_match("#\nLocation: (.*)\n#", $ret, $mat)) {
              return false;
          }

          // Parse into an array: parse_str() without a result argument wrote the
          // response's parameters into local variables and is gone in PHP 8.
          // trim() drops the "\r" that "(.*)" picks up from a CRLF line ending.
          $params = array();
          parse_str((string) parse_url(urldecode(trim($mat[1])), PHP_URL_QUERY), $params);

          $data = array();
          $data['img'] = isset($params['snap_pic']) ? $params['snap_pic'] : null;
          $data['title'] = isset($params['title']) ? $params['title'] : null;
          $data['url'] = $url;
          $data['swf'] = "http://www.tudou.com/v/{$matches[1]}/v.swf";
          return $data;
      }

      // Everything else must be a playlist page.
      if (strpos($url, "/playlist/") === false) {
          return false;
      }
      if (strpos($url, 'iid=') !== false) {
          // An "iid=" parameter, when present, must carry a value. The old code
          // read an undefined $lowerurl here; the "p/l<digits>" branch it also
          // had could never reject anything and was dropped.
          $quarr = explode("iid=", strtolower($url));
          if (empty($quarr[1])) {
              return false;
          }
      }

      $html = self::_fget($url);
      if (!$html) {
          return false;
      }
      // Convert GB2312 to UTF-8; keep the raw bytes if the conversion fails.
      $converted = iconv("GB2312", "UTF-8", $html);
      if ($converted !== false) {
          $html = $converted;
      }

      if (!preg_match("/lid_code\s=\slcode\s=\s[\'\"]([^\'\"]+)/s", $html, $matches)) {
          return false;
      }
      $icode = $matches[1];

      if (!preg_match("/iid\s=\s.*?\|\|\s(\d+)/sx", $html, $matches)) {
          return false;
      }
      $iid = $matches[1];

      if (!preg_match("/listData\s=\s(\[\{.*\}\])/sx", $html, $matches)) {
          return false;
      }
      // Turn the JS array literal into JSON: strip whitespace, blank out
      // non-numeric/unquoted values, and quote the bare keys.
      $find = array("/\n/", '/\s/', "/:[^\d\"]\w+[^\,]*,/i", "/(\{|,)(\w+):/");
      $replace = array("", "", ':"",', '\\1"\\2":');
      $str = preg_replace($find, $replace, $matches[1]);
      $json = json_decode($str);
      if (!is_array($json) && !is_object($json)) {
          return false;
      }

      // Pick the list entry for the current item. Without a match there is
      // nothing trustworthy to report (the old loop fell through to the last entry).
      $item = null;
      foreach ($json as $val) {
          if (is_object($val) && isset($val->iid) && $val->iid == $iid) {
              $item = $val;
              break;
          }
      }
      if ($item === null) {
          return false;
      }

      $data = array();
      $data['img'] = isset($item->pic) ? $item->pic : null;
      $data['title'] = isset($item->title) ? $item->title : null;
      $data['url'] = $url;
      $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
      return $data;
  }
  /**
   * Ku6 (ku6.com).
   *
   * URL forms listed by the original author:
   *   http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
   *   http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
   *   http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
   *   http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
   *
   * @param string $url Page URL containing either "show_" or "show/".
   * @return array|false 'img', 'title', 'url' and 'swf', or false when the URL
   *                     form is not recognised or the expected data is missing.
   */
  private static function _parseKu6($url){
      if (preg_match("/show\_/", $url)) {
          // ".../show_<n>/<id>.html": metadata comes from the JSON endpoint.
          if (!preg_match("#/([-\w]+)\.html#", $url, $matches)) {
              return false;
          }
          $vid = $matches[1];
          $html = self::_fget("http://v.ku6.com/fetchVideo4Player/{$vid}.html");
          if (!$html) {
              return false;
          }
          $json = json_decode($html, true);
          if (!$json || !isset($json['data']) || !is_array($json['data'])) {
              return false;
          }

          $data = array();
          $data['img'] = isset($json['data']['picpath']) ? $json['data']['picpath'] : null;
          $data['title'] = isset($json['data']['t']) ? $json['data']['t'] : null;
          // Report the page URL like every other parser; this used to be
          // overwritten with the JSON endpoint address.
          $data['url'] = $url;
          $data['swf'] = "http://player.ku6.com/refer/{$vid}/v.swf";
          return $data;
      }

      if (!preg_match("/show\//", $url)) {
          // Neither URL form: fail explicitly instead of returning null.
          return false;
      }

      // ".../show/<id>.html": metadata is scraped from the page's ObjectInfo script.
      $html = (string) self::_fget($url);
      if (!preg_match("/ObjectInfo\s?=\s?([^\n]*)};/si", $html, $matches)) {
          return false;
      }
      $str = $matches[1];

      $data = array();

      // img
      $data['img'] = preg_match("/cover\s?:\s?\"([^\"]+)\"/", $str, $matches) ? $matches[1] : null;

      // title: wrap the raw JS string in a JSON object and decode it, so escape
      // sequences inside the literal are resolved by json_decode.
      $data['title'] = null;
      if (preg_match("/title\"?\s?:\s?\"([^\"]+)\"/", $str, $matches)) {
          $jsstr = "{\"title\":\"{$matches[1]}\"}";
          $json = json_decode($jsstr, true);
          $data['title'] = isset($json['title']) ? $json['title'] : null;
      }

      // url
      $data['url'] = $url;

      // query: the flashVars attribute is HTML-escaped in the page, so turn
      // "&amp;" back into "&" (the listing had a no-op "&" -> "&" here).
      if (!preg_match("/\"(vid=[^\"]+)\"\sname=\"flashVars\"/s", $html, $matches)) {
          return false;
      }
      $query = str_replace("&amp;", '&', $matches[1]);

      // Protocol-relative player address as it appears in the page.
      if (!preg_match("/\/\/player\.ku6cdn\.com[^\"\']+/", $html, $matches)) {
          return false;
      }
      $data['swf'] = 'http:'.$matches[0].'?'.$query;
      return $data;
  }
  /**
   * 56.com
   *
   * URL forms listed by the original author:
   *   http://www.56.com/u73/v_NTkzMDcwNDY.html
   *   http://player.56.com/v_NTkzMDcwNDY.swf
   *
   * Looks the video up through the vxml.56.com JSON endpoint.
   *
   * @param string $url Page URL containing "/v_<id>.html".
   * @return array|false 'img', 'title', 'url' and 'swf', or false when the URL
   *                     has no id or the endpoint returns nothing.
   */
  private static function _parse56($url){
      // Guard: the id is the "v_<id>.html" path segment.
      if (!preg_match("#/v_(\w+)\.html#", $url, $found)) {
          return false;
      }
      $videoId = $found[1];

      $response = self::_cget("http://vxml.56.com/json/{$videoId}/?src=out");
      if (!$response) {
          return false;
      }

      $decoded = json_decode($response, true);

      return array(
          'img'   => $decoded['info']['img'],
          'title' => $decoded['info']['Subject'],
          'url'   => $url,
          'swf'   => "http://player.56.com/v_{$videoId}.swf",
      );
  }
  /**
   * Letv (letv.com).
   *
   * URL forms listed by the original author:
   *   http://www.letv.com/ptv/vplay/1168109.html
   *   http://www.letv.com/player/x1168109.swf
   *
   * Title and picture are read from the query string of the v.t.sina.com.cn
   * link found in the page HTML.
   *
   * @param string $url Page URL containing "vplay/<digits>".
   * @return array|false 'img', 'title', 'url' and 'swf', or false when the URL
   *                     carries no numeric id.
   */
  private static function _parseLetv($url){
      // Without the numeric id no player URL can be built.
      if (!preg_match("#vplay/(\d+)#", $url, $idMatch)) {
          return false;
      }

      $html = (string) self::_fget($url);

      // Parse into an array: parse_str() without a result argument created local
      // variables from page content and is no longer available in PHP 8.
      $params = array();
      if (preg_match("#http://v.t.sina.com.cn/([^'\"]*)#", $html, $matches)) {
          parse_str((string) parse_url(urldecode($matches[0]), PHP_URL_QUERY), $params);
      }

      $data = array();
      $data['img'] = isset($params['pic']) ? $params['pic'] : null;
      $data['title'] = isset($params['title']) ? $params['title'] : null;
      $data['url'] = $url;
      $data['swf'] = "http://www.letv.com/player/x{$idMatch[1]}.swf";
      return $data;
  }
  /**
   * Sohu TV, e.g. http://my.tv.sohu.com/u/vw/5101536
   *
   * Reads the og:title, og:image and og:videosrc values from the page HTML.
   *
   * @param string $url Page URL.
   * @return array|false 'img', 'title', 'url' and 'swf' (null for a tag that is
   *                     absent), or false when the page cannot be fetched or
   *                     contains none of the three tags.
   */
  private static function _parseSohu($url){
      $html = self::_fget($url);
      if (!$html) {
          return false;
      }
      // Convert GB2312 to UTF-8; keep the raw bytes if the conversion fails.
      $converted = iconv("GB2312", "UTF-8", $html);
      if ($converted !== false) {
          $html = $converted;
      }

      // Capture the property name together with its content so each value is
      // assigned by name; the old code relied on the tags appearing in the
      // order title, image, videosrc.
      if (!preg_match_all("/og:(title|image|videosrc)\"\scontent=\"([^\"]+)\"/s", $html, $matches, PREG_SET_ORDER)) {
          return false;
      }
      $meta = array();
      foreach ($matches as $tag) {
          // First occurrence wins if a property is repeated.
          if (!isset($meta[$tag[1]])) {
              $meta[$tag[1]] = $tag[2];
          }
      }

      $data = array();
      $data['img'] = isset($meta['image']) ? $meta['image'] : null;
      $data['title'] = isset($meta['title']) ? $meta['title'] : null;
      $data['url'] = $url;
      $data['swf'] = isset($meta['videosrc']) ? $meta['videosrc'] : null;
      return $data;
  }
  /**
   * Sina video (video.sina.com.cn).
   *
   * URL forms listed by the original author:
   *   http://video.sina.com.cn/v/b/48717043-1290055681.html
   *   http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
   *
   * @param string $url Any URL containing the "<digits>-<digits>" or
   *                    "<digits>_<digits>" id pair.
   * @return array|false 'img', 'title', 'url' (the canonical page URL built from
   *                     the id pair) and 'swf', or false when the id pair or the
   *                     page's video object cannot be read.
   */
  private static function _parseSina($url){
      if (!preg_match("/(\d+)(?:\-|\_)(\d+)/", $url, $matches)) {
          return false;
      }
      // Normalise to the canonical page URL, which is also what gets reported.
      $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";
      $html = (string) self::_fget($url);

      // NOTE(review): this pattern was truncated after "[^" in the published
      // listing (everything from "<" to the end of the line was lost). The tail
      // "<]+)}/" is a reconstruction - confirm against the original class.
      if (!preg_match("/video\s?:\s?([^<]+)}/", $html, $matches)) {
          return false;
      }

      // Turn the JS object literal into JSON: drop whitespace, switch to double
      // quotes, quote the keys, and blank out non-numeric/unquoted values.
      $find = array("/\n/", "/\s*/", "/\'/", "/\{([^:,]+):/", "/,([^:]+):/", "/:[^\d\"]\w+[^\,]*,/i");
      $replace = array('', '', '"', '{"\\1":', ',"\\1":', ':"",');
      $str = preg_replace($find, $replace, $matches[1]);
      $arr = json_decode($str, true);
      if (!is_array($arr)) {
          return false;
      }

      $data = array();
      $data['img'] = isset($arr['pic']) ? $arr['pic'] : null;
      $data['title'] = isset($arr['title']) ? $arr['title'] : null;
      $data['url'] = $url;
      $data['swf'] = isset($arr['swfOutsideUrl']) ? $arr['swfOutsideUrl'] : null;
      return $data;
  }
  /**
   * Fetch a URL with file_get_contents().
   *
   * @param string $url Address to fetch.
   * @return mixed false for an empty $url; otherwise the gzip-decoded body when
   *               _gzdecode() yields a truthy result, else whatever
   *               file_get_contents() returned.
   */
  private static function _fget($url=''){
      if (!$url) {
          return false;
      }

      $raw = file_get_contents($url);

      // Prefer the inflated payload when the response turns out to be gzip data.
      $inflated = self::_gzdecode($raw);

      return $inflated ? $inflated : $raw;
  }
  /**
   * Fetch a path over a raw socket (fsockopen, port 80) and return the complete
   * response, status line and headers included.
   *
   * @param string $path       Request path, e.g. "/v/<code>/v.swf".
   * @param string $host       Host name to connect to and send as "Host".
   * @param string $user_agent User-Agent value; defaults to self::USER_AGENT.
   * @return string|false Raw response (gzip-decoded when _gzdecode() yields a
   *                      truthy result); false on missing arguments, connection
   *                      failure or write failure.
   */
  private static function _fsget($path='/', $host='', $user_agent=''){
      if (!$path || !$host) {
          return false;
      }
      $user_agent = $user_agent ? $user_agent : self::USER_AGENT;
      // A header value must stay on one line; fold any embedded line break.
      $user_agent = preg_replace('/\s*[\r\n]+\s*/', ' ', $user_agent);

      // Header lines are CRLF-terminated. "Connection: close" makes the server
      // end the stream after the response, so the read loop below terminates
      // instead of waiting on a keep-alive connection.
      $out = implode("\r\n", array(
          "GET {$path} HTTP/1.1",
          "Host: {$host}",
          "User-Agent: {$user_agent}",
          "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
          "Accept-Language: zh-cn,zh;q=0.5",
          "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7",
          "Connection: close",
      )) . "\r\n\r\n";

      // Connection errors are reported through the false return value.
      $fp = @fsockopen($host, 80, $errno, $errstr, 10);
      if (!$fp) {
          return false;
      }
      stream_set_timeout($fp, 10);

      if (!fputs($fp, $out)) {
          // Do not leak the socket on a failed write.
          fclose($fp);
          return false;
      }

      $html = '';
      while (!feof($fp)) {
          $chunk = fgets($fp, 1024);
          if ($chunk === false) {
              // Read error or timeout: stop rather than spin.
              break;
          }
          $html .= $chunk;
      }
      fclose($fp);

      // Prefer the inflated payload when the data turns out to be gzip.
      $dehtml = self::_gzdecode($html);
      return $dehtml ? $dehtml : $html;
  }
  /**
   * Fetch a URL with cURL.
   *
   * @param string $url        Address to fetch.
   * @param string $user_agent User-Agent value; defaults to self::USER_AGENT.
   * @return string|false Response body (gzip-decoded when _gzdecode() yields a
   *                      truthy result); false for an empty $url, a cURL error
   *                      or an empty body.
   */
  private static function _cget($url='', $user_agent=''){
      if (!$url) {
          // Explicit false, matching the other failure paths (was a bare return).
          return false;
      }
      $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $url);
      curl_setopt($ch, CURLOPT_HEADER, 0);
      // Have curl_exec() return the body instead of capturing echoed output
      // through an output buffer.
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
      if (strlen($user_agent)) {
          curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
      }

      $html = curl_exec($ch);
      $failed = ($html === false) || curl_errno($ch);
      curl_close($ch);

      if ($failed || !is_string($html) || !strlen($html)) {
          return false;
      }

      // Prefer the inflated payload when the data turns out to be gzip. This
      // check used to sit behind an unconditional return and never ran.
      $dehtml = self::_gzdecode($html);
      return $dehtml ? $dehtml : $html;
  }
  /**
   * Decode a single-member GZIP stream (RFC 1952).
   *
   * Header layout: 10 fixed bytes (magic 1f 8b, method, flags, mtime, xfl, os),
   * then optional FEXTRA, FNAME, FCOMMENT and FHCRC fields selected by the flag
   * bits, then the deflate body and an 8-byte trailer (CRC32 + size).
   *
   * @param mixed $data Candidate gzip bytes; non-strings are cast to string.
   * @return string|null|false Decoded data; null when the input does not look
   *                           like gzip at all; false when it looks like gzip
   *                           but is malformed or fails its checksums.
   */
  private static function _gzdecode($data) {
      $data = (string) $data;
      $len = strlen($data);
      // 10 header bytes + 8 trailer bytes is the smallest possible member.
      if ($len < 18 || strcmp(substr($data, 0, 2), "\x1f\x8b")) {
          return null; // Not GZIP format (See RFC 1952)
      }
      $method = ord($data[2]); // Compression method
      $flags = ord($data[3]);  // Flags
      // Parenthesised on purpose: "!=" binds tighter than "&".
      if (($flags & 31) != $flags) {
          // Reserved bits are set -- NOT ALLOWED by RFC 1952
          return null;
      }

      // Bytes 4-9 (mtime, xfl, os) are not needed for decoding.
      $headerlen = 10;

      if ($flags & 4) {
          // FEXTRA: 2-byte little-endian length, then that many bytes.
          if ($len - $headerlen - 2 < 8) {
              return false; // Invalid format
          }
          $extralen = unpack("v", substr($data, $headerlen, 2));
          $extralen = $extralen[1];
          if ($len - $headerlen - 2 - $extralen < 8) {
              return false; // Invalid format
          }
          $headerlen += 2 + $extralen;
      }

      if ($flags & 8) {
          // FNAME: zero-terminated original file name.
          if ($len - $headerlen - 1 < 8) {
              return false; // Invalid format
          }
          $end = strpos($data, "\0", $headerlen);
          if ($end === false || $len - $end - 1 < 8) {
              return false; // Invalid format
          }
          $headerlen = $end + 1;
      }

      if ($flags & 16) {
          // FCOMMENT: zero-terminated comment.
          if ($len - $headerlen - 1 < 8) {
              return false; // Invalid format
          }
          $end = strpos($data, "\0", $headerlen);
          if ($end === false || $len - $end - 1 < 8) {
              return false; // Invalid header format
          }
          $headerlen = $end + 1;
      }

      if ($flags & 2) {
          // FHCRC: low 16 bits of the CRC32 over all header bytes before it.
          if ($len - $headerlen - 2 < 8) {
              return false; // Invalid format
          }
          $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
          $headercrc = unpack("v", substr($data, $headerlen, 2));
          if ($headercrc[1] != $calccrc) {
              return false; // Bad header CRC
          }
          $headerlen += 2;
      }

      // Trailer: CRC32 of the uncompressed data, then its length modulo 2^32.
      $datacrc = unpack("V", substr($data, -8, 4));
      $datacrc = $datacrc[1];
      $isize = unpack("V", substr($data, -4));
      $isize = $isize[1];

      $bodylen = $len - $headerlen - 8;
      if ($bodylen < 0) {
          // Ruled out by the length checks above; kept as a safety net.
          return null;
      }

      $decoded = "";
      if ($bodylen > 0) {
          if ($method !== 8) {
              // Deflate (8) is the only supported compression method.
              return false;
          }
          $decoded = gzinflate(substr($data, $headerlen, $bodylen));
          if ($decoded === false) {
              return false;
          }
      }

      // Compare as unsigned decimal strings so builds with 32-bit integers,
      // where these values can come out negative, agree with 64-bit builds.
      if (sprintf('%u', $isize) !== sprintf('%u', strlen($decoded))
          || sprintf('%u', crc32($decoded)) !== sprintf('%u', $datacrc)) {
          // Bad format! Length or CRC doesn't match!
          return false;
      }
      return $decoded;
  }
  498. }
  499. /*
  500. $url = "http://v.youku.com/v_show/id_XNjIxNjUyOTky.html";
  501. $obj = new VideoUrlparser();
  502. $data = $obj->parse($url);
  503. var_dump($data);
  504. */
复制代码


Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn