Home >Backend Development >PHP Tutorial >PHP various Youku, Tudou, video packaging classes

A PHP wrapper class for parsing Youku, Tudou, and other video-site URLs

WBOY
WBOYOriginal
2016-07-25 08:49:37 · 1110 views
A PHP class that collects video information (thumbnail, title, Flash player URL) from page URLs on Youku, Tudou, Tencent Video, 56.com, Sohu Video, Ku6, Sina Video, LeTV, and similar sites.

Nine Lane jigwang.com
  /**
   * VideoUrlparser
   *
   * Resolves the page URL of a video hosted on one of several Chinese video
   * sites into a small metadata array (thumbnail, title, page URL and Flash
   * player URL) and, optionally, an HTML <embed> snippet for that player.
   *
   * Hosts recognised by CHECK_URL_VALID: youku.com, tudou.com, ku6.com,
   * 56.com, letv.com, video.sina.com.cn, tv.sohu.com / my.tv.sohu.com, v.qq.com.
   *
   * @package
   * @version 1.3
   * @copyright 2011 - 2013
   *
   * Usage (PHP class names are case-insensitive, so VideoUrlParser works too):
   *
   *   require_once "VideoUrlParser.class.php";
   *   $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
   *   $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
   *   $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
   *   $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
   *   $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
   *   $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
   *   $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
   *
   *   foreach ($urls as $url) {
   *       $info = VideoUrlParser::parse($url);
   *       if ($info === false) {
   *           continue;
   *       }
   *       echo $info['title'], "\n";
   *       echo $info['object'], "\n";
   *   }
   *
   * Example page URLs per site:
   *
   *   Youku:      http://v.youku.com/v_show/id_XMjU0NjY4OTEy.html
   *   Ku6:        http://v.ku6.com/special/show_3917484/x0BMXAbgZdQS6FqN.html
   *   Tudou:      http://www.tudou.com/playlist/p/a65929.html?iid=74905844
   *   56.com:     http://www.56.com/u98/v_NTkyODY2NTU.html
   *   Sina video: http://video.sina.com.cn/v/b/46909166-1290055681.html
   *   LeTV:       http://www.letv.com/ptv/vplay/1168109.html
   */
  class VideoUrlparser
  {
      /**
       * Browser-like User-Agent sent by the socket and cURL fetchers.
       * Kept on a single line: a line break inside the value would split the
       * HTTP request header.
       */
      const USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10';

      /**
       * Pattern that picks the supported site out of a lower-cased URL.
       * Capture group 1 is the value parse() dispatches on.
       */
      const CHECK_URL_VALID = '/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/';

      /**
       * Parse a video page URL.
       *
       * @param string $url          Page URL on one of the supported sites.
       * @param mixed  $createObject When truthy, an 'object' key holding an
       *                             <embed> snippet for the player is added.
       * @static
       * @access public
       * @return array|false Array with the keys 'img', 'title', 'url', 'swf'
       *                     (and 'object' when requested), or false when the
       *                     URL is unsupported or could not be resolved.
       */
      public static function parse($url = '', $createObject = true)
      {
          if (!is_string($url) || $url === '') {
              return false;
          }
          if (!preg_match(self::CHECK_URL_VALID, strtolower($url), $matches)) {
              return false;
          }

          switch ($matches[1]) {
              case 'youku.com':
                  $data = self::_parseYouku($url);
                  break;
              case 'tudou.com':
                  $data = self::_parseTudou($url);
                  break;
              case 'ku6.com':
                  $data = self::_parseKu6($url);
                  break;
              case '56.com':
                  $data = self::_parse56($url);
                  break;
              case 'letv.com':
                  $data = self::_parseLetv($url);
                  break;
              case 'video.sina.com.cn':
                  $data = self::_parseSina($url);
                  break;
              case 'my.tv.sohu.com':
              case 'tv.sohu.com':
                  $data = self::_parseSohu($url);
                  break;
              case 'v.qq.com':
                  $data = self::_parseQq($url);
                  break;
              default:
                  $data = false;
          }

          // Site parsers signal failure with false; normalise anything else
          // that is not a result array to false as well.
          if (!is_array($data)) {
              return false;
          }
          if ($createObject) {
              $data['object'] = self::_buildEmbed(self::_field($data, 'swf'));
          }
          return $data;
      }

      /**
       * Build the HTML <embed> snippet for a Flash player URL.
       *
       * NOTE(review): the published listing assigned an empty string here,
       * apparently because the markup was stripped when the article was
       * rendered. The attributes below are a reconstruction - confirm them
       * against the original distribution of this class.
       *
       * @param string $swf Player URL.
       * @return string HTML snippet with the URL escaped for attribute context.
       */
      private static function _buildEmbed($swf)
      {
          $src = htmlspecialchars((string) $swf, ENT_QUOTES, 'UTF-8');

          return '<embed src="' . $src . '" quality="high" width="480" height="400" align="middle"'
              . ' allowNetworking="all" allowScriptAccess="always"'
              . ' type="application/x-shockwave-flash"></embed>';
      }

      /**
       * Read a key from an array, yielding '' when the source is not an array
       * or the key is absent.
       *
       * @param mixed  $source Decoded data, possibly null/false.
       * @param string $key    Key to read.
       * @return mixed
       */
      private static function _field($source, $key)
      {
          return (is_array($source) && isset($source[$key])) ? $source[$key] : '';
      }

      /**
       * Convert a GB2312 page to UTF-8, keeping the raw bytes when iconv
       * cannot convert them.
       *
       * @param string $html
       * @return string
       */
      private static function _gb2312ToUtf8($html)
      {
          $converted = iconv('GB2312', 'UTF-8//IGNORE', $html);

          return $converted === false ? $html : $converted;
      }

      /**
       * Tencent Video
       * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
       * http://v.qq.com/play/97abu74o4w3.html
       * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
       * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
       * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseQq($url)
      {
          // "/play/" pages only carry a "url=..." pointer to the real cover page.
          if (preg_match('#/play/#', $url)) {
              $html = self::_fget($url);
              if (!$html || !preg_match('/url=([^"]+)/', $html, $matches)) {
                  return false;
              }
              // Use the captured target only, without the "url=" prefix.
              $url = $matches[1];
          }

          $vid = '';
          if (preg_match('/vid=([^_&#]+)/', $url, $matches)) {
              $vid = $matches[1];
          }

          $html = self::_fget($url);
          if (!$html) {
              return false;
          }

          // The flashvars assignment in the page holds the player query string.
          if (!preg_match('/flashvars\s=\s"([^;]+)/s', $html, $matches)) {
              return false;
          }
          $query = $matches[1];

          if ($vid === '') {
              if (!preg_match('/vid\s?=\s?vid\s?\|\|\s?"(\w+)";/i', $html, $matches)) {
                  return false;
              }
              $vid = $matches[1];
          }

          // The page concatenates the id in JavaScript; substitute the real value.
          $query = str_replace('"+vid+"', $vid, $query);
          parse_str($query, $output);

          $cid = self::_field($output, 'cid');

          $data = array();
          $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
          $data['url'] = $url;
          $data['title'] = self::_field($output, 'title');
          $data['swf'] = 'http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?' . $query;
          return $data;
      }

      /**
       * Youku
       * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
       * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseYouku($url)
      {
          if (!preg_match('#id_(\w+)#', $url, $matches)) {
              // Playlist pages carry the id inside the page source instead.
              if (!preg_match('#v_playlist/#', $url)) {
                  return false;
              }
              $html = self::_fget($url);
              if (!$html || !preg_match("#videoId2\s*=\s*'(\w+)'#", $html, $matches)) {
                  return false;
              }
          }
          $id = $matches[1];

          $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$id}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";
          $retval = self::_cget($link);
          if (!$retval) {
              return false;
          }

          $json = json_decode($retval, true);
          $entry = (is_array($json) && isset($json['data'][0])) ? $json['data'][0] : array();

          $data = array();
          $data['img'] = self::_field($entry, 'logo');
          $data['title'] = self::_field($entry, 'title');
          $data['url'] = $url;
          $data['swf'] = "http://player.youku.com/player.php/sid/{$id}/v.swf";
          return $data;
      }

      /**
       * Tudou
       * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
       * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
       *
       * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
       * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseTudou($url)
      {
          if (preg_match('#view/([-\w]+)/#', $url, $matches)) {
              return self::_parseTudouProgram($url, $matches[1]);
          }
          // strpos() may legitimately return 0, so compare strictly.
          if (strpos($url, '/playlist/') === false) {
              return false;
          }
          return self::_parseTudouPlaylist($url);
      }

      /**
       * Resolve a Tudou "/programs/view/<code>/" page by reading the redirect
       * the player URL answers with.
       *
       * @param string $url  Page URL.
       * @param string $code Item code taken from the URL.
       * @return array|false
       */
      private static function _parseTudouProgram($url, $code)
      {
          $ret = self::_fsget("/v/{$code}/v.swf", 'www.tudou.com');
          if (!$ret || !preg_match("#\nLocation: (.*)\n#i", $ret, $mat)) {
              return false;
          }

          // trim() drops the "\r" of the CRLF line ending captured by (.*).
          $query = parse_url(urldecode(trim($mat[1])), PHP_URL_QUERY);

          // Parse into an array: the single-argument form of parse_str()
          // wrote remote data into local variables and was removed in PHP 8.
          $params = array();
          parse_str((string) $query, $params);

          $data = array();
          $data['img'] = self::_field($params, 'snap_pic');
          $data['title'] = self::_field($params, 'title');
          $data['url'] = $url;
          $data['swf'] = "http://www.tudou.com/v/{$code}/v.swf";
          return $data;
      }

      /**
       * Resolve a Tudou "/playlist/" page by scraping the ids and the item
       * list out of the page's inline JavaScript.
       *
       * @param string $url Page URL.
       * @return array|false
       */
      private static function _parseTudouPlaylist($url)
      {
          $lowerurl = strtolower($url);
          if (strpos($url, 'iid=') !== false) {
              $quarr = explode('iid=', $lowerurl);
              if (empty($quarr[1])) {
                  return false;
              }
          } elseif (preg_match('#p/l(\d+)\.#', $lowerurl, $quarr)) {
              if (empty($quarr[1])) {
                  return false;
              }
          }

          $html = self::_fget($url);
          if (!$html) {
              return false;
          }
          $html = self::_gb2312ToUtf8($html);

          if (!preg_match('/lid_code\s=\slcode\s=\s[\'"]([^\'"]+)/s', $html, $found)) {
              return false;
          }
          $icode = $found[1];

          if (!preg_match('/iid\s=\s.*?\|\|\s(\d+)/sx', $html, $found)) {
              return false;
          }
          $iid = $found[1];

          // listData is a JavaScript literal: strip whitespace, blank out
          // non-literal values and quote the keys so json_decode() accepts it.
          $entry = null;
          if (preg_match('/listData\s=\s(\[\{.*\}\])/sx', $html, $found)) {
              $find = array("/\n/", '/\s/', '/:[^\d"]\w+[^\,]*,/i', '/(\{|,)(\w+):/');
              $replace = array('', '', ':"",', '\1"\2":');
              $json = json_decode(preg_replace($find, $replace, $found[1]));
              if (is_array($json) || is_object($json)) {
                  foreach ($json as $val) {
                      if (is_object($val) && isset($val->iid) && (string) $val->iid === $iid) {
                          $entry = $val;
                          break;
                      }
                  }
              }
          }

          // Thumbnail and title are best-effort; the player URL only needs
          // the two ids found above.
          $data = array();
          $data['img'] = ($entry !== null && isset($entry->pic)) ? $entry->pic : '';
          $data['title'] = ($entry !== null && isset($entry->title)) ? $entry->title : '';
          $data['url'] = $url;
          $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
          return $data;
      }

      /**
       * Ku6
       * http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
       * http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
       * http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
       * http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseKu6($url)
      {
          if (preg_match('/show_/', $url)) {
              if (!preg_match('#/([-\w]+)\.html#', $url, $matches)) {
                  return false;
              }
              // Keep the page URL intact; the lookup URL is a separate value.
              $api = "http://v.ku6.com/fetchVideo4Player/{$matches[1]}.html";
              $html = self::_fget($api);
              if (!$html) {
                  return false;
              }
              $json = json_decode($html, true);
              if (!$json) {
                  return false;
              }
              $info = self::_field($json, 'data');

              $data = array();
              $data['img'] = self::_field($info, 'picpath');
              $data['title'] = self::_field($info, 't');
              $data['url'] = $url;
              $data['swf'] = "http://player.ku6.com/refer/{$matches[1]}/v.swf";
              return $data;
          }

          if (preg_match('#show/#', $url)) {
              $html = self::_fget($url);
              if (!$html) {
                  return false;
              }
              $str = preg_match('/ObjectInfo\s?=\s?([^\n]*)};/si', $html, $matches) ? $matches[1] : '';

              $data = array();

              // img
              $data['img'] = preg_match('/cover\s?:\s?"([^"]+)"/', $str, $matches) ? $matches[1] : '';

              // title: round-trip through json_decode() to resolve JS escapes.
              $data['title'] = '';
              if (preg_match('/title"?\s?:\s?"([^"]+)"/', $str, $matches)) {
                  $json = json_decode('{"title":"' . $matches[1] . '"}', true);
                  $data['title'] = isset($json['title']) ? $json['title'] : $matches[1];
              }

              // url
              $data['url'] = $url;

              // query
              $query = '';
              if (preg_match('/"(vid=[^"]+)"\sname="flashVars"/s', $html, $matches)) {
                  $query = str_replace('&amp;', '&', $matches[1]);
              }
              if (!preg_match('#//player\.ku6cdn\.com[^"\']+#', $html, $matches)) {
                  return false;
              }
              $data['swf'] = 'http:' . $matches[0] . '?' . $query;
              return $data;
          }

          // Neither URL layout matched.
          return false;
      }

      /**
       * 56.com
       * http://www.56.com/u73/v_NTkzMDcwNDY.html
       * http://player.56.com/v_NTkzMDcwNDY.swf
       *
       * @param string $url
       * @return array|false
       */
      private static function _parse56($url)
      {
          if (!preg_match('#/v_(\w+)\.html#', $url, $matches)) {
              return false;
          }

          $link = "http://vxml.56.com/json/{$matches[1]}/?src=out";
          $retval = self::_cget($link);
          if (!$retval) {
              return false;
          }

          $json = json_decode($retval, true);
          $info = self::_field($json, 'info');

          $data = array();
          $data['img'] = self::_field($info, 'img');
          $data['title'] = self::_field($info, 'Subject');
          $data['url'] = $url;
          $data['swf'] = "http://player.56.com/v_{$matches[1]}.swf";
          return $data;
      }

      /**
       * LeTV
       * http://www.letv.com/ptv/vplay/1168109.html
       * http://www.letv.com/player/x1168109.swf
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseLetv($url)
      {
          if (!preg_match('#vplay/(\d+)#', $url, $idMatch)) {
              return false;
          }

          $html = self::_fget($url);
          if (!$html) {
              return false;
          }

          // Thumbnail and title are read from the query string of the Sina
          // share link embedded in the page.
          $params = array();
          if (preg_match('#http://v\.t\.sina\.com\.cn/([^\'"]*)#', $html, $share)) {
              parse_str((string) parse_url(urldecode($share[0]), PHP_URL_QUERY), $params);
          }

          $data = array();
          $data['img'] = self::_field($params, 'pic');
          $data['title'] = self::_field($params, 'title');
          $data['url'] = $url;
          $data['swf'] = "http://www.letv.com/player/x{$idMatch[1]}.swf";
          return $data;
      }

      /**
       * Sohu TV
       * http://my.tv.sohu.com/u/vw/5101536
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseSohu($url)
      {
          $html = self::_fget($url);
          if (!$html) {
              return false;
          }
          $html = self::_gb2312ToUtf8($html);

          // Key the Open Graph values by property name so the result does not
          // depend on the order of the meta tags in the page.
          if (!preg_match_all('/og:(title|image|videosrc)"\scontent="([^"]+)"/s', $html, $found, PREG_SET_ORDER)) {
              return false;
          }
          $meta = array();
          foreach ($found as $tag) {
              if (!isset($meta[$tag[1]])) {
                  $meta[$tag[1]] = $tag[2];
              }
          }
          if (!isset($meta['videosrc'])) {
              return false;
          }

          $data = array();
          $data['img'] = self::_field($meta, 'image');
          $data['title'] = self::_field($meta, 'title');
          $data['url'] = $url;
          $data['swf'] = $meta['videosrc'];
          return $data;
      }

      /**
       * Sina video
       * http://video.sina.com.cn/v/b/48717043-1290055681.html
       * http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
       *
       * @param string $url
       * @return array|false
       */
      private static function _parseSina($url)
      {
          if (!preg_match('/(\d+)(?:-|_)(\d+)/', $url, $matches)) {
              return false;
          }
          // Normalise to the canonical page URL before fetching.
          $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";

          $html = self::_fget($url);
          if (!$html || !preg_match('/video\s?:\s?([^<]+)}/', $html, $matches)) {
              return false;
          }

          // Turn the JavaScript object literal into JSON: strip whitespace,
          // switch to double quotes, quote the keys, blank non-literal values.
          $find = array("/\n/", '/\s*/', "/'/", '/\{([^:,]+):/', '/,([^:]+):/', '/:[^\d"]\w+[^\,]*,/i');
          $replace = array('', '', '"', '{"\1":', ',"\1":', ':"",');
          $arr = json_decode(preg_replace($find, $replace, $matches[1]), true);
          if (!is_array($arr)) {
              return false;
          }

          $data = array();
          $data['img'] = self::_field($arr, 'pic');
          $data['title'] = self::_field($arr, 'title');
          $data['url'] = $url;
          $data['swf'] = self::_field($arr, 'swfOutsideUrl');
          return $data;
      }

      /**
       * Fetch a URL with file_get_contents().
       *
       * @param string $url
       * @return string|false Body (gunzipped when it is a gzip stream), or
       *                      false on failure.
       */
      private static function _fget($url = '')
      {
          if (!$url) {
              return false;
          }
          $html = file_get_contents($url);
          if ($html === false) {
              return false;
          }

          // Transparently unpack gzip-compressed bodies.
          $dehtml = self::_gzdecode($html);
          return is_string($dehtml) ? $dehtml : $html;
      }

      /**
       * Fetch a path from a host over a raw socket on port 80.
       *
       * @param string $path       Request path.
       * @param string $host       Host name.
       * @param string $user_agent Optional User-Agent; defaults to USER_AGENT.
       * @return string|false The raw response, status line and headers
       *                      included, or false on failure.
       */
      private static function _fsget($path = '/', $host = '', $user_agent = '')
      {
          if (!$path || !$host) {
              return false;
          }
          $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

          // Line breaks inside a value would inject extra header lines.
          $crlf = array("\r", "\n");
          $path = str_replace($crlf, '', $path);
          $host = str_replace($crlf, '', $host);
          $user_agent = str_replace($crlf, ' ', $user_agent);

          // HTTP lines end in CRLF. "Connection: close" makes the server end
          // the stream after the response so the read loop below terminates.
          $out = implode("\r\n", array(
              "GET {$path} HTTP/1.1",
              "Host: {$host}",
              "User-Agent: {$user_agent}",
              'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
              'Accept-Language: zh-cn,zh;q=0.5',
              'Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7',
              'Connection: close',
          )) . "\r\n\r\n";

          $fp = @fsockopen($host, 80, $errno, $errstr, 10);
          if (!$fp) {
              return false;
          }
          stream_set_timeout($fp, 10);

          if (!fwrite($fp, $out)) {
              fclose($fp);
              return false;
          }

          $html = '';
          while (!feof($fp)) {
              $chunk = fgets($fp, 1024);
              if ($chunk === false) {
                  break;
              }
              $html .= $chunk;
          }
          fclose($fp);

          // Transparently unpack gzip-compressed data.
          $dehtml = self::_gzdecode($html);
          return is_string($dehtml) ? $dehtml : $html;
      }

      /**
       * Fetch a URL with cURL.
       *
       * @param string $url
       * @param string $user_agent Optional User-Agent; defaults to USER_AGENT.
       * @return string|false Body (gunzipped when it is a gzip stream), or
       *                      false on failure or an empty response.
       */
      private static function _cget($url = '', $user_agent = '')
      {
          if (!$url) {
              return false;
          }
          $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

          $ch = curl_init();
          if ($ch === false) {
              return false;
          }
          curl_setopt($ch, CURLOPT_URL, $url);
          curl_setopt($ch, CURLOPT_HEADER, 0);
          // Return the body instead of printing it; no output buffering needed.
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
          if (strlen($user_agent)) {
              curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
          }

          $html = curl_exec($ch);
          $failed = curl_errno($ch) !== 0;
          curl_close($ch);

          if ($failed || !is_string($html) || !strlen($html)) {
              return false;
          }

          // Transparently unpack gzip-compressed bodies.
          $dehtml = self::_gzdecode($html);
          return is_string($dehtml) ? $dehtml : $html;
      }

      /**
       * Decode a gzip stream (RFC 1952).
       *
       * @param string $data Candidate gzip data.
       * @return string|null|false The decompressed payload; null when $data is
       *                           not gzip (or has no body); false when it
       *                           looks like gzip but is malformed or fails
       *                           its CRC/length check.
       */
      private static function _gzdecode($data)
      {
          if (!is_string($data)) {
              return null;
          }
          $len = strlen($data);
          if ($len < 18 || substr($data, 0, 2) !== "\x1f\x8b") {
              return null; // Not GZIP format (see RFC 1952)
          }

          $method = ord($data[2]); // Compression method
          $flags = ord($data[3]);  // Flags
          if (($flags & 31) !== $flags) {
              // Reserved bits are set -- not allowed by RFC 1952
              return null;
          }

          // Fixed header: magic(2) method(1) flags(1) mtime(4) xfl(1) os(1).
          $headerlen = 10;

          if ($flags & 4) {
              // FEXTRA: 2-byte little-endian length followed by that many bytes
              if ($len - $headerlen - 2 < 8) {
                  return false; // Invalid format
              }
              $extralen = unpack('v', substr($data, $headerlen, 2));
              $extralen = $extralen[1];
              if ($len - $headerlen - 2 - $extralen < 8) {
                  return false; // Invalid format
              }
              $headerlen += 2 + $extralen;
          }

          // FNAME (8) and FCOMMENT (16): NUL-terminated strings, in that order.
          foreach (array(8, 16) as $bit) {
              if (!($flags & $bit)) {
                  continue;
              }
              if ($len - $headerlen - 1 < 8) {
                  return false; // Invalid format
              }
              $end = strpos($data, "\0", $headerlen);
              if ($end === false || $len - $end - 1 < 8) {
                  return false; // Invalid format
              }
              $headerlen = $end + 1;
          }

          if ($flags & 2) {
              // FHCRC: low 16 bits of the CRC32 of all header bytes before it
              if ($len - $headerlen - 2 < 8) {
                  return false; // Invalid format
              }
              $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
              $headercrc = unpack('v', substr($data, $headerlen, 2));
              if ($headercrc[1] !== $calccrc) {
                  return false; // Bad header CRC
              }
              $headerlen += 2;
          }

          // Footer: CRC32 and length (mod 2^32) of the uncompressed data.
          $footer = unpack('Vcrc/Visize', substr($data, -8));

          $bodylen = $len - $headerlen - 8;
          if ($bodylen < 1) {
              return null;
          }
          if ($method !== 8) {
              return false; // Deflate is the only supported method
          }

          $inflated = gzinflate(substr($data, $headerlen, $bodylen));
          if ($inflated === false) {
              return false;
          }

          // Compare as unsigned decimal strings so the check also holds where
          // 32-bit values come back negative.
          if (sprintf('%u', $footer['isize']) !== sprintf('%u', strlen($inflated))
              || sprintf('%u', $footer['crc']) !== sprintf('%u', crc32($inflated))
          ) {
              return false; // Length or CRC does not match
          }
          return $inflated;
      }
  }
  509. /*
  510. $url = "http://v.youku.com/v_show/id_XNjIxNjUyOTky.html";
  511. $obj = new VideoUrlparser();
  512. $data = $obj->parse($url);
  513. var_dump($data);
  514. */
Copy code


Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn