Home  >  Article  >  Backend Development  >  Create Baidu Dictionary word search collector with PHP_PHP tutorial

Create Baidu Dictionary word search collector with PHP_PHP tutorial

WBOY
WBOYOriginal
2016-07-13 10:08:31 · 1249 views

PHP makes Baidu Dictionary word search collector

This article shows how to build a Baidu Dictionary word-lookup collector (scraper) in PHP. Readers who need one can use it as a reference.

Baidu dict collection sample

This class was written to collect all of the result data returned by a Baidu Dictionary (dict) lookup. The project also ships with a word list of about 135,000 words (13.5w) and a simple collection example. Below is the main class, dict.class.php. The project address is http://github.com/widuu/baidu_dict — fork it if you need it. Few people will have a use for this, so anyone who finds it useful is welcome to take it.

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

/**

* dict.class.php collects Baidu dictionary translation content

*

* @copyright (C) 2014 widuu

* @license http://www.widuu.com

* @lastmodify 2014-2-15

*/

 

 

// Sends a Content-Type response header as soon as this file is loaded, before the class is declared.
header("content-type:text/html;charset=utf8");

class Dict{

 

private $word;

 

// Number of entries to display (getPhrase() uses it to limit the phrase list)

private static $num = 10;

 

public function __construct()
{
    // Nothing to initialise: the word to look up is passed to content() instead.
}

 

 

/**

* Public method of returning Baidu collected data

* @param string English word

* @return array(

* "symbol" => phonetic symbol

* "pro" => Pronunciation

* "example"=> Example

* "explain"=> concise explanation

* "synonym"=> Synonyms and antonyms

* "phrase" => Phrase array

* )

*

*/

public function content($word){
    // Store the word first: getContent() builds the request URL from $this->word,
    // and each helper called below fetches the page through getContent().
    $this->word = $word;

    // The helpers are called in the same order as the keys they populate.
    return array(
        "symbol"  => $this->Pronounced(), // phonetic symbols
        "pro"     => $this->getSay(),     // pronunciation
        "example" => $this->getExample(), // example sentences
        "explain" => $this->getExplain(), // concise definitions
        "synonym" => $this->getSynonym(), // synonyms and antonyms
        "phrase"  => $this->getPhrase()   // phrase array
    );
}

 

 

/**

* Remotely obtain Baidu translation content

* Performs an HTTP GET request with cURL

* @return string|false response body, or false when the transfer fails

*

*/

 

private function getContent(){
    // Fetches the Baidu dictionary result page for $this->word with cURL and
    // returns the response body (curl_exec() returns false when the transfer fails).

    // Browser-like User-Agent string sent with the request.
    $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";

    // Encode the word so that spaces and non-ASCII characters still form a valid query string.
    $url = "http://dict.baidu.com/s?wd=".urlencode($this->word);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $result = curl_exec($ch);

    // Fixed: the check previously used the undefined variable $curl, so transfer
    // errors were never reported. It must inspect the handle that ran the request.
    if (curl_errno($ch)) {
        echo 'Errno'.curl_error($ch);
    }

    curl_close($ch);

    return $result;
}

 

 

/**

* Get the phonetic symbols from the Baidu dictionary page

* @return array(UK, US)

*

*/

 

private function Pronounced(){

// Fetches the result page and returns the first two captures of the pattern
// below under the keys 'en' and 'us'. content() stores this value as "symbol".

$data = $this -> getContent();

// NOTE(review): as it appears here the pattern contains unescaped double quotes,
// so this line does not parse. The quotes were presumably escaped (\") in the
// original, and the markup that followed (.*) looks lost; with nothing after an
// ungreedy (.*) the capture would be empty. Confirm against the upstream source.

preg_match_all("/"EN-US">(.*)/Ui",$data,$pronounced);

// NOTE(review): both offsets are read without checking that a match exists.

return array(

'en' => $pronounced[1][0],

'us' => $pronounced[1][1]

);

}

 

/**

* Get Baidu translation pronunciation

* return array(UK, US)

*

*/

 

private function getSay(){
    // Fetches the result page and returns the first two url="..." attribute
    // values found in it under the keys 'en' and 'us'. content() stores this
    // value as "pro" (pronunciation).
    $data = $this->getContent();

    // The pattern is single-quoted so the double quotes inside it need no
    // escaping; the literal as previously written contained bare double quotes
    // and terminated the string early. U = ungreedy, i = case-insensitive.
    preg_match_all('/url="(.*)"/Ui', $data, $pronounced);

    // Either entry may be missing (no match, or only one match): return null
    // for it instead of reading an undefined offset.
    return array(
        'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
        'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null
    );
}

 

/**

* Get Baidu translation examples

* return array() multidimensional array Example

*

*/

 

private function getExample(){
    // Extracts the example sentences embedded in the page as the JavaScript
    // literal "var example_data = [...];" and returns them as a list of
    // chunks of (at most) two strings each. Returns an empty array when the
    // page contains no example data.
    $data = $this->getContent();

    // s = dot matches newlines, U = ungreedy: capture up to the first "];".
    preg_match_all('/var example_data = (.*)];/Us', $data, $example);
    $raw = isset($example[1][0]) ? $example[1][0] : '';

    // Normalise the leading brackets so the payload can be split on "[[[" and
    // then on "[[" below.
    $data1 = "[[[".ltrim($raw, "[");

    $str = "";
    foreach (explode("[[[", $data1) as $value) {
        foreach (explode("[[", "[[".$value) as $v) {
            // Collect the first quoted string of every ["...", entry.
            // The opening bracket must be escaped: an unescaped "[" starts a
            // character class and makes the pattern fail to compile.
            preg_match_all('/\["(.*)",/Us', "[".$v, $match);
            if (!empty($match[1])) {
                // implode(separator, array): the reversed legacy argument
                // order is no longer accepted by PHP 8.
                $str .= implode(" ", $match[1])."@";
            }
        }
    }

    $data4 = trim($str, "@");
    if ($data4 === '') {
        // Nothing was extracted; avoid returning a single chunk holding "".
        return array();
    }

    // "@" separates the joined pieces; consecutive pieces are paired up.
    return array_chunk(explode("@", $data4), 2);
}

 

/**

* Get a concise explanation

* return array (x => "Part of Speech", b => "Attachment")

*

**/

 

private function getExplain(){

// Fetches the result page, isolates the part captured after id="en-simple-means">
// and builds two maps from it, returned as array("x" => ..., "b" => ...).

$data = $this -> getContent();

// NOTE(review): the pattern literals in this method contain unescaped double
// quotes and unnamed "(?P.*)" groups as they appear here, while the code below
// reads the groups "adj", "name", "tag" and "word". The group names and the
// surrounding markup were presumably lost; confirm against the upstream source.

preg_match_all("/id="en-simple-means">(.*)/Us",$data,$explain);

// NOTE(review): read without checking that the section was found.

$r_data = $explain[1][0];

preg_match_all("/

(?P.*)(?P.*)

/Us", $r_data, $a_data);

preg_match_all("/(?P[^>]+):(?P.*)/Us", $r_data, $b_data);

 

// "x": each "adj" capture mapped to the "name" capture at the same index.

$result = array();

foreach ($a_data["adj"] as $key => $value) {

$result[$value] = $a_data["name"][$key];

}

 

// "b": each "tag" capture mapped to the tag-stripped "word" capture at the same index.

$word_b = array();

foreach ($b_data["tag"] as $key => $value) {

$word_b[$value] = strip_tags($b_data["word"][$key]);

}

 

$result_data = array("x" => $result,"b" => $word_b);

 

return $result_data;

}

 

 

/**

* Get synonyms

* return array(0 => "Synonym", 1 => "Antonym") usually a multi-dimensional array

*

*/

 

private function getSynonym(){

$data = $this -> getContent();

preg_match_all("/id="en-syn-ant">(.*)/Us",$data,$synonym);

$content = $synonym[1][0];

$data1 = explode("", $content);

$result = array();

$data2 = array();

foreach ($data1 as $key => $value) {

preg_match_all("/(?P.*) ;

    (?.*)
/Us", $value, $r_data);

$data2[$key]["adj"] = $r_data["adj"];

$data2[$key]["content"] = $r_data["content"];

}

 

foreach ($data2 as $key => $value) {

foreach ($value["content"] as $k => $v) {

if(!empty($v)){

preg_match_all("/

  • (?P.*)</p>(?P<value>.*)</li>/Us", $v, $v_data);</p> <p>foreach ($v_data['title'] as $m => $d) {</p> <p>$data = strip_tags(preg_replace("<</a>>"," ", $v_data["value"][$m]));</p> <p>$result[$key][$value["adj"][$k]][$d] = $data;</p> <p>}</p> <p>}</p> <p>}</p> <p>}</p> <p>return $result;</p> <p>}</p> <p> </p> <p>/**</p> <p>* Get phrase phrase</p> <p>* return array (key => value) one-dimensional or multi-dimensional array</p> <p>*</p> <p>*/</p> <p> </p> <p>private function getPhrase(){</p> <p>$num = self::$num;</p> <p>$data = $this -> getContent();</p> <p>preg_match_all("/id="en-phrase">(.*)<div class="source">/Us",$data,$phrase);</p> <p>$data = explode("</dd>",$phrase[1][0]);</p> <p>$data1 = array_slice($data,0,$num);</p> <p>$result = array();</p> <p>foreach ($data1 as $key => $value) {</p> <p>$data2 = explode("</p>", $value);</p> <p>$n = count($data2);</p> <p>if($n<=3){</p> <p>$result[str_replace(" ","",strip_tags($data2[0]))] = strip_tags($data2[1]);</p> <p>}else{</p> <p>$data3 = array_slice($data2,0,$n-1);</p> <p>$data4 = array_slice($data2,0,2);</p> <p>$res = array_diff($data3,$data4);</p> <p>$data5 = array_chunk($res,2);</p> <p>$key_value = trim(str_replace(" ","",strip_tags($data4[0])));</p> <p>$result[$key_value] = strip_tags($data4[1]);</p> <p>foreach ($data5 as $key => $value) {</p> <p>foreach ($value as $k => $v) {</p> <p>$value[$k] = strip_tags($v);</p> <p>}</p> <p>$array = array($result[$key_value],$value);</p> <p>if (array_key_exists($key_value, $result)){</p> <p>$result[$key_value] = $array;</p> <p>}</p> <p>}</p> <p> </p> <p>}</p> <p>}</p> <p>return $result;</p> <p>}</p> <p> </p> <p>/**</p> <p>* Convert array to string</p> <p>*</p> <p>* @param array $data array</p> <p>* @param bool $isformdata If it is 0, new_stripslashes processing is not used, optional parameter, default is 1</p> <p>* @return string Returns a string, if data is empty, then returns empty </p> <p>*/</p> <p>private function array2string($data, $isformdata = 1) {</p> <p>if($data == '') 
return '';</p> <p>if($isformdata) $data = $this->new_stripslashes($data);</p> <p>return addslashes(var_export($data, TRUE));</p> <p>}</p> <p> </p> <p>/**</p> <p>* Returns the string or array processed by stripslashes</p> <p>* @param $string The string or array to be processed</p> <p>* @return mixed</p> <p>*/</p> <p>private function new_stripslashes($string) {</p> <p>if(!is_array($string)) return stripslashes($string);</p> <p>foreach($string as $key => $val) $string[$key] = $this->new_stripslashes($val);</p> <p>return $string;</p> <p>}</p> <p> </p> <p>}</p> <p> </p> <p>// $word = new dict("express");</p> <p>// $word ->content();</p> </td> </tr> </tbody> </table> <p>The above is the entire content of this article. It is a very practical function. I hope you will like it. </p> <p align="left"></p> <div style="display:none;"> <span id="url" itemprop="url">http://www.bkjia.com/PHPjc/950763.html</span><span id="indexUrl" itemprop="indexUrl">www.bkjia.com</span><span id="isOriginal" itemprop="isOriginal">true</span><span id="isBasedOnUrl" itemprop="isBasedOnUrl">http: //www.bkjia.com/PHPjc/950763.html</span><span id="genre" itemprop="genre">TechArticle</span><span id="description" itemprop="description">PHP production of Baidu dictionary word search collector This article mainly introduces the relevant aspects of PHP production of Baidu dictionary word search collector For information, friends who need it can refer to Baidu dict collection sample writing...</span> </div> </div> <div class="art_confoot"></div> </div> </div></div><div class="nphpQianMsg"><div class="clear"></div></div><div class="nphpQianSheng"><span>Statement:</span><div>The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. 
If you find any content suspected of plagiarism or infringement, please contact admin@php.cn</div></div></div><div class="nphpSytBox"><span>Previous article:<a class="dBlack" title="Summary of PHP file upload problems, _PHP tutorial" href="http://m.php.cn/faq/292159.html">Summary of PHP file upload problems, _PHP tutorial</a></span><span>Next article:<a class="dBlack" title="Summary of PHP file upload problems, _PHP tutorial" href="http://m.php.cn/faq/292161.html">Summary of PHP file upload problems, _PHP tutorial</a></span></div><div class="nphpSytBox2"><div class="nphpZbktTitle"><h2>Related articles</h2><em><a href="http://m.php.cn/article.html" class="bBlack"><i>See more</i><b></b></a></em><div class="clear"></div></div><ins class="adsbygoogle" style="display:block" data-ad-format="fluid" data-ad-layout-key="-6t+ed+2i-1n-4w" data-ad-client="ca-pub-5902227090019525" data-ad-slot="8966999616"></ins><script> (adsbygoogle = window.adsbygoogle || []).push({}); </script><ul class="nphpXgwzList"><li><b></b><a href="http://m.php.cn/faq/1.html" title="How to use cURL to implement Get and Post requests in PHP" class="aBlack">How to use cURL to implement Get and Post requests in PHP</a><div class="clear"></div></li><li><b></b><a href="http://m.php.cn/faq/1.html" title="How to use cURL to implement Get and Post requests in PHP" class="aBlack">How to use cURL to implement Get and Post requests in PHP</a><div class="clear"></div></li><li><b></b><a href="http://m.php.cn/faq/1.html" title="How to use cURL to implement Get and Post requests in PHP" class="aBlack">How to use cURL to implement Get and Post requests in PHP</a><div class="clear"></div></li><li><b></b><a href="http://m.php.cn/faq/1.html" title="How to use cURL to implement Get and Post requests in PHP" class="aBlack">How to use cURL to implement Get and Post requests in PHP</a><div class="clear"></div></li><li><b></b><a href="http://m.php.cn/faq/2.html" title="All expression symbols in regular expressions (summary)" 
class="aBlack">All expression symbols in regular expressions (summary)</a><div class="clear"></div></li></ul></div></div><ins class="adsbygoogle" style="display:block" data-ad-format="autorelaxed" data-ad-client="ca-pub-5902227090019525" data-ad-slot="5027754603"></ins><script> (adsbygoogle = window.adsbygoogle || []).push({}); </script><footer><div class="footer"><div class="footertop"><img src="/static/imghwm/logo.png" alt=""><p>Public welfare online PHP training,Help PHP learners grow quickly!</p></div><div class="footermid"><a href="http://m.php.cn/about/us.html">About us</a><a href="http://m.php.cn/about/disclaimer.html">Disclaimer</a><a href="http://m.php.cn/update/article_0_1.html">Sitemap</a></div><div class="footerbottom"><p> © php.cn All rights reserved </p></div></div></footer><script>isLogin = 0;</script><script type="text/javascript" src="/static/layui/layui.js"></script><script type="text/javascript" src="/static/js/global.js?4.9.47"></script></div><script src="https://vdse.bdstatic.com//search-video.v1.min.js"></script><link rel='stylesheet' id='_main-css' href='/static/css/viewer.min.css' type='text/css' media='all'/><script type='text/javascript' src='/static/js/viewer.min.js?1'></script><script type='text/javascript' src='/static/js/jquery-viewer.min.js'></script><script>jQuery.fn.wait = function (func, times, interval) { var _times = times || -1, //100次 _interval = interval || 20, //20毫秒每次 _self = this, _selector = this.selector, //选择器 _iIntervalID; //定时器id if( this.length ){ //如果已经获取到了,就直接执行函数 func && func.call(this); } else { _iIntervalID = setInterval(function() { if(!_times) { //是0就退出 clearInterval(_iIntervalID); } _times <= 0 || _times--; //如果是正数就 -- _self = $(_selector); //再次选择 if( _self.length ) { //判断是否取到 func && func.call(_self); clearInterval(_iIntervalID); } }, _interval); } return this; } $("table.syntaxhighlighter").wait(function() { $('table.syntaxhighlighter').append("<p class='cnblogs_code_footer'><span 
class='cnblogs_code_footer_icon'></span></p>"); }); $(document).on("click", ".cnblogs_code_footer",function(){ $(this).parents('table.syntaxhighlighter').css('display','inline-table');$(this).hide(); }); $('.nphpQianCont').viewer({navbar:true,title:false,toolbar:false,movable:false,viewed:function(){$('img').click(function(){$('.viewer-close').trigger('click');});}}); </script></body></html>