<code>import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.
Date
;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
public
class
GithubRepoPageProcessor
implements
PageProcessor {
// Workbook handle assigned by openXls(); null until a spreadsheet has been opened.
jxl.Workbook readwb = null;

// HTML of the page handled by process(), split on "\n".
String[] a = new String[] {};

// Holder that process() fills with the attributes scraped from a page.
Goodsdata gd = new Goodsdata();

// Database helper; not referenced by any method visible in this class.
DatabaseControl dc = new DatabaseControl();

// Product codes appended by openXls() and read by main(); shared by all instances.
static ArrayList<String> list = new ArrayList<String>();

// Crawler settings: 3 retries, sleep time 100, pages decoded as Shift_JIS.
private Site site = Site.me()
        .setRetryTimes(3)
        .setSleepTime(100)
        .setCharset("Shift_JIS");
/**
 * Scrapes one product-detail page and copies the extracted attributes into {@code gd}.
 *
 * <p>The attribute cells (maker number, OEM number, colour, material, size, contents,
 * warning, introduction, ...) are read with fixed XPath expressions and passed to
 * {@code gd} exactly as the selector returns them, so a cell that is missing from the
 * page is stored as {@code null}. The related-goods and similar-goods code lists are
 * built by scanning the raw HTML line by line and are stored as {@code "nf-<code>"}
 * entries joined with {@code ":"} (empty string when the section is absent).
 *
 * <p>NOTE(review): this method only fills {@code gd}; it does not hand it to {@code dc}
 * or to the page's result items, and the computed stock status is not stored anywhere.
 * Confirm whether persistence is expected to happen here.
 *
 * @param page the downloaded page supplied by the crawler
 */
public void process(Page page) {
    // Every attribute cell sits in the same nested table; only the row/column differs.
    final String detailRows =
            "//p[1]//p[2]//p[2]//table[5]//tbody//tr[4]//td//table//tbody";

    String maker_no = page.getHtml().xpath(detailRows + "//tr[2]//td[1]/text()").get();
    String oem_no = page.getHtml().xpath(detailRows + "//tr[2]//td[2]/text()").get();
    String color = page.getHtml().xpath(detailRows + "//tr[4]//td[1]/text()").get();
    String material = page.getHtml().xpath(detailRows + "//tr[4]//td[2]/text()").get();
    String size = page.getHtml().xpath(detailRows + "//tr[6]//td/text()").get();
    String innerGoods = page.getHtml().xpath(detailRows + "//tr[8]//td/text()").get();
    String rightMor = page.getHtml().xpath(detailRows + "//tr[10]//td/text()").get();
    String warning = page.getHtml().xpath(detailRows + "//tr[12]//td/text()").get();
    String introduction = page.getHtml().xpath(detailRows + "//tr[14]//td/text()").get();

    // TODO(review): the stock status is computed but has no consumer in this class;
    // store it on gd if Goodsdata offers a matching setter.
    String todey_status = parseStockStatus(
            page.getHtml().xpath("//p[1]//p[2]//p[2]//table[4]//tbody//tr//td").get());

    String[] lines = page.getHtml().toString().split("\n");
    // The field is still refreshed for any code that reads it; the scanning below
    // works on the local array only.
    a = lines;

    String referedGoods = "";
    if (page.getHtml()
            .xpath("//p[1]//p[2]//p[2]//table[6]//tbody//tr[1]//td//table//tbody//tr[1]//th")
            .match()) {
        referedGoods = collectGoodsCodes(lines, "この商品の関連商品");
    }

    String similiarGoods = "";
    final String similarHeading =
            "//p[1]//p[2]//p[2]//table[6]//tbody//tr[2]//td//table//tbody//tr[1]//th//strong";
    if (page.getHtml().xpath(similarHeading).match()) {
        String similiarGoodscheck = page.getHtml().xpath(similarHeading + "/text()").get();
        // A null heading would throw inside contains(), and a blank one would match
        // every line of the page, so both are treated as "no similar-goods section".
        if (similiarGoodscheck != null && similiarGoodscheck.trim().length() > 0) {
            similiarGoods = collectGoodsCodes(lines, similiarGoodscheck);
        }
    }

    gd.setMaker_no(maker_no);
    gd.setOem_no(oem_no);
    gd.setColor(color);
    gd.setMaterial(material);
    gd.setSize(size);
    gd.setInnerGoods(innerGoods);
    gd.setRightMor(rightMor);
    gd.setWarning(warning);
    gd.setIntroduction(introduction);
    gd.setReferedGoods(referedGoods);
    gd.setSimiliarGoods(similiarGoods);
}

/**
 * Derives the stock figure from the HTML of the status cell.
 *
 * @param statusCell HTML of the status cell, or {@code null} when the cell is missing
 * @return {@code "0"} when the cell contains the sold-out text, otherwise the first
 *         one- or two-digit number found in the second-to-last {@code ">"}-separated
 *         fragment; empty string when the cell is missing or holds no such number
 */
private String parseStockStatus(String statusCell) {
    if (statusCell == null) {
        return "";
    }
    if (statusCell.contains("売り切れ中です。")) {
        return "0";
    }
    String[] fragments = statusCell.split(">");
    if (fragments.length < 2) {
        return "";
    }
    String count = RegexString(fragments[fragments.length - 2], "\\d{1,2}");
    return count == null ? "" : count;
}

/**
 * Collects the product codes listed in the HTML section(s) introduced by {@code marker}.
 *
 * <p>A section starts on the line after one containing {@code marker} and ends at the
 * next line containing {@code "</table>"} (or at the end of the page). Within a section,
 * every line containing {@code "商品番号"} contributes the first product code found on it.
 *
 * @param lines  page HTML split into lines
 * @param marker text identifying the heading line of a section
 * @return the codes as {@code "nf-<code>"} entries joined with {@code ":"};
 *         empty string when no code was found
 */
private String collectGoodsCodes(String[] lines, String marker) {
    final String codePattern = "\\d{6}|\\b\\w{2,3}\\d{3,4}";
    StringBuilder joined = new StringBuilder();
    int i = 0;
    while (i < lines.length) {
        if (!lines[i].contains(marker)) {
            i++;
            continue;
        }
        int j = i + 1;
        for (; j < lines.length && !lines[j].contains("</table>"); j++) {
            if (!lines[j].contains("商品番号")) {
                continue;
            }
            String code = RegexString(lines[j], codePattern);
            if (code == null) {
                // Label without a recognisable code: skip instead of emitting "nf-null".
                continue;
            }
            if (joined.length() > 0) {
                joined.append(':');
            }
            joined.append("nf-").append(code);
        }
        // Resume at the end of the section so its lines are not scanned a second time.
        i = j;
    }
    return joined.toString();
}
/**
 * Returns the first substring of {@code targetStr} matched by {@code patternStr}.
 *
 * @param targetStr  text to search; may be {@code null}
 * @param patternStr regular expression; it is compiled on every call
 * @return the first match, or {@code null} when nothing matches or when either
 *         argument is {@code null}
 * @throws java.util.regex.PatternSyntaxException if {@code patternStr} is not a valid
 *         regular expression
 */
public String RegexString(String targetStr, String patternStr) {
    // XPath lookups in this class can yield null; answer "no match" instead of throwing.
    if (targetStr == null || patternStr == null) {
        return null;
    }
    Matcher matcher = Pattern.compile(patternStr).matcher(targetStr);
    return matcher.find() ? matcher.group() : null;
}
/**
 * Supplies the crawler settings held in the {@code site} field.
 *
 * @return the {@code Site} instance configured for this processor
 */
public Site getSite() {
    return this.site;
}
/**
 * Loads the product codes from the default spreadsheet location into {@code list}.
 *
 * @throws BiffException if the file is not a readable Excel workbook
 * @throws IOException   if the file cannot be opened or read
 */
public void openXls() throws BiffException, IOException {
    openXls("C:\\Users\\xujio\\Desktop\\itemdatabase_neo.xls");
}

/**
 * Loads the product codes from the first sheet of the given workbook into {@code list}.
 *
 * <p>The column is chosen by looking for the header {@code "管理番号"} in row 0; when no
 * such header exists, column 0 is used. For every following row the cell text is split
 * on {@code "-"} and the second part is appended to {@code list}. Rows whose cell does
 * not contain a {@code "-"} are skipped.
 *
 * <p>Failures are reported to the caller through the declared exceptions rather than
 * being printed and swallowed, and both the workbook and the underlying stream are
 * released on every path.
 *
 * @param xlsPath path of the {@code .xls} file to read
 * @throws BiffException if the file is not a readable Excel workbook
 * @throws IOException   if the file cannot be opened or read
 */
public void openXls(String xlsPath) throws BiffException, IOException {
    InputStream instream = new FileInputStream(xlsPath);
    Workbook workbook = null;
    try {
        workbook = Workbook.getWorkbook(instream);
        readwb = workbook;
        Sheet readsheet = workbook.getSheet(0);

        // Locate the column holding the management numbers; default to the first column.
        int column = 0;
        int rsColumn = readsheet.getColumns();
        for (int j = 0; j < rsColumn; j++) {
            Cell header = readsheet.getCell(j, 0);
            if (header.getContents().equals("管理番号")) {
                column = j;
                break;
            }
        }

        int rsRows = readsheet.getRows();
        for (int i = 1; i < rsRows; i++) {
            String originNum = readsheet.getCell(column, i).getContents();
            if (originNum == null) {
                continue;
            }
            String[] numGoods = originNum.split("-");
            if (numGoods.length < 2) {
                // Blank or malformed cell: skip it instead of aborting the whole load.
                continue;
            }
            list.add(numGoods[1]);
        }
    } finally {
        try {
            // The workbook is still null when opening it failed.
            if (workbook != null) {
                workbook.close();
            }
        } finally {
            instream.close();
        }
    }
}
/**
 * Entry point: loads the product codes from the spreadsheet and crawls the detail
 * pages of at most the first five of them, one spider run per page.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    try {
        new GithubRepoPageProcessor().openXls();
    } catch (BiffException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // The list can hold fewer than five codes (it is empty when loading failed),
    // so the loop is bounded by its actual size.
    int pagesToCrawl = Math.min(5, list.size());
    for (int i = 0; i < pagesToCrawl; i++) {
        // Build the address of the detail page for this product code.
        String url = "http://www.neofactory.co.jp/product_detail/" + list.get(i) + "/";
        Spider.create(new GithubRepoPageProcessor()).addUrl(url).thread(5).run();
    }
}
}</code>