Home > Java > Java Tutorial > Example of handling the Geetest (极验) slider CAPTCHA with Selenium in Java
本篇文章主要介绍了Java selenium处理极验滑动验证码示例,小编觉得挺不错的,现在分享给大家,也给大家做个参考。一起跟随小编过来看看吧
要爬取一个网站时遇到了极验的验证码,这周一直在想怎么破解它。在网上搜了很多资料,在知乎上看到有人问了这个问题,我就按照其中的思路大概实现了一下。
1.使用htmlunit(这种方式我没成功,模拟鼠标拖拽后轨迹没生成,可以跳过)
我用的是java,首先想到的是直接用htmlunit,于是做了一些初始化:
private void initWebClient() { if (webClient != null) { return; } webClient = new WebClient(BrowserVersion.FIREFOX_24); webClient.getOptions().setProxyConfig(new ProxyConfig("127.0.0.1",8888)); webClient.getOptions().setActiveXNative(true); webClient.getOptions().setUseInsecureSSL(true); // 配置证书 webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setCssEnabled(true); webClient.setCssErrorHandler(new SilentCssErrorHandler()); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); CookieManager cookieManager = new CookieManager(); List<org.apache.http.cookie.Cookie> httpCookies = client.getCookies();//其方式获取的cookie for (org.apache.http.cookie.Cookie cookie : httpCookies) { cookieManager.addCookie(new com.gargoylesoftware.htmlunit.util.Cookie(cookie)); } webClient.setCookieManager(cookieManager); }
初始化代理,cookie..然后就能正常调用了
HtmlPage page = webClient.getPage("http://www.qixin.com/login");//企信宝 gePageInfor(page);
下面就是我获取图片,还原图片并且模拟拖拽,(这里我觉得是有些问题的,可能是拖拽我模拟的不对导致触发的js并没有生成正确的轨迹,还请大家帮忙看看哪里错了)
private void gePageInfor(HtmlPage page) { String[] img_slice={"p", "class", "gt_cut_fullbg_slice"}; String[] img_bg_slice={"p", "class", "gt_cut_bg_slice"}; Htmlpision p = (Htmlpision) page.getElementById("captcha"); int deCAPTCHA = 0; try { byte[] img_slice_binary = client.get(getImgUrl(img_slice, p, true)).getBinary();//获取图片byte byte[] img_bg_slice_binary = client.get(getImgUrl(img_bg_slice, p, false)).getBinary(); //获取还原后的图片 BufferedImage geetestImg = ImgTest.getGeetestImg(img_slice_binary, ImgTest.imgArray); BufferedImage geetestImg2 = ImgTest.getGeetestImg(img_bg_slice_binary, ImgTest.imgArray); //获得图片移动位置(目前还有问题,需改用第三方图片识别) deCAPTCHA =ImgTest.deCAPTCHA(geetestImg,geetestImg2); System.out.println(deCAPTCHA); } catch (IOException | FetchException e) { e.printStackTrace(); } Htmlpision p_slider_knob = get_p_slider_knob(page,"gt_slider_knob gt_show");//获取要移动p HtmlPage mouseOver = (HtmlPage) p_slider_knob.mouseOver(); HtmlPage mouseDownPage = (HtmlPage)p_slider_knob.mouseDown(); p_slider_knob = get_p_slider_knob(mouseDownPage,"gt_slider_knob gt_show moving"); mouseMoveX(deCAPTCHA, p_slider_knob, mouseDownPage); HtmlPage newPage =(HtmlPage)p_slider_knob.mouseOver(); // newPage =(HtmlPage)p_slider_knob.mouseDown(); System.out.println(newPage.asXml()); p = (Htmlpision)newPage.getElementById("captcha"); HtmlElement htmlElement = p.getElementsByAttribute("p", "class", "gt_slice gt_show moving").get(0); System.out.println(htmlElement); newPage =(HtmlPage)p_slider_knob.mouseUp();//触发js,轨迹没有生成 System.out.println("---------------"); System.out.println(newPage.asXml()); if (newPage.getElementById("captcha")!=null) {//错误重试 //gePageInfor(newPage); } } private void mouseMoveX(int deCAPTCHA, Htmlpision p_slider_knob, HtmlPage mouseDown) { MouseEvent mouseEvent = new MouseEvent(p_slider_knob, MouseEvent.TYPE_MOUSE_MOVE, false, false, false, MouseEvent.BUTTON_LEFT); mouseEvent.setClientX( mouseEvent.getClientX()+((deCAPTCHA!=0)?deCAPTCHA:99)); //移动x坐标 ScriptResult scriptResult = 
mouseDown.getDocumentElement().fireEvent(mouseEvent); } private Htmlpision get_p_slider_knob(HtmlPage page,String classString) { return (Htmlpision)(((Htmlpision) page.getElementById("captcha")).getElementsByAttribute("p", "class", classString).get(0)); } private String getImgUrl(String[] img_slice, Htmlpision p, boolean isNeedCheckPostion) { String url =""; int[] postion = new int[2]; boolean empty = p.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]).isEmpty(); if (p.hasChildNodes() && !empty) { List<HtmlElement> elementsByAttribute = p.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]); for(int i = 0;i<elementsByAttribute.size();i++){ Htmlpision p_img = (Htmlpision)elementsByAttribute.get(i); String style = p_img.getAttribute("style"); String[] imge_url_position = style.split(";"); if(StringUtils.isBlank(url)){//确认url url = StringUtils.replacePattern(imge_url_position[0], ".*\\(", "").replace(")", ""); } if (isNeedCheckPostion) {//确认图片切割postion,两张图切割方式一样 background-position: -157px -58px // String[] positionS = StringUtils.split(StringUtils.remove(imge_url_position[1], "px").replace("-", "").replaceAll(".*:", ""), null); String[] positionS = StringUtils.split(StringUtils.removePattern(imge_url_position[1], "[^\\d+ \\s]"),null); postion[0] = Integer.parseInt(positionS[0]); postion[1] = Integer.parseInt(positionS[1]); int[] is = ImgTest.imgArray[i]; if (is[0]!=postion[0]||is[1]!=postion[1]) { logger.debug("更新分割postion"); ImgTest.imgArray[i] = postion; } System.out.println(ImgTest.imgArray); isNeedCheckPostion= false; } } } return url; }
对比图片获取位移的方法(deCAPTCHA)是错的,我就不放代码了。下面是其中还原图片用的方法,其实审查元素后你就明白怎么还原这个图片了,这里每次截取的是宽10px、高58px的小块:
public static BufferedImage getGeetestImg(byte[] binary, int[][] imgArray) throws IOException { BufferedImage img = ImageIO.read(new ByteArrayInputStream(binary)); List<BufferedImage> list = new ArrayList<>(); for (int i=0;i< imgArray.length;i++) { BufferedImage subimage = img.getSubimage(imgArray[i][0], imgArray[i][1], 10, 58); list.add(subimage); // ImageIO.write(subimage, "jpg", new File("d:\\image\\imgs"+i+".jpg")); } BufferedImage mergeImageUp = null; BufferedImage mergeImageDown = null; int mid = list.size()>>>1; for (int i = 0; i <mid-1 ; i++) { mergeImageUp = mergeImage(mergeImageUp==null?list.get(i):mergeImageUp, list.get(i+1), true); } for(int i = mid;i<list.size()-1;i++){ mergeImageDown = mergeImage(mergeImageDown==null?list.get(i):mergeImageDown,list.get(i+1), true); } img = mergeImage(mergeImageUp, mergeImageDown, false); return img; } public static BufferedImage mergeImage(BufferedImage img1, BufferedImage img2, boolean isHorizontal) throws IOException { int w1 = img1.getWidth(); int h1 = img1.getHeight(); int w2 = img2.getWidth(); int h2 = img2.getHeight(); // 从图片中读取RGB int[] ImageArrayOne = new int[w1 * h1]; ImageArrayOne = img1.getRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 逐行扫描图像中各个像素的RGB到数组中 int[] ImageArrayTwo = new int[w2 * h2]; ImageArrayTwo = img2.getRGB(0, 0, w2, h2, ImageArrayTwo, 0, w2); // 生成新图片 BufferedImage DestImage = null; if (isHorizontal) { // 水平方向合并 DestImage = new BufferedImage(w1+w2, h1, BufferedImage.TYPE_INT_RGB); DestImage.setRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB DestImage.setRGB(w1, 0, w2, h2, ImageArrayTwo, 0, w2); } else { // 垂直方向合并 DestImage = new BufferedImage(w1, h1 + h2, BufferedImage.TYPE_INT_RGB); DestImage.setRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB DestImage.setRGB(0, h1, w2, h2, ImageArrayTwo, 0, w2); // 设置下半部分的RGB } return DestImage; }
2.使用selenium
后来我想着是不是我模拟鼠标这个动作哪里有问题,于是又找到了selenium(2.42.2),它也能操作htmlunit,关键是它的鼠标动作好像封装得比较完整。
但是我尝试了以后发现,HtmlUnitMouse里的这个动作并没有实现:
/**
 * Unsupported operation: this mouse implementation cannot move to arbitrary coordinates.
 *
 * @param where   ignored
 * @param xOffset ignored
 * @param yOffset ignored
 * @throws UnsupportedOperationException always
 */
public void mouseMove(Coordinates where, long xOffset, long yOffset) {
    final String reason = "Moving to arbitrary X,Y coordinates not supported.";
    throw new UnsupportedOperationException(reason);
}
好吧,于是调用chrome吧
// Drives a real Chrome browser through Selenium to drag the Geetest slider knob, then
// copies the browser's cookies. Chrome is used because the HtmlUnit mouse cannot move to
// arbitrary coordinates.
System.setProperty("webdriver.chrome.driver", "C:\\chromedriver.exe");

// Route browser traffic through the local proxy.
Proxy proxy = new Proxy();
proxy.setHttpProxy("127.0.0.1:8888");
DesiredCapabilities capabilities = DesiredCapabilities.chrome();
capabilities.setCapability(CapabilityType.PROXY, proxy);

// NOTE(review): the driver is never quit in this snippet; call driver.quit() once the
// session is no longer needed, otherwise the browser process stays alive.
WebDriver driver = new ChromeDriver(capabilities);
driver.get("http://www.qixin.com/login");
driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
checkPage(driver, "return $('.gt_cut_fullbg_slice');");

// Page title and source, for debugging.
System.out.println("1 Page title is: " + driver.getTitle());
String pageSource = driver.getPageSource();
System.out.println(pageSource);

org.openqa.selenium.JavascriptExecutor executor = (org.openqa.selenium.JavascriptExecutor) driver;
// Constant-first comparison: executeScript may return null.
boolean equals = "complete".equals(executor.executeScript("return document.readyState"));
int moveX = 99; // horizontal drag distance in pixels (fixed placeholder value)
if (equals) {
    WebElement element = driver.findElement(By.className("gt_slider_knob"));
    Actions action = new Actions(driver);
    // dragAndDropBy expects offsets RELATIVE to the source element: click-and-hold on the
    // knob, move moveX pixels to the right with no vertical movement, release.
    action.dragAndDropBy(element, moveX, 0).perform();
    pageSource = driver.getPageSource();
}

// Update cookies: convert the browser's cookies to the project's Cookie type.
Set<org.openqa.selenium.Cookie> cookies = driver.manage().getCookies();
Set<Cookie> cookies2 = new HashSet<>();
for (org.openqa.selenium.Cookie cookie : cookies) {
    cookies2.add(new Cookie(cookie.getDomain(), cookie.getName(), cookie.getValue(), cookie.getPath(),
            cookie.getExpiry(), true));
}
for (Cookie cookie : cookies2) {
    // NOTE(review): the converted cookie is never stored anywhere; presumably it should be
    // handed to the HTTP client's cookie store - confirm against the rest of the project.
    org.apache.http.cookie.Cookie httpClient = cookie.toHttpClient();
}
System.out.println(pageSource);
这样提交的表单确实是有轨迹的,这里移动位置我先写了个固定值,可以由上面图片还原,以及一些开源的图片识别工具识别出位置。以上应该就能解决这个滑动验证码了
The above is the detailed content of "Example of handling the Geetest (极验) slider CAPTCHA with Selenium in Java". For more information, please follow other related articles on the PHP Chinese website!