功能其实很简单:通过 phantomjs.exe 采集 url 加载的资源,再以子进程的方式启动 nodejs 下载所有的资源;对于 css 资源,还会匹配 css 内容,下载其中 url 引用的资源
当然功能还是很简单的,在响应式设计和异步加载的情况下,还是有很多资源没有能够下载,需要根据实际情况处理下
首先当然是下载 nodejs 和 phantomjs
下面是 phantomjs.exe 执行的 down.js
/*
 * down.js — PhantomJS driver script (run with phantomjs, not with node).
 *
 * Opens the page given on the command line, records the URL of every
 * resource response the page triggers, then starts
 * `node --harmony downHtml.js <comma-joined urls>` as a child process,
 * relays the child's output and exits with the child's exit code.
 *
 * NOTE: PhantomJS 2.x does not support most ES2015 syntax, so this script
 * deliberately sticks to `var` and plain function expressions.
 */
var page = require('webpage').create();
var system = require('system');
var spawn = require('child_process').spawn;

if (system.args.length === 1) {
    console.log('Usage: down.js <some URL>');
    phantom.exit(1);
} else {
    var urls = [];
    var seen = {}; // URLs already collected, so each one is downloaded once

    page.address = system.args[1];

    page.onResourceReceived = function (res) {
        // Each response is reported in several stages; collect it only once.
        if (res.stage !== 'start') {
            return;
        }
        // Only http(s) resources can be downloaded by downHtml.js. Inline
        // `data:` URIs in particular contain commas and would corrupt the
        // comma-joined argument passed to the child process.
        if (!/^https?:\/\//i.test(res.url)) {
            return;
        }
        if (seen[res.url] === true) {
            return;
        }
        seen[res.url] = true;
        urls.push(res.url);
    };

    page.open(page.address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address');
            phantom.exit(1);
            return;
        }

        console.log('down resource ' + urls.length + ' urls.');
        if (urls.length === 0) {
            // Nothing to hand over; do not start node with an empty argument.
            phantom.exit(0);
            return;
        }

        // Limitation: URLs are joined with ',' because downHtml.js splits its
        // argument on ','; an http(s) URL that itself contains a comma will
        // be split incorrectly on the other side.
        var child = spawn('node', ['--harmony', 'downHtml.js', urls.join(',')]);

        child.stdout.on('data', function (data) {
            console.log(data);
        });
        child.stderr.on('data', function (data) {
            console.log(data);
        });
        child.on('exit', function (code) {
            // Propagate the downloader's exit status instead of always 0.
            phantom.exit(typeof code === 'number' ? code : 0);
        });
    });
}
下面是对应的node运行的 downHtml.js
"use strict";

/*
 * downHtml.js — Node side of the page downloader.
 *
 * Usage: node --harmony downHtml.js <url1,url2,...>
 *
 * Every URL in the comma-separated argument is saved below the current
 * working directory as <cwd>/<hostname>/<pathname>. A pathname ending in
 * "/" is stored as ".../index.html". For CSS resources the stylesheet text
 * is fetched a second time and every url(...) reference found in it is
 * downloaded as well.
 */
var fs = require('fs');
var http = require('http');
var https = require('https');
var path = require('path');
var r_url = require('url');

var CWD = process.cwd();

// Directories already known to exist; saves repeated mkdir calls.
var dirCache = Object.create(null);

// URLs for which a download has already been started.
var isDownMap = Object.create(null);

/**
 * Create directory `pathStr`, including missing parent directories.
 *
 * @param {string} pathStr - Directory to create.
 * @param {function(Error=)} callback - Called with no argument on success,
 *   or with the error when the directory could not be created.
 */
function makedir(pathStr, callback) {
    if (dirCache[pathStr] === 1) {
        callback();
        return;
    }
    fs.mkdir(pathStr, function (err) {
        // EEXIST: someone (possibly a concurrent call) already created it.
        if (!err || err.code === 'EEXIST') {
            dirCache[pathStr] = 1;
            callback();
            return;
        }
        var parent = path.dirname(pathStr);
        // ENOENT means a parent is missing; anything else is a real failure.
        // parent === pathStr guards against looping at the filesystem root.
        if (err.code !== 'ENOENT' || parent === pathStr) {
            callback(err);
            return;
        }
        makedir(parent, function (parentErr) {
            if (parentErr) {
                callback(parentErr);
                return;
            }
            fs.mkdir(pathStr, function (retryErr) {
                if (retryErr && retryErr.code !== 'EEXIST') {
                    callback(retryErr);
                    return;
                }
                dirCache[pathStr] = 1;
                callback();
            });
        });
    });
}

/**
 * Pick the core module able to fetch `URL`.
 *
 * @param {string} URL
 * @returns {object|null} `http`, `https`, or null for any other scheme
 *   (for example `data:` URIs).
 */
function clientFor(URL) {
    var protocol = r_url.parse(String(URL)).protocol;
    if (protocol === 'http:') {
        return http;
    }
    if (protocol === 'https:') {
        return https;
    }
    return null;
}

/**
 * Map a resource URL to the local file it is saved as.
 *
 * @param {string} URL - Absolute http(s) URL.
 * @param {string} root - Directory below which files are stored.
 * @returns {string|null} Absolute file path, or null when the URL has no
 *   hostname or its path would escape `<root>/<hostname>` (".." segments).
 */
function localPathFor(URL, root) {
    var uo = r_url.parse(URL);
    if (!uo.hostname) {
        return null;
    }
    var pathname = uo.pathname || '/';
    // A directory-style URL needs a file name to be written to.
    if (pathname.charAt(pathname.length - 1) === '/') {
        pathname += 'index.html';
    }
    var base = path.resolve(root, uo.hostname);
    var target = path.resolve(base, '.' + pathname);
    // The URL comes from a remote page: never write outside the host folder.
    if (target.indexOf(base + path.sep) !== 0) {
        return null;
    }
    return target;
}

/**
 * Collect the targets of all url(...) references in a stylesheet.
 *
 * @param {string} body - CSS source text.
 * @returns {string[]} Referenced URLs in document order, without quotes.
 *   Empty references, `data:` URIs and fragment-only references are omitted.
 */
function extractCssUrls(body) {
    var pattern = /url\(\s*(['"]?)(.*?)\1\s*\)/gi;
    var found = [];
    var m;
    while ((m = pattern.exec(String(body))) !== null) {
        var ref = m[2].trim();
        if (ref !== '' && ref.charAt(0) !== '#' && !/^data:/i.test(ref)) {
            found.push(ref);
        }
    }
    return found;
}

/**
 * GET `URL` and pass the response to `onResponse`; failures are logged
 * instead of crashing the process.
 */
function openUrl(URL, onResponse) {
    var client = clientFor(URL);
    if (!client) {
        console.error('skip unsupported url: ' + URL);
        return;
    }
    client.get(URL, onResponse).on('error', function (err) {
        console.error('request failed: ' + URL + ' (' + err.message + ')');
    });
}

/**
 * Download `URL` to its local path. `onResponse`, when given, is called
 * with the response before it is piped to disk.
 */
function download(URL, onResponse) {
    if (!clientFor(URL)) {
        console.error('skip unsupported url: ' + URL);
        return;
    }
    var filepath = localPathFor(URL, CWD);
    if (!filepath) {
        console.error('skip url without a safe local path: ' + URL);
        return;
    }
    makedir(path.dirname(filepath), function (err) {
        if (err) {
            console.error('cannot create directory for ' + filepath + ' (' + err.message + ')');
            return;
        }
        openUrl(URL, function (res) {
            if (onResponse) {
                onResponse(res);
            }
            var out = fs.createWriteStream(filepath);
            out.on('error', function (writeErr) {
                console.error('write failed: ' + filepath + ' (' + writeErr.message + ')');
            });
            res.pipe(out);
        });
    });
}

/**
 * Fetch the stylesheet at `URL` and download every resource it references
 * through url(...), resolving relative references against `URL`.
 *
 * @param {string} URL - Absolute URL of the CSS file.
 */
var downImgFromCss = function (URL) {
    openUrl(URL, function (res) {
        var body = '';
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            body += chunk;
        });
        res.on('end', function () {
            extractCssUrls(body).forEach(function (ref) {
                var imgUrl = r_url.resolve(URL, ref);
                if (isDownMap[imgUrl]) {
                    return;
                }
                isDownMap[imgUrl] = 1;
                download(imgUrl);
            });
        });
    });
};

/**
 * Entry point: download every URL listed in argv[2] (comma separated).
 *
 * @param {string[]} argv - Process arguments (`process.argv`).
 */
function main(argv) {
    if (!argv[2]) {
        console.error('Usage: node downHtml.js <url1,url2,...>');
        process.exit(1);
        return;
    }
    var URLS = argv[2].split(',').filter(Boolean);
    URLS.forEach(function (URL) {
        if (isDownMap[URL]) {
            return;
        }
        isDownMap[URL] = 1;
        download(URL, function (res) {
            var contentType = res.headers['content-type'];
            if (URL.indexOf('.css') !== -1 || (contentType && contentType.indexOf('text/css') !== -1)) {
                console.log('down images form css file:' + URL + '.');
                downImgFromCss(URL);
            }
        });
    });
}

// Run only when started as a script, so the helpers can be loaded without
// triggering any network or filesystem work.
if (typeof module !== 'undefined' && require.main === module) {
    main(process.argv);
}
将 down.js 和 downHtml.js 放在同一个文件夹下,通过下列 cmd 命令运行:
D:\phantomjs-2.0.0-windows\bin\phantomjs.exe down.js http://www.youku.com/
以上所述就是本文的全部内容了,希望大家能够喜欢。

Vercel是什么?本篇文章带大家了解一下Vercel,并介绍一下在Vercel中部署 Node 服务的方法,希望对大家有所帮助!

gm是基于node.js的图片处理插件,它封装了图片处理工具GraphicsMagick(GM)和ImageMagick(IM),可使用spawn的方式调用。gm插件不是node默认安装的,需执行“npm install gm -S”进行安装才可使用。

今天跟大家介绍一个最新开源的 javaScript 运行时:Bun.js。比 Node.js 快三倍,新 JavaScript 运行时 Bun 火了!

大家都知道 Node.js 是单线程的,却不知它也提供了多进(线)程模块来加速处理一些特殊任务,本文便带领大家了解下 Node.js 的多进(线)程,希望对大家有所帮助!

在nodejs中,lts是长期支持的意思,是“Long Term Support”的缩写;Node有奇数版本和偶数版本两条发布流程线,当一个奇数版本发布后,最近的一个偶数版本会立即进入LTS维护计划,一直持续18个月,在之后会有12个月的延长维护期,lts期间可以支持“bug fix”变更。

node怎么爬取数据?下面本篇文章给大家分享一个node爬虫实例,聊聊利用node抓取小说章节的方法,希望对大家有所帮助!


Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

AI Hentai Generator
Generate AI Hentai for free.

Hot Article

Hot Tools

Zend Studio 13.0.1
Powerful PHP integrated development environment

EditPlus Chinese cracked version
Small size, syntax highlighting, does not support code prompt function

Dreamweaver Mac version
Visual web development tools

Atom editor mac version download
The most popular open source editor

mPDF
mPDF is a PHP library that can generate PDF files from UTF-8 encoded HTML. The original author, Ian Back, wrote mPDF to output PDF files "on the fly" from his website and handle different languages. It is slower than original scripts like HTML2FPDF and produces larger files when using Unicode fonts, but supports CSS styles etc. and has a lot of enhancements. Supports almost all languages, including RTL (Arabic and Hebrew) and CJK (Chinese, Japanese and Korean). Supports nested block-level elements (such as P, DIV),
