# node抓取网络图片
有些朋友问,你的一些资源是怎么获取的?
难道是每天逛网站一张一张地手动保存图片吗?当然不是,那样每天都要耗费大量精力,岂不是要累死。今天做一个小教程,爬取 https://www.3gbizhi.com/wallMV/ 。(注意:下文示例代码中 titleUrl 实际配置的是 www.netbian.com 的各分类页面。)
- 本次使用node爬虫批量获取,其他资源同样道理..你懂的
- 所有操作在 Windows 10 下完成
- mac请自行百度,请提前搭建好node环境。
第一步:cd到根目录安装所有依赖
npm install
第二步:启动爬虫,接着往下看
node index.js
所用到代码文件:File.js--对常用文件处理的封装
const fs = require("fs");
(function () {
/**
 * Read a file and resolve with its contents decoded as text.
 *
 * The requested encoding is handed to fs.readFile so that it is actually
 * honoured; an unsupported encoding name or an I/O error rejects the promise.
 *
 * @param {String} path file to read
 * @param {String} encoding text encoding used to decode the bytes (falls back to "UTF-8" when falsy)
 * @returns {Promise<String>} the decoded file contents
 */
function readFile(path, encoding = "UTF-8") {
  const charset = encoding || "UTF-8";
  return new Promise((resolve, reject) => {
    // Passing the encoding makes fs.readFile deliver a decoded string
    // instead of a raw Buffer.
    fs.readFile(path, charset, (error, data) => {
      if (error) {
        reject(error);
        return;
      }
      console.log("readFile success: " + path);
      resolve(data);
    });
  });
}
/**
 * Write text to a file, replacing any existing content.
 *
 * The second argument is the content to write (the previous version wrote
 * its second argument to disk as well, but named it "encoding" and then
 * crashed while logging). The signature now mirrors writeFileImg.
 *
 * @param {string} path file to write
 * @param {string|Buffer} data content to write (defaults to an empty string)
 * @param {string} encoding text encoding for string data (falls back to "UTF-8" when falsy)
 * @returns {Promise<void>} resolves once the file has been written
 */
function writeFile(path, data = "", encoding = "UTF-8") {
  const charset = encoding || "UTF-8";
  return new Promise((resolve, reject) => {
    fs.writeFile(path, data, charset, (error) => {
      if (error) {
        reject(error);
        return;
      }
      console.log("writeFile success" + path);
      resolve();
    });
  });
}
/**
 * Write image data to disk.
 * @param {string} path destination file
 * @param {string|Buffer} data image content to write
 * @param {string} encoding encoding passed to fs.writeFile; "binary" is used when falsy
 * @returns {Promise<void>} resolves after the write succeeds, rejects with the fs error otherwise
 */
function writeFileImg(path, data, encoding) {
  const charset = encoding ? encoding : "binary";
  return new Promise((resolve, reject) => {
    const onWritten = (error) => {
      if (!error) {
        console.log("writeFile success" + path);
        resolve();
        return;
      }
      reject(error);
    };
    fs.writeFile(path, data, charset, onWritten);
  });
}
/**
 * Copy a file to a new location.
 * @param {string} oldFile source file
 * @param {string} newFile destination file
 * @returns {Promise<void>} resolves once the copy is done, rejects with the fs error otherwise
 */
function copyFile(oldFile, newFile) {
  return new Promise((resolve, reject) => {
    const onCopied = (error) => {
      if (!error) {
        console.log(`oldFile: ${oldFile} newFile: ${newFile}`);
        resolve();
        return;
      }
      reject(error);
    };
    fs.copyFile(oldFile, newFile, onCopied);
  });
}
/**
 * Check whether a file or directory exists at the given path.
 * @param {string} path location to probe
 * @returns {boolean} the result of fs.existsSync for that path
 */
function exists(path) {
  const found = fs.existsSync(path);
  return found;
}
/**
 * Create a directory, including any missing parent directories.
 *
 * With `recursive: true` the call also succeeds when the directory already
 * exists, so creating e.g. "./images/<category>" works whether or not
 * "./images" is present yet.
 *
 * @param {string} path directory to create
 */
function mkdir(path) {
  fs.mkdirSync(path, { recursive: true });
}
module.exports = {
readFile,
writeFile,
copyFile,
exists,
writeFileImg,
mkdir
}
})()
所用到代码文件:index.js--主要逻辑处理文件
const file = require("./File");
const fs = require("fs");
const path = require("path");
const axios = require("axios");
const cheerio = require("cheerio");
/**
 * Wallpaper crawler: walks every category in `titleUrl`, opens each listing
 * page, follows the wallpaper detail links and saves the images under
 * ./images/<category>/.
 */
const GetImg = {
  // Not read by any method in this object; kept for backward compatibility.
  url: "https://www.3gbizhi.com/wallMV/",
  // Category name -> category root URL (listing pages live under this URL).
  titleUrl: {
    "日历": "http://www.netbian.com/rili/",
    "动漫": "http://www.netbian.com/dongman/",
    "风景": "http://www.netbian.com/fengjing/",
    "美女": "http://www.netbian.com/meinv/",
    "游戏": "http://www.netbian.com/youxi/",
    "影视": "http://www.netbian.com/yingshi/",
    "动态": "http://www.netbian.com/dongtai/",
    "唯美": "http://www.netbian.com/weimei/",
    "设计": "http://www.netbian.com/sheji/",
    "可爱": "http://www.netbian.com/keai/",
    "汽车": "http://www.netbian.com/qiche/",
    "植物": "http://www.netbian.com/huahui/",
    "动物": "http://www.netbian.com/dongwu/",
    "节日": "http://www.netbian.com/jieri/",
    "人物": "http://www.netbian.com/renwu/",
    "水果": "http://www.netbian.com/shuiguo/",
    "建筑": "http://www.netbian.com/jianzhu/",
    "非主流": "http://www.netbian.com/feizhuliu/",
    "王者荣耀": "http://www.netbian.com/s/wangzherongyao/",
    "护眼": "http://www.netbian.com/s/huyan/",
    "LOL": "http://www.netbian.com/s/lol/"
  },
  // Number of images written so far in this run.
  imgNum: 0,
  // Category name -> zero-based index of the first listing page to crawl.
  // Lets a run resume after a failure without re-downloading earlier pages.
  // The single entry reproduces the previously hard-coded skip of the first
  // six "非主流" pages; remove it (or set it to 0) to crawl that category fully.
  resumeFrom: {
    "非主流": 6
  },
  /**
   * @description Entry point: runs the whole crawl.
   * @returns {Promise<void>}
   */
  async _init() {
    await this.main();
  },
  /**
   * @param {String} filePath path relative to the directory of this script
   * @returns {Boolean} true when the path exists and is a directory, false when it
   * is missing or is not a directory
   * @description Check whether a directory exists.
   */
  isDirectory(filePath) {
    try {
      return fs.statSync(path.join(__dirname, filePath)).isDirectory();
    } catch (error) {
      // A missing path simply means "not a directory"; anything else is unexpected.
      if (error.code === "ENOENT" || error.code === "ENOTDIR") {
        return false;
      }
      throw error;
    }
  },
  /**
   * @param {String} url page to download
   * @returns {Promise<*>} the response body (`res.data`)
   * @description GET a page as UTF-8 text. If that attempt fails, the request is
   * retried once with responseEncoding "gbk"; if the retry fails too, its error is
   * propagated to the caller.
   */
  async request(url) {
    try {
      const res = await axios({
        url,
        // HTTP method used for the request
        method: "get",
        responseEncoding: "utf8",
      });
      return res.data;
    } catch (error) {
      console.log("request", "出错误了");
      // NOTE(review): confirm that axios on Node accepts "gbk" as responseEncoding;
      // the value is kept from the original fallback.
      const res = await axios({
        url,
        method: "get",
        responseEncoding: "gbk",
      });
      return res.data;
    }
  },
  /**
   * @param {String} url image to download
   * @returns {Promise<*>} the raw response body requested as an arraybuffer
   * @description Download an image. Failures are logged and re-thrown.
   */
  async requestImg(url) {
    try {
      const res = await axios.get(url, {
        responseType: "arraybuffer"
      });
      return res.data;
    } catch (error) {
      console.log("失败了");
      throw error;
    }
  },
  /**
   * @param {String} key category name, also used as the folder name under ./images
   * @param {String} item URL of one listing page of that category
   * @returns {Promise<Boolean|undefined>} false when an image download failed and the
   * rest of the page was abandoned, otherwise undefined
   * @description Download every wallpaper linked from one listing page.
   */
  async getImg(key, item) {
    console.log("写入title:", key);
    console.log("爬取目标", item);
    const $ = cheerio.load(await this.request(item));
    const links = $("#main > div.list > ul > li").find("a");
    const targetDir = `./images/${key}`;
    for (let i = 0; i < links.length; i++) {
      const href = links.eq(i).attr("href");
      // Only links containing "desk" are followed; anchors without an href are skipped.
      if (!href || !href.includes("desk")) {
        continue;
      }
      // Resolve against the site root so a leading "/" does not produce "//".
      const detailUrl = new URL(href, "http://www.netbian.com/").href;
      const $detail = cheerio.load(await this.request(detailUrl));
      const imgUrl = $detail("#main > div.endpage > div > p > a > img").attr("src");
      console.log("图片地址:", imgUrl);
      // attr() yields undefined when the <img> is missing, so test for any falsy value.
      if (!imgUrl) {
        continue;
      }
      let imgData;
      try {
        imgData = await this.requestImg(imgUrl);
      } catch (error) {
        // requestImg has already logged the failure; give up on this page.
        return false;
      }
      const imgFileName = imgUrl.split("/").pop();
      // Creates ./images and the category folder when missing; no-op when present.
      fs.mkdirSync(targetDir, { recursive: true });
      await file.writeFileImg(`${targetDir}/${imgFileName}`, imgData);
      this.imgNum += 1;
      console.log("共写入" + this.imgNum);
    }
  },
  /**
   * @returns {Promise<void>}
   * @description Crawl every category in `titleUrl`, page by page. A category whose
   * index page cannot be loaded is skipped; a listing page that fails is logged and
   * the crawl moves on to the next page.
   */
  async main() {
    for (const [key, item] of Object.entries(this.titleUrl)) {
      let pageCount;
      try {
        const $ = cheerio.load(await this.request(item + "index.htm"));
        // The element following the "span.slh" pager item is read as the page count.
        pageCount = Number.parseInt($("#main > div.page > span.slh").next().text(), 10);
      } catch (error) {
        console.error("failed to load index page for " + key + ":", error);
        continue;
      }
      console.log(pageCount);
      if (Number.isNaN(pageCount)) {
        // No usable page count found: still crawl the index page that was just loaded.
        pageCount = 1;
      }
      const firstPage = (this.resumeFrom || {})[key] || 0;
      for (let x = firstPage; x < pageCount; x++) {
        console.log("第" + x + "页");
        // The first page is index.htm; later pages are index_2.htm, index_3.htm, ...
        const pageUrl = x === 0 ? item + "index.htm" : item + "index_" + (x + 1) + ".htm";
        try {
          await this.getImg(key, pageUrl);
        } catch (error) {
          console.error("failed to crawl " + pageUrl + ":", error);
        }
      }
    }
  }
}
// run: start the crawl and report an unexpected failure instead of leaving
// the returned promise without a rejection handler
GetImg._init().catch((error) => {
  console.error("crawl failed:", error);
  process.exitCode = 1;
});
所用到代码文件:package.json--配置文件
{
"name": "3g_wallpaper",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"axios": "^0.21.1",
"cheerio": "^1.0.0-rc.5",
"iconv-lite": "^0.6.2",
"utf8": "^3.0.0"
}
}
文件目录如下所示:
3g_wallpaper
images
index.js
File.js
package.json
最后声明:
本次教程只适用于 https://www.3gbizhi.com/wallMV/ ,如有冒犯请及时联系我,我会删除此贴。
文件流读写 →