# node抓取网络图片

有些朋友问,你的一些资源是怎么获取的?

难道是每天逛网站一张一张地手动保存图片吗?当然不是了,那样岂不是要累死,每天还要耗费大量精力。今天做一个小教程,用 Node 爬虫批量抓取壁纸,目标站点:https://www.3gbizhi.com/wallMV/ (注意:下文 index.js 中实际请求的分类地址均为 http://www.netbian.com/ 下的页面。)

  • 本次使用node爬虫批量获取,其他资源同样道理..你懂的
  • 所有操作在 Windows 10 下完成
  • macOS 用户请自行搜索对应的操作方法;无论哪种系统,都请提前搭建好 Node.js 环境。

第一步:cd到根目录安装所有依赖

npm install

第二步:启动爬虫,接着往下看

node index.js

所用到代码文件:File.js--对常用文件处理的封装

const fs = require("fs");

(function () {
    /**
     * Read a file and resolve with its contents decoded as text.
     *
     * The encoding argument is forwarded to fs.readFile so the file is decoded
     * with the requested encoding. (Previously the argument was accepted but
     * ignored, and the raw buffer was always decoded as UTF-8.)
     *
     * @param {string} path - File to read.
     * @param {string} [encoding="UTF-8"] - Text encoding; any falsy value
     *   (null, "") falls back to "UTF-8".
     * @returns {Promise<string>} Resolves with the decoded contents; rejects
     *   with the fs error, or with the validation error thrown by fs.readFile
     *   for an unsupported encoding.
     */
    function readFile(path, encoding = "UTF-8") {
        // Keep the legacy fallback so callers passing null/"" still get UTF-8.
        const textEncoding = encoding || "UTF-8";
        return new Promise((resolve, reject) => {
            fs.readFile(path, textEncoding, (error, data) => {
                if (error) {
                    reject(error);
                    return;
                }
                console.log("readFile success: " + path);
                // `data` is already a string because an encoding was supplied.
                resolve(data);
            });
        });
    }

    /**
     * Write text to a file, replacing any existing contents.
     *
     * The second positional argument has always been what fs.writeFile
     * received as the file content, even though it used to be named
     * `encoding`. It is now named `data`, and a real optional `encoding`
     * parameter follows it, mirroring writeFileImg(path, data, encoding).
     * The success log previously referenced an undefined `option.path`,
     * which threw inside the fs callback; it now logs `path`.
     *
     * @param {string} path - Destination file.
     * @param {string|Buffer} data - Content to write. An empty string is
     *   written as an empty file.
     * @param {string} [encoding="UTF-8"] - Encoding used when `data` is a
     *   string; falsy values fall back to "UTF-8".
     * @returns {Promise<void>} Resolves once the write completes; rejects with
     *   the fs error (including the argument error raised for missing data).
     */
    function writeFile(path, data, encoding = "UTF-8") {
        const textEncoding = encoding || "UTF-8";
        return new Promise((resolve, reject) => {
            fs.writeFile(path, data, textEncoding, (error) => {
                if (error) {
                    reject(error);
                    return;
                }
                console.log("writeFile success" + path);
                resolve();
            });
        });
    }

    /**
     * Persist image data to disk through fs.writeFile.
     *
     * @param {string} path - Destination file.
     * @param {*} data - Payload handed to fs.writeFile unchanged.
     * @param {string} [encoding] - Encoding handed to fs.writeFile; "binary"
     *   is used when this is falsy.
     * @returns {Promise<void>} Resolves after the write callback reports
     *   success; rejects with the error it reports otherwise.
     */
    function writeFileImg(path, data, encoding) {
        const effectiveEncoding = encoding ? encoding : "binary";
        return new Promise((done, fail) => {
            const onWritten = (err) => {
                if (!err) {
                    console.log("writeFile success" + path);
                    done();
                    return;
                }
                fail(err);
            };
            fs.writeFile(path, data, effectiveEncoding, onWritten);
        });
    }

    /**
     * Duplicate a file through fs.copyFile.
     *
     * @param {string} oldFile - Source path.
     * @param {string} newFile - Destination path.
     * @returns {Promise<void>} Resolves after the copy callback reports
     *   success; rejects with the error it reports otherwise.
     */
    function copyFile(oldFile, newFile) {
        return new Promise((done, fail) => {
            const onCopied = (err) => {
                if (!err) {
                    console.log(`oldFile: ${oldFile}  newFile: ${newFile}`);
                    done();
                    return;
                }
                fail(err);
            };
            fs.copyFile(oldFile, newFile, onCopied);
        });
    }

    /**
     * Report whether a file or directory is present at the given path.
     *
     * @param {string} path - Location to probe.
     * @returns {boolean} The result of the synchronous fs existence check.
     */
    function exists(path) {
        const isPresent = fs.existsSync(path);
        return isPresent;
    }

    /**
     * Synchronously create a directory, including any missing parents.
     *
     * The recursive option lets a nested target such as "./images/<name>" be
     * created even when "./images" does not exist yet, and makes the call a
     * no-op when the directory is already present.
     *
     * @param {string} path - Directory to create.
     * @returns {void}
     * @throws {Error} The fs error when creation fails, e.g. a regular file
     *   already occupies the path.
     */
    function mkdir(path) {
        fs.mkdirSync(path, { recursive: true });
    }

    // Public API of this module: promise-returning wrappers (readFile,
    // writeFile, writeFileImg, copyFile) and synchronous helpers (exists, mkdir).
    module.exports = {
        readFile,
        writeFile,
        copyFile,
        exists,
        writeFileImg,
        mkdir
    }
})()

所用到代码文件:index.js--主要逻辑处理文件

const file = require("./File");
const fs = require("fs");
const path = require("path");
const axios = require("axios");
const cheerio = require("cheerio");

/**
 * Wallpaper crawler: walks every category listed in `titleUrl`, opens each
 * listing page, follows the "desk" detail links and saves the image found on
 * each detail page under ./images/<category>/.
 */
const GetImg = {
    // Not read by any method of this object; kept for compatibility.
    url: "https://www.3gbizhi.com/wallMV/",
    // Category name -> base URL of that category's listing pages.
    titleUrl: {
        "日历": "http://www.netbian.com/rili/",
        "动漫": "http://www.netbian.com/dongman/",
        "风景": "http://www.netbian.com/fengjing/",
        "美女": "http://www.netbian.com/meinv/",
        "游戏": "http://www.netbian.com/youxi/",
        "影视": "http://www.netbian.com/yingshi/",
        "动态": "http://www.netbian.com/dongtai/",
        "唯美": "http://www.netbian.com/weimei/",
        "设计": "http://www.netbian.com/sheji/",
        "可爱": "http://www.netbian.com/keai/",
        "汽车": "http://www.netbian.com/qiche/",
        "植物": "http://www.netbian.com/huahui/",
        "动物": "http://www.netbian.com/dongwu/",
        "节日": "http://www.netbian.com/jieri/",
        "人物": "http://www.netbian.com/renwu/",
        "水果": "http://www.netbian.com/shuiguo/",
        "建筑": "http://www.netbian.com/jianzhu/",
        "非主流": "http://www.netbian.com/feizhuliu/",
        "王者荣耀": "http://www.netbian.com/s/wangzherongyao/",
        "护眼": "http://www.netbian.com/s/huyan/",
        "LOL": "http://www.netbian.com/s/lol/"
    },
    // Running count of images written so far.
    imgNum: 0,
    // Category name -> zero-based page index to start from; earlier pages are
    // skipped. Replaces the hard-coded `key === "非主流" && x > 5` branch, which
    // skipped the same pages but then re-fetched index.htm for every later one.
    // NOTE(review): the "非主流" entry looks like a leftover resume point from an
    // interrupted run - clear it to crawl that category from its first page.
    resumeFrom: {
        "非主流": 6
    },

    /**
     * @description Entry point: runs the whole crawl.
     * @returns {Promise<void>}
     */
    async _init() {
        await this.main();
    },
    /**
     * @param {string} filePath Path relative to this script's directory.
     * @returns {boolean} true when the path exists and is a directory.
     * @description Check whether a directory exists. The path is joined onto
     * __dirname before the stat call; a missing path yields false instead of
     * throwing.
     */
    isDirectory(filePath) {
        try {
            return fs.statSync(path.join(__dirname, filePath)).isDirectory();
        } catch (error) {
            if (error.code === "ENOENT" || error.code === "ENOTDIR") {
                return false;
            }
            throw error;
        }
    },
    /**
     * @param {string} url Page to fetch.
     * @returns {Promise<*>} Resolves with the response body (`res.data`).
     * @description GET a page as utf8; if that request fails, log it and retry
     * once with the 'gbk' response encoding. Rejects when the retry fails too.
     * (A try/catch around a promise constructor cannot observe an asynchronous
     * failure, so the request is awaited here instead.)
     */
    async request(url) {
        try {
            const res = await axios({
                url: url,
                method: 'get',
                responseEncoding: 'utf8',
            });
            return res.data;
        } catch (error) {
            console.log("request", "出错误了");
            // NOTE(review): Node's Buffer has no built-in 'gbk' decoder, so this
            // retry may fail inside axios - confirm, or fetch an arraybuffer and
            // decode it explicitly.
            const res = await axios({
                url: url,
                method: 'get',
                responseEncoding: 'gbk',
            });
            return res.data;
        }
    },
    /**
     * @param {string} url Image address.
     * @returns {Promise<*>} Resolves with the body requested as an arraybuffer.
     * @description Download an image. On failure the error is logged and the
     * promise rejects with the underlying error (it used to reject with the
     * bare value `false`).
     */
    async requestImg(url) {
        try {
            const res = await axios.get(url, {
                responseType: 'arraybuffer'
            });
            return res.data;
        } catch (error) {
            console.log("失败了");
            throw error;
        }
    },
    /**
     * @param {string} key Category name; also the sub-directory of ./images.
     * @param {string} item URL of one listing page.
     * @returns {Promise<boolean|undefined>} false when an image download fails
     * (the rest of the page is abandoned), otherwise undefined.
     * @description Save every wallpaper linked from one listing page.
     */
    async getImg(key, item) {
        console.log("写入title:", key);
        console.log("爬取目标", item);
        const $ = cheerio.load(await this.request(item));
        const links = $("#main > div.list > ul > li").find("a");
        const targetDir = `./images/${key}`;
        for (let i = 0; i < links.length; i++) {
            const href = links.eq(i).attr("href");
            // Anchors without an href, or not pointing at a detail page, are skipped.
            if (!href || !href.includes("desk")) {
                continue;
            }
            // Resolve against the site root so a leading "/" in href does not
            // produce a "//" path.
            const detailUrl = new URL(href, "http://www.netbian.com/").href;
            const $detail = cheerio.load(await this.request(detailUrl));
            const imgSrc = $detail("#main > div.endpage > div > p > a > img").attr("src");
            console.log("图片地址:", imgSrc);
            // attr() yields undefined when the <img> is missing.
            if (!imgSrc) {
                continue;
            }
            let imgData;
            try {
                imgData = await this.requestImg(imgSrc);
            } catch (error) {
                // Keeps the original intent of `if (imgData === false) return false`.
                return false;
            }
            const imgFileName = imgSrc.split("/").pop();
            // Idempotent, and also creates ./images itself on a first run.
            fs.mkdirSync(targetDir, { recursive: true });
            await file.writeFileImg(`${targetDir}/${imgFileName}`, imgData);
            this.imgNum = this.imgNum + 1;
            console.log("共写入" + this.imgNum);
        }
    },
    /**
     * @returns {Promise<void>}
     * @description Crawl every category: read the page count from the first
     * listing page, then process the pages in order. A category whose first
     * page cannot be fetched, or whose page count cannot be parsed, is skipped;
     * a page that fails is logged and the crawl moves on to the next page.
     */
    async main() {
        for (const [key, item] of Object.entries(this.titleUrl)) {
            let pageCount;
            try {
                const $ = cheerio.load(await this.request(item + "index.htm"));
                // The element after the "…" span holds the last page number.
                pageCount = parseInt($("#main > div.page > span.slh").next().text(), 10);
            } catch (error) {
                console.log("request", key, error && error.message);
                continue;
            }

            console.log(pageCount);
            // The old `!== "NaN"` test compared a number to a string and was
            // always true.
            if (Number.isNaN(pageCount)) {
                continue;
            }

            const firstPage = (this.resumeFrom && this.resumeFrom[key]) || 0;
            for (let x = firstPage; x < pageCount; x++) {
                console.log("第" + x + "页");
                // Page 1 is index.htm; page n (n >= 2) is index_n.htm.
                const pageUrl = x === 0
                    ? item + "index.htm"
                    : item + "index_" + (x + 1) + ".htm";
                try {
                    await this.getImg(key, pageUrl);
                } catch (error) {
                    console.log("getImg", pageUrl, error && error.message);
                }
            }
        }
    }
}

// run: report a crawl-level failure instead of leaving the promise floating
GetImg._init().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

所用到代码文件:package.json--配置文件

{
  "name": "3g_wallpaper",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "axios": "^0.21.1",
    "cheerio": "^1.0.0-rc.5",
    "iconv-lite": "^0.6.2",
    "utf8": "^3.0.0"
  }
}

文件目录如下所示:

3g_wallpaper
    images
    index.js
    File.js
    package.json

最后声明:

本次教程只适用于 https://www.3gbizhi.com/wallMV/ (示例代码中实际抓取的地址为 http://www.netbian.com/ ),如有冒犯请及时联系我,我会删除此帖。