var http = require('http');
var fs = require('fs');
var request = require('request');

// ---------------------------------------------------------------------------
// Configuration and shared state
// ---------------------------------------------------------------------------

// Not referenced anywhere in this file; kept so the module-level names stay defined.
var counter = 0;
var totalNum = 0;

// Directory that receives log.txt and one sub-directory per chapter.
var baseDir = 'F:/node_test/Pictures/';

// Captures the base64 payload assigned to qTcms_S_m_murl_e in the page source.
var urlReg = new RegExp('qTcms_S_m_murl_e\\s*=\\s*"([\\w+/=]+)"');
// Entries of the decoded payload are kept only when they start with "http://".
var isUrlReg = /^http:\/\//;
// Captures the digits inside the value assigned to qTcms_S_m_playm.
var chapterReg = new RegExp('qTcms_S_m_playm\\s*=\\s*"\\W+(\\d+)\\W+"');
// Captures the path assigned to qTcms_Pic_nextArr. The previous module-level
// pattern captured a single character followed by "html" and was shadowed by a
// corrected local copy inside downloadChapter; this is now the one definition.
var nextChapterReg = new RegExp('qTcms_Pic_nextArr\\s*=\\s*"([/\\w.]+)"');

// chapterNow counts chapters that have been finished or abandoned;
// no further chapter is started once it reaches chapterNum.
var chapterNow = 0, chapterNum = 10;
// Path of the chapter that follows the one being processed ('' or null when unknown).
var nextChapterPath = '';

// ---------------------------------------------------------------------------
// Encoding helpers
// ---------------------------------------------------------------------------

/**
 * Decodes a base64 string and interprets the resulting bytes as UTF-8.
 *
 * Uses Buffer instead of a hand-rolled decoder: input without trailing "="
 * padding no longer yields spurious NUL characters, and 4-byte UTF-8
 * sequences are decoded correctly.
 *
 * @param {*} data Base64 text; falsy values are returned unchanged.
 * @returns {*} The decoded string, or `data` itself when it is falsy.
 */
function base64_decode(data) {
    if (!data) {
        return data;
    }
    return Buffer.from(String(data), 'base64').toString('utf8');
}

/**
 * Encodes a value as base64 over the UTF-8 bytes of its string form.
 *
 * @param {*} data Value to encode; falsy values are returned unchanged.
 * @returns {*} The base64 string, or `data` itself when it is falsy.
 */
function base64_encode(data) {
    if (!data) {
        return data;
    }
    return Buffer.from(String(data), 'utf8').toString('base64');
}

/**
 * Converts a "byte string" (one character per byte, code units 0-255) that
 * holds UTF-8 data into a regular JavaScript string.
 *
 * The previous hand-written loop treated every lead byte >= 224 as a 3-byte
 * sequence, so 4-byte sequences were corrupted.
 *
 * @param {*} str_data Byte string to decode (coerced to string).
 * @returns {string} The decoded text.
 */
function utf8_decode(str_data) {
    return Buffer.from(String(str_data), 'binary').toString('utf8');
}

/**
 * Converts a JavaScript string into a "byte string" (one character per byte)
 * holding its UTF-8 encoding.
 *
 * The previous hand-written loop encoded each half of a surrogate pair as its
 * own 3-byte sequence, which is not valid UTF-8.
 *
 * @param {*} argString Value to encode (coerced to string).
 * @returns {string} Byte string with the UTF-8 encoding of the input.
 */
function utf8_encode(argString) {
    return Buffer.from(String(argString), 'utf8').toString('binary');
}

// ---------------------------------------------------------------------------
// Logging
// ---------------------------------------------------------------------------

/**
 * Appends one CRLF-terminated line to log.txt inside baseDir.
 * A failure to write is reported on the console and otherwise ignored.
 *
 * @param {*} data Text to append.
 */
function log(data) {
    fs.appendFile(baseDir + 'log.txt', data + '\r\n', 'utf8', function (err) {
        if (err) {
            console.log(err);
        }
    });
}

// ---------------------------------------------------------------------------
// Download pipeline
// ---------------------------------------------------------------------------

/**
 * Marks the current chapter as finished (or abandoned) and starts the next
 * one when a next path is known and the chapterNum limit is not yet reached.
 * Shared by the success and failure paths so both count chapters the same way.
 */
function startNextChapter() {
    chapterNow++;
    if (nextChapterPath && chapterNow < chapterNum) {
        downloadChapter(nextChapterPath);
    }
}

/**
 * Downloads one picture to disk.
 *
 * On success `fn` is called once the file stream has closed. On failure of
 * either the HTTP request or the file stream, the error is logged, the rest
 * of the current chapter is abandoned and the next chapter is started.
 *
 * @param {string} url Picture address.
 * @param {string} filePath Destination file.
 * @param {Function} fn Called with no arguments after a successful download.
 */
function downloadPic(url, filePath, fn) {
    // Guards against handling more than one outcome: request errors, file
    // errors and the 'close' that follows destroy() can all fire.
    var finished = false;
    var req = request(url);
    var out = fs.createWriteStream(filePath);

    function fail(err) {
        if (finished) {
            return;
        }
        finished = true;
        // Release both ends so neither the socket nor the file handle lingers.
        req.abort();
        out.destroy();
        log('download pic error! ' + url + ' ' + (err && err.message ? err.message : err));
        startNextChapter();
    }

    out.on('close', function () {
        if (finished) {
            return;
        }
        finished = true;
        fn();
    }).on('error', fail);

    // pipe() does not forward errors from the source, so the request needs
    // its own listener; without one a network error would crash the process.
    req.on('error', fail).pipe(out);
}

/**
 * Downloads the pictures of one chapter one after another, saving them as
 * "<baseDir>/<index>.jpg", then moves on to the next chapter.
 *
 * @param {string[]} arr Picture addresses in page order.
 * @param {string} baseDir Existing directory for this chapter's files.
 */
function downloadFiles(arr, baseDir) {
    var length = arr.length;
    var i = 0;

    function finishChapter() {
        console.log('Chapter ' + chapterNow + ' All done!');
        startNextChapter();
    }

    function loadNext() {
        downloadPic(arr[i], baseDir + '/' + i + '.jpg', function () {
            i++;
            if (i < length) {
                loadNext();
            } else {
                finishChapter();
            }
        });
    }

    // Nothing to fetch: finish right away instead of requesting `undefined`.
    if (length === 0) {
        finishChapter();
        return;
    }
    loadNext();
}

/**
 * Fetches one chapter page, creates the chapter directory, records the path
 * of the following chapter and starts downloading the chapter's pictures.
 *
 * Processing stops (without moving on) when the chapter number cannot be
 * found in the page or the chapter directory already exists.
 *
 * @param {string} path Path of the chapter page, appended to the site origin.
 */
function downloadChapter(path) {
    log('//------------------------------------------------------------------//');
    var page = 'http://some.page.com' + path;
    log('正在读取:' + page);
    var html = '';

    http.get(page, function (res) {
        res.setEncoding('utf-8');
        res.on('data', function (data) {
            // Accumulate the response body chunk by chunk.
            html += data;
        }).on('end', function () {
            // exec() results are used instead of the deprecated RegExp.$1 statics.
            var chapterMatch = chapterReg.exec(html);
            if (!chapterMatch) {
                console.log('找不到章节');
                return;
            }

            var dir = baseDir + '第' + chapterMatch[1] + '话';
            if (fs.existsSync(dir)) {
                console.log(dir + ' 已存在!');
                return;
            }
            fs.mkdirSync(dir);

            var nextMatch = nextChapterReg.exec(html);
            if (nextMatch) {
                nextChapterPath = nextMatch[1];
                log('下一个页面地址:' + nextChapterPath);
            } else {
                nextChapterPath = null;
            }

            var urlMatch = urlReg.exec(html);
            if (!urlMatch) {
                // Dump the page so the missing picture list can be diagnosed.
                console.log(html);
                return;
            }

            var url64 = base64_decode(urlMatch[1]);
            log('图片数组:\r\n' + url64);
            // The decoded payload is a "$"-separated list; keep only http:// entries.
            var picArr = url64.split('$').filter(function (val) {
                return isUrlReg.test(val);
            });
            downloadFiles(picArr, dir);
        });
    }).on('error', function (err) {
        // Without this listener a DNS or connection failure is an unhandled
        // 'error' event and terminates the process.
        log('read page error! ' + page + ' ' + err.message);
    });
}

downloadChapter('someurl');