diff --git a/common/parse_html.js b/common/parse_html.js
new file mode 100644
index 0000000..3a4e480
--- /dev/null
+++ b/common/parse_html.js
@@ -0,0 +1,99 @@
+var jsdom = require("jsdom");
+var fs = require("fs");
+
+// Folder headings that are roots of an exported bookmark file rather than
+// user categories; they are left out of each bookmark's own tag list.
+var ROOT_FOLDER_NAMES = ["Bookmarks", "书签栏"];
+
+/**
+ * Collect every bookmark and folder name from a parsed bookmark document.
+ *
+ * @param {Function} $ - jQuery bound to the parsed document.
+ * @returns {{tags: string[], bookmarks: Object[]}} All distinct folder names
+ *   in first-seen order, and one { url, name, add_date, tags } per anchor.
+ */
+function collectBookmarks($) {
+    var allTags = [];
+    var bookmarks = [];
+
+    $("dl").find("a").each(function(i, e) {
+        var anchor = $(e);
+        var tags = [];
+
+        // The element just before each ancestor <dl> holds that folder's title.
+        anchor.parents("dl").each(function(ii, ee) {
+            var tag = $(ee).prev().text().trim().replace(/\s+/g, " ");
+            // NOTE(review): root names still land in allTags, as before; only the
+            // per-bookmark list filters them. Confirm that is what callers want.
+            if (allTags.indexOf(tag) === -1) {
+                allTags.push(tag);
+            }
+            if (ROOT_FOLDER_NAMES.indexOf(tag) === -1) {
+                tags.push(tag);
+            }
+        });
+
+        bookmarks.push({
+            url: anchor.attr("href"),
+            name: anchor.text(),
+            add_date: anchor.attr("add_date"),
+            tags: tags
+        });
+    });
+
+    return { tags: allTags, bookmarks: bookmarks };
+}
+
+/**
+ * Parse an exported bookmark HTML file.
+ *
+ * @param {string} file - Path of the HTML file to read.
+ * @param {Function} callback - Called once as callback(results, err).
+ *   results is always { tags, bookmarks }; when reading or parsing fails both
+ *   arrays are empty and err carries the failure, otherwise err is omitted.
+ */
+var parsehtml = function(file, callback) {
+    if (typeof callback !== "function") {
+        return console.warn("Callback isn't a function.");
+    }
+
+    // Throwing inside the async callbacks below cannot be caught by the caller
+    // and would surface as an uncaught exception, so failures go to the callback.
+    var fail = function(err) {
+        console.error("parsehtml failed for " + file, err);
+        callback({ tags: [], bookmarks: [] }, err);
+    };
+
+    fs.readFile(file, function(err, data) {
+        if (err) {
+            return fail(err);
+        }
+
+        jsdom.env({
+            html: data.toString(),
+            scripts: ["./public/scripts/externe/jquery-3.1.1.min.js"],
+            done: function(errors, window) {
+                var results = null;
+                var failure = null;
+
+                try {
+                    if (!window || typeof window.$ !== "function") {
+                        throw errors || new Error("jQuery was not loaded into the document.");
+                    }
+                    results = collectBookmarks(window.$);
+                } catch (e) {
+                    failure = e;
+                } finally {
+                    // The data has been copied out; shut the jsdom window down.
+                    if (window) {
+                        window.close();
+                    }
+                }
+
+                return failure ? fail(failure) : callback(results);
+            }
+        });
+    });
+};
+
+module.exports = parsehtml;
diff --git a/package.json b/package.json
index 70e2f10..282331d 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
     "ejs": "~2.4.1",
     "express": "~4.13.4",
     "express-session": "^1.14.1",
+    "jsdom": "^9.10.0",
     "morgan": "~1.7.0",
     "multer": "^1.3.0",
     "mysql": "^2.11.1",
diff --git a/routes/api.js b/routes/api.js
index 5c898be..842cd85 100644
--- a/routes/api.js
+++ b/routes/api.js
@@ -3,6 +3,7 @@ var mysql = require('mysql');
 var crypto = require('crypto');
 var read = require('node-readability');
 var db = require('../database/db.js');
+var parseHtml = require('../common/parse_html.js');
 var multer = require('multer');
 
 var storage = multer.diskStorage({
@@ -472,95 +473,71 @@ api.post('/uploadBookmarkFile', upload.single('bookmark'), function(req, res) {
     }
 
     var file = req.file;
-    // var bookmarks = [{
-    //     "url": "https://www.163.com/",
-    //     "name": "Bookmarks",
-    //     "add_date": "1432116178",
-    //     "tags": []
-    // }, {
-    //     "url": "https://github.com/aponxi/npm-bookmark-parser",
-    //     "name": "aponxi/npm-bookmark-parser: Node plugin to parse Chrome bookmarks into usable JSON format, via javascript.",
-    //     "add_date": "1486615941",
-    //     "tags": ["测试栏目2", "测试栏目1"]
-    // }, {
-    //     "url": "http://stackoverflow.com/questions/26673837/parsing-bookmark-html-in-node-js",
-    //     "name": "parsing bookmark.html in node.js - Stack Overflow",
-    //     "add_date": "1486614926",
-    //     "tags": ["测试栏目1"]
-    // }, {
-    //     "url": "http://stackoverflow.com/",
-    //     "name": "dddddddddddddd",
-    //     "add_date": "1486614926",
-    //     "tags": ["测试栏目3"]
-    // }];
+    // multer leaves req.file unset when the request has no 'bookmark' part.
+    if (!file) {
+        res.status(400).json({});
+        return;
+    }
 
-    // var tagsName = ['测试栏目1', '测试栏目2', '测试栏目3'];
-    // var userId = req.session.user.id;
-    // var addTagNames = [];
-    //
-    // db.getTags(userId)
-    //     // 先插入分类
-    //     .then((tags) => {
-    //         // 需要插入的书签是该用户在数据库不存在的书签
-    //         addTagNames = tagsName.filter((name) => {
-    //             for (var i = 0; i < tags.length; i++) {
-    //                 if (tags[i].name.toLowerCase() === name.toLowerCase()) {
-    //                     return false;
-    //                 }
-    //             }
-    //             return true;
-    //         });
-    //         return Promise.resolve(addTagNames);
-    //     })
-    //     .then((newTagNames) => {
-    //         if (newTagNames.length > 0) {
-    //             return db.addTags(userId, newTagNames)
-    //         } else {
-    //             return Promise.resolve();
-    //         }
-    //     })
-    //     .then(() => db.getTags(userId))
-    //     .then((allTags) => {
-    //         bookmarks.forEach((item, index) => {
-    //             var count = 0;
-    //
-    //             var bookmark = {};
-    //             bookmark.title = item.name;
-    //             bookmark.description = "";
-    //             bookmark.url = item.url;
-    //             bookmark.public = '1';
-    //             if (item.tags.length == 0) {
-    //                 item.tags.push("未分类")
-    //             }
-    //
-    //             var tags = [];
-    //             item.tags.forEach((tag) => {
-    //                 allTags.forEach((at) => {
-    //                     if (at.name == tag) {
-    //                         tags.push(at.id);
-    //                     }
-    //                 })
-    //             })
-    //             // 插入书签
-    //             db.addBookmark(userId, bookmark) // 插入书签
-    //                 .then((bookmark_id) => {
-    //                     db.delBookmarkTags(bookmark_id); // 不管3721,先删掉旧的分类
-    //                     return bookmark_id;
-    //                 }) // 将之前所有的书签分类信息删掉
-    //                 .then((bookmark_id) => db.addTagsBookmarks(tags, bookmark_id)) // 插入分类
-    //                 .then(() => db.updateLastUseTags(userId, tags)) // 更新最新使用的分类
-    //                 .then(() => {
-    //                     count++
-    //                 }) // 运气不错
-    //                 .catch((err) => console.log('uploadBookmarkFile addBookmark err', err)); // oops!
-    //             if ((index + 1) == bookmarks.length) {
-    //                 // 通知前台
-    //             }
-    //         })
-    //     })
-    //     .catch((err) => console.log('uploadBookmarkFile err', err));
-    console.log("file", file);
-    res.json({});
+    var userId = req.session.user.id;
+
+    parseHtml(file.path, function(data, parseErr) {
+        // A failed parse is reported through the second argument.
+        if (parseErr || !data) {
+            console.log('uploadBookmarkFile parse err', parseErr);
+            return;
+        }
+
+        var bookmarks = data.bookmarks;
+        var tagsName = data.tags;
+
+        db.getTags(userId)
+            // Create the categories this user does not have yet; names are
+            // compared case-insensitively.
+            .then((tags) => {
+                var existing = tags.map((tag) => tag.name.toLowerCase());
+                var newTagNames = tagsName.filter((name) => existing.indexOf(name.toLowerCase()) === -1);
+                if (newTagNames.length > 0) {
+                    return db.addTags(userId, newTagNames);
+                }
+            })
+            .then(() => db.getTags(userId))
+            .then((allTags) => {
+                bookmarks.forEach((item) => {
+                    var bookmark = {};
+                    bookmark.title = item.name;
+                    bookmark.description = "";
+                    bookmark.url = item.url;
+                    bookmark.public = '1';
+                    // NOTE(review): nothing here creates this fallback category;
+                    // confirm every user already has it.
+                    if (item.tags.length === 0) {
+                        item.tags.push("未分类");
+                    }
+
+                    // Resolve folder names to category ids: exact name first,
+                    // then the same case-insensitive match used above.
+                    var tags = [];
+                    item.tags.forEach((tag) => {
+                        var exact = allTags.filter((at) => at.name === tag);
+                        var matches = exact.length > 0 ? exact : allTags.filter((at) => at.name.toLowerCase() === tag.toLowerCase());
+                        matches.forEach((at) => tags.push(at.id));
+                    });
+
+                    db.addBookmark(userId, bookmark)
+                        // Clear old category links and wait for that to finish,
+                        // so it cannot race with the links added next.
+                        .then((bookmark_id) => Promise.resolve(db.delBookmarkTags(bookmark_id)).then(() => bookmark_id))
+                        .then((bookmark_id) => db.addTagsBookmarks(tags, bookmark_id))
+                        .then(() => db.updateLastUseTags(userId, tags))
+                        .catch((err) => console.log('uploadBookmarkFile addBookmark err', err));
+                });
+            })
+            .catch((err) => console.log('uploadBookmarkFile err', err));
+    });
+
+    // The import continues in the background; this only acknowledges the upload.
+    res.json(file);
 });
 
 api.post('/addBookmark', function(req, res) {