my-bookmark/common/parse_html.js

62 lines
2.1 KiB
JavaScript

var cheerio = require("cheerio");
var fs = require("fs");
var parsehtml = function (file, callback) {
// var html = fs.readFileSync(file).toString();
fs.readFile(file, (err, data) => {
if (err) throw err;
var html = data.toString();
var $, anchors, itemdoubleclick, results, allTags, bookmarks;
$ = cheerio.load(html);
itemdoubleclick = "";
allTags = [];
bookmarks = [];
results = {};
anchors = $("dl").find("a");
anchors.each(function (i, e) {
var add_date, name, bookmark, tags, url;
url = $(e).attr("href");
name = $(e).text() || "无标题";
add_date = parseInt($(e).attr("add_date")) * 1000;
// 只允许用一个标签
// 只允许用一个标签
tags = new Array();
var tag = "未分类";
$(e).parents("dl").each(function (ii, ee) {
var folder = $(ee).prev();
var temp = folder.text().replace(/(^\s*)|(\s*$)/g, '').replace(/\s+/g, ' ');
if (temp != "Bookmarks" && temp != "书签栏" && temp != "" && temp != undefined) {
tag = temp;
}
});
if (allTags.indexOf(tag) == -1) {
allTags.push(tag);
}
tags.push(tag);
if (name.length > 255) {
name = name.substring(255);
}
name = name.replace(/\uD83C[\uDF00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/gi, "");
bookmark = {
url: url,
name: name,
add_date: add_date,
tags: tags
};
return bookmarks.push(bookmark);
});
if (typeof callback === "function") {
results.tags = allTags;
results.bookmarks = bookmarks;
return callback(results);
} else {
return console.warn("Callback isn't a function.");
}
})
};
module.exports = parsehtml;