Skip to content

Commit ee315fe

Browse files
author
jjeejj
committed
导出专栏文章评论
1 parent b1cda5b commit ee315fe

File tree

6 files changed

+99
-5
lines changed

6 files changed

+99
-5
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ node_modules
33
geektime_*/**
44
config.js
55
.nyc_output
6-
coverage
6+
coverage
7+
quest.md

columnArticleList.js

+11
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const utils = require('./utils');
55
const path = require('path');
66
const generaterPdf = require('./generaterPdf.js');
77
const downloadAudio = require('./downloadAudio.js');
8+
const downloadComment = require('./downloadComment.js');
89

910
/**
1011
* 执行方法
@@ -45,6 +46,16 @@ const downloadAudio = require('./downloadAudio.js');
4546
audioTitle: columnArticle.audio_title
4647
};
4748
columnArticleUrlList.push(articleInfo);
49+
articleInfo.commentsTotal = 0;
50+
articleInfo.commentsArr = [];
51+
// 是否导出评论
52+
if (config.isComment) {
53+
let {commentsTotal, commentsArr} = await downloadComment(
54+
config.columnBaseUrl + articalId,
55+
articalId);
56+
articleInfo.commentsTotal = commentsTotal;
57+
articleInfo.commentsArr = commentsArr;
58+
};
4859
// 替换文章名称的 / 线, 解决路径被分割的问题
4960
let useArticleTtle = columnArticle.article_title.replace(/\//g, '-');
5061
//生成PDF

config.js

+5-4
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
*/
44
module.exports = {
55
url: 'https://time.geekbang.org/serv/v1/article',
6+
commentUrl: 'https://time.geekbang.org/serv/v1/comments',
67
columnBaseUrl: 'https://time.geekbang.org/column/article/',
7-
columnName: '趣谈Linux操作系统', // 专栏名称
8-
firstArticalId: 87104, //专栏第一篇文章的ID
9-
isdownloadVideo: false, // 是否下载音频
10-
isComment: false, // 是否导出评论
8+
columnName: '软件工程之美',
9+
firstArticalId: 85730, //专栏第一篇文章的ID
10+
isdownloadVideo: true, // 是否下载音频
11+
isComment: true, // 是否导出评论
1112
cookie: 'cookie'
1213
};

downloadComment.js

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// 获取每篇文章下面所有的评论
2+
const config = require('./config.js');
3+
const superagent = require('superagent');
4+
const utils = require('./utils');
5+
6+
/**
 * Fetch all comments posted under a single article.
 *
 * Comment pages are requested one at a time from `config.commentUrl`. While
 * the response reports `page.more`, the `score` of the last comment on the
 * page is sent as the `prev` cursor of the next request.
 *
 * Best effort: any request or response error is logged and stops the paging;
 * whatever was collected up to that point is still returned.
 *
 * @param {String} url - Article page URL; sent as the Referer header and used in log output.
 * @param {Number} articleId - Article ID, sent as `aid` in the request body.
 * @param {Number} [prev=0] - Paging cursor sent as `prev` with the first request.
 * @returns {Promise<{commentsArr: Array, commentsTotal: Number}>} The collected
 *   comment objects and the `page.count` value of the last page that was read
 *   (0 when no page could be read).
 */
async function downloadComments (url, articleId, prev = 0) {
    console.log('开始获取 ', url, '评论');
    const commentsArr = [];
    let commentsTotal = 0;
    // Keep the caller's argument untouched; advance a local cursor instead.
    let cursor = prev;
    let hasMore = true;
    try {
        // Iterate instead of recursing so a long comment thread does not
        // build a chain of nested pending calls.
        while (hasMore) {
            const res = await superagent.post(config.commentUrl)
                .set({
                    'Content-Type': 'application/json',
                    'Cookie': config.cookie,
                    'Referer': url,
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
                }).send({
                    aid: articleId,
                    prev: cursor
                });
            if (res.body && res.body.error && res.body.error.code) {
                console.log('error msg', res.body.error.msg);
                throw new Error(res.body.error.msg);
            }
            const resData = res.body && res.body.data;
            if (!resData || !resData.page) {
                // Fail with a readable message instead of an opaque TypeError.
                throw new Error('unexpected comment response: missing data.page');
            }
            const list = Array.isArray(resData.list) ? resData.list : [];
            commentsTotal = resData.page.count;
            commentsArr.push(...list);
            // An empty page cannot supply the next cursor, so stop there even
            // if the response still claims that more pages exist.
            hasMore = Boolean(resData.page.more) && list.length > 0;
            if (hasMore) {
                cursor = list[list.length - 1].score;
                // Pause between page requests (duration unit is defined by utils.sleep).
                await utils.sleep(1);
            }
        }
    } catch (err) {
        console.log(`获取 评论 ${url} err`, err.message);
    }
    console.log('结束获取 ', url, '评论 总评论数为', commentsTotal);
    return {commentsArr, commentsTotal};
}
50+
51+
// downloadComments('https://time.geekbang.org/column/article/82337',82337);
52+
53+
module.exports = downloadComments;

generaterPdf.js

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

template/article.ejs

+27
Original file line numberDiff line numberDiff line change
@@ -3056,6 +3056,33 @@
30563056
</div>
30573057
<% } %>
30583058
</div>
3059+
<div class="_1qhD3bdE_0 _2QmGFWqF_0">
3060+
<h2><span>精选留言</span><span class="_2FC_cD1O_0"><%- commentsTotal %></span></h2>
3061+
<ul>
3062+
<%for(let i=0;i<commentsArr.length;i++){%>
3063+
<li class="reJj6Thl_0">
3064+
<img src="<%- commentsArr[i].user_header %>" class="_2273kGdT_0"/>
3065+
<div class="_2CG0SquK_0">
3066+
<div class="_304R4gla_0">
3067+
<div>
3068+
<div class="_18Dng5rT_0">
3069+
<span><%- commentsArr[i].user_name %></span>
3070+
</div>
3071+
<div class="_1H1Z49Dr_0"><%- commentsArr[i].comment_ctime %></div>
3072+
</div>
3073+
</div>
3074+
<div class="_3M6kV3zb_0 _3D2NkqD6_0"><%- commentsArr[i].comment_content %></div>
3075+
<% if(commentsArr[i].replies) { %>
3076+
<% for(let j=0;j<commentsArr[i].replies.length;j++){ %>
3077+
<div class="_2xNIY4NG_0">
3078+
<p class="_33BLbmw4_0"><%- commentsArr[i].replies[j].user_name %> <%- commentsArr[i].replies[j].content %></p>
3079+
</div>
3080+
<% } %>
3081+
<% } %>
3082+
</div>
3083+
</li>
3084+
<%}%>
3085+
</ul>
30593086
</div>
30603087
</div>
30613088
</div>

0 commit comments

Comments
 (0)