Skip to content

Commit 63f8c8e

Browse files
authored
Merge pull request #1052 from xzuoqi/main
Add script to generate JSON data for rendering the homepage.
2 parents 59800ac + 04f3903 commit 63f8c8e

File tree

4 files changed

+480
-1
lines changed

4 files changed

+480
-1
lines changed

.github/scripts/baidu-tongji.js

Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
const axios = require("axios");
2+
const fs = require("fs");
3+
const path = require("path");
4+
const cheerio = require("cheerio");
5+
6+
/**
 * Reads the Baidu Tongji access token from the environment.
 *
 * @returns {string} The value of BAIDU_TONGJI_ACCESS_TOKEN, or "" when the
 *   variable is unset or empty.
 */
function getAccessToken() {
  const { BAIDU_TONGJI_ACCESS_TOKEN: token } = process.env;
  return token ? token : "";
}
9+
10+
/**
 * Exchanges the configured refresh token for a new access token.
 *
 * Reads BAIDU_TONGJI_REFRESH_TOKEN, BAIDU_TONGJI_CLIENT_ID and
 * BAIDU_TONGJI_CLIENT_SECRET from the environment. This function never
 * throws: every failure is logged as a warning and reported as "".
 *
 * @returns {Promise<string>} The new access token, or "" when a credential is
 *   missing, the request fails, or the response has no `access_token` field.
 */
async function refreshAccessTokenIfPossible() {
  const env = process.env;
  const credentials = {
    refresh_token: env.BAIDU_TONGJI_REFRESH_TOKEN || "",
    client_id: env.BAIDU_TONGJI_CLIENT_ID || "",
    client_secret: env.BAIDU_TONGJI_CLIENT_SECRET || "",
  };

  // All three values are required before a request is attempted.
  const hasAllCredentials = Object.values(credentials).every(Boolean);
  if (!hasAllCredentials) {
    console.warn("缺少刷新令牌或客户端凭据,无法刷新 access_token");
    return "";
  }

  try {
    const response = await axios.get(
      "https://openapi.baidu.com/oauth/2.0/token",
      {
        params: { grant_type: "refresh_token", ...credentials },
        timeout: 15000,
      },
    );
    const payload = response.data;

    if (!payload || !payload.access_token) {
      console.warn("刷新 access_token 失败:响应无 access_token 字段");
      return "";
    }

    console.log("已通过 refresh_token 获取新的 access_token");
    return payload.access_token;
  } catch (err) {
    console.warn("刷新 access_token 异常:", err.message);
    return "";
  }
}
41+
42+
/**
 * Computes the most recent complete Monday-to-Sunday week that ended before
 * the reference date, using the local time zone.
 *
 * @param {Date} [today=new Date()] Reference date. It is copied, never mutated.
 * @returns {{startDate: string, endDate: string}} The Monday and Sunday of
 *   that week, each formatted as yyyyMMdd.
 */
function getLastWeekDateRange(today = new Date()) {
  // getDay() returns 0 for Sunday. Remap so Monday = 0 ... Sunday = 6;
  // otherwise a run on Sunday would select the week that is still in progress
  // (Monday..today) instead of the previous complete week.
  const daysSinceMonday = (today.getDay() + 6) % 7;

  // setDate() accepts out-of-range values and rolls over month/year borders.
  const lastMonday = new Date(today);
  lastMonday.setDate(today.getDate() - daysSinceMonday - 7);

  const lastSunday = new Date(lastMonday);
  lastSunday.setDate(lastMonday.getDate() + 6);

  const formatDate = (date) => {
    const year = date.getFullYear();
    const month = String(date.getMonth() + 1).padStart(2, "0");
    const day = String(date.getDate()).padStart(2, "0");
    return `${year}${month}${day}`;
  };

  return {
    startDate: formatDate(lastMonday),
    endDate: formatDate(lastSunday),
  };
}
65+
/**
 * Fetches the top visited pages report (method "visit/toppage/a") for last
 * week from the Baidu Tongji report API.
 *
 * Uses the access token from the environment, falling back to a refreshed
 * token when none is configured. If the API answers with error code 110 or
 * 111, the token is refreshed once and the request is repeated.
 *
 * @returns {Promise<object|false>} The raw response body on success; `false`
 *   when the API still reports an `error_code`; `{}` when an exception occurs
 *   (no usable token, network failure, ...). Callers should treat a value
 *   without a `result` property as a failure.
 */
async function getTopPages() {
  const url = "https://openapi.baidu.com/rest/2.0/tongji/report/getData";
  // Same bound the token-refresh request uses. axios applies no timeout by
  // default, so a stalled connection would otherwise block the script.
  const timeout = 15000;

  try {
    const dateRange = getLastWeekDateRange();

    let accessToken = getAccessToken();
    if (!accessToken) {
      accessToken = await refreshAccessTokenIfPossible();
      if (!accessToken) {
        throw new Error("无法获得可用的 access_token");
      }
    }

    const params = {
      access_token: accessToken,
      site_id: 22607172,
      method: "visit/toppage/a",
      start_date: dateRange.startDate,
      end_date: dateRange.endDate,
      max_results: 20,
      metrics: "pv_count",
    };

    let response = await axios.get(url, { params, timeout });

    if (response.data.error_code) {
      // Codes on which a single retry with a refreshed token is attempted.
      const shouldRefreshToken =
        response.data.error_code === 110 || response.data.error_code === 111;

      if (shouldRefreshToken) {
        const refreshed = await refreshAccessTokenIfPossible();
        if (refreshed) {
          params.access_token = refreshed;
          response = await axios.get(url, { params, timeout });
        }
      }

      if (response.data.error_code) {
        console.log("失败", response.data);
        return false;
      }
    }

    console.log("访问数据获取成功");
    return response.data;
  } catch (error) {
    console.log("获取访问数据时出错:", error.message);
    return {};
  }
}
113+
114+
/**
 * Infers a language code from a locale segment in a URL.
 *
 * A segment matches when it appears as "/<segment>/" anywhere in the URL or
 * as the trailing "/<segment>".
 *
 * @param {string} url URL to inspect.
 * @returns {string|null} "en", "zh-CN", "zh-TW", "ja" or "ko"; `null` when no
 *   known segment is present.
 */
function detectLanguageFromUrl(url) {
  // Checked top to bottom; the first matching segment decides the result.
  const segmentToLang = [
    ["en", "en"],
    ["zh", "zh-CN"],
    ["zh-CN", "zh-CN"],
    ["zh-TW", "zh-TW"],
    ["ja", "ja"],
    ["ko", "ko"],
  ];

  const match = segmentToLang.find(
    ([segment]) => url.includes(`/${segment}/`) || url.endsWith(`/${segment}`),
  );
  return match ? match[1] : null;
}
131+
132+
/**
 * Infers a language from a parsed HTML document.
 *
 * Sources are consulted in this order: the `lang` attribute of `<html>`, then
 * the `content` of `<meta http-equiv="content-language">`. Either value is
 * returned lower-cased. If neither is set and a `<meta charset>` containing
 * "utf-8" is present, the sentinel "auto" is returned.
 *
 * @param {Function} $ Selector function of the loaded document (as returned by
 *   `cheerio.load`); only `$(selector).attr(name)` is used.
 * @returns {string|null} Lower-cased language value, "auto", or `null`.
 */
function detectLanguageFromHtml($) {
  const declaredLang =
    $("html").attr("lang") ||
    $('meta[http-equiv="content-language"]').attr("content");
  if (declaredLang) {
    return declaredLang.toLowerCase();
  }

  const charset = $("meta[charset]").attr("charset");
  const isUtf8 = Boolean(charset) && charset.toLowerCase().includes("utf-8");
  return isUtf8 ? "auto" : null;
}
153+
154+
/**
 * Downloads a page and derives a display title and a language code for it.
 *
 * Title: text of the first `<title>`, else text of the first `<h1>`, else the
 * last path segment of the URL (or the URL itself when that segment is empty).
 * Language: locale segment in the URL, else what the HTML declares, else a
 * default of "zh-CN" for docs.radxa.com URLs without "/en/" and "en" otherwise.
 *
 * A failed download is logged and answered with the URL-derived title and the
 * same URL-based language rules, so a fetch failure does not reject.
 *
 * @param {string} url Absolute URL of the page.
 * @returns {Promise<{title: string, lang: string}>} Title and language.
 */
async function getPageTitleAndLang(url) {
  // Last path segment, or the whole URL when it ends with "/".
  const titleFromUrl = () => url.split("/").pop() || url;

  // Default used when neither the URL nor the HTML declares a language.
  const defaultLang = () =>
    url.includes("docs.radxa.com") && !url.includes("/en/") ? "zh-CN" : "en";

  try {
    const response = await axios.get(url, {
      timeout: 10000,
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
      },
    });

    const $ = cheerio.load(response.data);

    // Take only the first <title>: .text() on the whole match set concatenates
    // every <title> in the document, including those inside inline SVGs.
    // An empty title falls through to the next source instead of yielding "".
    const title =
      $("title").first().text().trim() ||
      $("h1").first().text().trim() ||
      titleFromUrl();

    const lang =
      detectLanguageFromUrl(url) || detectLanguageFromHtml($) || defaultLang();

    return { title, lang };
  } catch (error) {
    console.error(`获取页面标题失败 ${url}:`, error.message);
    return {
      title: titleFromUrl(),
      // Apply the same default as the success path when the URL has no locale.
      lang: detectLanguageFromUrl(url) || defaultLang(),
    };
  }
}
204+
205+
/**
 * Adds `title` and `lang` to every report row that carries a `name`.
 *
 * Rows are read from `data.data.items[0]`, which is iterated as a list of row
 * groups. Each distinct `name` is looked up once through
 * `getPageTitleAndLang`, one after another, with a 500 ms pause after each
 * lookup. Rows are modified in place.
 *
 * @param {object} data Result wrapper; returned unchanged (with a log message)
 *   when `data.data.items[0]` is missing.
 * @returns {Promise<object>} The same `data` object that was passed in.
 */
async function updateDataWithTitles(data) {
  const rowGroups = data?.data?.items?.[0];
  if (!rowGroups) {
    console.log("数据结构不符合预期,跳过标题更新");
    return data;
  }

  // Gather each distinct page name once.
  const pendingUrls = new Set();
  for (const group of rowGroups) {
    for (const row of group) {
      if (row.name) {
        pendingUrls.add(row.name);
      }
    }
  }

  console.log(`找到 ${pendingUrls.size} 个唯一URL,开始获取标题和语言信息...`);

  // Resolve the pages sequentially, pausing between requests.
  const infoByUrl = {};
  const total = pendingUrls.size;
  let position = 0;
  for (const pageUrl of pendingUrls) {
    position += 1;
    console.log(`正在获取第 ${position}/${total} 个URL的信息: ${pageUrl}`);
    const { title, lang } = await getPageTitleAndLang(pageUrl);
    infoByUrl[pageUrl] = { title, lang };

    await new Promise((resolve) => setTimeout(resolve, 500));
  }

  // Copy the resolved values back onto the rows.
  for (const group of rowGroups) {
    for (const row of group) {
      const info = row.name && infoByUrl[row.name];
      if (info) {
        row.title = info.title;
        row.lang = info.lang;
      }
    }
  }

  console.log("标题和语言信息更新完成");
  return data;
}
251+
252+
/**
 * Writes `data` to `filename` as JSON indented with two spaces, creating the
 * parent directory (recursively) when it does not exist yet.
 *
 * @param {*} data Value to serialize.
 * @param {string} [filename="static/json/baidu_tongji_data.json"] Target
 *   path; a relative path resolves against the process working directory.
 * @returns {string} The normalized path that was written.
 * @throws {Error} Any serialization or file-system error, rethrown after it
 *   has been logged.
 */
function saveToJsonFile(data, filename = "static/json/baidu_tongji_data.json") {
  try {
    const targetPath = path.join(filename);
    const parentDir = path.dirname(targetPath);

    const parentMissing = !fs.existsSync(parentDir);
    if (parentMissing) {
      fs.mkdirSync(parentDir, { recursive: true });
    }

    fs.writeFileSync(targetPath, JSON.stringify(data, null, 2), "utf8");
    console.log(`数据已保存到: ${targetPath}`);
    return targetPath;
  } catch (err) {
    console.error("保存文件时出错:", err.message);
    throw err;
  }
}
271+
272+
/**
 * Script entry point: fetches last week's top-pages report, adds page titles
 * and languages to it, and writes the result to
 * static/json/baidu_tongji_data.json (relative to the working directory).
 *
 * Any failure is logged and ends the process with exit code 1. A report
 * without a `result` counts as a failure, so an existing data file is not
 * overwritten with an empty one.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const visitData = await getTopPages();

    // getTopPages() signals failure with `false` or `{}`; neither has a result.
    if (!visitData || !visitData.result) {
      throw new Error("未获取到访问数据,保留已有数据文件");
    }

    // Compute the range once so both dates come from the same instant.
    const { startDate, endDate } = getLastWeekDateRange();

    // Payload that is written to disk.
    const resultData = {
      fetch_time: new Date().toISOString(),
      date_range: {
        start_date: startDate,
        end_date: endDate,
      },
      site_info: {
        site_id: 22607172,
      },
      data: visitData.result,
    };

    // Add page title and language information to each row.
    console.log("开始获取页面标题信息...");
    const updatedData = await updateDataWithTitles(resultData);

    saveToJsonFile(updatedData, "static/json/baidu_tongji_data.json");
  } catch (error) {
    // Report the cause before exiting; a bare non-zero exit is hard to debug.
    console.error("生成百度统计数据失败:", error.message);
    process.exit(1);
  }
}
299+
300+
// Run the script. main() catches its own failures and exits with status 1.
main();

0 commit comments

Comments
 (0)