|
|
Hello, everybody! 我又回来了!
因为工作的原因有两年没在论坛里水帖子了,不过好消息是两个月前我喜提毕业大礼包后变为家里蹲啦
为了治愈工伤,我开始在各个论坛的小说区找寻香甜可口的好饭,
期间看到不少超绝好饭却苦于无法下载保存留待日后细细品尝而懊恼。
但是
俗话说得好,办法总比困难多,作为一个聪明的互联网仓鼠,本可从小动手能力就很强。
从十多年前我就开始通过油猴脚本进行过优化贴吧使用体验,跳过视频网站广告,挂大学网课,卡百度网盘不限速下载等操作,
这个提取器脚本一开始确实好用,但是小问题也很多,尤其是部分论坛提取的内容会出现乱码,导出的饭完全无法品鉴。
遍寻AI模型查找解决方法无果,
今日刚好看到@MuHuang 大佬的帖子里提到@咸鱼鱼 大佬曾分享的脚本在提取繁中论坛时文本没有乱码,且将干扰码也一并去除了!
【感谢大佬的无私分享】
我当即茅塞顿开,让AI把这两个脚本融合一番,经过几轮调教成功地让这个脚本达到了我想要的效果。 | 前边写了三次都因为带了个emoji导致后面的内容保存后全部清空,没办法恢复,煮了600字的科研小作文没了,气死我了 |
废话结束,先说说这个脚本目前已经实现的功能和我后续想添加的功能
1.进入想私下品鉴的论坛帖子,点击“仅查看该作者” 【不点也可以,只是提取时间会久一点】
2.鼠标移动到右上方“下载全贴楼主发言”按钮,出现下拉菜单, 在这里你可以选择是否过滤字数较少的楼层或保留楼层号 【默认过滤少于50字的楼层,此类楼层一般是楼主互动回复或碎碎念,默认不保留楼层号,方便手机阅读】
3.完成可选设置后点击“下载全贴楼主发言”按钮,等待脚本提取完成, 提取完成后会生成txt文档下载到默认下载文件夹(C:\Users\[你的电脑用户名]\Downloads)
结语
欢迎大家取用,没有设置权限(所以需不需要权限过滤一下?), 直接复制代码后在油猴新建脚本粘贴保存就可以使用了, 请多多反馈不同论坛的使用情况,有想要的功能也不要吝啬你的回复, 说不定还能再售后一下,让AI再实现一两个功能
最后的最后,谢谢能看到这里的你,能不能给我点点评分,孩子想升级了
@Name @Name @Name 避免脚本无法正常导入的操作,无需理会
- // ==UserScript==
- // @name Discuz!楼主发言通用抓取器(修改版)
- // @namespace http://tampermonkey.net/
- // @version 15.0
- // @description 按楼层抓取楼主发言,字数过滤可自定义,浮层悬停渐变弹性显示,5秒延迟隐藏,勾选框文字齐平,清理乱码
- // @author Chick
- // @match *://*/*
- // @grant GM_xmlhttpRequest
- // ==/UserScript==
- (function() {
- 'use strict';
// Resolve (with no value) after `ms` milliseconds; used to pace page requests.
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Report whether the current document looks like a Discuz thread page.
 *
 * The page qualifies as soon as any one of the probe selectors matches.
 *
 * @returns {boolean} true when at least one probe selector matches.
 */
function isDiscuzThread() {
  const probes = ['[id^="post_"]', '.authi', '#postlist'];
  return probes.some((selector) => Boolean(document.querySelector(selector)));
}
/**
 * Turn a post's serialized HTML into plain text.
 *
 * Steps, in order:
 *   1. drop anti-scraping "jammer" <font> blocks and hidden <span> blocks,
 *      including ones whose content spans several lines;
 *   2. strip every remaining tag;
 *   3. decode the character entities that HTML serialization produces for
 *      text nodes (&lt; &gt; &nbsp; &amp;);
 *   4. trim surrounding whitespace.
 *
 * @param {string} rawText - HTML markup of a post body.
 * @returns {string} The visible text of the post.
 */
function cleanText(rawText) {
  const ENTITY_TEXT = {
    '&lt;': '<',
    '&gt;': '>',
    '&nbsp;': ' ',
    '&amp;': '&',
  };
  return rawText
    // [\s\S] instead of "." so that multi-line jammer blocks are removed too.
    .replace(/<font class="jammer">[\s\S]*?<\/font>/gi, '')
    .replace(/<span style="display:none">[\s\S]*?<\/span>/gi, '')
    .replace(/<[^>]*>/g, '')
    // Decode in a single pass so "&amp;lt;" becomes "&lt;" and is not decoded twice.
    .replace(/&(?:lt|gt|nbsp|amp);/g, (entity) => ENTITY_TEXT[entity])
    .trim();
}
/**
 * Remove non-text elements (images, attachment boxes, hidden blocks, ads,
 * quotes) from a post body and return its remaining content as plain text.
 *
 * Note: the matched descendants are removed from `el` itself.
 *
 * @param {Element} el - Post content element; modified in place.
 * @returns {string} Result of passing the remaining innerHTML to cleanText.
 */
function cleanContent(el) {
  const unwanted = el.querySelectorAll(
    'img, .attach_nopermission, .t_attach, .hidden, .a_pr, .quote',
  );
  for (const node of unwanted) {
    node.remove();
  }
  return cleanText(el.innerHTML);
}
/**
 * Download a page with GM_xmlhttpRequest and parse it into a Document.
 *
 * The response is decoded with the character set of the current page
 * (`document.characterSet`), so forums served as GBK/Big5 without a charset
 * in their Content-Type header are not read as mojibake.
 *
 * The promise never rejects because of a network problem: request errors,
 * timeouts and aborts all resolve to `null`, which lets the caller skip
 * that page.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<Document|null>} Parsed document, or null on failure.
 */
function fetchPage(url) {
  const REQUEST_TIMEOUT_MS = 30000;
  return new Promise((resolve) => {
    const giveUp = (reason) => {
      console.warn(`[grabber] ${reason}: ${url}`);
      resolve(null);
    };
    GM_xmlhttpRequest({
      method: 'GET',
      url,
      // Without a timeout a stalled connection would leave the promise pending forever.
      timeout: REQUEST_TIMEOUT_MS,
      overrideMimeType: `text/html; charset=${document.characterSet}`,
      onload(res) {
        resolve(new DOMParser().parseFromString(res.responseText, 'text/html'));
      },
      onerror() {
        giveUp('request failed');
      },
      ontimeout() {
        giveUp('request timed out');
      },
      onabort() {
        giveUp('request aborted');
      },
    });
  });
}
/**
 * Build the URL of page `page` of the thread located at `href`.
 *
 * The fragment is dropped first: a `page` parameter appended after `#`
 * would never be sent to the server, so every request would return the
 * same page.
 *
 * @param {string} href - URL of the thread as currently displayed.
 * @param {number} page - 1-based page number to request.
 * @param {number} totalPages - Total number of pages in the thread.
 * @returns {string} URL to fetch for that page.
 */
function buildPageUrl(href, page, totalPages) {
  const base = href.split('#')[0];
  if (base.includes('mod=viewthread')) {
    return `${base.replace(/&page=\d+/, '')}&page=${page}`;
  }
  if (/-(\d+)-(\d+)\.html/.test(base)) {
    // Rewritten URLs: the second-to-last number before ".html" is the page.
    return base.replace(/-(\d+)-(\d+)\.html/, `-${page}-$2.html`);
  }
  if (totalPages > 1) {
    return `${base}${base.includes('?') ? '&' : '?'}page=${page}`;
  }
  return base;
}

/**
 * Collect every post by the thread author across all pages and download
 * the result as a UTF-8 text file.
 *
 * Options are read from the floating panel: whether to skip short posts,
 * the minimum length (default 50), and whether to prefix each kept post
 * with a running floor number. The author is taken from the first author
 * link found on the current page. Pages that fail to download are skipped.
 *
 * The button is always re-enabled when the run ends, including when an
 * error is thrown part-way through.
 *
 * @returns {Promise<void>}
 */
async function extractPosts() {
  const AUTHOR_SELECTOR = '.authi a.xw1, .pi a.xw1, .authorinfo a, .name a';
  const CONTENT_SELECTOR = '.t_f, .postmessage, .pcb, .pcbs, .t_msg, .message';

  const btn = document.getElementById('grabber-btn');
  const enableFilter = document.getElementById('floorFilterCheckbox')?.checked ?? true;
  const minLength =
    Number.parseInt(document.getElementById('floorMinLengthInput')?.value ?? '', 10) || 50;
  const retainFloor = document.getElementById('retainFloorCheckbox')?.checked ?? false;

  const authorEl = document.querySelector(AUTHOR_SELECTOR);
  if (!authorEl) {
    alert('无法识别楼主');
    return;
  }
  const authorName = authorEl.innerText.trim();

  btn.disabled = true;
  btn.style.opacity = '0.6';
  btn.innerText = '提取中...';

  let succeeded = false;
  try {
    // Total page count; falls back to 1 when the pager is missing or unreadable.
    const pageSpan = document.querySelector('.pgt .pg label span, .pg label span');
    const parsedPages = pageSpan
      ? Number.parseInt(pageSpan.innerText.replace(/\D/g, ''), 10)
      : 1;
    const totalPages = parsedPages >= 1 ? parsedPages : 1;

    const parts = [
      `楼主:${authorName} 的全贴发言备份\n来源:${window.location.href}\n生成时间:${new Date().toLocaleString()}\n================================\n\n`,
    ];
    let floorIndex = 1;

    for (let page = 1; page <= totalPages; page++) {
      const pageUrl = buildPageUrl(window.location.href, page, totalPages);
      const doc = await fetchPage(pageUrl);
      if (!doc) {
        console.warn(`[grabber] skipped page ${page}/${totalPages}: ${pageUrl}`);
        continue;
      }

      for (const post of doc.querySelectorAll('[id^="post_"]')) {
        const name = post.querySelector(AUTHOR_SELECTOR)?.innerText.trim();
        if (name !== authorName) continue;

        const contentEl = post.querySelector(CONTENT_SELECTOR);
        if (!contentEl) continue;

        const text = cleanContent(contentEl);
        if (enableFilter && text.length < minLength) continue;

        const heading = retainFloor ? `=== 第 ${floorIndex} 楼 ===\n` : '';
        parts.push(`${heading}${text}\n\n`);
        floorIndex++;
      }

      btn.innerText = `提取中: 第 ${page}/${totalPages} 页`;
      await sleep(500); // pause between pages to avoid hammering the forum
    }

    const blob = new Blob([parts.join('')], { type: 'text/plain;charset=utf-8' });
    const objectUrl = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = objectUrl;
    a.download = `${authorName}_${document.title.split(' - ')[0]}.txt`.replace(/[\\/:*?"<>|]/g, '_');
    a.click();
    // Release the blob once the browser has had time to start the download.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 10000);
    succeeded = true;
  } catch (err) {
    console.error('[grabber] extraction failed', err);
  } finally {
    btn.disabled = false;
    btn.style.opacity = '1';
    btn.innerText = succeeded ? '下载完成!' : '提取失败,请重试';
    btn.style.backgroundColor = succeeded ? '#27ae60' : '#c0392b';
    setTimeout(() => {
      btn.innerText = '下载全贴楼主发言';
      btn.style.backgroundColor = '#2980b9';
    }, 3000);
  }
}
/**
 * Mount the floating download button and its hover-activated options panel.
 *
 * Returns without doing anything when the page is not a Discuz thread or
 * when the button has already been added. The panel holds three controls:
 * a checkbox that toggles the length filter, a number input for the
 * minimum length, and a checkbox that toggles floor numbering. It appears
 * while the pointer is over the button or the panel and is hidden 5 seconds
 * after the pointer leaves.
 */
function init() {
  if (!isDiscuzThread()) return;
  if (document.getElementById('grabber-btn')) return;

  // Create an element, assign its DOM properties, then its inline styles.
  const build = (tag, props, css) => {
    const node = document.createElement(tag);
    Object.assign(node, props);
    Object.assign(node.style, css);
    return node;
  };

  const BUTTON_IDLE_COLOR = '#2980b9';
  const BUTTON_HOVER_COLOR = '#3498db';
  const INPUT_IDLE_SHADOW = 'inset 0 1px 2px rgba(0,0,0,0.1)';
  const PANEL_EASING = 'cubic-bezier(0.25,1.5,0.5,1)';
  const PANEL_HIDDEN = {
    opacity: '0',
    transform: 'translateY(-10px) scale(0.95)',
    pointerEvents: 'none',
  };
  const PANEL_VISIBLE = {
    opacity: '1',
    transform: 'translateY(0) scale(1)',
    pointerEvents: 'auto',
  };

  const container = build('div', {}, {
    position: 'fixed',
    top: '150px',
    right: '20px',
    zIndex: '999999',
    fontFamily: 'Arial, sans-serif',
  });

  // Download button.
  const btn = build(
    'button',
    { id: 'grabber-btn', innerText: '下载全贴楼主发言' },
    {
      display: 'block',
      backgroundColor: BUTTON_IDLE_COLOR,
      color: '#fff',
      border: 'none',
      borderRadius: '6px',
      padding: '8px 12px',
      cursor: 'pointer',
      boxShadow: '0 3px 8px rgba(0,0,0,0.3)',
      transition: 'background 0.3s',
    },
  );
  btn.onmouseover = () => {
    btn.style.backgroundColor = BUTTON_HOVER_COLOR;
  };
  btn.onmouseout = () => {
    btn.style.backgroundColor = BUTTON_IDLE_COLOR;
  };
  btn.onclick = extractPosts;

  // Options panel, initially hidden below the button.
  const filterLayer = build('div', {}, {
    position: 'absolute',
    top: '100%',
    right: '0',
    background: 'linear-gradient(135deg, #fdfbfb, #ebedee)',
    padding: '10px 14px',
    borderRadius: '8px',
    boxShadow: '0 3px 12px rgba(0,0,0,0.25)',
    marginTop: '6px',
    ...PANEL_HIDDEN,
    transition: `opacity 0.4s ${PANEL_EASING}, transform 0.4s ${PANEL_EASING}`,
    display: 'flex',
    flexDirection: 'column',
    whiteSpace: 'nowrap',
  });

  // Row 1: enable the length filter (checkbox + label).
  const filterRow = build('div', {}, {
    display: 'flex',
    alignItems: 'center',
    marginBottom: '6px',
  });
  filterRow.append(
    build(
      'input',
      { type: 'checkbox', id: 'floorFilterCheckbox', checked: true },
      { marginRight: '6px', verticalAlign: 'middle' },
    ),
    build(
      'label',
      { htmlFor: 'floorFilterCheckbox', innerText: '启用提取楼层字数过滤' },
      { fontSize: '13px', userSelect: 'none', verticalAlign: 'middle' },
    ),
  );
  filterLayer.appendChild(filterRow);

  // Row 2: minimum length, indented 22px (assumed 16px checkbox + 6px margin)
  // so its text lines up with the label text of row 1.
  const lengthRow = build('div', {}, {
    display: 'flex',
    alignItems: 'center',
    marginBottom: '6px',
    paddingLeft: '22px',
  });
  const minLengthInput = build(
    'input',
    { type: 'number', id: 'floorMinLengthInput', min: '1', value: 50 },
    {
      width: '45px',
      fontSize: '12px',
      borderRadius: '4px',
      border: '1px solid #ccc',
      padding: '2px 4px',
      boxShadow: INPUT_IDLE_SHADOW,
      outline: 'none',
      transition: 'border-color 0.2s, box-shadow 0.2s',
    },
  );
  minLengthInput.onfocus = () => {
    Object.assign(minLengthInput.style, {
      borderColor: '#2980b9',
      boxShadow: '0 0 3px rgba(41,128,185,0.5)',
    });
  };
  minLengthInput.onblur = () => {
    Object.assign(minLengthInput.style, {
      borderColor: '#ccc',
      boxShadow: INPUT_IDLE_SHADOW,
    });
  };
  lengthRow.append(
    build('span', { innerText: '提取字数不少于' }, { fontSize: '12px', marginRight: '6px' }),
    minLengthInput,
  );
  filterLayer.appendChild(lengthRow);

  // Row 3: keep floor numbers (checkbox + label).
  const floorRow = build('div', {}, { display: 'flex', alignItems: 'center' });
  floorRow.append(
    build(
      'input',
      { type: 'checkbox', id: 'retainFloorCheckbox', checked: false },
      { marginRight: '6px' },
    ),
    build(
      'label',
      { htmlFor: 'retainFloorCheckbox', innerText: '保留楼层编号' },
      { fontSize: '13px', userSelect: 'none' },
    ),
  );
  filterLayer.appendChild(floorRow);

  // Hover behaviour: show at once, hide 5 seconds after the pointer leaves.
  let hideTimer = null;
  const cancelPendingHide = () => {
    if (hideTimer) clearTimeout(hideTimer);
  };
  const showLayer = () => {
    cancelPendingHide();
    Object.assign(filterLayer.style, PANEL_VISIBLE);
  };
  const hideLayerDelayed = () => {
    cancelPendingHide();
    hideTimer = setTimeout(() => {
      Object.assign(filterLayer.style, PANEL_HIDDEN);
    }, 5000);
  };
  for (const target of [btn, filterLayer]) {
    target.addEventListener('mouseenter', showLayer);
    target.addEventListener('mouseleave', hideLayerDelayed);
  }

  container.append(btn, filterLayer);
  document.body.appendChild(container);
}
// Mount the UI now if the page has finished loading, otherwise on the load event.
if (document.readyState !== 'complete') {
  window.addEventListener('load', init);
} else {
  init();
}
- })();
复制代码
|
|
本帖子中包含更多资源
您需要 登录 才可以下载或查看,没有账号?立即注册
x
评分
-
查看全部评分
|