One Click Page Extractor - source text package
This attachment contains source code as plain text. It is not an executable Chrome extension package.
To restore, create the listed files under one-click-page-extractor/ with UTF-8 encoding, then add icons separately from the existing local folder if needed.
===== FILE: manifest.json =====
{
"manifest_version": 3,
"name": "One Click Page Extractor",
"description": "Extract readable text, headings, and links from the current page and download them as Markdown, plain text, or a URL list.",
"version": "1.0.0",
"permissions": [
"activeTab",
"scripting",
"downloads"
],
"action": {
"default_popup": "popup.html"
},
"icons": {
"16": "icons/icon16.png",
"48": "icons/icon48.png",
"128": "icons/icon128.png"
}
}
===== END FILE: manifest.json =====
===== FILE: popup.html =====
One Click Page Extractor
===== END FILE: popup.html =====
===== FILE: popup.css =====
/* Theme values for the popup, declared as custom properties on :root. */
/* The other rules in this stylesheet read them through var(--name). */
:root {
/* Declares that the popup is designed for a light color scheme. */
color-scheme: light;
/* Surface colors: page background and card background. */
--bg: #f4f8ff;
--card: #ffffff;
/* Text colors: primary text and secondary (muted) text. */
--text: #152238;
--muted: #5d6b80;
/* Border color used for the card outline. */
--line: #d8e4f5;
/* Accent colors: base, hover variant, and soft tint. */
--primary: #1769e0;
--primary-dark: #0f55b8;
--primary-soft: #eaf2ff;
/* Color used for error status text. */
--danger: #b42318;
}
/* Size every element by its border box so padding and borders are included in width/height. */
* {
box-sizing: border-box;
}
/* Page shell: fixed 340px width, themed background and text color, system font stack. */
body {
width: 340px;
margin: 0;
padding: 14px;
background: var(--bg);
color: var(--text);
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
font-size: 14px;
}
/* Card container: bordered, rounded, with a soft shadow tinted in the primary color. */
.popup-card {
border: 1px solid var(--line);
border-radius: 8px;
background: var(--card);
box-shadow: 0 10px 30px rgba(23, 105, 224, 0.10);
padding: 14px;
}
/* Header row: a fixed 38px column followed by a flexible column. */
/* minmax(0, 1fr) lets the second column shrink below its content width. */
.popup-header {
display: grid;
grid-template-columns: 38px minmax(0, 1fr);
gap: 10px;
align-items: center;
margin-bottom: 14px;
}
/* 38x38 badge whose content is centered with grid place-items. */
.app-mark {
display: grid;
width: 38px;
height: 38px;
place-items: center;
border-radius: 8px;
background: var(--primary-soft);
color: var(--primary);
font-weight: 800;
letter-spacing: 0;
}
/* Popup heading: compact size with a small bottom margin. */
h1 {
margin: 0 0 4px;
font-size: 16px;
line-height: 1.25;
}
/* Single-line label that is clipped with an ellipsis when it overflows. */
/* min-height reserves the line's space even while the element is empty. */
.page-title {
min-height: 18px;
margin: 0;
overflow: hidden;
color: var(--muted);
font-size: 12px;
line-height: 1.45;
text-overflow: ellipsis;
white-space: nowrap;
}
/* Single-column grid that stacks its children with an 8px gap. */
.button-grid {
display: grid;
grid-template-columns: 1fr;
gap: 8px;
}
/* Full-width primary button; font: inherit picks up the body font before the weight override. */
button {
width: 100%;
min-height: 38px;
border: 1px solid var(--primary);
border-radius: 6px;
background: var(--primary);
color: #ffffff;
cursor: pointer;
font: inherit;
font-weight: 700;
}
/* Hover state: darker accent for border and background. */
button:hover {
border-color: var(--primary-dark);
background: var(--primary-dark);
}
/* Disabled state: greyed out with a wait cursor. */
/* Declared after :hover with equal specificity, so it wins while a disabled button is hovered. */
button:disabled {
border-color: #b9c6d8;
background: #d7e0ed;
color: #66758a;
cursor: wait;
}
/* Status line: muted small text; min-height reserves space while the text is empty. */
.status {
min-height: 18px;
margin: 12px 0 0;
color: var(--muted);
font-size: 12px;
line-height: 1.5;
}
/* Error variant; popup.js toggles the "is-error" class in setStatus(). */
.status.is-error {
color: var(--danger);
font-weight: 700;
}
===== END FILE: popup.css =====
===== FILE: popup.js =====
// User-facing status messages (Japanese) shown in the popup status line via setStatus().
//   ready           - "Ready"
//   extracting      - "Extracting..."
//   downloaded      - "Downloaded"
//   extractFailed   - "Extraction failed" (generic message for unexpected errors)
//   unsupportedPage - "Cannot extract on this page" (used when canAccessTabUrl() rejects the tab URL)
const STATUS = {
ready: "準備完了",
extracting: "抽出中...",
downloaded: "ダウンロードしました",
extractFailed: "抽出に失敗しました",
unsupportedPage: "このページでは抽出できません"
};
// Element showing the title of the tab being extracted.
const pageTitle = document.getElementById("pageTitle");
// Element showing progress and error messages.
const statusMessage = document.getElementById("statusMessage");
// Download buttons keyed by element id; setBusy() enables/disables all of them.
const buttons = {
  downloadMarkdown: document.getElementById("downloadMarkdown"),
  downloadText: document.getElementById("downloadText"),
  downloadUrls: document.getElementById("downloadUrls")
};

// Once the DOM is ready: show the tab title, report readiness, then wire each
// button to handleAction() with the output format it stands for.
document.addEventListener("DOMContentLoaded", async () => {
  await updateCurrentTabTitle();
  setStatus(STATUS.ready);

  const formatByButton = [
    [buttons.downloadMarkdown, "markdown"],
    [buttons.downloadText, "text"],
    [buttons.downloadUrls, "urls"]
  ];
  for (const [button, format] of formatByButton) {
    button.addEventListener("click", () => handleAction(format));
  }
});
/**
 * Shows the active tab's title in the popup header.
 *
 * Falls back to a "no title" label when the tab has an empty title, and to a
 * "cannot get current page" label when the tab lookup fails.
 *
 * @returns {Promise<void>}
 */
async function updateCurrentTabTitle() {
  try {
    const { title } = await getCurrentTab();
    pageTitle.textContent = title ? title : "タイトルなし";
  } catch {
    // Best effort: the popup stays usable even when the tab cannot be read.
    pageTitle.textContent = "現在ページを取得できません";
  }
}
/**
 * Runs one extract-and-download cycle for the requested output format.
 *
 * Buttons are disabled while the cycle runs and are always re-enabled
 * afterwards. Errors flagged with `isExpected` show their own message; any
 * other error shows the generic failure message and is logged to the console
 * so the underlying cause is not lost.
 *
 * @param {string} format - "markdown", "urls", or any other value for plain text.
 * @returns {Promise<void>}
 */
async function handleAction(format) {
  setBusy(true);
  setStatus(STATUS.extracting);
  try {
    const data = await extractCurrentPage();
    const extractedAt = formatDateTime(new Date());
    const output = buildOutput(format, data, extractedAt);
    await downloadOutput(output, data.title, format);
    setStatus(STATUS.downloaded);
  } catch (error) {
    const isExpectedError = Boolean(error && error.isExpected);
    if (!isExpectedError) {
      // The user only sees a generic message; keep the real cause for debugging.
      console.error("Page extraction failed:", error);
    }
    setStatus(isExpectedError ? error.message : STATUS.extractFailed, true);
  } finally {
    setBusy(false);
  }
}
/**
 * Dispatches to the builder for the requested output format.
 *
 * @param {string} format - "markdown" or "urls"; anything else yields plain text.
 * @param {object} data - Extraction result returned by content.js.
 * @param {string} extractedAt - Formatted extraction timestamp.
 * @returns {string} File content for the chosen format.
 */
function buildOutput(format, data, extractedAt) {
  switch (format) {
    case "markdown":
      return buildMarkdown(data, extractedAt);
    case "urls":
      return buildUrlList(data, extractedAt);
    default:
      return buildPlainText(data, extractedAt);
  }
}
/**
 * Looks up the active tab of the current window.
 *
 * @returns {Promise<object>} The first tab returned by chrome.tabs.query.
 * @throws {Error} When no tab is returned or the tab has no usable id.
 */
async function getCurrentTab() {
  const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
  const activeTab = tabs.length > 0 ? tabs[0] : null;
  if (activeTab === null || !activeTab.id) {
    throw new Error("No active tab found.");
  }
  return activeTab;
}
/**
 * Injects content.js into the active tab and returns what it extracted.
 *
 * @returns {Promise<object>} The `result` of the first injection result.
 * @throws {Error} An expected error (isExpected = true) when the tab URL is
 *   rejected by canAccessTabUrl(); a plain Error when no result comes back.
 */
async function extractCurrentPage() {
  const tab = await getCurrentTab();
  if (!canAccessTabUrl(tab.url)) {
    throw createExpectedError(STATUS.unsupportedPage);
  }

  const injectionResults = await chrome.scripting.executeScript({
    target: { tabId: tab.id },
    files: ["content.js"]
  });

  let extracted = null;
  if (injectionResults && injectionResults[0]) {
    extracted = injectionResults[0].result;
  }
  if (!extracted) {
    throw new Error("No extraction result returned.");
  }
  return extracted;
}
/**
 * Decides whether the extension should try to inject into a tab URL.
 *
 * Only http:, https: and file: URLs are accepted. Chrome Web Store pages are
 * rejected. The URL is parsed instead of pattern-matched so that Web Store
 * URLs without a slash after "/webstore" (for example
 * "https://chrome.google.com/webstore?hl=ja") are recognised too.
 *
 * @param {string} url - Tab URL; may be empty or undefined.
 * @returns {boolean} true when extraction may be attempted.
 */
function canAccessTabUrl(url) {
  if (!url) {
    return false;
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not a parseable absolute URL, so there is nothing to inject into.
    return false;
  }

  // URL normalises the scheme and host to lower case.
  const { protocol, hostname } = parsed;
  if (protocol !== "http:" && protocol !== "https:" && protocol !== "file:") {
    return false;
  }

  if (hostname === "chromewebstore.google.com") {
    return false;
  }
  if (hostname === "chrome.google.com") {
    const path = parsed.pathname.toLowerCase();
    if (path === "/webstore" || path.startsWith("/webstore/")) {
      return false;
    }
  }
  return true;
}
/**
 * Creates an Error flagged as "expected", so handleAction() shows its message
 * to the user instead of the generic failure text.
 *
 * @param {string} message - User-facing message.
 * @returns {Error} Error carrying `isExpected === true`.
 */
function createExpectedError(message) {
  return Object.assign(new Error(message), { isExpected: true });
}
/**
 * Renders the extraction result as a Markdown document with four sections
 * (title block, Headings, Content, Links) separated by horizontal rules.
 *
 * @param {object} data - Reads title, url, headings, content and links.
 * @param {string} extractedAt - Formatted extraction timestamp.
 * @returns {string} Markdown text ending with a newline.
 */
function buildMarkdown(data, extractedAt) {
  const headingItems = [];
  if (data.headings && data.headings.length) {
    for (const heading of data.headings) {
      // One space of indentation per heading level below H1.
      const depth = Math.max(0, heading.level - 1);
      headingItems.push(`${" ".repeat(depth)}- H${heading.level}: ${heading.text}`);
    }
  } else {
    headingItems.push("- 見出しなし");
  }

  const linkItems = [];
  if (data.links && data.links.length) {
    for (const link of data.links) {
      linkItems.push(`- [${escapeMarkdownLinkText(link.text)}](${link.url})`);
    }
  } else {
    linkItems.push("- リンクなし");
  }

  const sections = [
    `# ${data.title}\n\nSource: ${data.url}\n\nExtracted at: ${extractedAt}\n`,
    `## Headings\n\n${headingItems.join("\n")}\n`,
    `## Content\n\n${data.content || ""}\n`,
    `## Links\n\n${linkItems.join("\n")}\n`
  ];
  return sections.join("\n---\n\n");
}
/**
 * Renders the extraction result as labelled plain text: each label on its own
 * line, its value below, then a blank line.
 *
 * @param {object} data - Reads title, url, content and links.
 * @param {string} extractedAt - Formatted extraction timestamp.
 * @returns {string} Plain text ending with a newline.
 */
function buildPlainText(data, extractedAt) {
  const hasLinks = Boolean(data.links && data.links.length);
  const linkBlock = hasLinks
    ? data.links.map((link) => `${link.text} - ${link.url}`).join("\n")
    : "リンクなし";

  const lines = [];
  // Values are pushed as-is so Array.prototype.join renders null/undefined as "".
  const addField = (label, value) => {
    lines.push(label, value, "");
  };
  addField("Title:", data.title);
  addField("URL:", data.url);
  addField("Extracted at:", extractedAt);
  addField("Content:", data.content || "");
  addField("Links:", linkBlock);
  return lines.join("\n");
}
/**
 * Renders the page-wide link list as plain text, keeping only the first
 * entry for each distinct URL and skipping entries without a URL.
 *
 * @param {object} data - Reads title, url and allLinks.
 * @param {string} extractedAt - Formatted extraction timestamp.
 * @returns {string} Plain text ending with a newline.
 */
function buildUrlList(data, extractedAt) {
  // Map preserves insertion order, so the first occurrence of each URL wins.
  const firstByUrl = new Map();
  for (const link of data.allLinks || []) {
    if (link.url && !firstByUrl.has(link.url)) {
      firstByUrl.set(link.url, link);
    }
  }

  const urlBlock = firstByUrl.size > 0
    ? Array.from(firstByUrl.values(), (link) => `${link.text} - ${link.url}`).join("\n")
    : "URLなし";

  const lines = [];
  // Values are pushed as-is so Array.prototype.join renders null/undefined as "".
  const addField = (label, value) => {
    lines.push(label, value, "");
  };
  addField("Title:", data.title);
  addField("URL:", data.url);
  addField("Extracted at:", extractedAt);
  addField("Page URLs:", urlBlock);
  return lines.join("\n");
}
/**
 * Saves the generated text through chrome.downloads.
 *
 * The content is wrapped in a Blob and exposed through an object URL. The URL
 * is revoked one second after the download call settles, whether it succeeded
 * or failed.
 *
 * @param {string} content - File content.
 * @param {string} title - Page title used to derive the file name.
 * @param {string} format - "markdown" yields .md; everything else .txt.
 * @returns {Promise<void>}
 */
async function downloadOutput(content, title, format) {
  const isMarkdown = format === "markdown";
  const extension = isMarkdown ? "md" : "txt";
  const mimeType = isMarkdown ? "text/markdown" : "text/plain";

  const objectUrl = URL.createObjectURL(
    new Blob([content], { type: `${mimeType};charset=utf-8` })
  );

  try {
    const baseName = buildDownloadBaseName(title, format);
    const dateStamp = formatDateForFileName(new Date());
    await chrome.downloads.download({
      url: objectUrl,
      filename: `${baseName}_${dateStamp}.${extension}`,
      saveAs: false
    });
  } finally {
    setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
  }
}
/**
 * Builds the file name stem: the sanitised title, plus "-urls" for the URL
 * list format.
 *
 * @param {string} title - Page title.
 * @param {string} format - Output format.
 * @returns {string} File name without date stamp or extension.
 */
function buildDownloadBaseName(title, format) {
  const baseName = buildSafeFileName(title);
  return format === "urls" ? `${baseName}-urls` : baseName;
}
/**
 * Turns a page title into a lower-case, hyphen-separated file name stem.
 *
 * Steps: NFKC-normalise, lower-case, replace reserved punctuation and control
 * characters with "-", collapse whitespace/dot/underscore runs and repeated
 * hyphens, trim edge hyphens, then truncate to 80 characters.
 *
 * Truncation counts Unicode code points rather than UTF-16 units, so a
 * character outside the BMP (an emoji, for instance) sitting on the cut-off
 * is never split into a lone surrogate.
 *
 * @param {string} title - Page title; may be empty, null or undefined.
 * @returns {string} Sanitised stem, or "page" when nothing usable remains.
 */
function buildSafeFileName(title) {
  const MAX_CODE_POINTS = 80;
  const trimEdgeHyphens = (value) => value.replace(/^-|-$/g, "");

  const cleaned = trimEdgeHyphens(
    String(title || "")
      .normalize("NFKC")
      .toLowerCase()
      // Control characters are replaced as well as reserved punctuation.
      .replace(/[\\/:*?"<>|#%&{}$!`'@+=\u0000-\u001f\u007f]/g, "-")
      .replace(/[\s._]+/g, "-")
      .replace(/-+/g, "-")
  );

  // Array.from iterates by code point, keeping surrogate pairs intact.
  const truncated = Array.from(cleaned).slice(0, MAX_CODE_POINTS).join("");

  // The cut may expose a hyphen at the end, so trim once more.
  return trimEdgeHyphens(truncated) || "page";
}
/**
 * Formats a Date in local time as "YYYY-MM-DD HH:MM:SS".
 *
 * @param {Date} date - Date to format.
 * @returns {string} Timestamp with zero-padded month, day and time fields.
 */
function formatDateTime(date) {
  const datePart = [
    date.getFullYear(),
    pad(date.getMonth() + 1), // getMonth() is zero-based
    pad(date.getDate())
  ].join("-");
  const timePart = [
    pad(date.getHours()),
    pad(date.getMinutes()),
    pad(date.getSeconds())
  ].join(":");
  return `${datePart} ${timePart}`;
}
/**
 * Formats a Date in local time as "YYYY-MM-DD" for use in file names.
 *
 * @param {Date} date - Date to format.
 * @returns {string} Date stamp with zero-padded month and day.
 */
function formatDateForFileName(date) {
  const parts = [
    date.getFullYear(),
    pad(date.getMonth() + 1), // getMonth() is zero-based
    pad(date.getDate())
  ];
  return parts.join("-");
}
/**
 * Left-pads a value with zeros to at least two characters.
 *
 * @param {*} value - Value to stringify and pad.
 * @returns {string} Padded string; longer inputs are returned unchanged.
 */
function pad(value) {
  const digits = String(value);
  const missing = 2 - digits.length;
  return missing > 0 ? "0".repeat(missing) + digits : digits;
}
/**
 * Backslash-escapes "[", "]" and "\" so the text can sit inside Markdown
 * link brackets.
 *
 * @param {*} text - Link label; falsy values become an empty string.
 * @returns {string} Escaped label.
 */
function escapeMarkdownLinkText(text) {
  const source = String(text || "");
  let escaped = "";
  for (const char of source) {
    const needsEscape = char === "[" || char === "]" || char === "\\";
    escaped += needsEscape ? `\\${char}` : char;
  }
  return escaped;
}
/**
 * Enables or disables every download button at once.
 *
 * @param {boolean} isBusy - true disables the buttons, false re-enables them.
 */
function setBusy(isBusy) {
  for (const button of Object.values(buttons)) {
    button.disabled = isBusy;
  }
}
/**
 * Writes a message to the status line and switches its error styling.
 *
 * @param {string} message - Text to display.
 * @param {boolean} [isError=false] - Whether to apply the "is-error" class.
 */
function setStatus(message, isError = false) {
  statusMessage.textContent = message;
  const { classList } = statusMessage;
  if (isError) {
    classList.add("is-error");
  } else {
    classList.remove("is-error");
  }
}
===== END FILE: popup.js =====
===== FILE: content.js =====
(() => {
// Candidate selectors for the main content container.
// findContentRoot() tries them in this order and uses the first match.
const CONTENT_SELECTORS = [
"article",
"main",
"[role=\"main\"]",
".article",
".post",
".entry-content",
".content"
];
// Elements stripped from the cloned content root by cleanClone().
// They are joined into a single selector list there.
const REMOVE_SELECTORS = [
"script",
"style",
"noscript",
"iframe",
"svg",
"canvas",
"nav",
"footer",
"header",
"aside"
];
// Words that isLikelyAdElement() looks for in an element's id and class
// to decide whether the element is probably an advertisement.
const AD_WORDS = [
"ad",
"ads",
"advertisement",
"sponsor",
"sponsored",
"banner",
"promo",
"promotion"
];
/**
 * Returns the page's canonical URL when a <link rel="canonical"> provides a
 * non-blank href; otherwise the current location.
 *
 * @returns {string} URL to record as the source of the extraction.
 */
function getCanonicalUrl() {
  const canonicalLink = document.querySelector("link[rel~=\"canonical\"]");
  if (canonicalLink && canonicalLink.href) {
    const trimmed = canonicalLink.href.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return window.location.href;
}
/**
 * Normalises whitespace in extracted text.
 *
 * Non-breaking spaces become regular spaces, runs of spaces/tabs collapse to
 * one space, whitespace around each line break is removed, and runs of three
 * or more line breaks shrink to a single blank line.
 *
 * Whitespace around a line break is trimmed without consuming neighbouring
 * line breaks. Matching them with `\s*` would merge every run of newlines
 * into one, leaving the blank-line rule with nothing to match and dropping
 * paragraph breaks.
 *
 * @param {*} value - Text to normalise; falsy values become "".
 * @returns {string} Trimmed, whitespace-normalised text.
 */
function normalizeText(value) {
  return String(value || "")
    .replace(/\u00a0/g, " ")
    .replace(/[ \t]+/g, " ")
    // [^\S\n] means "any whitespace except a line feed".
    .replace(/[^\S\n]*\n[^\S\n]*/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
/**
 * Reports whether an element is hidden, judged by its computed style
 * (display, visibility, opacity), its `hidden` property, or
 * aria-hidden="true".
 *
 * @param {Element} element - Element attached to the live document.
 * @returns {*} Truthy when the element is considered hidden.
 */
function isHidden(element) {
  const { display, visibility, opacity } = window.getComputedStyle(element);
  if (display === "none" || visibility === "hidden" || opacity === "0") {
    return true;
  }
  return element.hidden || element.getAttribute("aria-hidden") === "true";
}
/**
 * Guesses whether an element is an advertisement from its id and class.
 *
 * The id and class are split into tokens on non-alphanumeric characters and
 * on camelCase boundaries. Short words (three characters or fewer, such as
 * "ad" and "ads") must equal a whole token; a plain substring test on "ad"
 * also hits "header", "shadow", "lead", "download" and similar names, which
 * would strip real content. Longer words may appear anywhere inside a token,
 * so "banners" or "sponsorship" still match.
 *
 * The class is read through getAttribute("class") because `className` is not
 * a string on SVG elements.
 *
 * @param {Element} element - Element to inspect.
 * @param {string[]} [adWords=AD_WORDS] - Lower-case marker words.
 * @returns {boolean} true when a marker word is found.
 */
function isLikelyAdElement(element, adWords = AD_WORDS) {
  const rawMarker = `${element.id || ""} ${element.getAttribute("class") || ""}`;
  const tokens = rawMarker
    // Split camelCase ("adSlot" -> "ad Slot") before lower-casing.
    .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter(Boolean);

  return tokens.some((token) =>
    adWords.some((word) => (word.length <= 3 ? token === word : token.includes(word)))
  );
}
/**
 * Picks the element to extract content from: the first match for the
 * highest-priority entry in CONTENT_SELECTORS, or document.body when none
 * of the selectors match.
 *
 * @returns {Element} Content root.
 */
function findContentRoot() {
  let match = null;
  // some() stops at the first selector that yields an element, preserving priority order.
  CONTENT_SELECTORS.some((selector) => {
    match = document.querySelector(selector);
    return Boolean(match);
  });
  return match || document.body;
}
/**
 * Returns a deep clone of `source` with hidden elements, likely ads, and
 * everything matching REMOVE_SELECTORS removed.
 *
 * Visibility is checked on the original elements and the removal is applied
 * to the clone element at the same index of the two parallel element lists.
 *
 * @param {Element} source - Content root in the live document.
 * @returns {Element} Cleaned clone.
 */
function cleanClone(source) {
  const clone = source.cloneNode(true);
  const originals = source.querySelectorAll("*");
  // querySelectorAll returns a static list, so removals below do not shift indexes.
  const copies = clone.querySelectorAll("*");

  for (let index = 0; index < copies.length; index += 1) {
    const original = originals[index];
    if (!original) {
      continue;
    }
    if (isHidden(original) || isLikelyAdElement(original)) {
      copies[index].remove();
    }
  }

  for (const unwanted of clone.querySelectorAll(REMOVE_SELECTORS.join(","))) {
    unwanted.remove();
  }
  return clone;
}
/**
 * Collects the document's H1-H3 headings in document order.
 *
 * Heading text is flattened to a single line: textContent keeps the line
 * breaks of the page's markup, and a multi-line label would break the
 * one-heading-per-line list the popup writes. Headings with no text are
 * skipped.
 *
 * @returns {{level: number, text: string}[]} Heading level (1-3) and text.
 */
function getHeadings() {
  const headings = [];
  for (const element of document.querySelectorAll("h1, h2, h3")) {
    const text = normalizeText(element.textContent).replace(/\s+/g, " ");
    if (text) {
      headings.push({
        level: Number(element.tagName.slice(1)),
        text
      });
    }
  }
  return headings;
}
/**
 * Collects the links under `root`, de-duplicated by label plus URL.
 *
 * The label is the first non-empty value among the title attribute,
 * aria-label, the link's text content, and the URL itself. Labels are
 * flattened to one line because the popup writes one link per line, and
 * textContent keeps the line breaks of the page's markup.
 *
 * Anchors whose `href` property is not a string are skipped: SVG <a>
 * elements expose an SVGAnimatedString there, which would not yield a
 * usable URL.
 *
 * @param {ParentNode} root - Element or document to search.
 * @returns {{text: string, url: string}[]} Links in document order.
 */
function getLinks(root) {
  const toLabel = (value) => normalizeText(value).replace(/\s+/g, " ");
  const seen = new Set();
  const links = [];

  for (const anchor of root.querySelectorAll("a[href]")) {
    const url = anchor.href;
    if (typeof url !== "string" || !url) {
      continue;
    }

    const text = toLabel(anchor.getAttribute("title")) ||
      toLabel(anchor.getAttribute("aria-label")) ||
      toLabel(anchor.textContent) ||
      url;

    // Labels are single-line, so "\n" is an unambiguous separator.
    const key = `${text}\n${url}`;
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    links.push({ text, url });
  }
  return links;
}
/**
 * Collects links from the whole document rather than only the content root.
 *
 * @returns {{text: string, url: string}[]} Result of getLinks(document).
 */
function getAllPageLinks() {
  const wholePage = document;
  return getLinks(wholePage);
}
const sourceRoot = findContentRoot();
const cleanedRoot = cleanClone(sourceRoot);
return {
title: normalizeText(document.title) || "Untitled Page",
url: getCanonicalUrl(),
headings: getHeadings(),
content: normalizeText(cleanedRoot.innerText || cleanedRoot.textContent),
links: getLinks(cleanedRoot),
allLinks: getAllPageLinks()
};
})();
===== END FILE: content.js =====
===== FILE: README.md =====
# One Click Page Extractor
One Click Page Extractor は、現在表示中のWebページから本文、H1〜H3見出し、リンクを抽出し、Markdown、プレーンテキスト、URL一覧としてローカル保存できるChrome拡張です。
## 主な機能
- 現在タブのページタイトル、URL、本文、見出し、本文内リンクを抽出
- ページ全体のリンクタイトルとURL一覧を抽出
- 本文をMarkdown形式でダウンロード
- 本文をプレーンテキスト形式でダウンロード
- URL一覧テキストをダウンロード
## 使い方
1. Chromeの拡張機能管理画面で、このフォルダを「パッケージ化されていない拡張機能」として読み込みます。
2. 抽出したいWebページを開きます。
3. ツールバーの拡張機能アイコンをクリックします。
4. 本文が必要な場合は「本文Markdown」「本文テキスト」を選びます。
5. ページ内のURLだけが必要な場合は「URL一覧」を選びます。
## プライバシー方針
- 外部APIを使いません。
- 外部サーバーへ送信しません。
- CDNを使いません。
- リモートJavaScriptを読み込みません。
- アナリティクスを入れません。
- ユーザーデータを収集しません。
- 閲覧履歴を保存しません。
- 抽出内容はユーザーのブラウザ内だけで処理します。
## 権限説明
- `activeTab`:ユーザー操作時のみ現在ページを抽出するため。
- `scripting`:現在タブ上で抽出処理を実行するため。
- `downloads`:抽出ファイルを端末に保存するため。
## 初期版でしないこと
- 画像ファイル本体の一括ダウンロード
- HTML完全保存
- PDF化
- Notion連携
- ChatGPT/API連携
- 自動要約
- クラウド保存
- 履歴保存
- 複数ページ一括処理
===== END FILE: README.md =====