fix: properly extract YouTube heatmap data for most watched segments
This commit is contained in:
@@ -6,20 +6,18 @@ export interface DownloadOptions {
|
|||||||
url: string;
|
url: string;
|
||||||
outputDir: string;
|
outputDir: string;
|
||||||
format: string;
|
format: string;
|
||||||
extractChapters: boolean;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
interface MostWatchedSegment {
|
interface HeatmapSegment {
|
||||||
start: number;
|
start_seconds: number;
|
||||||
end: number;
|
end_seconds: number;
|
||||||
intensity?: number;
|
intensity: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface VideoInfo {
|
interface VideoInfo {
|
||||||
title: string;
|
title: string;
|
||||||
chapters: Array<{ title: string; start_time: number; end_time: number }>;
|
|
||||||
heatmap?: Array<{ start_seconds: number; end_seconds: number; intensity: number }>;
|
|
||||||
duration: number;
|
duration: number;
|
||||||
|
heatmap?: HeatmapSegment[];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getVideoInfo(url: string): Promise<VideoInfo> {
|
async function getVideoInfo(url: string): Promise<VideoInfo> {
|
||||||
@@ -27,6 +25,8 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
|
|||||||
const ytDlp = spawn("yt-dlp", [
|
const ytDlp = spawn("yt-dlp", [
|
||||||
"--dump-json",
|
"--dump-json",
|
||||||
"--no-download",
|
"--no-download",
|
||||||
|
"--compat-option",
|
||||||
|
"no-youtube-channel-redirect",
|
||||||
url,
|
url,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -50,11 +50,14 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
|
|||||||
try {
|
try {
|
||||||
const info = JSON.parse(stdout);
|
const info = JSON.parse(stdout);
|
||||||
|
|
||||||
|
// Read the heatmap array from the JSON metadata printed by yt-dlp (--dump-json)
|
||||||
|
// The heatmap shows which segments were re-watched the most; it stays undefined when the metadata has no heatmap field
|
||||||
|
const heatmapData = info.heatmap;
|
||||||
|
|
||||||
resolve({
|
resolve({
|
||||||
title: info.title || "video",
|
title: info.title || "video",
|
||||||
chapters: info.chapters || [],
|
|
||||||
heatmap: info.heatmap || [],
|
|
||||||
duration: info.duration || 0,
|
duration: info.duration || 0,
|
||||||
|
heatmap: heatmapData,
|
||||||
});
|
});
|
||||||
} catch (parseError) {
|
} catch (parseError) {
|
||||||
reject(new Error(`Failed to parse video info: ${parseError}`));
|
reject(new Error(`Failed to parse video info: ${parseError}`));
|
||||||
@@ -67,12 +70,15 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function downloadSection(
|
async function downloadSegment(
|
||||||
url: string,
|
url: string,
|
||||||
outputPath: string,
|
outputPath: string,
|
||||||
section: string,
|
startTime: number,
|
||||||
|
endTime: number,
|
||||||
format: string
|
format: string
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
|
const section = `*${startTime.toFixed(3)}-${endTime.toFixed(3)}`;
|
||||||
|
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const ytDlp = spawn("yt-dlp", [
|
const ytDlp = spawn("yt-dlp", [
|
||||||
"-f", format,
|
"-f", format,
|
||||||
@@ -114,29 +120,7 @@ function formatTime(seconds: number): string {
|
|||||||
return `${mins}:${secs.toString().padStart(2, "0")}`;
|
return `${mins}:${secs.toString().padStart(2, "0")}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getMostWatchedSegments(
|
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
|
||||||
heatmap: Array<{ start_seconds: number; end_seconds: number; intensity: number }>,
|
|
||||||
duration: number,
|
|
||||||
topN: number = 1
|
|
||||||
): MostWatchedSegment[] {
|
|
||||||
if (!heatmap || heatmap.length === 0) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort by intensity (most watched first)
|
|
||||||
const sorted = [...heatmap].sort((a, b) => b.intensity - a.intensity);
|
|
||||||
|
|
||||||
// Get top N segments
|
|
||||||
const topSegments = sorted.slice(0, topN);
|
|
||||||
|
|
||||||
return topSegments.map((segment) => ({
|
|
||||||
start: segment.start_seconds,
|
|
||||||
end: segment.end_seconds,
|
|
||||||
intensity: segment.intensity,
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function downloadVideoSegments(options: DownloadOptions): Promise<void> {
|
|
||||||
const { url, outputDir, format } = options;
|
const { url, outputDir, format } = options;
|
||||||
|
|
||||||
// Create output directory if it doesn't exist
|
// Create output directory if it doesn't exist
|
||||||
@@ -144,65 +128,57 @@ export async function downloadVideoSegments(options: DownloadOptions): Promise<v
|
|||||||
mkdirSync(outputDir, { recursive: true });
|
mkdirSync(outputDir, { recursive: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get video info including most watched segments from YouTube API
|
// Get video info with heatmap data from YouTube
|
||||||
console.log("Fetching video information from YouTube API...");
|
console.log("Fetching video information from YouTube...");
|
||||||
const info = await getVideoInfo(url);
|
const info = await getVideoInfo(url);
|
||||||
const safeTitle = sanitizeFilename(info.title);
|
const safeTitle = sanitizeFilename(info.title);
|
||||||
|
|
||||||
console.log(`Video: ${info.title}`);
|
console.log(`Video: ${info.title}`);
|
||||||
console.log(`Duration: ${formatTime(info.duration)}`);
|
console.log(`Duration: ${formatTime(info.duration)}`);
|
||||||
|
|
||||||
// Try to get most watched segments from heatmap data
|
// Check for heatmap data - this shows what was re-watched the most
|
||||||
const mostWatchedSegments = getMostWatchedSegments(info.heatmap || [], info.duration);
|
if (!info.heatmap || info.heatmap.length === 0) {
|
||||||
|
console.log("\nNo heatmap data available for this video.");
|
||||||
// If no heatmap data, fall back to chapters
|
console.log("The video may not have enough view data to determine most watched segments.");
|
||||||
if (mostWatchedSegments.length === 0 && info.chapters.length > 0) {
|
console.log("Downloading full video instead...");
|
||||||
console.log("\nNo most watched segments found. Falling back to chapters...");
|
|
||||||
const chapter = info.chapters[0]; // Download first chapter as most relevant
|
|
||||||
const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
|
|
||||||
const section = `*${formatTime(chapter.start_time || 0)}-${formatTime(chapter.end_time || 60)}`;
|
|
||||||
|
|
||||||
console.log(`Downloading chapter: ${chapter.title || "First Chapter"}`);
|
|
||||||
await downloadSection(url, outputPath, section, format);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mostWatchedSegments.length === 0) {
|
|
||||||
console.log("No segments found. Downloading full video...");
|
|
||||||
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
|
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
|
||||||
await downloadSection(url, outputPath, "*", format);
|
await downloadSegment(url, outputPath, 0, info.duration, format);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find the most watched segment (highest intensity)
|
||||||
|
const mostWatched = info.heatmap.reduce((max, current) => {
|
||||||
|
return current.intensity > max.intensity ? current : max;
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
|
||||||
|
console.log(`Most watched segment intensity: ${(mostWatched.intensity * 100).toFixed(1)}%`);
|
||||||
|
console.log(`Segment: ${formatTime(mostWatched.start_seconds)} - ${formatTime(mostWatched.end_seconds)}`);
|
||||||
|
|
||||||
// Download the most watched segment
|
// Download the most watched segment
|
||||||
const topSegment = mostWatchedSegments[0];
|
|
||||||
const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
|
const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
|
||||||
const section = `*${formatTime(topSegment.start)}-${formatTime(topSegment.end)}`;
|
|
||||||
|
|
||||||
console.log(`\nMost watched segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`);
|
|
||||||
console.log(`Duration: ${formatTime(topSegment.end - topSegment.start)}`);
|
|
||||||
console.log(`Intensity: ${((topSegment.intensity || 0) * 100).toFixed(1)}%`);
|
|
||||||
console.log(`\nDownloading most watched segment...`);
|
console.log(`\nDownloading most watched segment...`);
|
||||||
|
await downloadSegment(
|
||||||
await downloadSection(url, outputPath, section, format);
|
url,
|
||||||
|
outputPath,
|
||||||
|
mostWatched.start_seconds,
|
||||||
|
mostWatched.end_seconds,
|
||||||
|
format
|
||||||
|
);
|
||||||
|
|
||||||
// Save segment info
|
// Save segment info
|
||||||
const segmentInfoPath = join(outputDir, `${safeTitle}_most_watched.txt`);
|
const segmentInfoPath = join(outputDir, `${safeTitle}_segment_info.txt`);
|
||||||
let segmentInfo = `# ${info.title}\n\n`;
|
const segmentInfo = `# ${info.title}\n\n` +
|
||||||
segmentInfo += `Most watched segment:\n`;
|
`Most watched segment (from YouTube heatmap):\n` +
|
||||||
segmentInfo += ` Start: ${formatTime(topSegment.start)}\n`;
|
` Start: ${formatTime(mostWatched.start_seconds)} (${mostWatched.start_seconds}s)\n` +
|
||||||
segmentInfo += ` End: ${formatTime(topSegment.end)}\n`;
|
` End: ${formatTime(mostWatched.end_seconds)} (${mostWatched.end_seconds}s)\n` +
|
||||||
segmentInfo += ` Duration: ${formatTime(topSegment.end - topSegment.start)}\n`;
|
` Duration: ${formatTime(mostWatched.end_seconds - mostWatched.start_seconds)}\n` +
|
||||||
segmentInfo += ` Intensity: ${((topSegment.intensity || 0) * 100).toFixed(1)}%\n`;
|
` Intensity: ${(mostWatched.intensity * 100).toFixed(1)}%\n\n` +
|
||||||
|
`Note: This segment had the highest re-watch rate according to YouTube's analytics.\n`;
|
||||||
if (mostWatchedSegments.length > 1) {
|
|
||||||
segmentInfo += `\nOther top segments:\n`;
|
|
||||||
for (let i = 1; i < mostWatchedSegments.length; i++) {
|
|
||||||
const seg = mostWatchedSegments[i];
|
|
||||||
segmentInfo += ` ${formatTime(seg.start)} - ${formatTime(seg.end)} (${((seg.intensity || 0) * 100).toFixed(1)}%)\n`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
writeFileSync(segmentInfoPath, segmentInfo);
|
writeFileSync(segmentInfoPath, segmentInfo);
|
||||||
console.log(`Segment info saved to: ${segmentInfoPath}`);
|
console.log(`\nSegment info saved to: ${segmentInfoPath}`);
|
||||||
|
console.log("Download complete!");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env bun
|
#!/usr/bin/env bun
|
||||||
|
|
||||||
import { downloadVideoSegments } from "./downloader.js";
|
import { downloadMostWatchedSegment } from "./downloader.js";
|
||||||
import { parseArgs } from "./args.js";
|
import { parseArgs } from "./args.js";
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
@@ -17,20 +17,17 @@ async function main() {
|
|||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Downloading most watched segment(s) from: ${args.url}`);
|
console.log(`Downloading most watched segment from: ${args.url}`);
|
||||||
console.log(`Output directory: ${args.output}`);
|
console.log(`Output directory: ${args.output}`);
|
||||||
console.log(`Format: ${args.format}`);
|
console.log(`Format: ${args.format}`);
|
||||||
console.log(`Segments to download: ${args.segments}`);
|
|
||||||
console.log("");
|
console.log("");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await downloadVideoSegments({
|
await downloadMostWatchedSegment({
|
||||||
url: args.url,
|
url: args.url,
|
||||||
outputDir: args.output,
|
outputDir: args.output,
|
||||||
format: args.format,
|
format: args.format,
|
||||||
extractChapters: true,
|
|
||||||
});
|
});
|
||||||
console.log("\nDownload complete!");
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error:", error instanceof Error ? error.message : error);
|
console.error("Error:", error instanceof Error ? error.message : error);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
|
|||||||
Reference in New Issue
Block a user