fix: properly extract YouTube heatmap data for most watched segments

This commit is contained in:
Kilo Code Cloud
2026-01-14 19:42:45 +00:00
parent 1366a4f9ba
commit 128a81722c
2 changed files with 56 additions and 83 deletions

View File

@@ -6,20 +6,18 @@ export interface DownloadOptions {
url: string; url: string;
outputDir: string; outputDir: string;
format: string; format: string;
extractChapters: boolean;
} }
interface MostWatchedSegment { interface HeatmapSegment {
start: number; start_seconds: number;
end: number; end_seconds: number;
intensity?: number; intensity: number;
} }
interface VideoInfo { interface VideoInfo {
title: string; title: string;
chapters: Array<{ title: string; start_time: number; end_time: number }>;
heatmap?: Array<{ start_seconds: number; end_seconds: number; intensity: number }>;
duration: number; duration: number;
heatmap?: HeatmapSegment[];
} }
async function getVideoInfo(url: string): Promise<VideoInfo> { async function getVideoInfo(url: string): Promise<VideoInfo> {
@@ -27,6 +25,8 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
const ytDlp = spawn("yt-dlp", [ const ytDlp = spawn("yt-dlp", [
"--dump-json", "--dump-json",
"--no-download", "--no-download",
"--compat-option",
"no-youtube-channel-redirect",
url, url,
]); ]);
@@ -50,11 +50,14 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
try { try {
const info = JSON.parse(stdout); const info = JSON.parse(stdout);
// Extract heatmap data from YouTube's internal API
// The heatmap shows what segments were re-watched the most
const heatmapData = info.heatmap;
resolve({ resolve({
title: info.title || "video", title: info.title || "video",
chapters: info.chapters || [],
heatmap: info.heatmap || [],
duration: info.duration || 0, duration: info.duration || 0,
heatmap: heatmapData,
}); });
} catch (parseError) { } catch (parseError) {
reject(new Error(`Failed to parse video info: ${parseError}`)); reject(new Error(`Failed to parse video info: ${parseError}`));
@@ -67,12 +70,15 @@ async function getVideoInfo(url: string): Promise<VideoInfo> {
}); });
} }
async function downloadSection( async function downloadSegment(
url: string, url: string,
outputPath: string, outputPath: string,
section: string, startTime: number,
endTime: number,
format: string format: string
): Promise<void> { ): Promise<void> {
const section = `*${startTime.toFixed(3)}-${endTime.toFixed(3)}`;
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const ytDlp = spawn("yt-dlp", [ const ytDlp = spawn("yt-dlp", [
"-f", format, "-f", format,
@@ -114,29 +120,7 @@ function formatTime(seconds: number): string {
return `${mins}:${secs.toString().padStart(2, "0")}`; return `${mins}:${secs.toString().padStart(2, "0")}`;
} }
function getMostWatchedSegments( export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
heatmap: Array<{ start_seconds: number; end_seconds: number; intensity: number }>,
duration: number,
topN: number = 1
): MostWatchedSegment[] {
if (!heatmap || heatmap.length === 0) {
return [];
}
// Sort by intensity (most watched first)
const sorted = [...heatmap].sort((a, b) => b.intensity - a.intensity);
// Get top N segments
const topSegments = sorted.slice(0, topN);
return topSegments.map((segment) => ({
start: segment.start_seconds,
end: segment.end_seconds,
intensity: segment.intensity,
}));
}
export async function downloadVideoSegments(options: DownloadOptions): Promise<void> {
const { url, outputDir, format } = options; const { url, outputDir, format } = options;
// Create output directory if it doesn't exist // Create output directory if it doesn't exist
@@ -144,65 +128,57 @@ export async function downloadVideoSegments(options: DownloadOptions): Promise<v
mkdirSync(outputDir, { recursive: true }); mkdirSync(outputDir, { recursive: true });
} }
// Get video info including most watched segments from YouTube API // Get video info with heatmap data from YouTube
console.log("Fetching video information from YouTube API..."); console.log("Fetching video information from YouTube...");
const info = await getVideoInfo(url); const info = await getVideoInfo(url);
const safeTitle = sanitizeFilename(info.title); const safeTitle = sanitizeFilename(info.title);
console.log(`Video: ${info.title}`); console.log(`Video: ${info.title}`);
console.log(`Duration: ${formatTime(info.duration)}`); console.log(`Duration: ${formatTime(info.duration)}`);
// Try to get most watched segments from heatmap data // Check for heatmap data - this shows what was re-watched the most
const mostWatchedSegments = getMostWatchedSegments(info.heatmap || [], info.duration); if (!info.heatmap || info.heatmap.length === 0) {
console.log("\nNo heatmap data available for this video.");
console.log("The video may not have enough view data to determine most watched segments.");
console.log("Downloading full video instead...");
// If no heatmap data, fall back to chapters
if (mostWatchedSegments.length === 0 && info.chapters.length > 0) {
console.log("\nNo most watched segments found. Falling back to chapters...");
const chapter = info.chapters[0]; // Download first chapter as most relevant
const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
const section = `*${formatTime(chapter.start_time || 0)}-${formatTime(chapter.end_time || 60)}`;
console.log(`Downloading chapter: ${chapter.title || "First Chapter"}`);
await downloadSection(url, outputPath, section, format);
return;
}
if (mostWatchedSegments.length === 0) {
console.log("No segments found. Downloading full video...");
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`); const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
await downloadSection(url, outputPath, "*", format); await downloadSegment(url, outputPath, 0, info.duration, format);
return; return;
} }
// Find the most watched segment (highest intensity)
const mostWatched = info.heatmap.reduce((max, current) => {
return current.intensity > max.intensity ? current : max;
});
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
console.log(`Most watched segment intensity: ${(mostWatched.intensity * 100).toFixed(1)}%`);
console.log(`Segment: ${formatTime(mostWatched.start_seconds)} - ${formatTime(mostWatched.end_seconds)}`);
// Download the most watched segment // Download the most watched segment
const topSegment = mostWatchedSegments[0];
const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`); const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
const section = `*${formatTime(topSegment.start)}-${formatTime(topSegment.end)}`;
console.log(`\nMost watched segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`);
console.log(`Duration: ${formatTime(topSegment.end - topSegment.start)}`);
console.log(`Intensity: ${((topSegment.intensity || 0) * 100).toFixed(1)}%`);
console.log(`\nDownloading most watched segment...`); console.log(`\nDownloading most watched segment...`);
await downloadSegment(
await downloadSection(url, outputPath, section, format); url,
outputPath,
mostWatched.start_seconds,
mostWatched.end_seconds,
format
);
// Save segment info // Save segment info
const segmentInfoPath = join(outputDir, `${safeTitle}_most_watched.txt`); const segmentInfoPath = join(outputDir, `${safeTitle}_segment_info.txt`);
let segmentInfo = `# ${info.title}\n\n`; const segmentInfo = `# ${info.title}\n\n` +
segmentInfo += `Most watched segment:\n`; `Most watched segment (from YouTube heatmap):\n` +
segmentInfo += ` Start: ${formatTime(topSegment.start)}\n`; ` Start: ${formatTime(mostWatched.start_seconds)} (${mostWatched.start_seconds}s)\n` +
segmentInfo += ` End: ${formatTime(topSegment.end)}\n`; ` End: ${formatTime(mostWatched.end_seconds)} (${mostWatched.end_seconds}s)\n` +
segmentInfo += ` Duration: ${formatTime(topSegment.end - topSegment.start)}\n`; ` Duration: ${formatTime(mostWatched.end_seconds - mostWatched.start_seconds)}\n` +
segmentInfo += ` Intensity: ${((topSegment.intensity || 0) * 100).toFixed(1)}%\n`; ` Intensity: ${(mostWatched.intensity * 100).toFixed(1)}%\n\n` +
`Note: This segment had the highest re-watch rate according to YouTube's analytics.\n`;
if (mostWatchedSegments.length > 1) {
segmentInfo += `\nOther top segments:\n`;
for (let i = 1; i < mostWatchedSegments.length; i++) {
const seg = mostWatchedSegments[i];
segmentInfo += ` ${formatTime(seg.start)} - ${formatTime(seg.end)} (${((seg.intensity || 0) * 100).toFixed(1)}%)\n`;
}
}
writeFileSync(segmentInfoPath, segmentInfo); writeFileSync(segmentInfoPath, segmentInfo);
console.log(`Segment info saved to: ${segmentInfoPath}`); console.log(`\nSegment info saved to: ${segmentInfoPath}`);
console.log("Download complete!");
} }

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bun #!/usr/bin/env bun
import { downloadVideoSegments } from "./downloader.js"; import { downloadMostWatchedSegment } from "./downloader.js";
import { parseArgs } from "./args.js"; import { parseArgs } from "./args.js";
async function main() { async function main() {
@@ -17,20 +17,17 @@ async function main() {
process.exit(1); process.exit(1);
} }
console.log(`Downloading most watched segment(s) from: ${args.url}`); console.log(`Downloading most watched segment from: ${args.url}`);
console.log(`Output directory: ${args.output}`); console.log(`Output directory: ${args.output}`);
console.log(`Format: ${args.format}`); console.log(`Format: ${args.format}`);
console.log(`Segments to download: ${args.segments}`);
console.log(""); console.log("");
try { try {
await downloadVideoSegments({ await downloadMostWatchedSegment({
url: args.url, url: args.url,
outputDir: args.output, outputDir: args.output,
format: args.format, format: args.format,
extractChapters: true,
}); });
console.log("\nDownload complete!");
} catch (error) { } catch (error) {
console.error("Error:", error instanceof Error ? error.message : error); console.error("Error:", error instanceof Error ? error.message : error);
process.exit(1); process.exit(1);