Files
ytdlp-segment-downloader/src/cli/downloader.ts

333 lines
9.3 KiB
TypeScript
Raw Normal View History

import { spawn } from "child_process";
import { writeFileSync, mkdirSync, existsSync } from "fs";
import { join } from "path";
/** Options accepted by `downloadMostWatchedSegment`. */
export interface DownloadOptions {
/** Video URL handed to yt-dlp as its positional argument. */
url: string;
/** Directory that receives the downloaded media and the segment report; created if it does not exist. */
outputDir: string;
/** Format selector passed to yt-dlp's `-f` option. */
format: string;
/** Maximum number of clustered high-intensity regions to report. */
topN: number;
/**
 * Minimum intensity a heatmap segment must reach (inclusive) to be clustered.
 * It is logged as a percentage (value × 100), so presumably a 0–1 fraction — confirm against callers.
 */
intensityThreshold: number;
}
/**
 * One heatmap entry as it appears in yt-dlp's JSON output.
 *
 * Every field is optional because several alternative key names are tolerated;
 * `getStartTime`, `getEndTime` and `getIntensity` pick the first one that is present.
 */
interface RawHeatmapSegment {
/** Segment start; preferred over `start_time`. Presumably seconds — confirm against yt-dlp output. */
start_seconds?: number;
/** Segment start; used when `start_seconds` is absent. */
start_time?: number;
/** Segment end; preferred over `end_time`. */
end_seconds?: number;
/** Segment end; used when `end_seconds` is absent. */
end_time?: number;
/** Intensity; preferred over `heat` and `value`. */
intensity?: number;
/** Intensity; used when `intensity` is absent. */
heat?: number;
/** Intensity; used when both `intensity` and `heat` are absent. */
value?: number;
}
/** A cluster of neighbouring above-threshold heatmap segments, as returned by `clusterHighIntensitySegments`. */
interface ProcessedSegment {
/** Start of the cluster (start of its earliest-starting segment). */
start: number;
/** End of the cluster (largest end among its merged segments). */
end: number;
/** Intensity of the earliest-starting segment in the cluster (not an average or a peak). */
intensity: number;
/** Sum of the intensities of the above-threshold segments that make up the cluster; used for ranking. */
totalIntensity: number;
/** Number of above-threshold segments that make up the cluster. */
segmentCount: number;
}
/** The subset of yt-dlp's video metadata that this module uses; produced by `getVideoInfo`. */
interface VideoInfo {
/** Video title; `getVideoInfo` substitutes "video" when yt-dlp reports none. */
title: string;
/** Video duration; `getVideoInfo` substitutes 0 when yt-dlp reports none. */
duration: number;
/** Heatmap entries taken from the `heatmap` field of yt-dlp's JSON, when present. */
heatmap?: RawHeatmapSegment[];
}
/**
 * Fetches metadata for a video by running `yt-dlp --dump-json --no-download`
 * and extracting the title, duration and heatmap from the JSON it prints.
 *
 * @param url Video URL passed to yt-dlp.
 * @returns The title (falls back to "video"), the duration (falls back to 0)
 *   and the heatmap array when yt-dlp reports one.
 * @throws Rejects with an `Error` when yt-dlp cannot be started, exits with a
 *   non-zero code, or prints something that is not a JSON object.
 */
async function getVideoInfo(url: string): Promise<VideoInfo> {
  return new Promise((resolve, reject) => {
    const ytDlp = spawn("yt-dlp", [
      "--dump-json",
      "--no-download",
      "--compat-option",
      "no-youtube-channel-redirect",
      // End-of-options marker: a URL/ID that starts with "-" must never be
      // interpreted as a yt-dlp flag.
      "--",
      url,
    ]);

    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is not corrupted by per-chunk toString().
    ytDlp.stdout.setEncoding("utf8");
    ytDlp.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    ytDlp.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    ytDlp.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    ytDlp.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`yt-dlp failed: ${stderr}`));
        return;
      }
      try {
        const parsed: unknown = JSON.parse(stdout);
        if (typeof parsed !== "object" || parsed === null) {
          throw new Error("expected a JSON object");
        }
        const info = parsed as Record<string, unknown>;
        const { title, duration, heatmap } = info;
        resolve({
          // Only trust fields that have the expected runtime type.
          title: typeof title === "string" && title !== "" ? title : "video",
          duration:
            typeof duration === "number" && Number.isFinite(duration) ? duration : 0,
          heatmap: Array.isArray(heatmap) ? (heatmap as VideoInfo["heatmap"]) : undefined,
        });
      } catch (parseError) {
        reject(new Error(`Failed to parse video info: ${parseError}`));
      }
    });

    // Emitted when the process could not be spawned at all (e.g. yt-dlp is not installed).
    ytDlp.on("error", (err) => {
      reject(new Error(`Failed to run yt-dlp: ${err.message}`));
    });
  });
}
/**
 * Downloads a time range of a video with yt-dlp's `--download-sections`.
 *
 * When the range is unusable (non-finite bounds, or an end that is not after
 * the start, e.g. `0`–`0` for a video of unknown duration) no section is
 * requested and yt-dlp downloads the whole video instead of being handed a
 * meaningless `*0.000-0.000` range.
 *
 * @param url Video URL passed to yt-dlp.
 * @param outputPath yt-dlp output template (`-o`).
 * @param startTime Range start; negative values are clamped to 0.
 * @param endTime Range end.
 * @param format Format selector passed to `-f`.
 * @throws Rejects with an `Error` when yt-dlp cannot be started or exits with
 *   a non-zero code.
 */
async function downloadSegment(
  url: string,
  outputPath: string,
  startTime: number,
  endTime: number,
  format: string
): Promise<void> {
  const args = ["-f", format];

  const rangeStart = Math.max(0, startTime);
  const hasUsableRange =
    Number.isFinite(rangeStart) && Number.isFinite(endTime) && endTime > rangeStart;
  if (hasUsableRange) {
    args.push("--download-sections", `*${rangeStart.toFixed(3)}-${endTime.toFixed(3)}`);
  }

  // "--" ends option parsing so a URL/ID starting with "-" is not read as a flag.
  args.push("-o", outputPath, "--", url);

  return new Promise((resolve, reject) => {
    const ytDlp = spawn("yt-dlp", args);

    // Decode at the stream level so multi-byte characters split across chunks survive.
    ytDlp.stderr.setEncoding("utf8");
    let stderr = "";
    ytDlp.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    ytDlp.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`yt-dlp failed: ${stderr}`));
        return;
      }
      resolve();
    });

    // Emitted when the process could not be spawned at all.
    ytDlp.on("error", (err) => {
      reject(new Error(`Failed to run yt-dlp: ${err.message}`));
    });
  });
}
/**
 * Turns an arbitrary title into a conservative file-name stem.
 *
 * Everything except ASCII letters, digits, whitespace, `-` and `_` is removed,
 * whitespace runs become a single `_`, and the result is capped at 100
 * characters. If nothing survives (e.g. a title written entirely in a
 * non-Latin script) the stem "video" is returned so callers never build
 * names such as `.%(ext)s`.
 *
 * @param filename Raw title or name.
 * @returns A non-empty, file-system-friendly stem.
 */
function sanitizeFilename(filename: string): string {
  const cleaned = filename
    .replace(/[^a-zA-Z0-9\s\-_]/g, "")
    .replace(/\s+/g, "_")
    .substring(0, 100);
  return cleaned === "" ? "video" : cleaned;
}
/**
 * Renders a number of seconds as `M:SS` (minutes are not wrapped into hours).
 *
 * @param seconds Time in seconds; fractions are truncated.
 * @returns The formatted time, or "0:00" for negative or non-finite input.
 */
function formatTime(seconds: number): string {
  const isDisplayable = Number.isFinite(seconds) && seconds >= 0;
  if (!isDisplayable) {
    return "0:00";
  }

  const wholeMinutes = Math.floor(seconds / 60);
  const leftoverSeconds = Math.floor(seconds % 60);
  const paddedSeconds = String(leftoverSeconds).padStart(2, "0");
  return [String(wholeMinutes), paddedSeconds].join(":");
}
/**
 * Reads the start of a raw heatmap entry.
 *
 * @param segment Raw heatmap entry.
 * @returns `start_seconds` if present, otherwise `start_time`, otherwise 0.
 */
function getStartTime(segment: RawHeatmapSegment): number {
  const primary = segment.start_seconds;
  if (primary != null) {
    return primary;
  }
  const secondary = segment.start_time;
  return secondary != null ? secondary : 0;
}
/**
 * Reads the end of a raw heatmap entry.
 *
 * @param segment Raw heatmap entry.
 * @returns `end_seconds` if present, otherwise `end_time`, otherwise 0.
 */
function getEndTime(segment: RawHeatmapSegment): number {
  const primary = segment.end_seconds;
  if (primary != null) {
    return primary;
  }
  const secondary = segment.end_time;
  return secondary != null ? secondary : 0;
}
/**
 * Reads the intensity of a raw heatmap entry.
 *
 * @param segment Raw heatmap entry.
 * @returns The first present value among `intensity`, `heat` and `value`
 *   (in that order), or 0 when none is present.
 */
function getIntensity(segment: RawHeatmapSegment): number {
  const candidates = [segment.intensity, segment.heat, segment.value];
  for (const candidate of candidates) {
    if (candidate != null) {
      return candidate;
    }
  }
  return 0;
}
/**
 * Normalized heatmap segment: the shape `getTopSegments` builds from a
 * `RawHeatmapSegment` and passes to `clusterHighIntensitySegments`.
 */
interface RawSegment {
/** Segment start, as resolved by `getStartTime`. */
start: number;
/** Segment end, as resolved by `getEndTime`. */
end: number;
/** Segment intensity, as resolved by `getIntensity`. */
intensity: number;
}
/**
 * Groups above-threshold segments that overlap or lie close together into
 * clusters and ranks the clusters by their summed intensity.
 *
 * Segments are visited in order of start time. A segment joins the current
 * cluster when it starts no later than `maxGapSeconds` after the cluster's
 * end — this includes segments that overlap the cluster by any amount.
 * Otherwise it opens a new cluster. The input array and its elements are
 * never modified.
 *
 * @param segments Candidate segments.
 * @param threshold Minimum intensity (inclusive) for a segment to take part.
 * @param maxGapSeconds Largest gap between a cluster's end and the next
 *   segment's start that still merges them. Defaults to 10.
 * @returns Clusters sorted by `totalIntensity`, highest first. Each cluster's
 *   `intensity` is that of its earliest-starting segment. Returns an empty
 *   array when no segment reaches the threshold.
 */
function clusterHighIntensitySegments(
  segments: RawSegment[],
  threshold: number,
  maxGapSeconds = 10
): ProcessedSegment[] {
  // filter() yields a fresh array, so sorting it leaves the caller's order intact.
  const hotSegments = segments
    .filter((seg) => seg.intensity >= threshold)
    .sort((a, b) => a.start - b.start);

  const clusters: ProcessedSegment[] = [];
  let current: ProcessedSegment | undefined;

  for (const seg of hotSegments) {
    if (current !== undefined && seg.start - current.end <= maxGapSeconds) {
      // Overlapping or near-adjacent: extend the running cluster and keep its
      // totals up to date instead of rescanning the array afterwards.
      current.end = Math.max(current.end, seg.end);
      current.totalIntensity += seg.intensity;
      current.segmentCount += 1;
      continue;
    }

    // Start a new cluster as a copy so the caller's segment object is never mutated.
    current = {
      start: seg.start,
      end: seg.end,
      intensity: seg.intensity,
      totalIntensity: seg.intensity,
      segmentCount: 1,
    };
    clusters.push(current);
  }

  clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);
  return clusters;
}
/**
 * Normalizes raw heatmap entries, clusters the high-intensity ones and returns
 * the strongest clusters.
 *
 * Entries are dropped when their start, end or intensity is not a finite
 * number, when the start is negative, or when the end is not after the start.
 *
 * @param segments Raw heatmap entries.
 * @param topN Maximum number of clusters to return. Fractions are rounded
 *   down; zero, negative or NaN values yield an empty result (a negative value
 *   is never treated as "all but the last N").
 * @param threshold Minimum intensity (inclusive) for a segment to be clustered.
 * @returns At most `topN` clusters, ordered by total intensity, highest first.
 */
function getTopSegments(
  segments: RawHeatmapSegment[],
  topN: number,
  threshold: number
): ProcessedSegment[] {
  // Clamp explicitly: Array.prototype.slice interprets a negative end index as
  // an offset from the end of the array, which would return too many clusters.
  const limit = Math.max(0, Math.floor(topN));
  if (!(limit > 0)) {
    return [];
  }

  const validSegments = segments
    .map((seg) => ({
      start: getStartTime(seg),
      end: getEndTime(seg),
      intensity: getIntensity(seg),
    }))
    .filter(
      (seg) =>
        Number.isFinite(seg.start) &&
        Number.isFinite(seg.end) &&
        Number.isFinite(seg.intensity) &&
        seg.start >= 0 &&
        seg.end > seg.start
    );
  if (validSegments.length === 0) {
    return [];
  }

  return clusterHighIntensitySegments(validSegments, threshold).slice(0, limit);
}
/**
 * Downloads the most-watched part of a video, as indicated by its heatmap.
 *
 * Steps:
 *  1. Ensure `outputDir` exists and fetch the video's metadata.
 *  2. If the video has no heatmap, or no region reaches the intensity
 *     threshold, download the whole video as `<title>.<ext>` and stop.
 *  3. Otherwise list the top regions, download the strongest one as
 *     `<title>_most_watched.<ext>` and write a `<title>_top_segments.txt`
 *     report next to it.
 *
 * @param options URL, output directory, yt-dlp format selector, number of
 *   regions to list and the intensity threshold.
 * @throws Rejects when fetching metadata or downloading fails; file-system
 *   errors from creating the directory or writing the report propagate too.
 */
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
  const { url, outputDir, format, topN, intensityThreshold } = options;

  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true });
  }

  console.log("Fetching video information from YouTube...");
  const info = await getVideoInfo(url);
  const safeTitle = sanitizeFilename(info.title);
  console.log(`Video: ${info.title}`);
  console.log(`Duration: ${formatTime(info.duration)}`);

  // Shared fallback for "no heatmap" and "nothing above the threshold".
  const downloadFullVideo = (): Promise<void> =>
    downloadSegment(url, join(outputDir, `${safeTitle}.%(ext)s`), 0, info.duration, format);

  const heatmap = info.heatmap;
  if (!heatmap || heatmap.length === 0) {
    console.log("\nNo heatmap data available for this video.");
    console.log("Downloading full video instead...");
    await downloadFullVideo();
    return;
  }

  const thresholdPercent = `${(intensityThreshold * 100).toFixed(0)}%`;
  console.log(`\nHeatmap data found: ${heatmap.length} segments`);
  console.log(`Intensity threshold: ${thresholdPercent}`);

  const topSegments = getTopSegments(heatmap, topN, intensityThreshold);
  if (topSegments.length === 0) {
    console.log("No high-intensity regions found. Downloading full video...");
    await downloadFullVideo();
    return;
  }

  // Print the list header only once we know there is something to list.
  console.log(`\nTop ${topN} high-intensity regions (clustered segments):\n`);
  for (const [index, seg] of topSegments.entries()) {
    const range = `${formatTime(seg.start)} - ${formatTime(seg.end)}`;
    const duration = formatTime(seg.end - seg.start);
    console.log(`${index + 1}. ${range} | Duration: ${duration} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`);
  }
  console.log("");

  // Clusters arrive sorted by total intensity, so the first one is the most watched.
  const topSegment = topSegments[0];
  const outputPath = join(outputDir, `${safeTitle}_most_watched.%(ext)s`);
  console.log(`Downloading segment: ${formatTime(topSegment.start)} - ${formatTime(topSegment.end)}`);
  await downloadSegment(url, outputPath, topSegment.start, topSegment.end, format);

  // Write a plain-text report of every listed region.
  const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`);
  let segmentInfo = `# ${info.title}\n\n`;
  segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${thresholdPercent}):\n\n`;
  for (const [index, seg] of topSegments.entries()) {
    segmentInfo += `${index + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`;
    segmentInfo += ` Duration: ${formatTime(seg.end - seg.start)}\n`;
    segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`;
    segmentInfo += ` Segments: ${seg.segmentCount}\n\n`;
  }
  writeFileSync(segmentInfoPath, segmentInfo);

  console.log(`\nSegment info saved to: ${segmentInfoPath}`);
  console.log("Download complete!");
}