feat: cluster adjacent high-intensity segments
This commit is contained in:
@@ -7,6 +7,7 @@ export interface DownloadOptions {
|
||||
outputDir: string;
|
||||
format: string;
|
||||
topN: number;
|
||||
intensityThreshold: number;
|
||||
}
|
||||
|
||||
interface RawHeatmapSegment {
|
||||
@@ -23,6 +24,8 @@ interface ProcessedSegment {
|
||||
start: number;
|
||||
end: number;
|
||||
intensity: number;
|
||||
totalIntensity: number;
|
||||
segmentCount: number;
|
||||
}
|
||||
|
||||
interface VideoInfo {
|
||||
@@ -142,9 +145,91 @@ function getIntensity(segment: RawHeatmapSegment): number {
|
||||
return segment.intensity ?? segment.heat ?? segment.value ?? 0;
|
||||
}
|
||||
|
||||
function getTopSegmentsByIntensity(
|
||||
/**
 * Minimal segment shape consumed by the clustering step: a time range plus
 * the intensity value recorded for that range.
 */
interface RawSegment {
  /** Start of the segment (same time unit as `end`). */
  start: number;
  /** End of the segment (same time unit as `start`). */
  end: number;
  /** Intensity value compared against the clustering threshold. */
  intensity: number;
}

/**
 * Groups high-intensity segments that overlap or sit close together into
 * contiguous regions.
 *
 * Segments whose intensity is below `threshold` are discarded. The remaining
 * segments are visited in order of start time; a segment joins the current
 * region when it starts no later than `maxGap` after the region's current
 * end (any amount of overlap also counts), otherwise it opens a new region.
 *
 * The input array and its segment objects are never modified.
 *
 * @param segments - Candidate segments, in any order.
 * @param threshold - Minimum intensity (inclusive) a segment needs to be kept.
 * @param maxGap - Largest distance between a region's end and the next
 *   segment's start that still merges the two. Defaults to 10 (seconds).
 * @returns One entry per region, sorted by `totalIntensity` descending. For
 *   each region `intensity` is the peak intensity of its members,
 *   `totalIntensity` is the sum of member intensities and `segmentCount` is
 *   the number of members. Returns an empty array when nothing passes the
 *   threshold.
 */
function clusterHighIntensitySegments(
  segments: RawSegment[],
  threshold: number,
  maxGap = 10
): ProcessedSegment[] {
  // `filter` returns a fresh array, so the in-place sort never reorders the
  // caller's array.
  const highIntensity = segments
    .filter(seg => seg.intensity >= threshold)
    .sort((a, b) => a.start - b.start);

  const clusters: ProcessedSegment[] = [];
  // Region currently being grown; it is already stored in `clusters`, so no
  // separate "flush the last region" step is needed after the loop.
  let current: ProcessedSegment | null = null;

  for (const seg of highIntensity) {
    // Because segments are sorted by start, a negative distance means the
    // segment overlaps the region and must be merged, however large the
    // overlap is.
    if (current !== null && seg.start - current.end <= maxGap) {
      current.end = Math.max(current.end, seg.end);
      current.intensity = Math.max(current.intensity, seg.intensity);
      current.totalIntensity += seg.intensity;
      current.segmentCount += 1;
      continue;
    }

    // Start a new region from a copy so the input segment is left untouched.
    current = {
      start: seg.start,
      end: seg.end,
      intensity: seg.intensity,
      totalIntensity: seg.intensity,
      segmentCount: 1,
    };
    clusters.push(current);
  }

  // Highest accumulated intensity first; ties keep chronological order
  // because Array.prototype.sort is stable.
  clusters.sort((a, b) => b.totalIntensity - a.totalIntensity);

  return clusters;
}
|
||||
|
||||
function getTopSegments(
|
||||
segments: RawHeatmapSegment[],
|
||||
topN: number
|
||||
topN: number,
|
||||
threshold: number
|
||||
): ProcessedSegment[] {
|
||||
// Convert to processed format and filter valid segments
|
||||
const validSegments = segments
|
||||
@@ -165,13 +250,14 @@ function getTopSegmentsByIntensity(
|
||||
return [];
|
||||
}
|
||||
|
||||
// Sort by raw intensity (highest first) - this matches visual "bumps" in heatmap
|
||||
validSegments.sort((a, b) => b.intensity - a.intensity);
|
||||
return validSegments.slice(0, topN);
|
||||
// Cluster high-intensity segments
|
||||
const clusters = clusterHighIntensitySegments(validSegments, threshold);
|
||||
|
||||
return clusters.slice(0, topN);
|
||||
}
|
||||
|
||||
export async function downloadMostWatchedSegment(options: DownloadOptions): Promise<void> {
|
||||
const { url, outputDir, format, topN } = options;
|
||||
const { url, outputDir, format, topN, intensityThreshold } = options;
|
||||
|
||||
// Create output directory if it doesn't exist
|
||||
if (!existsSync(outputDir)) {
|
||||
@@ -197,13 +283,14 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
|
||||
}
|
||||
|
||||
console.log(`\nHeatmap data found: ${info.heatmap.length} segments`);
|
||||
console.log(`\nTop ${topN} segments by intensity (visual heatmap bumps):\n`);
|
||||
console.log(`Intensity threshold: ${(intensityThreshold * 100).toFixed(0)}%`);
|
||||
console.log(`\nTop ${topN} high-intensity regions (clustered segments):\n`);
|
||||
|
||||
// Get top segments by raw intensity
|
||||
const topSegments = getTopSegmentsByIntensity(info.heatmap, topN);
|
||||
// Get top clustered segments
|
||||
const topSegments = getTopSegments(info.heatmap, topN, intensityThreshold);
|
||||
|
||||
if (topSegments.length === 0) {
|
||||
console.log("No valid segments found. Downloading full video...");
|
||||
console.log("No high-intensity regions found. Downloading full video...");
|
||||
const outputPath = join(outputDir, `${safeTitle}.%(ext)s`);
|
||||
await downloadSegment(url, outputPath, 0, info.duration, format);
|
||||
return;
|
||||
@@ -213,7 +300,7 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
|
||||
for (let i = 0; i < topSegments.length; i++) {
|
||||
const seg = topSegments[i];
|
||||
const duration = seg.end - seg.start;
|
||||
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Intensity: ${(seg.intensity * 100).toFixed(1)}%`);
|
||||
console.log(`${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)} | Duration: ${formatTime(duration)} | Total Intensity: ${seg.totalIntensity.toFixed(2)}`);
|
||||
}
|
||||
|
||||
console.log("");
|
||||
@@ -228,14 +315,15 @@ export async function downloadMostWatchedSegment(options: DownloadOptions): Prom
|
||||
// Save segment info
|
||||
const segmentInfoPath = join(outputDir, `${safeTitle}_top_segments.txt`);
|
||||
let segmentInfo = `# ${info.title}\n\n`;
|
||||
segmentInfo += `Top ${topN} segments by intensity (highest re-watch rate):\n\n`;
|
||||
segmentInfo += `Top ${topN} high-intensity regions (intensity >= ${(intensityThreshold * 100).toFixed(0)}%):\n\n`;
|
||||
|
||||
for (let i = 0; i < topSegments.length; i++) {
|
||||
const seg = topSegments[i];
|
||||
const duration = seg.end - seg.start;
|
||||
segmentInfo += `${i + 1}. ${formatTime(seg.start)} - ${formatTime(seg.end)}\n`;
|
||||
segmentInfo += ` Duration: ${formatTime(duration)}\n`;
|
||||
segmentInfo += ` Intensity: ${(seg.intensity * 100).toFixed(1)}%\n\n`;
|
||||
segmentInfo += ` Total Intensity: ${seg.totalIntensity.toFixed(2)}\n`;
|
||||
segmentInfo += ` Segments: ${seg.segmentCount}\n\n`;
|
||||
}
|
||||
|
||||
writeFileSync(segmentInfoPath, segmentInfo);
|
||||
|
||||
Reference in New Issue
Block a user