🔨 Fixes to import

This commit is contained in:
2026-02-26 23:56:39 -05:00
parent f7804ee3f0
commit a1fa12f181
5 changed files with 699 additions and 13 deletions

286
utils/import.utils.ts Normal file
View File

@@ -0,0 +1,286 @@
/**
* Import utilities for checking existing records and managing incremental imports
*/
import Comic from "../models/comic.model";
import path from "path";
/**
 * Load the file path of every comic document that has one recorded.
 *
 * Only `rawFileDetails.filePath` is projected. Each stored value is passed
 * through `path.normalize` before it is added, so callers should normalize the
 * paths they look up in the returned Set as well.
 *
 * @returns Set of normalized file paths, suitable for constant-time lookups
 * @throws Rethrows any error raised while querying, after logging it
 */
export async function getImportedFilePaths(): Promise<Set<string>> {
  try {
    // Fetch just the path field; `_id` is excluded to keep documents small.
    const records = await Comic.find(
      { "rawFileDetails.filePath": { $exists: true, $ne: null } },
      { "rawFileDetails.filePath": 1, _id: 0 }
    ).lean();

    const knownPaths = new Set<string>();
    for (const record of records) {
      const storedPath = record.rawFileDetails?.filePath;
      if (!storedPath) {
        // Skip empty values that still satisfy the query filter.
        continue;
      }
      knownPaths.add(path.normalize(storedPath));
    }

    console.log(`Found ${knownPaths.size} imported files in database`);
    return knownPaths;
  } catch (err) {
    console.error("Error fetching imported file paths:", err);
    throw err;
  }
}
/**
 * Load the `rawFileDetails.name` value of every comic document that has one.
 *
 * Names are added exactly as stored; no normalization is applied here.
 * NOTE(review): the names are presumably stored without a file extension —
 * confirm against the code that writes `rawFileDetails.name`.
 *
 * @returns Set of stored file names for path-independent matching
 * @throws Rethrows any error raised while querying, after logging it
 */
export async function getImportedFileNames(): Promise<Set<string>> {
  try {
    // Fetch just the name field; `_id` is excluded to keep documents small.
    const records = await Comic.find(
      { "rawFileDetails.name": { $exists: true, $ne: null } },
      { "rawFileDetails.name": 1, _id: 0 }
    ).lean();

    const knownNames = new Set<string>();
    for (const record of records) {
      const storedName = record.rawFileDetails?.name;
      if (!storedName) {
        // Skip empty values that still satisfy the query filter.
        continue;
      }
      knownNames.add(storedName);
    }

    console.log(`Found ${knownNames.size} imported file names in database`);
    return knownNames;
  } catch (err) {
    console.error("Error fetching imported file names:", err);
    throw err;
  }
}
/**
 * Check whether a comic with the given file path is stored in the database.
 *
 * The path is normalized with `path.normalize` and compared for exact equality
 * against `rawFileDetails.filePath`.
 *
 * Errors are logged and reported as `false`, so a failed lookup cannot be
 * distinguished from "not imported" by the caller.
 *
 * @param filePath - Full file path to check
 * @returns true if a matching document exists, false otherwise (or on error)
 */
export async function isFileImported(filePath: string): Promise<boolean> {
  try {
    const match = await Comic.exists({
      "rawFileDetails.filePath": path.normalize(filePath),
    });
    return match !== null;
  } catch (err) {
    console.error(`Error checking if file is imported: ${filePath}`, err);
    return false;
  }
}
/**
 * Check whether a comic with the given file name is stored in the database.
 *
 * The name is compared for exact equality against `rawFileDetails.name`.
 * NOTE(review): callers presumably pass the name without its extension —
 * confirm against how `rawFileDetails.name` is written.
 *
 * Errors are logged and reported as `false`, so a failed lookup cannot be
 * distinguished from "not imported" by the caller.
 *
 * @param fileName - File name to look up
 * @returns true if a matching document exists, false otherwise (or on error)
 */
export async function isFileNameImported(fileName: string): Promise<boolean> {
  try {
    const match = await Comic.exists({
      "rawFileDetails.name": fileName,
    });
    return match !== null;
  } catch (err) {
    console.error(`Error checking if file name is imported: ${fileName}`, err);
    return false;
  }
}
/**
 * Keep only the files whose path is not already in the imported set.
 *
 * Each candidate path is normalized with `path.normalize` before the lookup,
 * so `importedPaths` is expected to hold normalized paths too. The input array
 * is not modified and the original element order is preserved.
 *
 * @param files - Array of objects with a `path` property
 * @param importedPaths - Set of already-imported (normalized) paths
 * @returns New array containing only the files not found in `importedPaths`
 */
export function filterNewFiles<T extends { path: string }>(
  files: T[],
  importedPaths: Set<string>
): T[] {
  const fresh: T[] = [];
  for (const candidate of files) {
    const alreadyKnown = importedPaths.has(path.normalize(candidate.path));
    if (!alreadyKnown) {
      fresh.push(candidate);
    }
  }
  return fresh;
}
/**
 * Keep only the files whose name is not already in the imported set.
 *
 * Names are compared exactly as given (case-sensitive, no normalization). The
 * input array is not modified and the original element order is preserved.
 *
 * @param files - Array of objects with a `name` property
 * @param importedNames - Set of already-imported names
 * @returns New array containing only the files not found in `importedNames`
 */
export function filterNewFilesByName<T extends { name: string }>(
  files: T[],
  importedNames: Set<string>
): T[] {
  const fresh: T[] = [];
  for (const candidate of files) {
    if (importedNames.has(candidate.name)) {
      continue;
    }
    fresh.push(candidate);
  }
  return fresh;
}
/**
 * Compare local files against the database to get import statistics.
 *
 * Issues a single `$in` query for the given paths instead of loading every
 * comic document. All paths are normalized with `path.normalize` before the
 * lookup.
 *
 * Counts are per input entry: `alreadyImported + newFiles === total` always
 * holds, even when `localFilePaths` contains duplicates or several spellings
 * that normalize to the same path.
 *
 * @param localFilePaths - Array of local file paths
 * @returns Statistics object with counts and the Set of normalized paths that
 *          were found in the database
 * @throws Rethrows any error raised while querying, after logging it
 */
export async function getImportStatistics(localFilePaths: string[]): Promise<{
  total: number;
  alreadyImported: number;
  newFiles: number;
  importedPaths: Set<string>;
}> {
  console.log(`[Import Stats] Checking ${localFilePaths.length} files against database...`);
  try {
    // Normalize all paths upfront so lookups and counting use the same form.
    const normalizedPaths = localFilePaths.map((p) => path.normalize(p));
    const importedPaths = new Set<string>();

    // Nothing to look up for an empty input; skip the round trip entirely.
    if (normalizedPaths.length > 0) {
      const importedComics = await Comic.find(
        {
          // De-duplicate so repeated inputs do not bloat the query document.
          "rawFileDetails.filePath": { $in: Array.from(new Set(normalizedPaths)) },
        },
        { "rawFileDetails.filePath": 1, _id: 0 }
      ).lean();

      for (const comic of importedComics) {
        const storedPath = comic.rawFileDetails?.filePath;
        if (storedPath) {
          importedPaths.add(path.normalize(storedPath));
        }
      }
    }

    // Count per input entry rather than using importedPaths.size: the Set is
    // unique, so subtracting its size from the raw input length would report
    // duplicate inputs of an imported file as "new".
    const alreadyImported = normalizedPaths.filter((p) => importedPaths.has(p)).length;
    const newFiles = normalizedPaths.length - alreadyImported;

    console.log(`[Import Stats] Results: ${alreadyImported} already imported, ${newFiles} new files`);
    return {
      total: localFilePaths.length,
      alreadyImported,
      newFiles,
      importedPaths,
    };
  } catch (error) {
    console.error("Error computing import statistics:", error);
    throw error;
  }
}
/**
 * Check many file paths against the database with a single `$in` query.
 *
 * Paths are normalized with `path.normalize` for the lookup, but the returned
 * Map is keyed by the paths exactly as the caller supplied them.
 *
 * @param filePaths - Array of file paths to check
 * @returns Map of original filePath -> whether a comic with that path exists
 * @throws Rethrows any error raised while querying, after logging it
 */
export async function batchCheckImported(
  filePaths: string[]
): Promise<Map<string, boolean>> {
  try {
    // Pair each caller-supplied path with its normalized form.
    const pairs = filePaths.map((original) => ({
      original,
      normalized: path.normalize(original),
    }));

    const matches = await Comic.find(
      {
        "rawFileDetails.filePath": { $in: pairs.map((pair) => pair.normalized) },
      },
      { "rawFileDetails.filePath": 1, _id: 0 }
    ).lean();

    // Collect the normalized paths that actually exist in the database.
    const found = new Set<string>();
    for (const match of matches) {
      const storedPath = match.rawFileDetails?.filePath;
      if (storedPath) {
        found.add(path.normalize(storedPath));
      }
    }

    const verdicts = new Map<string, boolean>();
    for (const pair of pairs) {
      verdicts.set(pair.original, found.has(pair.normalized));
    }
    return verdicts;
  } catch (err) {
    console.error("Error batch checking imported files:", err);
    throw err;
  }
}
/**
 * Find comics that have a file path recorded but incomplete canonical metadata.
 *
 * A document matches when `rawFileDetails.filePath` is present and non-null and
 * at least one of `canonicalMetadata`, `canonicalMetadata.title` or
 * `canonicalMetadata.series` does not exist.
 *
 * @returns Array of plain (lean) comic documents needing re-import
 * @throws Rethrows any error raised while querying, after logging it
 */
export async function getComicsNeedingReimport(): Promise<any[]> {
  try {
    const incomplete = await Comic.find({
      "rawFileDetails.filePath": { $exists: true, $ne: null },
      // Any one missing piece of canonical metadata is enough to qualify.
      $or: [
        { canonicalMetadata: { $exists: false } },
        { "canonicalMetadata.title": { $exists: false } },
        { "canonicalMetadata.series": { $exists: false } },
      ],
    }).lean();

    console.log(`Found ${incomplete.length} comics needing re-import`);
    return incomplete;
  } catch (err) {
    console.error("Error finding comics needing re-import:", err);
    throw err;
  }
}
/**
 * Find file names that are shared by more than one comic document.
 *
 * Documents are grouped by `rawFileDetails.name` only; the collected paths are
 * not compared with each other, so a group may contain repeated paths.
 * Results are sorted by descending count.
 *
 * @returns Array of duplicates with name, paths, and count
 * @throws Rethrows any error raised by the aggregation, after logging it
 */
export async function findDuplicateFiles(): Promise<
  Array<{ name: string; paths: string[]; count: number }>
> {
  try {
    const groups = await Comic.aggregate([
      // Ignore documents without a usable name.
      { $match: { "rawFileDetails.name": { $exists: true, $ne: null } } },
      // Bucket by name, collecting each document's path and a running total.
      {
        $group: {
          _id: "$rawFileDetails.name",
          paths: { $push: "$rawFileDetails.filePath" },
          count: { $sum: 1 },
        },
      },
      // A name is a duplicate only when it appears more than once.
      { $match: { count: { $gt: 1 } } },
      // Expose the group key as `name` and drop `_id`.
      { $project: { _id: 0, name: "$_id", paths: 1, count: 1 } },
      // Most duplicated names first.
      { $sort: { count: -1 } },
    ]);

    console.log(`Found ${groups.length} duplicate file names`);
    return groups;
  } catch (err) {
    console.error("Error finding duplicate files:", err);
    throw err;
  }
}