/** * Import utilities for checking existing records and managing incremental imports */ import Comic from "../models/comic.model"; import path from "path"; /** * Get all imported file paths from MongoDB as a Set for O(1) lookup * @returns Set of normalized file paths */ export async function getImportedFilePaths(): Promise> { try { // Query only the rawFileDetails.filePath field for efficiency const comics = await Comic.find( { "rawFileDetails.filePath": { $exists: true, $ne: null } }, { "rawFileDetails.filePath": 1, _id: 0 } ).lean(); const filePaths = new Set(); for (const comic of comics) { if (comic.rawFileDetails?.filePath) { // Normalize the path to handle different path formats const normalizedPath = path.normalize(comic.rawFileDetails.filePath); filePaths.add(normalizedPath); } } console.log(`Found ${filePaths.size} imported files in database`); return filePaths; } catch (error) { console.error("Error fetching imported file paths:", error); throw error; } } /** * Get all imported file names (without extension) as a Set * @returns Set of file names for path-independent matching */ export async function getImportedFileNames(): Promise> { try { // Query only the rawFileDetails.name field for efficiency const comics = await Comic.find( { "rawFileDetails.name": { $exists: true, $ne: null } }, { "rawFileDetails.name": 1, _id: 0 } ).lean(); const fileNames = new Set(); for (const comic of comics) { if (comic.rawFileDetails?.name) { fileNames.add(comic.rawFileDetails.name); } } console.log(`Found ${fileNames.size} imported file names in database`); return fileNames; } catch (error) { console.error("Error fetching imported file names:", error); throw error; } } /** * Check if a file path exists in the database * @param filePath - Full file path to check * @returns true if file is imported */ export async function isFileImported(filePath: string): Promise { try { const normalizedPath = path.normalize(filePath); const exists = await Comic.exists({ "rawFileDetails.filePath": normalizedPath, }); return exists !== null; } catch (error) { console.error(`Error checking if file is imported: ${filePath}`, error); return false; } } /** * Check if a file name exists in the database * @param fileName - File name without extension * @returns true if file name is imported */ export async function isFileNameImported(fileName: string): Promise { try { const exists = await Comic.exists({ "rawFileDetails.name": fileName, }); return exists !== null; } catch (error) { console.error(`Error checking if file name is imported: ${fileName}`, error); return false; } } /** * Filter array to only new (unimported) files * @param files - Array of objects with path property * @param importedPaths - Set of imported paths * @returns Filtered array of new files */ export function filterNewFiles( files: T[], importedPaths: Set ): T[] { return files.filter((file) => { const normalizedPath = path.normalize(file.path); return !importedPaths.has(normalizedPath); }); } /** * Filter array to only new files by name * @param files - Array of objects with name property * @param importedNames - Set of imported names * @returns Filtered array of new files */ export function filterNewFilesByName( files: T[], importedNames: Set ): T[] { return files.filter((file) => !importedNames.has(file.name)); } /** * Compare local files against database to get import statistics * Uses batch queries for better performance with large libraries * @param localFilePaths - Array of local file paths * @returns Statistics object with counts and imported paths Set */ export async function getImportStatistics(localFilePaths: string[]): Promise<{ total: number; alreadyImported: number; newFiles: number; importedPaths: Set; }> { console.log(`[Import Stats] Checking ${localFilePaths.length} files against database...`); // Extract file names (without extension) from paths // This matches how comics are stored in the database (rawFileDetails.name) const fileNameToPath = new Map(); const fileNames: string[] = []; for (const filePath of localFilePaths) { const fileName = path.basename(filePath, path.extname(filePath)); fileNames.push(fileName); fileNameToPath.set(fileName, filePath); } console.log(`[Import Stats] Extracted ${fileNames.length} file names from paths`); // Query by file name (matches how comics are checked during import) const importedComics = await Comic.find( { "rawFileDetails.name": { $in: fileNames }, }, { "rawFileDetails.name": 1, "rawFileDetails.filePath": 1, _id: 0 } ).lean(); console.log(`[Import Stats] Found ${importedComics.length} matching comics in database`); // Build Set of imported paths based on name matching const importedPaths = new Set(); const importedNames = new Set(); for (const comic of importedComics) { if (comic.rawFileDetails?.name) { importedNames.add(comic.rawFileDetails.name); // Map back to the local file path const localPath = fileNameToPath.get(comic.rawFileDetails.name); if (localPath) { importedPaths.add(localPath); } } } const alreadyImported = importedPaths.size; const newFiles = localFilePaths.length - alreadyImported; console.log(`[Import Stats] Results: ${alreadyImported} already imported, ${newFiles} new files`); return { total: localFilePaths.length, alreadyImported, newFiles, importedPaths, }; } /** * Batch check multiple files in a single query (more efficient than individual checks) * @param filePaths - Array of file paths to check * @returns Map of filePath -> isImported boolean */ export async function batchCheckImported( filePaths: string[] ): Promise> { try { const normalizedPaths = filePaths.map((p) => path.normalize(p)); // Query all at once const importedComics = await Comic.find( { "rawFileDetails.filePath": { $in: normalizedPaths }, }, { "rawFileDetails.filePath": 1, _id: 0 } ).lean(); // Create a map of imported paths const importedSet = new Set( importedComics .map((c: any) => c.rawFileDetails?.filePath) .filter(Boolean) .map((p: string) => path.normalize(p)) ); // Build result map const resultMap = new Map(); for (let i = 0; i < filePaths.length; i++) { resultMap.set(filePaths[i], importedSet.has(normalizedPaths[i])); } return resultMap; } catch (error) { console.error("Error batch checking imported files:", error); throw error; } } /** * Find comics with files but missing canonical metadata * @returns Array of comic documents needing re-import */ export async function getComicsNeedingReimport(): Promise { try { // Find comics that have files but missing canonical metadata const comics = await Comic.find({ "rawFileDetails.filePath": { $exists: true, $ne: null }, $or: [ { canonicalMetadata: { $exists: false } }, { "canonicalMetadata.title": { $exists: false } }, { "canonicalMetadata.series": { $exists: false } }, ], }).lean(); console.log(`Found ${comics.length} comics needing re-import`); return comics; } catch (error) { console.error("Error finding comics needing re-import:", error); throw error; } } /** * Find files with same name but different paths * @returns Array of duplicates with name, paths, and count */ export async function findDuplicateFiles(): Promise< Array<{ name: string; paths: string[]; count: number }> > { try { const duplicates = await Comic.aggregate([ { $match: { "rawFileDetails.name": { $exists: true, $ne: null }, }, }, { $group: { _id: "$rawFileDetails.name", paths: { $push: "$rawFileDetails.filePath" }, count: { $sum: 1 }, }, }, { $match: { count: { $gt: 1 }, }, }, { $project: { _id: 0, name: "$_id", paths: 1, count: 1, }, }, { $sort: { count: -1 }, }, ]); console.log(`Found ${duplicates.length} duplicate file names`); return duplicates; } catch (error) { console.error("Error finding duplicate files:", error); throw error; } }