diff --git a/examples/incremental-import.example.ts b/examples/incremental-import.example.ts
new file mode 100644
index 0000000..d222f54
--- /dev/null
+++ b/examples/incremental-import.example.ts
@@ -0,0 +1,347 @@
+/**
+ * Example: Incremental Import
+ *
+ * This example demonstrates how to use the incremental import feature
+ * to import only new files that haven't been previously imported.
+ */
+
+import { ServiceBroker } from "moleculer";
+import {
+	getImportedFilePaths,
+	getImportedFileNames,
+	getImportStatistics,
+	batchCheckImported,
+	getComicsNeedingReimport,
+	findDuplicateFiles,
+} from "../utils/import.utils";
+
+/**
+ * Example 1: Basic Incremental Import
+ * Import only new files from your comics directory
+ */
+async function example1_basicIncrementalImport(broker: ServiceBroker) {
+	console.log("\n=== Example 1: Basic Incremental Import ===\n");
+
+	try {
+		// Call the incremental import endpoint
+		const result: any = await broker.call("library.incrementalImport", {
+			sessionId: "incremental-session-" + Date.now(),
+		});
+
+		console.log("Import Result:");
+		console.log(` Success: ${result.success}`);
+		console.log(` Message: ${result.message}`);
+		console.log("\nStatistics:");
+		console.log(` Total files found: ${result.stats.total}`);
+		console.log(` Already imported: ${result.stats.alreadyImported}`);
+		console.log(` New files: ${result.stats.newFiles}`);
+		console.log(` Queued for import: ${result.stats.queued}`);
+
+		return result;
+	} catch (error) {
+		console.error("Error during incremental import:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 2: Get Import Statistics
+ * Check how many files are imported vs. new without starting an import
+ */
+async function example2_getImportStatistics(broker: ServiceBroker) {
+	console.log("\n=== Example 2: Get Import Statistics ===\n");
+
+	try {
+		const result: any = await broker.call("library.getImportStatistics", {
+			// Optional: specify a custom directory path
+			// directoryPath: "/path/to/comics"
+		});
+
+		console.log("Import Statistics:");
+		console.log(` Directory: ${result.directory}`);
+		console.log(` Total local files: ${result.stats.totalLocalFiles}`);
+		console.log(` Already imported: ${result.stats.alreadyImported}`);
+		console.log(` New files to import: ${result.stats.newFiles}`);
+		console.log(` Percentage imported: ${result.stats.percentageImported}`);
+
+		return result;
+	} catch (error) {
+		console.error("Error getting import statistics:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 3: Check Specific Files
+ * Check if specific files are already imported
+ */
+async function example3_checkSpecificFiles() {
+	console.log("\n=== Example 3: Check Specific Files ===\n");
+
+	const filesToCheck = [
+		"/comics/batman-001.cbz",
+		"/comics/superman-001.cbz",
+		"/comics/wonder-woman-001.cbz",
+	];
+
+	try {
+		const results = await batchCheckImported(filesToCheck);
+
+		console.log("File Import Status:");
+		results.forEach((isImported, filePath) => {
+			console.log(` ${filePath}: ${isImported ? "✓ Imported" : "✗ Not imported"}`);
+		});
+
+		return results;
+	} catch (error) {
+		console.error("Error checking files:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 4: Get All Imported File Paths
+ * Retrieve a list of all imported file paths from the database
+ */
+async function example4_getAllImportedPaths() {
+	console.log("\n=== Example 4: Get All Imported File Paths ===\n");
+
+	try {
+		const importedPaths = await getImportedFilePaths();
+
+		console.log(`Total imported files: ${importedPaths.size}`);
+
+		// Show first 10 as examples
+		const pathArray = Array.from(importedPaths);
+		console.log("\nFirst 10 imported files:");
+		pathArray.slice(0, 10).forEach((path, index) => {
+			console.log(` ${index + 1}. ${path}`);
+		});
+
+		if (pathArray.length > 10) {
+			console.log(` ... and ${pathArray.length - 10} more`);
+		}
+
+		return importedPaths;
+	} catch (error) {
+		console.error("Error getting imported paths:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 5: Get All Imported File Names
+ * Retrieve a list of all imported file names (without paths)
+ */
+async function example5_getAllImportedNames() {
+	console.log("\n=== Example 5: Get All Imported File Names ===\n");
+
+	try {
+		const importedNames = await getImportedFileNames();
+
+		console.log(`Total imported file names: ${importedNames.size}`);
+
+		// Show first 10 as examples
+		const nameArray = Array.from(importedNames);
+		console.log("\nFirst 10 imported file names:");
+		nameArray.slice(0, 10).forEach((name, index) => {
+			console.log(` ${index + 1}. ${name}`);
+		});
+
+		if (nameArray.length > 10) {
+			console.log(` ... and ${nameArray.length - 10} more`);
+		}
+
+		return importedNames;
+	} catch (error) {
+		console.error("Error getting imported names:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 6: Find Comics Needing Re-import
+ * Find comics that have files but incomplete metadata
+ */
+async function example6_findComicsNeedingReimport() {
+	console.log("\n=== Example 6: Find Comics Needing Re-import ===\n");
+
+	try {
+		const comics = await getComicsNeedingReimport();
+
+		console.log(`Found ${comics.length} comics needing re-import`);
+
+		if (comics.length > 0) {
+			console.log("\nFirst 5 comics needing re-import:");
+			comics.slice(0, 5).forEach((comic: any, index) => {
+				console.log(` ${index + 1}. ${comic.rawFileDetails?.name || "Unknown"}`);
+				console.log(` Path: ${comic.rawFileDetails?.filePath || "N/A"}`);
+				console.log(` Has title: ${!!comic.canonicalMetadata?.title?.value}`);
+				console.log(` Has series: ${!!comic.canonicalMetadata?.series?.value}`);
+			});
+
+			if (comics.length > 5) {
+				console.log(` ... and ${comics.length - 5} more`);
+			}
+		}
+
+		return comics;
+	} catch (error) {
+		console.error("Error finding comics needing re-import:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 7: Find Duplicate Files
+ * Find files with the same name but different paths
+ */
+async function example7_findDuplicates() {
+	console.log("\n=== Example 7: Find Duplicate Files ===\n");
+
+	try {
+		const duplicates = await findDuplicateFiles();
+
+		console.log(`Found ${duplicates.length} duplicate file names`);
+
+		if (duplicates.length > 0) {
+			console.log("\nDuplicate files:");
+			duplicates.slice(0, 5).forEach((dup, index) => {
+				console.log(` ${index + 1}. ${dup.name} (${dup.count} copies)`);
+				dup.paths.forEach((path: string) => {
+					console.log(` - ${path}`);
+				});
+			});
+
+			if (duplicates.length > 5) {
+				console.log(` ... and ${duplicates.length - 5} more`);
+			}
+		}
+
+		return duplicates;
+	} catch (error) {
+		console.error("Error finding duplicates:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 8: Custom Import Statistics for Specific Directory
+ * Walk a custom directory for .cbz/.cbr/.cb7 files and report how many are already imported
+ */
+async function example8_customDirectoryStats(directoryPath: string) {
+	console.log("\n=== Example 8: Custom Directory Statistics ===\n");
+	console.log(`Analyzing directory: ${directoryPath}`);
+
+	try {
+		const klaw = require("klaw");
+		const through2 = require("through2");
+		const path = require("path");
+
+		// Collect all comic files in the custom directory (extension match is case-insensitive)
+		const localFiles: string[] = [];
+
+		await new Promise<void>((resolve, reject) => {
+			klaw(directoryPath)
+				.on("error", (err: Error) => {
+					console.error(`Error walking directory:`, err);
+					reject(err);
+				})
+				.pipe(
+					through2.obj(function (item: any, enc: any, next: any) {
+						const fileExtension = path.extname(item.path).toLowerCase();
+						if ([".cbz", ".cbr", ".cb7"].includes(fileExtension)) {
+							localFiles.push(item.path);
+						}
+						next();
+					})
+				)
+				// The transform pushes nothing and nobody reads from it, so "end" would
+				// never fire; "finish" fires once every walked entry has been processed.
+				.on("finish", () => resolve());
+		});
+
+		// Compare the collected files against the database
+		const stats = await getImportStatistics(localFiles);
+
+		console.log("\nStatistics:");
+		console.log(` Total files: ${stats.total}`);
+		console.log(` Already imported: ${stats.alreadyImported}`);
+		console.log(` New files: ${stats.newFiles}`);
+		console.log(` Percentage: ${stats.total > 0 ? ((stats.alreadyImported / stats.total) * 100).toFixed(2) : "0.00"}%`);
+
+		return stats;
+	} catch (error) {
+		console.error("Error getting custom directory stats:", error);
+		throw error;
+	}
+}
+
+/**
+ * Run all examples
+ */
+async function runAllExamples(broker: ServiceBroker) {
+	console.log("╔════════════════════════════════════════════════════════════╗");
+	console.log("║ Incremental Import Examples ║");
+	console.log("╚════════════════════════════════════════════════════════════╝");
+
+	try {
+		// Example 1: Basic incremental import
+		await example1_basicIncrementalImport(broker);
+
+		// Example 2: Get statistics without importing
+		await example2_getImportStatistics(broker);
+
+		// Example 3: Check specific files
+		await example3_checkSpecificFiles();
+
+		// Example 4: Get all imported paths
+		await example4_getAllImportedPaths();
+
+		// Example 5: Get all imported names
+		await example5_getAllImportedNames();
+
+		// Example 6: Find comics needing re-import
+		await example6_findComicsNeedingReimport();
+
+		// Example 7: Find duplicates
+		await example7_findDuplicates();
+
+		// Example 8: Custom directory stats (uncomment and provide path)
+		// await example8_customDirectoryStats("/path/to/custom/comics");
+
+		console.log("\n╔════════════════════════════════════════════════════════════╗");
+		console.log("║ All examples completed successfully! ║");
+		console.log("╚════════════════════════════════════════════════════════════╝\n");
+	} catch (error) {
+		console.error("\n❌ Error running examples:", error);
+		throw error;
+	}
+}
+
+/**
+ * Usage in your service or application
+ */
+export {
+	example1_basicIncrementalImport,
+	example2_getImportStatistics,
+	example3_checkSpecificFiles,
+	example4_getAllImportedPaths,
+	example5_getAllImportedNames,
+	example6_findComicsNeedingReimport,
+	example7_findDuplicates,
+	example8_customDirectoryStats,
+	runAllExamples,
+};
+
+// If running directly
+if (require.main === module) {
+	console.log("Note: This is an example file. To run these examples:");
+	console.log("1. Ensure your Moleculer broker is running");
+	console.log("2. Import and call the example functions from your service");
+	console.log("3. Or integrate the patterns into your application");
+	console.log("\nQuick Start:");
+	console.log(" - Use example1_basicIncrementalImport() to import only new files");
+	console.log(" - Use example2_getImportStatistics() to check status before importing");
+	console.log(" - Use example3_checkSpecificFiles() to verify specific files");
+}
diff --git a/moleculer.config.ts b/moleculer.config.ts
index 69e2ce2..059c768 100644
--- a/moleculer.config.ts
+++ b/moleculer.config.ts
@@ -102,7 +102,8 @@ const brokerConfig: BrokerOptions = {
 	serializer: "JSON",
 
 	// Number of milliseconds to wait before reject a request with a RequestTimeout error. Disabled: 0
-	requestTimeout: 10 * 1000,
+	// Increased to 60 seconds to handle long-running operations like import statistics on large libraries
+	requestTimeout: 60 * 1000,
 
 	// Retry policy settings. More info: https://moleculer.services/docs/0.14/fault-tolerance.html#Retry
 	retryPolicy: {
diff --git a/package-lock.json b/package-lock.json
index 0d432f0..e158364 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -43,7 +43,7 @@
 				"moleculer-db": "^0.8.23",
 				"moleculer-db-adapter-mongoose": "^0.9.2",
 				"moleculer-io": "^2.2.0",
-				"moleculer-web": "^0.10.5",
+				"moleculer-web": "^0.10.8",
 				"mongoosastic-ts": "^6.0.3",
 				"mongoose": "^6.10.4",
 				"mongoose-paginate-v2": "^1.3.18",
diff --git a/services/api.service.ts b/services/api.service.ts
index 84eb856..0a5a510 100644
--- a/services/api.service.ts
+++ b/services/api.service.ts
@@ -280,17 +280,69 @@ export default class ApiService extends Service {
 				const newStats = await fs.promises.stat(filePath);
 				if (newStats.mtime.getTime() === stats.mtime.getTime()) {
 					this.logger.info(`Stable file detected: ${filePath}, importing.`);
-					const folderData: IFolderData = await this.broker.call(
-						"library.walkFolders",
-						{ basePathToWalk: filePath }
-					);
-					// this would have to be a call to importDownloadedComic
-					await this.broker.call("importqueue.processImport", {
-						fileObject: {
-							filePath,
-							fileSize: folderData[0].fileSize,
-						},
-					});
+
+					try {
+						const folderData: IFolderData[] = await this.broker.call(
+							"library.walkFolders",
+							{ basePathToWalk: filePath }
+						);
+
+						if (folderData && folderData.length > 0) {
+							const fileData = folderData[0];
+							const fileName = path.basename(filePath, path.extname(filePath));
+							const extension = path.extname(filePath);
+
+							// Determine mimeType based on extension
+							let mimeType = "application/octet-stream";
+							if (extension === ".cbz") {
+								mimeType = "application/zip; charset=binary";
+							} else if (extension === ".cbr") {
+								mimeType = "application/x-rar-compressed; charset=binary";
+							}
+
+							// Prepare payload for rawImportToDB
+							const payload = {
+								rawFileDetails: {
+									name: fileName,
+									filePath: filePath,
+									fileSize: fileData.fileSize,
+									extension: extension,
+									mimeType: mimeType,
+								},
+								inferredMetadata: {
+									issue: {
+										name: fileName,
+										number: 0,
+									},
+								},
+								sourcedMetadata: {
+									comicInfo: null,
+								},
+								importStatus: {
+									isImported: true,
+									tagged: false,
+									matchedResult: {
+										score: "0",
+									},
+								},
+								acquisition: {
+									source: {
+										wanted: false,
+									},
+								},
+							};
+
+							// Call the library service to import the comic
+							await this.broker.call("library.rawImportToDB", {
+								importType: "new",
+								payload: payload,
+							});
+
+							this.logger.info(`Successfully queued import for: ${filePath}`);
+						}
+					} catch (error) {
+						this.logger.error(`Error importing file ${filePath}:`, error);
+					}
 				}
 			}, 3000);
 		}
diff --git a/utils/import.utils.ts b/utils/import.utils.ts
new file mode 100644
index 0000000..ce2fbbd
--- /dev/null
+++ b/utils/import.utils.ts
@@ -0,0 +1,286 @@
+/**
+ * Import utilities for checking existing records and managing incremental imports
+ */
+
+import Comic from "../models/comic.model";
+import path from "path";
+
+/**
+ * Get all imported file paths from MongoDB as a Set for O(1) lookup
+ * @returns Set of normalized file paths
+ */
+export async function getImportedFilePaths(): Promise<Set<string>> {
+	try {
+		// Query only the rawFileDetails.filePath field for efficiency
+		const comics = await Comic.find(
+			{ "rawFileDetails.filePath": { $exists: true, $ne: null } },
+			{ "rawFileDetails.filePath": 1, _id: 0 }
+		).lean();
+
+		const filePaths = new Set<string>();
+
+		for (const comic of comics) {
+			if (comic.rawFileDetails?.filePath) {
+				// Normalize the path to handle different path formats
+				const normalizedPath = path.normalize(comic.rawFileDetails.filePath);
+				filePaths.add(normalizedPath);
+			}
+		}
+
+		console.log(`Found ${filePaths.size} imported files in database`);
+		return filePaths;
+	} catch (error) {
+		console.error("Error fetching imported file paths:", error);
+		throw error;
+	}
+}
+
+/**
+ * Get all imported file names (without extension) as a Set
+ * @returns Set of file names for path-independent matching
+ */
+export async function getImportedFileNames(): Promise<Set<string>> {
+	try {
+		// Query only the rawFileDetails.name field for efficiency
+		const comics = await Comic.find(
+			{ "rawFileDetails.name": { $exists: true, $ne: null } },
+			{ "rawFileDetails.name": 1, _id: 0 }
+		).lean();
+
+		const fileNames = new Set<string>();
+
+		for (const comic of comics) {
+			if (comic.rawFileDetails?.name) {
+				fileNames.add(comic.rawFileDetails.name);
+			}
+		}
+
+		console.log(`Found ${fileNames.size} imported file names in database`);
+		return fileNames;
+	} catch (error) {
+		console.error("Error fetching imported file names:", error);
+		throw error;
+	}
+}
+
+/**
+ * Check if a file path exists in the database
+ * @param filePath - Full file path to check
+ * @returns true if file is imported; false if it is not found or the lookup fails (the error is logged)
+ */
+export async function isFileImported(filePath: string): Promise<boolean> {
+	try {
+		const normalizedPath = path.normalize(filePath);
+		const exists = await Comic.exists({
+			"rawFileDetails.filePath": normalizedPath,
+		});
+		return exists !== null;
+	} catch (error) {
+		console.error(`Error checking if file is imported: ${filePath}`, error);
+		return false;
+	}
+}
+
+/**
+ * Check if a file name exists in the database
+ * @param fileName - File name without extension
+ * @returns true if file name is imported; false if it is not found or the lookup fails (the error is logged)
+ */
+export async function isFileNameImported(fileName: string): Promise<boolean> {
+	try {
+		const exists = await Comic.exists({
+			"rawFileDetails.name": fileName,
+		});
+		return exists !== null;
+	} catch (error) {
+		console.error(`Error checking if file name is imported: ${fileName}`, error);
+		return false;
+	}
+}
+
+/**
+ * Filter array to only new (unimported) files
+ * @param files - Array of objects with path property
+ * @param importedPaths - Set of imported paths
+ * @returns Filtered array of new files
+ */
+export function filterNewFiles<T extends { path: string }>(
+	files: T[],
+	importedPaths: Set<string>
+): T[] {
+	return files.filter((file) => {
+		const normalizedPath = path.normalize(file.path);
+		return !importedPaths.has(normalizedPath);
+	});
+}
+
+/**
+ * Filter array to only new files by name
+ * @param files - Array of objects with name property
+ * @param importedNames - Set of imported names
+ * @returns Filtered array of new files
+ */
+export function filterNewFilesByName<T extends { name: string }>(
+	files: T[],
+	importedNames: Set<string>
+): T[] {
+	return files.filter((file) => !importedNames.has(file.name));
+}
+
+/**
+ * Compare local files against database to get import statistics
+ * Uses one batch query; counts are per local entry, so total === alreadyImported + newFiles
+ * @param localFilePaths - Array of local file paths
+ * @returns Statistics object with counts and imported paths Set
+ */
+export async function getImportStatistics(localFilePaths: string[]): Promise<{
+	total: number;
+	alreadyImported: number;
+	newFiles: number;
+	importedPaths: Set<string>;
+}> {
+	console.log(`[Import Stats] Checking ${localFilePaths.length} files against database...`);
+
+	// Normalize all paths upfront
+	const normalizedPaths = localFilePaths.map((p) => path.normalize(p));
+
+	// Use batch query instead of fetching all comics
+	// This is much faster for large libraries
+	const importedComics = await Comic.find(
+		{
+			"rawFileDetails.filePath": { $in: normalizedPaths },
+		},
+		{ "rawFileDetails.filePath": 1, _id: 0 }
+	).lean();
+
+	// Build Set of imported paths
+	const importedPaths = new Set<string>(
+		importedComics
+			.map((c: any) => c.rawFileDetails?.filePath)
+			.filter(Boolean)
+			.map((p: string) => path.normalize(p))
+	);
+
+	const newFiles = normalizedPaths.filter((p) => !importedPaths.has(p)).length;
+	const alreadyImported = localFilePaths.length - newFiles;
+
+	console.log(`[Import Stats] Results: ${alreadyImported} already imported, ${newFiles} new files`);
+
+	return {
+		total: localFilePaths.length,
+		alreadyImported,
+		newFiles,
+		importedPaths,
+	};
+}
+
+/**
+ * Batch check multiple files in a single query (more efficient than individual checks)
+ * @param filePaths - Array of file paths to check
+ * @returns Map of filePath -> isImported boolean
+ */
+export async function batchCheckImported(
+	filePaths: string[]
+): Promise<Map<string, boolean>> {
+	try {
+		const normalizedPaths = filePaths.map((p) => path.normalize(p));
+
+		// Query all at once
+		const importedComics = await Comic.find(
+			{
+				"rawFileDetails.filePath": { $in: normalizedPaths },
+			},
+			{ "rawFileDetails.filePath": 1, _id: 0 }
+		).lean();
+
+		// Create a set of imported paths
+		const importedSet = new Set<string>(
+			importedComics
+				.map((c: any) => c.rawFileDetails?.filePath)
+				.filter(Boolean)
+				.map((p: string) => path.normalize(p))
+		);
+
+		// Build result map, keyed by the caller's original (un-normalized) path
+		const resultMap = new Map<string, boolean>();
+		for (let i = 0; i < filePaths.length; i++) {
+			resultMap.set(filePaths[i], importedSet.has(normalizedPaths[i]));
+		}
+
+		return resultMap;
+	} catch (error) {
+		console.error("Error batch checking imported files:", error);
+		throw error;
+	}
+}
+
+/**
+ * Find comics with files but missing canonical metadata
+ * @returns Array of comic documents needing re-import
+ */
+export async function getComicsNeedingReimport(): Promise<any[]> {
+	try {
+		// Find comics that have files but missing canonical metadata
+		const comics = await Comic.find({
+			"rawFileDetails.filePath": { $exists: true, $ne: null },
+			$or: [
+				{ canonicalMetadata: { $exists: false } },
+				{ "canonicalMetadata.title": { $exists: false } },
+				{ "canonicalMetadata.series": { $exists: false } },
+			],
+		}).lean();
+
+		console.log(`Found ${comics.length} comics needing re-import`);
+		return comics;
+	} catch (error) {
+		console.error("Error finding comics needing re-import:", error);
+		throw error;
+	}
+}
+
+/**
+ * Find files with same name but different paths
+ * @returns Array of duplicates with name, paths, and count
+ */
+export async function findDuplicateFiles(): Promise<
+	Array<{ name: string; paths: string[]; count: number }>
+> {
+	try {
+		const duplicates = await Comic.aggregate([
+			{
+				$match: {
+					"rawFileDetails.name": { $exists: true, $ne: null },
+				},
+			},
+			{
+				$group: {
+					_id: "$rawFileDetails.name",
+					paths: { $push: "$rawFileDetails.filePath" },
+					count: { $sum: 1 },
+				},
+			},
+			{
+				$match: {
+					count: { $gt: 1 },
+				},
+			},
+			{
+				$project: {
+					_id: 0,
+					name: "$_id",
+					paths: 1,
+					count: 1,
+				},
+			},
+			{
+				$sort: { count: -1 },
+			},
+		]);
+
+		console.log(`Found ${duplicates.length} duplicate file names`);
+		return duplicates;
+	} catch (error) {
+		console.error("Error finding duplicate files:", error);
+		throw error;
+	}
+}