🔨 Fixes to import

2026-02-26 23:56:39 -05:00
parent f7804ee3f0
commit a1fa12f181
5 changed files with 699 additions and 13 deletions
--- a/examples/incremental-import.example.ts
+++ b/examples/incremental-import.example.ts
@@ -0,0 +1,347 @@
+/**
+ * Example: Incremental Import
+ * 
+ * This example demonstrates how to use the incremental import feature
+ * to import only new files that haven't been previously imported.
+ */
+
+import { ServiceBroker } from "moleculer";
+import {
+	getImportedFilePaths,
+	getImportedFileNames,
+	getImportStatistics,
+	batchCheckImported,
+	getComicsNeedingReimport,
+	findDuplicateFiles,
+} from "../utils/import.utils";
+
+/**
+ * Example 1: Basic Incremental Import
+ *
+ * Calls the `library.incrementalImport` action with a timestamp-suffixed
+ * session id, prints the success flag, message and statistics found on the
+ * response, and returns the response unchanged.
+ *
+ * @param broker - Broker used to invoke the action
+ * @returns Whatever the action resolved with
+ * @throws Rethrows any failure after logging it
+ */
+async function example1_basicIncrementalImport(broker: ServiceBroker) {
+	console.log("\n=== Example 1: Basic Incremental Import ===\n");
+
+	try {
+		const sessionId = "incremental-session-" + Date.now();
+		const result: any = await broker.call("library.incrementalImport", { sessionId });
+
+		// Lines are rendered one at a time so output stops exactly at the
+		// first field that cannot be read from the response.
+		const reportLines: Array<() => string> = [
+			() => "Import Result:",
+			() => `  Success: ${result.success}`,
+			() => `  Message: ${result.message}`,
+			() => "\nStatistics:",
+			() => `  Total files found: ${result.stats.total}`,
+			() => `  Already imported: ${result.stats.alreadyImported}`,
+			() => `  New files: ${result.stats.newFiles}`,
+			() => `  Queued for import: ${result.stats.queued}`,
+		];
+		reportLines.forEach((renderLine) => {
+			console.log(renderLine());
+		});
+
+		return result;
+	} catch (error) {
+		console.error("Error during incremental import:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 2: Get Import Statistics
+ *
+ * Calls the `library.getImportStatistics` action with an empty parameter
+ * object and prints the directory and counters found on the response.
+ * Nothing in this function starts an import.
+ *
+ * @param broker - Broker used to invoke the action
+ * @returns Whatever the action resolved with
+ * @throws Rethrows any failure after logging it
+ */
+async function example2_getImportStatistics(broker: ServiceBroker) {
+	console.log("\n=== Example 2: Get Import Statistics ===\n");
+
+	try {
+		const params = {
+			// Optional: specify a custom directory path
+			// directoryPath: "/path/to/comics"
+		};
+		const result: any = await broker.call("library.getImportStatistics", params);
+
+		// Rendered lazily, in order, so a missing field interrupts output
+		// at the same point it would with sequential log statements.
+		const reportLines: Array<() => string> = [
+			() => "Import Statistics:",
+			() => `  Directory: ${result.directory}`,
+			() => `  Total local files: ${result.stats.totalLocalFiles}`,
+			() => `  Already imported: ${result.stats.alreadyImported}`,
+			() => `  New files to import: ${result.stats.newFiles}`,
+			() => `  Percentage imported: ${result.stats.percentageImported}`,
+		];
+		reportLines.forEach((renderLine) => {
+			console.log(renderLine());
+		});
+
+		return result;
+	} catch (error) {
+		console.error("Error getting import statistics:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 3: Check Specific Files
+ *
+ * Passes three sample paths to `batchCheckImported` and prints, for each
+ * entry of the returned map, whether it is flagged as imported.
+ *
+ * @returns The map returned by `batchCheckImported`
+ * @throws Rethrows any failure after logging it
+ */
+async function example3_checkSpecificFiles() {
+	console.log("\n=== Example 3: Check Specific Files ===\n");
+
+	const filesToCheck = [
+		"/comics/batman-001.cbz",
+		"/comics/superman-001.cbz",
+		"/comics/wonder-woman-001.cbz",
+	];
+
+	try {
+		const results = await batchCheckImported(filesToCheck);
+
+		console.log("File Import Status:");
+		Array.from(results).forEach(([filePath, isImported]) => {
+			let statusLabel = "✗ Not imported";
+			if (isImported) {
+				statusLabel = "✓ Imported";
+			}
+			console.log(`  ${filePath}: ${statusLabel}`);
+		});
+
+		return results;
+	} catch (error) {
+		console.error("Error checking files:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 4: Get All Imported File Paths
+ *
+ * Fetches the set returned by `getImportedFilePaths`, prints its size and
+ * up to the first ten entries, and notes how many entries were left out.
+ *
+ * @returns The set returned by `getImportedFilePaths`
+ * @throws Rethrows any failure after logging it
+ */
+async function example4_getAllImportedPaths() {
+	console.log("\n=== Example 4: Get All Imported File Paths ===\n");
+
+	try {
+		const importedPaths = await getImportedFilePaths();
+		console.log(`Total imported files: ${importedPaths.size}`);
+
+		// Preview at most ten entries
+		const previewLimit = 10;
+		const allPaths = Array.from(importedPaths);
+		const shownCount = Math.min(previewLimit, allPaths.length);
+
+		console.log("\nFirst 10 imported files:");
+		for (let position = 0; position < shownCount; position++) {
+			console.log(`  ${position + 1}. ${allPaths[position]}`);
+		}
+
+		const hiddenCount = allPaths.length - previewLimit;
+		if (hiddenCount > 0) {
+			console.log(`  ... and ${hiddenCount} more`);
+		}
+
+		return importedPaths;
+	} catch (error) {
+		console.error("Error getting imported paths:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 5: Get All Imported File Names
+ *
+ * Fetches the set returned by `getImportedFileNames`, prints its size and
+ * up to the first ten names, and notes how many names were left out.
+ *
+ * @returns The set returned by `getImportedFileNames`
+ * @throws Rethrows any failure after logging it
+ */
+async function example5_getAllImportedNames() {
+	console.log("\n=== Example 5: Get All Imported File Names ===\n");
+
+	try {
+		const importedNames = await getImportedFileNames();
+		console.log(`Total imported file names: ${importedNames.size}`);
+
+		// Preview at most ten entries
+		const allNames = Array.from(importedNames);
+		const preview = allNames.slice(0, 10);
+
+		console.log("\nFirst 10 imported file names:");
+		let ordinal = 0;
+		for (const name of preview) {
+			ordinal += 1;
+			console.log(`  ${ordinal}. ${name}`);
+		}
+
+		const remaining = allNames.length - 10;
+		if (remaining > 0) {
+			console.log(`  ... and ${remaining} more`);
+		}
+
+		return importedNames;
+	} catch (error) {
+		console.error("Error getting imported names:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 6: Find Comics Needing Re-import
+ *
+ * Prints how many documents `getComicsNeedingReimport` returned and, for up
+ * to the first five, the raw file name and path plus whether canonical
+ * title and series values are present.
+ *
+ * @returns The array returned by `getComicsNeedingReimport`
+ * @throws Rethrows any failure after logging it
+ */
+async function example6_findComicsNeedingReimport() {
+	console.log("\n=== Example 6: Find Comics Needing Re-import ===\n");
+
+	try {
+		const comics = await getComicsNeedingReimport();
+		console.log(`Found ${comics.length} comics needing re-import`);
+
+		// Nothing to preview when the list is empty
+		if (comics.length === 0) {
+			return comics;
+		}
+
+		console.log("\nFirst 5 comics needing re-import:");
+		const shownCount = Math.min(5, comics.length);
+		for (let position = 0; position < shownCount; position++) {
+			const comic: any = comics[position];
+			console.log(`  ${position + 1}. ${comic.rawFileDetails?.name || "Unknown"}`);
+			console.log(`     Path: ${comic.rawFileDetails?.filePath || "N/A"}`);
+			console.log(`     Has title: ${!!comic.canonicalMetadata?.title?.value}`);
+			console.log(`     Has series: ${!!comic.canonicalMetadata?.series?.value}`);
+		}
+
+		if (comics.length > 5) {
+			console.log(`  ... and ${comics.length - 5} more`);
+		}
+
+		return comics;
+	} catch (error) {
+		console.error("Error finding comics needing re-import:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 7: Find Duplicate Files
+ *
+ * Prints how many entries `findDuplicateFiles` returned and, for up to the
+ * first five, the shared name, its copy count and every path listed for it.
+ *
+ * @returns The array returned by `findDuplicateFiles`
+ * @throws Rethrows any failure after logging it
+ */
+async function example7_findDuplicates() {
+	console.log("\n=== Example 7: Find Duplicate Files ===\n");
+
+	try {
+		const duplicates = await findDuplicateFiles();
+		console.log(`Found ${duplicates.length} duplicate file names`);
+
+		// Nothing to preview when the list is empty
+		if (duplicates.length === 0) {
+			return duplicates;
+		}
+
+		console.log("\nDuplicate files:");
+		const shownCount = Math.min(5, duplicates.length);
+		for (let position = 0; position < shownCount; position++) {
+			const entry = duplicates[position];
+			console.log(`  ${position + 1}. ${entry.name} (${entry.count} copies)`);
+			for (const location of entry.paths) {
+				console.log(`     - ${location}`);
+			}
+		}
+
+		if (duplicates.length > 5) {
+			console.log(`  ... and ${duplicates.length - 5} more`);
+		}
+
+		return duplicates;
+	} catch (error) {
+		console.error("Error finding duplicates:", error);
+		throw error;
+	}
+}
+
+/**
+ * Example 8: Custom Import Statistics for Specific Directory
+ *
+ * Walks `directoryPath` with klaw, collects every entry whose extension is
+ * `.cbz`, `.cbr` or `.cb7`, and passes the collected paths to
+ * `getImportStatistics` to report how many are already imported.
+ *
+ * @param directoryPath - Root directory to walk
+ * @returns The statistics object returned by `getImportStatistics`
+ * @throws Rethrows walk or lookup failures after logging them
+ */
+async function example8_customDirectoryStats(directoryPath: string) {
+	console.log("\n=== Example 8: Custom Directory Statistics ===\n");
+	console.log(`Analyzing directory: ${directoryPath}`);
+
+	try {
+		const klaw = require("klaw");
+		const path = require("path");
+
+		// Collect all comic files in the custom directory
+		const comicExtensions = [".cbz", ".cbr", ".cb7"];
+		const localFiles: string[] = [];
+
+		await new Promise<void>((resolve, reject) => {
+			// Consume the walker through its "data" event. A piped transform
+			// that is never read does not emit "end", which would leave this
+			// promise pending forever.
+			klaw(directoryPath)
+				.on("error", (err: Error) => {
+					console.error(`Error walking directory:`, err);
+					reject(err);
+				})
+				.on("data", (item: any) => {
+					const fileExtension = path.extname(item.path);
+					if (comicExtensions.includes(fileExtension)) {
+						localFiles.push(item.path);
+					}
+				})
+				.on("end", () => {
+					resolve();
+				});
+		});
+
+		// Get statistics
+		const stats = await getImportStatistics(localFiles);
+
+		// Guard the ratio so an empty directory reports 0.00% rather than NaN%
+		const percentage = stats.total > 0 ? (stats.alreadyImported / stats.total) * 100 : 0;
+
+		console.log("\nStatistics:");
+		console.log(`  Total files: ${stats.total}`);
+		console.log(`  Already imported: ${stats.alreadyImported}`);
+		console.log(`  New files: ${stats.newFiles}`);
+		console.log(`  Percentage: ${percentage.toFixed(2)}%`);
+
+		return stats;
+	} catch (error) {
+		console.error("Error getting custom directory stats:", error);
+		throw error;
+	}
+}
+
+/**
+ * Run all examples
+ *
+ * Executes examples 1 through 7 strictly one after another, framed by a
+ * banner. Example 8 is left out because it needs a directory path. The first
+ * failing example stops the run.
+ *
+ * @param broker - Broker handed to the examples that call service actions
+ * @throws Rethrows the first example failure after logging it
+ */
+async function runAllExamples(broker: ServiceBroker) {
+	console.log("╔════════════════════════════════════════════════════════════╗");
+	console.log("║          Incremental Import Examples                       ║");
+	console.log("╚════════════════════════════════════════════════════════════╝");
+
+	try {
+		// Ordered list of steps; each one is awaited before the next starts
+		const steps: Array<() => Promise<unknown>> = [
+			// Example 1: Basic incremental import
+			() => example1_basicIncrementalImport(broker),
+			// Example 2: Get statistics without importing
+			() => example2_getImportStatistics(broker),
+			// Example 3: Check specific files
+			() => example3_checkSpecificFiles(),
+			// Example 4: Get all imported paths
+			() => example4_getAllImportedPaths(),
+			// Example 5: Get all imported names
+			() => example5_getAllImportedNames(),
+			// Example 6: Find comics needing re-import
+			() => example6_findComicsNeedingReimport(),
+			// Example 7: Find duplicates
+			() => example7_findDuplicates(),
+			// Example 8: Custom directory stats (uncomment and provide path)
+			// () => example8_customDirectoryStats("/path/to/custom/comics"),
+		];
+
+		for (const runStep of steps) {
+			await runStep();
+		}
+
+		console.log("\n╔════════════════════════════════════════════════════════════╗");
+		console.log("║  All examples completed successfully!                      ║");
+		console.log("╚════════════════════════════════════════════════════════════╝\n");
+	} catch (error) {
+		console.error("\n❌ Error running examples:", error);
+		throw error;
+	}
+}
+
+/**
+ * Usage in your service or application
+ */
+export {
+	example1_basicIncrementalImport,
+	example2_getImportStatistics,
+	example3_checkSpecificFiles,
+	example4_getAllImportedPaths,
+	example5_getAllImportedNames,
+	example6_findComicsNeedingReimport,
+	example7_findDuplicates,
+	example8_customDirectoryStats,
+	runAllExamples,
+};
+
+// When this file is the entry module, only print usage guidance; no example is executed.
+if (require.main === module) {
+	const usageLines = [
+		"Note: This is an example file. To run these examples:",
+		"1. Ensure your Moleculer broker is running",
+		"2. Import and call the example functions from your service",
+		"3. Or integrate the patterns into your application",
+		"\nQuick Start:",
+		"  - Use example1_basicIncrementalImport() to import only new files",
+		"  - Use example2_getImportStatistics() to check status before importing",
+		"  - Use example3_checkSpecificFiles() to verify specific files",
+	];
+	for (const line of usageLines) {
+		console.log(line);
+	}
+}
--- a/moleculer.config.ts
+++ b/moleculer.config.ts
@@ -102,7 +102,8 @@ const brokerConfig: BrokerOptions = {
 	serializer: "JSON",

 	// Number of milliseconds to wait before reject a request with a RequestTimeout error. Disabled: 0
-	requestTimeout: 10 * 1000,
+	// Increased to 60 seconds to handle long-running operations like import statistics on large libraries
+	requestTimeout: 60 * 1000,

 	// Retry policy settings. More info: https://moleculer.services/docs/0.14/fault-tolerance.html#Retry
 	retryPolicy: {
--- a/package-lock.json
+++ b/package-lock.json
@@ -43,7 +43,7 @@
        "moleculer-db": "^0.8.23",
        "moleculer-db-adapter-mongoose": "^0.9.2",
        "moleculer-io": "^2.2.0",
-        "moleculer-web": "^0.10.5",
+        "moleculer-web": "^0.10.8",
        "mongoosastic-ts": "^6.0.3",
        "mongoose": "^6.10.4",
        "mongoose-paginate-v2": "^1.3.18",
--- a/services/api.service.ts
+++ b/services/api.service.ts
@@ -280,17 +280,69 @@ export default class ApiService extends Service {
        const newStats = await fs.promises.stat(filePath);
        if (newStats.mtime.getTime() === stats.mtime.getTime()) {
          this.logger.info(`Stable file detected: ${filePath}, importing.`);
-          const folderData: IFolderData = await this.broker.call(
-            "library.walkFolders",
-            { basePathToWalk: filePath }
-          );
-          // this would have to be a call to importDownloadedComic
-          await this.broker.call("importqueue.processImport", {
-            fileObject: {
-              filePath,
-              fileSize: folderData[0].fileSize,
-            },
-          });
+          
+          try {
+            const folderData: IFolderData[] = await this.broker.call(
+              "library.walkFolders",
+              { basePathToWalk: filePath }
+            );
+            
+            if (folderData && folderData.length > 0) {
+              const fileData = folderData[0];
+              const fileName = path.basename(filePath, path.extname(filePath));
+              const extension = path.extname(filePath);
+              
+              // Determine mimeType based on extension
+              let mimeType = "application/octet-stream";
+              if (extension === ".cbz") {
+                mimeType = "application/zip; charset=binary";
+              } else if (extension === ".cbr") {
+                mimeType = "application/x-rar-compressed; charset=binary";
+              }
+              
+              // Prepare payload for rawImportToDB
+              const payload = {
+                rawFileDetails: {
+                  name: fileName,
+                  filePath: filePath,
+                  fileSize: fileData.fileSize,
+                  extension: extension,
+                  mimeType: mimeType,
+                },
+                inferredMetadata: {
+                  issue: {
+                    name: fileName,
+                    number: 0,
+                  },
+                },
+                sourcedMetadata: {
+                  comicInfo: null,
+                },
+                importStatus: {
+                  isImported: true,
+                  tagged: false,
+                  matchedResult: {
+                    score: "0",
+                  },
+                },
+                acquisition: {
+                  source: {
+                    wanted: false,
+                  },
+                },
+              };
+              
+              // Call the library service to import the comic
+              await this.broker.call("library.rawImportToDB", {
+                importType: "new",
+                payload: payload,
+              });
+              
+              this.logger.info(`Successfully queued import for: ${filePath}`);
+            }
+          } catch (error) {
+            this.logger.error(`Error importing file ${filePath}:`, error);
+          }
        }
      }, 3000);
    }
--- a/utils/import.utils.ts
+++ b/utils/import.utils.ts
@@ -0,0 +1,286 @@
+/**
+ * Import utilities for checking existing records and managing incremental imports
+ */
+
+import Comic from "../models/comic.model";
+import path from "path";
+
+/**
+ * Load every stored `rawFileDetails.filePath` and return the values,
+ * passed through `path.normalize`, as a Set.
+ *
+ * Only documents where the field exists and is not null are queried, and
+ * only that field is projected.
+ *
+ * @returns Set of normalized file paths
+ * @throws Rethrows query failures after logging them
+ */
+export async function getImportedFilePaths(): Promise<Set<string>> {
+	try {
+		const storedDocs = await Comic.find(
+			{ "rawFileDetails.filePath": { $exists: true, $ne: null } },
+			{ "rawFileDetails.filePath": 1, _id: 0 }
+		).lean();
+
+		const normalizedPaths = new Set<string>();
+		storedDocs.forEach((doc) => {
+			const storedPath = doc.rawFileDetails?.filePath;
+			if (!storedPath) {
+				// Skip empty or missing values
+				return;
+			}
+			// Normalize so differently written forms of one path collapse together
+			normalizedPaths.add(path.normalize(storedPath));
+		});
+
+		console.log(`Found ${normalizedPaths.size} imported files in database`);
+		return normalizedPaths;
+	} catch (error) {
+		console.error("Error fetching imported file paths:", error);
+		throw error;
+	}
+}
+
+/**
+ * Load every stored `rawFileDetails.name` and return the distinct values
+ * as a Set, for matching that does not depend on where a file lives.
+ *
+ * Only documents where the field exists and is not null are queried, and
+ * only that field is projected.
+ *
+ * @returns Set of stored file names
+ * @throws Rethrows query failures after logging them
+ */
+export async function getImportedFileNames(): Promise<Set<string>> {
+	try {
+		const storedDocs = await Comic.find(
+			{ "rawFileDetails.name": { $exists: true, $ne: null } },
+			{ "rawFileDetails.name": 1, _id: 0 }
+		).lean();
+
+		const knownNames = new Set<string>();
+		storedDocs.forEach((doc) => {
+			const storedName = doc.rawFileDetails?.name;
+			if (storedName) {
+				knownNames.add(storedName);
+			}
+		});
+
+		console.log(`Found ${knownNames.size} imported file names in database`);
+		return knownNames;
+	} catch (error) {
+		console.error("Error fetching imported file names:", error);
+		throw error;
+	}
+}
+
+/**
+ * Report whether a document exists whose `rawFileDetails.filePath` equals
+ * the normalized form of `filePath`.
+ *
+ * Lookup errors are logged and reported as `false` instead of being thrown.
+ *
+ * @param filePath - Full file path to check
+ * @returns true when a matching document exists
+ */
+export async function isFileImported(filePath: string): Promise<boolean> {
+	try {
+		const lookupPath = path.normalize(filePath);
+		const match = await Comic.exists({ "rawFileDetails.filePath": lookupPath });
+		if (match === null) {
+			return false;
+		}
+		return true;
+	} catch (error) {
+		console.error(`Error checking if file is imported: ${filePath}`, error);
+		return false;
+	}
+}
+
+/**
+ * Report whether a document exists whose `rawFileDetails.name` equals
+ * `fileName`.
+ *
+ * Lookup errors are logged and reported as `false` instead of being thrown.
+ *
+ * @param fileName - File name without extension
+ * @returns true when a matching document exists
+ */
+export async function isFileNameImported(fileName: string): Promise<boolean> {
+	try {
+		const match = await Comic.exists({ "rawFileDetails.name": fileName });
+		if (match === null) {
+			return false;
+		}
+		return true;
+	} catch (error) {
+		console.error(`Error checking if file name is imported: ${fileName}`, error);
+		return false;
+	}
+}
+
+/**
+ * Keep only the entries whose normalized `path` is absent from
+ * `importedPaths`. Input order is preserved and the input is not modified.
+ *
+ * @param files - Array of objects with path property
+ * @param importedPaths - Set of already-imported paths to compare against
+ * @returns New array holding the entries not found in the set
+ */
+export function filterNewFiles<T extends { path: string }>(
+	files: T[],
+	importedPaths: Set<string>
+): T[] {
+	const unimported: T[] = [];
+	for (const candidate of files) {
+		const lookupPath = path.normalize(candidate.path);
+		if (importedPaths.has(lookupPath)) {
+			continue;
+		}
+		unimported.push(candidate);
+	}
+	return unimported;
+}
+
+/**
+ * Keep only the entries whose `name` is absent from `importedNames`.
+ * Names are compared exactly as given; input order is preserved.
+ *
+ * @param files - Array of objects with name property
+ * @param importedNames - Set of already-imported names to compare against
+ * @returns New array holding the entries not found in the set
+ */
+export function filterNewFilesByName<T extends { name: string }>(
+	files: T[],
+	importedNames: Set<string>
+): T[] {
+	const unimported: T[] = [];
+	for (const candidate of files) {
+		if (importedNames.has(candidate.name)) {
+			continue;
+		}
+		unimported.push(candidate);
+	}
+	return unimported;
+}
+
+/**
+ * Compare local files against the database to get import statistics.
+ *
+ * All paths are normalized and de-duplicated, then looked up with a single
+ * `$in` query instead of loading every comic. `alreadyImported` counts the
+ * entries of `localFilePaths` whose normalized path was found, so a path
+ * listed more than once is counted consistently in `total`,
+ * `alreadyImported` and `newFiles`.
+ *
+ * @param localFilePaths - Array of local file paths
+ * @returns Counts for the supplied paths plus the Set of normalized paths
+ *          found in the database
+ * @throws Propagates any error raised by the database query
+ */
+export async function getImportStatistics(localFilePaths: string[]): Promise<{
+	total: number;
+	alreadyImported: number;
+	newFiles: number;
+	importedPaths: Set<string>;
+}> {
+	console.log(`[Import Stats] Checking ${localFilePaths.length} files against database...`);
+
+	// Normalize all paths upfront
+	const normalizedPaths = localFilePaths.map((p) => path.normalize(p));
+
+	// Send each distinct path only once to keep the $in list small
+	const uniquePaths = Array.from(new Set(normalizedPaths));
+
+	// Skip the query entirely when there is nothing to look up
+	let importedComics: any[] = [];
+	if (uniquePaths.length > 0) {
+		importedComics = await Comic.find(
+			{
+				"rawFileDetails.filePath": { $in: uniquePaths },
+			},
+			{ "rawFileDetails.filePath": 1, _id: 0 }
+		).lean();
+	}
+
+	// Build Set of imported paths
+	const importedPaths = new Set<string>(
+		importedComics
+			.map((c: any) => c.rawFileDetails?.filePath)
+			.filter(Boolean)
+			.map((p: string) => path.normalize(p))
+	);
+
+	// Count per input entry rather than per distinct path so the figures
+	// always add up to `total`, even when the input repeats a file
+	const alreadyImported = normalizedPaths.filter((p) => importedPaths.has(p)).length;
+	const newFiles = localFilePaths.length - alreadyImported;
+
+	console.log(`[Import Stats] Results: ${alreadyImported} already imported, ${newFiles} new files`);
+
+	return {
+		total: localFilePaths.length,
+		alreadyImported,
+		newFiles,
+		importedPaths,
+	};
+}
+
+/**
+ * Look up several file paths with one query and report, per supplied path,
+ * whether its normalized form was found in `rawFileDetails.filePath`.
+ *
+ * Keys of the returned map are the paths exactly as supplied.
+ *
+ * @param filePaths - Array of file paths to check
+ * @returns Map of filePath -> isImported boolean
+ * @throws Rethrows query failures after logging them
+ */
+export async function batchCheckImported(
+	filePaths: string[]
+): Promise<Map<string, boolean>> {
+	try {
+		const lookupPaths = filePaths.map((p) => path.normalize(p));
+
+		// One round trip for the whole batch
+		const matchedDocs = await Comic.find(
+			{
+				"rawFileDetails.filePath": { $in: lookupPaths },
+			},
+			{ "rawFileDetails.filePath": 1, _id: 0 }
+		).lean();
+
+		// Gather the normalized form of every stored path that matched
+		const foundPaths = new Set<string>();
+		matchedDocs.forEach((doc: any) => {
+			const storedPath = doc.rawFileDetails?.filePath;
+			if (storedPath) {
+				foundPaths.add(path.normalize(storedPath));
+			}
+		});
+
+		// Answer under the caller's original spelling of each path
+		const statusByPath = new Map<string, boolean>();
+		filePaths.forEach((suppliedPath, position) => {
+			statusByPath.set(suppliedPath, foundPaths.has(lookupPaths[position]));
+		});
+
+		return statusByPath;
+	} catch (error) {
+		console.error("Error batch checking imported files:", error);
+		throw error;
+	}
+}
+
+/**
+ * Return the documents that have a non-null `rawFileDetails.filePath` but
+ * lack `canonicalMetadata`, `canonicalMetadata.title` or
+ * `canonicalMetadata.series` (any one missing is enough).
+ *
+ * @returns Array of lean comic documents matching those conditions
+ * @throws Rethrows query failures after logging them
+ */
+export async function getComicsNeedingReimport(): Promise<any[]> {
+	try {
+		const incompleteComics = await Comic.find({
+			// A file must be attached ...
+			"rawFileDetails.filePath": { $exists: true, $ne: null },
+			// ... and at least one canonical metadata piece must be absent
+			$or: [
+				{ canonicalMetadata: { $exists: false } },
+				{ "canonicalMetadata.title": { $exists: false } },
+				{ "canonicalMetadata.series": { $exists: false } },
+			],
+		}).lean();
+
+		const foundCount = incompleteComics.length;
+		console.log(`Found ${foundCount} comics needing re-import`);
+
+		return incompleteComics;
+	} catch (error) {
+		console.error("Error finding comics needing re-import:", error);
+		throw error;
+	}
+}
+
+/**
+ * Find file names that are stored on more than one comic document.
+ *
+ * Documents are grouped by `rawFileDetails.name`; groups with a count above
+ * one are returned, largest count first. `paths` lists the
+ * `rawFileDetails.filePath` of every document in the group.
+ * NOTE(review): the pipeline does not check that the paths within a group
+ * differ, so the same path stored twice would presumably also be
+ * reported — confirm whether that is intended.
+ *
+ * @returns Array of duplicates with name, paths, and count
+ * @throws Rethrows aggregation failures after logging them
+ */
+export async function findDuplicateFiles(): Promise<
+	Array<{ name: string; paths: string[]; count: number }>
+> {
+	try {
+		const duplicates = await Comic.aggregate([
+			{
+				// Only consider documents that actually carry a name
+				$match: {
+					"rawFileDetails.name": { $exists: true, $ne: null },
+				},
+			},
+			{
+				// One group per name, collecting its paths and document count
+				$group: {
+					_id: "$rawFileDetails.name",
+					paths: { $push: "$rawFileDetails.filePath" },
+					count: { $sum: 1 },
+				},
+			},
+			{
+				// Keep only names that occur more than once
+				$match: {
+					count: { $gt: 1 },
+				},
+			},
+			{
+				// Reshape to { name, paths, count }
+				$project: {
+					_id: 0,
+					name: "$_id",
+					paths: 1,
+					count: 1,
+				},
+			},
+			{
+				// Highest count first
+				$sort: { count: -1 },
+			},
+		]);
+
+		console.log(`Found ${duplicates.length} duplicate file names`);
+		return duplicates;
+	} catch (error) {
+		console.error("Error finding duplicate files:", error);
+		throw error;
+	}
+}