👇🏼 Inferring issue metadata upon import

2022-02-06 23:14:18 -08:00
parent f0d6143af2
commit bfb1f7fa28
9 changed files with 6712 additions and 479 deletions
--- a/services/api.service.ts
+++ b/services/api.service.ts
@@ -100,7 +100,7 @@ export default class ApiService extends Service {
 								// 1. Send task to queue
 								console.log(`Recieved ${action.type} event.`)
 								await this.broker.call(
-									"import.newImport",
+									"library.newImport",
 									action.data,
 									{}
 								);
@@ -142,10 +142,10 @@ export default class ApiService extends Service {
 									"File detected, starting import..."
 								);
 								const walkedFolder: IFolderData =
-									await broker.call("import.walkFolders", {
+									await broker.call("library.walkFolders", {
 										basePathToWalk: path,
 									});
-								await this.broker.call("libraryqueue.enqueue", {
+								await this.broker.call("queue.processImport", {
 									fileObject: {
 										filePath: walkedFolder[0].filePath,
 										fileSize: walkedFolder[0].fileSize,
--- a/services/library.service.ts
+++ b/services/library.service.ts
@@ -32,7 +32,7 @@ SOFTWARE.
 */

 "use strict";
-import { isDate, isNil, isUndefined, map } from "lodash";
+import { isNil, isNull, isUndefined, map } from "lodash";
 import {
 	Context,
 	Service,
@@ -44,7 +44,6 @@ import { DbMixin } from "../mixins/db.mixin";
 import Comic from "../models/comic.model";
 import { explodePath, walkFolder } from "../utils/file.utils";
 import { convertXMLToJSON } from "../utils/xml.utils";
-import https from "https";
 import {
 	IExtractComicBookCoverErrorResponse,
 	IExtractedComicBookCoverFile,
@@ -66,7 +65,7 @@ export default class ImportService extends Service {
 	public constructor(public broker: ServiceBroker) {
 		super(broker);
 		this.parseServiceSchema({
-			name: "import",
+			name: "library",
 			mixins: [DbMixin("comics", Comic)],
 			hooks: {},
 			actions: {
@@ -129,7 +128,7 @@ export default class ImportService extends Service {
 								});
 								if (!comicExists) {
 									// 2. Send the extraction job to the queue
-									await broker.call("libraryqueue.enqueue", {
+									await broker.call("queue.processImport", {
 										fileObject: {
 											filePath: item.path,
 											fileSize: item.stats.size,
@@ -146,86 +145,7 @@ export default class ImportService extends Service {
 							});
 					},
 				},
-				nicefyPath: {
-					rest: "POST /nicefyPath",
-					params: {},
-					async handler(
-						ctx: Context<{
-							filePath: string;
-						}>
-					) {
-						return explodePath(ctx.params.filePath);
-					},
-				},
-				processAndImportToDB: {
-					rest: "POST /processAndImportToDB",

-					params: {},
-					async handler(
-						ctx: Context<{
-							walkedFolder: {
-								name: string;
-								path: string;
-								extension: string;
-								containedIn: string;
-								fileSize: number;
-								isFile: boolean;
-								isLink: boolean;
-							};
-						}>
-					) {
-						try {
-							const { walkedFolder } = ctx.params;
-							let comicExists = await Comic.exists({
-								"rawFileDetails.name": `${walkedFolder.name}`,
-							});
-							// rough flow of import process
-							// 1. Walk folder
-							// 2. For each folder, call extract function
-							// 3. For each successful extraction, run dbImport
-
-							if (!comicExists) {
-								// 1. Extract cover and cover metadata
-								let comicBookCoverMetadata:
-									| IExtractedComicBookCoverFile
-									| IExtractComicBookCoverErrorResponse
-									| IExtractedComicBookCoverFile[] = await extractCoverFromFile2(
-									walkedFolder[0]
-								);
-
-								// 2. Add to mongo
-								const dbImportResult = await this.broker.call(
-									"import.rawImportToDB",
-									{
-										importStatus: {
-											isImported: true,
-											tagged: false,
-											matchedResult: {
-												score: "0",
-											},
-										},
-										rawFileDetails: comicBookCoverMetadata,
-										sourcedMetadata: {
-											comicvine: {},
-										},
-									},
-									{}
-								);
-
-								return {
-									comicBookCoverMetadata,
-									dbImportResult,
-								};
-							} else {
-								console.info(
-									`Comic: \"${walkedFolder.name}\" already exists in the database`
-								);
-							}
-						} catch (error) {
-							console.error("Error importing comic books", error);
-						}
-					},
-				},
 				rawImportToDB: {
 					rest: "POST /rawImportToDB",
 					params: {},
@@ -250,13 +170,16 @@ export default class ImportService extends Service {
 								comicMetadata.sourcedMetadata.comicvine.volume
 							)
 						) {
-							volumeDetails =
-								await this.getComicVineVolumeMetadata(
-									comicMetadata.sourcedMetadata.comicvine
-										.volume.api_detail_url
-								);
+							volumeDetails = await this.broker.call(
+								"comicvine.getVolumes",
+								{
+									volumeURI:
+										comicMetadata.sourcedMetadata.comicvine
+											.volume.api_detail_url,
+								}
+							);
 							comicMetadata.sourcedMetadata.comicvine.volumeInformation =
-								volumeDetails;
+								volumeDetails.results;
 						}
 						return new Promise(async (resolve, reject) => {
 							Comic.create(ctx.params, (error, data) => {
@@ -291,35 +214,39 @@ export default class ImportService extends Service {
 						const comicObjectId = new ObjectId(
 							ctx.params.comicObjectId
 						);
-						const matchedResult = ctx.params.match;
-						let volumeDetailsPromise;
-						if (!isNil(matchedResult.volume)) {
-							volumeDetailsPromise =
-								this.getComicVineVolumeMetadata(
-									matchedResult.volume.api_detail_url
-								);
-						}
 						return new Promise(async (resolve, reject) => {
-							const volumeDetails = await volumeDetailsPromise;
-							matchedResult.volumeInformation = volumeDetails;
-							Comic.findByIdAndUpdate(
-								comicObjectId,
-								{
-									sourcedMetadata: {
-										comicvine: matchedResult,
-									},
-								},
-								{ new: true },
-								(err, result) => {
-									if (err) {
-										console.info(err);
-										reject(err);
-									} else {
-										// 3. Fetch and append volume information
-										resolve(result);
-									}
-								}
-							);
+							try {
+								const matchedResult = ctx.params.match;
+								// The volume lookup is optional: a match without a volume is
+								// still saved rather than leaving this promise unsettled.
+								if (!isNil(matchedResult.volume)) {
+									const volumeDetails = await this.broker.call(
+										"comicvine.getVolumes",
+										{
+											volumeURI:
+												matchedResult.volume.api_detail_url,
+										}
+									);
+									matchedResult.volumeInformation = volumeDetails.results;
+								}
+								Comic.findByIdAndUpdate(
+									comicObjectId,
+									{ sourcedMetadata: { comicvine: matchedResult } },
+									{ new: true },
+									(err, result) => {
+										if (err) {
+											console.info(err);
+											reject(err);
+										} else {
+											resolve(result);
+										}
+									}
+								);
+							} catch (error) {
+								// Errors thrown inside an async executor are otherwise lost,
+								// so forward them to the caller explicitly.
+								reject(error);
+							}
 						});
 					},
 				},
@@ -386,15 +313,17 @@ export default class ImportService extends Service {
 				getComicBooksByIds: {
 					rest: "POST /getComicBooksByIds",
 					params: { ids: "array" },
-					handler: async (ctx: Context<{ ids: [string]}>) => {
+					handler: async (ctx: Context<{ ids: [string] }>) => {
 						console.log(ctx.params.ids);
-						const queryIds = ctx.params.ids.map((id) => new ObjectId(id));
+						const queryIds = ctx.params.ids.map(
+							(id) => new ObjectId(id)
+						);
 						return await Comic.find({
-							'_id': {
+							_id: {
 								$in: queryIds,
-							}
-						})
-					}
+							},
+						});
+					},
 				},
 				getComicBookGroups: {
 					rest: "GET /getComicBookGroups",
@@ -449,61 +378,104 @@ export default class ImportService extends Service {
 						return Promise.all(volumesMetadata);
 					},
 				},
-				findIssuesForSeriesInLibrary: {
-					rest: "POST /findIssuesForSeriesInLibrary",
+
+				findIssuesForSeries: {
+					rest: "POST /findIssueForSeries",
 					params: {},
 					handler: async (
-						ctx: Context<{ comicObjectID: string }>
+						ctx: Context<{
+							queryObjects: [
+								{
+									issueId: string;
+									issueName: string;
+									volumeName: string;
+									issueNumber: string;
+								}
+							];
+						}>
 					) => {
-						// 1. Query mongo to get the comic document by its _id
-						const comicBookDetails: any = await this.broker.call(
-							"import.getComicBookById",
-							{ id: ctx.params.comicObjectID }
+						// 2a. Enqueue the Elasticsearch job
+						const { queryObjects } = ctx.params;
+						// construct the query for ElasticSearch
+						let elasticSearchQuery = {};
+						const elasticSearchQueries = queryObjects.map(
+							(queryObject) => {
+								console.log("Volume: ", queryObject.volumeName);
+								console.log("Issue: ", queryObject.issueName);
+								if (queryObject.issueName === null) {
+									queryObject.issueName = "";
+								}
+								if (queryObject.volumeName === null) {
+									queryObject.volumeName = "";
+								}
+								elasticSearchQuery = {
+									bool: {
+										must: [
+											// {
+											// 	match_phrase: {
+											// 		"rawFileDetails.name":
+											// 			queryObject.issueName,
+											// 	},
+											// },
+											{
+												match_phrase: {
+													"rawFileDetails.name":
+														queryObject.volumeName,
+												},
+											},
+											{
+												term: {
+													"inferredMetadata.issue.number":
+														parseInt(queryObject.issueNumber, 10),
+												},
+											},
+										],
+									},
+								};
+
+								return [
+									{
+										index: "comics",
+										search_type: "dfs_query_then_fetch",
+									},
+									// { issueId: queryObject.issueId },
+									{
+										query: elasticSearchQuery,
+										// script_fields: {
+										// 	issueId: {
+										// 		script: {
+										// 			lang: "painless",
+										// 			params: {
+										// 				match: {
+										// 					issueId:
+										// 						queryObject.issueId,
+										// 				},
+										// 			},
+										// 			inline: "params.match",
+										// 		},
+										// 	},
+										// 	fileName: {
+										// 		script: {
+										// 			lang: "painless",
+										// 			inline: "params['_source']['rawFileDetails']",
+										// 		},
+										// 	},
+										// },
+									},
+								];
+							}
+						);
+						console.log(
+							JSON.stringify(elasticSearchQueries, null, 2)
 						);

-						// 2. Query CV and get metadata for them
-						const foo =
-							await comicBookDetails.sourcedMetadata.comicvine.volumeInformation.issues.map(
-								async (issue: any, idx: any) => {
-									const metadata: any = await axios.request({
-										url: `${issue.api_detail_url}?api_key=${process.env.COMICVINE_API_KEY}`,
-										params: {
-											resources: "issues",
-											limit: "100",
-											format: "json",
-										},
-										headers: {
-											"User-Agent": "ThreeTwo",
-										},
-									});
-									const issueMetadata = metadata.data.results;
-
-									// 2a. Enqueue the Elasticsearch job
-									if (
-										!isUndefined(issueMetadata.volume.name) &&
-										!isUndefined(issueMetadata.issue_number)
-									) {
-										await ctx.broker.call(
-											"libraryqueue.issuesForSeries",
-											{
-												queryObject: {
-													issueId: issue.id,
-													issueName: issueMetadata.name,
-													volumeName:
-														issueMetadata.volume
-															.name,
-													issueNumber:
-														issueMetadata.issue_number,
-													issueMetadata,
-												},
-											}
-										);
-									}
-									// 3. Just return the issues
-									return issueMetadata;
-								}
-							);
-						return Promise.all(foo);
+						return await ctx.broker.call("search.searchComic", {
+							elasticSearchQueries,
+							queryObjects,
+						});
+						// await ctx.broker.call("queue.issuesForSeries", {
+						// 	elasticSearchQueries,
+						// });
 					},
 				},
 				flushDB: {
@@ -551,40 +523,7 @@ export default class ImportService extends Service {
 					},
 				},
 			},
-			methods: {
-				getComicVineVolumeMetadata: (apiDetailURL) =>
-					new Promise((resolve, reject) => {
-						const options = {
-							headers: {
-								"User-Agent": "ThreeTwo",
-							},
-						};
-						return https
-							.get(
-								`${apiDetailURL}?api_key=${process.env.COMICVINE_API_KEY}&format=json&limit=1&offset=0`,
-								options,
-								(resp) => {
-									let data = "";
-									resp.on("data", (chunk) => {
-										data += chunk;
-									});
-
-									resp.on("end", () => {
-										console.log(
-											`${apiDetailURL} returned data.`
-										);
-										const volumeInformation =
-											JSON.parse(data);
-										resolve(volumeInformation.results);
-									});
-								}
-							)
-							.on("error", (err) => {
-								console.info("Error: " + err.message);
-								reject(err);
-							});
-					}),
-			},
+			methods: {},
 		});
 	}
 }
--- a/services/libraryqueue.service.ts
+++ b/services/libraryqueue.service.ts
@@ -47,15 +47,16 @@ import { SandboxedJob } from "moleculer-bull";
 import { DbMixin } from "../mixins/db.mixin";
 import Comic from "../models/comic.model";
 import { extractCoverFromFile2 } from "../utils/uncompression.utils";
+import { refineQuery } from "filename-parser";
 import { io } from "./api.service";
 const REDIS_URI = process.env.REDIS_URI || `redis://0.0.0.0:6379`;

 console.log(`REDIS -> ${REDIS_URI}`);
-export default class LibraryQueueService extends Service {
+export default class QueueService extends Service {
 	public constructor(public broker: ServiceBroker) {
 		super(broker);
 		this.parseServiceSchema({
-			name: "libraryqueue",
+			name: "queue",
 			mixins: [BullMQMixin(REDIS_URI), DbMixin("comics", Comic)],
 			settings: {},
 			hooks: {},
@@ -70,9 +71,13 @@ export default class LibraryQueueService extends Service {
 							job.data.fileObject
 						);

+						// infer any issue-related metadata from the filename
+						const { inferredIssueDetails } = refineQuery(result.name);
+						console.log("Issue metadata inferred: ", JSON.stringify(inferredIssueDetails, null, 2));
+
 						// write to mongo
 						const dbImportResult = await this.broker.call(
-							"import.rawImportToDB",
+							"library.rawImportToDB",
 							{
 								importStatus: {
 									isImported: true,
@@ -82,6 +87,9 @@ export default class LibraryQueueService extends Service {
 									},
 								},
 								rawFileDetails: result,
+								inferredMetadata: {
+									issue: inferredIssueDetails,
+								},
 								sourcedMetadata: {
 									comicvine: {},
 								},
@@ -96,45 +104,11 @@ export default class LibraryQueueService extends Service {
 						});
 					},
 				},
-				"issue.findMatchesInLibrary": {
-					concurrency: 20,
-					async process(job: SandboxedJob) {
-						try {
-							console.log(
-								"Job recieved to find issue matches in library."
-							);
-							const matchesInLibrary = await this.broker.call(
-								"search.searchComic",
-								{
-									queryObject: job.data.queryObject,
-								}
-							);
-							if (
-								!isNil(matchesInLibrary) &&
-								!isUndefined(matchesInLibrary)
-							) {
-								console.log("Matches found in library:");
-								console.log(matchesInLibrary);
-
-								const foo = extend(
-									{ issue: job.data.queryObject.issueMetadata },
-									{ matches: matchesInLibrary }
-								);
-								return foo;
-							} else {
-								console.log(
-									"No match was found for this issue in the library."
-								);
-							}
-						} catch (error) {
-							throw error;
-						}
-					},
-				},
+				
 			},
 			actions: {
-				enqueue: {
-					rest: "POST /enqueue",
+				processImport: {
+					rest: "POST /processImport",
 					params: {},
 					async handler(
 						ctx: Context<{
@@ -146,28 +120,6 @@ export default class LibraryQueueService extends Service {
 						});
 					},
 				},
-				issuesForSeries: {
-					rest: "POST /findIssuesForSeries",
-					params: {},
-					handler: async (
-						ctx: Context<{
-							queryObject: {
-								issueName: string;
-								volumeName: string;
-								issueNumber: string;
-								issueId: string;
-								issueMetadata: object;
-							};
-						}>
-					) => {
-						return await this.createJob(
-							"issue.findMatchesInLibrary",
-							{
-								queryObject: ctx.params.queryObject,
-							}
-						);
-					},
-				},
 			},
 			methods: {},
 			async started(): Promise<any> {
@@ -213,7 +165,7 @@ export default class LibraryQueueService extends Service {
 						"completed",
 						async (job, res) => {
 							client.emit("action", {
-								type: "CV_ISSUES_FOR_VOLUME_IN_LIBRARY_SUCCESS",
+								type: "CV_ISSUES_FOR_VOLUME_IN_LIBRARY_UPDATED",
 								result: res,
 							});
 							console.info(
--- a/services/search.service.ts
+++ b/services/search.service.ts
@@ -7,21 +7,13 @@ import {
 	Errors,
 } from "moleculer";

-const { Client } = require("@elastic/elasticsearch");
-const client = new Client({
-	node: "http://tower.local:9200",
-	auth: {
-		username: "elastic",
-		password: "password",
-	},
-});
-
 import { DbMixin } from "../mixins/db.mixin";
 import Comic from "../models/comic.model";
 import { refineQuery } from "filename-parser";
-import { filter, isEmpty, isNull } from "lodash";
-
-console.log(client);
+import { each, filter, flatten, isEmpty, isNull } from "lodash";
+import { eSClient } from "../models/comic.model";
+import arrayToNDJSON from "array-to-ndjson";
+const s = eSClient.helpers.msearch();

 export default class SettingsService extends Service {
 	// @ts-ignore
@@ -34,7 +26,7 @@ export default class SettingsService extends Service {
 			Service.mergeSchemas(
 				{
 					name: "search",
-					mixins: [client, DbMixin("comics", Comic)],
+					mixins: [DbMixin("comics", Comic)],
 					hooks: {},
 					actions: {
 						searchComic: {
@@ -43,95 +35,55 @@ export default class SettingsService extends Service {
 							timeout: 400000,
 							async handler(
 								ctx: Context<{
-									queryObject: {
-										issueName: string;
-										volumeName: string;
-										issueNumber: string;
-									};
+									queryObjects: [],
+									elasticSearchQueries: [
+										{
+											elasticSearchQuery: object;
+										}
+									];
 								}>
 							) {
-								let elasticSearchQuery = {};
-								console.log(
-									"Volume: ",
-									ctx.params.queryObject.volumeName
+								const flattenedQueryArray = flatten(
+									ctx.params.elasticSearchQueries
 								);
-								console.log(
-									"Issue: ",
-									ctx.params.queryObject.issueName
-								);
-								if (isNull(ctx.params.queryObject.volumeName)) {
-									elasticSearchQuery = {
-										match: {
-											"rawFileDetails.name": {
-												query: ctx.params.queryObject
-													.issueName,
-												operator: "and",
-												fuzziness: "AUTO",
-											},
-										},
-									};
-								} else if (
-									isNull(ctx.params.queryObject.issueName)
-								) {
-									elasticSearchQuery = {
-										match: {
-											"rawFileDetails.name": {
-												query: ctx.params.queryObject
-													.volumeName,
-												operator: "and",
-												fuzziness: "AUTO",
-											},
-										},
-									};
-								} else {
-									elasticSearchQuery = {
-										bool: {
-											should: [
-												{
-													match_phrase: {
-														"rawFileDetails.name":
-															ctx.params
-																.queryObject
-																.issueName,
-													},
-												},
-												{
-													match_phrase: {
-														"rawFileDetails.name":
-															ctx.params
-																.queryObject
-																.volumeName,
-													},
-												},
-											],
-										},
-									};
-								}
-								console.log(elasticSearchQuery);
-								return Comic.esSearch({
-									query: elasticSearchQuery,
-								}).then(function (results) {
-									// results here
-									const foo = results.body.hits.hits.map(
-										(hit) => {
-											const parsedFilename = refineQuery(
-												hit._source.rawFileDetails.name
-											);
-											if (
-												parsedFilename.searchParams
-													.searchTerms.number ===
-												parseInt(
-													ctx.params.queryObject
-														.issueNumber,
-													10
-												)
-											) {
-												return hit;
-											}
-										}
-									);
-									return filter(foo, null);
+								let queries = flattenedQueryArray
+									.map((item) => JSON.stringify(item))
+									.join("\n");
+								queries += "\n";
+								const { body } = await eSClient.msearch({
+									body: queries,
 								});
+
+								body.responses.forEach((match) => {
+									console.log(match.hits ? match.hits.hits : match.error);
+								});
+
+								return body.responses;
+
+								// return Comic.esSearch({
+								// 	query: elasticSearchQuery,
+								// }).then(function (results) {
+								// 	// results here
+								// 	const foo = results.body.hits.hits.map(
+								// 		(hit) => {
+								// 			const parsedFilename = refineQuery(
+								// 				hit._source.rawFileDetails.name
+								// 			);
+								// 			if (
+								// 				parsedFilename.searchParams
+								// 					.searchTerms.number ===
+								// 				parseInt(
+								// 					ctx.params.queryObject
+								// 						.issueNumber,
+								// 					10
+								// 				)
+								// 			) {
+								// 				return hit;
+								// 			}
+								// 		}
+								// 	);
+								// 	return filter(foo, null);
+								// });
 							},
 						},
 					},