756 lines
20 KiB
TypeScript
756 lines
20 KiB
TypeScript
"use strict";
|
|
|
|
import { Service, ServiceBroker, Context } from "moleculer";
|
|
import axios from "axios";
|
|
import { isNil, isUndefined } from "lodash";
|
|
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
|
|
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
|
import { scrapeIssuePage, getWeeklyPullList } from "../utils/scraping.utils";
|
|
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
|
const { MoleculerError } = require("moleculer").Errors;
|
|
|
|
/** Root of the ComicVine REST API; resource paths are appended directly to it. */
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";

// Never print the key itself: anything written to stdout can end up in shared
// log storage. Only report its absence, since every ComicVine request built in
// this file appends it to the URL.
if (!process.env.COMICVINE_API_KEY) {
  console.warn("ComicVine API Key is not set (COMICVINE_API_KEY)");
}
|
|
export default class ComicVineService extends Service {
|
|
public constructor(public broker: ServiceBroker) {
|
|
super(broker);
|
|
this.parseServiceSchema({
|
|
name: "comicvine",
|
|
actions: {
|
|
search: {
  rest: "/search",
  params: {},
  /**
   * Proxies a query to the ComicVine `search` endpoint.
   *
   * Every entry of `ctx.params` is forwarded as a query-string parameter,
   * so the caller controls format, paging and field selection.
   *
   * @param ctx - Moleculer context carrying the search parameters.
   * @returns The response body received from ComicVine, unmodified.
   */
  handler: async (
    ctx: Context<{
      format: string;
      sort: string;
      query: string;
      field_list: string;
      limit: string;
      offset: string;
      resources: string;
    }>
  ) => {
    const response = await axios.request({
      url: `${CV_BASE_URL}search?api_key=${process.env.COMICVINE_API_KEY}`,
      params: ctx.params,
      headers: {
        Accept: "application/json",
        // Identify ourselves the same way every other ComicVine call in this service does
        "User-Agent": "ThreeTwo",
      },
    });
    return response.data;
  },
},
|
|
getVolumes: {
  rest: "POST /getVolumes",
  params: {},
  /**
   * Retrieves a single ComicVine resource through its detail URL.
   *
   * NOTE(review): the API key is appended to whatever `volumeURI` the caller
   * supplies, so a non-ComicVine URL would receive the key. Consider
   * validating the host once the set of legitimate hosts is confirmed.
   *
   * @param ctx - Context with `volumeURI` (detail URL to request) and
   *   `fieldList` (sent as the `field_list` query parameter).
   * @returns The response body received from the requested URL.
   */
  handler: async (
    ctx: Context<{
      volumeURI: string;
      fieldList: string;
    }>
  ) => {
    const requestUrl = `${ctx.params.volumeURI}?api_key=${process.env.COMICVINE_API_KEY}`;
    const queryParams = {
      format: "json",
      field_list: ctx.params.fieldList,
    };
    const requestHeaders = {
      Accept: "application/json",
      "User-Agent": "ThreeTwo",
    };

    const cvResponse = await axios.request({
      url: requestUrl,
      params: queryParams,
      headers: requestHeaders,
    });
    return cvResponse.data;
  },
},
|
|
getIssuesForSeries: {
  rest: "POST /getIssuesForSeries",
  /**
   * Lists the ComicVine issues that belong to the volume of a library comic.
   *
   * The comic document is loaded through `library.getComicBookById`; its
   * `sourcedMetadata.comicvine.volumeInformation.id` is then used as the
   * `volume:` filter on the ComicVine `issues` endpoint (at most 100 rows).
   *
   * @param ctx - Context with `comicObjectId`, the id passed to the library service.
   * @returns The response body received from ComicVine.
   * @throws MoleculerError (404, `CV_VOLUME_NOT_FOUND`) when the document is
   *   missing or carries no ComicVine volume id.
   */
  handler: async (ctx: Context<{ comicObjectId: string }>) => {
    const { comicObjectId } = ctx.params;

    // 1. Query mongo to get the comic document by its _id
    const comicBookDetails: any = await this.broker.call(
      "library.getComicBookById",
      { id: comicObjectId }
    );

    // Fail with a descriptive error instead of a TypeError on a missing link
    const volumeId =
      comicBookDetails?.sourcedMetadata?.comicvine?.volumeInformation?.id;
    if (isNil(volumeId)) {
      throw new MoleculerError(
        `Comic ${comicObjectId} has no ComicVine volume information`,
        404,
        "CV_VOLUME_NOT_FOUND",
        { comicObjectId }
      );
    }

    // 2. Query CV and get metadata for the issues of that volume
    const issues = await axios({
      url: `${CV_BASE_URL}issues?api_key=${process.env.COMICVINE_API_KEY}`,
      params: {
        resources: "issues",
        limit: "100",
        format: "json",
        filter: `volume:${volumeId}`,
      },
      headers: {
        Accept: "application/json",
        "User-Agent": "ThreeTwo",
      },
    });
    return issues.data;
  },
},
|
|
getWeeklyPullList: {
  rest: "POST /scrapeLOCGForSeries",
  timeout: 30000,
  params: {},
  /**
   * Scrapes the League of Comic Geeks "new comics" page for a date and
   * returns one page of the scraped issues.
   *
   * @param ctx - Context with `startDate` (appended to the LOCG URL),
   *   `currentPage` and `pageSize` (both parsed as base-10 integers).
   * @returns `{ result, meta }` where `result` is the requested slice and
   *   `meta` is the object produced by paginate-info's `paginate`.
   */
  handler: async (
    ctx: Context<{
      startDate: string;
      currentPage: string;
      pageSize: string;
    }>
  ) => {
    const { currentPage, pageSize, startDate } = ctx.params;
    console.log(`date for the pull list: ${startDate}`);

    // Missing or non-numeric values would otherwise become NaN and make
    // slice() return nothing. 20 is presumably paginate-info's own default
    // page size — TODO confirm against the library.
    const DEFAULT_PAGE_SIZE = 20;
    const pageNumber = Math.max(parseInt(currentPage, 10) || 1, 1);
    const perPage = Math.max(
      parseInt(pageSize, 10) || DEFAULT_PAGE_SIZE,
      1
    );

    const { limit, offset } = calculateLimitAndOffset(pageNumber, perPage);

    const url = `https://leagueofcomicgeeks.com/comics/new-comics/${startDate}`;
    const issues = await getWeeklyPullList(url);

    const count = issues.length;
    const paginatedData = issues.slice(offset, offset + limit);

    // The page size must be passed through, otherwise the page count is
    // derived from the library default rather than the requested size.
    const paginationInfo = paginate(
      pageNumber,
      count,
      paginatedData,
      perPage
    );

    return {
      result: paginatedData,
      meta: paginationInfo,
    };
  },
},
|
|
getResource: {
  rest: "POST /getResource",
  /**
   * Queries an arbitrary ComicVine list endpoint.
   *
   * @param ctx - Context with `resources` (endpoint name appended to the
   *   base URL), `filter` (sent as `filter`) and `fieldList` (sent as
   *   `field_list`).
   * @returns The response body received from ComicVine (at most 100 rows
   *   are requested).
   * @throws MoleculerError (422, `VALIDATION_ERROR`) when `resources` is empty.
   */
  handler: async (
    ctx: Context<{
      resources: string;
      filter: string;
      fieldList: string;
    }>
  ) => {
    const { resources, filter, fieldList } = ctx.params;
    if (!resources) {
      // Without it the request would target ".../api/undefined"
      throw new MoleculerError(
        "The 'resources' parameter is required",
        422,
        "VALIDATION_ERROR",
        { field: "resources" }
      );
    }
    console.log(JSON.stringify(ctx.params, null, 2));

    // The request URL is deliberately not logged: it embeds the API key.
    const response = await axios({
      method: "GET",
      url: `${CV_BASE_URL}${resources}?api_key=${process.env.COMICVINE_API_KEY}`,
      params: {
        resources,
        limit: "100",
        format: "json",
        // Passed as-is (not template-stringified) so an absent value is not
        // sent as the literal text "undefined".
        filter,
        field_list: fieldList,
      },
      headers: {
        "Accept": "application/json",
        "User-Agent": "ThreeTwo",
      },
    });
    console.log(response.data);
    return response.data;
  },
},
|
|
volumeBasedSearch: {
  rest: "POST /volumeBasedSearch",
  params: {},
  timeout: 10000000,
  /**
   * Finds ComicVine issues matching a local file by first ranking candidate
   * volumes and then searching issues inside those volumes.
   *
   * Steps:
   *  1. fetch volumes via `fetchVolumesFromCV` and rank them with `rankVolumes`;
   *  2. query the `issues` endpoint filtered by issue number, the ranked
   *     volume ids and (when a numeric year is given) a cover-date window of
   *     one year either side;
   *  3. attach volume details to each issue via `comicvine.getVolumes`;
   *  4. score the issues via `comicvine.getComicVineMatchScores`.
   *
   * Progress and errors are broadcast on the `CV_SCRAPING_STATUS` socket event.
   *
   * @param ctx - Context with the search payload; `scorerConfiguration.searchParams`
   *   is required at runtime.
   * @returns The scorer's result, or `{ finalMatches: [], rawFileDetails,
   *   scorerConfiguration }` when ComicVine returns no issues.
   * @throws Re-throws any error after broadcasting it with stage `"error"`.
   */
  handler: async (
    ctx: Context<{
      format: string;
      sort: string;
      query: string;
      fieldList: string;
      limit: number;
      offset: number;
      resources: string;
      scorerConfiguration?: {
        searchParams: {
          name: string;
          subtitle?: string;
          number: string;
          year: string;
        };
      };
      rawFileDetails: object;
    }>
  ) => {
    // Sends one progress payload to all socket clients.
    const broadcastStatus = (payload: Record<string, unknown>) =>
      this.broker.call("socket.broadcast", {
        namespace: "/",
        event: "CV_SCRAPING_STATUS",
        args: [payload],
      });

    try {
      const { rawFileDetails, scorerConfiguration } = ctx.params;
      if (
        isNil(scorerConfiguration) ||
        isNil(scorerConfiguration.searchParams)
      ) {
        throw new MoleculerError(
          "scorerConfiguration.searchParams is required",
          422,
          "VALIDATION_ERROR",
          { field: "scorerConfiguration.searchParams" }
        );
      }
      const { searchParams } = scorerConfiguration;

      console.log("Searching against: ", searchParams);
      console.log("passed to fetchVolumesFromCV", ctx.params);

      // Send initial status to client
      await broadcastStatus({
        message: `Starting volume search for: ${searchParams.name}`,
        stage: "fetching_volumes",
      });

      const volumes = await this.fetchVolumesFromCV(ctx.params, []);

      // Notify client that volume fetching is complete
      await broadcastStatus({
        message: `Fetched ${volumes.length} volumes, now ranking matches...`,
        stage: "ranking_volumes",
      });

      // 1. Run the volumes through the matcher, best score first
      const potentialVolumeMatches = [
        ...rankVolumes(volumes, scorerConfiguration),
      ].sort((a: any, b: any) => b.totalScore - a.totalScore);

      // Notify client about ranked matches
      await broadcastStatus({
        message: `Found ${potentialVolumeMatches.length} potential volume matches, searching for issues...`,
        stage: "searching_issues",
      });

      // 2. Construct the filter string
      // 2a. volume: 1111|2222|3333
      const volumeIdString = `volume:${potentialVolumeMatches
        .map((volumeMatch: any) => volumeMatch.id)
        .join("|")}`;
      const filterParts = [
        `issue_number:${searchParams.number}`,
        volumeIdString,
      ];

      // 2b. E.g.: cover_date:2014-01-01|2016-12-31 for the issue year 2015.
      // Only added for a numeric year, so the filter never ends with a
      // dangling comma or contains "NaN" dates.
      const issueYear = parseInt(searchParams.year, 10);
      if (!Number.isNaN(issueYear)) {
        filterParts.push(
          `cover_date:${issueYear - 1}-01-01|${issueYear + 1}-12-31`
        );
      }
      const filterString = filterParts.join(",");
      console.log(filterString);

      const issueMatches = await axios({
        url: `${CV_BASE_URL}issues?api_key=${process.env.COMICVINE_API_KEY}`,
        params: {
          resources: "issues",
          limit: "100",
          format: "json",
          filter: filterString,
        },
        headers: {
          "Accept": "application/json",
          "User-Agent": "ThreeTwo",
        },
      });
      // Treat a body without a results array as "no matches"
      const issues: any[] = issueMatches.data?.results ?? [];
      console.log(
        `Total issues matching the criteria: ${issues.length}`
      );

      // Handle case when no issues are found
      if (issues.length === 0) {
        await broadcastStatus({
          message: `No matching issues found. Try adjusting your search criteria.`,
          stage: "complete",
        });
        return {
          finalMatches: [],
          rawFileDetails,
          scorerConfiguration,
        };
      }

      // Notify client about issue matches found
      await broadcastStatus({
        message: `Found ${issues.length} issue matches, fetching volume details...`,
        stage: "fetching_volume_details",
      });

      // 3. Get volume information for every issue match. One code path
      // serves both a single match and many; only the wording differs.
      const isSingleMatch = issues.length === 1;
      const finalMatches = await Promise.all(
        issues.map(async (issue: any) => {
          issue.volumeInformation = await this.broker.call(
            "comicvine.getVolumes",
            { volumeURI: issue.volume.api_detail_url }
          );
          return issue;
        })
      );

      // Notify client about scoring
      await broadcastStatus({
        message: isSingleMatch
          ? `Scoring 1 match...`
          : `Scoring ${finalMatches.length} matches...`,
        stage: "scoring_matches",
      });

      // 4. Score the final matches
      const scoredMatches = await this.broker.call(
        "comicvine.getComicVineMatchScores",
        {
          finalMatches,
          rawFileDetails,
          scorerConfiguration,
        }
      );

      // Notify completion
      await broadcastStatus({
        message: isSingleMatch
          ? `Search complete! Found 1 match.`
          : `Search complete! Returning scored matches.`,
        stage: "complete",
      });

      return scoredMatches;
    } catch (error) {
      console.error("Error in volumeBasedSearch:", error);

      const details = (error ?? {}) as {
        message?: string;
        code?: unknown;
        type?: unknown;
        retryable?: unknown;
      };

      // Surface error to UI. A failure to broadcast must not replace the
      // original error, so it is logged and dropped.
      try {
        await broadcastStatus({
          message: `Error during search: ${details.message || 'Unknown error'}`,
          stage: "error",
          error: {
            message: details.message,
            code: details.code,
            type: details.type,
            retryable: details.retryable,
          },
        });
      } catch (broadcastError) {
        console.error(
          "Failed to broadcast volumeBasedSearch error:",
          broadcastError
        );
      }

      // Re-throw so the caller still sees the failure
      throw error;
    }
  },
},
|
|
getComicVineMatchScores: {
  rest: "POST /getComicVineMatchScores",
  // Two-minute budget; per the original author this covers image downloads and hash calculations
  timeout: 120000,
  /**
   * Hands candidate matches to `matchScorer` and returns its result.
   *
   * @param ctx - Context with `finalMatches`, `rawFileDetails` and
   *   `scorerConfiguration`; only `scorerConfiguration.searchParams` is
   *   forwarded from the configuration.
   * @returns Whatever `matchScorer` resolves to.
   */
  handler: async (
    ctx: Context<{
      finalMatches: any[];
      rawFileDetails: any;
      scorerConfiguration: any;
    }>
  ) => {
    const input = ctx.params;
    const candidates = input.finalMatches;
    const fileDetails = input.rawFileDetails;
    const scorerSetup = input.scorerConfiguration;
    console.log(input);

    const scored = await matchScorer(
      candidates,
      scorerSetup.searchParams,
      fileDetails
    );
    return scored;
  },
},
|
|
getStoryArcs: {
  rest: "POST /getStoryArcs",
  /**
   * Collects the distinct story arcs referenced by the issues of a volume.
   *
   * The volume is requested from `volumeUrl`; each issue listed under
   * `results.issues` is then fetched individually and its
   * `story_arc_credits` are gathered. Arcs are de-duplicated by `id`; when
   * an arc appears in several issues the entry from the last one wins, so
   * `issueDescription` / `issueImage` come from that issue.
   *
   * This action is best-effort: failures are logged and produce an empty
   * list rather than an exception. It always resolves to an array.
   *
   * NOTE(review): the API key is appended to the caller-supplied
   * `volumeUrl`; consider validating the host.
   *
   * @param ctx - Context with `volumeUrl` (volume detail URL) and
   *   `volumeId` (used for logging only).
   * @returns Array of story-arc objects, each extended with
   *   `issueDescription` and `issueImage`; empty when nothing was found or
   *   a request failed.
   */
  handler: async (
    ctx: Context<{ volumeUrl: string; volumeId: number }>
  ) => {
    const { volumeUrl, volumeId } = ctx.params;
    const headers = {
      "Accept": "application/json",
      "User-Agent": "ThreeTwo",
    };

    try {
      const volumeResponse = await axios({
        url: `${volumeUrl}?api_key=${process.env.COMICVINE_API_KEY}`,
        method: "GET",
        params: {
          limit: "100",
          format: "json",
          resources: "volumes",
        },
        headers,
      });

      // A body without an issues list is handled like an empty volume
      const issues: any[] = volumeResponse.data?.results?.issues ?? [];
      if (issues.length === 0) {
        console.log("No issues found for the specified volume.");
        return [];
      }

      const arcsPerIssue = await Promise.all(
        issues.map(async (issue: any) => {
          const issueUrl = `${CV_BASE_URL}issue/4000-${issue.id}/?api_key=${process.env.COMICVINE_API_KEY}&format=json&field_list=story_arc_credits,description,image`;
          try {
            // `format` is already part of the URL, so it is not repeated here
            const issueResponse = await axios.get(issueUrl, {
              params: { limit: "100" },
              headers,
            });
            const issueData = issueResponse.data.results;

            // Transform each story arc to include issue's description and image
            return (
              issueData.story_arc_credits?.map((arc: any) => ({
                ...arc,
                issueDescription: issueData.description,
                issueImage: issueData.image,
              })) || []
            );
          } catch (error) {
            console.error(
              "An error occurred while fetching issue data:",
              (error as { message?: string })?.message
            );
            return []; // One failing issue must not discard the others
          }
        })
      );

      // Flatten, then deduplicate on arc ID (last occurrence wins)
      const uniqueStoryArcs = Array.from(
        new Map(
          arcsPerIssue
            .flat()
            .map((arc: any): [unknown, any] => [arc.id, arc])
        ).values()
      );

      console.log(
        `Found ${uniqueStoryArcs.length} unique story arc(s) for volume ID ${volumeId}:`
      );
      uniqueStoryArcs.forEach((arc: any) => {
        console.log(
          `- ${arc.name} (ID: ${arc.id}) with issueDescription and issueImage`
        );
      });

      return uniqueStoryArcs;
    } catch (error) {
      console.error(
        "An error occurred while fetching data from ComicVine:",
        error
      );
      return [];
    }
  },
},
|
|
|
|
getIssuesForVolume: {
  rest: "POST /getIssuesForVolume",
  /**
   * Lists up to 100 issues of a ComicVine volume and adds a `year` field.
   *
   * @param ctx - Context with `volumeId`, used as the `volume:` filter.
   * @returns The issues from ComicVine, each with `year` (from `cover_date`,
   *   or `null` when absent or unparsable), `description` defaulted to `""`
   *   and `image` defaulted to `{}`.
   * @throws MoleculerError (500, `FETCH_ERROR`) when the request or the
   *   processing of its response fails.
   */
  async handler(ctx: Context<{ volumeId: number }>) {
    const { volumeId } = ctx.params;
    // The key travels in `params` only; it used to be sent twice
    const issuesUrl = `${CV_BASE_URL}issues/`;
    try {
      const response = await axios.get(issuesUrl, {
        params: {
          api_key: process.env.COMICVINE_API_KEY,
          filter: `volume:${volumeId}`,
          format: "json",
          field_list:
            "id,name,image,issue_number,cover_date,description",
          limit: 100,
        },
        headers: {
          "Accept": "application/json",
          "User-Agent": "ThreeTwo",
        },
      });

      // Map over the issues to include the year extracted from cover_date
      const issuesWithDescriptionImageAndYear =
        response.data.results.map((issue: any) => {
          // Date-only strings are parsed as UTC, so the year must be read
          // in UTC too; the local-time getter reports the previous year
          // for January 1st dates on servers west of UTC.
          const parsedYear = issue.cover_date
            ? new Date(issue.cover_date).getUTCFullYear()
            : NaN;
          const year = Number.isNaN(parsedYear) ? null : parsedYear;
          return {
            ...issue,
            year,
            description: issue.description || "",
            image: issue.image || {},
          };
        });

      return issuesWithDescriptionImageAndYear;
    } catch (error) {
      const message = (error as { message?: string })?.message;
      this.logger.error(
        "Error fetching issues from ComicVine:",
        message
      );
      throw new MoleculerError(
        "Failed to fetch issues",
        500,
        "FETCH_ERROR",
        { error: message }
      );
    }
  },
},
|
|
},
|
|
methods: {
|
|
/**
 * Fetches every page of a ComicVine `search` query and returns the
 * concatenated results.
 *
 * Pages are requested one after another starting at `payload.page`
 * (page 1 when missing or not numeric). Each page's results are appended
 * before deciding whether to continue, so the final, possibly partial,
 * page is included. A `CV_SCRAPING_STATUS` event is broadcast before each
 * follow-up request.
 *
 * @param payload - Object with `format`, `query`, `limit`, `page` and
 *   `resources`, forwarded as query parameters.
 * @param output - Accumulator that results are appended to; defaults to a
 *   new array.
 * @returns A copy of the accumulated results.
 */
fetchVolumesFromCV: async (payload: any, output: any[] = []) => {
  const { format, query, limit, page, resources } = payload;

  let currentPage = parseInt(page, 10);
  if (!(currentPage >= 1)) {
    currentPage = 1;
  }

  for (;;) {
    const response = await axios.request({
      url: `${CV_BASE_URL}search?api_key=${process.env.COMICVINE_API_KEY}`,
      params: {
        format,
        query,
        limit,
        page: currentPage,
        resources,
      },
      headers: {
        "Accept": "application/json",
        "User-Agent": "ThreeTwo",
      },
    });
    const { data } = response;

    const pageResults: any[] = Array.isArray(data.results)
      ? data.results
      : [];
    output.push(...pageResults);

    // 1. Calculate total pages from the requested page size.
    // Falls back to the `limit` field of the response (presumably echoed
    // by ComicVine — TODO confirm), then to the size of this page.
    const totalResults =
      parseInt(data.number_of_total_results, 10) || 0;
    const pageSize =
      parseInt(limit, 10) ||
      parseInt(data.limit, 10) ||
      pageResults.length;
    const totalPages =
      pageSize > 0 ? Math.ceil(totalResults / pageSize) : 1;

    // 1a. Stop on the last page; an empty page also ends the loop so a
    // wrong total can never cause endless requests.
    if (currentPage >= totalPages || pageResults.length === 0) {
      return [...output];
    }

    // 1b. Otherwise move on to the next page
    currentPage += 1;
    console.log(
      `Fetching results for page ${currentPage} (of ${totalPages})...`
    );
    await this.broker.call("socket.broadcast", {
      namespace: "/",
      event: "CV_SCRAPING_STATUS",
      args: [
        {
          message: `Fetching results for page ${currentPage} (of ${totalPages})...`,
        },
      ],
    });
  }
},
|
|
},
|
|
});
|
|
}
|
|
}
|