🏗️ Refactored CV search match scorer
This commit is contained in:
@@ -190,6 +190,8 @@ export default class ComicVineService extends Service {
|
|||||||
"Searching against: ",
|
"Searching against: ",
|
||||||
ctx.params.scorerConfiguration.searchParams
|
ctx.params.scorerConfiguration.searchParams
|
||||||
);
|
);
|
||||||
|
const { rawFileDetails, scorerConfiguration } =
|
||||||
|
ctx.params;
|
||||||
const results: any = [];
|
const results: any = [];
|
||||||
console.log(
|
console.log(
|
||||||
"passed to fetchVolumesFromCV",
|
"passed to fetchVolumesFromCV",
|
||||||
@@ -290,12 +292,44 @@ export default class ComicVineService extends Service {
|
|||||||
return issue;
|
return issue;
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Score the final matches
|
||||||
|
const foo = await this.broker.call(
|
||||||
|
"comicvine.getComicVineMatchScores",
|
||||||
|
{
|
||||||
|
finalMatches,
|
||||||
|
rawFileDetails,
|
||||||
|
scorerConfiguration,
|
||||||
|
}
|
||||||
|
);
|
||||||
return Promise.all(finalMatches);
|
return Promise.all(finalMatches);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
getComicVineMatchScores: {
|
||||||
|
rest: "POST /getComicVineMatchScores",
|
||||||
|
handler: async (
|
||||||
|
ctx: Context<{
|
||||||
|
finalMatches: Array<any>;
|
||||||
|
rawFileDetails: any;
|
||||||
|
scorerConfiguration: any;
|
||||||
|
}>
|
||||||
|
) => {
|
||||||
|
const {
|
||||||
|
finalMatches,
|
||||||
|
rawFileDetails,
|
||||||
|
scorerConfiguration,
|
||||||
|
} = ctx.params;
|
||||||
|
console.log(ctx.params);
|
||||||
|
return await matchScorer(
|
||||||
|
finalMatches,
|
||||||
|
scorerConfiguration.searchParams,
|
||||||
|
rawFileDetails
|
||||||
|
);
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
methods: {
|
methods: {
|
||||||
fetchVolumesFromCV: async (payload, output: any[] = []) => {
|
fetchVolumesFromCV: async (payload, output: any[] = []) => {
|
||||||
|
|||||||
@@ -42,60 +42,57 @@ import { isAfter, isSameYear, parseISO } from "date-fns";
|
|||||||
const imghash = require("imghash");
|
const imghash = require("imghash");
|
||||||
|
|
||||||
export const matchScorer = async (
|
export const matchScorer = async (
|
||||||
searchMatches: any,
|
searchMatches: Promise<any>[],
|
||||||
searchQuery: any,
|
searchQuery: any,
|
||||||
rawFileDetails: any
|
rawFileDetails: any
|
||||||
): Promise<any> => {
|
): Promise<any> => {
|
||||||
// 1. Check if it exists in the db (score: 0)
|
const scoredMatches: any = [];
|
||||||
// 2. Check if issue name matches strongly (score: ++)
|
|
||||||
// 3. Check if issue number matches strongly (score: ++)
|
try {
|
||||||
// 4. Check if issue covers hash match strongly (score: +++)
|
const matches = await Promise.all(searchMatches);
|
||||||
// 5. Check if issue year matches strongly (score: +)
|
|
||||||
const scoredMatches = map(searchMatches, async (match, idx) => {
|
for (const match of matches) {
|
||||||
match.score = 0;
|
match.score = 0;
|
||||||
|
|
||||||
// Check for the issue name match
|
// Check for the issue name match
|
||||||
if (
|
if (!isNil(searchQuery.name) && !isNil(match.name)) {
|
||||||
!isNil(searchQuery.issue.searchParams.searchTerms.name) &&
|
|
||||||
!isNil(match.name)
|
|
||||||
) {
|
|
||||||
const issueNameScore = stringSimilarity.compareTwoStrings(
|
const issueNameScore = stringSimilarity.compareTwoStrings(
|
||||||
searchQuery.issue.searchParams.searchTerms.name,
|
searchQuery.name,
|
||||||
match.name
|
match.name
|
||||||
);
|
);
|
||||||
match.score = issueNameScore;
|
match.score = issueNameScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Issue number matches
|
// Issue number matches
|
||||||
|
if (!isNil(searchQuery.number) && !isNil(match.issue_number)) {
|
||||||
if (
|
if (
|
||||||
!isNil(searchQuery.issue.searchParams.searchTerms.number) &&
|
parseInt(searchQuery.number, 10) ===
|
||||||
!isNil(match.issue_number)
|
parseInt(match.issue_number, 10)
|
||||||
) {
|
|
||||||
if (
|
|
||||||
parseInt(
|
|
||||||
searchQuery.issue.searchParams.searchTerms.number,
|
|
||||||
10
|
|
||||||
) === parseInt(match.issue_number, 10)
|
|
||||||
) {
|
) {
|
||||||
match.score += 1;
|
match.score += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cover image hash match
|
// Cover image hash match
|
||||||
return await calculateLevenshteinDistance(match, rawFileDetails);
|
scoredMatches.push(
|
||||||
});
|
await calculateLevenshteinDistance(match, rawFileDetails)
|
||||||
return Promise.all(scoredMatches);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return scoredMatches;
|
||||||
|
} catch (error) {
|
||||||
|
// Handle errors here
|
||||||
|
console.error("Error in matchScorer:", error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
export const rankVolumes = (volumes: any, scorerConfiguration: any) => {
|
export const rankVolumes = (volumes: any, scorerConfiguration: any) => {
|
||||||
// Iterate over volumes, checking to see:
|
// Iterate over volumes, checking to see:
|
||||||
// 1. If the detected year of the issue falls in the range (end_year >= {detected year for issue} >= start_year )
|
// 1. If the detected year of the issue falls in the range (end_year >= {detected year for issue} >= start_year )
|
||||||
// 2. If there is a strong string comparison between the volume name and the issue name ??
|
// 2. If there is a strong string comparison between the volume name and the issue name ??
|
||||||
const issueNumber = parseInt(
|
const issueNumber = parseInt(scorerConfiguration.searchParams.number, 10);
|
||||||
scorerConfiguration.searchParams.number,
|
const issueYear = parseISO(scorerConfiguration.searchParams.year);
|
||||||
10
|
|
||||||
);
|
|
||||||
const issueYear = parseISO(
|
|
||||||
scorerConfiguration.searchParams.year
|
|
||||||
);
|
|
||||||
const foo = volumes.map((volume: any, idx: number) => {
|
const foo = volumes.map((volume: any, idx: number) => {
|
||||||
let volumeMatchScore = 0;
|
let volumeMatchScore = 0;
|
||||||
const volumeStartYear = !isNil(volume.start_year)
|
const volumeStartYear = !isNil(volume.start_year)
|
||||||
@@ -114,26 +111,34 @@ export const rankVolumes = (volumes: any, scorerConfiguration: any) => {
|
|||||||
// 1. If there is a subtitle in the candidate volume's name, add it to the issueNameMatchScore
|
// 1. If there is a subtitle in the candidate volume's name, add it to the issueNameMatchScore
|
||||||
// If not, move on.
|
// If not, move on.
|
||||||
let subtitleMatchScore = 0;
|
let subtitleMatchScore = 0;
|
||||||
if(!isNil(scorerConfiguration.searchParams.subtitle)) {
|
if (!isNil(scorerConfiguration.searchParams.subtitle)) {
|
||||||
subtitleMatchScore = stringSimilarity.compareTwoStrings(scorerConfiguration.searchParams.subtitle, volume.name);
|
subtitleMatchScore = stringSimilarity.compareTwoStrings(
|
||||||
if(subtitleMatchScore > 0.1) {
|
scorerConfiguration.searchParams.subtitle,
|
||||||
|
volume.name
|
||||||
|
);
|
||||||
|
if (subtitleMatchScore > 0.1) {
|
||||||
issueNameMatchScore += subtitleMatchScore;
|
issueNameMatchScore += subtitleMatchScore;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 2. If issue year starts after the candidate volume's start year or is the same year, +2 to volumeMatchScore
|
// 2. If issue year starts after the candidate volume's start year or is the same year, +2 to volumeMatchScore
|
||||||
if (!isNil(volumeStartYear)) {
|
if (!isNil(volumeStartYear)) {
|
||||||
if (isSameYear(issueYear, volumeStartYear) ||
|
if (
|
||||||
isAfter(issueYear, volumeStartYear)) {
|
isSameYear(issueYear, volumeStartYear) ||
|
||||||
|
isAfter(issueYear, volumeStartYear)
|
||||||
|
) {
|
||||||
volumeMatchScore += 2;
|
volumeMatchScore += 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 3. If issue number falls in the range of candidate volume's first issue # and last issue #, +3 to volumeMatchScore
|
// 3. If issue number falls in the range of candidate volume's first issue # and last issue #, +3 to volumeMatchScore
|
||||||
if(!isNil(firstIssueNumber) && !isNil(lastIssueNumber)) {
|
if (!isNil(firstIssueNumber) && !isNil(lastIssueNumber)) {
|
||||||
if(firstIssueNumber <= issueNumber || issueNumber <= lastIssueNumber) {
|
if (
|
||||||
|
firstIssueNumber <= issueNumber ||
|
||||||
|
issueNumber <= lastIssueNumber
|
||||||
|
) {
|
||||||
volumeMatchScore += 3;
|
volumeMatchScore += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(issueNameMatchScore > 0.5 && volumeMatchScore > 2) {
|
if (issueNameMatchScore > 0.5 && volumeMatchScore > 2) {
|
||||||
console.log(`Found a match for criteria, volume ID: ${volume.id}`);
|
console.log(`Found a match for criteria, volume ID: ${volume.id}`);
|
||||||
return volume.id;
|
return volume.id;
|
||||||
}
|
}
|
||||||
@@ -144,17 +149,30 @@ export const rankVolumes = (volumes: any, scorerConfiguration: any) => {
|
|||||||
const calculateLevenshteinDistance = async (match: any, rawFileDetails: any) =>
|
const calculateLevenshteinDistance = async (match: any, rawFileDetails: any) =>
|
||||||
new Promise((resolve, reject) => {
|
new Promise((resolve, reject) => {
|
||||||
https.get(match.image.small_url, (response: any) => {
|
https.get(match.image.small_url, (response: any) => {
|
||||||
|
console.log(rawFileDetails.cover.filePath);
|
||||||
const fileName = match.id + "_" + rawFileDetails.name + ".jpg";
|
const fileName = match.id + "_" + rawFileDetails.name + ".jpg";
|
||||||
const file = createWriteStream(`./userdata/temporary/${fileName}`);
|
const file = createWriteStream(
|
||||||
|
`${process.env.USERDATA_DIRECTORY}/temporary/${fileName}`
|
||||||
|
);
|
||||||
const fileStream = response.pipe(file);
|
const fileStream = response.pipe(file);
|
||||||
fileStream.on("finish", async () => {
|
fileStream.on("finish", async () => {
|
||||||
// 1. hash of the cover image we have on hand
|
// 1. hash of the cover image we have on hand
|
||||||
|
const coverFileName = rawFileDetails.cover.filePath
|
||||||
|
.split("/")
|
||||||
|
.at(-1);
|
||||||
|
const coverDirectory = rawFileDetails.containedIn
|
||||||
|
.split("/")
|
||||||
|
.at(-1);
|
||||||
const hash1 = await imghash.hash(
|
const hash1 = await imghash.hash(
|
||||||
path.resolve(rawFileDetails.cover.filePath)
|
path.resolve(
|
||||||
|
`${process.env.USERDATA_DIRECTORY}/covers/${coverDirectory}/${coverFileName}`
|
||||||
|
)
|
||||||
);
|
);
|
||||||
// 2. hash of the cover of the potential match
|
// 2. hash of the cover of the potential match
|
||||||
const hash2 = await imghash.hash(
|
const hash2 = await imghash.hash(
|
||||||
path.resolve(`./userdata/temporary/${fileName}`)
|
path.resolve(
|
||||||
|
`${process.env.USERDATA_DIRECTORY}/temporary/${fileName}`
|
||||||
|
)
|
||||||
);
|
);
|
||||||
if (!isUndefined(hash1) && !isUndefined(hash2)) {
|
if (!isUndefined(hash1) && !isUndefined(hash2)) {
|
||||||
const levenshteinDistance = leven(hash1, hash2);
|
const levenshteinDistance = leven(hash1, hash2);
|
||||||
|
|||||||
Reference in New Issue
Block a user