🥇 Working draft of match scoring algo

This commit is contained in:
2021-08-03 01:56:22 -07:00
parent c7704a2dfa
commit 8210165e82

View File

@@ -35,26 +35,24 @@ import { createWriteStream } from "fs";
import path from "path"; import path from "path";
import https from "https"; import https from "https";
import stringSimilarity from "string-similarity"; import stringSimilarity from "string-similarity";
import { each, isNil, isNull, isUndefined } from "lodash"; import { each, isNil, map, isNull, isUndefined } from "lodash";
import leven from "leven"; import leven from "leven";
const imghash = require("imghash"); const imghash = require("imghash");
export const matchScorer = ( export const matchScorer = async (
searchMatches: any, searchMatches: any,
searchQuery: any, searchQuery: any,
rawFileDetails: any rawFileDetails: any
) => { ): Promise<any> => {
// 1. Check if it exists in the db (score: 0) // 1. Check if it exists in the db (score: 0)
// 2. Check if issue name matches strongly (score: ++) // 2. Check if issue name matches strongly (score: ++)
// 3. Check if issue number matches strongly (score: ++) // 3. Check if issue number matches strongly (score: ++)
// 4. Check if issue covers hash match strongly (score: +++) // 4. Check if issue covers hash match strongly (score: +++)
// 5. Check if issue year matches strongly (score: +) // 5. Check if issue year matches strongly (score: +)
const scoredMatches = map(searchMatches, async (match, idx) => {
each(searchMatches, (match, idx) => {
match.score = 0; match.score = 0;
// Check for the issue name match // Check for the issue name match
if ( if (
!isNil(searchQuery.issue.searchParams.searchTerms.name) && !isNil(searchQuery.issue.searchParams.searchTerms.name) &&
!isNil(match.name) !isNil(match.name)
@@ -80,20 +78,17 @@ export const matchScorer = (
match.score += 1; match.score += 1;
} }
} }
const foo = calculateLevenshteinDistance(rawFileDetails, match); // Cover image hash match
console.log("MAST", foo); return await calculateLevenshteinDistance(match, rawFileDetails);
}); });
return Promise.all(scoredMatches);
return searchMatches;
}; };
const calculateLevenshteinDistance = async (match: any, rawFileDetails: any) =>
const calculateLevenshteinDistance = (rawFileDetails: any, match: any) => { new Promise((resolve, reject) => {
const fileName = match.id + "_" + rawFileDetails.name + ".jpg"; https.get(match.image.small_url, (response: any) => {
const file = createWriteStream(`./userdata/temporary/${fileName}`); const fileName = match.id + "_" + rawFileDetails.name + ".jpg";
let levenshteinDistance; const file = createWriteStream(`./userdata/temporary/${fileName}`);
https
.get(match.image.small_url, (response) => {
const fileStream = response.pipe(file); const fileStream = response.pipe(file);
fileStream.on("finish", async () => { fileStream.on("finish", async () => {
const hash1 = await imghash.hash( const hash1 = await imghash.hash(
@@ -103,7 +98,7 @@ const calculateLevenshteinDistance = (rawFileDetails: any, match: any) => {
path.resolve(`./userdata/temporary/${fileName}`) path.resolve(`./userdata/temporary/${fileName}`)
); );
if (!isUndefined(hash1) && !isUndefined(hash2)) { if (!isUndefined(hash1) && !isUndefined(hash2)) {
levenshteinDistance = leven(hash1, hash2); const levenshteinDistance = leven(hash1, hash2);
if (levenshteinDistance === 0) { if (levenshteinDistance === 0) {
match.score += 4; match.score += 4;
} else if ( } else if (
@@ -114,15 +109,10 @@ const calculateLevenshteinDistance = (rawFileDetails: any, match: any) => {
} else { } else {
match.score -= 4; match.score -= 4;
} }
resolve(match);
} else { } else {
console.log("Couldn't calculate image hashes"); reject({ error: "bastard couldn't calculate hashes" });
} }
console.log("MATCH SCORE inside:", match.score);
}); });
}); });
console.log(levenshteinDistance); });
return match;
};