🤼‍♀️ Comic Vine Match algorithm, 2nd draft

This commit is contained in:
2021-07-22 16:07:52 -07:00
parent 96a6438fb4
commit 57f7621c0e
3 changed files with 37 additions and 26 deletions

View File

@@ -1,5 +1,9 @@
import axios from "axios";
import { IFolderData, IExtractedComicBookCoverFile } from "threetwo-ui-typings";
import {
IFolderData,
IExtractedComicBookCoverFile,
IComicVineSearchQuery,
} from "threetwo-ui-typings";
import { API_BASE_URI, SOCKET_BASE_URI } from "../constants/endpoints";
import { io } from "socket.io-client";
import {
@@ -105,8 +109,8 @@ export const getRecentlyImportedComicBooks = (options) => async (dispatch) => {
export const fetchComicVineMatches = (searchPayload) => (dispatch) => {
try {
const issueString = searchPayload.rawFileDetails.path.split("/").pop();
let seriesSearchQuery = {};
const issueSearchQuery = refineQuery(issueString);
const issueSearchQuery: IComicVineSearchQuery = refineQuery(issueString);
let seriesSearchQuery: IComicVineSearchQuery = {} as IComicVineSearchQuery;
if (searchPayload.rawFileDetails.containedIn !== "comics") {
seriesSearchQuery = refineQuery(
searchPayload.rawFileDetails.containedIn.split("/").pop(),

View File

@@ -3,8 +3,9 @@ import { default as dates } from "compromise-dates";
import { default as sentences } from "compromise-sentences";
import { default as numbers } from "compromise-numbers";
import xregexp from "xregexp";
import { MatchArray } from "xregexp/types";
import voca from "voca";
import { map, xor, isEmpty, isNull } from "lodash";
import { xor, isEmpty, isNull } from "lodash";
nlp.extend(sentences);
nlp.extend(numbers);
@@ -72,18 +73,12 @@ export const tokenize = (inputString: string) => {
// regexes to match constituent parts of the search string
// and isolate the search terms
const chapters = inputString.replace(
/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi,
"",
);
const volumes = inputString.replace(
inputString.replace(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, "");
inputString.replace(
/(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
"",
);
const pageCounts = inputString.replace(
/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi,
"",
);
inputString.replace(/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi, "");
// if the name has things like "4 of 5", remove the " of 5" part
// also, if the name has 3-6, remove the -6 part. note that we'll
@@ -103,19 +98,29 @@ export const tokenize = (inputString: string) => {
let issueNumber = hyphenatedIssueRange[0];
}
if (voca.includes(inputString, "_") && !voca.includes(inputString, " ")) {
inputString.replace(/[-_#]/gi, "");
}
const readingListIndicators = inputString.match(
/^\s*\d+(\.\s+?|\s*-?\s*)/gim,
);
let issueNumbers = "";
let parsedIssueNumber = "";
const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
if (!isEmpty(issues)) {
if (!isEmpty(issues) && !isNull(issues)) {
issueNumbers = issues[0].trim();
const matches = extractNumerals(issueNumbers);
// if we parsed out some potential issue numbers, designate the LAST
// (rightmost) one as the actual issue number, and remove it from the name
if (matches.length > 0) {
parsedIssueNumber = matches[0].pop();
}
}
// const issueHashes = inputString.match(/\#\d/gi);
inputString = voca.replace(inputString, parsedIssueNumber, "");
inputString = voca.replace(inputString, /_.-# /gi, "");
inputString = nlp(inputString).text("normal").trim();
const yearMatches = inputString.match(/\d{4}/gi);
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
@@ -126,12 +131,8 @@ export const tokenize = (inputString: string) => {
const queryObject = {
comicbook_identifier_tokens: {
issueNumbers,
chapters,
pageCounts,
readingListIndicators,
volumes,
inputString,
parsedIssueNumber,
},
years: {
yearMatches,
@@ -144,14 +145,20 @@ export const tokenize = (inputString: string) => {
return queryObject;
};
export const extractNumerals = (inputString: string): string => {
/**
 * Collects every "issue number-like" token in a string, scanning left to right.
 *
 * A token is an optional leading `-`, optional digits, an optional `.`, at
 * least one digit, then any trailing word characters. It must sit at the start
 * of a line or directly after `_`, whitespace or `#`; that leading delimiter is
 * captured as group 1 and the token itself as group 2.
 * Examples of tokens that are found: 3, #4, 5a, 6.00, 10.0b, .5, -1.0
 *
 * NOTE(review): `MatchArray[string]` is an indexed-access type rather than an
 * array-of-matches type; it looks like `MatchArray[]` may have been intended.
 * Confirm against the xregexp typings before tightening it, since callers
 * compile against the current annotation.
 *
 * @param inputString - text to scan for issue-number candidates
 * @returns the match objects passed to the `xregexp.forEach` callback, in the
 *   order they were found; an empty array when nothing matches
 */
export const extractNumerals = (inputString: string): MatchArray[string] => {
  // Flags: g = all occurrences, m = `^` matches at every line start, u = unicode mode.
  const issueNumberPattern = /(^|[_\s#])(-?\d*\.?\d\w*)/gmu;
  const collected: MatchArray[string] = [];
  xregexp.forEach(inputString, issueNumberPattern, (hit) => {
    collected.push(hit);
  });
  return collected;
};
export const refineQuery = (inputString) => {
const queryObj = tokenize(inputString);
console.log("QWEQWEQWE", queryObj);
const removedYears = xor(
queryObj.sentence_tokens.normalized,
queryObj.years.yearMatches,
@@ -162,7 +169,6 @@ export const refineQuery = (inputString) => {
name: queryObj.sentence_tokens.detailed[0].text,
number: queryObj.comicbook_identifier_tokens.issueNumbers,
},
year: queryObj.years,
},
meta: {
queryObj,

View File

@@ -111,6 +111,7 @@ interface SearchInstance {
searches_sent_ago: number;
}
app.use(opdsRouter());
const foo = SocketService.connect("admin", "password");
foo.then(async (data) => {
const instance: SearchInstance = await SocketService.post("search");