🤼♀️ Comic Vine Match algorithm, 2nd draft
This commit is contained in:
@@ -1,5 +1,9 @@
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { IFolderData, IExtractedComicBookCoverFile } from "threetwo-ui-typings";
|
import {
|
||||||
|
IFolderData,
|
||||||
|
IExtractedComicBookCoverFile,
|
||||||
|
IComicVineSearchQuery,
|
||||||
|
} from "threetwo-ui-typings";
|
||||||
import { API_BASE_URI, SOCKET_BASE_URI } from "../constants/endpoints";
|
import { API_BASE_URI, SOCKET_BASE_URI } from "../constants/endpoints";
|
||||||
import { io } from "socket.io-client";
|
import { io } from "socket.io-client";
|
||||||
import {
|
import {
|
||||||
@@ -105,8 +109,8 @@ export const getRecentlyImportedComicBooks = (options) => async (dispatch) => {
|
|||||||
export const fetchComicVineMatches = (searchPayload) => (dispatch) => {
|
export const fetchComicVineMatches = (searchPayload) => (dispatch) => {
|
||||||
try {
|
try {
|
||||||
const issueString = searchPayload.rawFileDetails.path.split("/").pop();
|
const issueString = searchPayload.rawFileDetails.path.split("/").pop();
|
||||||
let seriesSearchQuery = {};
|
const issueSearchQuery: IComicVineSearchQuery = refineQuery(issueString);
|
||||||
const issueSearchQuery = refineQuery(issueString);
|
let seriesSearchQuery: IComicVineSearchQuery = {} as IComicVineSearchQuery;
|
||||||
if (searchPayload.rawFileDetails.containedIn !== "comics") {
|
if (searchPayload.rawFileDetails.containedIn !== "comics") {
|
||||||
seriesSearchQuery = refineQuery(
|
seriesSearchQuery = refineQuery(
|
||||||
searchPayload.rawFileDetails.containedIn.split("/").pop(),
|
searchPayload.rawFileDetails.containedIn.split("/").pop(),
|
||||||
|
|||||||
@@ -3,8 +3,9 @@ import { default as dates } from "compromise-dates";
|
|||||||
import { default as sentences } from "compromise-sentences";
|
import { default as sentences } from "compromise-sentences";
|
||||||
import { default as numbers } from "compromise-numbers";
|
import { default as numbers } from "compromise-numbers";
|
||||||
import xregexp from "xregexp";
|
import xregexp from "xregexp";
|
||||||
|
import { MatchArray } from "xregexp/types";
|
||||||
import voca from "voca";
|
import voca from "voca";
|
||||||
import { map, xor, isEmpty, isNull } from "lodash";
|
import { xor, isEmpty, isNull } from "lodash";
|
||||||
|
|
||||||
nlp.extend(sentences);
|
nlp.extend(sentences);
|
||||||
nlp.extend(numbers);
|
nlp.extend(numbers);
|
||||||
@@ -72,18 +73,12 @@ export const tokenize = (inputString: string) => {
|
|||||||
// regexes to match constituent parts of the search string
|
// regexes to match constituent parts of the search string
|
||||||
// and isolate the search terms
|
// and isolate the search terms
|
||||||
|
|
||||||
const chapters = inputString.replace(
|
inputString.replace(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, "");
|
||||||
/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi,
|
inputString.replace(
|
||||||
"",
|
|
||||||
);
|
|
||||||
const volumes = inputString.replace(
|
|
||||||
/(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
|
/(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
|
||||||
"",
|
"",
|
||||||
);
|
);
|
||||||
const pageCounts = inputString.replace(
|
inputString.replace(/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi, "");
|
||||||
/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi,
|
|
||||||
"",
|
|
||||||
);
|
|
||||||
|
|
||||||
// if the name has things like "4 of 5", remove the " of 5" part
|
// if the name has things like "4 of 5", remove the " of 5" part
|
||||||
// also, if the name has 3-6, remove the -6 part. note that we'll
|
// also, if the name has 3-6, remove the -6 part. note that we'll
|
||||||
@@ -103,19 +98,29 @@ export const tokenize = (inputString: string) => {
|
|||||||
let issueNumber = hyphenatedIssueRange[0];
|
let issueNumber = hyphenatedIssueRange[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (voca.includes(inputString, "_") && !voca.includes(inputString, " ")) {
|
|
||||||
inputString.replace(/[-_#]/gi, "");
|
|
||||||
}
|
|
||||||
const readingListIndicators = inputString.match(
|
const readingListIndicators = inputString.match(
|
||||||
/^\s*\d+(\.\s+?|\s*-?\s*)/gim,
|
/^\s*\d+(\.\s+?|\s*-?\s*)/gim,
|
||||||
);
|
);
|
||||||
|
|
||||||
let issueNumbers = "";
|
let issueNumbers = "";
|
||||||
|
let parsedIssueNumber = "";
|
||||||
const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
|
const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
|
||||||
if (!isEmpty(issues)) {
|
|
||||||
|
if (!isEmpty(issues) && !isNull(issues)) {
|
||||||
issueNumbers = issues[0].trim();
|
issueNumbers = issues[0].trim();
|
||||||
|
const matches = extractNumerals(issueNumbers);
|
||||||
|
// if we parsed out some potential issue numbers, designate the LAST
|
||||||
|
// (rightmost) one as the actual issue number, and remove it from the name
|
||||||
|
|
||||||
|
if (matches.length > 0) {
|
||||||
|
parsedIssueNumber = matches[0].pop();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// const issueHashes = inputString.match(/\#\d/gi);
|
|
||||||
|
inputString = voca.replace(inputString, parsedIssueNumber, "");
|
||||||
|
inputString = voca.replace(inputString, /_.-# /gi, "");
|
||||||
|
inputString = nlp(inputString).text("normal").trim();
|
||||||
|
|
||||||
const yearMatches = inputString.match(/\d{4}/gi);
|
const yearMatches = inputString.match(/\d{4}/gi);
|
||||||
|
|
||||||
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
|
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
|
||||||
@@ -126,12 +131,8 @@ export const tokenize = (inputString: string) => {
|
|||||||
|
|
||||||
const queryObject = {
|
const queryObject = {
|
||||||
comicbook_identifier_tokens: {
|
comicbook_identifier_tokens: {
|
||||||
issueNumbers,
|
inputString,
|
||||||
chapters,
|
parsedIssueNumber,
|
||||||
pageCounts,
|
|
||||||
|
|
||||||
readingListIndicators,
|
|
||||||
volumes,
|
|
||||||
},
|
},
|
||||||
years: {
|
years: {
|
||||||
yearMatches,
|
yearMatches,
|
||||||
@@ -144,14 +145,20 @@ export const tokenize = (inputString: string) => {
|
|||||||
return queryObject;
|
return queryObject;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const extractNumerals = (inputString: string): string => {
|
export const extractNumerals = (inputString: string): MatchArray[string] => {
|
||||||
// Searches through the given string left-to-right, building an ordered list of
|
// Searches through the given string left-to-right, building an ordered list of
|
||||||
// "issue number-like" re.match objects. For example, this method finds
|
// "issue number-like" re.match objects. For example, this method finds
|
||||||
// matches substrings like: 3, #4, 5a, 6.00, 10.0b, .5, -1.0
|
// matches substrings like: 3, #4, 5a, 6.00, 10.0b, .5, -1.0
|
||||||
|
const matches: MatchArray[string] = [];
|
||||||
|
xregexp.forEach(inputString, /(^|[_\s#])(-?\d*\.?\d\w*)/gmu, (match) => {
|
||||||
|
matches.push(match);
|
||||||
|
});
|
||||||
|
return matches;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const refineQuery = (inputString) => {
|
export const refineQuery = (inputString) => {
|
||||||
const queryObj = tokenize(inputString);
|
const queryObj = tokenize(inputString);
|
||||||
|
console.log("QWEQWEQWE", queryObj);
|
||||||
const removedYears = xor(
|
const removedYears = xor(
|
||||||
queryObj.sentence_tokens.normalized,
|
queryObj.sentence_tokens.normalized,
|
||||||
queryObj.years.yearMatches,
|
queryObj.years.yearMatches,
|
||||||
@@ -162,7 +169,6 @@ export const refineQuery = (inputString) => {
|
|||||||
name: queryObj.sentence_tokens.detailed[0].text,
|
name: queryObj.sentence_tokens.detailed[0].text,
|
||||||
number: queryObj.comicbook_identifier_tokens.issueNumbers,
|
number: queryObj.comicbook_identifier_tokens.issueNumbers,
|
||||||
},
|
},
|
||||||
year: queryObj.years,
|
|
||||||
},
|
},
|
||||||
meta: {
|
meta: {
|
||||||
queryObj,
|
queryObj,
|
||||||
|
|||||||
@@ -111,6 +111,7 @@ interface SearchInstance {
|
|||||||
searches_sent_ago: number;
|
searches_sent_ago: number;
|
||||||
}
|
}
|
||||||
app.use(opdsRouter());
|
app.use(opdsRouter());
|
||||||
|
|
||||||
const foo = SocketService.connect("admin", "password");
|
const foo = SocketService.connect("admin", "password");
|
||||||
foo.then(async (data) => {
|
foo.then(async (data) => {
|
||||||
const instance: SearchInstance = await SocketService.post("search");
|
const instance: SearchInstance = await SocketService.post("search");
|
||||||
|
|||||||
Reference in New Issue
Block a user