🔧 Wiring up the updated CV scraper call
This commit is contained in:
@@ -164,37 +164,32 @@ export const fetchComicVineMatches =
|
|||||||
console.log(seriesSearchQuery);
|
console.log(seriesSearchQuery);
|
||||||
axios
|
axios
|
||||||
.request({
|
.request({
|
||||||
url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
|
url: `${COMICBOOKINFO_SERVICE_URI}/volumeBasedSearch`,
|
||||||
method: "POST",
|
method: "POST",
|
||||||
data: {
|
data: {
|
||||||
format: "json",
|
format: "json",
|
||||||
sort: "name%3Aasc",
|
|
||||||
// hack
|
// hack
|
||||||
query: issueSearchQuery.searchParams.searchTerms.name
|
query: issueSearchQuery.searchParams.searchTerms.name
|
||||||
.replace(/[^a-zA-Z0-9 ]/g, "")
|
.replace(/[^a-zA-Z0-9 ]/g, "")
|
||||||
.trim(),
|
.trim(),
|
||||||
fieldList: "id",
|
|
||||||
limit: "100",
|
limit: "100",
|
||||||
offset: "0",
|
|
||||||
page: 1,
|
page: 1,
|
||||||
resources: "issue",
|
resources: "volume",
|
||||||
scorerConfiguration: {
|
scorerConfiguration: {
|
||||||
searchQuery: {
|
searchParams: issueSearchQuery.searchParams,
|
||||||
issue: issueSearchQuery,
|
|
||||||
series: seriesSearchQuery,
|
|
||||||
},
|
|
||||||
rawFileDetails: searchPayload.rawFileDetails,
|
|
||||||
},
|
},
|
||||||
|
rawFileDetails: searchPayload.rawFileDetails,
|
||||||
},
|
},
|
||||||
transformResponse: (r) => {
|
transformResponse: (r) => {
|
||||||
const matches = JSON.parse(r);
|
const matches = JSON.parse(r);
|
||||||
return sortBy(matches, (match) => -match.score);
|
return matches;
|
||||||
|
// return sortBy(matches, (match) => -match.score);
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
.then((response) => {
|
.then((response) => {
|
||||||
dispatch({
|
dispatch({
|
||||||
type: CV_SEARCH_SUCCESS,
|
type: CV_SEARCH_SUCCESS,
|
||||||
searchResults: response.data,
|
searchResults: response.data.results,
|
||||||
searchQueryObject: {
|
searchQueryObject: {
|
||||||
issue: issueSearchQuery,
|
issue: issueSearchQuery,
|
||||||
series: seriesSearchQuery,
|
series: seriesSearchQuery,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ export const ComicVineMatchPanel = (comicVineData): ReactElement => {
|
|||||||
comicVineAPICallProgress,
|
comicVineAPICallProgress,
|
||||||
comicVineSearchResults,
|
comicVineSearchResults,
|
||||||
} = comicVineData.props;
|
} = comicVineData.props;
|
||||||
|
console.log(comicVineData);
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
{!isEmpty(comicVineSearchQueryObject) && (
|
{!isEmpty(comicVineSearchQueryObject) && (
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import { default as numbers } from "compromise-numbers";
|
|||||||
import xregexp from "xregexp";
|
import xregexp from "xregexp";
|
||||||
import { MatchArray } from "xregexp/types";
|
import { MatchArray } from "xregexp/types";
|
||||||
import voca from "voca";
|
import voca from "voca";
|
||||||
import { xor, isEmpty, isNull } from "lodash";
|
import { xor, isEmpty, isNull, isNil } from "lodash";
|
||||||
|
|
||||||
nlp.extend(sentences);
|
nlp.extend(sentences);
|
||||||
nlp.extend(numbers);
|
nlp.extend(numbers);
|
||||||
@@ -75,17 +75,68 @@ export const tokenize = (inputString: string) => {
|
|||||||
|
|
||||||
const yearMatches = extractYears(inputString);
|
const yearMatches = extractYears(inputString);
|
||||||
|
|
||||||
// filter out anything at the end of the title in parentheses
|
const hyphenatedIssueRange = inputString.match(/(\d)(-\d+)/gi);
|
||||||
inputString = inputString.replace(/\((.*?)\)$/gi, "");
|
if (!isNull(hyphenatedIssueRange) && hyphenatedIssueRange.length > 2) {
|
||||||
|
const issueNumber = hyphenatedIssueRange[0];
|
||||||
|
}
|
||||||
|
|
||||||
// regexes to match constituent parts of the search string
|
const readingListIndicators = inputString.match(
|
||||||
// and isolate the search terms
|
/^\s*\d+(\.\s+?|\s*-?\s*)/gim,
|
||||||
|
);
|
||||||
|
|
||||||
inputString.replace(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, "");
|
// Issue numbers
|
||||||
|
let issueNumbers = "";
|
||||||
|
let parsedIssueNumber = "";
|
||||||
|
|
||||||
|
// https://regex101.com/r/fgmd22/1
|
||||||
|
const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
|
||||||
|
const tpbIssueNumber = inputString.match(/((\s|\|-|:)v?\d?\s)/gim);
|
||||||
inputString.replace(
|
inputString.replace(
|
||||||
/(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
|
/(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
|
||||||
"",
|
"",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// find the matches for a tpb "issue" number such as v2
|
||||||
|
if (!isNil(tpbIssueNumber)) {
|
||||||
|
parsedIssueNumber = tpbIssueNumber[0].trim();
|
||||||
|
}
|
||||||
|
if (!isEmpty(issues) && !isNull(issues)) {
|
||||||
|
issueNumbers = issues[0].trim();
|
||||||
|
const matches = extractNumerals(issueNumbers);
|
||||||
|
// if we parsed out some potential issue numbers, designate the LAST
|
||||||
|
// (rightmost) one as the actual issue number, and remove it from the name
|
||||||
|
|
||||||
|
if (matches.length > 0) {
|
||||||
|
parsedIssueNumber = matches[0].pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inputString = voca.replace(inputString, parsedIssueNumber, "");
|
||||||
|
|
||||||
|
// filter out anything at the end of the title in parentheses
|
||||||
|
inputString = inputString.replace(/\((.*?)\)$/gi, "");
|
||||||
|
|
||||||
|
// get a subtitle for titles such as:
|
||||||
|
// Commando 4779 - Evil in the East (2015) (Digital) (DR & Quinch-Empire)
|
||||||
|
// will match "Evil in the East (2015) (Digital) (DR & Quinch-Empire)"
|
||||||
|
const subtitleMatch = inputString.match(/\s\-\s(.*)/gm);
|
||||||
|
let subtitle = "";
|
||||||
|
if (!isNil(subtitleMatch)) {
|
||||||
|
subtitle = subtitleMatch[0].replace(/[^a-zA-Z0-9 ]/gm, "");
|
||||||
|
subtitle = subtitle.trim();
|
||||||
|
|
||||||
|
// Remove the subtitle from the main input string
|
||||||
|
// Commando 4779 - Evil in the East (2015) (Digital) (DR & Quinch-Empire)
|
||||||
|
// will return "Commando 4779"
|
||||||
|
inputString = inputString.replace(/\s\-\s(.*)/gm, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// replace special characters with... nothing
|
||||||
|
inputString = inputString.replace(/[^a-zA-Z0-9 ]/gm, "");
|
||||||
|
|
||||||
|
// regexes to match constituent parts of the search string
|
||||||
|
// and isolate the search terms
|
||||||
|
inputString.replace(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, "");
|
||||||
inputString.replace(/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi, "");
|
inputString.replace(/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi, "");
|
||||||
|
|
||||||
// if the name has things like "4 of 5", remove the " of 5" part
|
// if the name has things like "4 of 5", remove the " of 5" part
|
||||||
@@ -101,31 +152,6 @@ export const tokenize = (inputString: string) => {
|
|||||||
|
|
||||||
inputString.replace(/([^\d]+)(\s*(of|de|di|von|van|z)\s*#*\d+)/gi, "");
|
inputString.replace(/([^\d]+)(\s*(of|de|di|von|van|z)\s*#*\d+)/gi, "");
|
||||||
|
|
||||||
const hyphenatedIssueRange = inputString.match(/(\d)(-\d+)/gi);
|
|
||||||
if (!isNull(hyphenatedIssueRange) && hyphenatedIssueRange.length > 2) {
|
|
||||||
const issueNumber = hyphenatedIssueRange[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
const readingListIndicators = inputString.match(
|
|
||||||
/^\s*\d+(\.\s+?|\s*-?\s*)/gim,
|
|
||||||
);
|
|
||||||
|
|
||||||
let issueNumbers = "";
|
|
||||||
let parsedIssueNumber = "";
|
|
||||||
const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
|
|
||||||
|
|
||||||
if (!isEmpty(issues) && !isNull(issues)) {
|
|
||||||
issueNumbers = issues[0].trim();
|
|
||||||
const matches = extractNumerals(issueNumbers);
|
|
||||||
// if we parsed out some potential issue numbers, designate the LAST
|
|
||||||
// (rightmost) one as the actual issue number, and remove it from the name
|
|
||||||
|
|
||||||
if (matches.length > 0) {
|
|
||||||
parsedIssueNumber = matches[0].pop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inputString = voca.replace(inputString, parsedIssueNumber, "");
|
|
||||||
inputString = voca.replace(inputString, /_.-# /gi, "");
|
inputString = voca.replace(inputString, /_.-# /gi, "");
|
||||||
inputString = nlp(inputString).text("normal").trim();
|
inputString = nlp(inputString).text("normal").trim();
|
||||||
|
|
||||||
@@ -138,7 +164,8 @@ export const tokenize = (inputString: string) => {
|
|||||||
const queryObject = {
|
const queryObject = {
|
||||||
comicbook_identifier_tokens: {
|
comicbook_identifier_tokens: {
|
||||||
inputString,
|
inputString,
|
||||||
parsedIssueNumber,
|
parsedIssueNumber: Number(parsedIssueNumber),
|
||||||
|
subtitle,
|
||||||
},
|
},
|
||||||
years: yearMatches,
|
years: yearMatches,
|
||||||
sentence_tokens: {
|
sentence_tokens: {
|
||||||
@@ -154,7 +181,7 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
|
|||||||
// "issue number-like" re.match objects. For example, this method finds
|
// "issue number-like" re.match objects. For example, this method finds
|
||||||
// matches substrings like: 3, #4, 5a, 6.00, 10.0b, .5, -1.0
|
// matches substrings like: 3, #4, 5a, 6.00, 10.0b, .5, -1.0
|
||||||
const matches: MatchArray[string] = [];
|
const matches: MatchArray[string] = [];
|
||||||
xregexp.forEach(inputString, /(^|[_\s#])(-?\d*\.?\d\w*)/gmu, (match) => {
|
xregexp.forEach(inputString, /(^|[_\s#v?])(-?\d*\.?\d\w*)/gmu, (match) => {
|
||||||
matches.push(match);
|
matches.push(match);
|
||||||
});
|
});
|
||||||
return matches;
|
return matches;
|
||||||
@@ -176,6 +203,7 @@ export const refineQuery = (inputString: string) => {
|
|||||||
name: queryObj.comicbook_identifier_tokens.inputString,
|
name: queryObj.comicbook_identifier_tokens.inputString,
|
||||||
number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
|
number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
|
||||||
year: queryObj.years?.toString(),
|
year: queryObj.years?.toString(),
|
||||||
|
subtitle: queryObj.comicbook_identifier_tokens.subtitle,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
meta: {
|
meta: {
|
||||||
|
|||||||
Reference in New Issue
Block a user