🔧 Wiring up the updated CV scraper call

2021-12-31 15:34:53 -08:00
parent 41918daafa
commit a964ffbf07
3 changed files with 69 additions and 45 deletions
--- a/src/client/actions/fileops.actions.tsx
+++ b/src/client/actions/fileops.actions.tsx
@@ -164,37 +164,32 @@ export const fetchComicVineMatches =
      console.log(seriesSearchQuery);
      axios
        .request({
-          url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
+          url: `${COMICBOOKINFO_SERVICE_URI}/volumeBasedSearch`,
          method: "POST",
          data: {
            format: "json",
            sort: "name%3Aasc",
            // hack: strip everything except letters, digits and spaces from the search term
            query: issueSearchQuery.searchParams.searchTerms.name
              .replace(/[^a-zA-Z0-9 ]/g, "")
              .trim(),
            fieldList: "id",
            limit: "100",
            offset: "0",
            page: 1,
-            resources: "issue",
+            resources: "volume",
            scorerConfiguration: {
-              searchQuery: {
+              searchParams: issueSearchQuery.searchParams,
                issue: issueSearchQuery,
                series: seriesSearchQuery,
              },
              rawFileDetails: searchPayload.rawFileDetails,
            },
            rawFileDetails: searchPayload.rawFileDetails,
          },
          transformResponse: (r) => {
            const matches = JSON.parse(r);
-            return sortBy(matches, (match) => -match.score);
+            return matches;
            // return sortBy(matches, (match) => -match.score);
          },
        })
        .then((response) => {
          dispatch({
            type: CV_SEARCH_SUCCESS,
-            searchResults: response.data,
+            searchResults: response.data.results,
            searchQueryObject: {
              issue: issueSearchQuery,
              series: seriesSearchQuery,
--- a/src/client/components/ComicDetail/ComicVineMatchPanel.tsx
+++ b/src/client/components/ComicDetail/ComicVineMatchPanel.tsx
@@ -10,6 +10,7 @@ export const ComicVineMatchPanel = (comicVineData): ReactElement => {
    comicVineAPICallProgress,
    comicVineSearchResults,
  } = comicVineData.props;
  console.log(comicVineData);
  return (
    <>
      {!isEmpty(comicVineSearchQueryObject) && (
--- a/src/client/shared/utils/filenameparser.utils.ts
+++ b/src/client/shared/utils/filenameparser.utils.ts
@@ -5,7 +5,7 @@ import { default as numbers } from "compromise-numbers";
 import xregexp from "xregexp";
 import { MatchArray } from "xregexp/types";
 import voca from "voca";
-import { xor, isEmpty, isNull } from "lodash";
+import { xor, isEmpty, isNull, isNil } from "lodash";
 nlp.extend(sentences);
 nlp.extend(numbers);
@@ -75,17 +75,68 @@ export const tokenize = (inputString: string) => {
  const yearMatches = extractYears(inputString);
-  // filter out anything at the end of the title in parantheses
+  const hyphenatedIssueRange = inputString.match(/(\d)(-\d+)/gi);
-  inputString = inputString.replace(/\((.*?)\)$/gi, "");
+  if (!isNull(hyphenatedIssueRange) && hyphenatedIssueRange.length > 2) {
    const issueNumber = hyphenatedIssueRange[0];
  }
-  // regexes to match constituent parts of the search string
+  const readingListIndicators = inputString.match(
-  // and isolate the search terms
+    /^\s*\d+(\.\s+?|\s*-?\s*)/gim,
  );
-  inputString.replace(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, "");
+  // Issue numbers
  let issueNumbers = "";
  let parsedIssueNumber = "";
  // https://regex101.com/r/fgmd22/1
  const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
  const tpbIssueNumber = inputString.match(/((\s|\|-|:)v?\d?\s)/gim);
  inputString.replace(
    /(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
    "",
  );
  // find the matches for a tpb "issue" number such as v2
  if (!isNil(tpbIssueNumber)) {
    parsedIssueNumber = tpbIssueNumber[0].trim();
  }
  if (!isEmpty(issues) && !isNull(issues)) {
    issueNumbers = issues[0].trim();
    const matches = extractNumerals(issueNumbers);
    // if we parsed out some potential issue numbers, designate the LAST
    // (rightmost) one as the actual issue number, and remove it from the name
    if (matches.length > 0) {
      parsedIssueNumber = matches[0].pop();
    }
  }
  inputString = voca.replace(inputString, parsedIssueNumber, "");
  // filter out anything at the end of the title in parentheses
  inputString = inputString.replace(/\((.*?)\)$/gi, "");
  // get a subtitle for titles such as:
  // Commando 4779 - Evil in the East (2015) (Digital) (DR & Quinch-Empire)
  // will match "Evil in the East (2015) (Digital) (DR & Quinch-Empire)"
  const subtitleMatch = inputString.match(/\s\-\s(.*)/gm);
  let subtitle = "";
  if (!isNil(subtitleMatch)) {
    subtitle = subtitleMatch[0].replace(/[^a-zA-Z0-9 ]/gm, "");
    subtitle = subtitle.trim();
    // Remove the subtitle from the main input string
    // Commando 4779 - Evil in the East (2015) (Digital) (DR & Quinch-Empire)
    // will return "Commando 4779"
    inputString = inputString.replace(/\s\-\s(.*)/gm, "");
  }
  // replace special characters with... nothing
  inputString = inputString.replace(/[^a-zA-Z0-9 ]/gm, "");
  // regexes to match constituent parts of the search string
  // and isolate the search terms
  inputString.replace(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, "");
  inputString.replace(/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi, "");
  // if the name has things like "4 of 5", remove the " of 5" part
@@ -101,31 +152,6 @@ export const tokenize = (inputString: string) => {
  inputString.replace(/([^\d]+)(\s*(of|de|di|von|van|z)\s*#*\d+)/gi, "");
  const hyphenatedIssueRange = inputString.match(/(\d)(-\d+)/gi);
  if (!isNull(hyphenatedIssueRange) && hyphenatedIssueRange.length > 2) {
    const issueNumber = hyphenatedIssueRange[0];
  }
  const readingListIndicators = inputString.match(
    /^\s*\d+(\.\s+?|\s*-?\s*)/gim,
  );
  let issueNumbers = "";
  let parsedIssueNumber = "";
  const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi);
  if (!isEmpty(issues) && !isNull(issues)) {
    issueNumbers = issues[0].trim();
    const matches = extractNumerals(issueNumbers);
    // if we parsed out some potential issue numbers, designate the LAST
    // (rightmost) one as the actual issue number, and remove it from the name
    if (matches.length > 0) {
      parsedIssueNumber = matches[0].pop();
    }
  }
  inputString = voca.replace(inputString, parsedIssueNumber, "");
  inputString = voca.replace(inputString, /_.-# /gi, "");
  inputString = nlp(inputString).text("normal").trim();
@@ -138,7 +164,8 @@ export const tokenize = (inputString: string) => {
  const queryObject = {
    comicbook_identifier_tokens: {
      inputString,
-      parsedIssueNumber,
+      parsedIssueNumber: Number(parsedIssueNumber),
      subtitle,
    },
    years: yearMatches,
    sentence_tokens: {
@@ -154,7 +181,7 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
  // "issue number-like" re.match objects.  For example, this method finds
  // matches substrings like:  3, #4, 5a, 6.00, 10.0b, .5, -1.0
  const matches: MatchArray[string] = [];
-  xregexp.forEach(inputString, /(^|[_\s#])(-?\d*\.?\d\w*)/gmu, (match) => {
+  xregexp.forEach(inputString, /(^|[_\s#v?])(-?\d*\.?\d\w*)/gmu, (match) => {
    matches.push(match);
  });
  return matches;
@@ -176,6 +203,7 @@ export const refineQuery = (inputString: string) => {
        name: queryObj.comicbook_identifier_tokens.inputString,
        number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
        year: queryObj.years?.toString(),
        subtitle: queryObj.comicbook_identifier_tokens.subtitle,
      },
    },
    meta: {