⚙️ Added a year extractor regex in filenameparser

2021-12-20 14:57:01 -08:00
parent 3017920fb7
commit 41918daafa
2 changed files with 15 additions and 10 deletions
--- a/src/client/actions/fileops.actions.tsx
+++ b/src/client/actions/fileops.actions.tsx
@@ -160,7 +160,8 @@ export const fetchComicVineMatches =
      dispatch({
        type: CV_API_CALL_IN_PROGRESS,
      });
-
+      console.log(issueSearchQuery);
      console.log(seriesSearchQuery);
      axios
        .request({
          url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
@@ -175,6 +176,7 @@ export const fetchComicVineMatches =
            fieldList: "id",
            limit: "100",
            offset: "0",
            page: 1,
            resources: "issue",
            scorerConfiguration: {
              searchQuery: {
--- a/src/client/shared/utils/filenameparser.utils.ts
+++ b/src/client/shared/utils/filenameparser.utils.ts
@@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => {
  const doc = nlp(inputString);
  const sentence = doc.sentences().json();
  const yearMatches = extractYears(inputString);
  // filter out anything at the end of the title in parentheses
  inputString = inputString.replace(/\((.*?)\)$/gi, "");
@@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => {
  inputString = voca.replace(inputString, /_.-# /gi, "");
  inputString = nlp(inputString).text("normal").trim();
  const yearMatches = inputString.match(/\d{4}/gi);
  const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
  const normalizedSentence = nlp(sentenceToProcess)
    .text("normal")
@@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => {
      inputString,
      parsedIssueNumber,
    },
-    years: {
+    years: yearMatches,
      yearMatches,
    },
    sentence_tokens: {
      detailed: sentence,
      normalized: normalizedSentence,
@@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
  return matches;
 };
 /**
  * Extracts plausible publication years from a string such as a comic filename.
  *
  * A year is a standalone run of exactly four digits beginning with `19` or
  * `20`. "Standalone" means the run is not directly preceded or followed by
  * another digit, so a fragment of a longer number (e.g. `2019` inside
  * `120199`) is not reported as a year.
  *
  * @param inputString - The text to scan.
  * @returns Every matching year in left-to-right order, or `null` when the
  *   string contains none.
  */
 export const extractYears = (inputString: string): RegExpMatchArray | null => {
  // Digit lookarounds are used instead of `\b` because `_` counts as a word
  // character, so `\b` would fail to match years in names like
  // `Batman_2019_001`. The `m` flag is omitted: the pattern has no `^`/`$`
  // anchors, so it would have no effect.
  const yearRegex = /(?<!\d)(?:19|20)\d{2}(?!\d)/g;
  return inputString.match(yearRegex);
 };
 export const refineQuery = (inputString: string) => {
  const queryObj = tokenize(inputString);
-  const removedYears = xor(
+  const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years);
    queryObj.sentence_tokens.normalized,
    queryObj.years.yearMatches,
  );
  return {
    searchParams: {
      searchTerms: {
        name: queryObj.comicbook_identifier_tokens.inputString,
        number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
        year: queryObj.years?.toString(),
      },
    },
    meta: {