From 41918daafa48eb948c0ee0dadbac52a3b264974c Mon Sep 17 00:00:00 2001
From: Rishi Ghan <rishi.ghan@gmail.com>
Date: Mon, 20 Dec 2021 14:57:01 -0800
Subject: [PATCH] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20Added=20a=20year=20extract?=
 =?UTF-8?q?or=20regex=20in=20filenameparser?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/client/actions/fileops.actions.tsx        |  4 +++-
 .../shared/utils/filenameparser.utils.ts      | 21 +++++++++++--------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/client/actions/fileops.actions.tsx b/src/client/actions/fileops.actions.tsx
index 02b0459..3adf502 100644
--- a/src/client/actions/fileops.actions.tsx
+++ b/src/client/actions/fileops.actions.tsx
@@ -160,7 +160,8 @@ export const fetchComicVineMatches =
       dispatch({
         type: CV_API_CALL_IN_PROGRESS,
       });
-
+      console.log(issueSearchQuery);
+      console.log(seriesSearchQuery);
       axios
         .request({
           url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
@@ -175,6 +176,7 @@ export const fetchComicVineMatches =
             fieldList: "id",
             limit: "100",
             offset: "0",
+            page: 1,
             resources: "issue",
             scorerConfiguration: {
               searchQuery: {
diff --git a/src/client/shared/utils/filenameparser.utils.ts b/src/client/shared/utils/filenameparser.utils.ts
index f61fb9f..f02731c 100644
--- a/src/client/shared/utils/filenameparser.utils.ts
+++ b/src/client/shared/utils/filenameparser.utils.ts
@@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => {
   const doc = nlp(inputString);
   const sentence = doc.sentences().json();
 
+  const yearMatches = extractYears(inputString);
+
   // filter out anything at the end of the title in parantheses
   inputString = inputString.replace(/\((.*?)\)$/gi, "");
 
@@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => {
   inputString = voca.replace(inputString, /_.-# /gi, "");
   inputString = nlp(inputString).text("normal").trim();
 
-  const yearMatches = inputString.match(/\d{4}/gi);
-
   const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
   const normalizedSentence = nlp(sentenceToProcess)
     .text("normal")
@@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => {
       inputString,
       parsedIssueNumber,
     },
-    years: {
-      yearMatches,
-    },
+    years: yearMatches,
     sentence_tokens: {
       detailed: sentence,
       normalized: normalizedSentence,
@@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
   return matches;
 };
 
+export const extractYears = (inputString: string): RegExpMatchArray | null => {
+  // Candidate publication years: every "19xx"/"20xx" substring, else null.
+  // NOTE(review): also matches inside longer digit runs (e.g. "120195").
+  const yearRegex = /(?:19|20)\d{2}/gm;
+  return inputString.match(yearRegex);
+};
+
 export const refineQuery = (inputString: string) => {
   const queryObj = tokenize(inputString);
-  const removedYears = xor(
-    queryObj.sentence_tokens.normalized,
-    queryObj.years.yearMatches,
-  );
+  const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years);
   return {
     searchParams: {
       searchTerms: {
         name: queryObj.comicbook_identifier_tokens.inputString,
         number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
+        year: queryObj.years?.toString(),
       },
     },
     meta: {