From 55eb18a5a039e1cac099bd4e9c54aef99e4879e3 Mon Sep 17 00:00:00 2001
From: Rishi Ghan <rishi.ghan@gmail.com>
Date: Wed, 7 Jul 2021 09:25:21 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20Tweaking=20the=20matching=20algo?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/client/actions/fileops.actions.tsx |  1 -
 src/client/shared/utils/nlp.utils.ts   | 24 ++++++++++++++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/client/actions/fileops.actions.tsx b/src/client/actions/fileops.actions.tsx
index 6070200..3415a5d 100644
--- a/src/client/actions/fileops.actions.tsx
+++ b/src/client/actions/fileops.actions.tsx
@@ -143,7 +143,6 @@ export const fetchComicVineMatches = (searchPayload) => (dispatch) => {
         ],
       })
       .then((response) => {
-        console.log(response);
         dispatch({
           type: CV_SEARCH_SUCCESS,
           searchResults: response.data,
diff --git a/src/client/shared/utils/nlp.utils.ts b/src/client/shared/utils/nlp.utils.ts
index f74f3fc..fcb6d2d 100644
--- a/src/client/shared/utils/nlp.utils.ts
+++ b/src/client/shared/utils/nlp.utils.ts
@@ -8,6 +8,22 @@ nlp.extend(sentences);
 nlp.extend(numbers);
 nlp.extend(dates);
 
+export const preprocess = (inputString) => {
+  // Format 1: a leading number precedes the series name; per the example it is dropped:
+  // "nnn series name #xx (etc) (etc)" -> "series name #xx (etc) (etc)"
+  // NOTE(review): runs on a hard-coded sample, not inputString; format1 is never read.
+  const format1 = "124 series name #xx (etc) (etc)".match(
+    /^\s*(\d+)[\s._-]+?([^#]+)(\W+.*)/,
+  );
+
+  // Format 2: a "- title" follows the issue number; per the example it is dropped:
+  // "series name #xxx - title (etc) (etc)" -> "series name #xxx (etc) (etc)"
+  // NOTE(review): runs on an empty string literal; format2 is never read, nothing is returned.
+  const format2 = "".match(
+    /^((?:[a-zA-Z,.-]+\s)+)(\#?(?:\d+[.0-9*])\s*(?:-))(.*((\(.*)?))$/gis,
+  );
+};
+
 /**
  * Tokenizes a search string
  * @function
@@ -20,8 +36,12 @@ export const tokenize = (inputString) => {
 
   // regexes to match constituent parts of the search string
   // and isolate the search terms
-  const chapters = inputString.match(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi);
-  const volumes = inputString.match(
+
+  const chapters = inputString.replace(
+    /ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi,
+    "",
+  );
+  const volumes = inputString.replace(
     /(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
   );
   const pageCounts = inputString.match(