From 55eb18a5a039e1cac099bd4e9c54aef99e4879e3 Mon Sep 17 00:00:00 2001 From: Rishi Ghan Date: Wed, 7 Jul 2021 09:25:21 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20Tweaking=20the=20matching=20algo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/client/actions/fileops.actions.tsx | 1 - src/client/shared/utils/nlp.utils.ts | 24 ++++++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/client/actions/fileops.actions.tsx b/src/client/actions/fileops.actions.tsx index 6070200..3415a5d 100644 --- a/src/client/actions/fileops.actions.tsx +++ b/src/client/actions/fileops.actions.tsx @@ -143,7 +143,6 @@ export const fetchComicVineMatches = (searchPayload) => (dispatch) => { ], }) .then((response) => { - console.log(response); dispatch({ type: CV_SEARCH_SUCCESS, searchResults: response.data, diff --git a/src/client/shared/utils/nlp.utils.ts b/src/client/shared/utils/nlp.utils.ts index f74f3fc..fcb6d2d 100644 --- a/src/client/shared/utils/nlp.utils.ts +++ b/src/client/shared/utils/nlp.utils.ts @@ -8,6 +8,22 @@ nlp.extend(sentences); nlp.extend(numbers); nlp.extend(dates); +export const preprocess = (inputString) => { + // see if the comic matches the following format, and if so, remove everything + // after the first number: + // "nnn series name #xx (etc) (etc)" -> "series name #xx (etc) (etc)" + const format1 = "124 series name #xx (etc) (etc)".match( + /^\s*(\d+)[\s._-]+?([^#]+)(\W+.*)/, + ); + + // see if the comic matches the following format, and if so, remove everything + // after the first number that isn't in brackets: + // "series name #xxx - title (etc) (etc)" -> "series name #xxx (etc) (etc) + const format2 = "".match( + /^((?:[a-zA-Z,.-]+\s)+)(\#?(?:\d+[.0-9*])\s*(?:-))(.*((\(.*)?))$/gis, + ); +}; + /** * Tokenizes a search string * @function @@ -20,8 +36,12 @@ export const tokenize = (inputString) => { // regexes to match constituent parts of the search string // and isolate the search terms - const chapters = inputString.match(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi); - const volumes = inputString.match( + + const chapters = inputString.replace( + /ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi, + "", + ); + const volumes = inputString.replace( /(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi, ); const pageCounts = inputString.match(