From 41918daafa48eb948c0ee0dadbac52a3b264974c Mon Sep 17 00:00:00 2001 From: Rishi Ghan Date: Mon, 20 Dec 2021 14:57:01 -0800 Subject: [PATCH] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20Added=20a=20year=20extract?= =?UTF-8?q?or=20regex=20in=20filenameparser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/client/actions/fileops.actions.tsx | 4 +++- .../shared/utils/filenameparser.utils.ts | 21 +++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/client/actions/fileops.actions.tsx b/src/client/actions/fileops.actions.tsx index 02b0459..3adf502 100644 --- a/src/client/actions/fileops.actions.tsx +++ b/src/client/actions/fileops.actions.tsx @@ -160,7 +160,8 @@ export const fetchComicVineMatches = dispatch({ type: CV_API_CALL_IN_PROGRESS, }); - + console.log(issueSearchQuery); + console.log(seriesSearchQuery); axios .request({ url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`, @@ -175,6 +176,7 @@ export const fetchComicVineMatches = fieldList: "id", limit: "100", offset: "0", + page: 1, resources: "issue", scorerConfiguration: { searchQuery: { diff --git a/src/client/shared/utils/filenameparser.utils.ts b/src/client/shared/utils/filenameparser.utils.ts index f61fb9f..f02731c 100644 --- a/src/client/shared/utils/filenameparser.utils.ts +++ b/src/client/shared/utils/filenameparser.utils.ts @@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => { const doc = nlp(inputString); const sentence = doc.sentences().json(); + const yearMatches = extractYears(inputString); + // filter out anything at the end of the title in parantheses inputString = inputString.replace(/\((.*?)\)$/gi, ""); @@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => { inputString = voca.replace(inputString, /_.-# /gi, ""); inputString = nlp(inputString).text("normal").trim(); - const yearMatches = inputString.match(/\d{4}/gi); - const sentenceToProcess = sentence[0].normal.replace(/_/g, " "); const normalizedSentence = nlp(sentenceToProcess) .text("normal") @@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => { inputString, parsedIssueNumber, }, - years: { - yearMatches, - }, + years: yearMatches, sentence_tokens: { detailed: sentence, normalized: normalizedSentence, @@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => { return matches; }; +export const extractYears = (inputString: string): RegExpMatchArray | null => { + // Searches through the given string left-to-right, seeing if an intelligible + // publication year can be extracted. + const yearRegex = /(?:19|20)\d{2}/gm; + return inputString.match(yearRegex); +}; + export const refineQuery = (inputString: string) => { const queryObj = tokenize(inputString); - const removedYears = xor( - queryObj.sentence_tokens.normalized, - queryObj.years.yearMatches, - ); + const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years); return { searchParams: { searchTerms: { name: queryObj.comicbook_identifier_tokens.inputString, number: queryObj.comicbook_identifier_tokens.parsedIssueNumber, + year: queryObj.years?.toString(), }, }, meta: {