⚙️ Added a year extractor regex in filenameparser

2021-12-20 14:57:01 -08:00
parent 3017920fb7
commit 41918daafa
2 changed files with 15 additions and 10 deletions
--- a/src/client/actions/fileops.actions.tsx
+++ b/src/client/actions/fileops.actions.tsx
@@ -160,7 +160,8 @@ export const fetchComicVineMatches =
      dispatch({
        type: CV_API_CALL_IN_PROGRESS,
      });
-
+      console.log(issueSearchQuery);
+      console.log(seriesSearchQuery);
      axios
        .request({
          url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
@@ -175,6 +176,7 @@ export const fetchComicVineMatches =
            fieldList: "id",
            limit: "100",
            offset: "0",
+            page: 1,
            resources: "issue",
            scorerConfiguration: {
              searchQuery: {
--- a/src/client/shared/utils/filenameparser.utils.ts
+++ b/src/client/shared/utils/filenameparser.utils.ts
@@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => {
  const doc = nlp(inputString);
  const sentence = doc.sentences().json();

+  const yearMatches = extractYears(inputString);
+
  // filter out anything at the end of the title in parantheses
  inputString = inputString.replace(/\((.*?)\)$/gi, "");

@@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => {
  inputString = voca.replace(inputString, /_.-# /gi, "");
  inputString = nlp(inputString).text("normal").trim();

-  const yearMatches = inputString.match(/\d{4}/gi);
-
  const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
  const normalizedSentence = nlp(sentenceToProcess)
    .text("normal")
@@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => {
      inputString,
      parsedIssueNumber,
    },
-    years: {
-      yearMatches,
-    },
+    years: yearMatches,
    sentence_tokens: {
      detailed: sentence,
      normalized: normalizedSentence,
@@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
  return matches;
 };

+export const extractYears = (inputString: string): RegExpMatchArray | null => {
+  // Returns every 4-digit run starting with 19 or 20 (1900-2099), left to right,
+  // or null if none. Unanchored: digits inside a longer number will match too.
+  const yearRegex = /(?:19|20)\d{2}/gm;
+  return inputString.match(yearRegex);
+};
+
 export const refineQuery = (inputString: string) => {
  const queryObj = tokenize(inputString);
-  const removedYears = xor(
-    queryObj.sentence_tokens.normalized,
-    queryObj.years.yearMatches,
-  );
+  const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years);
  return {
    searchParams: {
      searchTerms: {
        name: queryObj.comicbook_identifier_tokens.inputString,
        number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
+        year: queryObj.years?.toString(),
      },
    },
    meta: {