⚙️ Added a year extractor regex in filenameparser

This commit is contained in:
2021-12-20 14:57:01 -08:00
parent 3017920fb7
commit 41918daafa
2 changed files with 15 additions and 10 deletions

View File

@@ -160,7 +160,8 @@ export const fetchComicVineMatches =
dispatch({
type: CV_API_CALL_IN_PROGRESS,
});
console.log(issueSearchQuery);
console.log(seriesSearchQuery);
axios
.request({
url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
@@ -175,6 +176,7 @@ export const fetchComicVineMatches =
fieldList: "id",
limit: "100",
offset: "0",
page: 1,
resources: "issue",
scorerConfiguration: {
searchQuery: {

View File

@@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => {
const doc = nlp(inputString);
const sentence = doc.sentences().json();
const yearMatches = extractYears(inputString);
// filter out anything at the end of the title in parentheses
inputString = inputString.replace(/\((.*?)\)$/gi, "");
@@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => {
inputString = voca.replace(inputString, /_.-# /gi, "");
inputString = nlp(inputString).text("normal").trim();
const yearMatches = inputString.match(/\d{4}/gi);
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
const normalizedSentence = nlp(sentenceToProcess)
.text("normal")
@@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => {
inputString,
parsedIssueNumber,
},
years: {
yearMatches,
},
years: yearMatches,
sentence_tokens: {
detailed: sentence,
normalized: normalizedSentence,
@@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
return matches;
};
/**
 * Extracts plausible publication years from a string.
 *
 * Scans the input left-to-right and collects every standalone four-digit
 * number that starts with `19` or `20` (i.e. 1900–2099). A candidate that is
 * part of a longer run of digits (such as `2019` inside `120199`) is ignored,
 * since it is far more likely to be an issue number or identifier than a year.
 *
 * @param inputString - Text to scan, e.g. a comic book file name.
 * @returns The matched years in order of appearance, or `null` when the
 *   input contains no standalone year.
 */
export const extractYears = (inputString: string): RegExpMatchArray | null => {
  // Digit lookarounds are used instead of `\b` because `_` counts as a word
  // character, so `\b` would fail to match names like "Title_2019_001".
  const yearRegex = /(?<!\d)(?:19|20)\d{2}(?!\d)/g;
  return inputString.match(yearRegex);
};
export const refineQuery = (inputString: string) => {
const queryObj = tokenize(inputString);
const removedYears = xor(
queryObj.sentence_tokens.normalized,
queryObj.years.yearMatches,
);
const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years);
return {
searchParams: {
searchTerms: {
name: queryObj.comicbook_identifier_tokens.inputString,
number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
year: queryObj.years?.toString(),
},
},
meta: {