⚙️ Added a year extractor regex in filenameparser

This commit is contained in:
2021-12-20 14:57:01 -08:00
parent 3017920fb7
commit 41918daafa
2 changed files with 15 additions and 10 deletions

View File

@@ -160,7 +160,8 @@ export const fetchComicVineMatches =
dispatch({ dispatch({
type: CV_API_CALL_IN_PROGRESS, type: CV_API_CALL_IN_PROGRESS,
}); });
console.log(issueSearchQuery);
console.log(seriesSearchQuery);
axios axios
.request({ .request({
url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`, url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
@@ -175,6 +176,7 @@ export const fetchComicVineMatches =
fieldList: "id", fieldList: "id",
limit: "100", limit: "100",
offset: "0", offset: "0",
page: 1,
resources: "issue", resources: "issue",
scorerConfiguration: { scorerConfiguration: {
searchQuery: { searchQuery: {

View File

@@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => {
const doc = nlp(inputString); const doc = nlp(inputString);
const sentence = doc.sentences().json(); const sentence = doc.sentences().json();
const yearMatches = extractYears(inputString);
// filter out anything at the end of the title in parentheses // filter out anything at the end of the title in parentheses
inputString = inputString.replace(/\((.*?)\)$/gi, ""); inputString = inputString.replace(/\((.*?)\)$/gi, "");
@@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => {
inputString = voca.replace(inputString, /_.-# /gi, ""); inputString = voca.replace(inputString, /_.-# /gi, "");
inputString = nlp(inputString).text("normal").trim(); inputString = nlp(inputString).text("normal").trim();
const yearMatches = inputString.match(/\d{4}/gi);
const sentenceToProcess = sentence[0].normal.replace(/_/g, " "); const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
const normalizedSentence = nlp(sentenceToProcess) const normalizedSentence = nlp(sentenceToProcess)
.text("normal") .text("normal")
@@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => {
inputString, inputString,
parsedIssueNumber, parsedIssueNumber,
}, },
years: { years: yearMatches,
yearMatches,
},
sentence_tokens: { sentence_tokens: {
detailed: sentence, detailed: sentence,
normalized: normalizedSentence, normalized: normalizedSentence,
@@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
return matches; return matches;
}; };
export const extractYears = (inputString: string): RegExpMatchArray | null => {
// Searches through the given string left-to-right, seeing if an intelligible
// publication year can be extracted.
const yearRegex = /(?:19|20)\d{2}/gm;
return inputString.match(yearRegex);
};
export const refineQuery = (inputString: string) => { export const refineQuery = (inputString: string) => {
const queryObj = tokenize(inputString); const queryObj = tokenize(inputString);
const removedYears = xor( const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years);
queryObj.sentence_tokens.normalized,
queryObj.years.yearMatches,
);
return { return {
searchParams: { searchParams: {
searchTerms: { searchTerms: {
name: queryObj.comicbook_identifier_tokens.inputString, name: queryObj.comicbook_identifier_tokens.inputString,
number: queryObj.comicbook_identifier_tokens.parsedIssueNumber, number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
year: queryObj.years?.toString(),
}, },
}, },
meta: { meta: {