⚙️ Added a year extractor regex in filenameparser
This commit is contained in:
@@ -160,7 +160,8 @@ export const fetchComicVineMatches =
|
|||||||
dispatch({
|
dispatch({
|
||||||
type: CV_API_CALL_IN_PROGRESS,
|
type: CV_API_CALL_IN_PROGRESS,
|
||||||
});
|
});
|
||||||
|
console.log(issueSearchQuery);
|
||||||
|
console.log(seriesSearchQuery);
|
||||||
axios
|
axios
|
||||||
.request({
|
.request({
|
||||||
url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
|
url: `${COMICBOOKINFO_SERVICE_URI}/fetchresource`,
|
||||||
@@ -175,6 +176,7 @@ export const fetchComicVineMatches =
|
|||||||
fieldList: "id",
|
fieldList: "id",
|
||||||
limit: "100",
|
limit: "100",
|
||||||
offset: "0",
|
offset: "0",
|
||||||
|
page: 1,
|
||||||
resources: "issue",
|
resources: "issue",
|
||||||
scorerConfiguration: {
|
scorerConfiguration: {
|
||||||
searchQuery: {
|
searchQuery: {
|
||||||
|
|||||||
@@ -73,6 +73,8 @@ export const tokenize = (inputString: string) => {
|
|||||||
const doc = nlp(inputString);
|
const doc = nlp(inputString);
|
||||||
const sentence = doc.sentences().json();
|
const sentence = doc.sentences().json();
|
||||||
|
|
||||||
|
const yearMatches = extractYears(inputString);
|
||||||
|
|
||||||
// filter out anything at the end of the title in parentheses
|
// filter out anything at the end of the title in parentheses
|
||||||
inputString = inputString.replace(/\((.*?)\)$/gi, "");
|
inputString = inputString.replace(/\((.*?)\)$/gi, "");
|
||||||
|
|
||||||
@@ -127,8 +129,6 @@ export const tokenize = (inputString: string) => {
|
|||||||
inputString = voca.replace(inputString, /_.-# /gi, "");
|
inputString = voca.replace(inputString, /_.-# /gi, "");
|
||||||
inputString = nlp(inputString).text("normal").trim();
|
inputString = nlp(inputString).text("normal").trim();
|
||||||
|
|
||||||
const yearMatches = inputString.match(/\d{4}/gi);
|
|
||||||
|
|
||||||
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
|
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
|
||||||
const normalizedSentence = nlp(sentenceToProcess)
|
const normalizedSentence = nlp(sentenceToProcess)
|
||||||
.text("normal")
|
.text("normal")
|
||||||
@@ -140,9 +140,7 @@ export const tokenize = (inputString: string) => {
|
|||||||
inputString,
|
inputString,
|
||||||
parsedIssueNumber,
|
parsedIssueNumber,
|
||||||
},
|
},
|
||||||
years: {
|
years: yearMatches,
|
||||||
yearMatches,
|
|
||||||
},
|
|
||||||
sentence_tokens: {
|
sentence_tokens: {
|
||||||
detailed: sentence,
|
detailed: sentence,
|
||||||
normalized: normalizedSentence,
|
normalized: normalizedSentence,
|
||||||
@@ -162,17 +160,22 @@ export const extractNumerals = (inputString: string): MatchArray[string] => {
|
|||||||
return matches;
|
return matches;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export const extractYears = (inputString: string): RegExpMatchArray | null => {
|
||||||
|
// Searches through the given string left-to-right, seeing if an intelligible
|
||||||
|
// publication year can be extracted.
|
||||||
|
const yearRegex = /(?:19|20)\d{2}/gm;
|
||||||
|
return inputString.match(yearRegex);
|
||||||
|
};
|
||||||
|
|
||||||
export const refineQuery = (inputString: string) => {
|
export const refineQuery = (inputString: string) => {
|
||||||
const queryObj = tokenize(inputString);
|
const queryObj = tokenize(inputString);
|
||||||
const removedYears = xor(
|
const removedYears = xor(queryObj.sentence_tokens.normalized, queryObj.years);
|
||||||
queryObj.sentence_tokens.normalized,
|
|
||||||
queryObj.years.yearMatches,
|
|
||||||
);
|
|
||||||
return {
|
return {
|
||||||
searchParams: {
|
searchParams: {
|
||||||
searchTerms: {
|
searchTerms: {
|
||||||
name: queryObj.comicbook_identifier_tokens.inputString,
|
name: queryObj.comicbook_identifier_tokens.inputString,
|
||||||
number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
|
number: queryObj.comicbook_identifier_tokens.parsedIssueNumber,
|
||||||
|
year: queryObj.years?.toString(),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
meta: {
|
meta: {
|
||||||
|
|||||||
Reference in New Issue
Block a user