💬 Adding regexes for string matching
This commit is contained in:
@@ -13,15 +13,36 @@ nlp.extend(dates);
|
||||
* @function
|
||||
* @param {Object} searchCriteriaPayload - Search criteria object; its `inputString` property is the string used to search against CV, Shortboxed, and other APIs.
|
||||
*/
|
||||
export const tokenize = (inputString) => {
|
||||
export const tokenize = (searchCriteriaPayload) => {
|
||||
const { inputString } = searchCriteriaPayload;
|
||||
const doc = nlp(inputString);
|
||||
const sentence = doc.sentences().json();
|
||||
const number = doc.numbers().fractions();
|
||||
|
||||
// regexes to match constituent parts of the search string
|
||||
// and isolate the search terms
|
||||
const chapters = inputString.match(/ch(a?p?t?e?r?)(\W?)(\_?)(\#?)(\d)/gi);
|
||||
const volumes = inputString.match(/v(o?l?u?m?e?)(\W?)(\_?)(\s?)(\d+)/gi);
|
||||
const volumes = inputString.match(
|
||||
/(\b(vo?l?u?m?e?)\.?)(\s*-|\s*_)?(\s*[0-9]+[.0-9a-z]*)/gi,
|
||||
);
|
||||
const pageCounts = inputString.match(
|
||||
/\b[.,]?\s*\d+\s*(p|pg|pgs|pages)\b\s*/gi,
|
||||
);
|
||||
|
||||
const parantheses = inputString.match(/\([^\(]*?\)/gi);
|
||||
const curlyBraces = inputString.match(/\{[^\{]*?\}/gi);
|
||||
const squareBrackets = inputString.match(/\[[^\[]*?\]/gi);
|
||||
const genericNumericRange = inputString.match(
|
||||
/([^\d]+)(\s*(of|de|di|von|van|z)\s*#*\d+)/gi,
|
||||
);
|
||||
const hyphenatedNumericRange = inputString.match(/([^\d])?(-\d+)/gi);
|
||||
const readingListIndicators = inputString.match(
|
||||
/^\s*\d+(\.\s+?|\s*-?\s*)/gim,
|
||||
);
|
||||
|
||||
const issues = inputString.match(/issue(\W?)(\_?)(\d+)/gi);
|
||||
const issueHashes = inputString.match(/\#\d/gi);
|
||||
const yearMatches = inputString.match(/\d{4}/g);
|
||||
const yearMatches = inputString.match(/\d{4}/gi);
|
||||
|
||||
const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
|
||||
const normalizedSentence = nlp(sentenceToProcess)
|
||||
|
||||
Reference in New Issue
Block a user