First commit

2021-04-15 15:08:54 -07:00
commit 2ccebf13b8
39 changed files with 26887 additions and 0 deletions


@@ -0,0 +1,57 @@
import nlp from "compromise";
import dates from "compromise-dates";
import sentences from "compromise-sentences";
import numbers from "compromise-numbers";
import _ from "lodash";

// Register the compromise plugins used below.
nlp.extend(sentences);
nlp.extend(numbers);
nlp.extend(dates);

export function tokenize(inputString) {
  const doc = nlp(inputString);
  // Sentence breakdown and fraction-style numbers (used below as issue ranges).
  const sentence = doc.sentences().json();
  const number = doc.numbers().fractions();
  // Comic-book identifiers pulled out with regexes; String.match() returns an
  // array of matching substrings, or null when nothing matches.
  const chapters = inputString.match(/ch(a?p?t?e?r?)(\W?)(_?)(#?)(\d+)/gi);
  const volumes = inputString.match(/v(o?l?u?m?e?)(\W?)(_?)(\s?)(\d+)/gi);
  const issues = inputString.match(/issue(\W?)(_?)(\d+)/gi);
  const issueHashes = inputString.match(/#\d+/gi);
  // Any four-digit run is treated as a year.
  const yearMatches = inputString.match(/\d{4}/g);
  // Normalize the first sentence: swap underscores for spaces, then let
  // compromise produce its normalized text form and split it into tokens.
  const sentenceToProcess = sentence[0].normal.replace(/_/g, " ");
  const normalizedSentence = nlp(sentenceToProcess)
    .text("normal")
    .trim()
    .split(" ");
  const queryObject = {
    comicbook_identifiers: {
      issues,
      issueHashes,
      chapters,
      volumes,
      issueRanges: number,
    },
    years: {
      yearMatches,
    },
    sentences: {
      detailed: sentence,
      normalized: normalizedSentence,
    },
  };
  return queryObject;
}

export function refineQuery(queryString) {
  const queryObj = tokenize(queryString);
  // Symmetric difference drops the year tokens from the normalized sentence;
  // lodash ignores a null yearMatches list here.
  const removedYears = _.xor(
    queryObj.sentences.normalized,
    queryObj.years.yearMatches,
  );
  return {
    tokenized: removedYears,
    normalized: removedYears.join(" "),
    meta: queryObj,
  };
}
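
A quick usage sketch, not part of the commit: the sample query string and the "./tokenize.js" import path are illustrative assumptions, since the file's actual name is not shown above.

import { refineQuery } from "./tokenize.js";

const result = refineQuery("Saga volume 2 chapter 7 2013");
console.log(result.normalized);                 // normalized sentence with the year token removed
console.log(result.meta.comicbook_identifiers); // { issues, issueHashes, chapters, volumes, issueRanges }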