diff --git a/package.json b/package.json index 69671e5..a441de0 100644 --- a/package.json +++ b/package.json @@ -46,7 +46,8 @@ "react-window-dynamic-list": "^2.3.5", "sharp": "^0.28.1", "socket.io-client": "^4.1.2", - "threetwo-ui-typings": "^1.0.1" + "threetwo-ui-typings": "^1.0.1", + "xregexp": "^5.0.2" }, "devDependencies": { "@babel/cli": "^7.13.10", diff --git a/src/client/actions/fileops.actions.tsx b/src/client/actions/fileops.actions.tsx index 3415a5d..2b17fc4 100644 --- a/src/client/actions/fileops.actions.tsx +++ b/src/client/actions/fileops.actions.tsx @@ -127,9 +127,9 @@ export const fetchComicVineMatches = (searchPayload) => (dispatch) => { format: "json", sort: "name%3Aasc", query: issueSearchQuery.searchParams.searchTerms.name, - fieldList: "", + fieldList: "id", limit: "10", - offset: "5", + offset: "0", resources: "issue", }, transformResponse: [ diff --git a/src/client/shared/utils/nlp.utils.ts b/src/client/shared/utils/nlp.utils.ts index fcb6d2d..f620815 100644 --- a/src/client/shared/utils/nlp.utils.ts +++ b/src/client/shared/utils/nlp.utils.ts @@ -2,13 +2,47 @@ import { default as nlp } from "compromise"; import { default as dates } from "compromise-dates"; import { default as sentences } from "compromise-sentences"; import { default as numbers } from "compromise-numbers"; -import _ from "lodash"; +import xregexp from "xregexp"; +import { map, xor, isEmpty } from "lodash"; nlp.extend(sentences); nlp.extend(numbers); nlp.extend(dates); -export const preprocess = (inputString) => { +interface M { + start: number; + end: number; + value: string; +} + +function replaceRecursive( + text: string, + left: string, + right: string, + replacer: (match: string) => string, +): string { + const r: M[] = xregexp.matchRecursive(text, left, right, "g", { + valueNames: [null, null, "match", null], + }); + let offset = 0; + for (const m of r) { + const replacement = replacer(m.value); + text = replaceAt(text, m.start + offset, m.value.length, replacement); + offset += replacement.length - m.value.length; + } + return text; +} + +function replaceAt( + string: string, + index: number, + length: number, + replacement: string, +): string { + return string.substr(0, index) + replacement + string.substr(index + length); +} + +export const preprocess = (inputString: string) => { // see if the comic matches the following format, and if so, remove everything // after the first number: // "nnn series name #xx (etc) (etc)" -> "series name #xx (etc) (etc)" @@ -24,12 +58,17 @@ export const preprocess = (inputString) => { ); }; +const recursivelyMatch = (regex, inputString) => { + const toReplace = xregexp.replace(inputString, regex, ""); + return toReplace; +}; + /** * Tokenizes a search string * @function * @param {string} inputString - The string used to search against CV, Shortboxed, and other APIs. */ -export const tokenize = (inputString) => { +export const tokenize = (inputString: string) => { const doc = nlp(inputString); const sentence = doc.sentences().json(); const number = doc.numbers().fractions(); @@ -49,6 +88,11 @@ export const tokenize = (inputString) => { ); const parantheses = inputString.match(/\([^\(]*?\)/gi); + const foo = recursivelyMatch( + new RegExp(/\([^\(]*?\)/, "gi"), + "jagan milun sampatkar ((asdasd)(ASDASD)(sadasd))", + ); + console.log(foo); const curlyBraces = inputString.match(/\{[^\{]*?\}/gi); const squareBrackets = inputString.match(/\[[^\[]*?\]/gi); const genericNumericRange = inputString.match( @@ -61,7 +105,7 @@ export const tokenize = (inputString) => { let issueNumbers = ""; const issues = inputString.match(/(^|[_\s#])(-?\d*\.?\d\w*)/gi); - if (!_.isEmpty(issues)) { + if (!isEmpty(issues)) { issueNumbers = issues[0].trim(); } // const issueHashes = inputString.match(/\#\d/gi); @@ -99,7 +143,7 @@ export const tokenize = (inputString) => { export const refineQuery = (inputString) => { const queryObj = tokenize(inputString); - const removedYears = _.xor( + const removedYears = xor( queryObj.sentence_tokens.normalized, queryObj.years.yearMatches, ); diff --git a/yarn.lock b/yarn.lock index 62d0554..73499b2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1010,6 +1010,14 @@ core-js-pure "^3.14.0" regenerator-runtime "^0.13.4" +"@babel/runtime-corejs3@^7.12.1": + version "7.14.7" + resolved "https://registry.yarnpkg.com/@babel/runtime-corejs3/-/runtime-corejs3-7.14.7.tgz#0ef292bbce40ca00f874c9724ef175a12476465c" + integrity sha512-Wvzcw4mBYbTagyBVZpAJWI06auSIj033T/yNE0Zn1xcup83MieCddZA7ls3kme17L4NOGBrQ09Q+nKB41RLWBA== + dependencies: + core-js-pure "^3.15.0" + regenerator-runtime "^0.13.4" + "@babel/runtime@^7.0.0", "@babel/runtime@^7.1.2", "@babel/runtime@^7.10.2", "@babel/runtime@^7.11.2", "@babel/runtime@^7.12.1", "@babel/runtime@^7.13.17", "@babel/runtime@^7.8.4", "@babel/runtime@^7.9.2": version "7.14.5" resolved "https://registry.npmjs.org/@babel/runtime/-/runtime-7.14.5.tgz" @@ -4197,6 +4205,11 @@ core-js-pure@^3.14.0: resolved "https://registry.npmjs.org/core-js-pure/-/core-js-pure-3.14.0.tgz" integrity sha512-YVh+LN2FgNU0odThzm61BsdkwrbrchumFq3oztnE9vTKC4KS2fvnPmcx8t6jnqAyOTCTF4ZSiuK8Qhh7SNcL4g== +core-js-pure@^3.15.0: + version "3.15.2" + resolved "https://registry.yarnpkg.com/core-js-pure/-/core-js-pure-3.15.2.tgz#c8e0874822705f3385d3197af9348f7c9ae2e3ce" + integrity sha512-D42L7RYh1J2grW8ttxoY1+17Y4wXZeKe7uyplAI3FkNQyI5OgBIAjUfFiTPfL1rs0qLpxaabITNbjKl1Sp82tA== + core-js@^2.4.0, core-js@^2.5.0, core-js@^2.6.5: version "2.6.12" resolved "https://registry.npmjs.org/core-js/-/core-js-2.6.12.tgz" @@ -14033,6 +14046,13 @@ xmlcreate@^2.0.3: resolved "https://registry.npmjs.org/xmlcreate/-/xmlcreate-2.0.3.tgz" integrity sha512-HgS+X6zAztGa9zIK3Y3LXuJes33Lz9x+YyTxgrkIdabu2vqcGOWwdfCpf1hWLRrd553wd4QCDf6BBO6FfdsRiQ== +xregexp@^5.0.2: + version "5.0.2" + resolved "https://registry.yarnpkg.com/xregexp/-/xregexp-5.0.2.tgz#798aa7757836f39cdbdeeba3daf94d75f7a9dcc1" + integrity sha512-JPNfN40YMNSDxZrahMrmtNH1QqPJp0/qNeEJM2nnOlhcBdfCCjekPYFV2OnwKxwvpEYglH1RBotbpRRaEuCG8Q== + dependencies: + "@babel/runtime-corejs3" "^7.12.1" + xtend@^4.0.0, xtend@~4.0.1: version "4.0.2" resolved "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz"