📔 Trade paperback detection 1st draft

This commit is contained in:
2021-09-09 13:58:43 -07:00
parent 67c3ab807c
commit ed0d7cd254
9 changed files with 86 additions and 226 deletions

View File

@@ -1,36 +1,20 @@
const TrainingSet = require("./trainingData.json");
const natural = require("natural");
const BrainJs = require("brain.js");
import { compact, isNil } from "lodash";
/**
 * Builds the vocabulary used to encode phrases as fixed-length vectors.
 *
 * Every training phrase is split on single spaces, each token is reduced to
 * its stem with `natural.PorterStemmer`, and the stems are de-duplicated
 * while keeping the order in which they first appear.
 *
 * @param trainingData - Samples whose `phrase` text feeds the vocabulary.
 * @returns The unique stemmed tokens, in first-occurrence order.
 */
function buildWordDictionary(
  trainingData: ReadonlyArray<{ phrase: string }>,
): string[] {
  // A Set de-duplicates in one pass and iterates in insertion order, so the
  // result matches a filter/indexOf de-dupe without the quadratic scan.
  const uniqueStems = new Set<string>();
  for (const item of trainingData) {
    for (const token of item.phrase.split(" ")) {
      uniqueStems.add(natural.PorterStemmer.stem(token));
    }
  }
  return Array.from(uniqueStems);
}
// Vocabulary of unique stemmed tokens, built once from the training set when
// this module is loaded; `encode` emits one vector slot per entry.
const dictionary = buildWordDictionary(TrainingSet);
/**
 * Encodes a phrase as a binary bag-of-words vector over `dictionary`.
 *
 * @param phrase - Text to encode; it is split on single spaces.
 * @returns One entry per dictionary word: 1 when the phrase contains a token
 *   whose stem equals that word, otherwise 0.
 */
function encode(phrase: string): number[] {
  // The dictionary holds Porter stems, so the phrase tokens have to be stemmed
  // the same way before the lookup; comparing raw tokens would miss every word
  // whose stem differs from its surface form.
  const phraseStems = new Set<string>(
    phrase.split(" ").map((token) => natural.PorterStemmer.stem(token)),
  );
  return dictionary.map((word: string) => (phraseStems.has(word) ? 1 : 0));
}
// Convert each labelled sample into the { input, output } pair handed to the
// network's train() call below.
const encodedTrainingSet = TrainingSet.map((sample) => ({
  input: encode(sample.phrase),
  output: sample.result,
}));

// Create the network and train it as soon as this module is loaded.
const network = new BrainJs.NeuralNetwork();
network.train(encodedTrainingSet);

// Run one hard-coded phrase through the trained network and print the result.
const encoded = encode("Im so happy to have cake");
console.log(network.run(encoded));
/**
 * Finds trade-paperback / collected-edition wording in a piece of text.
 *
 * Every pattern is applied globally and case-insensitively. Patterns that do
 * not match contribute nothing to the result.
 *
 * @param deck - Text to scan.
 * @returns One array of matched substrings per pattern that matched, in
 *   pattern order; an empty array when nothing matched or when `deck` is not
 *   a string.
 */
export const detectTradePaperbacks = (deck: string): RegExpMatchArray[] => {
  // Callers outside the type checker may still pass null/undefined; report
  // "no matches" instead of throwing on `.match`.
  if (typeof deck !== "string") {
    return [];
  }

  const paperback = [
    /((trade)?\s?(paperback)|(tpb))/gim, // https://regex101.com/r/FhuowT/1
    /(hard\s?cover)\s?(collect((ion)|(ed)|(ing)))/gim, // https://regex101.com/r/eFJVRM/1
    /(collected\s?editions)/gim, // https://regex101.com/r/40pAm5/1
  ];

  // String.prototype.match returns null (never undefined) when a pattern does
  // not match, so null is the value that has to be filtered out.
  return paperback
    .map((regex) => deck.match(regex))
    .filter((found): found is RegExpMatchArray => found !== null);
};