⛏ Added LOCG series href scraping util
This commit is contained in:
14
package-lock.json
generated
14
package-lock.json
generated
@@ -43,7 +43,7 @@
|
||||
"jest": "^25.1.0",
|
||||
"jest-cli": "^25.1.0",
|
||||
"moleculer-repl": "^0.6.2",
|
||||
"threetwo-ui-typings": "^1.0.13",
|
||||
"threetwo-ui-typings": "^1.0.14",
|
||||
"ts-jest": "^25.3.0",
|
||||
"ts-node": "^8.8.1"
|
||||
},
|
||||
@@ -11618,9 +11618,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/threetwo-ui-typings": {
|
||||
"version": "1.0.13",
|
||||
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz",
|
||||
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==",
|
||||
"version": "1.0.14",
|
||||
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
|
||||
"integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"typescript": "^4.3.2"
|
||||
@@ -21547,9 +21547,9 @@
|
||||
"dev": true
|
||||
},
|
||||
"threetwo-ui-typings": {
|
||||
"version": "1.0.13",
|
||||
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz",
|
||||
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==",
|
||||
"version": "1.0.14",
|
||||
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
|
||||
"integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"typescript": "^4.3.2"
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"jest": "^25.1.0",
|
||||
"jest-cli": "^25.1.0",
|
||||
"moleculer-repl": "^0.6.2",
|
||||
"threetwo-ui-typings": "^1.0.13",
|
||||
"threetwo-ui-typings": "^1.0.14",
|
||||
"ts-jest": "^25.3.0",
|
||||
"ts-node": "^8.8.1"
|
||||
},
|
||||
|
||||
@@ -6,7 +6,10 @@ import delay from "delay";
|
||||
import { isNil, isUndefined } from "lodash";
|
||||
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
|
||||
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
||||
import { scrapeIssuesFromSeriesPage } from "../utils/scraping.utils";
|
||||
import {
|
||||
scrapeIssuesFromSeriesPage,
|
||||
scrapeIssuePage,
|
||||
} from "../utils/scraping.utils";
|
||||
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
||||
|
||||
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
|
||||
@@ -106,10 +109,15 @@ export default class ComicVineService extends Service {
|
||||
},
|
||||
},
|
||||
/**
 * Action: scrape League of Comic Geeks (LOCG) for the issues of a series.
 *
 * Works in two steps:
 *   1. Load a single issue page and read the href of its parent-series link.
 *   2. Load that series page and scrape its issues.
 *
 * NOTE(review): the issue URL is currently hard-coded and `ctx.params` is
 * not consulted — presumably a placeholder until the URL is passed in by
 * the caller; confirm.
 */
scrapeLOCGForSeries: {
	// Verb and path are separated by a space; the earlier value
	// "POST/scrapeLOCGForSeries" had none.
	rest: "POST /scrapeLOCGForSeries",
	params: {},
	handler: async (ctx: Context<{}>) => {
		const seriesURIFragment = await scrapeIssuePage(
			"https://leagueofcomicgeeks.com/comic/5878833/hulk-4"
		);
		// Without this guard a missing href would be interpolated
		// below as the literal text "null".
		if (isNil(seriesURIFragment)) {
			throw new Error(
				"Could not determine the series URL from the LOCG issue page"
			);
		}
		return await scrapeIssuesFromSeriesPage(
			`https://leagueofcomicgeeks.com/${seriesURIFragment}`
		);
	},
},
|
||||
getWeeklyPullList: {
|
||||
@@ -132,7 +140,11 @@ export default class ComicVineService extends Service {
|
||||
const response = await fetchReleases(
|
||||
new Date(ctx.params.startDate),
|
||||
{
|
||||
publishers: ["DC Comics", "Marvel Comics", "Image Comics"],
|
||||
publishers: [
|
||||
"DC Comics",
|
||||
"Marvel Comics",
|
||||
"Image Comics",
|
||||
],
|
||||
filter: [
|
||||
FilterTypes.Regular,
|
||||
FilterTypes.Digital,
|
||||
|
||||
@@ -41,3 +41,17 @@ export const scrapeIssuesFromSeriesPage = async (url: string) => {
|
||||
issues,
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Fetches an issue page and extracts the href of the series link found
 * at `div.series-pagination > a.series`.
 *
 * @param url - Absolute URL of the issue page to fetch and parse.
 * @returns The raw `href` attribute of the series link, or `null` when the
 *   link element exists but carries no `href` attribute.
 * @throws Error when the page contains no element matching the series-link
 *   selector (previously this surfaced as an opaque TypeError on `null`).
 */
export const scrapeIssuePage = async (url: string): Promise<string | null> => {
	const response = await axios(url);
	const dom = new JSDOM(response.data, {
		url,
		referrer: url,
		contentType: "text/html",
		includeNodeLocations: true,
		storageQuota: 10000000,
	});

	const seriesLink = dom.window.document.querySelector(
		"div.series-pagination > a.series"
	);
	// querySelector returns null when nothing matches; fail with a message
	// that names the page instead of dereferencing null.
	if (seriesLink === null) {
		throw new Error(`No series link found on issue page: ${url}`);
	}

	return seriesLink.getAttribute("href");
};
|
||||
|
||||
Reference in New Issue
Block a user