⛏ Added LOCG series href scraping util

This commit is contained in:
2022-03-26 22:30:56 -07:00
parent b476ad77be
commit b50d9fea78
4 changed files with 38 additions and 12 deletions

14
package-lock.json generated
View File

@@ -43,7 +43,7 @@
"jest": "^25.1.0",
"jest-cli": "^25.1.0",
"moleculer-repl": "^0.6.2",
"threetwo-ui-typings": "^1.0.13",
"threetwo-ui-typings": "^1.0.14",
"ts-jest": "^25.3.0",
"ts-node": "^8.8.1"
},
@@ -11618,9 +11618,9 @@
"dev": true
},
"node_modules/threetwo-ui-typings": {
"version": "1.0.13",
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz",
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==",
"version": "1.0.14",
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
"integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
"dev": true,
"dependencies": {
"typescript": "^4.3.2"
@@ -21547,9 +21547,9 @@
"dev": true
},
"threetwo-ui-typings": {
"version": "1.0.13",
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz",
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==",
"version": "1.0.14",
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
"integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
"dev": true,
"requires": {
"typescript": "^4.3.2"

View File

@@ -31,7 +31,7 @@
"jest": "^25.1.0",
"jest-cli": "^25.1.0",
"moleculer-repl": "^0.6.2",
"threetwo-ui-typings": "^1.0.13",
"threetwo-ui-typings": "^1.0.14",
"ts-jest": "^25.3.0",
"ts-node": "^8.8.1"
},

View File

@@ -6,7 +6,10 @@ import delay from "delay";
import { isNil, isUndefined } from "lodash";
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
import { scrapeIssuesFromSeriesPage } from "../utils/scraping.utils";
import {
scrapeIssuesFromSeriesPage,
scrapeIssuePage,
} from "../utils/scraping.utils";
const { calculateLimitAndOffset, paginate } = require("paginate-info");
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
@@ -106,10 +109,15 @@ export default class ComicVineService extends Service {
},
},
scrapeLOCGForSeries: {
rest: "POST/scrapeLOCGForSeries",
rest: "POST /scrapeLOCGForSeries",
params: {},
handler: async (ctx: Context<{}>) => {
return await scrapeIssuesFromSeriesPage("https://leagueofcomicgeeks.com/comics/series/151629/king-spawn");
const seriesURIFragment = await scrapeIssuePage(
"https://leagueofcomicgeeks.com/comic/5878833/hulk-4"
);
return await scrapeIssuesFromSeriesPage(
`https://leagueofcomicgeeks.com/${seriesURIFragment}`
);
},
},
getWeeklyPullList: {
@@ -132,7 +140,11 @@ export default class ComicVineService extends Service {
const response = await fetchReleases(
new Date(ctx.params.startDate),
{
publishers: ["DC Comics", "Marvel Comics", "Image Comics"],
publishers: [
"DC Comics",
"Marvel Comics",
"Image Comics",
],
filter: [
FilterTypes.Regular,
FilterTypes.Digital,

View File

@@ -41,3 +41,17 @@ export const scrapeIssuesFromSeriesPage = async (url: string) => {
issues,
};
};
/**
 * Fetches an issue page and extracts the link to the series it belongs to.
 *
 * The page is downloaded with axios, parsed with JSDOM, and the first element
 * matching `div.series-pagination > a.series` is looked up.
 * NOTE(review): the visible caller passes a League of Comic Geeks issue URL and
 * prefixes the result with the site origin, so the href is presumably a
 * site-relative path — confirm against the live markup.
 *
 * @param url - Absolute URL of the issue page to scrape; also used as the
 *   document URL and referrer for the parsed DOM.
 * @returns The raw `href` attribute of the series anchor, or `null` if the
 *   anchor exists but carries no `href` attribute.
 * @throws Error if the page contains no `div.series-pagination > a.series`
 *   element (previously this surfaced as an opaque TypeError on `null`).
 */
export const scrapeIssuePage = async (url: string): Promise<string | null> => {
  const response = await axios(url);
  const dom = new JSDOM(response.data, {
    url,
    referrer: url,
    contentType: "text/html",
    includeNodeLocations: true,
    storageQuota: 10000000,
  });

  // querySelector returns null when nothing matches; guard before dereferencing
  // so a markup change produces an actionable error instead of a TypeError.
  const seriesAnchor = dom.window.document.querySelector(
    "div.series-pagination > a.series"
  );
  if (seriesAnchor === null) {
    throw new Error(
      `No series link (div.series-pagination > a.series) found on ${url}`
    );
  }

  return seriesAnchor.getAttribute("href");
};