⛏ Added LOCG series href scraping util

This commit is contained in:
2022-03-26 22:30:56 -07:00
parent b476ad77be
commit b50d9fea78
4 changed files with 38 additions and 12 deletions

14
package-lock.json generated
View File

@@ -43,7 +43,7 @@
"jest": "^25.1.0", "jest": "^25.1.0",
"jest-cli": "^25.1.0", "jest-cli": "^25.1.0",
"moleculer-repl": "^0.6.2", "moleculer-repl": "^0.6.2",
"threetwo-ui-typings": "^1.0.13", "threetwo-ui-typings": "^1.0.14",
"ts-jest": "^25.3.0", "ts-jest": "^25.3.0",
"ts-node": "^8.8.1" "ts-node": "^8.8.1"
}, },
@@ -11618,9 +11618,9 @@
"dev": true "dev": true
}, },
"node_modules/threetwo-ui-typings": { "node_modules/threetwo-ui-typings": {
"version": "1.0.13", "version": "1.0.14",
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz", "resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==", "integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"typescript": "^4.3.2" "typescript": "^4.3.2"
@@ -21547,9 +21547,9 @@
"dev": true "dev": true
}, },
"threetwo-ui-typings": { "threetwo-ui-typings": {
"version": "1.0.13", "version": "1.0.14",
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz", "resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==", "integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
"dev": true, "dev": true,
"requires": { "requires": {
"typescript": "^4.3.2" "typescript": "^4.3.2"

View File

@@ -31,7 +31,7 @@
"jest": "^25.1.0", "jest": "^25.1.0",
"jest-cli": "^25.1.0", "jest-cli": "^25.1.0",
"moleculer-repl": "^0.6.2", "moleculer-repl": "^0.6.2",
"threetwo-ui-typings": "^1.0.13", "threetwo-ui-typings": "^1.0.14",
"ts-jest": "^25.3.0", "ts-jest": "^25.3.0",
"ts-node": "^8.8.1" "ts-node": "^8.8.1"
}, },

View File

@@ -6,7 +6,10 @@ import delay from "delay";
import { isNil, isUndefined } from "lodash"; import { isNil, isUndefined } from "lodash";
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks"; import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils"; import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
import { scrapeIssuesFromSeriesPage } from "../utils/scraping.utils"; import {
scrapeIssuesFromSeriesPage,
scrapeIssuePage,
} from "../utils/scraping.utils";
const { calculateLimitAndOffset, paginate } = require("paginate-info"); const { calculateLimitAndOffset, paginate } = require("paginate-info");
const CV_BASE_URL = "https://comicvine.gamespot.com/api/"; const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
@@ -106,10 +109,15 @@ export default class ComicVineService extends Service {
}, },
}, },
scrapeLOCGForSeries: { scrapeLOCGForSeries: {
rest: "POST/scrapeLOCGForSeries", rest: "POST /scrapeLOCGForSeries",
params: {}, params: {},
handler: async (ctx: Context<{}>) => { handler: async (ctx: Context<{}>) => {
return await scrapeIssuesFromSeriesPage("https://leagueofcomicgeeks.com/comics/series/151629/king-spawn"); const seriesURIFragment = await scrapeIssuePage(
"https://leagueofcomicgeeks.com/comic/5878833/hulk-4"
);
return await scrapeIssuesFromSeriesPage(
`https://leagueofcomicgeeks.com/${seriesURIFragment}`
);
}, },
}, },
getWeeklyPullList: { getWeeklyPullList: {
@@ -132,7 +140,11 @@ export default class ComicVineService extends Service {
const response = await fetchReleases( const response = await fetchReleases(
new Date(ctx.params.startDate), new Date(ctx.params.startDate),
{ {
publishers: ["DC Comics", "Marvel Comics", "Image Comics"], publishers: [
"DC Comics",
"Marvel Comics",
"Image Comics",
],
filter: [ filter: [
FilterTypes.Regular, FilterTypes.Regular,
FilterTypes.Digital, FilterTypes.Digital,

View File

@@ -41,3 +41,17 @@ export const scrapeIssuesFromSeriesPage = async (url: string) => {
issues, issues,
}; };
}; };
/**
 * Fetches a single issue page and extracts the link to its parent series.
 *
 * The page at `url` is downloaded with axios and parsed with JSDOM; the
 * `href` attribute of the first element matching
 * `div.series-pagination > a.series` is then returned.
 *
 * @param url - URL of the issue page to scrape. It is also passed to JSDOM
 *   as the document URL and referrer.
 * @returns The anchor's `href` attribute value as written in the markup
 *   (not resolved against `url`), or `null` when the anchor carries no
 *   `href` attribute.
 * @throws Error when the page contains no matching series anchor.
 */
export const scrapeIssuePage = async (url: string): Promise<string | null> => {
	const response = await axios(url);
	const dom = new JSDOM(response.data, {
		url,
		referrer: url,
		contentType: "text/html",
		includeNodeLocations: true,
		storageQuota: 10000000,
	});

	const seriesAnchor = dom.window.document.querySelector(
		"div.series-pagination > a.series"
	);
	// querySelector yields null when nothing matches; fail with a message that
	// names the selector and page instead of an opaque TypeError on null.
	if (seriesAnchor === null) {
		throw new Error(
			`No series link (div.series-pagination > a.series) found on issue page: ${url}`
		);
	}

	return seriesAnchor.getAttribute("href");
};