⛏ Added LOCG series href scraping util
This commit is contained in:
14
package-lock.json
generated
14
package-lock.json
generated
@@ -43,7 +43,7 @@
|
|||||||
"jest": "^25.1.0",
|
"jest": "^25.1.0",
|
||||||
"jest-cli": "^25.1.0",
|
"jest-cli": "^25.1.0",
|
||||||
"moleculer-repl": "^0.6.2",
|
"moleculer-repl": "^0.6.2",
|
||||||
"threetwo-ui-typings": "^1.0.13",
|
"threetwo-ui-typings": "^1.0.14",
|
||||||
"ts-jest": "^25.3.0",
|
"ts-jest": "^25.3.0",
|
||||||
"ts-node": "^8.8.1"
|
"ts-node": "^8.8.1"
|
||||||
},
|
},
|
||||||
@@ -11618,9 +11618,9 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/threetwo-ui-typings": {
|
"node_modules/threetwo-ui-typings": {
|
||||||
"version": "1.0.13",
|
"version": "1.0.14",
|
||||||
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz",
|
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
|
||||||
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==",
|
"integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"typescript": "^4.3.2"
|
"typescript": "^4.3.2"
|
||||||
@@ -21547,9 +21547,9 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"threetwo-ui-typings": {
|
"threetwo-ui-typings": {
|
||||||
"version": "1.0.13",
|
"version": "1.0.14",
|
||||||
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.13.tgz",
|
"resolved": "https://registry.npmjs.org/threetwo-ui-typings/-/threetwo-ui-typings-1.0.14.tgz",
|
||||||
"integrity": "sha512-AQiY8/hbp+TobBoehNTEoNco97AoiKYQjAANSFDR3pSD5jFn5qjLlKntvqdNF9Fg5tcS0ReYe0AjsvKshKpixQ==",
|
"integrity": "sha512-nfOi2T9Pr35Bry7Y9q0r6ZnuLdGqfJY45Xu0lDGJl/oA8RLBS19FZtxsVQzYnm5jfm0tO2Q6t/JY7JnU8a9olw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"typescript": "^4.3.2"
|
"typescript": "^4.3.2"
|
||||||
|
|||||||
@@ -31,7 +31,7 @@
|
|||||||
"jest": "^25.1.0",
|
"jest": "^25.1.0",
|
||||||
"jest-cli": "^25.1.0",
|
"jest-cli": "^25.1.0",
|
||||||
"moleculer-repl": "^0.6.2",
|
"moleculer-repl": "^0.6.2",
|
||||||
"threetwo-ui-typings": "^1.0.13",
|
"threetwo-ui-typings": "^1.0.14",
|
||||||
"ts-jest": "^25.3.0",
|
"ts-jest": "^25.3.0",
|
||||||
"ts-node": "^8.8.1"
|
"ts-node": "^8.8.1"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -6,7 +6,10 @@ import delay from "delay";
|
|||||||
import { isNil, isUndefined } from "lodash";
|
import { isNil, isUndefined } from "lodash";
|
||||||
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
|
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
|
||||||
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
||||||
import { scrapeIssuesFromSeriesPage } from "../utils/scraping.utils";
|
import {
|
||||||
|
scrapeIssuesFromSeriesPage,
|
||||||
|
scrapeIssuePage,
|
||||||
|
} from "../utils/scraping.utils";
|
||||||
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
||||||
|
|
||||||
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
|
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
|
||||||
@@ -106,10 +109,15 @@ export default class ComicVineService extends Service {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
scrapeLOCGForSeries: {
|
scrapeLOCGForSeries: {
|
||||||
rest: "POST/scrapeLOCGForSeries",
|
rest: "POST /scrapeLOCGForSeries",
|
||||||
params: {},
|
params: {},
|
||||||
handler: async (ctx: Context<{}>) => {
|
handler: async (ctx: Context<{}>) => {
|
||||||
return await scrapeIssuesFromSeriesPage("https://leagueofcomicgeeks.com/comics/series/151629/king-spawn");
|
const seriesURIFragment = await scrapeIssuePage(
|
||||||
|
"https://leagueofcomicgeeks.com/comic/5878833/hulk-4"
|
||||||
|
);
|
||||||
|
return await scrapeIssuesFromSeriesPage(
|
||||||
|
`https://leagueofcomicgeeks.com/${seriesURIFragment}`
|
||||||
|
);
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
getWeeklyPullList: {
|
getWeeklyPullList: {
|
||||||
@@ -132,7 +140,11 @@ export default class ComicVineService extends Service {
|
|||||||
const response = await fetchReleases(
|
const response = await fetchReleases(
|
||||||
new Date(ctx.params.startDate),
|
new Date(ctx.params.startDate),
|
||||||
{
|
{
|
||||||
publishers: ["DC Comics", "Marvel Comics", "Image Comics"],
|
publishers: [
|
||||||
|
"DC Comics",
|
||||||
|
"Marvel Comics",
|
||||||
|
"Image Comics",
|
||||||
|
],
|
||||||
filter: [
|
filter: [
|
||||||
FilterTypes.Regular,
|
FilterTypes.Regular,
|
||||||
FilterTypes.Digital,
|
FilterTypes.Digital,
|
||||||
|
|||||||
@@ -41,3 +41,17 @@ export const scrapeIssuesFromSeriesPage = async (url: string) => {
|
|||||||
issues,
|
issues,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Fetches a League of Comic Geeks issue page and extracts the link to the
 * series the issue belongs to.
 *
 * The link is read from the `href` attribute of the first element matching
 * `div.series-pagination > a.series`. The attribute value is returned exactly
 * as written in the markup; it is not resolved against `url`.
 *
 * @param url - Address of the issue page to download and parse.
 * @returns The raw `href` value of the series link.
 * @throws Error when the page has no matching series link, or the link has no
 *   `href` attribute. Errors raised by the HTTP request propagate unchanged.
 */
export const scrapeIssuePage = async (url: string): Promise<string> => {
	const response = await axios(url);
	const dom = new JSDOM(response.data, {
		url,
		referrer: url,
		contentType: "text/html",
		includeNodeLocations: true,
		storageQuota: 10000000,
	});

	// querySelector returns null when nothing matches; guard before reading
	// the attribute so a markup change surfaces as a clear error instead of
	// "Cannot read properties of null".
	const seriesLink = dom.window.document.querySelector(
		"div.series-pagination > a.series"
	);
	if (seriesLink === null) {
		throw new Error(
			`scrapeIssuePage: no series link ("div.series-pagination > a.series") found on ${url}`
		);
	}

	// getAttribute returns null when the attribute is absent; callers build a
	// URL from this value, so a null must not leak through as the text "null".
	const seriesHref = seriesLink.getAttribute("href");
	if (seriesHref === null) {
		throw new Error(
			`scrapeIssuePage: series link on ${url} has no href attribute`
		);
	}

	return seriesHref;
};
|
||||||
|
|||||||
Reference in New Issue
Block a user