🔧 Added a LOCG scraper endpoint for pull lists

2022-02-16 20:26:25 -08:00
parent d2e5a7664d
commit b476ad77be
4 changed files with 1844 additions and 389 deletions

package-lock.json (generated, 2128 lines changed): diff suppressed because it is too large.

package.json

@@ -20,6 +20,7 @@
   ],
   "author": "",
   "devDependencies": {
+    "@types/jsdom": "^16.2.14",
     "@types/lodash": "^4.14.171",
     "@types/string-similarity": "^4.0.0",
     "@typescript-eslint/eslint-plugin": "^2.26.0",
@@ -40,16 +41,20 @@
     "@types/mkdirp": "^1.0.0",
     "@types/node": "^13.9.8",
     "axios": "^0.21.1",
+    "comicgeeks": "^1.1.0",
     "date-fns": "^2.27.0",
     "delay": "^5.0.0",
     "dotenv": "^10.0.0",
+    "got": "^12.0.1",
     "imghash": "^0.0.9",
     "ioredis": "^4.28.1",
+    "jsdom": "^19.0.0",
     "leven": "^3.1.0",
     "lodash": "^4.17.21",
     "moleculer": "^0.14.19",
     "moleculer-web": "^0.10.4",
     "nats": "^1.3.2",
+    "paginate-info": "^1.0.4",
     "query-string": "^7.0.1",
     "string-similarity": "^4.0.4",
     "typescript": "^3.8.3"

ComicVine service

@@ -4,8 +4,10 @@ import { Service, ServiceBroker, Context } from "moleculer";
 import axios from "axios";
 import delay from "delay";
 import { isNil, isUndefined } from "lodash";
+import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
 import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
+import { scrapeIssuesFromSeriesPage } from "../utils/scraping.utils";
+const { calculateLimitAndOffset, paginate } = require("paginate-info");

 const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
 console.log("ComicVine API Key: ", process.env.COMICVINE_API_KEY);
@@ -103,6 +105,13 @@ export default class ComicVineService extends Service {
         return Promise.all(issuesPromises);
       },
     },
+    scrapeLOCGForSeries: {
+      rest: "POST /scrapeLOCGForSeries",
+      params: {},
+      handler: async (ctx: Context<{}>) => {
+        return await scrapeIssuesFromSeriesPage("https://leagueofcomicgeeks.com/comics/series/151629/king-spawn");
+      },
+    },
     getWeeklyPullList: {
       rest: "GET /getWeeklyPullList",
       params: {},
@@ -110,26 +119,40 @@
       handler: async (
         ctx: Context<{
           startDate: string;
-          endDate: string;
+          currentPage: string;
+          pageSize: string;
         }>
       ) => {
-        const dateFilter = `store_date: ${ctx.params.startDate} | ${ctx.params.endDate}`;
-        console.log(dateFilter);
-        // Get issues for that date
-        const result = await axios({
-          url: `https://comicvine.gamespot.com/api/issues?api_key=${process.env.COMICVINE_API_KEY}`,
-          method: "get",
-          params: {
-            resources: "issues",
-            limit: "5",
-            format: "json",
-            filter: dateFilter,
-          },
-          headers: { "User-Agent": "ThreeTwo" },
-        });
-        return result.data;
+        const { currentPage, pageSize } = ctx.params;
+        const { limit, offset } = calculateLimitAndOffset(
+          currentPage,
+          pageSize
+        );
+        const response = await fetchReleases(
+          new Date(ctx.params.startDate),
+          {
+            publishers: ["DC Comics", "Marvel Comics", "Image Comics"],
+            filter: [
+              FilterTypes.Regular,
+              FilterTypes.Digital,
+              FilterTypes.Annual,
+            ],
+            sort: SortTypes.AlphaAsc,
+          }
+        );
+        const count = response.length;
+        const paginatedData = response.slice(
+          offset,
+          offset + limit
+        );
+        const paginationInfo = paginate(
+          currentPage,
+          count,
+          paginatedData
+        );
+        return { result: paginatedData, meta: paginationInfo };
       },
     },
     volumeBasedSearch: {
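For reference, this is roughly how the new and reworked actions could be exercised from another Moleculer node. The service name ("comicvine") and the parameter values are assumptions; only the action names and their params come from the diff above.

// Sketch only: assumes the ComicVine service above is loaded on this broker
// and registered under the name "comicvine" (the name is not visible in this diff).
import { ServiceBroker } from "moleculer";

const broker = new ServiceBroker();

async function demo() {
  await broker.start();

  // New action: scrapes the hard-coded King Spawn series page on LOCG.
  const series = await broker.call("comicvine.scrapeLOCGForSeries", {});

  // Reworked action: weekly releases via comicgeeks, paginated with paginate-info.
  const pullList = await broker.call("comicvine.getWeeklyPullList", {
    startDate: "2022-02-16",
    currentPage: "1",
    pageSize: "15",
  });

  console.log(series, pullList);
  await broker.stop();
}

demo();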

utils/scraping.utils.ts (new file, 43 lines)

@@ -0,0 +1,43 @@
import jsdom from "jsdom";
import axios from "axios";

const { JSDOM } = jsdom;

export const scrapeIssuesFromSeriesPage = async (url: string) => {
  const response = await axios(url);
  const dom = new JSDOM(response.data, {
    url,
    referrer: url,
    contentType: "text/html",
    includeNodeLocations: true,
    storageQuota: 10000000,
  });

  const seriesId = dom.window.document
    .querySelector("#comic-list-block")
    .getAttribute("data-series-id");
  const issueNodes = dom.window.document.querySelectorAll(
    "ul.comic-list-thumbs > li"
  );

  const issues: any = [];
  issueNodes.forEach(node => {
    const comicHref = node.querySelector("a").getAttribute("href");
    const issueCoverImage = node.querySelector("img").getAttribute("src");
    const issueDetails = node.querySelector("img").getAttribute("alt");
    const issueDate = node.querySelector("span.date").getAttribute("data-date");
    const formattedIssueDate = node
      .querySelector("span.date")
      .textContent.trim();
    const publisher = node.querySelector("div.publisher").textContent.trim();
    issues.push({
      comicHref,
      issueCoverImage,
      issueDetails,
      issueDate,
      formattedIssueDate,
      publisher,
    });
  });

  return {
    seriesId,
    issues,
  };
};
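A small usage sketch for the new utility follows. The series URL is the one hard-coded in scrapeLOCGForSeries above, the import path is an assumption (the service imports it as "../utils/scraping.utils"), and the printed fields are exactly the ones pushed in the loop.

// Sketch only: adjust the import path to where this file lives relative to the caller.
import { scrapeIssuesFromSeriesPage } from "./utils/scraping.utils";

(async () => {
  const { seriesId, issues } = await scrapeIssuesFromSeriesPage(
    "https://leagueofcomicgeeks.com/comics/series/151629/king-spawn"
  );

  console.log("LOCG series id:", seriesId);
  for (const issue of issues) {
    // Each entry carries the LOCG link, cover image, alt text, raw and
    // formatted release dates, and publisher scraped from the series page.
    console.log(issue.issueDetails, issue.formattedIssueDate, issue.publisher);
  }
})();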