🔧 Added a LOCG scraper endpoint for pull lists
This commit is contained in:
2128
package-lock.json
generated
2128
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -20,6 +20,7 @@
|
|||||||
],
|
],
|
||||||
"author": "",
|
"author": "",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@types/jsdom": "^16.2.14",
|
||||||
"@types/lodash": "^4.14.171",
|
"@types/lodash": "^4.14.171",
|
||||||
"@types/string-similarity": "^4.0.0",
|
"@types/string-similarity": "^4.0.0",
|
||||||
"@typescript-eslint/eslint-plugin": "^2.26.0",
|
"@typescript-eslint/eslint-plugin": "^2.26.0",
|
||||||
@@ -40,16 +41,20 @@
|
|||||||
"@types/mkdirp": "^1.0.0",
|
"@types/mkdirp": "^1.0.0",
|
||||||
"@types/node": "^13.9.8",
|
"@types/node": "^13.9.8",
|
||||||
"axios": "^0.21.1",
|
"axios": "^0.21.1",
|
||||||
|
"comicgeeks": "^1.1.0",
|
||||||
"date-fns": "^2.27.0",
|
"date-fns": "^2.27.0",
|
||||||
"delay": "^5.0.0",
|
"delay": "^5.0.0",
|
||||||
"dotenv": "^10.0.0",
|
"dotenv": "^10.0.0",
|
||||||
|
"got": "^12.0.1",
|
||||||
"imghash": "^0.0.9",
|
"imghash": "^0.0.9",
|
||||||
"ioredis": "^4.28.1",
|
"ioredis": "^4.28.1",
|
||||||
|
"jsdom": "^19.0.0",
|
||||||
"leven": "^3.1.0",
|
"leven": "^3.1.0",
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"moleculer": "^0.14.19",
|
"moleculer": "^0.14.19",
|
||||||
"moleculer-web": "^0.10.4",
|
"moleculer-web": "^0.10.4",
|
||||||
"nats": "^1.3.2",
|
"nats": "^1.3.2",
|
||||||
|
"paginate-info": "^1.0.4",
|
||||||
"query-string": "^7.0.1",
|
"query-string": "^7.0.1",
|
||||||
"string-similarity": "^4.0.4",
|
"string-similarity": "^4.0.4",
|
||||||
"typescript": "^3.8.3"
|
"typescript": "^3.8.3"
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ import { Service, ServiceBroker, Context } from "moleculer";
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import delay from "delay";
|
import delay from "delay";
|
||||||
import { isNil, isUndefined } from "lodash";
|
import { isNil, isUndefined } from "lodash";
|
||||||
|
import { fetchReleases, FilterTypes, SortTypes } from "comicgeeks";
|
||||||
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
||||||
|
import { scrapeIssuesFromSeriesPage } from "../utils/scraping.utils";
|
||||||
|
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
||||||
|
|
||||||
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
|
const CV_BASE_URL = "https://comicvine.gamespot.com/api/";
|
||||||
console.log("ComicVine API Key: ", process.env.COMICVINE_API_KEY);
|
console.log("ComicVine API Key: ", process.env.COMICVINE_API_KEY);
|
||||||
@@ -103,6 +105,13 @@ export default class ComicVineService extends Service {
|
|||||||
return Promise.all(issuesPromises);
|
return Promise.all(issuesPromises);
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
/**
 * Action: scrape the issue list of a League of Comic Geeks series page.
 *
 * Takes no parameters and resolves with whatever
 * `scrapeIssuesFromSeriesPage` returns for the series URL below.
 * NOTE(review): the series URL is hard-coded to a single series
 * ("king-spawn"); presumably a placeholder until the URL is passed in
 * by the caller — confirm.
 */
scrapeLOCGForSeries: {
	// The HTTP verb and the path must be separated by whitespace, matching
	// the sibling action's "GET /getWeeklyPullList". Without the space the
	// whole string is taken as the path instead of "POST" being the method.
	rest: "POST /scrapeLOCGForSeries",
	params: {},
	handler: async (ctx: Context<{}>) =>
		scrapeIssuesFromSeriesPage(
			"https://leagueofcomicgeeks.com/comics/series/151629/king-spawn"
		),
},
|
||||||
getWeeklyPullList: {
|
getWeeklyPullList: {
|
||||||
rest: "GET /getWeeklyPullList",
|
rest: "GET /getWeeklyPullList",
|
||||||
params: {},
|
params: {},
|
||||||
@@ -110,26 +119,40 @@ export default class ComicVineService extends Service {
|
|||||||
handler: async (
|
handler: async (
|
||||||
ctx: Context<{
|
ctx: Context<{
|
||||||
startDate: string;
|
startDate: string;
|
||||||
endDate: string;
|
currentPage: string;
|
||||||
|
pageSize: string;
|
||||||
}>
|
}>
|
||||||
) => {
|
) => {
|
||||||
const dateFilter = `store_date: ${ctx.params.startDate} | ${ctx.params.endDate}`;
|
const { currentPage, pageSize } = ctx.params;
|
||||||
console.log(dateFilter);
|
const { limit, offset } = calculateLimitAndOffset(
|
||||||
|
currentPage,
|
||||||
|
pageSize
|
||||||
|
);
|
||||||
|
|
||||||
// Get issues for that date
|
const response = await fetchReleases(
|
||||||
const result = await axios({
|
new Date(ctx.params.startDate),
|
||||||
url: `https://comicvine.gamespot.com/api/issues?api_key=${process.env.COMICVINE_API_KEY}`,
|
{
|
||||||
method: "get",
|
publishers: ["DC Comics", "Marvel Comics", "Image Comics"],
|
||||||
params: {
|
filter: [
|
||||||
resources: "issues",
|
FilterTypes.Regular,
|
||||||
limit: "5",
|
FilterTypes.Digital,
|
||||||
format: "json",
|
FilterTypes.Annual,
|
||||||
filter: dateFilter,
|
],
|
||||||
},
|
sort: SortTypes.AlphaAsc,
|
||||||
headers: { "User-Agent": "ThreeTwo" },
|
}
|
||||||
});
|
);
|
||||||
|
|
||||||
return result.data;
|
const count = response.length;
|
||||||
|
const paginatedData = response.slice(
|
||||||
|
offset,
|
||||||
|
offset + limit
|
||||||
|
);
|
||||||
|
const paginationInfo = paginate(
|
||||||
|
currentPage,
|
||||||
|
count,
|
||||||
|
paginatedData
|
||||||
|
);
|
||||||
|
return { result: paginatedData, meta: paginationInfo };
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
volumeBasedSearch: {
|
volumeBasedSearch: {
|
||||||
|
|||||||
43
utils/scraping.utils.ts
Normal file
43
utils/scraping.utils.ts
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import jsdom from "jsdom";
|
||||||
|
import axios from "axios";
|
||||||
|
const { JSDOM } = jsdom;
|
||||||
|
|
||||||
|
/** One issue entry read from a `ul.comic-list-thumbs > li` node. */
interface ScrapedIssue {
	/** `href` of the first anchor in the list item. */
	comicHref: string | null;
	/** `src` of the first image in the list item. */
	issueCoverImage: string | null;
	/** `alt` text of the first image in the list item. */
	issueDetails: string | null;
	/** Raw `data-date` attribute of the `span.date` element. */
	issueDate: string | null;
	/** Trimmed text content of the `span.date` element. */
	formattedIssueDate: string | null;
	/** Trimmed text content of the `div.publisher` element. */
	publisher: string | null;
}

/** Result of scraping a series page. */
interface ScrapedSeries {
	/** `data-series-id` attribute of `#comic-list-block`, if present. */
	seriesId: string | null;
	issues: ScrapedIssue[];
}

/**
 * Downloads a series page and extracts its series id and issue list.
 *
 * The page is fetched with axios and parsed with jsdom. Every field is
 * looked up defensively: when an expected element or attribute is missing
 * from the markup, the corresponding field is `null` instead of the whole
 * scrape failing with a TypeError on a null element.
 *
 * @param url - Absolute URL of the series page to scrape.
 * @returns The series id and one entry per `ul.comic-list-thumbs > li` node.
 * @throws Whatever axios throws when the request fails.
 */
export const scrapeIssuesFromSeriesPage = async (
	url: string
): Promise<ScrapedSeries> => {
	const response = await axios(url);
	const dom = new JSDOM(response.data, {
		url,
		referrer: url,
		contentType: "text/html",
		includeNodeLocations: true,
		storageQuota: 10000000,
	});
	const { document } = dom.window;

	const seriesId =
		document
			.querySelector("#comic-list-block")
			?.getAttribute("data-series-id") ?? null;

	const issues: ScrapedIssue[] = [];
	document.querySelectorAll("ul.comic-list-thumbs > li").forEach((node) => {
		// Look each element up once; several fields are read from the same node.
		const link = node.querySelector("a");
		const cover = node.querySelector("img");
		const date = node.querySelector("span.date");
		const publisher = node.querySelector("div.publisher");

		issues.push({
			comicHref: link?.getAttribute("href") ?? null,
			issueCoverImage: cover?.getAttribute("src") ?? null,
			issueDetails: cover?.getAttribute("alt") ?? null,
			issueDate: date?.getAttribute("data-date") ?? null,
			formattedIssueDate: date?.textContent?.trim() ?? null,
			publisher: publisher?.textContent?.trim() ?? null,
		});
	});

	return {
		seriesId,
		issues,
	};
};
|
||||||
Reference in New Issue
Block a user