🔧 Changed scrape source to TFAW
This commit is contained in:
@@ -8,6 +8,7 @@ import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
|
|||||||
import {
|
import {
|
||||||
scrapeIssuesFromSeriesPage,
|
scrapeIssuesFromSeriesPage,
|
||||||
scrapeIssuePage,
|
scrapeIssuePage,
|
||||||
|
getWeeklyPullList,
|
||||||
} from "../utils/scraping.utils";
|
} from "../utils/scraping.utils";
|
||||||
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
const { calculateLimitAndOffset, paginate } = require("paginate-info");
|
||||||
const { MoleculerError } = require("moleculer").Errors;
|
const { MoleculerError } = require("moleculer").Errors;
|
||||||
@@ -136,22 +137,8 @@ export default class ComicVineService extends Service {
|
|||||||
pageSize
|
pageSize
|
||||||
);
|
);
|
||||||
|
|
||||||
const response = await fetchReleases(
|
const response = await getWeeklyPullList();
|
||||||
new Date(ctx.params.startDate),
|
console.log(JSON.stringify(response, null, 4));
|
||||||
{
|
|
||||||
publishers: [
|
|
||||||
"DC Comics",
|
|
||||||
"Marvel Comics",
|
|
||||||
"Image Comics",
|
|
||||||
],
|
|
||||||
filter: [
|
|
||||||
FilterTypes.Regular,
|
|
||||||
FilterTypes.Digital,
|
|
||||||
FilterTypes.Annual,
|
|
||||||
],
|
|
||||||
sort: SortTypes.AlphaAsc,
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
const count = response.length;
|
const count = response.length;
|
||||||
const paginatedData = response.slice(
|
const paginatedData = response.slice(
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
import { Service, ServiceBroker, Context } from "moleculer";
|
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
|
import { Context, Service, ServiceBroker } from "moleculer";
|
||||||
|
|
||||||
const METRON_BASE_URL = "https://metron.cloud/api/";
|
const METRON_BASE_URL = "https://metron.cloud/api/";
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ export default class MetronService extends Service {
|
|||||||
};
|
};
|
||||||
}>
|
}>
|
||||||
) => {
|
) => {
|
||||||
console.log(ctx.params);
|
console.log(ctx.params);
|
||||||
const results = await axios({
|
const results = await axios({
|
||||||
method: "GET",
|
method: "GET",
|
||||||
url: `https://metron.cloud/api/${ctx.params.resource}`,
|
url: `https://metron.cloud/api/${ctx.params.resource}`,
|
||||||
@@ -32,7 +32,14 @@ export default class MetronService extends Service {
|
|||||||
name: ctx.params.query.name,
|
name: ctx.params.query.name,
|
||||||
page: ctx.params.query.page,
|
page: ctx.params.query.page,
|
||||||
},
|
},
|
||||||
|
headers: {
|
||||||
|
"Authorization": "Basic ZnJpc2hpOlRpdHVAMTU4OA=="
|
||||||
|
},
|
||||||
|
auth: {
|
||||||
|
"username": "frishi",
|
||||||
|
"password": "Titu@1588"
|
||||||
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
return results.data;
|
return results.data;
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -55,3 +55,34 @@ export const scrapeIssuePage = async (url: string) => {
|
|||||||
.querySelector("div.series-pagination > a.series").getAttribute("href");
|
.querySelector("div.series-pagination > a.series").getAttribute("href");
|
||||||
return seriesDOMElement;
|
return seriesDOMElement;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Scrapes the TFAW "new releases" page and returns the comics listed on it.
 *
 * The page is fetched with axios, parsed with JSDOM, and one entry is built
 * for each `ol.products > li` element found in the document.
 *
 * List items that are missing the cover image (`img.photo` with a `data-src`
 * attribute), the title link (`div.product > a.product`) or the date element
 * (`div.product-item-date`) are skipped, so a single malformed tile cannot
 * abort the whole scrape.
 *
 * @returns An array of `{ coverImageUrl, name, publicationDate }` objects.
 *          `publicationDate` is the trimmed text of the date element exactly
 *          as it appears on the page; it is not parsed into a Date.
 * @throws Propagates any error raised by the axios request or by JSDOM.
 */
export const getWeeklyPullList = async () => {
	const url = "https://www.tfaw.com/comics/new-releases.html";
	const response = await axios(url);
	const dom = new JSDOM(response.data, {
		url,
		referrer: url,
		contentType: "text/html",
		includeNodeLocations: true,
		storageQuota: 10000000,
	});

	const pullList: Array<{
		coverImageUrl: string;
		name: string;
		publicationDate: string;
	}> = [];

	// Each <li> under ol.products is one product tile on the page.
	const issueNodes = dom.window.document.querySelectorAll("ol.products > li");

	issueNodes.forEach(node => {
		const imageNode = node.querySelector("img.photo");
		const titleNode = node.querySelector("div.product > a.product");
		const dateNode = node.querySelector("div.product-item-date");

		// querySelector returns null when nothing matches, and getAttribute
		// returns null when the attribute is absent; skip such tiles instead
		// of throwing a TypeError that would discard every other entry.
		const coverImageUrl = imageNode ? imageNode.getAttribute("data-src") : null;
		if (coverImageUrl === null || titleNode === null || dateNode === null) {
			return;
		}

		pullList.push({
			coverImageUrl,
			name: (titleNode.textContent || "").trim(),
			publicationDate: (dateNode.textContent || "").trim(),
		});
	});

	return pullList;
};
|
||||||
|
|||||||
Reference in New Issue
Block a user