✏️ Scraping tools updated to support LoCG
This commit is contained in:
@@ -36,8 +36,8 @@ export const getWeeklyPullList = async (url: string) => {
|
||||
|
||||
try {
|
||||
await page.goto(url, {
|
||||
waitUntil: "domcontentloaded", // faster and more reliable for JS-rendered content
|
||||
timeout: 30000, // give it time on Tor or slow networks
|
||||
waitUntil: "domcontentloaded", // Faster and more reliable for JS-rendered content
|
||||
timeout: 30000, // Give it time on Tor or slow networks
|
||||
});
|
||||
|
||||
await page.waitForSelector(".issue", { timeout: 30000 });
|
||||
@@ -96,45 +96,6 @@ export const getWeeklyPullList = async (url: string) => {
|
||||
}
|
||||
};
|
||||
|
||||
// export const scrapeIssuesFromSeriesPage = async (url: string) => {
|
||||
// const response = await axios(url);
|
||||
// const dom = new JSDOM(response.data, {
|
||||
// url,
|
||||
// referrer: url,
|
||||
// contentType: "text/html",
|
||||
// includeNodeLocations: true,
|
||||
// storageQuota: 10000000,
|
||||
// });
|
||||
// const seriesId = dom.window.document
|
||||
// .querySelector("#comic-list-block")
|
||||
// .getAttribute("data-series-id");
|
||||
// const issueNodes = dom.window.document.querySelectorAll(
|
||||
// "ul.comic-list-thumbs > li"
|
||||
// );
|
||||
|
||||
// const issues: any = [];
|
||||
// issueNodes.forEach(node => {
|
||||
// const comicHref = node.querySelector("a").getAttribute("href");
|
||||
// const issueCoverImage = node.querySelector("img").getAttribute("src");
|
||||
// const issueDetails = node.querySelector("img").getAttribute("alt");
|
||||
// const issueDate = node.querySelector("span.date").getAttribute("data-date");
|
||||
// const formattedIssueDate = node.querySelector("span.date").textContent.trim();
|
||||
// const publisher = node.querySelector("div.publisher").textContent.trim();
|
||||
|
||||
// issues.push({
|
||||
// comicHref,
|
||||
// issueCoverImage,
|
||||
// issueDetails,
|
||||
// issueDate,
|
||||
// formattedIssueDate,
|
||||
// publisher,
|
||||
// });
|
||||
// });
|
||||
// return {
|
||||
// seriesId,
|
||||
// issues,
|
||||
// };
|
||||
// };
|
||||
|
||||
export const scrapeIssuePage = async (url: string) => {
|
||||
const response = await axios(url);
|
||||
|
||||
Reference in New Issue
Block a user