✏️ Scraping tools updated to support League of Comic Geeks (LoCG)

This commit is contained in:
2025-07-14 12:10:27 -04:00
parent 8d5283402e
commit cad3326417
2 changed files with 10 additions and 49 deletions

View File

@@ -176,7 +176,7 @@ export default class ComicVineService extends Service {
field_list: `${fieldList}`,
},
headers: {
Accept: "application/json",
"Accept": "application/json",
"User-Agent": "ThreeTwo",
},
});
@@ -279,7 +279,7 @@ export default class ComicVineService extends Service {
filter: filterString,
},
headers: {
Accept: "application/json",
"Accept": "application/json",
"User-Agent": "ThreeTwo",
},
});
@@ -335,7 +335,7 @@ export default class ComicVineService extends Service {
rest: "POST /getComicVineMatchScores",
handler: async (
ctx: Context<{
finalMatches: Array<any>;
finalMatches: any[];
rawFileDetails: any;
scorerConfiguration: any;
}>
@@ -372,7 +372,7 @@ export default class ComicVineService extends Service {
resources: "volumes",
},
headers: {
Accept: "application/json",
"Accept": "application/json",
"User-Agent": "ThreeTwo",
},
});
@@ -391,7 +391,7 @@ export default class ComicVineService extends Service {
format: "json",
},
headers: {
Accept: "application/json",
"Accept": "application/json",
"User-Agent":
"ThreeTwo",
},
@@ -483,7 +483,7 @@ export default class ComicVineService extends Service {
limit: 100,
},
headers: {
Accept: "application/json",
"Accept": "application/json",
"User-Agent": "ThreeTwo",
},
});
@@ -498,7 +498,7 @@ export default class ComicVineService extends Service {
: null; // Extract the year from cover_date
return {
...issue,
year: year,
year,
description: issue.description || "",
image: issue.image || {},
};
@@ -538,7 +538,7 @@ export default class ComicVineService extends Service {
resources,
},
headers: {
Accept: "application/json",
"Accept": "application/json",
"User-Agent": "ThreeTwo",
},
});

View File

@@ -36,8 +36,8 @@ export const getWeeklyPullList = async (url: string) => {
try {
await page.goto(url, {
waitUntil: "domcontentloaded", // faster and more reliable for JS-rendered content
timeout: 30000, // give it time on Tor or slow networks
waitUntil: "domcontentloaded", // Faster and more reliable for JS-rendered content
timeout: 30000, // Give it time on Tor or slow networks
});
await page.waitForSelector(".issue", { timeout: 30000 });
@@ -96,45 +96,6 @@ export const getWeeklyPullList = async (url: string) => {
}
};
// export const scrapeIssuesFromSeriesPage = async (url: string) => {
// const response = await axios(url);
// const dom = new JSDOM(response.data, {
// url,
// referrer: url,
// contentType: "text/html",
// includeNodeLocations: true,
// storageQuota: 10000000,
// });
// const seriesId = dom.window.document
// .querySelector("#comic-list-block")
// .getAttribute("data-series-id");
// const issueNodes = dom.window.document.querySelectorAll(
// "ul.comic-list-thumbs > li"
// );
// const issues: any = [];
// issueNodes.forEach(node => {
// const comicHref = node.querySelector("a").getAttribute("href");
// const issueCoverImage = node.querySelector("img").getAttribute("src");
// const issueDetails = node.querySelector("img").getAttribute("alt");
// const issueDate = node.querySelector("span.date").getAttribute("data-date");
// const formattedIssueDate = node.querySelector("span.date").textContent.trim();
// const publisher = node.querySelector("div.publisher").textContent.trim();
// issues.push({
// comicHref,
// issueCoverImage,
// issueDetails,
// issueDate,
// formattedIssueDate,
// publisher,
// });
// });
// return {
// seriesId,
// issues,
// };
// };
export const scrapeIssuePage = async (url: string) => {
const response = await axios(url);