✏️ Scraping tools updated to support LoCG
This commit is contained in:
@@ -176,7 +176,7 @@ export default class ComicVineService extends Service {
|
|||||||
field_list: `${fieldList}`,
|
field_list: `${fieldList}`,
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "ThreeTwo",
|
"User-Agent": "ThreeTwo",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -279,7 +279,7 @@ export default class ComicVineService extends Service {
|
|||||||
filter: filterString,
|
filter: filterString,
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "ThreeTwo",
|
"User-Agent": "ThreeTwo",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -335,7 +335,7 @@ export default class ComicVineService extends Service {
|
|||||||
rest: "POST /getComicVineMatchScores",
|
rest: "POST /getComicVineMatchScores",
|
||||||
handler: async (
|
handler: async (
|
||||||
ctx: Context<{
|
ctx: Context<{
|
||||||
finalMatches: Array<any>;
|
finalMatches: any[];
|
||||||
rawFileDetails: any;
|
rawFileDetails: any;
|
||||||
scorerConfiguration: any;
|
scorerConfiguration: any;
|
||||||
}>
|
}>
|
||||||
@@ -372,7 +372,7 @@ export default class ComicVineService extends Service {
|
|||||||
resources: "volumes",
|
resources: "volumes",
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "ThreeTwo",
|
"User-Agent": "ThreeTwo",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -391,7 +391,7 @@ export default class ComicVineService extends Service {
|
|||||||
format: "json",
|
format: "json",
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent":
|
"User-Agent":
|
||||||
"ThreeTwo",
|
"ThreeTwo",
|
||||||
},
|
},
|
||||||
@@ -483,7 +483,7 @@ export default class ComicVineService extends Service {
|
|||||||
limit: 100,
|
limit: 100,
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "ThreeTwo",
|
"User-Agent": "ThreeTwo",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -498,7 +498,7 @@ export default class ComicVineService extends Service {
|
|||||||
: null; // Extract the year from cover_date
|
: null; // Extract the year from cover_date
|
||||||
return {
|
return {
|
||||||
...issue,
|
...issue,
|
||||||
year: year,
|
year,
|
||||||
description: issue.description || "",
|
description: issue.description || "",
|
||||||
image: issue.image || {},
|
image: issue.image || {},
|
||||||
};
|
};
|
||||||
@@ -538,7 +538,7 @@ export default class ComicVineService extends Service {
|
|||||||
resources,
|
resources,
|
||||||
},
|
},
|
||||||
headers: {
|
headers: {
|
||||||
Accept: "application/json",
|
"Accept": "application/json",
|
||||||
"User-Agent": "ThreeTwo",
|
"User-Agent": "ThreeTwo",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -36,8 +36,8 @@ export const getWeeklyPullList = async (url: string) => {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
await page.goto(url, {
|
await page.goto(url, {
|
||||||
waitUntil: "domcontentloaded", // faster and more reliable for JS-rendered content
|
waitUntil: "domcontentloaded", // Faster and more reliable for JS-rendered content
|
||||||
timeout: 30000, // give it time on Tor or slow networks
|
timeout: 30000, // Give it time on Tor or slow networks
|
||||||
});
|
});
|
||||||
|
|
||||||
await page.waitForSelector(".issue", { timeout: 30000 });
|
await page.waitForSelector(".issue", { timeout: 30000 });
|
||||||
@@ -96,45 +96,6 @@ export const getWeeklyPullList = async (url: string) => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// export const scrapeIssuesFromSeriesPage = async (url: string) => {
|
|
||||||
// const response = await axios(url);
|
|
||||||
// const dom = new JSDOM(response.data, {
|
|
||||||
// url,
|
|
||||||
// referrer: url,
|
|
||||||
// contentType: "text/html",
|
|
||||||
// includeNodeLocations: true,
|
|
||||||
// storageQuota: 10000000,
|
|
||||||
// });
|
|
||||||
// const seriesId = dom.window.document
|
|
||||||
// .querySelector("#comic-list-block")
|
|
||||||
// .getAttribute("data-series-id");
|
|
||||||
// const issueNodes = dom.window.document.querySelectorAll(
|
|
||||||
// "ul.comic-list-thumbs > li"
|
|
||||||
// );
|
|
||||||
|
|
||||||
// const issues: any = [];
|
|
||||||
// issueNodes.forEach(node => {
|
|
||||||
// const comicHref = node.querySelector("a").getAttribute("href");
|
|
||||||
// const issueCoverImage = node.querySelector("img").getAttribute("src");
|
|
||||||
// const issueDetails = node.querySelector("img").getAttribute("alt");
|
|
||||||
// const issueDate = node.querySelector("span.date").getAttribute("data-date");
|
|
||||||
// const formattedIssueDate = node.querySelector("span.date").textContent.trim();
|
|
||||||
// const publisher = node.querySelector("div.publisher").textContent.trim();
|
|
||||||
|
|
||||||
// issues.push({
|
|
||||||
// comicHref,
|
|
||||||
// issueCoverImage,
|
|
||||||
// issueDetails,
|
|
||||||
// issueDate,
|
|
||||||
// formattedIssueDate,
|
|
||||||
// publisher,
|
|
||||||
// });
|
|
||||||
// });
|
|
||||||
// return {
|
|
||||||
// seriesId,
|
|
||||||
// issues,
|
|
||||||
// };
|
|
||||||
// };
|
|
||||||
|
|
||||||
export const scrapeIssuePage = async (url: string) => {
|
export const scrapeIssuePage = async (url: string) => {
|
||||||
const response = await axios(url);
|
const response = await axios(url);
|
||||||
|
|||||||
Reference in New Issue
Block a user