🔧 Changed scrape source to TFAW

This commit is contained in:
2025-01-17 09:21:19 -05:00
parent 7749f2db49
commit ab08613689
3 changed files with 44 additions and 19 deletions

View File

@@ -8,6 +8,7 @@ import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils";
import { import {
scrapeIssuesFromSeriesPage, scrapeIssuesFromSeriesPage,
scrapeIssuePage, scrapeIssuePage,
getWeeklyPullList,
} from "../utils/scraping.utils"; } from "../utils/scraping.utils";
const { calculateLimitAndOffset, paginate } = require("paginate-info"); const { calculateLimitAndOffset, paginate } = require("paginate-info");
const { MoleculerError } = require("moleculer").Errors; const { MoleculerError } = require("moleculer").Errors;
@@ -136,22 +137,8 @@ export default class ComicVineService extends Service {
pageSize pageSize
); );
const response = await fetchReleases( const response = await getWeeklyPullList();
new Date(ctx.params.startDate), console.log(JSON.stringify(response, null, 4));
{
publishers: [
"DC Comics",
"Marvel Comics",
"Image Comics",
],
filter: [
FilterTypes.Regular,
FilterTypes.Digital,
FilterTypes.Annual,
],
sort: SortTypes.AlphaAsc,
}
);
const count = response.length; const count = response.length;
const paginatedData = response.slice( const paginatedData = response.slice(

View File

@@ -1,7 +1,7 @@
"use strict"; "use strict";
import { Service, ServiceBroker, Context } from "moleculer";
import axios from "axios"; import axios from "axios";
import { Context, Service, ServiceBroker } from "moleculer";
const METRON_BASE_URL = "https://metron.cloud/api/"; const METRON_BASE_URL = "https://metron.cloud/api/";
@@ -24,7 +24,7 @@ export default class MetronService extends Service {
}; };
}> }>
) => { ) => {
console.log(ctx.params); console.log(ctx.params);
const results = await axios({ const results = await axios({
method: "GET", method: "GET",
url: `https://metron.cloud/api/${ctx.params.resource}`, url: `https://metron.cloud/api/${ctx.params.resource}`,
@@ -32,7 +32,14 @@ export default class MetronService extends Service {
name: ctx.params.query.name, name: ctx.params.query.name,
page: ctx.params.query.page, page: ctx.params.query.page,
}, },
headers: {
"Authorization": "Basic ZnJpc2hpOlRpdHVAMTU4OA=="
},
auth: {
"username": "frishi",
"password": "Titu@1588"
}
}); });
return results.data; return results.data;
}, },

View File

@@ -55,3 +55,34 @@ export const scrapeIssuePage = async (url: string) => {
.querySelector("div.series-pagination > a.series").getAttribute("href"); .querySelector("div.series-pagination > a.series").getAttribute("href");
return seriesDOMElement; return seriesDOMElement;
}; };
/**
 * One entry scraped from the TFAW "new releases" listing.
 *
 * `coverImageUrl` and `publicationDate` are `null` when the corresponding
 * element (or the `data-src` attribute) is absent from the list item.
 */
export interface WeeklyPullListItem {
	coverImageUrl: string | null;
	name: string;
	publicationDate: string | null;
}

/**
 * Fetches the TFAW new-releases page and extracts one entry per product
 * list item (`ol.products > li`).
 *
 * List items that have no product title link are skipped instead of
 * throwing, so a single malformed or non-product `<li>` no longer aborts
 * the whole scrape. A missing cover image or date yields `null` for that
 * field. For pages where every selector matches, the result is identical
 * to the previous implementation.
 *
 * @returns The scraped entries in document order; an empty array when the
 *          page contains no matching list items.
 * @throws Whatever `axios` throws when the HTTP request fails.
 */
export const getWeeklyPullList = async (): Promise<WeeklyPullListItem[]> => {
	const url = "https://www.tfaw.com/comics/new-releases.html";
	const response = await axios(url);
	const dom = new JSDOM(response.data, {
		url,
		referrer: url,
		contentType: "text/html",
		includeNodeLocations: true,
		storageQuota: 10000000,
	});

	try {
		const pullList: WeeklyPullListItem[] = [];

		// Each <li> under ol.products is expected to describe one comic.
		const issueNodes = dom.window.document.querySelectorAll("ol.products > li");
		issueNodes.forEach((node) => {
			const titleNode = node.querySelector("div.product > a.product");
			if (titleNode === null) {
				// Without a title the entry is unusable; skip rather than crash.
				return;
			}

			const imageNode = node.querySelector("img.photo");
			const dateNode = node.querySelector("div.product-item-date");

			pullList.push({
				// The cover URL is read from the `data-src` attribute, not `src`.
				coverImageUrl: imageNode === null ? null : imageNode.getAttribute("data-src"),
				name: (titleNode.textContent || "").trim(),
				publicationDate: dateNode === null ? null : (dateNode.textContent || "").trim(),
			});
		});

		return pullList;
	} finally {
		// All values above are plain strings, so the window can be released now.
		dom.window.close();
	}
};