From ab08613689a8852515f3193bee49907aa58f81a4 Mon Sep 17 00:00:00 2001 From: Rishi Ghan Date: Fri, 17 Jan 2025 09:21:19 -0500 Subject: [PATCH] =?UTF-8?q?=C2=A0=F0=9F=94=A7=20Changed=20scrape=20source?= =?UTF-8?q?=20to=20TFAW?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- services/comicvine.service.ts | 19 +++---------------- services/metron.service.ts | 13 ++++++++++--- utils/scraping.utils.ts | 31 +++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/services/comicvine.service.ts b/services/comicvine.service.ts index ca7a9d5..09db55c 100644 --- a/services/comicvine.service.ts +++ b/services/comicvine.service.ts @@ -8,6 +8,7 @@ import { matchScorer, rankVolumes } from "../utils/searchmatchscorer.utils"; import { scrapeIssuesFromSeriesPage, scrapeIssuePage, + getWeeklyPullList, } from "../utils/scraping.utils"; const { calculateLimitAndOffset, paginate } = require("paginate-info"); const { MoleculerError } = require("moleculer").Errors; @@ -136,22 +137,8 @@ export default class ComicVineService extends Service { pageSize ); - const response = await fetchReleases( - new Date(ctx.params.startDate), - { - publishers: [ - "DC Comics", - "Marvel Comics", - "Image Comics", - ], - filter: [ - FilterTypes.Regular, - FilterTypes.Digital, - FilterTypes.Annual, - ], - sort: SortTypes.AlphaAsc, - } - ); + const response = await getWeeklyPullList(); + console.log(JSON.stringify(response, null, 4)); const count = response.length; const paginatedData = response.slice( diff --git a/services/metron.service.ts b/services/metron.service.ts index 2476206..ec53b50 100644 --- a/services/metron.service.ts +++ b/services/metron.service.ts @@ -1,7 +1,7 @@ "use strict"; -import { Service, ServiceBroker, Context } from "moleculer"; import axios from "axios"; +import { Context, Service, ServiceBroker } from "moleculer"; const METRON_BASE_URL = "https://metron.cloud/api/"; @@ -24,7 +24,7 @@ export default class MetronService extends Service { }; }> ) => { - console.log(ctx.params); + console.log(ctx.params); const results = await axios({ method: "GET", url: `https://metron.cloud/api/${ctx.params.resource}`, @@ -32,7 +32,14 @@ export default class MetronService extends Service { name: ctx.params.query.name, page: ctx.params.query.page, }, - + headers: { + "Authorization": "Basic ZnJpc2hpOlRpdHVAMTU4OA==" + }, + auth: { + "username": "frishi", + "password": "Titu@1588" + } + }); return results.data; }, diff --git a/utils/scraping.utils.ts b/utils/scraping.utils.ts index 51246c6..a244168 100644 --- a/utils/scraping.utils.ts +++ b/utils/scraping.utils.ts @@ -55,3 +55,34 @@ export const scrapeIssuePage = async (url: string) => { .querySelector("div.series-pagination > a.series").getAttribute("href"); return seriesDOMElement; }; + + +export const getWeeklyPullList = async () => { + const url = "https://www.tfaw.com/comics/new-releases.html"; + const response = await axios(url); + const dom = new JSDOM(response.data, { + url, + referrer: url, + contentType: "text/html", + includeNodeLocations: true, + storageQuota: 10000000, + }); + + const pullList: any[] = []; + // Node for the comics container + const issueNodes = dom.window.document.querySelectorAll("ol.products > li"); + + issueNodes.forEach(node => { + const coverImageUrl = node.querySelector("img.photo").getAttribute("data-src"); + const name = node.querySelector("div.product > a.product").textContent.trim(); + const publicationDate = node.querySelector("div.product-item-date").textContent.trim(); + pullList.push({ + coverImageUrl, + name, + publicationDate, + }); + }); + + return pullList; + +};