From 266464c0e31d20705098a5f11d299656b391d007 Mon Sep 17 00:00:00 2001 From: Wiktor Date: Tue, 20 May 2025 18:09:48 +0200 Subject: [PATCH] =?UTF-8?q?Dodanie=20skryptu=20konwertera,=20przygotowanie?= =?UTF-8?q?=20do=20kategoryzacji=20list=20i=20dodanie=20pliku=20=C5=BAr?= =?UTF-8?q?=C3=B3d=C5=82owego=20JSON=20(#1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Wiktor <> Co-authored-by: Kuba Orlik Reviewed-on: https://git.internet-czas-dzialac.pl/icd/blocklisty/pulls/1 Reviewed-by: Kuba Orlik --- Makefile | 12 ++ dist/seo-nonsense/adguard.txt | 17 ++ .../seo-nonsense/ublacklist.txt | 0 src/converter.py | 152 ++++++++++++++++++ src/seo-source.json | 125 ++++++++++++++ 5 files changed, 306 insertions(+) create mode 100644 Makefile create mode 100644 dist/seo-nonsense/adguard.txt rename bełkot-seo-ublacklist.txt => dist/seo-nonsense/ublacklist.txt (100%) create mode 100644 src/converter.py create mode 100644 src/seo-source.json diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d934435 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +CONVERTER = python3 src/converter.py + +dist/seo-nonsense/adguard.txt:: src/seo-source.json + $(CONVERTER) --inputfile src/seo-source.json --targetformat adguard --outputfile dist/seo-nonsense/adguard.txt + +build: dist/seo-nonsense/adguard.txt + + +clean: + rm -rf dist/* + +all: build diff --git a/dist/seo-nonsense/adguard.txt b/dist/seo-nonsense/adguard.txt new file mode 100644 index 0000000..58f962f --- /dev/null +++ b/dist/seo-nonsense/adguard.txt @@ -0,0 +1,17 @@ +! Blocking list automatically generated at 2025-05-19 18:59:52 UTC+0000 +! Created with ❤️ by internet-czas-dzialac.pl +||forsal.pl^ +||wiadomosci.dziennik.pl^ +||biznes.wprost.pl^ +||legaartis.pl^ +||superbiz.se.pl^ +||pomorska.pl^ +||dziendobry.tvn.pl^ +||infor.pl^ +||wspanialakobieta.pl^ +||www.fakt.pl^ +||strefabiznesu.pl^ +||gospodarka.dziennik.pl^ +||ekopralnie.pl^ +||alepranie.com.pl^ +||aboutdecor.pl^ \ No newline at end of file diff --git a/bełkot-seo-ublacklist.txt b/dist/seo-nonsense/ublacklist.txt similarity index 100% rename from bełkot-seo-ublacklist.txt rename to dist/seo-nonsense/ublacklist.txt diff --git a/src/converter.py b/src/converter.py new file mode 100644 index 0000000..e10fb0c --- /dev/null +++ b/src/converter.py @@ -0,0 +1,152 @@ +# ruff: noqa: D100, D101, D102, D103, G004, TRY400, PTH123, RUF012 +import argparse +import json +import logging +import os +from datetime import datetime, timezone +from math import ceil +from os import getenv +from time import perf_counter + + +# Configure logging with color formatting +class CustomFormatter(logging.Formatter): + GREY: str = "\x1b[38;20m" + YELLOW: str = "\x1b[33;20m" + RED: str = "\x1b[31;20m" + BOLD_RED: str = "\x1b[31;1m" + RESET: str = "\x1b[0m" + + COLOR_MAP: dict[int, str] = { + logging.DEBUG: GREY, + logging.INFO: GREY, + logging.WARNING: YELLOW, + logging.ERROR: RED, + logging.CRITICAL: BOLD_RED, + } + + def format(self, record: logging.LogRecord) -> str: + color: str = self.COLOR_MAP.get(record.levelno, self.GREY) + return f"{color}[{record.levelname}] {record.getMessage()}{self.RESET}" + + +# Set up logging +logger: logging.Logger = logging.getLogger(__name__) +logger.setLevel(getenv("LOGLEVEL", "INFO").upper()) +color_handler: logging.StreamHandler = logging.StreamHandler() +color_handler.setFormatter(CustomFormatter()) +logger.addHandler(color_handler) + + +def parse_arguments() -> argparse.Namespace: + parser: argparse.ArgumentParser = argparse.ArgumentParser( + description="Convert our JSON blocking list to popular blocking list formats", + ) + parser.add_argument( + "--inputfile", + required=True, + metavar="INPUT_FILE", + help="Path to the input JSON file containing the blocking list", + ) + parser.add_argument( + "--targetformat", + required=True, + metavar="TARGET_FORMAT", + help="Target output format (e.g., adguard)", + ) + parser.add_argument( + "--outputfile", + required=True, + metavar="OUTPUT_FILE", + help="Path to the output file", + ) + return parser.parse_args() + + +def load_data(filename: str) -> dict: + with open(filename) as file: + return json.load(file) + + +class UnsupportedTargetFormatError(Exception): + pass + + +def convert(data: dict, last_modified: datetime, target_format: str) -> str: + match target_format: + case "adguard": + return adguard_conversion(last_modified, data) + case _: + raise UnsupportedTargetFormatError + + +def adguard_conversion(last_modified: datetime, data: dict) -> list[str]: + header_lines: list[str] = [ + f"! Blocking list automatically generated at {last_modified.strftime('%Y-%m-%d %H:%M:%S %Z%z')}", + "! Created with ❤️ by internet-czas-dzialac.pl", + ] + + output: list[str] = list(header_lines) + + for entry in data["domains"]: + fqdn = entry["fqdn"] + if entry.get("exclude", False): + continue + output.append(f"||{fqdn}^") + + return "\n".join(output) + + +def dump_output(data: str, output_file: str) -> None: + try: + directory = os.path.dirname(output_file) + os.makedirs(directory, exist_ok=True) + except OSError as e: + logger.critical(f"Error creating directory: {e}") + with open(output_file, "w") as file: + file.write(data) + + +def get_last_modified_datetime(file_path: str) -> datetime: + timestamp: float = os.path.getmtime(file_path) + return datetime.fromtimestamp(timestamp, tz=timezone.utc) + + +def main() -> None: + # Start measuring time + start_time: float = perf_counter() + + # Parse arguments + args: argparse.Namespace = parse_arguments() + + # Load data + try: + data: dict = load_data(args.inputfile) + last_modified: datetime = get_last_modified_datetime(args.inputfile) + except FileNotFoundError: + logger.error(f"File {args.inputfile} not found!") + return + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON: {e}") + return + except Exception as e: + logger.critical(f"Unexpected error occurred: {e}") + return + + # Convert + try: + output = convert(data, last_modified, args.targetformat) + except UnsupportedTargetFormatError: + logger.error('Unsupported format. For now only "adguard" is supported.') + return + + # Dump generated data + dump_output(output, args.outputfile) + + # Result time print + delta: float = ceil((perf_counter() - start_time) * 1000) + logger.info(f"Generated in {delta} ms") + + +if __name__ == "__main__": + main() diff --git a/src/seo-source.json b/src/seo-source.json new file mode 100644 index 0000000..8b6299c --- /dev/null +++ b/src/seo-source.json @@ -0,0 +1,125 @@ +{ + "domains": [ + { + "fqdn": "forsal.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://forsal.pl/kraj/aktualnosci/artykuly-infor-pl/9796010,czy-11052025-r-to-niedziela-handlowa-handel-bez-zakazu-zakupy-w-l.html" + ] + }, + { + "fqdn": "wiadomosci.dziennik.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://wiadomosci.dziennik.pl/wydarzenia/artykuly/9739637,czy-dzisiaj-jest-niedziela-handlowa-czy-11-maja-2025-sklepy-i-galerie-sa-otwarte-niedziele-handlowe-w-tym-roku-niedziele-handlowe-w-tym-roku-czy-teraz-jest-niedziela-handlowa-kalendarz-11052025.html" + ] + }, + { + "fqdn": "biznes.wprost.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://biznes.wprost.pl/finanse-i-inwestycje/12010876/niedziele-handlowe-2025-r-czy-dzis-zrobimy-zakupy.html" + ] + }, + { + "fqdn": "legaartis.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://legaartis.pl/blog/2025/05/11/czy-dzis-zrobisz-zakupy-sprawdzamy-czy-to-niedziela-handlowa-i-co-jest-otwarte/" + ] + }, + { + "fqdn": "superbiz.se.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://superbiz.se.pl/wiadomosci/niedziele-handlowe-2025-kiedy-zrobimy-zakupy-sprawdz-kalendarz-na-2025-aa-PiKL-kQ7v-4dbi.html" + ] + }, + { + "fqdn": "pomorska.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://pomorska.pl/sklepy-otwarte-11-maja-czy-to-niedziela-handlowa-sprawdz-jak-pracuja-sklepy-11-05-oto-kalendarz-niedziel-handlowych-2025/ar/c1p2-27566083" + ] + }, + { + "fqdn": "dziendobry.tvn.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://dziendobry.tvn.pl/newsy/czy-niedziela-11-maja-2025-r-jest-handlowa-gdzie-mozna-zrobic-dzisiaj-zakupy-st7720285" + ] + }, + { + "fqdn": "infor.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://www.infor.pl/twoje-pieniadze/zakupy/6929697,dzis-04052025-r-otwarte-galerie-handlowe-zakupy-w-lidlu-i-biedronce-czy-tylko-w-zabce-4-maja-to-niedziela-handlowa-i-czynne-wszystkie-sklepy.html", + "https://samorzad.infor.pl/wiadomosci/6818633,niedziele-handlowe-w-2025-r-kalendarz-terminy.html" + ] + }, + { + "fqdn": "wspanialakobieta.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://wspanialakobieta.pl/czy-modem-t-mobile-ma-simlocka/" + ] + }, + { + "fqdn": "www.fakt.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://www.fakt.pl/pieniadze/czy-11-maja-to-niedziela-handlowa-sprawdz-czy-zrobisz-zakupy/007w6n5" + ] + }, + { + "fqdn": "strefabiznesu.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://strefabiznesu.pl/sklepy-otwarte-11-05-czy-dzisiaj-wypada-niedziela-handlowa-gdzie-mozesz-dzis-zrobic-zakupy/ar/c3p1-22848549" + ] + }, + { + "fqdn": "gospodarka.dziennik.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://gospodarka.dziennik.pl/praca/artykuly/9739637,czy-9-marca-jest-niedziela-handlowa-niedziele-handlowe-2025-czy-0903-sklepy-i-galerie-beda-otwarte-niedziele-handlowe-w-marcu-2025-kalendarz.html" + ] + }, + { + "fqdn": "ekopralnie.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://www.ekopralnie.pl/w-czym-prac-mikrofibre/" + ] + }, + { + "fqdn": "alepranie.com.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://alepranie.com.pl/jak-prac-mikrofibre/" + ] + }, + { + "fqdn": "aboutdecor.pl", + "date_added": "2025-05-12", + "reason": "", + "evidence": [ + "https://aboutdecor.pl/blog/jak-ze-zdjec-zrobic-kolaz-zrob-samodzielnie-kolaz-27" + ] + } + ] +}