Dodanie skryptu konwertera, przygotowanie do kategoryzacji list i dodanie pliku źródłowego JSON (#1)

Co-authored-by: Wiktor <>
Co-authored-by: Kuba Orlik <kontakt@kuba-orlik.name>
Reviewed-on: #1
Reviewed-by: Kuba Orlik <kuba@internet-czas-dzialac.pl>
This commit is contained in:
Wiktor 2025-05-20 18:09:48 +02:00
parent 0ff8286208
commit 266464c0e3
5 changed files with 306 additions and 0 deletions

12
Makefile Normal file
View File

@ -0,0 +1,12 @@
# Builds the blocking lists under dist/ from the JSON sources under src/.

# build, clean and all are command names rather than files. Declaring them
# phony keeps them runnable even if a file with the same name ever appears.
.PHONY: all build clean

CONVERTER = python3 src/converter.py

# Regenerate the AdGuard list whenever the JSON source is newer than it.
dist/seo-nonsense/adguard.txt:: src/seo-source.json
	$(CONVERTER) --inputfile src/seo-source.json --targetformat adguard --outputfile dist/seo-nonsense/adguard.txt

# Convenience target that produces every generated list.
build: dist/seo-nonsense/adguard.txt

# Remove all generated artifacts.
clean:
	rm -rf dist/*

all: build

17
dist/seo-nonsense/adguard.txt vendored Normal file
View File

@ -0,0 +1,17 @@
! Blocking list automatically generated at 2025-05-19 18:59:52 UTC+0000
! Created with ❤️ by internet-czas-dzialac.pl
||forsal.pl^
||wiadomosci.dziennik.pl^
||biznes.wprost.pl^
||legaartis.pl^
||superbiz.se.pl^
||pomorska.pl^
||dziendobry.tvn.pl^
||infor.pl^
||wspanialakobieta.pl^
||www.fakt.pl^
||strefabiznesu.pl^
||gospodarka.dziennik.pl^
||ekopralnie.pl^
||alepranie.com.pl^
||aboutdecor.pl^

152
src/converter.py Normal file
View File

@ -0,0 +1,152 @@
# ruff: noqa: D100, D101, D102, D103, G004, TRY400, PTH123, RUF012
import argparse
import json
import logging
import os
from datetime import datetime, timezone
from math import ceil
from os import getenv
from time import perf_counter
# Configure logging with color formatting
class CustomFormatter(logging.Formatter):
    """Log formatter that wraps every message in an ANSI color picked by level."""

    # ANSI escape sequences used to colorize terminal output.
    GREY: str = "\x1b[38;20m"
    YELLOW: str = "\x1b[33;20m"
    RED: str = "\x1b[31;1m" if False else "\x1b[31;20m"
    BOLD_RED: str = "\x1b[31;1m"
    RESET: str = "\x1b[0m"

    # Level number -> color prefix; levels missing here fall back to GREY.
    COLOR_MAP: dict[int, str] = {
        logging.DEBUG: GREY,
        logging.INFO: GREY,
        logging.WARNING: YELLOW,
        logging.ERROR: RED,
        logging.CRITICAL: BOLD_RED,
    }

    def format(self, record: logging.LogRecord) -> str:
        """Return ``[LEVEL] message`` enclosed in the level's color and a reset code."""
        try:
            prefix = self.COLOR_MAP[record.levelno]
        except KeyError:
            prefix = self.GREY
        body = f"[{record.levelname}] {record.getMessage()}"
        return f"{prefix}{body}{self.RESET}"
# Module-level logger. Its verbosity is taken from the LOGLEVEL environment
# variable (defaulting to "INFO"); records go through a stream handler that
# formats them with CustomFormatter.
color_handler: logging.StreamHandler = logging.StreamHandler()
color_handler.setFormatter(CustomFormatter())

logger: logging.Logger = logging.getLogger(__name__)
logger.setLevel(getenv("LOGLEVEL", "INFO").upper())
logger.addHandler(color_handler)
def parse_arguments() -> argparse.Namespace:
    """Parse the converter's command-line options.

    Three options are declared, all mandatory: ``--inputfile``,
    ``--targetformat`` and ``--outputfile``.

    Returns:
        The parsed namespace with ``inputfile``, ``targetformat`` and
        ``outputfile`` attributes.
    """
    # (flag, placeholder shown in usage, help text) in declaration order.
    option_specs = (
        (
            "--inputfile",
            "INPUT_FILE",
            "Path to the input JSON file containing the blocking list",
        ),
        (
            "--targetformat",
            "TARGET_FORMAT",
            "Target output format (e.g., adguard)",
        ),
        (
            "--outputfile",
            "OUTPUT_FILE",
            "Path to the output file",
        ),
    )

    cli = argparse.ArgumentParser(
        description="Convert our JSON blocking list to popular blocking list formats",
    )
    for flag, placeholder, description in option_specs:
        cli.add_argument(flag, required=True, metavar=placeholder, help=description)
    return cli.parse_args()
def load_data(filename: str) -> dict:
    """Read and parse the JSON blocking-list source.

    The file is decoded explicitly as UTF-8 so parsing does not depend on the
    locale's default encoding of the machine running the converter.

    Args:
        filename: Path to the JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(filename, encoding="utf-8") as file:
        return json.load(file)
class UnsupportedTargetFormatError(Exception):
    """Signals that the requested target format has no converter."""
def convert(data: dict, last_modified: datetime, target_format: str) -> str:
    """Render the blocking list in the requested output format.

    Args:
        data: Parsed JSON blocking list.
        last_modified: Timestamp handed on to the format-specific converter.
        target_format: Name of the output format; only ``"adguard"`` is handled.

    Returns:
        The converted list as text.

    Raises:
        UnsupportedTargetFormatError: If ``target_format`` is not a known format.
    """
    # Guard clause: reject anything we have no converter for.
    if target_format != "adguard":
        raise UnsupportedTargetFormatError
    return adguard_conversion(last_modified, data)
def adguard_conversion(last_modified: datetime, data: dict) -> str:
    """Render the blocking list as AdGuard filter text.

    The output starts with two ``!`` comment lines (generation timestamp and
    attribution), followed by one ``||<fqdn>^`` rule per entry of
    ``data["domains"]``. Entries whose ``exclude`` value is truthy are skipped.

    Args:
        last_modified: Timestamp printed in the header line.
        data: Parsed JSON document with a ``"domains"`` list of entries, each
            carrying an ``"fqdn"`` key and optionally ``"exclude"``.

    Returns:
        The filter list as a single newline-joined string, without a trailing
        newline.

    Raises:
        KeyError: If ``"domains"`` is missing, or a non-excluded entry has no
            ``"fqdn"``.
    """
    header_lines: list[str] = [
        f"! Blocking list automatically generated at {last_modified.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
        "! Created with ❤️ by internet-czas-dzialac.pl",
    ]
    # The exclude flag is checked before "fqdn" is read, so an excluded entry
    # is skipped without touching its other fields.
    rules: list[str] = [
        f"||{entry['fqdn']}^"
        for entry in data["domains"]
        if not entry.get("exclude", False)
    ]
    return "\n".join(header_lines + rules)
def dump_output(data: str, output_file: str) -> None:
    """Write the generated list to ``output_file``, creating parent directories.

    Args:
        data: Text to write.
        output_file: Destination path. A bare file name (no directory part)
            is written to the current working directory.

    Raises:
        OSError: If the parent directory cannot be created or the file cannot
            be written.
    """
    directory = os.path.dirname(output_file)
    # A bare file name has an empty dirname; os.makedirs("") would raise even
    # though there is nothing to create, so only create real directories.
    if directory:
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            logger.critical(f"Error creating directory: {e}")
            # Writing cannot succeed without the directory; surface the real
            # cause instead of a secondary failure from open().
            raise
    # The generated header contains non-ASCII characters, so the encoding is
    # fixed to UTF-8 rather than left to the locale default.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(data)
def get_last_modified_datetime(file_path: str) -> datetime:
    """Return the modification time of ``file_path`` as a UTC-aware datetime.

    Raises:
        OSError: If the file cannot be stat'ed (for example, it does not exist).
    """
    mtime_seconds = os.stat(file_path).st_mtime
    return datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
def main() -> None:
    """Run the converter end to end: parse arguments, load, convert, write.

    Every handled failure is logged and then ends the process with exit
    status 1, so callers such as ``make`` can tell a failed conversion from a
    successful one.

    Raises:
        SystemExit: With code 1 when the input cannot be loaded or the target
            format is unsupported.
    """
    # Start measuring time
    start_time: float = perf_counter()

    # Parse arguments
    args: argparse.Namespace = parse_arguments()

    # Load data
    try:
        data: dict = load_data(args.inputfile)
        last_modified: datetime = get_last_modified_datetime(args.inputfile)
    except FileNotFoundError:
        logger.error(f"File {args.inputfile} not found!")
        raise SystemExit(1) from None
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")
        raise SystemExit(1) from None
    except Exception as e:
        # Top-level boundary: report anything unforeseen and stop.
        logger.critical(f"Unexpected error occurred: {e}")
        raise SystemExit(1) from None

    # Convert
    try:
        output = convert(data, last_modified, args.targetformat)
    except UnsupportedTargetFormatError:
        logger.error('Unsupported format. For now only "adguard" is supported.')
        raise SystemExit(1) from None

    # Dump generated data
    dump_output(output, args.outputfile)

    # Report elapsed wall time, rounded up to whole milliseconds.
    delta: int = ceil((perf_counter() - start_time) * 1000)
    logger.info(f"Generated in {delta} ms")


if __name__ == "__main__":
    main()

125
src/seo-source.json Normal file
View File

@ -0,0 +1,125 @@
{
"domains": [
{
"fqdn": "forsal.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://forsal.pl/kraj/aktualnosci/artykuly-infor-pl/9796010,czy-11052025-r-to-niedziela-handlowa-handel-bez-zakazu-zakupy-w-l.html"
]
},
{
"fqdn": "wiadomosci.dziennik.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://wiadomosci.dziennik.pl/wydarzenia/artykuly/9739637,czy-dzisiaj-jest-niedziela-handlowa-czy-11-maja-2025-sklepy-i-galerie-sa-otwarte-niedziele-handlowe-w-tym-roku-niedziele-handlowe-w-tym-roku-czy-teraz-jest-niedziela-handlowa-kalendarz-11052025.html"
]
},
{
"fqdn": "biznes.wprost.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://biznes.wprost.pl/finanse-i-inwestycje/12010876/niedziele-handlowe-2025-r-czy-dzis-zrobimy-zakupy.html"
]
},
{
"fqdn": "legaartis.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://legaartis.pl/blog/2025/05/11/czy-dzis-zrobisz-zakupy-sprawdzamy-czy-to-niedziela-handlowa-i-co-jest-otwarte/"
]
},
{
"fqdn": "superbiz.se.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://superbiz.se.pl/wiadomosci/niedziele-handlowe-2025-kiedy-zrobimy-zakupy-sprawdz-kalendarz-na-2025-aa-PiKL-kQ7v-4dbi.html"
]
},
{
"fqdn": "pomorska.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://pomorska.pl/sklepy-otwarte-11-maja-czy-to-niedziela-handlowa-sprawdz-jak-pracuja-sklepy-11-05-oto-kalendarz-niedziel-handlowych-2025/ar/c1p2-27566083"
]
},
{
"fqdn": "dziendobry.tvn.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://dziendobry.tvn.pl/newsy/czy-niedziela-11-maja-2025-r-jest-handlowa-gdzie-mozna-zrobic-dzisiaj-zakupy-st7720285"
]
},
{
"fqdn": "infor.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://www.infor.pl/twoje-pieniadze/zakupy/6929697,dzis-04052025-r-otwarte-galerie-handlowe-zakupy-w-lidlu-i-biedronce-czy-tylko-w-zabce-4-maja-to-niedziela-handlowa-i-czynne-wszystkie-sklepy.html",
"https://samorzad.infor.pl/wiadomosci/6818633,niedziele-handlowe-w-2025-r-kalendarz-terminy.html"
]
},
{
"fqdn": "wspanialakobieta.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://wspanialakobieta.pl/czy-modem-t-mobile-ma-simlocka/"
]
},
{
"fqdn": "www.fakt.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://www.fakt.pl/pieniadze/czy-11-maja-to-niedziela-handlowa-sprawdz-czy-zrobisz-zakupy/007w6n5"
]
},
{
"fqdn": "strefabiznesu.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://strefabiznesu.pl/sklepy-otwarte-11-05-czy-dzisiaj-wypada-niedziela-handlowa-gdzie-mozesz-dzis-zrobic-zakupy/ar/c3p1-22848549"
]
},
{
"fqdn": "gospodarka.dziennik.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://gospodarka.dziennik.pl/praca/artykuly/9739637,czy-9-marca-jest-niedziela-handlowa-niedziele-handlowe-2025-czy-0903-sklepy-i-galerie-beda-otwarte-niedziele-handlowe-w-marcu-2025-kalendarz.html"
]
},
{
"fqdn": "ekopralnie.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://www.ekopralnie.pl/w-czym-prac-mikrofibre/"
]
},
{
"fqdn": "alepranie.com.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://alepranie.com.pl/jak-prac-mikrofibre/"
]
},
{
"fqdn": "aboutdecor.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://aboutdecor.pl/blog/jak-ze-zdjec-zrobic-kolaz-zrob-samodzielnie-kolaz-27"
]
}
]
}