Dodanie skryptu konwertera, przygotowanie do kategoryzacji list i dodanie pliku źródłowego JSON #1

Merged
wiktor merged 7 commits from converter into master 2025-05-20 18:09:48 +02:00
5 changed files with 306 additions and 0 deletions

12
Makefile Normal file
View File

@ -0,0 +1,12 @@
# Command used to run the converter script.
CONVERTER = python3 src/converter.py

# These targets name actions, not files; declaring them phony keeps them
# working even if a file called "build", "clean" or "all" shows up in the tree.
.PHONY: build clean all

# The JSON source is listed as a prerequisite so that make compares
# modification times and skips the conversion when the generated list is
# already newer than its source.
dist/seo-nonsense/adguard.txt: src/seo-source.json
	$(CONVERTER) --inputfile src/seo-source.json --targetformat adguard --outputfile dist/seo-nonsense/adguard.txt

build: dist/seo-nonsense/adguard.txt

clean:
	rm -rf dist/*

all: build

17
dist/seo-nonsense/adguard.txt vendored Normal file
View File

@ -0,0 +1,17 @@
! Blocking list automatically generated at 2025-05-19 18:59:52 UTC+0000
! Created with ❤️ by internet-czas-dzialac.pl
||forsal.pl^
||wiadomosci.dziennik.pl^
||biznes.wprost.pl^
||legaartis.pl^
||superbiz.se.pl^
||pomorska.pl^
||dziendobry.tvn.pl^
||infor.pl^
||wspanialakobieta.pl^
||www.fakt.pl^
||strefabiznesu.pl^
||gospodarka.dziennik.pl^
||ekopralnie.pl^
||alepranie.com.pl^
||aboutdecor.pl^

152
src/converter.py Normal file
View File

@ -0,0 +1,152 @@
# ruff: noqa: D100, D101, D102, D103, G004, TRY400, PTH123, RUF012
import argparse
import json
import logging
import os
from datetime import datetime, timezone
from math import ceil
from os import getenv
from time import perf_counter
# Configure logging with color formatting
class CustomFormatter(logging.Formatter):
    """Formatter that renders a record as ``<colour>[LEVEL] message<reset>``.

    The colour is looked up in ``COLOR_MAP`` by the record's numeric level;
    levels that are not in the map are rendered in grey. Only the level name
    and the interpolated message are emitted.
    """

    # ANSI escape sequences used to colour the output.
    GREY: str = "\x1b[38;20m"
    YELLOW: str = "\x1b[33;20m"
    RED: str = "\x1b[31;20m"
    BOLD_RED: str = "\x1b[31;1m"
    RESET: str = "\x1b[0m"

    # Numeric logging level -> colour escape sequence.
    COLOR_MAP: dict[int, str] = {
        logging.DEBUG: GREY,
        logging.INFO: GREY,
        logging.WARNING: YELLOW,
        logging.ERROR: RED,
        logging.CRITICAL: BOLD_RED,
    }

    def format(self, record: logging.LogRecord) -> str:
        """Return the coloured, single-line representation of *record*."""
        prefix = self.COLOR_MAP.get(record.levelno, self.GREY)
        body = f"[{record.levelname}] {record.getMessage()}"
        return "".join((prefix, body, self.RESET))
# Set up logging
# Module-level logger; its threshold is read from the LOGLEVEL environment
# variable (upper-cased before use) and falls back to INFO when it is unset.
logger: logging.Logger = logging.getLogger(__name__)
logger.setLevel(getenv("LOGLEVEL", "INFO").upper())
# Route this logger's records through a stream handler that uses the
# colourising formatter defined above.
color_handler: logging.StreamHandler = logging.StreamHandler()
color_handler.setFormatter(CustomFormatter())
logger.addHandler(color_handler)
def parse_arguments() -> argparse.Namespace:
    """Parse the converter's command-line options.

    Returns:
        A namespace with the attributes ``inputfile``, ``targetformat`` and
        ``outputfile``; all three options are mandatory.
    """
    cli = argparse.ArgumentParser(
        description="Convert our JSON blocking list to popular blocking list formats",
    )
    # One row per required option: (flag, metavar, help text).
    required_options = (
        (
            "--inputfile",
            "INPUT_FILE",
            "Path to the input JSON file containing the blocking list",
        ),
        (
            "--targetformat",
            "TARGET_FORMAT",
            "Target output format (e.g., adguard)",
        ),
        (
            "--outputfile",
            "OUTPUT_FILE",
            "Path to the output file",
        ),
    )
    for flag, placeholder, description in required_options:
        cli.add_argument(
            flag,
            required=True,
            metavar=placeholder,
            help=description,
        )
    return cli.parse_args()
def load_data(filename: str) -> dict:
    """Read *filename* and return the parsed JSON document.

    The file is always decoded as UTF-8 instead of the platform's default
    encoding, so non-ASCII content loads the same way on every system.

    Args:
        filename: Path to the JSON source file.

    Returns:
        The decoded JSON value (the converter expects an object with a
        ``"domains"`` list).

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, encoding="utf-8") as file:
        return json.load(file)
class UnsupportedTargetFormatError(Exception):
    """Signals that the requested target format has no converter."""
def convert(data: dict, last_modified: datetime, target_format: str) -> str:
    """Render *data* in the blocking-list format named by *target_format*.

    Args:
        data: Parsed JSON blocking list.
        last_modified: Timestamp handed on to the format-specific converter.
        target_format: Name of the output format; only ``"adguard"`` is
            handled.

    Returns:
        The converted list as text.

    Raises:
        UnsupportedTargetFormatError: For any other *target_format*.
    """
    # Guard clause: everything except the one known format is rejected.
    if target_format != "adguard":
        raise UnsupportedTargetFormatError
    return adguard_conversion(last_modified, data)
def adguard_conversion(last_modified: datetime, data: dict) -> str:
    """Render the blocking list in AdGuard filter syntax.

    The output starts with two ``!`` comment lines (generation timestamp and
    credit) followed by one ``||<fqdn>^`` rule per domain. The timestamp is
    taken from *last_modified* rather than the current time; the caller passes
    the modification time of the JSON source, which keeps repeated builds of
    an unchanged source deterministic.

    Args:
        last_modified: Timestamp printed in the header line.
        data: Parsed JSON list; ``data["domains"]`` is a sequence of entries
            with an ``"fqdn"`` key and an optional boolean ``"exclude"`` key.

    Returns:
        The complete list as one newline-joined string (no trailing newline).

    Raises:
        KeyError: If ``"domains"`` is missing, or an included entry has no
            ``"fqdn"``.
    """
    header_lines: list[str] = [
        f"! Blocking list automatically generated at {last_modified.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
        "! Created with ❤️ by internet-czas-dzialac.pl",
    ]
    # Entries flagged with "exclude" stay in the source file but are left out
    # of the generated list.
    rules: list[str] = [
        f"||{entry['fqdn']}^"
        for entry in data["domains"]
        if not entry.get("exclude", False)
    ]
    return "\n".join(header_lines + rules)
def dump_output(data: str, output_file: str) -> None:
    """Write *data* to *output_file*, creating parent directories as needed.

    The file is written as UTF-8 regardless of the platform's default
    encoding, because the generated text contains non-ASCII characters.

    Args:
        data: Text to write.
        output_file: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing. A failure to create
            the parent directory is logged first and then surfaces here.
    """
    directory = os.path.dirname(output_file)
    # A bare file name has no directory part, and os.makedirs("") would raise,
    # so directory creation is skipped in that case.
    if directory:
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            logger.critical(f"Error creating directory: {e}")
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(data)
def get_last_modified_datetime(file_path: str) -> datetime:
    """Return the modification time of *file_path* as a UTC-aware datetime.

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    mtime_seconds = os.stat(file_path).st_mtime
    return datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
def main() -> None:
    """Command-line entry point: load the JSON list, convert it, write it out.

    Every handled failure is logged and then ends the process with exit
    status 1, so a caller such as ``make`` can tell a failed conversion from a
    successful one.

    Raises:
        SystemExit: With code 1 when the input cannot be loaded or parsed, or
            when the requested target format is not supported.
    """
    # Start measuring time
    start_time: float = perf_counter()

    # Parse arguments
    args: argparse.Namespace = parse_arguments()

    # Load data
    try:
        data: dict = load_data(args.inputfile)
        last_modified: datetime = get_last_modified_datetime(args.inputfile)
    except FileNotFoundError:
        logger.error(f"File {args.inputfile} not found!")
        raise SystemExit(1) from None
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")
        raise SystemExit(1) from None
    except Exception as e:
        # Top-level boundary: report anything unforeseen instead of a traceback.
        logger.critical(f"Unexpected error occurred: {e}")
        raise SystemExit(1) from e

    # Convert
    try:
        output = convert(data, last_modified, args.targetformat)
    except UnsupportedTargetFormatError:
        logger.error('Unsupported format. For now only "adguard" is supported.')
        raise SystemExit(1) from None

    # Dump generated data
    dump_output(output, args.outputfile)

    # Result time print (elapsed wall time, rounded up to whole milliseconds)
    delta: int = ceil((perf_counter() - start_time) * 1000)
    logger.info(f"Generated in {delta} ms")


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

125
src/seo-source.json Normal file
View File

@ -0,0 +1,125 @@
{
"domains": [
{
"fqdn": "forsal.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://forsal.pl/kraj/aktualnosci/artykuly-infor-pl/9796010,czy-11052025-r-to-niedziela-handlowa-handel-bez-zakazu-zakupy-w-l.html"
]
},
{
"fqdn": "wiadomosci.dziennik.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://wiadomosci.dziennik.pl/wydarzenia/artykuly/9739637,czy-dzisiaj-jest-niedziela-handlowa-czy-11-maja-2025-sklepy-i-galerie-sa-otwarte-niedziele-handlowe-w-tym-roku-niedziele-handlowe-w-tym-roku-czy-teraz-jest-niedziela-handlowa-kalendarz-11052025.html"
]
},
{
"fqdn": "biznes.wprost.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://biznes.wprost.pl/finanse-i-inwestycje/12010876/niedziele-handlowe-2025-r-czy-dzis-zrobimy-zakupy.html"
]
},
{
"fqdn": "legaartis.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://legaartis.pl/blog/2025/05/11/czy-dzis-zrobisz-zakupy-sprawdzamy-czy-to-niedziela-handlowa-i-co-jest-otwarte/"
]
},
{
"fqdn": "superbiz.se.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://superbiz.se.pl/wiadomosci/niedziele-handlowe-2025-kiedy-zrobimy-zakupy-sprawdz-kalendarz-na-2025-aa-PiKL-kQ7v-4dbi.html"
]
},
{
"fqdn": "pomorska.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://pomorska.pl/sklepy-otwarte-11-maja-czy-to-niedziela-handlowa-sprawdz-jak-pracuja-sklepy-11-05-oto-kalendarz-niedziel-handlowych-2025/ar/c1p2-27566083"
]
},
{
"fqdn": "dziendobry.tvn.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://dziendobry.tvn.pl/newsy/czy-niedziela-11-maja-2025-r-jest-handlowa-gdzie-mozna-zrobic-dzisiaj-zakupy-st7720285"
]
},
{
"fqdn": "infor.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://www.infor.pl/twoje-pieniadze/zakupy/6929697,dzis-04052025-r-otwarte-galerie-handlowe-zakupy-w-lidlu-i-biedronce-czy-tylko-w-zabce-4-maja-to-niedziela-handlowa-i-czynne-wszystkie-sklepy.html",
"https://samorzad.infor.pl/wiadomosci/6818633,niedziele-handlowe-w-2025-r-kalendarz-terminy.html"
]
},
{
"fqdn": "wspanialakobieta.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://wspanialakobieta.pl/czy-modem-t-mobile-ma-simlocka/"
]
},
{
"fqdn": "www.fakt.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://www.fakt.pl/pieniadze/czy-11-maja-to-niedziela-handlowa-sprawdz-czy-zrobisz-zakupy/007w6n5"
]
},
{
"fqdn": "strefabiznesu.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://strefabiznesu.pl/sklepy-otwarte-11-05-czy-dzisiaj-wypada-niedziela-handlowa-gdzie-mozesz-dzis-zrobic-zakupy/ar/c3p1-22848549"
]
},
{
"fqdn": "gospodarka.dziennik.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://gospodarka.dziennik.pl/praca/artykuly/9739637,czy-9-marca-jest-niedziela-handlowa-niedziele-handlowe-2025-czy-0903-sklepy-i-galerie-beda-otwarte-niedziele-handlowe-w-marcu-2025-kalendarz.html"
]
},
{
"fqdn": "ekopralnie.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://www.ekopralnie.pl/w-czym-prac-mikrofibre/"
]
},
{
"fqdn": "alepranie.com.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://alepranie.com.pl/jak-prac-mikrofibre/"
]
},
{
"fqdn": "aboutdecor.pl",
"date_added": "2025-05-12",
"reason": "",
"evidence": [
"https://aboutdecor.pl/blog/jak-ze-zdjec-zrobic-kolaz-zrob-samodzielnie-kolaz-27"
]
}
]
}