Dodanie skryptu konwertera, przygotowanie do kategoryzacji list i dodanie pliku źródłowego JSON #1
12
Makefile
Normal file
12
Makefile
Normal file
@ -0,0 +1,12 @@
|
||||
# Command used to run the blocking-list converter.
CONVERTER = python3 src/converter.py

# These targets are commands, not files; without .PHONY a stray file named
# "build", "clean" or "all" would silently stop them from running.
.PHONY: all build clean

# Regenerate the AdGuard list only when the JSON source is newer than the
# generated file. $< is the first prerequisite, $@ is the target.
dist/seo-nonsense/adguard.txt: src/seo-source.json
	$(CONVERTER) --inputfile $< --targetformat adguard --outputfile $@

build: dist/seo-nonsense/adguard.txt

clean:
	rm -rf dist/*

all: build
|
17
dist/seo-nonsense/adguard.txt
vendored
Normal file
17
dist/seo-nonsense/adguard.txt
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
! Blocking list automatically generated at 2025-05-19 18:59:52 UTC+0000
|
||||
! Created with ❤️ by internet-czas-dzialac.pl
|
||||
||forsal.pl^
|
||||
||wiadomosci.dziennik.pl^
|
||||
||biznes.wprost.pl^
|
||||
||legaartis.pl^
|
||||
||superbiz.se.pl^
|
||||
||pomorska.pl^
|
||||
||dziendobry.tvn.pl^
|
||||
||infor.pl^
|
||||
||wspanialakobieta.pl^
|
||||
||www.fakt.pl^
|
||||
||strefabiznesu.pl^
|
||||
||gospodarka.dziennik.pl^
|
||||
||ekopralnie.pl^
|
||||
||alepranie.com.pl^
|
||||
||aboutdecor.pl^
|
152
src/converter.py
Normal file
152
src/converter.py
Normal file
@ -0,0 +1,152 @@
|
||||
# ruff: noqa: D100, D101, D102, D103, G004, TRY400, PTH123, RUF012
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from math import ceil
|
||||
from os import getenv
|
||||
from time import perf_counter
|
||||
|
||||
|
||||
# Configure logging with color formatting
|
||||
class CustomFormatter(logging.Formatter):
    """Log formatter that wraps each record in an ANSI colour escape sequence.

    The rendered line has the shape ``<colour>[LEVELNAME] message<reset>``.
    """

    # ANSI escape sequences used to colour the output.
    GREY: str = "\x1b[38;20m"
    YELLOW: str = "\x1b[33;20m"
    RED: str = "\x1b[31;20m"
    BOLD_RED: str = "\x1b[31;1m"
    RESET: str = "\x1b[0m"

    # Colour chosen for each standard logging level.
    COLOR_MAP: dict[int, str] = {
        logging.DEBUG: GREY,
        logging.INFO: GREY,
        logging.WARNING: YELLOW,
        logging.ERROR: RED,
        logging.CRITICAL: BOLD_RED,
    }

    def format(self, record: logging.LogRecord) -> str:
        """Return the coloured ``[LEVEL] message`` line for *record*."""
        try:
            prefix: str = self.COLOR_MAP[record.levelno]
        except KeyError:
            # Non-standard levels fall back to the neutral colour.
            prefix = self.GREY
        text: str = record.getMessage()
        return "{}[{}] {}{}".format(prefix, record.levelname, text, self.RESET)
|
||||
|
||||
|
||||
# Set up logging: a stream handler with the coloured formatter, attached to the
# module logger. The level comes from the LOGLEVEL environment variable
# (default "INFO").
color_handler: logging.StreamHandler = logging.StreamHandler()
color_handler.setFormatter(CustomFormatter())

logger: logging.Logger = logging.getLogger(__name__)
logger.addHandler(color_handler)
logger.setLevel(os.environ.get("LOGLEVEL", "INFO").upper())
|
||||
|
||||
|
||||
def parse_arguments() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    All three options (``--inputfile``, ``--targetformat``, ``--outputfile``)
    are required.
    """
    cli: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Convert our JSON blocking list to popular blocking list formats",
    )

    # (flag, metavar, help text) for every required option.
    option_specs: tuple[tuple[str, str, str], ...] = (
        (
            "--inputfile",
            "INPUT_FILE",
            "Path to the input JSON file containing the blocking list",
        ),
        (
            "--targetformat",
            "TARGET_FORMAT",
            "Target output format (e.g., adguard)",
        ),
        (
            "--outputfile",
            "OUTPUT_FILE",
            "Path to the output file",
        ),
    )
    for flag, placeholder, description in option_specs:
        cli.add_argument(flag, required=True, metavar=placeholder, help=description)

    return cli.parse_args()
|
||||
|
||||
|
||||
def load_data(filename: str) -> dict:
    """Read *filename* and return the parsed JSON document.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the locale's default encoding.

    Raises:
        FileNotFoundError: if *filename* does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(filename, encoding="utf-8") as file:
        return json.load(file)
|
||||
|
||||
|
||||
class UnsupportedTargetFormatError(Exception):
    """Raised when the requested target format has no converter."""
|
||||
|
||||
|
||||
def convert(data: dict, last_modified: datetime, target_format: str) -> str:
    """Render *data* in *target_format* and return the resulting text.

    Only ``"adguard"`` is handled; any other value raises
    ``UnsupportedTargetFormatError``.
    """
    # Guard clause: reject everything except the single supported format.
    if target_format != "adguard":
        raise UnsupportedTargetFormatError
    return adguard_conversion(last_modified, data)
|
||||
|
||||
|
||||
def adguard_conversion(last_modified: datetime, data: dict) -> str:
    """Render the blocking list in AdGuard filter syntax.

    Args:
        last_modified: Timestamp written into the header comment. ``main``
            passes the source JSON file's modification time rather than the
            current time, which keeps repeated builds of the same source
            deterministic.
        data: Parsed source document; ``data["domains"]`` is iterated and each
            entry's ``"fqdn"`` is emitted unless its ``"exclude"`` key is truthy.

    Returns:
        The complete list as one newline-joined string (no trailing newline):
        two ``!`` header comments followed by one ``||fqdn^`` rule per domain.
    """
    header_lines: list[str] = [
        f"! Blocking list automatically generated at {last_modified.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
        "! Created with ❤️ by internet-czas-dzialac.pl",
    ]

    # One blocking rule per entry that is not flagged for exclusion.
    rules: list[str] = [
        f"||{entry['fqdn']}^"
        for entry in data["domains"]
        if not entry.get("exclude", False)
    ]

    return "\n".join(header_lines + rules)
|
||||
|
||||
|
||||
def dump_output(data: str, output_file: str) -> None:
    """Write *data* to *output_file*, creating parent directories as needed.

    The file is written as UTF-8 explicitly, because the generated list
    contains non-ASCII characters and the locale's default encoding may not be
    able to represent them.

    Raises:
        OSError: if the output file cannot be opened for writing.
    """
    directory = os.path.dirname(output_file)
    # A bare filename has an empty dirname; os.makedirs("") would fail, and
    # there is nothing to create in that case anyway.
    if directory:
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            # Log and fall through: the open() below raises the error the
            # caller actually sees if the directory is really unusable.
            logger.critical(f"Error creating directory: {e}")
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(data)
|
||||
|
||||
|
||||
def get_last_modified_datetime(file_path: str) -> datetime:
    """Return the modification time of *file_path* as a UTC-aware datetime."""
    mtime_seconds: float = os.stat(file_path).st_mtime
    return datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
|
||||
|
||||
|
||||
def main() -> None:
    """Run the converter: parse arguments, load, convert, write, report timing.

    Every handled failure is logged and then terminates the process with exit
    status 1, so callers such as ``make`` can tell that no output was produced.

    Raises:
        SystemExit: with code 1 when the input cannot be loaded or the target
            format is unsupported.
    """
    # Start measuring time
    start_time: float = perf_counter()

    # Parse arguments
    args: argparse.Namespace = parse_arguments()

    # Load data together with the source file's mtime, which becomes the
    # timestamp in the generated header.
    try:
        data: dict = load_data(args.inputfile)
        last_modified: datetime = get_last_modified_datetime(args.inputfile)
    except FileNotFoundError:
        logger.error(f"File {args.inputfile} not found!")
        raise SystemExit(1) from None
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")
        raise SystemExit(1) from e
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        logger.critical(f"Unexpected error occurred: {e}")
        raise SystemExit(1) from e

    # Convert
    try:
        output: str = convert(data, last_modified, args.targetformat)
    except UnsupportedTargetFormatError:
        logger.error('Unsupported format. For now only "adguard" is supported.')
        raise SystemExit(1) from None

    # Dump generated data
    dump_output(output, args.outputfile)

    # Report elapsed time, rounded up to whole milliseconds
    delta: int = ceil((perf_counter() - start_time) * 1000)
    logger.info(f"Generated in {delta} ms")
|
||||
|
||||
|
||||
# Script entry point: run the converter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
125
src/seo-source.json
Normal file
125
src/seo-source.json
Normal file
@ -0,0 +1,125 @@
|
||||
{
|
||||
"domains": [
|
||||
{
|
||||
"fqdn": "forsal.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://forsal.pl/kraj/aktualnosci/artykuly-infor-pl/9796010,czy-11052025-r-to-niedziela-handlowa-handel-bez-zakazu-zakupy-w-l.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "wiadomosci.dziennik.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://wiadomosci.dziennik.pl/wydarzenia/artykuly/9739637,czy-dzisiaj-jest-niedziela-handlowa-czy-11-maja-2025-sklepy-i-galerie-sa-otwarte-niedziele-handlowe-w-tym-roku-niedziele-handlowe-w-tym-roku-czy-teraz-jest-niedziela-handlowa-kalendarz-11052025.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "biznes.wprost.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://biznes.wprost.pl/finanse-i-inwestycje/12010876/niedziele-handlowe-2025-r-czy-dzis-zrobimy-zakupy.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "legaartis.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://legaartis.pl/blog/2025/05/11/czy-dzis-zrobisz-zakupy-sprawdzamy-czy-to-niedziela-handlowa-i-co-jest-otwarte/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "superbiz.se.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://superbiz.se.pl/wiadomosci/niedziele-handlowe-2025-kiedy-zrobimy-zakupy-sprawdz-kalendarz-na-2025-aa-PiKL-kQ7v-4dbi.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "pomorska.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://pomorska.pl/sklepy-otwarte-11-maja-czy-to-niedziela-handlowa-sprawdz-jak-pracuja-sklepy-11-05-oto-kalendarz-niedziel-handlowych-2025/ar/c1p2-27566083"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "dziendobry.tvn.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://dziendobry.tvn.pl/newsy/czy-niedziela-11-maja-2025-r-jest-handlowa-gdzie-mozna-zrobic-dzisiaj-zakupy-st7720285"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "infor.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://www.infor.pl/twoje-pieniadze/zakupy/6929697,dzis-04052025-r-otwarte-galerie-handlowe-zakupy-w-lidlu-i-biedronce-czy-tylko-w-zabce-4-maja-to-niedziela-handlowa-i-czynne-wszystkie-sklepy.html",
|
||||
"https://samorzad.infor.pl/wiadomosci/6818633,niedziele-handlowe-w-2025-r-kalendarz-terminy.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "wspanialakobieta.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://wspanialakobieta.pl/czy-modem-t-mobile-ma-simlocka/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "www.fakt.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://www.fakt.pl/pieniadze/czy-11-maja-to-niedziela-handlowa-sprawdz-czy-zrobisz-zakupy/007w6n5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "strefabiznesu.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://strefabiznesu.pl/sklepy-otwarte-11-05-czy-dzisiaj-wypada-niedziela-handlowa-gdzie-mozesz-dzis-zrobic-zakupy/ar/c3p1-22848549"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "gospodarka.dziennik.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://gospodarka.dziennik.pl/praca/artykuly/9739637,czy-9-marca-jest-niedziela-handlowa-niedziele-handlowe-2025-czy-0903-sklepy-i-galerie-beda-otwarte-niedziele-handlowe-w-marcu-2025-kalendarz.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "ekopralnie.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://www.ekopralnie.pl/w-czym-prac-mikrofibre/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "alepranie.com.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://alepranie.com.pl/jak-prac-mikrofibre/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "aboutdecor.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://aboutdecor.pl/blog/jak-ze-zdjec-zrobic-kolaz-zrob-samodzielnie-kolaz-27"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user
tutaj jeszcze trzeba dodać plik
src/seo-source.json
jako dependency. Wtedy Make będzie unikał buildów dla plików, dla których już build był zrobiony (patrząc po mtime)