Dodanie skryptu konwertera, przygotowanie do kategoryzacji list i dodanie pliku źródłowego JSON (#1)
Co-authored-by: Wiktor <> Co-authored-by: Kuba Orlik <kontakt@kuba-orlik.name> Reviewed-on: #1 Reviewed-by: Kuba Orlik <kuba@internet-czas-dzialac.pl>
This commit is contained in:
parent
0ff8286208
commit
266464c0e3
12
Makefile
Normal file
12
Makefile
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Command used to run the blocking-list converter.
CONVERTER = python3 src/converter.py

# These targets are commands, not files; without .PHONY a stray file named
# "build", "clean" or "all" would silently disable them.
.PHONY: all build clean

# Regenerate the AdGuard list when the JSON source or the converter changes.
# $< is the first prerequisite (the JSON source), $@ is the target file.
dist/seo-nonsense/adguard.txt: src/seo-source.json src/converter.py
	$(CONVERTER) --inputfile $< --targetformat adguard --outputfile $@

build: dist/seo-nonsense/adguard.txt

clean:
	rm -rf dist/*

all: build
|
17
dist/seo-nonsense/adguard.txt
vendored
Normal file
17
dist/seo-nonsense/adguard.txt
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
! Blocking list automatically generated at 2025-05-19 18:59:52 UTC+0000
|
||||||
|
! Created with ❤️ by internet-czas-dzialac.pl
|
||||||
|
||forsal.pl^
|
||||||
|
||wiadomosci.dziennik.pl^
|
||||||
|
||biznes.wprost.pl^
|
||||||
|
||legaartis.pl^
|
||||||
|
||superbiz.se.pl^
|
||||||
|
||pomorska.pl^
|
||||||
|
||dziendobry.tvn.pl^
|
||||||
|
||infor.pl^
|
||||||
|
||wspanialakobieta.pl^
|
||||||
|
||www.fakt.pl^
|
||||||
|
||strefabiznesu.pl^
|
||||||
|
||gospodarka.dziennik.pl^
|
||||||
|
||ekopralnie.pl^
|
||||||
|
||alepranie.com.pl^
|
||||||
|
||aboutdecor.pl^
|
152
src/converter.py
Normal file
152
src/converter.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
# ruff: noqa: D100, D101, D102, D103, G004, TRY400, PTH123, RUF012
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from math import ceil
|
||||||
|
from os import getenv
|
||||||
|
from time import perf_counter
|
||||||
|
|
||||||
|
|
||||||
|
# Configure logging with color formatting
|
||||||
|
class CustomFormatter(logging.Formatter):
    """Formatter that wraps every log line in a terminal colour chosen by level."""

    # Terminal escape sequences used to colour and reset the output.
    GREY: str = "\x1b[38;20m"
    YELLOW: str = "\x1b[33;20m"
    RED: str = "\x1b[31;20m"
    BOLD_RED: str = "\x1b[31;1m"
    RESET: str = "\x1b[0m"

    # Colour for each standard logging level.
    COLOR_MAP: dict[int, str] = {
        logging.DEBUG: GREY,
        logging.INFO: GREY,
        logging.WARNING: YELLOW,
        logging.ERROR: RED,
        logging.CRITICAL: BOLD_RED,
    }

    def format(self, record: logging.LogRecord) -> str:
        """Return ``[LEVEL] message`` surrounded by the level colour and a reset."""
        level = record.levelno
        # Levels missing from COLOR_MAP are rendered in grey.
        prefix = self.COLOR_MAP[level] if level in self.COLOR_MAP else self.GREY
        body = f"[{record.levelname}] {record.getMessage()}"
        return "".join((prefix, body, self.RESET))
|
||||||
|
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
# Module logger: coloured stream output, verbosity taken from the LOGLEVEL
# environment variable (INFO when unset).
color_handler: logging.StreamHandler = logging.StreamHandler()
color_handler.setFormatter(CustomFormatter())

logger: logging.Logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get("LOGLEVEL", "INFO").upper())
logger.addHandler(color_handler)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Three options are accepted and all of them are mandatory:
    ``--inputfile``, ``--targetformat`` and ``--outputfile``.
    """
    # (flag, metavar, help text) for each option, in declaration order.
    required_options: tuple[tuple[str, str, str], ...] = (
        ("--inputfile", "INPUT_FILE", "Path to the input JSON file containing the blocking list"),
        ("--targetformat", "TARGET_FORMAT", "Target output format (e.g., adguard)"),
        ("--outputfile", "OUTPUT_FILE", "Path to the output file"),
    )

    cli = argparse.ArgumentParser(
        description="Convert our JSON blocking list to popular blocking list formats",
    )
    for flag, metavar, help_text in required_options:
        cli.add_argument(flag, required=True, metavar=metavar, help=help_text)
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(filename: str) -> dict:
    """Read *filename* and return the parsed JSON document.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's locale encoding.

    Raises:
        FileNotFoundError: if *filename* does not exist.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filename, encoding="utf-8") as file:
        return json.load(file)
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedTargetFormatError(Exception):
    """Signals that the requested target format has no converter."""
|
||||||
|
|
||||||
|
|
||||||
|
def convert(data: dict, last_modified: datetime, target_format: str) -> str:
    """Render *data* as a blocking list in *target_format*.

    Args:
        data: Parsed source document, passed through to the format converter.
        last_modified: Timestamp passed through to the format converter.
        target_format: Name of the output format; only ``"adguard"`` is handled.

    Returns:
        The generated blocking list as a single string.

    Raises:
        UnsupportedTargetFormatError: if *target_format* is not recognised.
            The message names the rejected format.
    """
    if target_format == "adguard":
        return adguard_conversion(last_modified, data)
    raise UnsupportedTargetFormatError(f"Unsupported target format: {target_format!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def adguard_conversion(last_modified: datetime, data: dict) -> str:
    """Build an AdGuard-style blocking list from *data*.

    Args:
        last_modified: Timestamp rendered into the first header line.
        data: Document with a ``"domains"`` list; each entry provides
            ``"fqdn"`` and may set ``"exclude"`` to a truthy value to be
            left out of the list.

    Returns:
        Two ``!`` header lines followed by one ``||<fqdn>^`` rule per
        included domain, joined with newlines (no trailing newline).

    Raises:
        KeyError: if ``"domains"`` is missing, or an included entry has no
            ``"fqdn"``.
    """
    header_lines: list[str] = [
        f"! Blocking list automatically generated at {last_modified.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
        "! Created with ❤️ by internet-czas-dzialac.pl",
    ]

    # The exclude flag is checked before "fqdn" is read, so an excluded
    # entry never needs to carry a domain name.
    rules: list[str] = [
        f"||{entry['fqdn']}^"
        for entry in data["domains"]
        if not entry.get("exclude", False)
    ]

    return "\n".join(header_lines + rules)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_output(data: str, output_file: str) -> None:
    """Write *data* to *output_file*, creating missing parent directories.

    The file is written as UTF-8 regardless of the platform locale, because
    generated lists contain non-ASCII characters.

    Args:
        data: Text to write.
        output_file: Destination path; an existing file is overwritten.

    Raises:
        OSError: if the parent directory cannot be created or the file
            cannot be written.
    """
    directory = os.path.dirname(output_file)
    # A bare file name has an empty directory part, and os.makedirs("")
    # would raise, so there is nothing to create in that case.
    if directory:
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            logger.critical(f"Error creating directory: {e}")
            # Writing cannot succeed without the directory; surface the root cause.
            raise

    with open(output_file, "w", encoding="utf-8") as file:
        file.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_modified_datetime(file_path: str) -> datetime:
    """Return the modification time of *file_path* as a timezone-aware UTC datetime."""
    mtime_seconds: float = os.stat(file_path).st_mtime
    return datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Run the converter: parse arguments, load, convert and write the list.

    Every failure is logged and then ends the process with exit status 1,
    so callers such as ``make`` can tell that no list was generated.

    Raises:
        SystemExit: with status 1 when the input cannot be loaded or the
            target format is unsupported.
    """
    # Start measuring time
    start_time: float = perf_counter()

    # Parse arguments
    args: argparse.Namespace = parse_arguments()

    # Load data
    try:
        data: dict = load_data(args.inputfile)
        last_modified: datetime = get_last_modified_datetime(args.inputfile)
    except FileNotFoundError:
        logger.error(f"File {args.inputfile} not found!")
        raise SystemExit(1) from None
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")
        raise SystemExit(1) from None
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        logger.critical(f"Unexpected error occurred: {e}")
        raise SystemExit(1) from None

    # Convert
    try:
        output = convert(data, last_modified, args.targetformat)
    except UnsupportedTargetFormatError:
        logger.error('Unsupported format. For now only "adguard" is supported.')
        raise SystemExit(1) from None

    # Dump generated data
    dump_output(output, args.outputfile)

    # Report elapsed wall-clock time, rounded up to whole milliseconds
    delta: int = ceil((perf_counter() - start_time) * 1000)
    logger.info(f"Generated in {delta} ms")


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
125
src/seo-source.json
Normal file
125
src/seo-source.json
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
{
|
||||||
|
"domains": [
|
||||||
|
{
|
||||||
|
"fqdn": "forsal.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://forsal.pl/kraj/aktualnosci/artykuly-infor-pl/9796010,czy-11052025-r-to-niedziela-handlowa-handel-bez-zakazu-zakupy-w-l.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "wiadomosci.dziennik.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://wiadomosci.dziennik.pl/wydarzenia/artykuly/9739637,czy-dzisiaj-jest-niedziela-handlowa-czy-11-maja-2025-sklepy-i-galerie-sa-otwarte-niedziele-handlowe-w-tym-roku-niedziele-handlowe-w-tym-roku-czy-teraz-jest-niedziela-handlowa-kalendarz-11052025.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "biznes.wprost.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://biznes.wprost.pl/finanse-i-inwestycje/12010876/niedziele-handlowe-2025-r-czy-dzis-zrobimy-zakupy.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "legaartis.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://legaartis.pl/blog/2025/05/11/czy-dzis-zrobisz-zakupy-sprawdzamy-czy-to-niedziela-handlowa-i-co-jest-otwarte/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "superbiz.se.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://superbiz.se.pl/wiadomosci/niedziele-handlowe-2025-kiedy-zrobimy-zakupy-sprawdz-kalendarz-na-2025-aa-PiKL-kQ7v-4dbi.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "pomorska.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://pomorska.pl/sklepy-otwarte-11-maja-czy-to-niedziela-handlowa-sprawdz-jak-pracuja-sklepy-11-05-oto-kalendarz-niedziel-handlowych-2025/ar/c1p2-27566083"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "dziendobry.tvn.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://dziendobry.tvn.pl/newsy/czy-niedziela-11-maja-2025-r-jest-handlowa-gdzie-mozna-zrobic-dzisiaj-zakupy-st7720285"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "infor.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://www.infor.pl/twoje-pieniadze/zakupy/6929697,dzis-04052025-r-otwarte-galerie-handlowe-zakupy-w-lidlu-i-biedronce-czy-tylko-w-zabce-4-maja-to-niedziela-handlowa-i-czynne-wszystkie-sklepy.html",
|
||||||
|
"https://samorzad.infor.pl/wiadomosci/6818633,niedziele-handlowe-w-2025-r-kalendarz-terminy.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "wspanialakobieta.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://wspanialakobieta.pl/czy-modem-t-mobile-ma-simlocka/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "www.fakt.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://www.fakt.pl/pieniadze/czy-11-maja-to-niedziela-handlowa-sprawdz-czy-zrobisz-zakupy/007w6n5"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "strefabiznesu.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://strefabiznesu.pl/sklepy-otwarte-11-05-czy-dzisiaj-wypada-niedziela-handlowa-gdzie-mozesz-dzis-zrobic-zakupy/ar/c3p1-22848549"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "gospodarka.dziennik.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://gospodarka.dziennik.pl/praca/artykuly/9739637,czy-9-marca-jest-niedziela-handlowa-niedziele-handlowe-2025-czy-0903-sklepy-i-galerie-beda-otwarte-niedziele-handlowe-w-marcu-2025-kalendarz.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "ekopralnie.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://www.ekopralnie.pl/w-czym-prac-mikrofibre/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "alepranie.com.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://alepranie.com.pl/jak-prac-mikrofibre/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"fqdn": "aboutdecor.pl",
|
||||||
|
"date_added": "2025-05-12",
|
||||||
|
"reason": "",
|
||||||
|
"evidence": [
|
||||||
|
"https://aboutdecor.pl/blog/jak-ze-zdjec-zrobic-kolaz-zrob-samodzielnie-kolaz-27"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user