Dodanie skryptu konwertera, przygotowanie do kategoryzacji list i dodanie pliku źródłowego JSON (#1)
Co-authored-by: Wiktor <> Co-authored-by: Kuba Orlik <kontakt@kuba-orlik.name> Reviewed-on: #1 Reviewed-by: Kuba Orlik <kuba@internet-czas-dzialac.pl>
This commit is contained in:
parent
0ff8286208
commit
266464c0e3
12
Makefile
Normal file
12
Makefile
Normal file
@ -0,0 +1,12 @@
|
||||
# Command used to run the JSON -> blocking-list converter.
CONVERTER = python3 src/converter.py

# build, clean and all name actions, not files. Declaring them phony keeps
# them working even if a file or directory with one of those names appears.
.PHONY: build clean all

# Regenerate the AdGuard list whenever the JSON source changes.
dist/seo-nonsense/adguard.txt:: src/seo-source.json
	$(CONVERTER) --inputfile src/seo-source.json --targetformat adguard --outputfile dist/seo-nonsense/adguard.txt

build: dist/seo-nonsense/adguard.txt

# Remove every generated artifact.
clean:
	rm -rf dist/*

all: build
|
17
dist/seo-nonsense/adguard.txt
vendored
Normal file
17
dist/seo-nonsense/adguard.txt
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
! Blocking list automatically generated at 2025-05-19 18:59:52 UTC+0000
|
||||
! Created with ❤️ by internet-czas-dzialac.pl
|
||||
||forsal.pl^
|
||||
||wiadomosci.dziennik.pl^
|
||||
||biznes.wprost.pl^
|
||||
||legaartis.pl^
|
||||
||superbiz.se.pl^
|
||||
||pomorska.pl^
|
||||
||dziendobry.tvn.pl^
|
||||
||infor.pl^
|
||||
||wspanialakobieta.pl^
|
||||
||www.fakt.pl^
|
||||
||strefabiznesu.pl^
|
||||
||gospodarka.dziennik.pl^
|
||||
||ekopralnie.pl^
|
||||
||alepranie.com.pl^
|
||||
||aboutdecor.pl^
|
152
src/converter.py
Normal file
152
src/converter.py
Normal file
@ -0,0 +1,152 @@
|
||||
# ruff: noqa: D100, D101, D102, D103, G004, TRY400, PTH123, RUF012
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from math import ceil
|
||||
from os import getenv
|
||||
from time import perf_counter
|
||||
|
||||
|
||||
# Configure logging with color formatting
|
||||
class CustomFormatter(logging.Formatter):
    """Formatter that renders ``[LEVEL] message`` wrapped in an ANSI color.

    The color is looked up in ``COLOR_MAP`` by the record's numeric level;
    levels missing from the map fall back to ``GREY``. The formatted text
    always ends with ``RESET``.
    """

    # ANSI escape sequences placed before (colors) and after (RESET) a message.
    GREY: str = "\x1b[38;20m"
    YELLOW: str = "\x1b[33;20m"
    RED: str = "\x1b[31;20m"
    BOLD_RED: str = "\x1b[31;1m"
    RESET: str = "\x1b[0m"

    # Numeric logging level -> escape sequence used for records at that level.
    COLOR_MAP: dict[int, str] = {
        logging.DEBUG: GREY,
        logging.INFO: GREY,
        logging.WARNING: YELLOW,
        logging.ERROR: RED,
        logging.CRITICAL: BOLD_RED,
    }

    def format(self, record: logging.LogRecord) -> str:
        """Return the colorized ``[LEVEL] message`` text for *record*."""
        prefix = self.COLOR_MAP.get(record.levelno, self.GREY)
        body = f"[{record.levelname}] {record.getMessage()}"
        return f"{prefix}{body}{self.RESET}"
|
||||
|
||||
|
||||
# Module-wide logger. Its level is taken from the LOGLEVEL environment
# variable (INFO when unset), upper-cased before being applied.
logger: logging.Logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get("LOGLEVEL", "INFO").upper())

# Stream handler whose records are rendered by CustomFormatter.
color_handler: logging.StreamHandler = logging.StreamHandler()
color_handler.setFormatter(CustomFormatter())
logger.addHandler(color_handler)
|
||||
|
||||
|
||||
def parse_arguments() -> argparse.Namespace:
    """Parse the converter's command-line options.

    Three options are registered, all of them required: ``--inputfile``,
    ``--targetformat`` and ``--outputfile``.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``, exposing
        ``inputfile``, ``targetformat`` and ``outputfile``.
    """
    # (flag, metavar, help text) for each option, in registration order.
    option_specs: tuple[tuple[str, str, str], ...] = (
        (
            "--inputfile",
            "INPUT_FILE",
            "Path to the input JSON file containing the blocking list",
        ),
        (
            "--targetformat",
            "TARGET_FORMAT",
            "Target output format (e.g., adguard)",
        ),
        (
            "--outputfile",
            "OUTPUT_FILE",
            "Path to the output file",
        ),
    )

    cli = argparse.ArgumentParser(
        description="Convert our JSON blocking list to popular blocking list formats",
    )
    for flag, placeholder, description in option_specs:
        cli.add_argument(flag, required=True, metavar=placeholder, help=description)
    return cli.parse_args()
|
||||
|
||||
|
||||
def load_data(filename: str) -> dict:
    """Read *filename* and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's default text encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The value produced by ``json.load`` (annotated as a dict; the top
        level is expected to be a JSON object).

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, encoding="utf-8") as file:
        return json.load(file)
|
||||
|
||||
|
||||
class UnsupportedTargetFormatError(Exception):
    """Signals that the requested target format has no converter."""
|
||||
|
||||
|
||||
def convert(data: dict, last_modified: datetime, target_format: str) -> str:
    """Render *data* in the blocking-list format named by *target_format*.

    Args:
        data: Parsed blocking-list source, handed to the format converter.
        last_modified: Timestamp handed to the format converter.
        target_format: Name of the output format; only ``"adguard"`` is handled.

    Returns:
        The text produced by the selected converter.

    Raises:
        UnsupportedTargetFormatError: For any other *target_format*.
    """
    if target_format == "adguard":
        return adguard_conversion(last_modified, data)
    raise UnsupportedTargetFormatError
|
||||
|
||||
|
||||
def adguard_conversion(last_modified: datetime, data: dict) -> str:
    """Render the blocking list as AdGuard-style ``||domain^`` rules.

    The output starts with two ``!`` comment lines (generation timestamp and
    credit), followed by one rule per entry of ``data["domains"]``. Entries
    whose ``exclude`` value is truthy are skipped.

    Args:
        last_modified: Timestamp printed in the first header line.
        data: Mapping with a ``"domains"`` list of entries; each emitted
            entry must have an ``"fqdn"`` key.

    Returns:
        All lines joined with ``"\n"``, without a trailing newline.

    Raises:
        KeyError: If ``"domains"`` is missing, or a non-excluded entry has
            no ``"fqdn"``.
    """
    header_lines: list[str] = [
        f"! Blocking list automatically generated at {last_modified.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
        "! Created with ❤️ by internet-czas-dzialac.pl",
    ]

    # Filter before reading "fqdn" so an excluded entry never needs that key.
    rules: list[str] = [
        f"||{entry['fqdn']}^"
        for entry in data["domains"]
        if not entry.get("exclude", False)
    ]

    return "\n".join(header_lines + rules)
|
||||
|
||||
|
||||
def dump_output(data: str, output_file: str) -> None:
    """Write *data* to *output_file*, creating its parent directory if needed.

    Args:
        data: Text to write; the file is overwritten.
        output_file: Destination path. May be a bare file name, in which
            case no directory is created.

    Raises:
        OSError: If the file cannot be opened or written. A failure to
            create the parent directory is only logged; the write is still
            attempted afterwards.
    """
    directory = os.path.dirname(output_file)
    # A bare file name has an empty dirname and os.makedirs("") raises, which
    # used to log a spurious critical error; there is nothing to create then.
    if directory:
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            logger.critical(f"Error creating directory: {e}")
    # Encode explicitly: the data may contain non-ASCII characters and the
    # platform default encoding is not guaranteed to be able to represent them.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(data)
|
||||
|
||||
|
||||
def get_last_modified_datetime(file_path: str) -> datetime:
    """Return the modification time of *file_path* as a UTC-aware datetime.

    Raises:
        OSError: If the file cannot be stat'ed (e.g. it does not exist).
    """
    mtime_seconds: float = os.stat(file_path).st_mtime
    return datetime.fromtimestamp(mtime_seconds, tz=timezone.utc)
|
||||
|
||||
|
||||
def main() -> None:
    """Run the conversion end to end: parse arguments, load, convert, write.

    On success the elapsed time is logged at INFO level.

    Raises:
        SystemExit: With status 1, after logging the cause, when the input
            file cannot be loaded or the target format is unsupported. A
            plain ``return`` here would end the process with status 0 and
            hide the failure from calling tools such as make.
    """
    # Start measuring time
    start_time: float = perf_counter()

    # Parse arguments
    args: argparse.Namespace = parse_arguments()

    # Load data
    try:
        data: dict = load_data(args.inputfile)
        last_modified: datetime = get_last_modified_datetime(args.inputfile)
    except FileNotFoundError:
        logger.error(f"File {args.inputfile} not found!")
        raise SystemExit(1) from None
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")
        raise SystemExit(1) from e
    except Exception as e:
        # Top-level boundary: report anything unexpected and fail the run.
        logger.critical(f"Unexpected error occurred: {e}")
        raise SystemExit(1) from e

    # Convert
    try:
        output = convert(data, last_modified, args.targetformat)
    except UnsupportedTargetFormatError:
        logger.error('Unsupported format. For now only "adguard" is supported.')
        raise SystemExit(1) from None

    # Dump generated data
    dump_output(output, args.outputfile)

    # Result time print (whole milliseconds, rounded up)
    delta: int = ceil((perf_counter() - start_time) * 1000)
    logger.info(f"Generated in {delta} ms")
|
||||
|
||||
|
||||
# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
125
src/seo-source.json
Normal file
125
src/seo-source.json
Normal file
@ -0,0 +1,125 @@
|
||||
{
|
||||
"domains": [
|
||||
{
|
||||
"fqdn": "forsal.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://forsal.pl/kraj/aktualnosci/artykuly-infor-pl/9796010,czy-11052025-r-to-niedziela-handlowa-handel-bez-zakazu-zakupy-w-l.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "wiadomosci.dziennik.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://wiadomosci.dziennik.pl/wydarzenia/artykuly/9739637,czy-dzisiaj-jest-niedziela-handlowa-czy-11-maja-2025-sklepy-i-galerie-sa-otwarte-niedziele-handlowe-w-tym-roku-niedziele-handlowe-w-tym-roku-czy-teraz-jest-niedziela-handlowa-kalendarz-11052025.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "biznes.wprost.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://biznes.wprost.pl/finanse-i-inwestycje/12010876/niedziele-handlowe-2025-r-czy-dzis-zrobimy-zakupy.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "legaartis.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://legaartis.pl/blog/2025/05/11/czy-dzis-zrobisz-zakupy-sprawdzamy-czy-to-niedziela-handlowa-i-co-jest-otwarte/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "superbiz.se.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://superbiz.se.pl/wiadomosci/niedziele-handlowe-2025-kiedy-zrobimy-zakupy-sprawdz-kalendarz-na-2025-aa-PiKL-kQ7v-4dbi.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "pomorska.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://pomorska.pl/sklepy-otwarte-11-maja-czy-to-niedziela-handlowa-sprawdz-jak-pracuja-sklepy-11-05-oto-kalendarz-niedziel-handlowych-2025/ar/c1p2-27566083"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "dziendobry.tvn.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://dziendobry.tvn.pl/newsy/czy-niedziela-11-maja-2025-r-jest-handlowa-gdzie-mozna-zrobic-dzisiaj-zakupy-st7720285"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "infor.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://www.infor.pl/twoje-pieniadze/zakupy/6929697,dzis-04052025-r-otwarte-galerie-handlowe-zakupy-w-lidlu-i-biedronce-czy-tylko-w-zabce-4-maja-to-niedziela-handlowa-i-czynne-wszystkie-sklepy.html",
|
||||
"https://samorzad.infor.pl/wiadomosci/6818633,niedziele-handlowe-w-2025-r-kalendarz-terminy.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "wspanialakobieta.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://wspanialakobieta.pl/czy-modem-t-mobile-ma-simlocka/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "www.fakt.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://www.fakt.pl/pieniadze/czy-11-maja-to-niedziela-handlowa-sprawdz-czy-zrobisz-zakupy/007w6n5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "strefabiznesu.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://strefabiznesu.pl/sklepy-otwarte-11-05-czy-dzisiaj-wypada-niedziela-handlowa-gdzie-mozesz-dzis-zrobic-zakupy/ar/c3p1-22848549"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "gospodarka.dziennik.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://gospodarka.dziennik.pl/praca/artykuly/9739637,czy-9-marca-jest-niedziela-handlowa-niedziele-handlowe-2025-czy-0903-sklepy-i-galerie-beda-otwarte-niedziele-handlowe-w-marcu-2025-kalendarz.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "ekopralnie.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://www.ekopralnie.pl/w-czym-prac-mikrofibre/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "alepranie.com.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://alepranie.com.pl/jak-prac-mikrofibre/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"fqdn": "aboutdecor.pl",
|
||||
"date_added": "2025-05-12",
|
||||
"reason": "",
|
||||
"evidence": [
|
||||
"https://aboutdecor.pl/blog/jak-ze-zdjec-zrobic-kolaz-zrob-samodzielnie-kolaz-27"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user