#!/bin/bash
set -e
# @license: CC BY-NC-SA 4.0 International
# @author: SMKRV
# @github: https://github.com/smkrv/mikrotik-domain-filter-script
# @source: https://github.com/smkrv/mikrotik-domain-filter-script
#
# Mikrotik Domain Filter Script is a robust Bash solution primarily designed
# for filtering and processing domain lists for Mikrotik devices, enabling
# straightforward management of blocklists or allowlists.
#
# For a detailed description, please visit the GitHub repository:
# https://github.com/smkrv/mikrotik-domain-filter-script
#
# By combining domain classification, DNS validation, and whitelist handling,
# this tool offers a comprehensive workflow to create accurate and reliable
# filtered lists, ensuring efficient network policy enforcement. It is also
# suitable for building and maintaining Adlists by returning 0.0.0.0 for
# domains serving advertisements, integrating seamlessly with DNS Static in
# Mikrotik RouterOS, and aiding in generating DNS FWD records.
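#
# Illustrative only (not generated by this script): the resulting one-domain-per-line
# lists are typically turned into RouterOS DNS Static entries along these lines;
# exact syntax depends on the RouterOS version, so treat this as a sketch:
#   /ip dns static add name=ads.example.com address=0.0.0.0 match-subdomain=yes
#   /ip dns static add name=example.org type=FWD forward-to=192.0.2.53 match-subdomain=yes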
# Enable debugging
# set -x
# Path settings
# Important: Verify that the following directory path is correct
readonly WORK_DIR="/home/domain-filter-mikrotik"
readonly SOURCES_FILE="${WORK_DIR}/sources.txt"
readonly SOURCESSPECIAL_FILE="${WORK_DIR}/sources_special.txt"
readonly WHITELIST_FILE="${WORK_DIR}/sources_whitelist.txt"
readonly PUBLIC_SUFFIX_FILE="${WORK_DIR}/public_suffix_list.dat"
readonly LOG_FILE="${WORK_DIR}/script.log"
# This lock file path must also be correct to avoid conflicts
readonly LOCK_FILE="/tmp/domains_update.lock"
# Temporary directories and files
readonly TMP_DIR="${WORK_DIR}/tmp"
readonly CACHE_DIR="${WORK_DIR}/cache"
# Output files
readonly OUTPUT_FILE="${WORK_DIR}/filtered_domains_mikrotik.txt"
readonly OUTPUT_FILESPECIAL="${WORK_DIR}/filtered_domains_special_mikrotik.txt"
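# Both output files end up as plain lists with one domain per line (sorted and
# de-duplicated), for example:
#   cdn.example.net
#   example.com
#   example.org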
# Load environment variables from .env file if it exists
if [[ -f "${WORK_DIR}/.env" ]]; then
# shellcheck disable=SC1090
while IFS='=' read -r key value; do
# Skip comments and empty lines
[[ $key =~ ^#.*$ ]] && continue
[[ -z "$key" ]] && continue
# Remove quotes and export variable
value="${value%\"}"
value="${value#\"}"
export "$key=$value"
done < "${WORK_DIR}/.env"
fi
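# Illustrative .env layout (values are placeholders, not real credentials);
# surrounding double quotes are stripped by the loop above:
#   EXPORT_GISTS="true"
#   GITHUB_TOKEN="<your GitHub token>"
#   GIST_ID_MAIN="<gist id for the main list>"
#   GIST_ID_SPECIAL="<gist id for the special list>"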
# GitHub Gist settings !Use env var if available
readonly EXPORT_GISTS=${EXPORT_GISTS:-false} # Default to false if not set
readonly GITHUB_TOKEN=${GITHUB_TOKEN:-""} # GitHub access token
readonly GIST_ID_MAIN=${GIST_ID_MAIN:-""} # Gist ID for main list
readonly GIST_ID_SPECIAL=${GIST_ID_SPECIAL:-""} # Gist ID for special list
# Add state preservation
readonly STATE_DIR="${WORK_DIR}/state"
readonly PREVIOUS_STATE="${STATE_DIR}/previous_state.dat"
# Performance settings
readonly MAX_PARALLEL_JOBS=5
readonly DNS_RATE_LIMIT=5
readonly DNS_TIMEOUT=10
readonly DNS_MAX_RETRIES=3
# Export variables for parallel
export DNS_TIMEOUT
export DNS_RATE_LIMIT
export LOG_FILE
export DNS_MAX_RETRIES
# Enable debugging
# exec 2>"${WORK_DIR}/debug.log"
# Create log directory if it doesn't exist
mkdir -p "$(dirname "$LOG_FILE")"
# Clear old log
: > "$LOG_FILE"
# Check for required files
check_required_files() {
local missing_files=()
[[ ! -f "$SOURCES_FILE" ]] && missing_files+=("$SOURCES_FILE")
[[ ! -f "$SOURCESSPECIAL_FILE" ]] && missing_files+=("$SOURCESSPECIAL_FILE")
if [[ ${#missing_files[@]} -gt 0 ]]; then
echo "ERROR: Missing required files:"
printf '%s\n' "${missing_files[@]}"
exit 1
fi
}
# Enhanced logging
log() {
local timestamp
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
echo "[$timestamp] $1" | tee -a "$LOG_FILE"
}
error() {
log "ERROR: $1"
echo "ERROR: $1" >&2
exit 1
}
# Lock function
acquire_lock() {
log "Attempting to acquire lock..."
# Open a file descriptor for the lifetime of the script; append mode so that an
# existing holder's PID is not truncated before it can be reported below
exec 9>>"$LOCK_FILE"
if ! flock -n 9; then
log "Script is already running (PID: $(cat "$LOCK_FILE" 2>/dev/null || echo 'unknown'))"
exit 1
fi
# Record our PID for the diagnostic message above
echo $$ > "$LOCK_FILE"
log "Lock acquired successfully"
}
# Unlock function
release_lock() {
log "Releasing lock..."
if [[ -e /proc/$$/fd/9 ]]; then
flock -u 9
exec 9>&-
rm -f "$LOCK_FILE"
else
rm -f "$LOCK_FILE"
fi
}
# Signal handling
trap 'log "Script interrupted"; trap_cleanup' INT TERM
init_directories() {
log "Initialization started..."
if ! mkdir -p "$STATE_DIR"; then
error "Failed to create state directory"
fi
if ! chmod 755 "$STATE_DIR"; then
log "WARNING: Failed to set permissions for state directory"
else
log "State directory created with correct permissions"
fi
# Create state file if doesn't exist
if [[ ! -f "${STATE_DIR}/update_state.dat" ]]; then
if ! touch "${STATE_DIR}/update_state.dat"; then
log "WARNING: Failed to create state file"
fi
if ! chmod 644 "${STATE_DIR}/update_state.dat"; then
log "WARNING: Failed to set permissions for state file"
fi
fi
# Check and create all required directories with proper permissions
local dirs=("$WORK_DIR" "$TMP_DIR" "$CACHE_DIR" "$STATE_DIR" "${TMP_DIR}/downloads")
for dir in "${dirs[@]}"; do
if [[ ! -d "$dir" ]]; then
if ! mkdir -p "$dir"; then
error "Failed to create directory ${dir}"
fi
# Set proper permissions (rwxr-xr-x)
if ! chmod 755 "$dir"; then
log "WARNING: Failed to set permissions for ${dir}"
fi
fi
if [[ ! -w "$dir" ]]; then
error "No write permissions for ${dir}"
fi
done
# Ensure proper ownership
if [[ -n "$SUDO_USER" ]]; then
if ! chown -R "$SUDO_USER:$SUDO_USER" "$TMP_DIR"; then
log "WARNING: Failed to set ownership for ${TMP_DIR}"
fi
fi
# Verify log file permissions
if [[ ! -w "$(dirname "$LOG_FILE")" ]]; then
error "No write permissions for log directory"
fi
if [[ -f "$LOG_FILE" && ! -w "$LOG_FILE" ]]; then
error "No write permissions for log file"
fi
log "Initialization completed successfully"
return 0
}
# Function to clean up temporary files
cleanup() {
log "Starting cleanup..."
# Validate directory paths
for dir in "$TMP_DIR" "$CACHE_DIR"; do
if [[ ! -d "$dir" ]]; then
log "WARNING: Directory does not exist: $dir"
continue
fi
if [[ "$dir" == "/" ]]; then
log "ERROR: Invalid directory path: $dir"
return 1
fi
done
# Clean temporary directory
if [[ -d "$TMP_DIR" && "$TMP_DIR" =~ ^${WORK_DIR}/tmp ]]; then
log "Cleaning temporary directory..."
# Create list of protected files
local protected_files=(
"*md5"
"domain_registry.*"
"previous_state.*"
"update_state.dat"
"*.backup"
)
# Build exclude pattern
local exclude_pattern
exclude_pattern=$(printf " ! -name '%s'" "${protected_files[@]}")
# Remove files
eval "find '$TMP_DIR' -type f $exclude_pattern -delete" || {
log "WARNING: Failed to clean temporary files"
}
# Remove empty directories except protected ones
find "$TMP_DIR" -type d -empty ! -name "downloads" ! -name "state" -delete 2>/dev/null || {
log "WARNING: Failed to clean empty directories"
}
else
log "WARNING: Invalid temporary directory path: $TMP_DIR"
return 1
fi
# Clean old cache files
if [[ -d "$CACHE_DIR" && "$CACHE_DIR" != "/" ]]; then
log "Cleaning old cache files..."
# Remove files older than 90 days
find "$CACHE_DIR" -type f -name "*.cache" -mtime +90 -delete 2>/dev/null || {
log "WARNING: Failed to clean old cache files"
}
# Check cache size and clean if needed
local cache_size
cache_size=$(du -sm "$CACHE_DIR" 2>/dev/null | cut -f1)
if [[ -n "$cache_size" ]] && (( cache_size > 1024 )); then
log "Cache size exceeds 1GB, cleaning oldest files..."
find "$CACHE_DIR" -type f -name "*.cache" -printf '%T@ %p\n' | \
sort -n | head -n 1000 | cut -d' ' -f2- | xargs rm -f 2>/dev/null || {
log "WARNING: Failed to clean large cache"
}
fi
fi
log "Cleanup completed"
return 0
}
cleanup_invalid_cache() {
log "Cleaning invalid cache entries..."
find "$CACHE_DIR" -type f -name "*.cache" -print0 | while IFS= read -r -d '' f; do
if ! grep -qE "^(valid|invalid)$" "$f"; then
rm -f "$f"
fi
done
}
log_cache_stats() {
local total valid invalid
total=$(find "$CACHE_DIR" -type f -name "*.cache" | wc -l)
valid=$(grep -l "^valid$" "$CACHE_DIR"/*.cache 2>/dev/null | wc -l)
invalid=$(grep -l "^invalid$" "$CACHE_DIR"/*.cache 2>/dev/null | wc -l)
log "Cache stats - Total: $total, Valid: $valid, Invalid: $invalid"
}
handle_cache_error() {
local domain=$1
log "WARNING: Cache error for domain: $domain"
rm -f "${CACHE_DIR}/${domain}.cache"
return 1
}
# Enhanced trap handling
trap_cleanup() {
local exit_code=$?
log "Caught exit signal. Performing cleanup..."
# Save current work if possible
if [[ -f "${TMP_DIR}/main_filtered.txt" ]]; then
if ! cp "${TMP_DIR}/main_filtered.txt" "${WORK_DIR}/main_filtered.backup" 2>/dev/null; then
log "WARNING: Failed to save main list backup"
fi
fi
if [[ -f "${WORK_DIR}/main_filtered.backup" ]]; then
log "Saved main list backup"
fi
if [[ -f "${TMP_DIR}/special_filtered.txt" && -s "${TMP_DIR}/special_filtered.txt" ]]; then
if ! cp "${TMP_DIR}/special_filtered.txt" "${WORK_DIR}/special_filtered.backup" 2>/dev/null; then
log "WARNING: Failed to save special list backup"
else
log "Saved special list backup"
fi
fi
cleanup
release_lock
log "Script terminated with exit code: ${exit_code}"
exit "${exit_code}"
}
save_state() {
local temp_state
temp_state="${STATE_DIR}/state_$(date +%s).tmp"
local main_md5 special_md5
log "Saving state..."
if ! [[ -d "$STATE_DIR" ]]; then
if ! mkdir -p "$STATE_DIR"; then
log "ERROR: Failed to create state directory"
return 1
fi
if ! chmod 755 "$STATE_DIR"; then
log "WARNING: Failed to set permissions for state directory"
fi
fi
# Calculate MD5 sums with error checking
if ! main_md5=$(md5sum "$OUTPUT_FILE" 2>/dev/null); then
log "ERROR: Failed to calculate MD5 for main list"
return 1
fi
if ! special_md5=$(md5sum "$OUTPUT_FILESPECIAL" 2>/dev/null); then
log "ERROR: Failed to calculate MD5 for special list"
return 1
fi
# Write to temporary file first
if ! echo "${main_md5}" > "$temp_state" || ! echo "${special_md5}" >> "$temp_state"; then
log "ERROR: Failed to write state data"
rm -f "$temp_state"
return 1
fi
# Set proper permissions
if ! chmod 644 "$temp_state"; then
log "WARNING: Failed to set permissions for state file"
fi
# Atomic move
if ! mv "$temp_state" "$PREVIOUS_STATE"; then
log "ERROR: Failed to update state file"
rm -f "$temp_state"
return 1
fi
log "State saved successfully"
return 0
}
# Function to handle temporary files
handle_temp_file() {
local prefix="$1"
local suffix="${2:-tmp}"
local temp_file
if ! temp_file=$(mktemp "${TMP_DIR}/${prefix}.XXXXXX.${suffix}"); then
log "ERROR: Failed to create temporary file with prefix ${prefix}"
return 1
fi
if ! chmod 644 "$temp_file"; then
log "WARNING: Failed to set permissions for ${temp_file}"
fi
echo "$temp_file"
}
# Function to check dependencies
check_dependencies() {
local deps=(curl jq awk grep parallel)
for dep in "${deps[@]}"; do
if ! command -v "$dep" >/dev/null 2>&1; then
error "Required dependency: $dep"
fi
done
}
# Function to check domain via DNS
check_domain() {
local domain=$1
local retry_count=0
local success=false
local cache_file="${CACHE_DIR}/${domain}.cache"
if [[ -f "$cache_file" ]]; then
local cache_time
cache_time=$(stat -c %Y "$cache_file")
local current_time
current_time=$(date +%s)
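# 7776000 seconds = 90 days: reuse a cached verdict while it is younger than that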
if (( current_time - cache_time < 7776000 )); then
local cache_status
cache_status=$(cat "$cache_file")
if [[ "$cache_status" != "valid" && "$cache_status" != "invalid" ]]; then
log "WARNING: Invalid cache entry for $domain"
rm -f "$cache_file"
else
[[ "$cache_status" == "valid" ]] && return 0
return 1
fi
fi
fi
while [[ $retry_count -lt $DNS_MAX_RETRIES && $success == false ]]; do
[[ $retry_count -gt 0 ]] && sleep 2
if curl --connect-timeout $DNS_TIMEOUT --max-time $DNS_TIMEOUT -s -f \
--header "accept: application/dns-json" \
"https://cloudflare-dns.com/dns-query?name=${domain}&type=NS" | \
grep -q '"Status":0.*"Answer":\[.*"type":2'; then
echo "valid" > "$cache_file"
success=true
return 0
fi
retry_count=$((retry_count + 1))
done
echo "invalid" > "$cache_file"
return 1
}
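# For reference, an abridged Cloudflare DNS-over-HTTPS answer that the grep above
# accepts looks roughly like this (Status 0 = NOERROR, type 2 = NS record):
#   {"Status":0, ... ,"Answer":[{"name":"example.com.","type":2,"TTL":3600,"data":"a.iana-servers.net."}]}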
# Function for parallel domain checking
check_domains_parallel() {
local input=$1
local output=$2
local temp_output="${output}.tmp"
local valid_count=0
# Check if input file exists
if [[ ! -f "$input" ]]; then
log "ERROR: Input file $input does not exist"
return 1
fi
# Check write permissions for output file
if [[ ! -w "$(dirname "$output")" ]]; then
log "ERROR: No write permissions for directory $(dirname "$output")"
return 1
fi
local total
total=$(wc -l < "$input")
if [[ $total -eq 0 ]]; then
log "WARNING: Input file $input is empty"
return 1
fi
log "Starting DNS checks for: $input (total domains: $total)"
: > "$temp_output"
local current=0
local processed=0
# Create temporary file for atomic result writing
local results_file="${TMP_DIR}/dns_results_$$"
local count_file="${TMP_DIR}/valid_count_$$"
: > "$results_file"
: > "$count_file"
while IFS= read -r domain; do
(
if check_domain "$domain"; then
echo "$domain" >> "$results_file"
echo "1" >> "$count_file"
log "Domain $domain is valid"
fi
) &
# use plain assignments: ((var++)) returns non-zero when var is 0, which would
# abort the script under set -e
current=$((current + 1))
processed=$((processed + 1))
# Control parallel processes
if [[ $((current % MAX_PARALLEL_JOBS)) -eq 0 ]] || [[ $processed -eq $total ]]; then
wait
# Count intermediate valid domains
if [[ -f "$count_file" ]]; then
valid_count=$(wc -l < "$count_file")
fi
current=0
# Update progress every 100 domains
if [[ $((processed % 100)) -eq 0 ]] || [[ $processed -eq $total ]]; then
log "Progress: $processed out of $total (valid: $valid_count)"
fi
fi
done < "$input"
wait
# Final count of valid domains
if [[ -f "$count_file" ]]; then
valid_count=$(wc -l < "$count_file")
rm -f "$count_file"
fi
# Collect all results
if [[ -f "$results_file" ]]; then
sort -u "$results_file" > "$temp_output"
rm -f "$results_file"
if [[ -s "$temp_output" ]]; then
mv "$temp_output" "$output"
local final_count
final_count=$(wc -l < "$output")
log "DNS check completed successfully. Valid domains: $final_count"
return 0
else
log "ERROR: No valid domains after check"
rm -f "$temp_output"
return 1
fi
else
log "ERROR: Check results file not found"
return 1
fi
}
# Function to load Public Suffix List
load_public_suffix_list() {
if [[ ! -f "$PUBLIC_SUFFIX_FILE" ]] || [[ -n $(find "$PUBLIC_SUFFIX_FILE" -mtime +7 -print) ]]; then
log "Updating Public Suffix List..."
curl -sSL "https://publicsuffix.org/list/public_suffix_list.dat" | \
grep -v '^//' | grep -v '^$' > "$PUBLIC_SUFFIX_FILE"
if [[ ! -s "$PUBLIC_SUFFIX_FILE" ]]; then
error "Failed to download Public Suffix List. Check internet connection and access rights."
fi
fi
}
# Function to validate domain
validate_domain() {
local domain="$1"
[[ "$domain" =~ ^[a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,}$ ]] || return 1
[[ "$domain" =~ \.\. ]] && return 1
[[ "$domain" =~ (^|\.)-|-(\.|$) ]] && return 1
[[ ${#domain} -gt 253 ]] && return 1
return 0
}
export -f validate_domain
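# Illustrative examples of what validate_domain accepts and rejects:
#   accepted: example.com, static.cdn.example.co.uk
#   rejected: example (no TLD), foo..bar.com (empty label),
#             -bad.example.com (label starts with a hyphen),
#             anything longer than 253 characters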
# Function to extract domains from various formats
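# Recognized input line shapes (illustrative), per the patterns below; trailing
# "#" comments are stripped and every value must still pass validate_domain():
#   DOMAIN-SUFFIX,example.com
#   - DOMAIN,ads.example.net        # YAML-style list item is also accepted
#   DOMAIN-KEYWORD,tracker.example.org
#   example.com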
extract_domains() {
local input=$1
local output=$2
local temp_output
temp_output="${TMP_DIR}/extracted_$(date +%s).tmp"
log "Extracting domains from: $input"
if ! [[ -f "$input" ]]; then
log "ERROR: Input file does not exist: $input"
return 1
fi
if ! [[ -r "$input" ]]; then
log "ERROR: Cannot read input file: $input"
return 1
fi
# Initialize temporary file with proper permissions
if ! : > "$temp_output"; then
log "ERROR: Failed to create temporary file"
return 1
fi
if ! chmod 644 "$temp_output"; then
log "WARNING: Failed to set permissions for temporary file"
fi
local processed=0
local extracted=0
while IFS= read -r line; do
processed=$((processed + 1))
# Skip empty lines and comments
if [[ -z "$line" ]] || [[ "$line" =~ ^[[:space:]]*# ]]; then
continue
fi
# Extract domain from different formats
if [[ "$line" =~ ^[[:space:]]*-?[[:space:]]*(DOMAIN-SUFFIX|DOMAIN|DOMAIN-KEYWORD),(.+)$ ]]; then
# Remove trailing comments and whitespace
local domain="${BASH_REMATCH[2]%%#*}"
domain=$(echo "$domain" | tr -d '[:space:]')
if validate_domain "$domain"; then
if ! echo "$domain" >> "$temp_output"; then
log "ERROR: Failed to write domain to temporary file"
rm -f "$temp_output"
return 1
fi
extracted=$((extracted + 1))
fi
elif [[ "$line" =~ ^[a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,}$ ]]; then
if validate_domain "$line"; then
if ! echo "$line" >> "$temp_output"; then
log "ERROR: Failed to write domain to temporary file"
rm -f "$temp_output"
return 1
fi
extracted=$((extracted + 1))
fi
fi
# Progress logging for large files
if (( processed % 10000 == 0 )); then
log "Processed $processed lines, extracted $extracted domains"
fi
done < "$input"
# Sort and deduplicate with error checking
if ! sort -u "$temp_output" > "$output"; then
log "ERROR: Failed to sort and deduplicate domains"
rm -f "$temp_output"
return 1
fi
rm -f "$temp_output"
# Validate final output
if ! [[ -s "$output" ]]; then
log "WARNING: No domains were extracted"
return 1
fi
local final_count
final_count=$(wc -l < "$output")
log "Extracted $final_count unique domains from $processed lines"
return 0
}
# Function for initial filtering
initial_filter() {
local input=$1
local output=$2
local temp_output
temp_output="${TMP_DIR}/filtered_$(date +%s).tmp"
log "Initial filtering of: $input"
if ! [[ -f "$input" ]]; then
log "ERROR: Input file does not exist: $input"
return 1
fi
if ! [[ -r "$input" ]]; then
log "ERROR: Cannot read input file: $input"
return 1
fi
# Create temporary file with proper permissions
if ! : > "$temp_output"; then
log "ERROR: Failed to create temporary file"
return 1
fi
if ! chmod 644 "$temp_output"; then
log "WARNING: Failed to set permissions for temporary file"
fi
# Multi-stage filtering with error checking
if ! grep -P '^[a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,}$' "$input" | \
grep -v '^#' | \
grep -v '^$' | \
tr '[:upper:]' '[:lower:]' | \
awk 'length <= 253' > "$temp_output"; then
log "ERROR: Domain filtering failed"
rm -f "$temp_output"
return 1
fi
# Sort and deduplicate with error checking
if ! sort -u "$temp_output" > "$output"; then
log "ERROR: Failed to sort and deduplicate domains"
rm -f "$temp_output"
return 1
fi
rm -f "$temp_output"
# Validate results
if ! [[ -s "$output" ]]; then
log "ERROR: No domains passed initial filtering"
return 1
fi
local total
total=$(wc -l < "$output")
log "Initially filtered domains: $total"
return 0
}
# Function to determine domain type
get_domain_type() {
local domain=$1
local parts
IFS='.' read -ra parts <<< "$domain"
local levels=${#parts[@]}
local base="${parts[-2]}.${parts[-1]}"
if [[ $levels -eq 2 ]]; then
echo "second"
elif grep -Fxq "$base" "$PUBLIC_SUFFIX_FILE"; then
echo "regional"
else
echo "other"
fi
}
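# Classification examples (assuming "com.au" appears in the Public Suffix List):
#   example.com        -> second   (exactly two labels)
#   example.com.au     -> regional (its last two labels form a public suffix)
#   static.example.com -> other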
# Function to process and classify domains
process_domains() {
local input=$1
local output_dir=$2
log "Classifying domains from: $input"
if ! [[ -f "$input" ]]; then
log "ERROR: Input file does not exist: $input"
return 1
fi
if ! mkdir -p "${output_dir}"/{second,regional,other}; then
log "ERROR: Failed to create output directories"
return 1
fi
local second_level="${output_dir}/second.txt"
local regional="${output_dir}/regional.txt"
local other="${output_dir}/other.txt"
local base_domains="${output_dir}/base_domains.tmp"
local domain_registry="${output_dir}/domain_registry.tmp"
# Initialize files with proper permissions
for file in "$second_level" "$regional" "$other" "$base_domains" "$domain_registry"; do
if ! : > "$file"; then
log "ERROR: Failed to create/clear file: $file"
return 1
fi
if ! chmod 644 "$file"; then
log "WARNING: Failed to set permissions for: $file"
fi
done
# First pass - register domains
while IFS= read -r domain; do
local parts
IFS='.' read -ra parts <<< "$domain"
local levels=${#parts[@]}
if [[ $levels -gt 4 ]]; then
domain="${parts[-4]}.${parts[-3]}.${parts[-2]}.${parts[-1]}"
levels=4
fi
if ! echo "$domain $levels" >> "$domain_registry"; then
log "ERROR: Failed to write to domain registry"
return 1
fi
done < "$input"
# Second pass - classify domains
while IFS=' ' read -r domain levels; do
local parts
IFS='.' read -ra parts <<< "$domain"
if [[ $levels -eq 2 ]]; then
if ! echo "$domain" >> "$second_level" || \
! echo "$domain" >> "$base_domains"; then
log "ERROR: Failed to write second-level domain: $domain"
return 1
fi
elif [[ $levels -eq 3 ]]; then
local base_domain="${parts[-2]}.${parts[-1]}"
if grep -Fxq "$base_domain" "$PUBLIC_SUFFIX_FILE"; then
if ! echo "$domain" >> "$regional" || \
! echo "$domain" >> "$base_domains"; then
log "ERROR: Failed to write regional domain: $domain"
return 1
fi
else
if ! grep -Fxq "$base_domain" "$second_level"; then
if ! echo "$domain" >> "$other" || \
! echo "$domain" >> "$base_domains"; then
log "ERROR: Failed to write other domain: $domain"
return 1
fi
fi
fi
elif [[ $levels -eq 4 ]]; then
local base_domain="${parts[-2]}.${parts[-1]}"
local third_level="${parts[-3]}.${parts[-2]}.${parts[-1]}"
if grep -Fxq "$base_domain" "$PUBLIC_SUFFIX_FILE"; then
if ! grep -Fxq "$third_level" "$regional"; then
if ! echo "$domain" >> "$other" || \
! echo "$domain" >> "$base_domains"; then
log "ERROR: Failed to write fourth-level domain: $domain"
return 1
fi
fi
else
if ! grep -Fxq "$base_domain" "$second_level" && \
! grep -Fxq "$third_level" "$other"; then
if ! echo "$domain" >> "$other" || \
! echo "$domain" >> "$base_domains"; then
log "ERROR: Failed to write fourth-level domain: $domain"
return 1
fi
fi
fi
fi
done < "$domain_registry"
# Sort and deduplicate with error checking
for file in "$second_level" "$regional" "$other"; do
if [[ -f "$file" ]]; then
local temp_file="${file}.tmp"
if ! sort -u "$file" > "$temp_file"; then
log "ERROR: Failed to sort file: $file"
rm -f "$temp_file"
return 1
fi
if ! mv "$temp_file" "$file"; then
log "ERROR: Failed to update sorted file: $file"
rm -f "$temp_file"
return 1
fi
fi
done
# Cleanup temporary files
rm -f "$base_domains" "$domain_registry"
# Validate results and generate statistics
local second_count=0 regional_count=0 other_count=0
if [[ -f "$second_level" ]]; then
second_count=$(wc -l < "$second_level")
fi
if [[ -f "$regional" ]]; then
regional_count=$(wc -l < "$regional")
fi
if [[ -f "$other" ]]; then
other_count=$(wc -l < "$other")
fi
log "Classification results:"
log "- Second-level domains: $second_count"
log "- Regional domains: $regional_count"
log "- Other domains: $other_count"
# Verify we have at least some results
if (( second_count + regional_count + other_count == 0 )); then
log "ERROR: No domains classified"
return 1
fi
return 0
}
# Function to prepare domains for DNS check
prepare_domains_for_dns_check() {
local input_dir=$1
local output=$2
local temp_output="${output}.tmp"
log "Preparing domains for DNS check..."
if ! [[ -d "$input_dir" ]]; then
log "ERROR: Input directory does not exist: $input_dir"
return 1
fi
# Combine files with error checking
if ! : > "$temp_output"; then
log "ERROR: Failed to create temporary output file"
return 1
fi
for file in "${input_dir}/second.txt" "${input_dir}/regional.txt"; do
if [[ -f "$file" ]]; then
if ! cat "$file" >> "$temp_output"; then
log "ERROR: Failed to append file: $file"
rm -f "$temp_output"
return 1
fi
fi
done
# Sort and deduplicate
if ! sort -u "$temp_output" > "$output"; then
log "ERROR: Failed to sort and deduplicate domains"
rm -f "$temp_output"
return 1
fi
rm -f "$temp_output"
# Validate result
if ! [[ -s "$output" ]]; then
log "ERROR: No domains prepared for DNS check"
return 1
fi
local total
total=$(wc -l < "$output")
log "Prepared $total domains for DNS check"
return 0
}
# Function to apply whitelist
apply_whitelist() {
local input=$1
local whitelist=$2
local output=$3
local temp_pattern
temp_pattern="${TMP_DIR}/whitelist_pattern_$(date +%s).tmp"
local temp_output
temp_output="${TMP_DIR}/whitelist_filtered_$(date +%s).tmp"
log "Applying whitelist to: $input"
# Validate input files
for file in "$input" "$whitelist"; do
if ! [[ -f "$file" ]]; then
log "ERROR: Required file does not exist: $file"
return 1
fi
if ! [[ -r "$file" ]]; then
log "ERROR: Cannot read file: $file"
return 1
fi
done
# Initialize temporary files
for temp_file in "$temp_pattern" "$temp_output"; do
if ! : > "$temp_file"; then
log "ERROR: Failed to create temporary file: $temp_file"
return 1
fi
if ! chmod 644 "$temp_file"; then
log "WARNING: Failed to set permissions for: $temp_file"
fi
done
# Process whitelist and create patterns
while IFS= read -r domain; do
local parts
IFS='.' read -ra parts <<< "$domain"
local levels=${#parts[@]}
case $levels in
2)
# Second-level domain
echo "^${domain}$" >> "$temp_pattern"
echo "\.${domain}$" >> "$temp_pattern"
;;