fix(config): deduplicate uniqueItems arrays before schema validation (#292)

* fix(config): deduplicate uniqueItems arrays before schema validation

When saving plugin config via the web UI, the form data is merged with
the existing stored config. If a user adds an item that already exists
(e.g. adding stock symbol "FNMA" when it's already in the list), the
merged array contains duplicates. Schemas with `uniqueItems: true`
then reject the config, making it impossible to save.

Add a recursive dedup pass that runs after normalization/filtering but
before validation. It walks the schema tree, finds arrays with the
uniqueItems constraint, and removes duplicates while preserving order.

Co-Authored-By: 5ymb01 <noreply@github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: recurse into array items and add tests for uniqueItems dedup

Address CodeRabbit review: _dedup_unique_arrays now also recurses into
array elements whose items schema is an object, so nested uniqueItems
constraints inside arrays-of-objects are enforced.

Add 11 unit tests covering:
- flat arrays with/without duplicates
- order preservation
- arrays without uniqueItems left untouched
- nested objects (feeds.stock_symbols pattern)
- arrays of objects with inner uniqueItems arrays
- edge cases (empty array, missing keys, integers)
- real-world stock-news plugin config shape

Co-Authored-By: 5ymb01 <noreply@github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: extract dedup_unique_arrays to shared validators module

Move _dedup_unique_arrays from an inline closure in save_plugin_config
to src/web_interface/validators.dedup_unique_arrays so tests import
and exercise the production code path instead of a duplicated copy.

Addresses CodeRabbit review: tests now validate the real function,
preventing regressions from diverging copies.

Co-Authored-By: 5ymb01 <noreply@github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: 5ymb01 <5ymb01@users.noreply.github.com>
Co-authored-by: 5ymb01 <noreply@github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
5ymb01
2026-03-24 15:48:26 -04:00
committed by GitHub
parent 442638dd2c
commit 31ed854d4e
3 changed files with 311 additions and 0 deletions

View File

@@ -215,3 +215,49 @@ def sanitize_plugin_config(config: dict) -> dict:
return sanitized
def dedup_unique_arrays(cfg: dict, schema_node: dict) -> None:
"""Recursively deduplicate arrays with uniqueItems constraint.
Walks the JSON Schema tree alongside the config dict and removes
duplicate entries from any array whose schema specifies
``uniqueItems: true``, preserving insertion order (first occurrence
kept). Also recurses into:
- Object properties containing nested objects or arrays
- Array elements whose ``items`` schema is an object with its own
properties (so nested uniqueItems constraints are enforced)
This is intended to run **after** form-data normalisation but
**before** JSON Schema validation, to prevent spurious validation
failures when config merging introduces duplicates (e.g. a stock
symbol already present in the saved config is submitted again from
the web form).
Args:
cfg: The plugin configuration dict to mutate in-place.
schema_node: The corresponding JSON Schema node (must contain
a ``properties`` mapping at the current level).
"""
props = schema_node.get('properties', {})
for key, prop_schema in props.items():
if key not in cfg:
continue
prop_type = prop_schema.get('type')
if prop_type == 'array' and isinstance(cfg[key], list):
# Deduplicate this array if uniqueItems is set
if prop_schema.get('uniqueItems'):
seen: list = []
for item in cfg[key]:
if item not in seen:
seen.append(item)
cfg[key] = seen
# Recurse into array elements if items schema is an object
items_schema = prop_schema.get('items', {})
if isinstance(items_schema, dict) and items_schema.get('type') == 'object':
for element in cfg[key]:
if isinstance(element, dict):
dedup_unique_arrays(element, items_schema)
elif prop_type == 'object' and isinstance(cfg[key], dict):
dedup_unique_arrays(cfg[key], prop_schema)

View File

@@ -0,0 +1,258 @@
"""Tests for dedup_unique_arrays used by save_plugin_config.
Validates that arrays with uniqueItems: true in the JSON schema have
duplicates removed before validation, preventing spurious validation
failures when form merging introduces duplicate entries.
Tests import the production function from src.web_interface.validators
to ensure they exercise the real code path.
"""
import pytest
from src.web_interface.validators import dedup_unique_arrays as _dedup_unique_arrays
class TestDedupUniqueArrays:
"""Test suite for uniqueItems array deduplication."""
def test_flat_array_with_duplicates(self) -> None:
"""Duplicates in a top-level uniqueItems array are removed."""
cfg = {"stock_symbols": ["AAPL", "GOOGL", "FNMA", "TSLA", "FNMA"]}
schema = {
"properties": {
"stock_symbols": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["stock_symbols"] == ["AAPL", "GOOGL", "FNMA", "TSLA"]
def test_flat_array_preserves_order(self) -> None:
"""First occurrence of each item is kept, order preserved."""
cfg = {"tags": ["b", "a", "c", "a", "b"]}
schema = {
"properties": {
"tags": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["tags"] == ["b", "a", "c"]
def test_no_duplicates_unchanged(self) -> None:
"""Array without duplicates is not modified."""
cfg = {"items": ["a", "b", "c"]}
schema = {
"properties": {
"items": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["items"] == ["a", "b", "c"]
def test_array_without_unique_items_not_deduped(self) -> None:
"""Arrays without uniqueItems constraint keep duplicates."""
cfg = {"items": ["a", "a", "b"]}
schema = {
"properties": {
"items": {
"type": "array",
"items": {"type": "string"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["items"] == ["a", "a", "b"]
def test_nested_object_with_unique_array(self) -> None:
"""Dedup works for uniqueItems arrays inside nested objects."""
cfg = {
"feeds": {
"stock_symbols": ["AAPL", "FNMA", "NVDA", "FNMA"]
}
}
schema = {
"properties": {
"feeds": {
"type": "object",
"properties": {
"stock_symbols": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["feeds"]["stock_symbols"] == ["AAPL", "FNMA", "NVDA"]
def test_array_of_objects_with_nested_unique_arrays(self) -> None:
"""Dedup recurses into array elements that are objects."""
cfg = {
"servers": [
{"tags": ["web", "prod", "web"]},
{"tags": ["db", "staging"]},
]
}
schema = {
"properties": {
"servers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"tags": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
},
},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["servers"][0]["tags"] == ["web", "prod"]
assert cfg["servers"][1]["tags"] == ["db", "staging"]
def test_missing_key_in_config_skipped(self) -> None:
"""Schema properties not present in config are silently skipped."""
cfg = {"other": "value"}
schema = {
"properties": {
"items": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg == {"other": "value"}
def test_empty_array(self) -> None:
"""Empty arrays are handled without error."""
cfg = {"items": []}
schema = {
"properties": {
"items": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["items"] == []
def test_integer_duplicates(self) -> None:
"""Dedup works for non-string types (integers)."""
cfg = {"ports": [80, 443, 80, 8080, 443]}
schema = {
"properties": {
"ports": {
"type": "array",
"uniqueItems": True,
"items": {"type": "integer"},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["ports"] == [80, 443, 8080]
def test_deeply_nested_objects(self) -> None:
"""Dedup works through multiple levels of nesting."""
cfg = {
"level1": {
"level2": {
"values": ["x", "y", "x"]
}
}
}
schema = {
"properties": {
"level1": {
"type": "object",
"properties": {
"level2": {
"type": "object",
"properties": {
"values": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
}
},
}
},
}
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["level1"]["level2"]["values"] == ["x", "y"]
def test_stock_news_real_world_schema(self) -> None:
"""End-to-end test matching the actual stock-news plugin config shape."""
cfg = {
"enabled": True,
"global": {
"display_duration": 30.0,
"scroll_speed": 1.0,
},
"feeds": {
"news_source": "google_news",
"stock_symbols": [
"AAPL", "GOOGL", "MSFT", "FNMA",
"NVDA", "TSLA", "META", "AMD", "FNMA",
],
"text_color": [0, 255, 0],
},
"display_duration": 15,
}
schema = {
"properties": {
"enabled": {"type": "boolean"},
"global": {
"type": "object",
"properties": {
"display_duration": {"type": "number"},
"scroll_speed": {"type": "number"},
},
},
"feeds": {
"type": "object",
"properties": {
"news_source": {"type": "string"},
"stock_symbols": {
"type": "array",
"uniqueItems": True,
"items": {"type": "string"},
},
"text_color": {
"type": "array",
"items": {"type": "integer"},
},
},
},
"display_duration": {"type": "number"},
}
}
_dedup_unique_arrays(cfg, schema)
assert cfg["feeds"]["stock_symbols"] == [
"AAPL", "GOOGL", "MSFT", "FNMA", "NVDA", "TSLA", "META", "AMD"
]
# text_color has no uniqueItems, should be untouched
assert cfg["feeds"]["text_color"] == [0, 255, 0]

View File

@@ -4612,6 +4612,13 @@ def save_plugin_config():
seed_value = plugin_config['rotation_settings']['random_seed']
logger.debug(f"After normalization, random_seed value: {repr(seed_value)}, type: {type(seed_value)}")
# Deduplicate arrays where schema specifies uniqueItems: true
# This prevents validation failures when form merging introduces duplicates
# (e.g., existing config has ['AAPL','FNMA'] and form adds 'FNMA' again)
if schema:
from src.web_interface.validators import dedup_unique_arrays
dedup_unique_arrays(plugin_config, schema)
# Validate configuration against schema before saving
if schema:
# Log what we're validating for debugging