diff --git a/src/brightdata/scrapers/api_client.py b/src/brightdata/scrapers/api_client.py index fbd2b3b..004bd7b 100644 --- a/src/brightdata/scrapers/api_client.py +++ b/src/brightdata/scrapers/api_client.py @@ -45,6 +45,7 @@ async def trigger( dataset_id: str, include_errors: bool = True, sdk_function: Optional[str] = None, + extra_params: Optional[Dict[str, str]] = None, ) -> Optional[str]: """ Trigger dataset collection and get snapshot_id. @@ -54,6 +55,7 @@ async def trigger( dataset_id: Bright Data dataset identifier include_errors: Include error records in results sdk_function: SDK function name for monitoring + extra_params: Additional query parameters (e.g., for discovery endpoints) Returns: snapshot_id if successful, None otherwise @@ -69,6 +71,9 @@ async def trigger( if sdk_function: params["sdk_function"] = sdk_function + if extra_params: + params.update(extra_params) + async with self.engine.post_to_url( self.TRIGGER_URL, json_data=payload, params=params ) as response: diff --git a/src/brightdata/scrapers/instagram/search.py b/src/brightdata/scrapers/instagram/search.py index 43b347f..3d69f4b 100644 --- a/src/brightdata/scrapers/instagram/search.py +++ b/src/brightdata/scrapers/instagram/search.py @@ -287,6 +287,7 @@ async def _discover_with_params( include_errors=True, normalize_func=None, sdk_function=sdk_function, + extra_params={"type": "discover_new", "discover_by": "url"}, ) if is_single and isinstance(result.data, list) and len(result.data) == 1: diff --git a/src/brightdata/scrapers/workflow.py b/src/brightdata/scrapers/workflow.py index ab489d5..9dc5da6 100644 --- a/src/brightdata/scrapers/workflow.py +++ b/src/brightdata/scrapers/workflow.py @@ -52,6 +52,7 @@ async def execute( include_errors: bool = True, normalize_func: Optional[Callable[[Any], Any]] = None, sdk_function: Optional[str] = None, + extra_params: Optional[Dict[str, str]] = None, ) -> ScrapeResult: """ Execute complete trigger/poll/fetch workflow. @@ -64,6 +65,7 @@ async def execute( include_errors: Include error records normalize_func: Optional function to normalize result data sdk_function: SDK function name for monitoring + extra_params: Additional query parameters (e.g., for discovery endpoints) Returns: ScrapeResult with data or error @@ -76,6 +78,7 @@ async def execute( dataset_id=dataset_id, include_errors=include_errors, sdk_function=sdk_function, + extra_params=extra_params, ) except APIError as e: return ScrapeResult( diff --git a/tests/unit/test_instagram.py b/tests/unit/test_instagram.py index b89ed9e..add5519 100644 --- a/tests/unit/test_instagram.py +++ b/tests/unit/test_instagram.py @@ -329,3 +329,79 @@ def test_can_import_from_instagram_submodule(self): assert IG.__name__ == "InstagramScraper" assert IGSearch is not None assert IGSearch.__name__ == "InstagramSearchScraper" + + +class TestInstagramDiscoverExtraParams: + """Test Instagram discover endpoints include required extra_params.""" + + def test_workflow_executor_execute_accepts_extra_params(self): + """Test WorkflowExecutor.execute accepts extra_params parameter.""" + import inspect + from brightdata.scrapers.workflow import WorkflowExecutor + + sig = inspect.signature(WorkflowExecutor.execute) + assert "extra_params" in sig.parameters + + def test_api_client_trigger_accepts_extra_params(self): + """Test DatasetAPIClient.trigger accepts extra_params parameter.""" + import inspect + from brightdata.scrapers.api_client import DatasetAPIClient + + sig = inspect.signature(DatasetAPIClient.trigger) + assert "extra_params" in sig.parameters + + def test_discover_posts_passes_extra_params(self): + """Test Instagram search posts passes discovery extra_params to workflow executor.""" + from unittest.mock import AsyncMock, patch + + scraper = InstagramSearchScraper(bearer_token="test_token_123456789") + + # Mock the workflow executor's execute method + with patch.object(scraper.workflow_executor, "execute", new_callable=AsyncMock) as mock_execute: + # Set up mock return value + from brightdata.models import ScrapeResult + + mock_execute.return_value = ScrapeResult( + success=True, + data=[{"test": "data"}], + platform="instagram", + ) + + # Call the posts method (need to run async) + import asyncio + + asyncio.run(scraper.posts(url="https://instagram.com/test")) + + # Verify execute was called with extra_params + mock_execute.assert_called_once() + call_kwargs = mock_execute.call_args.kwargs + assert "extra_params" in call_kwargs + assert call_kwargs["extra_params"] == {"type": "discover_new", "discover_by": "url"} + + def test_discover_reels_passes_extra_params(self): + """Test Instagram search reels passes discovery extra_params to workflow executor.""" + from unittest.mock import AsyncMock, patch + + scraper = InstagramSearchScraper(bearer_token="test_token_123456789") + + # Mock the workflow executor's execute method + with patch.object(scraper.workflow_executor, "execute", new_callable=AsyncMock) as mock_execute: + # Set up mock return value + from brightdata.models import ScrapeResult + + mock_execute.return_value = ScrapeResult( + success=True, + data=[{"test": "data"}], + platform="instagram", + ) + + # Call the reels method (need to run async) + import asyncio + + asyncio.run(scraper.reels(url="https://instagram.com/test")) + + # Verify execute was called with extra_params + mock_execute.assert_called_once() + call_kwargs = mock_execute.call_args.kwargs + assert "extra_params" in call_kwargs + assert call_kwargs["extra_params"] == {"type": "discover_new", "discover_by": "url"}