Skip to content

Baidu

Classes

BaiDu

Bases: BaseSearchEngine[BaiDuResponse]

API client for the BaiDu image search engine.

Used for performing reverse image searches using BaiDu service.

Attributes:

Name Type Description
base_url str

The base URL for BaiDu searches.

Source code in PicImageSearch/engines/baidu.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
class BaiDu(BaseSearchEngine[BaiDuResponse]):
    """API client for the BaiDu image search engine.

    Used for performing reverse image searches using BaiDu service.

    Attributes:
        base_url (str): The base URL for BaiDu searches.
    """

    def __init__(self, **request_kwargs: Any):
        """Initializes a BaiDu API client with specified configurations.

        Args:
            **request_kwargs (Any): Additional arguments for network requests,
                forwarded to the base class initializer.
        """
        base_url = "https://graph.baidu.com"
        super().__init__(base_url, **request_kwargs)

    @staticmethod
    def _extract_card_data(data: PyQuery) -> list[dict[str, Any]]:
        """Extracts 'window.cardData' from the BaiDu search response page.

        Scans every ``<script>`` element for the 'window.cardData' marker and
        decodes the JSON array that follows it.

        Args:
            data (PyQuery): A PyQuery object containing the parsed HTML page.

        Returns:
            list[dict[str, Any]]: The decoded card data. Returns an empty list
                if no script contains a 'window.cardData' marker followed by a
                bracketed array.

        Note:
            The JSON text is taken from the first '[' located *after* the
            'window.cardData' marker up to the last ']' in the same script.
            Brackets that appear earlier in the script are ignored so they
            cannot corrupt the extracted slice.
        """
        marker = "window.cardData"
        for script in data("script").items():
            script_text = script.text()
            if not script_text:
                continue

            marker_pos = script_text.find(marker)
            if marker_pos == -1:
                continue

            # Start searching at the marker: an earlier '[' belongs to
            # unrelated JavaScript, not to the card data array.
            start = script_text.find("[", marker_pos)
            end = script_text.rfind("]") + 1
            if start == -1 or end <= start:
                # Marker present but no array literal after it; treat this
                # script as not containing card data instead of decoding "".
                continue

            return json_loads(script_text[start:end])  # type: ignore
        return []

    async def search(
        self,
        url: Optional[str] = None,
        file: Union[str, bytes, Path, None] = None,
        **kwargs: Any,
    ) -> BaiDuResponse:
        """Performs a reverse image search on BaiDu.

        This method supports two ways of searching:
            1. Search by image URL
            2. Search by uploading a local image file

        The search process involves multiple steps:
            1. Upload the image or submit the URL to BaiDu
            2. Follow the returned URL to get the search results page
            3. Extract and parse the card data from the page
            4. If similar images are found, fetch the detailed results

        Args:
            url (Optional[str]): URL of the image to search.
            file (Union[str, bytes, Path, None]): Local image file, can be a path string, bytes data, or Path object.
            **kwargs (Any): Accepted for interface compatibility; not used by
                this method.

        Returns:
            BaiDuResponse: An object containing the search results and metadata.
                Returns empty results if the upload response has no result
                URL, if the 'noresult' card is present, or if no 'simipic'
                card is found.

        Raises:
            ValueError: If neither `url` nor `file` is provided (raised during
                argument validation).

        Note:
            - Only one of `url` or `file` should be provided; `url` takes
              precedence when both are truthy.
            - The search process involves multiple HTTP requests to BaiDu's API.
            - The response format varies depending on whether matches are found.
        """
        self._validate_args(url, file)

        params = {"from": "pc"}
        files: Optional[dict[str, Any]] = None

        if url:
            params["image"] = url
        elif file:
            files = {"image": read_file(file)}

        # Step 1: submit the image (by URL or upload) to obtain the results page URL.
        resp = await self._make_request(
            method="post",
            endpoint="upload",
            params=params,
            files=files,
        )
        data_url = deep_get(json_loads(resp.text), "data.url")
        if not data_url:
            return BaiDuResponse({}, resp.url)

        # Step 2: fetch the results page and pull the embedded card data out of it.
        resp = await self.get(data_url)

        utf8_parser = HTMLParser(encoding="utf-8")
        data = PyQuery(fromstring(resp.text, parser=utf8_parser))
        card_data = self._extract_card_data(data)

        # Step 3: the first 'noresult' or 'simipic' card decides the outcome.
        for card in card_data:
            if card.get("cardName") == "noresult":
                return BaiDuResponse({}, data_url)
            if card.get("cardName") == "simipic":
                next_url = card["tplData"]["firstUrl"]
                resp = await self.get(next_url)
                return BaiDuResponse(json_loads(resp.text), data_url)

        return BaiDuResponse({}, data_url)

Functions

__init__(**request_kwargs)

Initializes a BaiDu API client with specified configurations.

Parameters:

Name Type Description Default
**request_kwargs Any

Additional arguments for network requests.

{}
Source code in PicImageSearch/engines/baidu.py
22
23
24
25
26
27
28
29
def __init__(self, **request_kwargs: Any):
    """Create a BaiDu client bound to the BaiDu graph-search host.

    Args:
        **request_kwargs (Any): Keyword arguments for network requests,
            forwarded unchanged to the base class initializer along with
            the base URL.
    """
    super().__init__("https://graph.baidu.com", **request_kwargs)
search(url=None, file=None, **kwargs) async

Performs a reverse image search on BaiDu.

This method supports two ways of searching:
  1. Search by image URL
  2. Search by uploading a local image file
The search process involves multiple steps:
  1. Upload the image or submit the URL to BaiDu
  2. Follow the returned URL to get the search results page
  3. Extract and parse the card data from the page
  4. If similar images are found, fetch the detailed results

Parameters:

Name Type Description Default
url Optional[str]

URL of the image to search.

None
file Union[str, bytes, Path, None]

Local image file, can be a path string, bytes data, or Path object.

None
**kwargs Any

Additional arguments passed to the parent class.

{}

Returns:

Name Type Description
BaiDuResponse BaiDuResponse

An object containing the search results and metadata. Returns empty results if no matches are found or if the 'noresult' card is present.

Raises:

Type Description
ValueError

If neither url nor file is provided.

Note
  • Only one of url or file should be provided.
  • The search process involves multiple HTTP requests to BaiDu's API.
  • The response format varies depending on whether matches are found.
Source code in PicImageSearch/engines/baidu.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
async def search(
    self,
    url: Optional[str] = None,
    file: Union[str, bytes, Path, None] = None,
    **kwargs: Any,
) -> BaiDuResponse:
    """Run a reverse image search against BaiDu.

    The image can be supplied either as a remote URL or as a local file.
    The lookup proceeds in stages:
        1. Post the image URL or the file contents to the "upload" endpoint.
        2. Read the results page URL from the "data.url" field of the reply.
        3. Fetch that page and extract its embedded card data.
        4. When a "simipic" card is present, fetch its "firstUrl" for the
           detailed results.

    Args:
        url (Optional[str]): URL of the image to search.
        file (Union[str, bytes, Path, None]): Local image, given as a path
            string, raw bytes, or a Path object.
        **kwargs (Any): Accepted for interface compatibility; not used by
            this method.

    Returns:
        BaiDuResponse: The parsed search results plus the results page URL.
            The payload is empty when the upload reply carries no results
            URL, when a "noresult" card is encountered, or when no "simipic"
            card exists.

    Raises:
        ValueError: If neither `url` nor `file` is provided (raised during
            argument validation).

    Note:
        - Supply only one of `url` or `file`; `url` wins when both are truthy.
        - Up to three HTTP requests are issued per search.
    """
    self._validate_args(url, file)

    query = {"from": "pc"}
    upload: Optional[dict[str, Any]] = None
    if url:
        query["image"] = url
    elif file:
        upload = {"image": read_file(file)}

    upload_resp = await self._make_request(
        method="post",
        endpoint="upload",
        params=query,
        files=upload,
    )
    results_page_url = deep_get(json_loads(upload_resp.text), "data.url")
    if not results_page_url:
        # No results page was issued; report against the upload URL instead.
        return BaiDuResponse({}, upload_resp.url)

    page_resp = await self.get(results_page_url)
    document = fromstring(page_resp.text, parser=HTMLParser(encoding="utf-8"))
    cards = self._extract_card_data(PyQuery(document))

    for card in cards:
        card_name = card.get("cardName")
        if card_name == "noresult":
            # An explicit "no result" card ends the scan with an empty payload.
            break
        if card_name == "simipic":
            detail_resp = await self.get(card["tplData"]["firstUrl"])
            return BaiDuResponse(json_loads(detail_resp.text), results_page_url)

    return BaiDuResponse({}, results_page_url)

Functions