跳转至

Google

Classes

GoogleItem

Bases: BaseSearchItem

Represents a single Google search result item.

A class that processes and stores individual search result data from Google reverse image search.

Attributes:

Name Type Description
origin PyQuery

The raw PyQuery object containing the search result data.

title str

The title text of the search result.

url str

The URL link to the search result page.

thumbnail Optional[str]

Base64 encoded thumbnail image, if available.

Source code in PicImageSearch/model/google.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class GoogleItem(BaseSearchItem):
    """One entry of a Google reverse image search result page.

    Pulls the fields of a single result out of its parsed HTML fragment and
    keeps them as plain attributes.

    Attributes:
        origin (PyQuery): The raw PyQuery object containing the search result data.
        title (str): Text of the ``h3`` element(s) found in the result.
        url (str): ``href`` of the first ``a`` element found in the result.
        thumbnail (Optional[str]): Base64 encoded thumbnail image, if available.
    """

    def __init__(self, data: PyQuery, thumbnail: Optional[str]):
        """Builds the item from one result fragment and its thumbnail.

        Args:
            data (PyQuery): PyQuery instance holding this result's markup.
            thumbnail (Optional[str]): Base64 encoded thumbnail image, or None.
        """
        # The thumbnail is handed on by keyword; _parse_data reads it back
        # from its **kwargs.
        super().__init__(data, thumbnail=thumbnail)

    def _parse_data(self, data: PyQuery, **kwargs: Any) -> None:
        """Fill in title, url and thumbnail from the result fragment.

        Args:
            data (PyQuery): PyQuery instance holding this result's markup.
            **kwargs (Any): Extra values; only ``thumbnail`` is read here.
        """
        heading = data("h3")
        self.title: str = heading.text()

        first_anchor = data("a").eq(0)
        # NOTE(review): attr() presumably yields None when the fragment has no
        # link, despite the ``str`` annotation -- confirm against callers.
        self.url: str = first_anchor.attr("href")

        self.thumbnail: Optional[str] = kwargs.get("thumbnail")  # type: ignore

Functions

__init__(data, thumbnail)

Initializes a GoogleItem with data from a search result.

Parameters:

Name Type Description Default
data PyQuery

A PyQuery instance containing the search result item's data.

required
thumbnail Optional[str]

Optional base64 encoded thumbnail image.

required
Source code in PicImageSearch/model/google.py
22
23
24
25
26
27
28
29
def __init__(self, data: PyQuery, thumbnail: Optional[str]) -> None:
    """Initializes a GoogleItem with data from a search result.

    Both arguments are forwarded unchanged to the parent initializer;
    ``thumbnail`` is passed on as a keyword argument.

    Args:
        data (PyQuery): A PyQuery instance containing the search result item's data.
        thumbnail (Optional[str]): Optional base64 encoded thumbnail image.
    """
    super().__init__(data, thumbnail=thumbnail)

GoogleResponse

Bases: BaseSearchResponse[GoogleItem]

Encapsulates a Google reverse image search response.

Processes and stores the complete response from a Google reverse image search, including pagination information and individual search results.

Attributes:

Name Type Description
origin PyQuery

The raw PyQuery object containing the full response data.

page_number int

Current page number in the search results.

url str

URL of the current search result page.

pages list[str]

List of URLs for all available result pages.

raw list[GoogleItem]

List of processed search result items.

Source code in PicImageSearch/model/google.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class GoogleResponse(BaseSearchResponse[GoogleItem]):
    """Encapsulates a Google reverse image search response.

    Processes and stores the complete response from a Google reverse image search,
    including pagination information and individual search results.

    Attributes:
        origin (PyQuery): The raw PyQuery object containing the full response data.
        page_number (int): Current page number in the search results.
        url (str): URL of the current search result page.
        pages (list[str]): List of URLs for all available result pages.
        raw (list[GoogleItem]): List of processed search result items.
    """

    def __init__(
        self,
        resp_data: str,
        resp_url: str,
        page_number: int = 1,
        pages: Optional[list[str]] = None,
    ):
        """Initializes with the response text and URL.

        Args:
            resp_data (str): The text of the response.
            resp_url (str): URL to the search result page.
            page_number (int): The current page number in the search results.
            pages (Optional[list[str]]): List of URLs to pages of search results.
        """
        super().__init__(resp_data, resp_url, page_number=page_number, pages=pages)

    def _parse_response(self, resp_data: str, **kwargs: Any) -> None:
        """Parse search response data.

        Args:
            resp_data (str): The text of the response.
            **kwargs (Any): Must contain ``page_number``. ``pages`` is optional;
                when it is missing or empty, ``resp_url`` must be present because
                the page list is then rebuilt from the pagination links.
        """
        data = parse_html(resp_data)
        self.origin: PyQuery = data
        self.page_number: int = kwargs["page_number"]

        if pages := kwargs.get("pages"):
            # Reuse the page list handed in by the caller.
            self.pages: list[str] = pages
        else:
            # Collect the pagination anchors (aria-label containing the word
            # "Page") and put the current page's URL in front of them.
            self.pages = [
                f'https://www.google.com{i.attr("href")}'
                for i in data.find('a[aria-label~="Page"]').items()
            ]
            self.pages.insert(0, kwargs["resp_url"])

        script_list = list(data.find("script").items())
        thumbnail_dict: dict[str, str] = self.create_thumbnail_dict(script_list)
        # Each "#search .g" element is one result; its thumbnail is looked up
        # by the id of the contained <img id="dimg_...">, and is None when the
        # id is not present in the mapping.
        self.raw: list[GoogleItem] = [
            GoogleItem(i, thumbnail_dict.get(i('img[id^="dimg_"]').attr("id")))
            for i in data.find("#search .g").items()
        ]

    @staticmethod
    def create_thumbnail_dict(script_list: list[PyQuery]) -> dict[str, str]:
        """Creates a mapping of image IDs to their base64 encoded thumbnails.

        Processes script tags from Google's search results to extract thumbnail images
        and their corresponding IDs. Scripts without a base64 image are skipped; in
        the others, every ID found is mapped to the first image found in that script.

        Args:
            script_list (list[PyQuery]): List of PyQuery objects containing script elements
                from the search results page.

        Returns:
            dict[str, str]: A dictionary where:
                - Keys are image IDs (format: 'dimg_*')
                - Values are base64 encoded image strings

        Note:
            - Handles multiple image formats (jpeg, jpg, png, gif)
            - Automatically fixes escaped base64 strings by replacing '\\x3d' with '='
        """
        thumbnail_dict: dict[str, str] = {}
        base_64_regex = compile(r"data:image/(?:jpeg|jpg|png|gif);base64,[^'\"]+")
        id_regex = compile(r"dimg_[^'\"]+")

        for script in script_list:
            # Read the script body once and reuse it for both patterns.
            script_text = script.text()

            # Only the first image in a script is used, so stop at the first hit.
            base_64_match = base_64_regex.search(script_text)
            if base_64_match is None:
                continue

            # Un-escape the padding once per script rather than once per id.
            thumbnail = base_64_match.group(0).replace(r"\x3d", "=")

            for _id in id_regex.findall(script_text):
                thumbnail_dict[_id] = thumbnail

        return thumbnail_dict

Functions

__init__(resp_data, resp_url, page_number=1, pages=None)

Initializes with the response text and URL.

Parameters:

Name Type Description Default
resp_data str

The text of the response.

required
resp_url str

URL to the search result page.

required
page_number int

The current page number in the search results.

1
pages Optional[list[str]]

List of URLs to pages of search results.

None
Source code in PicImageSearch/model/google.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(
    self,
    resp_data: str,
    resp_url: str,
    page_number: int = 1,
    pages: Optional[list[str]] = None,
) -> None:
    """Initializes with the response text and URL.

    All four values are forwarded to the parent initializer; ``page_number``
    and ``pages`` are passed on as keyword arguments.

    Args:
        resp_data (str): The text of the response.
        resp_url (str): URL to the search result page.
        page_number (int): The current page number in the search results.
        pages (Optional[list[str]]): List of URLs to pages of search results.
    """
    super().__init__(resp_data, resp_url, page_number=page_number, pages=pages)
create_thumbnail_dict(script_list) staticmethod

Creates a mapping of image IDs to their base64 encoded thumbnails.

Processes script tags from Google's search results to extract thumbnail images and their corresponding IDs.

Parameters:

Name Type Description Default
script_list list[PyQuery]

List of PyQuery objects containing script elements from the search results page.

required

Returns:

Type Description
dict[str, str]

dict[str, str]: A dictionary where keys are image IDs (format: 'dimg_*') and values are base64 encoded image strings.

Note
  • Handles multiple image formats (jpeg, jpg, png, gif)
  • Automatically fixes escaped base64 strings by replacing '\x3d' with '='
Source code in PicImageSearch/model/google.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@staticmethod
def create_thumbnail_dict(script_list: list[PyQuery]) -> dict[str, str]:
    """Creates a mapping of image IDs to their base64 encoded thumbnails.

    Processes script tags from Google's search results to extract thumbnail images
    and their corresponding IDs.

    Args:
        script_list (list[PyQuery]): List of PyQuery objects containing script elements
            from the search results page.

    Returns:
        dict[str, str]: A dictionary where:
            - Keys are image IDs (format: 'dimg_*')
            - Values are base64 encoded image strings

    Note:
        - Handles multiple image formats (jpeg, jpg, png, gif)
        - Automatically fixes escaped base64 strings by replacing '\x3d' with '='
    """
    thumbnail_dict = {}
    base_64_regex = compile(r"data:image/(?:jpeg|jpg|png|gif);base64,[^'\"]+")
    id_regex = compile(r"dimg_[^'\"]+")

    for script in script_list:
        base_64_match = base_64_regex.findall(script.text())
        if not base_64_match:
            continue

        # extract and adjust base64 encoded thumbnails
        base64: str = base_64_match[0]
        id_list: list[str] = id_regex.findall(script.text())

        for _id in id_list:
            thumbnail_dict[_id] = base64.replace(r"\x3d", "=")

    return thumbnail_dict

Functions