Skip to content

Ascii2D

Attributes

BASE_URL = 'https://ascii2d.net' module-attribute

SUPPORTED_SOURCES = ['fanbox', 'fantia', 'misskey', 'pixiv', 'twitter', 'ニコニコ静画', 'ニジエ'] module-attribute

URL = namedtuple('URL', ['href', 'text']) module-attribute

Classes

Ascii2DItem

Bases: BaseSearchItem

Represents a single Ascii2D search result item.

Holds details of a result from an Ascii2D reverse image search, including image metadata, URLs, author information, and related content.

Attributes:

Name Type Description
origin PyQuery

The raw PyQuery data of the search result item.

hash str

The hash string from the search result.

detail str

Image details including dimensions, type, and size.

thumbnail str

URL of the thumbnail image.

url str

Primary URL of the webpage containing the image.

url_list list[URL]

List of related URLs with their text descriptions.

title str

Title of the image or related content.

author str

Name of the image author/creator.

author_url str

URL to the author's profile page.

Source code in PicImageSearch/model/ascii2d.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
class Ascii2DItem(BaseSearchItem):
    """Represents a single Ascii2D search result item.

    Holds details of a result from an Ascii2D reverse image search, including image metadata,
    URLs, author information, and related content.

    Attributes:
        origin (PyQuery): The raw PyQuery data of the search result item.
            Not assigned in this class; presumably set by ``BaseSearchItem`` (confirm).
        hash (str): The hash string from the search result.
        detail (str): Image details including dimensions, type, and size.
        thumbnail (str): URL of the thumbnail image; empty when the item has no image source.
        url (str): Primary URL of the webpage containing the image.
        url_list (list[URL]): List of related URLs with their text descriptions.
        title (str): Title of the image or related content.
        author (str): Name of the image author/creator.
        author_url (str): URL to the author's profile page.
    """

    def __init__(self, data: PyQuery, **kwargs: Any) -> None:
        """Initializes an Ascii2DItem with data from a search result.

        Args:
            data (PyQuery): A PyQuery instance containing the search result item's data.
            **kwargs (Any): Extra keyword arguments, forwarded to the base initializer.
        """
        super().__init__(data, **kwargs)

    def _parse_data(self, data: PyQuery, **kwargs: Any) -> None:
        """Parses raw search result data into structured attributes.

        Reads the hash, the detail text and the thumbnail URL directly from ``data``,
        resets the URL list and author fields, then delegates the rest to ``_arrange``.

        Args:
            data (PyQuery): PyQuery object containing the search result HTML.
            **kwargs (Any): Additional keyword arguments (unused).
        """
        self.hash: str = data("div.hash").eq(0).text()
        self.detail: str = data("small").eq(0).text()
        # ``attr`` yields None when no <img> matched or the tag has no ``src``;
        # coerce to "" so the prefix check below cannot raise AttributeError.
        image_source = data("img").eq(0).attr("src") or ""
        # Site-relative thumbnail paths are made absolute; anything else is kept as is.
        self.thumbnail = (
            f"{BASE_URL}{image_source}"
            if image_source.startswith("/")
            else image_source
        )
        self.url_list: list[URL] = []
        self.author: str = ""
        self.author_url: str = ""
        self._arrange(data)

    def _arrange(self, data: PyQuery) -> None:
        """Organizes and processes the search result data.

        Coordinates the extraction of URLs, title, author information, and other metadata.
        Handles the normalization of URLs and sets backup links if necessary.

        Args:
            data (PyQuery): PyQuery object containing the detail box information.
        """
        if infos := data.find("div.detail-box.gray-link"):
            links = infos.find("a")
            # An empty selection simply produces an empty list.
            self.url_list = [URL(i.attr("href"), i.text()) for i in links.items()]
            # First <small> whose text names a supported source, or "" when none does.
            mark = next(
                (
                    small.text()
                    for small in infos("small").items()
                    if small.text() in SUPPORTED_SOURCES
                ),
                "",
            )
            self._arrange_links(infos, links, mark)
            self._arrange_title(infos)
        self._normalize_url_list()
        # Only fall back to the alternative link location when nothing was collected.
        if not self.url_list:
            self._arrange_backup_links(data)

    def _arrange_links(self, infos: PyQuery, links: PyQuery, mark: str) -> None:
        """Processes and organizes the URLs found in the search result.

        With more than one link and a supported source mark, the first link supplies
        the title and primary URL and the second supplies the author name and URL.
        Otherwise, if the first link sits inside a ``<small>`` element, all ``<small>``
        elements are removed from ``infos`` and the remaining text becomes the title.

        Args:
            infos (PyQuery): PyQuery object containing the detail box information.
                May be mutated (``<small>`` elements removed).
            links (PyQuery): PyQuery object containing all URL links.
            mark (str): Source identifier string (e.g., "pixiv", "twitter").
        """
        if links:
            link_items = list(links.items())
            if len(link_items) > 1 and mark in SUPPORTED_SOURCES:
                self.title, self.url = link_items[0].text(), link_items[0].attr("href")
                self.author_url, self.author = (
                    link_items[1].attr("href"),
                    link_items[1].text(),
                )
            elif links.eq(0).parents("small"):
                infos.remove("small")
                self.title = infos.text()

    def _arrange_title(self, infos: PyQuery) -> None:
        """Extracts and processes the title from the search result.

        When no title has been set yet, falls back to the external text and then to
        the ``h6`` content. A title containing one of the known log-page labels is
        cleared.

        Args:
            infos (PyQuery): PyQuery object containing the title information.
        """
        # NOTE(review): ``self.title`` is read before this class assigns it on some
        # paths; presumably the base class initialises it — confirm.
        if not self.title:
            self.title = self._extract_external_text(infos) or infos.find("h6").text()
        if self.title and any(
            i in self.title for i in {"詳細掲示板のログ", "2ちゃんねるのログ"}
        ):
            self.title = ""

    @staticmethod
    def _extract_external_text(infos: PyQuery) -> str:
        """Extracts text from external elements in the search result.

        Removes link elements from the ``.external`` nodes and joins the remaining
        non-empty texts with newlines.

        Args:
            infos (PyQuery): PyQuery object containing external text elements.

        Returns:
            str: Combined text from external elements, or empty string if none found.
        """
        external = infos.find(".external")
        external.remove("a")
        # ``str.join`` already returns "" for an empty sequence; read each text once.
        return "\n".join(text for i in external.items() if (text := i.text()))

    def _normalize_url_list(self) -> None:
        """Normalizes all URLs in the url_list to absolute paths.

        Prepends ``BASE_URL`` to every href that starts with "/". Entries whose href
        is missing (``None``) or empty are left unchanged. Rebinds ``url_list`` to a
        new list.
        """
        self.url_list = [
            URL(BASE_URL + url.href, url.text)
            # An <a> without ``href`` yields None; guard before the prefix test.
            if url.href and url.href.startswith("/")
            else url
            for url in self.url_list
        ]

    def _arrange_backup_links(self, data: PyQuery) -> None:
        """Sets backup URLs when primary URL list is empty.

        Uses the first ``div.pull-xs-right > a`` link, if any, as both the primary
        URL and the only entry of ``url_list``.

        Args:
            data (PyQuery): PyQuery object to search for backup links.
        """
        if links := data.find("div.pull-xs-right > a"):
            self.url = links.eq(0).attr("href")
            self.url_list = [URL(self.url, links.eq(0).text())]

Functions

__init__(data, **kwargs)

Initializes an Ascii2DItem with data from a search result.

Parameters:

Name Type Description Default
data PyQuery

A PyQuery instance containing the search result item's data.

required
Source code in PicImageSearch/model/ascii2d.py
40
41
42
43
44
45
46
def __init__(self, data: PyQuery, **kwargs: Any) -> None:
    """Initializes an Ascii2DItem with data from a search result.

    Does no work of its own: both arguments are handed to the parent
    initializer through ``super()``.

    Args:
        data (PyQuery): A PyQuery instance containing the search result item's data.
        **kwargs (Any): Extra keyword arguments, forwarded unchanged to the parent
            initializer.
    """
    super().__init__(data, **kwargs)

Ascii2DResponse

Bases: BaseSearchResponse[Ascii2DItem]

Represents a complete Ascii2D reverse image search response.

Processes and contains all search results from an Ascii2D search operation.

Attributes:

Name Type Description
origin PyQuery

The raw PyQuery data of the complete response.

raw list[Ascii2DItem]

List of processed search result items.

url str

URL of the search results page.

Source code in PicImageSearch/model/ascii2d.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
class Ascii2DResponse(BaseSearchResponse[Ascii2DItem]):
    """Container for the full result of one Ascii2D reverse image search.

    Parses the response HTML and exposes every result entry it contains.

    Attributes:
        origin (PyQuery): The raw PyQuery data of the complete response.
        raw (list[Ascii2DItem]): List of processed search result items.
        url (str): URL of the search results page.
    """

    def __init__(self, resp_data: str, resp_url: str, **kwargs: Any):
        """Builds the response from the page text and its URL.

        Args:
            resp_data (str): The data of the response.
            resp_url (str): URL to the search result page.
            **kwargs (Any): Extra keyword arguments, forwarded to the base initializer.
        """
        super().__init__(resp_data, resp_url, **kwargs)

    def _parse_response(self, resp_data: str, **kwargs: Any) -> None:
        """Turns the raw response text into a list of result items.

        The text is parsed with ``parse_html``; each ``div.row.item-box`` element
        of the parsed document is wrapped in an ``Ascii2DItem``.

        Args:
            resp_data (str): Raw HTML response string from Ascii2D.
            **kwargs (Any): Additional keyword arguments (unused).
        """
        document = parse_html(resp_data)
        self.origin: PyQuery = document
        # One item box per search result.
        item_nodes = document.find("div.row.item-box").items()
        self.raw: list[Ascii2DItem] = list(map(Ascii2DItem, item_nodes))

Functions

__init__(resp_data, resp_url, **kwargs)

Initializes with the response text and URL.

Parameters:

Name Type Description Default
resp_data str

The data of the response.

required
resp_url str

URL to the search result page.

required
Source code in PicImageSearch/model/ascii2d.py
189
190
191
192
193
194
195
196
def __init__(self, resp_data: str, resp_url: str, **kwargs: Any) -> None:
    """Initializes with the response text and URL.

    Does no work of its own: all arguments are handed to the parent
    initializer through ``super()``.

    Args:
        resp_data (str): The data of the response.
        resp_url (str): URL to the search result page.
        **kwargs (Any): Extra keyword arguments, forwarded unchanged to the parent
            initializer.
    """
    super().__init__(resp_data, resp_url, **kwargs)

Functions