Spaces:
Sleeping
Sleeping
shimizukawa
committed on
Commit
•
4cf5bcf
1
Parent(s):
b8a64cb
provide djangoproject special rule
Browse files
Files changed: loaders/rtdhtmlpage.py (+3 -1)
loaders/rtdhtmlpage.py
CHANGED
@@ -56,10 +56,12 @@ class RTDHtmlPageLoader(ReadTheDocsLoader):
|
|
56 |
for p in self.file_path.rglob("*"):
|
57 |
if p.is_dir():
|
58 |
continue
|
|
|
|
|
59 |
with open(p, encoding=self.encoding, errors=self.errors) as f:
|
60 |
text, title = self._my_clean_data(f.read())
|
61 |
|
62 |
-
if p.name == "index.html":
|
63 |
# Djangoドキュメントではindex.htmlにアクセスすると404になる
|
64 |
p = p.parent
|
65 |
url = f"https://{str(p)}/"
|
|
|
56 |
for p in self.file_path.rglob("*"):
|
57 |
if p.is_dir():
|
58 |
continue
|
59 |
+
# FIXME: utf-8を指定したい
|
60 |
+
# with open(p, encoding='utf-8', errors='ignore') as f:
|
61 |
with open(p, encoding=self.encoding, errors=self.errors) as f:
|
62 |
text, title = self._my_clean_data(f.read())
|
63 |
|
64 |
+
if "docs.djangoproject.com" in p.parts and p.name == "index.html":
|
65 |
# Djangoドキュメントではindex.htmlにアクセスすると404になる
|
66 |
p = p.parent
|
67 |
url = f"https://{str(p)}/"
|