shimizukawa committed on
Commit
4cf5bcf
1 Parent(s): b8a64cb

provide djangoproject special rule

Browse files
Files changed (1) hide show
  1. loaders/rtdhtmlpage.py +3 -1
loaders/rtdhtmlpage.py CHANGED
@@ -56,10 +56,12 @@ class RTDHtmlPageLoader(ReadTheDocsLoader):
56
  for p in self.file_path.rglob("*"):
57
  if p.is_dir():
58
  continue
 
 
59
  with open(p, encoding=self.encoding, errors=self.errors) as f:
60
  text, title = self._my_clean_data(f.read())
61
 
62
- if p.name == "index.html":
63
  # Djangoドキュメントではindex.htmlにアクセスすると404になる
64
  p = p.parent
65
  url = f"https://{str(p)}/"
 
56
  for p in self.file_path.rglob("*"):
57
  if p.is_dir():
58
  continue
59
+ # FIXME: utf-8を指定したい
60
+ # with open(p, encoding='utf-8', errors='ignore') as f:
61
  with open(p, encoding=self.encoding, errors=self.errors) as f:
62
  text, title = self._my_clean_data(f.read())
63
 
64
+ if "docs.djangoproject.com" in p.parts and p.name == "index.html":
65
  # Djangoドキュメントではindex.htmlにアクセスすると404になる
66
  p = p.parent
67
  url = f"https://{str(p)}/"