Krita Source Code Documentation
Loading...
Searching...
No Matches
comics_project_translation_scraper.py
Go to the documentation of this file.
1"""
2SPDX-FileCopyrightText: 2018 Wolthera van Hövell tot Westerflier <griffinvalley@gmail.com>
3
4This file is part of the Comics Project Management Tools(CPMT).
5
6SPDX-License-Identifier: GPL-3.0-or-later
7"""
8
"""
A class for getting translatable strings out.

This class does several things:
1) It can parse through a kra file's maindoc.xml, and then through the svgs that file is pointing at.
2) It can parse a preexisting POT file to ensure it isn't making duplicates.
3) It can write a POT file.

Note: writing to a csv file was considered, until the realisation hit that comic dialog itself contains commas.
"""
18
19import os
20import zipfile
21from xml.dom import minidom
22try:
23 from PyQt6.QtCore import QDateTime, Qt
24except:
25 from PyQt5.QtCore import QDateTime, Qt
26
27
# Class-level defaults of the translation scraper. The enclosing ``class``
# statement is not part of this listing.
# NOTE(review): the list and dict defaults below are shared by every instance
# that does not rebind them on ``self``.
projectURL = str()  # Joined with translation_folder and with page paths.
translation_folder = str()  # Folder below projectURL holding the POT file.
textLayerNameList = []  # Name fragments used to recognise text layers.
translationDict = {}  # Maps a key (msgctxt or msgid) to its entry dict.
translationKeys = []  # separate so that the keys will be somewhat according to the order of appearance.
pageTitleKeys = []  # Titles of pages whose keywords contain "acbf_title".
projectName = str()  # The POT file is named projectName + ".pot".
languageKey = "AA_language"  # Key under which the source language is stored.
37
def __init__(self, projectURL=str(), translation_folder=str(), textLayerNameList=None, projectName=str()):
    """
    Set up the scraper and load a preexisting POT file if there is one.

    projectURL: root folder of the project; page paths and the translation
        folder are joined onto it.
    translation_folder: folder below projectURL that holds the POT file.
    textLayerNameList: name fragments used to recognise text layers.
        Defaults to an empty list.
    projectName: the POT file searched for ends with projectName + ".pot".
    """
    self.projectURL = projectURL
    self.projectName = projectName
    self.translation_folder = translation_folder
    # A fresh list per call: a literal [] default would be shared between calls.
    self.textLayerNameList = [] if textLayerNameList is None else textLayerNameList
    # Rebind the containers so that instances do not share (and leak) state
    # through the class-level defaults.
    self.translationDict = {}
    self.translationKeys = []
    self.pageTitleKeys = []

    # Check for a preexisting translation file and parse that.
    potFolder = os.path.join(self.projectURL, self.translation_folder)
    if not os.path.isdir(potFolder):
        # Nothing to load yet; scanning a missing folder would raise.
        return
    with os.scandir(potFolder) as entries:
        for entry in entries:
            if entry.name.endswith(projectName + '.pot') and entry.is_file():
                self.parse_pot(entry.path)
                break
51
def start(self, pagesList, language, metaData=None):
    """
    Scrape every page and write the resulting POT file.

    pagesList: page file paths relative to self.projectURL.
    language: stored under self.languageKey when no language is recorded yet.
    metaData: dict handed to write_pot; defaults to an empty dict.
    """
    if metaData is None:
        # A literal {} default would be one dict shared by every call.
        metaData = {}
    if self.languageKey not in self.translationDict:
        # NOTE(review): the statement guarded by this check is missing from
        # the listing; storing the language is the reconstruction that makes
        # use of the otherwise unused ``language`` argument. Confirm upstream.
        self.translationDict[self.languageKey] = language
    for p in pagesList:
        self.get_svg_layers(os.path.join(self.projectURL, p))
    self.write_pot(metaData)
58
def parse_pot(self, location):
    """
    Load the entries of an existing POT file into self.translationDict.

    Every entry becomes a dict with the optional keys "text" (msgid),
    "trans" (msgstr), "translator" (text of a "# " comment) and "extract"
    (text of a "#. " comment). An entry is stored under its msgctxt when it
    has one and under its msgid otherwise; entries whose key is empty, such
    as the POT header, are not stored.

    location: path of the POT file. Nothing happens when it does not exist.
    """
    if not os.path.exists(location):
        return

    def unquote(line, keyword=""):
        # Cut the keyword off as a prefix. str.strip() takes a set of
        # characters, so using it here would also eat leading letters of the
        # message itself.
        string = line[len(keyword):].strip()
        if string.startswith("\""):
            string = string[1:]
        if string.endswith("\""):
            string = string[:-1]
        # Undo the escaping applied when the file was written.
        string = string.replace("\\\"", "\"")
        string = string.replace("\\\'", "\'")
        string = string.replace("\\#", "#")
        return string

    def addEntryToTranslationDict(key, entry):
        if not entry:
            return
        if key is None:
            key = entry.get("text", None)
        if key:
            self.translationDict[key] = entry

    multiLine = ""
    key = None
    entry = {}

    with open(location, "r", newline="", encoding="utf8") as file:
        for line in file:
            if line.isspace():
                # A blank line ends the current entry.
                addEntryToTranslationDict(key, entry)
                entry = {}
                key = None
                multiLine = ""
            elif line.startswith("msgid "):
                entry["text"] = unquote(line, "msgid ")
                multiLine = "text"
            elif line.startswith("msgstr "):
                entry["trans"] = unquote(line, "msgstr ")
                multiLine = "trans"
            elif line.startswith("# "):
                # Translator comment; stored without marker and line ending
                # so that writing it back does not double the marker.
                entry["translator"] = line[len("# "):].strip()
            elif line.startswith("#. "):
                entry["extract"] = line[len("#. "):].strip()
            elif line.startswith("msgctxt "):
                key = unquote(line, "msgctxt ")
            elif line.startswith("\"") and multiLine:
                # Continuation of the msgid or msgstr seen last.
                entry[multiLine] += unquote(line)
    # The last entry is not necessarily followed by a blank line.
    addEntryToTranslationDict(key, entry)
117
def get_svg_layers(self, location):
    """
    Collect the texts and, where applicable, the title of one page.

    The archive's maindoc.xml is walked for layers with nodetype
    "shapelayer" whose name contains one of the text layer name fragments;
    the content.svg of each such layer is handed to self.get_txt. After
    that documentinfo.xml is read, and when its keywords contain
    "acbf_title" the page title is appended to self.pageTitleKeys.

    location: path of the page file, which is read as a zip archive.
    """
    elementNode = minidom.Node.ELEMENT_NODE
    textNodes = (minidom.Node.TEXT_NODE, minidom.Node.CDATA_SECTION_NODE)

    def gatherText(node):
        # Only text and CDATA children carry a .data attribute.
        return "".join(child.data for child in node.childNodes if child.nodeType in textNodes)

    # The archive is only read, so open it read-only; append mode would need
    # write access and creates the file when it is missing.
    with zipfile.ZipFile(location, "r") as page:
        doc = minidom.parseString(page.read("maindoc.xml")).documentElement
        candidates = [member for member in page.namelist() if member.endswith('svg')]

        def parseThroughChildNodes(node):
            for childNode in node.childNodes:
                # Comments and other non-element nodes have no tagName.
                if childNode.nodeType != elementNode:
                    continue
                if childNode.tagName == "layer" and childNode.getAttribute("nodetype") == "shapelayer":
                    layerName = childNode.getAttribute("name")
                    # NOTE(review): the loop header binding the name fragment
                    # is missing from the listing; iterating
                    # self.textLayerNameList is the reconstruction. Confirm
                    # against upstream.
                    if any(t in layerName for t in self.textLayerNameList):
                        filename = childNode.getAttribute("filename")
                        for c in candidates:
                            if str(filename + ".shapelayer/content.svg") in c:
                                self.get_txt(page.read(c))
                if childNode.childNodes:
                    parseThroughChildNodes(childNode)

        parseThroughChildNodes(doc)

        # Get page title if the keywords contain acbf_title
        info = {}

        def parseThroughDocumentInfo(node, info):
            for childNode in node.childNodes:
                if childNode.nodeType != elementNode:
                    continue
                if childNode.tagName == "title":
                    info["title"] = gatherText(childNode)
                elif childNode.tagName == "keyword":
                    info["key"] = [k.strip() for k in gatherText(childNode).split(",")]
                if childNode.childNodes:
                    parseThroughDocumentInfo(childNode, info)

        docInfo = minidom.parseString(page.read("documentinfo.xml"))
        parseThroughDocumentInfo(docInfo.documentElement, info)
        # Either element may be absent, so do not index the dict blindly.
        if "acbf_title" in info.get("key", []) and "title" in info:
            self.pageTitleKeys.append(info["title"])
176
def get_txt(self, string):
    """
    Register the content of every text element of an svg document.

    The serialised children of each "text" element form the message. A new
    message gets an entry {"text": message} in self.translationDict, and
    existing entries are left untouched. Each message is also appended once
    to self.translationKeys, in order of appearance.

    string: the svg document as str or bytes.
    """
    svg = minidom.parseString(string)

    def parseThroughChildNodes(node):
        for childNode in node.childNodes:
            # Only elements have a tagName; comments and the like are skipped.
            if childNode.nodeType != minidom.Node.ELEMENT_NODE:
                continue
            if childNode.tagName == "text":
                text = "".join(c.toxml() for c in childNode.childNodes)
                if text not in self.translationDict:
                    self.translationDict[text] = {"text": text}
                if text not in self.translationKeys:
                    self.translationKeys.append(text)
            elif childNode.childNodes:
                parseThroughChildNodes(childNode)

    parseThroughChildNodes(svg.documentElement)
198
def write_pot(self, metaData):
    """
    Write the collected strings to <projectName>.pot in the translation folder.

    The file starts with a header entry, followed by the four metadata
    entries, one "@page-title" entry per collected page title and one entry
    per key in self.translationKeys. Every msgstr is written empty.

    metaData: dict that may hold the strings "title", "summary", "keywords"
        and "transnotes"; missing ones are written as empty msgids.
    """
    quote = "\""
    newLine = "\n"
    location = os.path.join(self.projectURL, self.translation_folder, self.projectName + ".pot")

    def escape(string):
        string = string.replace(quote, "\\\"")
        string = string.replace("\'", "\\\'")
        string = string.replace("#", "\\#")
        return string

    def commentText(raw, marker):
        # Stored comments may still carry their marker and line ending;
        # strip both so the marker is written exactly once.
        text = raw.strip()
        if text.startswith(marker):
            text = text[len(marker):].strip()
        return text

    metaEntries = (
        ("#. Title of the work", "@meta-title", "title"),
        ("#. The summary", "@meta-summary", "summary"),
        ("#. The keywords, these need to be comma separated.", "@meta-keywords", "keywords"),
        ("#. The header that will prepend translator's notes", "@meta-translator", "transnotes"),
    )
    date = QDateTime.currentDateTimeUtc().toString(Qt.DateFormat.ISODate)

    with open(location, "w", newline="", encoding="utf8") as file:
        file.write("msgid " + quote + quote + newLine)
        file.write("msgstr " + quote + quote + newLine)
        file.write(quote + "POT-Creation-Date:" + date + "\\n" + quote + newLine)
        file.write(quote + "Content-Type: text/plain; charset=UTF-8\\n" + quote + newLine)
        file.write(quote + "Content-Transfer-Encoding: 8bit\\n" + quote + newLine)
        file.write(quote + "X-Generator: Krita Comics Project Manager Tools Plugin\\n" + quote + newLine)

        for comment, context, field in metaEntries:
            file.write(newLine)
            file.write(comment + newLine)
            file.write("msgctxt " + quote + context + quote + newLine)
            # Escaped like the page texts, so a quote cannot end the msgid early.
            file.write("msgid " + quote + escape(metaData.get(field, "")) + quote + newLine)
            file.write("msgstr " + quote + quote + newLine)

        for title in self.pageTitleKeys:
            file.write(newLine)
            file.write("msgctxt " + quote + "@page-title" + quote + newLine)
            file.write("msgid " + quote + escape(title) + quote + newLine)
            file.write("msgstr " + quote + quote + newLine)

        for key in self.translationKeys:
            if key == self.languageKey:
                continue
            entry = self.translationDict.get(key)
            if not isinstance(entry, dict) or "text" not in entry:
                # A key without a usable entry cannot be written; skip it
                # instead of aborting with a half-written file.
                continue
            file.write(newLine)
            # The comment is stored under "translator", so test for that key.
            if "translator" in entry:
                file.write("# " + commentText(entry["translator"], "#") + newLine)
            if "extract" in entry:
                file.write("#. " + commentText(entry["extract"], "#.") + newLine)
            string = entry["text"]
            # A context line is only needed when the key is not the text itself.
            if string != key:
                file.write("msgctxt " + quote + escape(key) + quote + newLine)
            file.write("msgid " + quote + escape(string) + quote + newLine)
            file.write("msgstr " + quote + quote + newLine)
    print("CPMT: Translations have been written to:", location)
__init__(self, projectURL=str(), translation_folder=str(), textLayerNameList=[], projectName=str())