1 #!/usr/bin/perl -w
2
3 #
4 # Copyright 2005 John Carter and The Apache Software Foundation
5 # Copyright 2010 Tilmann Haak
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18 #
19
20 ###############################################################################
21 #
22 # This script converts usemod markup to moinmoin markup
23 #
24 # Usage:
25 #
26 # ie:
27 # ./um2mm.pl < WikiPageToBePorted.txt > PortedWikiPage.txt
28 #
29 #
30 # http://www.therefromhere.org/software/wikiport/
31 #
32 ###############################################################################
33
34 use strict;
35
package UseModtoMoinMoinPort;

#use WWW::Mechanize;

# The "encoding" pragma that used to be loaded here is deprecated and is a
# fatal error on perl 5.26 and later.  "use utf8" keeps UTF-8 literals in
# this source file working, and the explicit binmode() calls decode STDIN
# and encode STDOUT as UTF-8.
use utf8;
binmode(STDIN,  ':encoding(utf8)') or die "Cannot set UTF-8 layer on STDIN: $!";
binmode(STDOUT, ':encoding(utf8)') or die "Cannot set UTF-8 layer on STDOUT: $!";

# globals

# Page bookkeeping lists.  Of these, only @BrokenMarkupPages is written to
# by the code in this file (see PortPage).
my @UpdatedPages      = ();
my @UnchangedPages    = ();
my @CreatedPages      = ();
my @BrokenMarkupPages = ();
my @UnCreatedPages    = ();

# Problem reports for the page being converted.  Each hash maps a line
# number to the converted text of the offending line.  They are filled in
# by UseModtoMoinMoinLine and read by CreateBrokenMarkupReport.
my %ImageInTitle    = ();
my %LinkInTitle     = ();
my %WikiNameInTitle = ();
my %EntityInTitle   = ();
my %EntityInLink    = ();

#my $Mech = WWW::Mechanize->new();

#
# read wiki text from stdin (or from the files named on the command line)
my $page_source = '';
while (my $input_line = <>) {
    $page_source .= $input_line;
}

my $ported_page = PortPage($page_source);

print $ported_page;
67
sub PortPage
{
    #
    # Convert one page of UseMod wiki markup to MoinMoin markup.
    #
    # Argument: the raw UseMod page text as a single scalar.
    #
    # Returns the converted page as a scalar.  When the conversion flagged
    # problems, the report built by CreateBrokenMarkupReport is appended.
    # The result always ends with a newline.
    #
    # Dies when the page text contains no lines at all.
    #

    my $wikiText = shift;

    # Reset the global error report hashes, so that problems recorded for
    # a previously converted page do not show up in this page's report.
    %ImageInTitle    = ();
    %LinkInTitle     = ();
    %WikiNameInTitle = ();
    %EntityInTitle   = ();
    %EntityInLink    = ();

    use Encode qw( decode FB_CROAK );
    #$wikiText = decode('iso-8859-1', $wikiText, FB_CROAK);
    use HTML::Entities qw( decode_entities );

    # Called in void context, so $wikiText is decoded in place.
    decode_entities($wikiText);

    my @wikiTextList = split(/\n/, $wikiText);
    die "No wiki text to convert (input was empty)" unless @wikiTextList;

    # Convert line by line.  Line numbers are zero-based; they are only
    # used as keys for the problem report.
    for my $lineNum (0 .. $#wikiTextList)
    {
        $wikiTextList[$lineNum] =
            UseModtoMoinMoinLine($wikiTextList[$lineNum], $lineNum);
    }

    $wikiText = join("\n", @wikiTextList);

    my $brokenMarkupReport = CreateBrokenMarkupReport();

    if ($brokenMarkupReport ne "")
    {
        # No page name is passed to this sub, so the literal placeholder
        # 'wikiName' is recorded.
        push(@BrokenMarkupPages, 'wikiName');
    }

    # Add a final \n to match MoinMoin raw.
    return $wikiText . $brokenMarkupReport . "\n";
}
111
sub UseModtoMoinMoinLine
{
    #
    # Convert a single line of UseMod markup to MoinMoin markup.
    #
    # Arguments:
    #   $line    - one line of UseMod wiki text (PortPage passes lines
    #              obtained by splitting the page on "\n")
    #   $lineNum - number of that line within the page; only used as the
    #              key under which problems are recorded
    #
    # Returns the converted line.  Some conversions (definition lists,
    # images in titles) insert a "\n", so the result can span two lines.
    #
    # Side effects: may add entries to %ImageInTitle, %LinkInTitle,
    # %WikiNameInTitle, %EntityInTitle and %EntityInLink.
    #
    # The substitutions below depend on the order in which they run.
    #

    my ($line, $lineNum) = @_;

    # Detect titles - special handling needed, because UseMod allows
    # images and links in titles, while MoinMoin doesn't.
    #
    #   "= Title ="  - the carriage return is optional, so headings are
    #                  recognised with Unix as well as DOS line endings
    #   " Subtitle:" - a line starting with a space and containing a colon
    my $isTitle = ($line =~ /^\=.*\=\r?$/ || $line =~ /^ .*:/) ? 1 : 0;

    #
    # Basic Formatting
    #

    # toc
    $line =~ s/[<]toc[>]/<<TableOfContents(9)>>/i;

    # headings: normalise spacing, drop a leading "#" (numbered heading)
    # and make the closing marker match the opening one.  $3 is the
    # heading text; $2 is only the optional " #" and must not be emitted.
    $line =~ s/^([=]+)(\s[#])?\s([^=]+)\s[=]+$/$1 $3 $1/g;

    # line breaks
    $line =~ s/[<]br[>]/<<BR>>/gi;

    # turn single-bracketed links into double-bracketed ones
    # NOTE(review): the \b anchors require a word character directly
    # before "[" and directly after "]", so a link surrounded by spaces
    # is not matched - confirm whether that is intended.
    $line =~ s/\b(\[[^\[][^\]]+[\]])\b/[$1]/g;

    # hash signs in headings
    $line =~ s/([=][=])+ [#] /$1 /g;

    # images
    #$line =~ s/\b(http\:\\\\.*\.(jpg|gif|png))\b/{{$1}}/Ui;

    # MoinMoin is more strict than UseMod about title formatting
    #$line =~ s/^\=\=\=([^=]+)\=+(\s)+$/\=\=\=$1\=\=\=\r/;
    #$line =~ s/^\=\=([^=]+)\=+(\s)+$/\=\=$1\=\=\r/;
    #$line =~ s/^\=([^=]+)\=+(\s)+$/\=$1\=\r/;

    $line =~ s/<\/?b>/\'\'\'/g;         # bold
    $line =~ s/<\/?strong>/\'\'\'/g;    # strong=bold

    $line =~ s/<\/?i>/\'\'/g;           # italic
    $line =~ s/<\/?em>/\'\'/g;          # em=italic
    $line =~ s/<\/?u>/_/g;              # underline
    $line =~ s/<\/?sup>/^/g;            # superscript
    $line =~ s/<\/?sub>/,,/g;           # subscript (opening and closing tag)
    $line =~ s/<tt>/\{\{\{ /g;          # inline_preformatted_start
    $line =~ s/<\/tt>/ \}\}\}/g;        # inline_preformatted_end

    # Need to replace <nowiki> tags with ! infront of each word
    # The following only affects single word cases
    $line =~ s/<nowiki>(\w+)\s*<\/nowiki>/!$1/g;    # strip_wiki_formatting

    $line =~ s/\\$//g;                  # end_of_line_continuation_removal

    $line =~ s/^\s*$//g;                # blank line (do this before "preformatted")

    $line =~ s/^ (.*)$/\{\{\{ $1 \}\}\}/g;  # preformatted
    $line =~ s/<pre>/\{\{\{ /g;         # preformatted2_start
    $line =~ s/<\/pre>/ \}\}\}/g;       # preformatted2_end

    # lists
    #
    # The nesting level is expressed by the depth of the leading
    # indentation: one space per level.  The deepest level is handled
    # first; once it matched, the line starts with a space and the
    # shallower patterns no longer apply.
    $line =~ s/^\*\*\*/   \* /;         # bullet_list_three_level
    $line =~ s/^\*\*/  \* /;            # bullet_list_two_level
    $line =~ s/^\*/ \* /;               # bullet_list_one_level

    $line =~ s/^### /   1. /;           # number_list_three_levels
    $line =~ s/^## /  1. /;             # number_list_two_levels
    $line =~ s/^# / 1. /;               # number_list_one_level

    # definition lists.
    #
    # UseMod:
    # ;;;SomeTerm: SomeDefinition
    # MoinMoin:
    # SomeTerm: SomeDefintion
    #
    # However, MoinMoin definition list's aren't much use to us, since
    # they don't allow links in the definition title.
    #
    # So we replace definition lists with ul bulleted lists,
    # and indented paragraphs.
    #
    # This replacement also looks for the following pattern at the start
    # of the line: [.*][.*]
    #
    # This is discarded, it was previously used to add anchor points.
    #
    # Proper definition lists (disabled):
    #$line =~ s/^;;;(.+):(.+)$/ $1:: $2/; # definition_three_levels
    #$line =~ s/^;;(.+):(.+)$/ $1:: $2/; # definition_two_levels
    #$line =~ s/^;(.+):(.+)$/ $1:: $2/; # definition_one_levels

    $line =~ s/^;;;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/   * $2:\n   $3/;   # definition_three_levels
    $line =~ s/^;;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/  * $2:\n  $3/;      # definition_two_levels
    $line =~ s/^;\s*(\[.*?\]\[.*?\])?([^:]*):(.+)$/ * $2:\n $3/;         # definition_one_level

    $line =~ s/^:::([^:].*)$/   $1/;    # indenting_three_levels
    $line =~ s/^::([^:].*)$/  $1/;      # indenting_two_levels
    $line =~ s/^:([^:].*)$/ $1/;        # indenting_one_level

    # UseMod [#BladiBlah], MoinMoin [[Anchor(BladiBlah)]]
    # NOTE(review): the toc and line-break conversions above emit the
    # <<Macro>> form while this one emits [[Macro]] - confirm which macro
    # syntax the target MoinMoin version expects.
    $line =~ s/\[\#([a-zA-Z0-9 _]+)\]/\[\[Anchor\($1\)\]\]/g;   # anchors

    # Alternative link conversions that are currently disabled:

    # UseMod [[One]], MoinMoin ["One"]. Force a link to single word wikipage
    #$line =~ s/\[\[([A-Z]+[a-z0-9]+)\]\]/\[\"$1\"\]/g; # odd links

    # UseMod [/BladiBlah johoho], MoinMoin [wiki:/BlaDiBlah fancy link]
    #$line =~ s/(?:^| )\[(\/[a-zA-Z0-9]+) ([^\]]+)\]/[:$1: $2]/g; # fancy_links_0

    # UseMod [BladiBlah johoho], MoinMoin [wiki:/BlaDiBlah fancy link]
    #$line =~ s/(?:^| )\[([A-Z]+[a-z0-9]+[A-Z]+[a-zA-Z0-9]+) ([^\]]+)\]/[:$1: $2]/g; # fancy_links_0

    # UseMod [Bla di _da johoho], MoinMoin ["Bla di _da johoho"]
    # (but not [wiki: ], and watch for [[ by a ' ' prefix
    #$line =~ s/(?:^| )\[([^w\]\[][a-zA-Z0-9 _]+)\]/["$1"]/g; # fancy_links_0_2

    # UseMod [[BlaDiBlah | fancy link]], MoinMoin [:BlaDiBlah: fancy link]
    #$line =~ s/\[\[(\/?[a-zA-Z0-9]+) *\| *([^\]]+)\]\]/[:$1:$2]/g; # fancy_links_1

    # Usemod [[bladlaslsla]], MoinMoin [" "]
    #$line =~ s/\[\[([a-zA-Z0-9 _]+)\]\]/["$1"]/g; # fancy_links_2 # Not needed? JohnC

    # Usemod [[blah]], MoinMoin ["blah"]
    #$line =~ s/\[\[([a-zA-Z0-9 _]+)\]\]/\[\"$1\"\]/g;

    # this was too strict...
    #$line =~ s/\[\[([A-Z][a-z]+[A-Z][a-zA-Z]+) *\| ([^\]]+)\]\]/[wiki:$1 $2]/g; # fancy_links_1

    # UseMod allows DDASDSaDASLeas as wiki name, Moin is more strict
    # (watch for fancy_links_2 by looking for a ' ' prefix)
    $line =~ s/ ([A-Z][A-Z]+[a-z0-9]+[A-Z]+[A-Za-z0-9]*)/ ["$1"]/g;    # fancy_links_4
    $line =~ s/ ([A-Z]+[a-z0-9]+[A-Z][A-Z]+[A-Za-z0-9]*)/ ["$1"]/g;    # fancy_links_5

    # UseMod forces links using ""link"", Moin uses ''''''link''''''
    $line =~ s/""/''''''/g;             # fancy_links_5

    #
    # Replace html entities with literals
    #
    # Both the named entity and the character it decodes to are handled,
    # so the result is the same whether or not the caller has already
    # decoded the entities.
    #

    $line =~ s/(?:&nbsp;|\N{U+00A0})/ /g;   # non-breaking space -> " "
    $line =~ s/(?:&ndash;|\N{U+2013})/-/g;  # en dash -> "-"
    $line =~ s/&bull;/\N{U+2022}/g;         # bullet

    $line =~ s/&#[xX]([A-Fa-f0-9]+);/"\&\#". hex($1) . ";"/eg;  # convert any hex entities to decimal

    $line =~ s/\&\#([0-9]+)\;/chr($1)/eg;   # convert numerical entities to literals

    #
    # Links
    #

    if ($isTitle)
    {
        #
        # Report images & links in titles
        #

        if ($line =~ /http\:.+\.(gif|png|jpg|jpeg) /)
        {
            # Move the image URL onto a line of its own, in front of the
            # remaining title text.
            $line =~ s/(.*?)(http\:.+\.)(gif|png|jpg|jpeg)(.*?)/$2$3\n$1 $4/g;

            $ImageInTitle{$lineNum} = $line;
        }

        if ($line =~ /\[.*\]/)
        {
            $LinkInTitle{$lineNum} = $line;
        }

        if ($line =~ /[A-Z]+[a-z0-9]+[A-Z]+[a-z0-9]/)
        {
            $WikiNameInTitle{$lineNum} = $line;
        }

        # Numerical entities were replaced by literals above, so this is
        # only expected to trigger on leftovers of that replacement.
        if ($line =~ /\&\#([0-9]+)\;/)
        {
            $EntityInTitle{$lineNum} = $line;
        }
    }

    if ($line =~ /\[.*\&\#([0-9]+)\;.*\]/)
    {
        $EntityInLink{$lineNum} = $line;
    }

    return $line;
}
317
sub CreateBrokenMarkupReport
{
    #
    # This creates a report that will be cat'd to be bottom of each
    # MoinMoin WikiPage that needs manual work from an editor.
    #
    # Takes no arguments; reads the report hashes %ImageInTitle,
    # %LinkInTitle, %WikiNameInTitle, %EntityInTitle and %EntityInLink.
    #
    # Returns the report as a scalar, or "" when none of the hashes has
    # an entry.
    #

    # One entry per report section, in output order:
    # [ heading line, hash mapping line number -> offending line text ]
    my @sections = (
        [ "ImageInTitle``s Moved on these lines:\n", \%ImageInTitle    ],
        [ "LinkInTitle``s on these lines:\n",        \%LinkInTitle     ],
        [ "WikiNameInTitle``s on these lines:\n",    \%WikiNameInTitle ],
        [ "EntityInTitle``s on these lines:\n",      \%EntityInTitle   ],
        [ "EntityInLink``s on these lines:\n",       \%EntityInLink    ],
    );

    my $report = "";

    # Nothing was flagged: the page gets no report at all.
    return $report unless grep { scalar keys %{ $_->[1] } } @sections;

    $report .= "## Delete this section once the page has been fixed\n";
    $report .= "----\n";
    $report .= "\/!\\ The Markup on This Page Needs Fixing\n\n";
    $report .= "This wiki page has been ported by the WikiMigrationBot, and this link to the WikiMigrationBotReport flags that this page contains wiki markup that needs fixing.\n\n";

    foreach my $section (@sections)
    {
        my ($heading, $lineTextFor) = @{$section};

        # The keys are line numbers, so sort them numerically; a plain
        # sort would list line 10 before line 2.
        my @lineNums = sort { $a <=> $b } keys %{$lineTextFor};
        next unless @lineNums;

        $report .= $heading;
        foreach my $lineNum (@lineNums)
        {
            my $lineText = $lineTextFor->{$lineNum};
            $lineText =~ s/\s$//g;      # strip the newline
            $report .= " * $lineNum \{\{\{ $lineText \}\}\}\n";
        }
        $report .= "\n";
    }

    $report .= "----\n";
    $report .= "## End of section to be deleted\n";

    return $report;
}
benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte
nicht die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.