MediaWiki:Gadget-ocr.js
Uwaga: aby zobaczyć zmiany po opublikowaniu, może zajść potrzeba wyczyszczenia pamięci podręcznej przeglądarki.
- Firefox / Safari: Przytrzymaj Shift podczas klikania Odśwież bieżącą stronę, lub naciśnij klawisze Ctrl+F5, lub Ctrl+R (⌘-R na komputerze Mac)
- Google Chrome: Naciśnij Ctrl-Shift-R (⌘-Shift-R na komputerze Mac)
- Internet Explorer / Edge: Przytrzymaj Ctrl, jednocześnie klikając Odśwież, lub naciśnij klawisze Ctrl+F5
- Opera: Naciśnij klawisze Ctrl+F5.
/*jshint boss:true*/
/*global $, mw*/
/*
* Query an OCR service for a given Page:. First try to get the hOCR text layer, as it is
* available for most books, fast, and of better quality. If that fails (or returns an empty
* string for the page), fall back to the older and slower OCR method. hOCR fails for around
* 1 in 5000 books. The slow OCR should never fail, as it uses the image visible on the Page:.
*/
// Language code appended to the OCR request URLs; initialised from the wiki's
// content language. It is reassigned at runtime: fraktur_ocr() sets it to
// 'de-f' around its do_ocr() call and then restores it from the config.
var lang = mw.config.get( 'wgContentLanguage' );
/**
 * Lock or unlock the OCR UI around a request.
 *
 * Locking disables the #wpTextbox1 text box, detaches the click handlers of
 * the OCR buttons and installs an Escape-key listener that unlocks again.
 * Unlocking re-enables the text box and re-attaches do_hocr to #wsOcr1 and
 * fraktur_ocr to #wsOcr2.
 *
 * The function is idempotent: calling it several times in a row with the same
 * argument leaves exactly one handler of each kind attached.
 *
 * @param {boolean} set true to lock the UI, false to unlock it.
 */
function disable_input(set)
{
    var $hocrButton = $('#wsOcr1');
    var $frakturButton = $('#wsOcr2');

    // The Escape listener is namespaced so that only this gadget's listener is
    // removed. Dropping the previous one on every call keeps listeners from
    // accumulating on the document across requests.
    $(document).off('keyup.wsOcr');

    // Always clear the buttons first, so that unlocking twice in a row (e.g.
    // Escape followed by a late server response) does not stack duplicates.
    $hocrButton.off('click');
    $frakturButton.off('click');

    if (set) {
        $(document).on('keyup.wsOcr', function (e) {
            // 27 is the Escape key code.
            if (e.which === 27) { disable_input(false); }
        });
    } else {
        $hocrButton.on('click', do_hocr);
        // The Fraktur handler belongs to the second button, not to #wsOcr1.
        $frakturButton.on('click', fraktur_ocr);
    }

    $('#wpTextbox1').prop('disabled', set);
}
/**
 * Success handler for the slow OCR request.
 *
 * Shows the server message in an alert when the response is flagged as an
 * error; otherwise copies the recognised text into the edit box. In both
 * cases the UI is unlocked afterwards.
 *
 * @param {Object} data Parsed JSON response; reads `data.error` and `data.text`.
 */
function ocr_callback(data) {
    if (!data.error) {
        // With Chrome, Escape does not abort the pending query, so the answer
        // may still arrive after the user cancelled. The box is only written
        // to while it is still locked.
        var textbox = document.getElementById('wpTextbox1');
        if (textbox.disabled) {
            textbox.value = data.text;
        }
    } else {
        alert(data.text);
    }
    disable_input(false);
}
/**
 * Success handler for the hOCR request.
 *
 * When the response carries usable hOCR markup, its text content is
 * reflowed and written into the edit box, and the raw markup is cached in
 * localStorage.ws_hOCR. In every other case (error flag, missing text,
 * plain-text message instead of markup, markup without any text) the UI is
 * unlocked and the slow OCR is started through do_ocr().
 *
 * @param {Object} data Parsed JSON response; reads `data.error` and `data.text`.
 */
function hocr_callback(data) {
    var hocr = (data && !data.error && typeof data.text === 'string') ? data.text : '';
    var plain = '';

    // Only markup is treated as hOCR. Plain-text answers (such as a server
    // message starting with "An error occurred during ocr processing") are
    // not fed to $(), which would interpret them as a selector and may throw.
    if (/^\s*</.test(hocr)) {
        // Parse once; $.parseHTML never treats its input as a selector.
        plain = $($.parseHTML(hocr)).text();
    }

    if (plain.trim() === '') {
        // Fallback to the slow way.
        disable_input(false);
        do_ocr();
        return;
    }

    // Checking if tb is disabled is required with chrome as ESC doesn't kill
    // the query.
    var tb = document.getElementById('wpTextbox1');
    if (tb.disabled) {
        try {
            localStorage.ws_hOCR = hocr;
        } catch (e) {
            // Caching is best-effort: a full or blocked storage must not
            // prevent the text from being inserted and the UI from unlocking.
        }
        // Reflow: strip line indentation, join single line breaks with a
        // space, turn runs of 2-3 newlines into one newline and runs of 4 or
        // more into a blank line. The placeholders keep the passes from
        // interfering with each other.
        tb.value = plain.replace(/^ +/mg, '')
            .replace(/\n{4,}/g, '@_@_@_@')
            .replace(/\n{2,}/g, '____SPACE____')
            .replace(/\n/g, ' ')
            .replace(/____SPACE____/g, '\n')
            .replace(/@_@_@_@/g, '\n\n')
            .trim();
    }
    disable_input(false);
}
/**
 * Request the hOCR text layer of the current page.
 *
 * Locks the UI, queries hocr_cgi.py with the page title, the current `lang`
 * and the user name, and hands a successful response to hocr_callback. When
 * the request itself fails, the UI is unlocked and the slow OCR is tried.
 */
function do_hocr() {
    disable_input(true);
    // Every query value is percent-encoded: titles and user names may contain
    // characters such as '&', '#' or spaces that would otherwise corrupt the
    // query string.
    var request_url = '//phetools.toolforge.org/hocr_cgi.py?cmd=hocr&book=' +
        encodeURIComponent(mw.config.get('wgTitle')) +
        '&lang=' + encodeURIComponent(lang) +
        '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON(request_url)
        .done(hocr_callback)
        .fail(function () {
            // do_ocr() does nothing when the page has no scan image, so the
            // lock taken above has to be released before falling back;
            // do_ocr() takes it again itself when it actually sends a request.
            disable_input(false);
            do_ocr();
        });
}
/**
 * Run the slow OCR on the scan image shown on the page.
 *
 * Does nothing when no `.prp-page-image img` element is present. Otherwise
 * locks the UI, sends the image URL, the current `lang` and the user name to
 * ocr.php and hands a successful response to ocr_callback. A failed request
 * unlocks the UI again so the text box does not stay disabled.
 */
function do_ocr() {
    var $image = $( '.prp-page-image img' );
    if (!$image.length) {
        return;
    }
    disable_input(true);

    var src = $image.attr('src');
    // server side can't use protocol relative url, request it as https:
    // (only prepend the scheme when the URL really is protocol relative).
    var url_image = /^\/\//.test(src) ? 'https:' + src : src;

    // NOTE(review): the image URL is passed through unencoded, as before.
    // Whether ocr.php expects it percent-encoded is not visible from this
    // file - confirm against the server before changing it.
    var request_url = '//phetools.toolforge.org/ocr.php?cmd=ocr&url=' + url_image +
        '&lang=' + encodeURIComponent(lang) +
        '&user=' + encodeURIComponent(mw.config.get('wgUserName'));

    $.getJSON( request_url )
        .done( ocr_callback )
        .fail(function () {
            disable_input(false);
        });
}
/**
 * Run the slow OCR with the Fraktur language code.
 *
 * Temporarily switches the shared `lang` to 'de-f' while do_ocr() builds its
 * request, then restores it from the wiki's content language.
 */
function fraktur_ocr()
{
    lang = 'de-f';
    try {
        // For fraktur we need to use the slow way, all hocr for 'de'
        // are done with non-fraktur.
        do_ocr();
    } finally {
        // Restore even when do_ocr() throws, so later requests do not keep
        // using the Fraktur language code.
        lang = mw.config.get( 'wgContentLanguage' );
    }
}
// Gadget bootstrap: on the edit/submit view of a proofread-page, and unless
// the user set `proofreadpage_disable_ocr`, load the toolbar helper library
// and add the OCR button that triggers do_hocr.
(function () {
    var editingActions = [ 'edit', 'submit' ];

    if ( mw.config.get( 'wgPageContentModel' ) !== 'proofread-page' ) {
        return;
    }
    if ( $.inArray( mw.config.get( 'wgAction' ), editingActions ) === -1 ) {
        return;
    }
    if ( self.proofreadpage_disable_ocr ) {
        return;
    }

    var toolbarReady = $.when( mw.loader.using( [ 'ext.gadget.lib-toolbar' ] ) );

    toolbarReady.then( function () {
        var ocrButton = {
            oldIcon: '//upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png',
            newIcon: '//upload.wikimedia.org/wikipedia/commons/c/c9/Toolbaricon_OCR.png',
            title: 'Pobierz tekst przez OCR',
            alt: 'Pobierz tekst przez OCR',
            id: 'wsOcr1',
            add_style: 'width: 42px;',
            section: 'main',
            group: 'other-ocr',
            onclick: do_hocr
        };
        toolbarGadget.addButton( ocrButton );
    } );
}());