forked from eroux/luainputenc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
luainputenc.dtx
1464 lines (1306 loc) · 43.2 KB
/
luainputenc.dtx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% \iffalse meta-comment -- by the way, this file contains UTF-8
%
% Written in 2009, 2010 by Manuel Pégourié-Gonnard and Élie Roux.
%
% This work is under the CC0 license. As an exception, the files
% luainputenc.sty, lutf8.def and lutf8x.def have more restrictions.
% See these files for more details.
%
% People who helped on this package:
% Javier Bezos
% Will Robertson
% Hans Hagen
% Heiko Oberdiek
% Javier Mugica
%
% This work consists of the main source file luainputenc.dtx
% and the derived files
% luainputenc.sty, luainputenc.lua, lutf8.def, lutf8x.def,
% and luainputenc.pdf
%
% Unpacking:
% tex luainputenc.dtx
% Documentation:
% pdflatex luainputenc.dtx
%
%<*ignore>
% Run-mode detection. A case number is assembled digit by digit: it starts
% as 0 and gets a 1 appended when the install flag equals y, when the
% docstrip batch-file processor is already defined, or when the format
% name differs from LaTeX2e. The group only keeps the scratch macro x
% local; the expandafter makes the comparisons run before the group ends.
% When the number stays 0 nothing is done here and the text that follows
% is skipped; otherwise the alternative branch is taken, and its csname
% construction builds the token that closes the conditional, so that the
% install section runs outside of it.
\begingroup
\def\x{LaTeX2e}%
\expandafter\endgroup
\ifcase 0\ifx\install y1\fi\expandafter
\ifx\csname processbatchFile\endcsname\relax\else1\fi
\ifx\fmtname\x\else 1\fi\relax
\else\csname fi\endcsname
%</ignore>
%<*install>
% Unpacking script: loads docstrip, declares the preamble written at the
% top of generated files, then generates each derived file from the
% guarded section of this source named in the matching from-clause.
\input docstrip.tex
\keepsilent
\askforoverwritefalse
\let\MetaPrefix\relax
\preamble
This is a generated file.
Written in 2009, 2010 by Manuel Pegourie-Gonnard and Elie Roux.
This work is under the CC0 license. As an exception, the files
luainputenc.sty, lutf8.def and lutf8x.def have more restrictions.
See these file for more details.
People who helped on this package:
Javier Bezos
Will Robertson
Hans Hagen
Heiko Oberdiek
Javier Mugica
This work consists of the main source file luainputenc.dtx
and the derived files
luainputenc.sty, luainputenc.lua, lutf8.def, lutf8x.def,
and luainputenc.pdf
\endpreamble
\let\MetaPrefix\DoubleperCent
% TeX-side files: the package and the two encoding definition files.
\generate{%
\usedir{tex/lualatex/luainputenc}%
\file{luainputenc.sty}{\from{luainputenc.dtx}{package}}%
\file{lutf8.def}{\from{luainputenc.dtx}{def}}%
\file{lutf8x.def}{\from{luainputenc.dtx}{defx}}%
}
% Test document, extracted from the section guarded by "test".
\generate{%
\usedir{doc/luatex/luainputenc}%
\file{test.tex}{\from{luainputenc.dtx}{test}}%
}
% The Lua module needs Lua comment syntax: the meta-comment prefix becomes
% "-- " and a dedicated postamble is used for that file.
\def\MetaPrefix{-- }
\def\luapostamble{%
\MetaPrefix^^J%
\MetaPrefix\space End of File `\outFileName'.%
}
\def\currentpostamble{\luapostamble}%
\generate{%
\usedir{tex/lualatex/luainputenc}%
\file{luainputenc.lua}{\from{luainputenc.dtx}{lua}}%
}
% The inputenc patch is written after preamble and postamble are switched
% off (presumably so that the diff contains nothing but the patch text).
\nopreamble
\nopostamble
\generate{%
\file{inputenc.sty.diff}{\from{luainputenc.dtx}{patch}}%
}
\obeyspaces
\Msg{************************************************************************}
\Msg{*}
\Msg{* To finish the installation you have to move the following}
\Msg{* files into a directory searched by TeX:}
\Msg{*}
\Msg{* luainputenc.sty luainputenc.lua lutf8.def lutf8x.def}
\Msg{*}
\Msg{* Happy TeXing!}
\Msg{*}
\Msg{************************************************************************}
\endbatchfile
%</install>
%<*ignore>
\fi
%</ignore>
% \iffalse
%<package>
%<package>\NeedsTeXFormat{LaTeX2e}
%<package>\ProvidesPackage{luainputenc}
%<package> [2010/11/19 v0.973 inputenc package for LuaTeX]
%<package>
% \fi
%<*driver>
% Documentation driver: typesets this very file through \DocInput.
\documentclass{ltxdoc}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{textcomp}
\usepackage{lmodern}
\usepackage{metalogo}
\usepackage[lmargin=5cm, textwidth=14cm]{geometry}
\usepackage[bookmarks=true, colorlinks=true]{hyperref}
\usepackage{bookmark}
\usepackage[english]{babel}
% Markup shorthands used by the documentation text: \pf sets its argument
% in sans serif (used for package names), \code in typewriter, and \email
% builds a mailto link showing the address itself.
\providecommand\eTeX{e\TeX}
\newcommand\pf{\textsf}
\newcommand\file{\texorpdfstring{\nolinkurl}{}}
\newcommand\code{\texttt}
\newcommand*\email[1]{\href{mailto:#1}{#1}}
\begin{document}
\DocInput{luainputenc.dtx}%
\end{document}
%</driver>
% \fi
%
% \CheckSum{0}
%
% \CharacterTable
% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
% Digits \0\1\2\3\4\5\6\7\8\9
% Exclamation \! Double quote \" Hash (number) \#
% Dollar \$ Percent \% Ampersand \&
% Acute accent \' Left paren \( Right paren \)
% Asterisk \* Plus \+ Comma \,
% Minus \- Point \. Solidus \/
% Colon \: Semicolon \; Less than \<
% Equals \= Greater than \> Question mark \?
% Commercial at \@ Left bracket \[ Backslash \\
% Right bracket \] Circumflex \^ Underscore \_
% Grave accent \` Left brace \{ Vertical bar \|
% Right brace \} Tilde \~}
%
% \title{The \textsf{luainputenc} package}
% \date{2010/11/19 v0.973}
% \author{%
% Manuel P\'egouri\'e-Gonnard \email{[email protected]} \\
% \'Elie Roux \email{[email protected]}}
%
% \maketitle
%
% \begin{abstract}
% Input encoding management for Lua\TeX, needed only for compatibility with
% old documents. For new documents, using UTF-8 encoding and Unicode fonts is
% \emph{strongly} recommended. You've been warned!
% \end{abstract}
%
% \tableofcontents
%
% \section{Overview: When (not) to use this package}
%
% This package is strictly meant for compatibility. It is useful in the two
% (overlapping) following cases:
% \begin{enumerate}
% \item Your source is not encoded in UTF-8 and you don't want to reencode it
% for some reason.
% \item Your document is using legacy 8-bit fonts (with \pf{fontenc}), as
% opposed to modern Unicode fonts (most probably with \pf{fontspec} or
% \pf{luaotfload} and \pf{fontenc} with option \code{EU2}).
% \end{enumerate}
% Surprisingly enough, in the second case \pf{luainputenc} is needed, due to
% the way \LaTeX\ implements font encodings.
%
% From the user point of view, adapting an old document for \LuaTeX\ is really
% easy: replacing \pf{inputenc} by \pf{luainputenc} in the preamble is
% enough.
%
% Note that \pf{luainputenc} automatically loads \pf{inputenc} if called
% with an old engine, so you will still be able to compile your documents with
% pdf\TeX\ without changing them.
%
% \pf{luainputenc} has several modes of operation. By default, it basically
% turns \LuaTeX\ into an 8-bit engine, which means you lose half of the
% benefits from using \LuaTeX. If you are using only Unicode fonts, you can
% activate a nicer mode of operation using the \code{unactivate} package
% option. That way, \LuaTeX\ remains a true Unicode engine.
%
% Unicode fonts with \LuaTeX\ are handled using a new encoding: \code{EU2}. It
% is used internally by the \pf{fontspec} package when loading Unicode fonts.
% This encoding is special as it needs non-ASCII characters to be non-active
% (unlike other font encodings), so you cannot mix old encodings and EU2. If
% you're using only Unicode fonts, this isn't a problem: use the
% \code{unactivate} package option mentioned in the previous paragraph.
%
% But if you
% want to use both 8-bit fonts and Unicode fonts in your document, you need to
% use another package option, \code{lutf8x}. This option
% overrides \LaTeX's mechanism for font encoding switching, so that it
% (un)activates non-ASCII characters on-the-fly. With this option, you'll be
% able to change the font encoding from/to \code{EU2}, for example:
%
% \begin{verbatim}
% abc
% {
% \fontencoding{EU2}\usefont
% \font\foo="MyOtfFont.otf"\foo
% abc
% }
% abc
% \end{verbatim}
%
% \section{Documentation}
%
% \subsection{Introduction}
%
% One of the most interesting new features of Lua\TeX\ is the fact that it is
% (like Omega/Aleph) not limited to 256 characters, and can now understand
% Unicode. The problem is that it does not read input the way older engines
% (like pdf\TeX) do, and thus \textsf{inputenc} is totally broken with Lua\TeX
% . This package aims at replacing \textsf{inputenc} for Lua\TeX , by adapting
% the way Lua\TeX\ handles input, and the way \textsf{inputenc} handles UTF-8.
% This package has two very distinct modes: 8-bit and UTF-8.
%
% \subsection{Overview of 8-bit mode}
%
% This package \textbf{does not} map 8-bit encodings to utf8. It allows
% Lua\TeX\ to read 8-bit characters, by converting each byte into a unicode
% character with the same character number. The resulting unicode characters
% are not true UTF-8, they are what we will call ``fake UTF-8''. For example
% the byte 225 will be converted into the unicode character with number 225
% (two bytes long). It will be true UTF-8 only if the encoding is latin1.
%
% Here is how it works: the 8-bit encodings are converted into fake UTF-8, so
% that the corresponding tokens are chars with the good numbers. Then (like
% \textsf{inputenc}) it reads the char numbers, and converts it into LICR
% (\LaTeX\ Internal Character Representation), with the font encoding.
%
% Lua\TeX\ version 0.43 introduced a new callback, |process_output_buffer|;
% this callback makes it possible for Lua\TeX\ to write 8-bit instead of UTF-8, so the
% behaviour is the same as pdfTeX at this level. For versions prior to 0.43
% though, we need to do more tricky things, described in the next paragraph.
% This machinery is disabled for Lua\TeX\ version 0.43 and superior, so you
% can keep the default behaviour, which will be compatible with pdfTeX in
% most cases, but you can consider the machinery obsolete.
%
% For these old versions, \textsf{luainputenc} only changes the input
% behaviour, it does not change the output behaviour (when files are written
% for example). The consequence is that files will still be written by
% Lua\TeX\ in UTF-8 (fake UTF-8 in this case), even if the asked input
% encoding is a 8-bit encoding. In most cases it's not a problem, as most
% files will be written in LICR, meaning ASCII, which is both 8-bit and UTF-8.
% The problem comes when characters with a number $>$ 128 are written in a
% 8-bit encoding. This may happen if you use |\protect| in a section for
% example. In these cases, Lua\TeX\ will write fake UTF-8, and try to read
% 8-bit encoding, so it will get confused.
%
% The proposed solution is to unactivate the input conversion when we read
% certain files or extensions. This package should work with no change for
% most documents, but if you cook your own aux files with an unknown
% extension, you may have to force the package to read some files in UTF-8
% instead of 8-bit. See comments in the \texttt{.sty} file to know the useful
% commands.
%
% \subsection{Overview of UTF-8 mode}
%
% The behaviour of \textsf{inputenc} in utf8 mode is to read the input byte by
% byte, and decide if the character we are in is 1, 2, 3 or 4 bytes long, and
% then read other bytes accordingly. This behaviour fails with Lua\TeX\
% because it reads input character by character (characters do not have a
% fixed number of bytes in unicode). The result is thus an error.
%
% All characters recognized by \TeX\ are active characters, that correspond to
% a LICR macro. Then \textsf{inputenc} reads the \texttt{*.dfu} files that
% contain the correspondence between these LICR macros and a character number
% in the fonts for different font encodings (T1, OT1, etc.).
%
% \subsubsection{legacy mode}
%
% \textsf{luainputenc} can get this behaviour (we will call it \emph{legacy
% mode}), but another difference implied by the fact that Lua\TeX\ can read
% more than 256 characters is that fonts can also have more than 256
% characters. Lua\TeX\ can thus read unicode fonts. If we want to use unicode
% fonts (OTF for example), we can't use the \emph{legacy mode} anymore, as it
% would mean that we would have to rewrite a specially long
% \texttt{unicode.dfu} file, and it would be totally inefficient, as for
% instance \texttt{\'e} (unicode character number 233) would be mapped to
% \texttt{\string\'e}, and then mapped back to \texttt{\string\char\ 233}.
%
% \subsubsection{unicode font mode}
%
% To fix this, the simplest solution is to deactivate all activated
% characters, thus typing \texttt{\'e} will directly call
% \texttt{\string\char\ 233} in the unicode fonts, and produce a \texttt{\'e}.
% We will call this behaviour the \emph{unicode font mode}. To enable this
% mode, you can use the option \texttt{unactivate} in \textsf{luainputenc},
% and you must use the font encoding \texttt{EU2} provided by the \textsf{euenc}
% package. See documentation of \textsf{euenc} package for more details about
% \texttt{EU2}. To use this mode with \texttt{EU2}, you must be able to open OTF
% fonts. A simple way to do so is by using the package \textsf{luaotfload}.
%
% \subsubsection{mixed mode}
%
% But the \emph{unicode font mode} has a strong limitation (that will
% certainly disappear with time): it cannot use non-unicode fonts. If you want
% to mix unicode fonts and old fonts, you'll have to use the \emph{mixed
% mode}. In this mode you can type some parts of your document in \emph{legacy
% mode} and some in \emph{unicode font mode}. The reason why we chose not to
% integrate this choice in the \emph{legacy mode} is that we wanted to have a
% mode that preserved most of the backward compatibility, to safely compile
% old documents; the \emph{mixed mode} introduces new things that may break
% old documents. To get the \emph{mixed mode}, you must pass the option
% \texttt{lutf8x} to \textsf{luainputenc}. This mode is the most experimental.
%
% \section{Accessing the encoding in lua}
%
% In order to access the encoding and the package option in lua, two variables
% are set: |luainputenc.package_option| contains the option passed to the
% package, and |luainputenc.encoding| that contains the encoding (defaults to
% utf8, and is utf8 even with the options |unactivate|, |utf8x|, etc.).
%
% \section{Files}
%
% This package contains a \texttt{.sty} file for both \LaTeX\ and Plain, a
% patch for inputenc to use \textsf{luainputenc} so that you can process old
% documents without changing anything, and the lua functions.
%
% \subsection{\texttt{inputenc.sty} patch}
%
% A good thing would be to patch \textsf{inputenc} to load
% \textsf{luainputenc} instead, so that you don't have to change your
% documents to load \textsf{luainputenc} especially. The \LaTeX\ team is
% extremely conservative and does not want this patch applied (maybe we will
% find a solution later). Here is a patch for inputenc.sty:
%
% \iffalse
%<*patch>
% \fi
%
% \begin{macrocode}
\ifnum\@tempcnta<`#2\relax
\advance\@tempcnta\@ne
\repeat}
+
+\begingroup\expandafter\expandafter\expandafter\endgroup
+\expandafter\ifx\csname XeTeXversion\endcsname\relax\else
+ \RequirePackage{xetex-inputenc}
+ \DeclareOption*{\PassOptionsToPackage{\CurrentOption}{xetex-inputenc}}
+ \ProcessOptions*
+ \expandafter\endinput
+\fi
+\begingroup\expandafter\expandafter\expandafter\endgroup
+\expandafter\ifx\csname directlua\endcsname\relax\else
+ \RequirePackage{luainputenc}
+ \DeclareOption*{\PassOptionsToPackage{\CurrentOption}{luainputenc}}
+ \ProcessOptions*
+ \expandafter\endinput
+\fi
+
\ProcessOptions
\endinput
%%
% \end{macrocode}
%
% \iffalse
%</patch>
% \fi
%
% \subsection{\texttt{luainputenc.sty}}
%
% This file has some code from \texttt{inputenc.sty}, but also provides new
% options, and new macros to convert from 8-bit to fake UTF-8.
%
% \iffalse
%<*package>
% \fi
%
% \begin{macrocode}
%
%% This file was adapted from inputenc.sty, which copyright is:
%% Copyright 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
%% 2005 2006 The LaTeX3 Project.
%%
%% inputenc.sty is under the lppl version 1.3c or later, and can be
%% found in the base LaTeX system.
%%
%% The lppl can be found at http://www.latex-project.org/lppl.txt
%%
%% The changes to inputenc.sty are Copyright 2009 Elie Roux, and are
%% under the CC0 license.
%%
%% The changes are LuaTeX support.
%%
%% This file is distributed under the CC0 license, with clause 6 of the
%% lppl as additional restrictions.
% \end{macrocode}
%
% First we check if we are called with Lua\TeX, (pdf)\TeX\ or Xe\TeX. If
% we are called with pdf\TeX , we default to \textsf{inputenc}, and to
% \textsf{xetex-inputenc} if we are called with Xe\TeX . We also remap the
% new options to \texttt{utf8} in these cases.
%
% \begin{macrocode}
%% Engine dispatch. Under XeTeX the work is delegated to xetex-inputenc,
%% under any other non-LuaTeX engine to inputenc. In both cases the
%% options specific to this package (unactivate, lutf8, lutf8x) are passed
%% on as utf8, every other option is forwarded unchanged, and reading of
%% this file stops once the delegate package has been loaded.
\RequirePackage{ifluatex}
\RequirePackage{ifxetex}
\ifxetex
\DeclareOption{unactivate}{\PassOptionsToPackage{utf8}{xetex-inputenc}}
\DeclareOption{lutf8}{\PassOptionsToPackage{utf8}{xetex-inputenc}}
\DeclareOption{lutf8x}{\PassOptionsToPackage{utf8}{xetex-inputenc}}
\DeclareOption*{\PassOptionsToPackage{\CurrentOption}{xetex-inputenc}}
\ProcessOptions*
\RequirePackage{xetex-inputenc}
\expandafter\endinput
\fi
\ifluatex\else
\DeclareOption{unactivate}{\PassOptionsToPackage{utf8}{inputenc}}
\DeclareOption{lutf8}{\PassOptionsToPackage{utf8}{inputenc}}
\DeclareOption{lutf8x}{\PassOptionsToPackage{utf8}{inputenc}}
\DeclareOption*{\PassOptionsToPackage{\CurrentOption}{inputenc}}
\ProcessOptions*
\RequirePackage{inputenc}
\expandafter\endinput
\fi
% \end{macrocode}
%
% Here we know we are called with Lua\TeX . We first require
% \textsf{luatexbase} and ensure a few primitives, then we load the
% \texttt{lua} file.
%
% \begin{macrocode}
%% LuaTeX-only setup: load luatexbase, make sure the luaescapestring
%% primitive is available (it is used below as \luatexluaescapestring),
%% and load the Lua module luainputenc.
\RequirePackage{luatexbase}
\luatexbase@ensure@primitive{luaescapestring}
\RequireLuaModule{luainputenc}
% \end{macrocode}
%
% Here is some code from \textsf{inputenc}.
%
% \begin{macrocode}
%% \DeclareInputMath{<number>}{<definition>}: defines the active character
%% whose code is <number>. Only <number> is read as an argument here; the
%% macro ends with an open \def, so the brace group following the call
%% becomes the replacement text. The \uccode/\uppercase pair turns the ~
%% in \def~ into the character with code #1. Calling \@inpenc@test records
%% that at least one character has been defined.
\def\DeclareInputMath#1{%
\@inpenc@test
\bgroup
\uccode`\~#1%
\uppercase{%
\egroup
\def~%
}%
}
%% \DeclareInputText{<number>}{<text>}: like \DeclareInputMath, but the
%% definition is wrapped in \IeC when needed. The \ifcat test inspects the
%% \meaning of <text>: \reserved@a removes everything up to the first
%% space followed by $, so the test is true only when the meaning does not
%% end with a space (presumably: <text> does not end with a control word).
%% In that case <text> is used as is, otherwise it is wrapped in \IeC.
\def\DeclareInputText#1#2{%
\def\reserved@a##1 ${}%
\def\reserved@b{#2}%
\ifcat_\expandafter\reserved@a\meaning\reserved@b$ $_%
\DeclareInputMath{#1}{#2}%
\else
\DeclareInputMath{#1}{\IeC{#2}}%
\fi
}
%% \IeC{<text>}: when \protect currently equals \@typeset@protect the
%% macro reduces to \@firstofone, i.e. just <text>; in any other state it
%% leaves \IeC itself in place unexpanded, so that \IeC{<text>} survives.
\def\IeC{%
\ifx\protect\@typeset@protect
\expandafter\@firstofone
\else
\noexpand\IeC
\fi
}
% \end{macrocode}
%
% We changed a little the behaviour of this macro: we removed
% |\@inpenc@loop\^^?\^^ff|, because it made no sense in UTF-8 mode. We will
% call this line for 8-bit encodings.
%
% Note that the code has been changed for |\endlinechar|, because in new
% versions (from v0.43) of Lua\TeX{} the value cannot exceed 127.
% Thus, with the old version of \textsf{luainputenc}, when trying
% to add 10000, it fails silently, and when 10000 is subtracted, the
% new value is -1, resulting in no end of lines at all in the document.
%
% \begin{macrocode}
%% \inputencoding{<name>}: switches the input encoding to <name>.
%%  - runs the tokens of \inpenc@prehook first and \inpenc@posthook last;
%%  - re-arms \@inpenc@test, which resets itself to \relax as soon as an
%%    encoding file declares a character (used for the warning below);
%%  - records <name> in \inputencodingname and in the error macro
%%    \@inpenc@undefined;
%%  - makes the control characters ^^A-^^H, ^^K and ^^N-^^_ active;
%%  - inputs <name>.def with \endlinechar=-1 and spaces ignored, then
%%    restores both settings from the saved values;
%%  - tells the Lua side which encoding was selected.
%% Every line end inside the definition is protected by a comment
%% character (the one after "\endlinechar=-1" is needed to terminate the
%% number), so executing the macro in the middle of a paragraph does not
%% insert a space.
\def\inputencoding#1{%
\the\inpenc@prehook
\gdef\@inpenc@test{\global\let\@inpenc@test\relax}%
\edef\@inpenc@undefined{\noexpand\@inpenc@undefined@{#1}}%
\edef\inputencodingname{#1}%
\@inpenc@loop\^^A\^^H%
\@inpenc@loop\^^K\^^K%
\@inpenc@loop\^^N\^^_%
\xdef\saved@endlinechar{\the\endlinechar }%
\endlinechar=-1
\xdef\saved@space@catcode{\the\catcode`\ }%
\catcode`\ 9\relax
\input{#1.def}%
\endlinechar=\saved@endlinechar{}%
\catcode`\ \saved@space@catcode\relax
\ifx\@inpenc@test\relax\else
\PackageWarning{inputenc}%
{No characters defined\MessageBreak
by input encoding change to `#1'\MessageBreak}%
\fi
\the\inpenc@posthook
\luatexbase@directlua{luainputenc.set_option("\luatexluaescapestring{#1}")}%
}
%% Token registers executed at the start and at the end of \inputencoding.
\newtoks\inpenc@prehook
\newtoks\inpenc@posthook
%% \@inpenc@undefined@{<encoding>}: error raised for a character that has
%% no definition in the input encoding <encoding>.
\def\@inpenc@undefined@#1{\PackageError{inputenc}%
{Keyboard character used is undefined\MessageBreak
in inputencoding `#1'}%
{You need to provide a definition with
\noexpand\DeclareInputText\MessageBreak or
\noexpand\DeclareInputMath before using this key.}}%
%% \@inpenc@loop<first><last>: for every character code from <first> to
%% <last> (inclusive), makes the character active and \let's it to
%% \inpenc@undefined. The \uccode/\uppercase pair turns the ~ of the \let
%% into the character whose code is currently held in \@tempcnta.
%% NOTE(review): the name used here is \inpenc@undefined, whereas
%% \inputencoding defines \@inpenc@undefined (with a leading @); no
%% definition of \inpenc@undefined is visible in this file -- confirm
%% which name is intended.
\def\@inpenc@loop#1#2{%
\@tempcnta`#1\relax
\loop
\catcode\@tempcnta\active
\bgroup
\uccode`\~\@tempcnta
\uppercase{%
\egroup
\let~\inpenc@undefined
}%
\ifnum\@tempcnta<`#2\relax
\advance\@tempcnta\@ne
\repeat}
% \end{macrocode}
%
% Here we declare our options. Note that we remap \texttt{utf8} to
% \texttt{lutf8}, because we use our \texttt{lutf8.def} instead of
% \textsf{inputenc}'s \texttt{utf8.def}.
%
% \begin{macrocode}
%% UTF-8 options: utf8, lutf8 and utf8x all select lutf8.def, while
%% lutf8x selects lutf8x.def.
\DeclareOption{utf8}{\inputencoding{lutf8}}
\DeclareOption{lutf8}{\inputencoding{lutf8}}
\DeclareOption{utf8x}{\inputencoding{lutf8}}
\DeclareOption{lutf8x}{\inputencoding{lutf8x}}
% \end{macrocode}
%
% For the \texttt{unactivate} option, for \emph{unicode font mode}, we just
% don't do anything.
%
% \begin{macrocode}
%% Option unactivate: no character is made active; only the encoding name
%% is recorded and the Lua side is told which option was chosen. The line
%% end after the Lua call is protected so that the option code carries no
%% stray space token.
\DeclareOption{unactivate}{%
\edef\inputencodingname{unactivate}%
\luatexbase@directlua{luainputenc.set_option([[unactivate]])}%
}
% \end{macrocode}
%
% All other options are 8-bit encodings, so we activate the translation
% into fake UTF-8, and we execute the loop we removed from
% |\inputencoding|.
%
% \begin{macrocode}
%% Any other option is taken to be the name of an 8-bit encoding: the
%% fake UTF-8 translation is switched on with \lIE@activate, the
%% characters ^^? to ^^ff are made active, and the encoding named by the
%% option is loaded through \inputencoding.
\DeclareOption*{%
\lIE@activate %
\@inpenc@loop\^^?\^^ff%
\inputencoding{\CurrentOption}%
}
% \end{macrocode}
%
% The rest of the file is only the machinery for LuaTeX versions without
% the callback |process_output_buffer|, so it will be deprecated after
% TeXLive 2009, you are not advised to use it.
%
% \begin{macrocode}
%% When \luatexversion is greater than 42, activating the translation
%% only registers the Lua callbacks. The alternative branch, which runs up
%% to the closing \fi just before \ProcessOptions*, holds the machinery
%% for older versions.
\ifnum\luatexversion>42
\newcommand*{\lIE@activate}[0]{%
\luatexbase@directlua{luainputenc.register_callbacks()}%
}
\else
% \end{macrocode}
%
% |\lIE@setstarted| and |\lIE@setstopped| are called when the fake UTF-8
% translation must be activated or deactivated. You can call them several
% successive times. They are called very often, even if the package is not
% activated (for example if it's loaded with the utf8 option), but they act
% only if the package is activated.
%
% \begin{macrocode}
%% Start/stop switches for the fake UTF-8 translation. Each one forwards
%% to the Lua function of the same name, but only while the counter
%% \lIE@activated holds the value 1.
\newcommand*{\lIE@setstarted}{%
  \ifnum\lIE@activated=\@ne
    \luatexbase@directlua{luainputenc.setstarted()}%
  \fi
}
\newcommand*{\lIE@setstopped}{%
  \ifnum\lIE@activated=\@ne
    \luatexbase@directlua{luainputenc.setstopped()}%
  \fi
}
% \end{macrocode}
%
% The following 5 macros are made to declare a file that will have to be
% read in fake UTF-8 and not in 8-bit. These files are the ones that will
% be generated by \TeX . In \textbf{no way} this means you can include true
% UTF-8 files, it means that you can include files that have been written
% by Lua\TeX\ with \textsf{luainputenc}, which means files in fake UTF-8.
% The macros are very simple, when you call them with a file name (the same
% as the one you will use with \string\input ), it will read it with or
% without the fake UTF-8 translation. This package includes a whole bunch
% of extensions that will be read in fake UTF-8, so the occasions to use
% these macros will be rare, but if you use them, please report it to the
% package maintainer.
%
% \begin{macro}{\lIE@SetUtfFile}
%
% If you call this macro with a file name, each time you will input this
% file, it will be read in fake UTF-8. You can call it with a file that you
% generate with Lua\TeX\ and that you want to include.
%
% \begin{macrocode}
%% \lIE@SetUtfFile{<file>}: passes the (Lua-escaped) file name to
%% luainputenc.set_unicode_file.
\newcommand*{\lIE@SetUtfFile}[1]{%
  \luatexbase@directlua{%
    luainputenc.set_unicode_file("\luatexluaescapestring{#1}")%
  }%
}
% \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\lIE@SetNonUtfFile}
%
% Same as the previous macro, except that the file will be read as 8-bit.
% This macro is useful if there is an exception in an extension (see
% further comments).
%
% \begin{macrocode}
%% \lIE@SetNonUtfFile{<file>}: passes the (Lua-escaped) file name to
%% luainputenc.set_non_unicode_file.
\newcommand*{\lIE@SetNonUtfFile}[1]{%
  \luatexbase@directlua{%
    luainputenc.set_non_unicode_file("\luatexluaescapestring{#1}")%
  }%
}
% \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\lIE@UnsetFile}
%
% This macro gives a file the default behaviour of its extension.
%
% \begin{macrocode}
%% \lIE@UnsetFile{<file>}: passes the (Lua-escaped) file name to
%% luainputenc.unset_file.
\newcommand*{\lIE@UnsetFile}[1]{%
  \luatexbase@directlua{%
    luainputenc.unset_file("\luatexluaescapestring{#1}")%
  }%
}
% \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\lIE@SetUtfExt}
%
% You can tell \textsf{luainputenc} to treat all files with a particular
% extension in a certain way. The way the file extension is checked is to
% compare the four last characters of the filename. So if your extension
% has only three letters, you must include the preceding dot. This macro
% tells \textsf{luainputenc} to read all files from an extention in fake
% UTF-8.
%
% \begin{macrocode}
%% \lIE@SetUtfExt{<ext>}: passes the (Lua-escaped) extension to
%% luainputenc.set_unicode_extention.
\newcommand*{\lIE@SetUtfExt}[1]{%
  \luatexbase@directlua{%
    luainputenc.set_unicode_extention("\luatexluaescapestring{#1}")%
  }%
}
% \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\lIE@SetNonUtfExt}
%
% Same as before, but the files will be read in 8-bit.
%
% \begin{macrocode}
%% \lIE@SetNonUtfExt{<ext>}: passes the (Lua-escaped) extension to
%% luainputenc.set_non_unicode_extention. Both line ends inside the
%% definition are protected, as in the sibling macros, so that using the
%% macro does not insert space tokens.
\newcommand*\lIE@SetNonUtfExt[1]{%
\luatexbase@directlua{luainputenc.set_non_unicode_extention("\luatexluaescapestring{#1}")}%
}
% \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\lIE@InputUtfFile}
%
% This macro inputs a file in fake UTF-8. It has the ``feature'' to unset
% the behaviour on the file you will call, so to be safe, you must call
% them with files for which the behaviour has not been set.
%
% \begin{macrocode}
%% \lIE@InputUtfFile{<file>}: marks <file> as fake UTF-8, inputs it, then
%% removes the mark again.
%% The space after #1 is significant: it terminates the file name. Without
%% it TeX goes on expanding tokens while it scans the name, so
%% \lIE@UnsetFile would be expanded (and its Lua call executed) before the
%% file is opened, and text following the macro call could end up in the
%% file name.
\newcommand*\lIE@InputUtfFile[1]{%
\lIE@SetUtfFile{#1}%
\input #1 %
\lIE@UnsetFile{#1}%
}
% \end{macrocode}
%
% \end{macro}
%
% \begin{macro}{\lIE@InputNonUtfFile}
%
% Same as before, but to read a file as 8-bit.
%
% \begin{macrocode}
%% \lIE@InputNonUtfFile{<file>}: marks <file> as 8-bit, inputs it, then
%% removes the mark again.
%% The space after #1 is significant: it terminates the file name. Without
%% it TeX goes on expanding tokens while it scans the name, so
%% \lIE@UnsetFile would be expanded (and its Lua call executed) before the
%% file is opened, and text following the macro call could end up in the
%% file name.
\newcommand*\lIE@InputNonUtfFile[1]{%
\lIE@SetNonUtfFile{#1}%
\input #1 %
\lIE@UnsetFile{#1}%
}
% \end{macrocode}
%
% \end{macro}
%
% Two definitions to put the previous two macros in the user space.
%
% \begin{macrocode}
%% User-level names for the two input macros; each simply forwards its
%% argument to the internal macro.
\newcommand*{\InputUtfFile}[1]{\lIE@InputUtfFile{#1}}
\newcommand*{\InputNonUtfFile}[1]{\lIE@InputNonUtfFile{#1}}
%% Activation for the older LuaTeX versions. The counter \lIE@activated
%% is set to 1 once the translation has been activated; it is the counter
%% tested by \lIE@setstarted and \lIE@setstopped.
\newcount\lIE@activated
\newcommand*{\lIE@activate}[0]{%
\lIE@activated=1 %
\lIE@setstarted %
}
%% \lIE@FromInputenc{<arg>}: activates the translation unless it is
%% already active. The argument is not used in the body.
\newcommand*{\lIE@FromInputenc}[1]{%
\ifnum\lIE@activated=0 %
\lIE@activate %
\fi%
}
%% End of the branch for older versions; the options declared above are
%% processed now, in the order given by the user.
\fi
\ProcessOptions*
% \end{macrocode}
%
% \iffalse
%</package>
% \fi
%
% \iffalse
%<*def>
% \fi
%
% \subsection{\texttt{lutf8.def}}
%
% \begin{macrocode}
%% This file was adapted from utf8.def, which copyright is:
%% Copyright 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
%% 2004 2005 2006 The LaTeX3 Project.
%%
%% utf8.def is under the lppl version 1.3c or later, and can be found
%% in the base LaTeX system.
%%
%% The lppl can be found at http://www.latex-project.org/lppl.txt
%%
%% The changes to utf8.def are Copyright 2009 Elie Roux, and are under
%% the CC0 license.
%%
%% The changes are LuaTeX support.
%%
%% This file is distributed under the CC0 license, with clause 6 of the
%% lppl as additional restrictions.
% \end{macrocode}
%
% Most of the file is taken from \texttt{utf8.def}, the main changes are
% commented. A lot of code was removed, especially the codes that analysed
% the unicode characters byte by byte.
%
% \begin{macrocode}
%% File identification and set-up. The space catcode saved by
%% \inputencoding is restored, and \@inpenc@test is called so that
%% \inputencoding does not warn that no characters were defined. If
%% \@begindocumenthook is undefined, reading stops here (presumably the
%% encoding is being selected after the preamble -- confirm).
\ProvidesFile{lutf8.def}
[2010/05/10 v0.97 UTF-8 support for luainputenc]
\makeatletter
\catcode`\ \saved@space@catcode
\@inpenc@test
\ifx\@begindocumenthook\@undefined
\makeatother
\endinput \fi
% \end{macrocode}
%
% This function is changed a lot. Its aim is to map the character (first
% argument) to a macro (second argument). In \texttt{utf8.def} it was
% complicated as unicode was analyzed byte by byte. With Lua\TeX\ it is
% extremely simple, we just have to activate the character, and call a
% traditional |\DeclareInputText|.
%
% \begin{macrocode}
%% \DeclareUnicodeCharacter{<hex code>}{<definition>}: reads <hex code> as
%% a hexadecimal number, makes the character with that code active and
%% defines it through \DeclareInputText. Usable in the preamble only.
\gdef\DeclareUnicodeCharacter#1#2{%
\@tempcnta"#1%
\catcode\@tempcnta\active %
\DeclareInputText{\the\@tempcnta}{#2}%
}
\@onlypreamble\DeclareUnicodeCharacter
%% \cdp@elt is redefined so that executing \cdp@list inputs, for each
%% listed font encoding #1, the mapping file <#1>enc.dfu when it exists
%% (the file name is lowercased), logging the outcome either way. After a
%% file has been read, spaces are set to be ignored again.
\def\cdp@elt#1#2#3#4{%
\wlog{Now handling font encoding #1 ...}%
\lowercase{%
\InputIfFileExists{#1enc.dfu}}%
{\wlog{... processing UTF-8 mapping file for font encoding
#1}%
\catcode`\ 9\relax}%
{\wlog{... no UTF-8 mapping file for font encoding #1}}%
}
\cdp@list
%% \DeclareFontEncoding@{<encoding>}{<text settings>}{<math settings>}:
%% version that, for an encoding not declared before (\T@<encoding> still
%% undefined), appends it to \cdp@list, sets up its -cmd macro and inputs
%% the mapping file <encoding>enc.dfu inside a group when it exists. For
%% an already known encoding only an info message is issued. In both cases
%% \T@<encoding>, \M@<encoding> and \LastDeclaredEncoding are (re)set
%% globally.
\def\DeclareFontEncoding@#1#2#3{%
\expandafter %
\ifx\csname T@#1\endcsname\relax %
\def\cdp@elt{\noexpand\cdp@elt}%
\xdef\cdp@list{\cdp@list\cdp@elt{#1}%
{\default@family}{\default@series}%
{\default@shape}}%
\expandafter\let\csname#1-cmd\endcsname\@changed@cmd %
\begingroup %
\wlog{Now handling font encoding #1 ...}%
\lowercase{%
\InputIfFileExists{#1enc.dfu}}%
{\wlog{... processing UTF-8 mapping file for font encoding #1}}%
{\wlog{... no UTF-8 mapping file for font encoding #1}}%
\endgroup
\else
\@font@info{Redeclaring font encoding #1}%
\fi
\global\@namedef{T@#1}{#2}%
\global\@namedef{M@#1}{\default@M#3}%
\xdef\LastDeclaredEncoding{#1}%
}
%% Characters declared directly in this file, whatever font encodings are
%% in use.
\DeclareUnicodeCharacter{00A9}{\textcopyright}
\DeclareUnicodeCharacter{00AA}{\textordfeminine}
\DeclareUnicodeCharacter{00AE}{\textregistered}
\DeclareUnicodeCharacter{00BA}{\textordmasculine}
\DeclareUnicodeCharacter{02C6}{\textasciicircum}
\DeclareUnicodeCharacter{02DC}{\textasciitilde}
\DeclareUnicodeCharacter{200C}{\textcompwordmark}
\DeclareUnicodeCharacter{2026}{\textellipsis}
\DeclareUnicodeCharacter{2122}{\texttrademark}
\DeclareUnicodeCharacter{2423}{\textvisiblespace}
% \end{macrocode}
%
% \iffalse
%</def>
% \fi
%
% \iffalse
%<*defx>
% \fi
%
% \subsection{\texttt{lutf8x.def}}
%
% \begin{macrocode}
%% This file was adapted from utf8.def, which copyright is:
%% Copyright 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
%% 2004 2005 2006 The LaTeX3 Project.
%%
%% utf8.def is under the lppl version 1.3c or later, and can be found
%% in the base LaTeX system.
%%
%% The lppl can be found at http://www.latex-project.org/lppl.txt
%%
%% The changes to utf8.def are Copyright 2009 Elie Roux, and are under
%% the CC0 license.
%%
%% The changes are LuaTeX support.
%%
%% This file is distributed under the CC0 license, with clause 6 of the
%% lppl as additional restrictions.
% \end{macrocode}
%
% This file is mostly the code from \texttt{lutf8.def}, but it adds
% mechanisms to pass from \emph{legacy mode} to \emph{unicode font mode}.
% The trick is to put in a lua table all characters that are activated by
% the \emph{legacy mode}, and to unactivate them when we switch to
% \emph{unicode font mode}. This is made (almost) entirely in lua. The
% difficult part is the changes in |\DeclareFontEncoding|.
%
% \begin{macrocode}
%% File identification and set-up. The space catcode saved by
%% \inputencoding is restored, and \@inpenc@test is called so that
%% \inputencoding does not warn that no characters were defined. If
%% \@begindocumenthook is undefined, reading stops here (presumably the
%% encoding is being selected after the preamble -- confirm).
\ProvidesFile{lutf8x.def}
[2010/05/10 v0.97 UTF-8 support for luainputenc]
\makeatletter
\catcode`\ \saved@space@catcode
\@inpenc@test
\ifx\@begindocumenthook\@undefined
\makeatother
\endinput \fi
% \end{macrocode}
%
% We change it a little to add the activated character in the lua table.
%
% \begin{macrocode}
%% \DeclareUnicodeCharacter{<hex code>}{<definition>}: reads <hex code> as
%% a hexadecimal number, reports the character number to the Lua side
%% through luainputenc.declare_character (passed as a quoted string), then
%% makes the character active and defines it through \DeclareInputText.
%% Usable in the preamble only.
\gdef\DeclareUnicodeCharacter#1#2{%
\@tempcnta"#1%
\luatexbase@directlua{luainputenc.declare_character('\the\@tempcnta')}%
\catcode\@tempcnta\active %
\DeclareInputText{\the\@tempcnta}{#2}%
}
\@onlypreamble\DeclareUnicodeCharacter
%% \cdp@elt is redefined so that executing \cdp@list inputs, for each
%% listed font encoding #1, the mapping file <#1>enc.dfu when it exists
%% (the file name is lowercased), logging the outcome either way. After a
%% file has been read, spaces are set to be ignored again.
\def\cdp@elt#1#2#3#4{%
\wlog{Now handling font encoding #1 ...}%
\lowercase{%
\InputIfFileExists{#1enc.dfu}}%
{\wlog{... processing UTF-8 mapping file for font encoding
#1}%
\catcode`\ 9\relax}%
{\wlog{... no UTF-8 mapping file for font encoding #1}}%
}
\cdp@list
% \end{macrocode}
%
% The macros to change from/to \emph{legacy mode} to/from \emph{unicode
% font mode}.
%
% \begin{macrocode}