1
0
Fork 0
mirror of https://github.com/ganelson/inform.git synced 2024-05-22 02:48:41 +03:00

Use CH32EOF as EOF marker, allows fewer casts. Also put generated C code back to using wchar_t

This commit is contained in:
David Kinder 2023-09-06 13:19:32 +01:00
parent bde03297ac
commit 81a92aef9f
11 changed files with 88 additions and 85 deletions

View file

@ -166,14 +166,15 @@ alone, and the version number is returned.
by the local |\n| for good measure.
@<Read the titling line of the extension and normalise its casing@> =
int c, commented_out = FALSE, quoted = FALSE, content_found = FALSE;
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != EOF) {
inchar32_t c;
int commented_out = FALSE, quoted = FALSE, content_found = FALSE;
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != CH32EOF) {
if (c == 0xFEFF) continue; /* skip the optional Unicode BOM pseudo-character */
if (commented_out) {
if (c == ']') commented_out = FALSE;
} else if (quoted) {
if (c == '"') quoted = FALSE;
PUT_TO(titling_line, (inchar32_t) c);
PUT_TO(titling_line, c);
} else {
if (c == '[') commented_out = TRUE;
else {
@ -181,10 +182,10 @@ by the local |\n| for good measure.
else if ((c == '\x0a') || (c == '\x0d') || (c == '\n')) {
if (content_found) break;
c = ' ';
} else if (Characters::is_whitespace((inchar32_t) c) == FALSE) {
} else if (Characters::is_whitespace(c) == FALSE) {
content_found = TRUE;
}
PUT_TO(titling_line, (inchar32_t) c);
PUT_TO(titling_line, c);
}
}
}
@ -205,15 +206,16 @@ halfway through a line division combination like |0A 0D|, so that the first
thing we read here is a meaningless |0D|.
@<Read the rubric text, if any is present@> =
int c, found_start = FALSE;
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != EOF) {
inchar32_t c;
int found_start = FALSE;
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != CH32EOF) {
if ((c == '\x0a') || (c == '\x0d') || (c == '\n') || (c == '\t')) c = ' ';
if ((c != ' ') && (found_start == FALSE)) {
if (c == '"') found_start = TRUE;
else break;
} else {
if (c == '"') break;
if (found_start) PUT_TO(E->rubric_as_lexed, (inchar32_t) c);
if (found_start) PUT_TO(E->rubric_as_lexed, c);
}
}

View file

@ -1193,34 +1193,35 @@ text |bibliographic_sentence| and |in French| to the text |bracketed|. If not,
the whole thing goes into |bibliographic_sentence| and |bracketed| is empty.
@<Capture the opening sentence and its bracketed part@> =
int c, commented = FALSE, quoted = FALSE, rounded = FALSE, content_found = FALSE;
while ((c = TextFiles::utf8_fgetc(SF, NULL, NULL)) != EOF) {
inchar32_t c;
int commented = FALSE, quoted = FALSE, rounded = FALSE, content_found = FALSE;
while ((c = TextFiles::utf8_fgetc(SF, NULL, NULL)) != CH32EOF) {
if (c == 0xFEFF) continue; /* skip the optional Unicode BOM pseudo-character */
if (commented) {
if (c == ']') commented = FALSE;
} else {
if (quoted) {
if (rounded) PUT_TO(bracketed, (inchar32_t) c);
else PUT_TO(bibliographic_sentence, (inchar32_t) c);
if (rounded) PUT_TO(bracketed, c);
else PUT_TO(bibliographic_sentence, c);
if (c == '"') quoted = FALSE;
} else {
if (c == '[') commented = TRUE;
else {
if (Characters::is_whitespace((inchar32_t) c) == FALSE) content_found = TRUE;
if (Characters::is_whitespace(c) == FALSE) content_found = TRUE;
if (rounded) {
if (c == '"') quoted = TRUE;
if ((c == '\x0a') || (c == '\x0d') || (c == '\n')) c = ' ';
if (c == ')') rounded = FALSE;
else PUT_TO(bracketed, (inchar32_t) c);
else PUT_TO(bracketed, c);
} else {
if (c == '(') rounded = TRUE;
else {
if ((c == '\x0a') || (c == '\x0d') || (c == '\n')) {
if (content_found) break;
c = ' ';
PUT_TO(bibliographic_sentence, (inchar32_t) c);
PUT_TO(bibliographic_sentence, c);
} else {
PUT_TO(bibliographic_sentence, (inchar32_t) c);
PUT_TO(bibliographic_sentence, c);
}
if (c == '"') quoted = TRUE;
}

View file

@ -79,7 +79,8 @@ perform them in the "wrong" order, what should the compiler do?
=
text_stream *Interventions::expand_bracket_plus(text_stream *S) {
text_stream *OUT = Str::new();
int col = 1, cr, sfp = 0;
int col = 1, sfp = 0;
inchar32_t cr;
TEMPORARY_TEXT(heading_name)
TEMPORARY_TEXT(command)
TEMPORARY_TEXT(argument)
@ -87,7 +88,7 @@ text_stream *Interventions::expand_bracket_plus(text_stream *S) {
Str::clear(command);
Str::clear(argument);
@<Read next character@>;
NewCharacter: if (cr == EOF) break;
NewCharacter: if (cr == CH32EOF) break;
if (cr == '{') {
@<Read next character@>;
if (cr == '-') {
@ -110,8 +111,8 @@ text_stream *Interventions::expand_bracket_plus(text_stream *S) {
goto NewCharacter;
}
}
if (OUT) PUT_TO(OUT, (inchar32_t) cr);
} while (cr != EOF);
if (OUT) PUT_TO(OUT, cr);
} while (cr != CH32EOF);
DISCARD_TEXT(command)
DISCARD_TEXT(argument)
DISCARD_TEXT(heading_name)
@ -119,7 +120,7 @@ text_stream *Interventions::expand_bracket_plus(text_stream *S) {
}
@<Read next character@> =
cr = (int) Str::get_at(S, sfp); if (cr == 0) cr = EOF; else sfp++;
cr = Str::get_at(S, sfp); if (cr == 0) cr = CH32EOF; else sfp++;
col++; if ((cr == 10) || (cr == 13)) col = 0;
@ Our biggest complication is that I7 expressions can be included in the I6
@ -139,10 +140,10 @@ which can trigger an unwanted |(+|.
TEMPORARY_TEXT(i7_exp)
while (TRUE) {
@<Read next character@>;
if (cr == EOF) break;
if (cr == CH32EOF) break;
if ((cr == ')') && (Str::get_last_char(i7_exp) == '+')) {
Str::delete_last_character(i7_exp); break; }
PUT_TO(i7_exp, (inchar32_t) cr);
PUT_TO(i7_exp, cr);
}
wording W = Feeds::feed_text(i7_exp);
CSIInline::eval_bracket_plus_to_text(OUT, W);
@ -154,10 +155,10 @@ which can trigger an unwanted |(+|.
int com_mode = TRUE;
while (TRUE) {
@<Read next character@>;
if ((cr == '}') || (cr == EOF)) break;
if ((cr == '}') || (cr == CH32EOF)) break;
if ((cr == ':') && (com_mode)) { com_mode = FALSE; continue; }
if (com_mode) PUT_TO(command, (inchar32_t) cr);
else PUT_TO(argument, (inchar32_t) cr);
if (com_mode) PUT_TO(command, cr);
else PUT_TO(argument, cr);
}
@<Act on I6T command and argument@> =

View file

@ -224,7 +224,7 @@ typedef struct i7_mg_file_t {
typedef struct i7_mg_stream_t {
FILE *to_file;
i7word_t to_file_id;
inchar32_t *to_memory;
wchar_t *to_memory;
size_t memory_used;
size_t memory_capacity;
i7word_t previous_id;
@ -620,12 +620,12 @@ void i7_miniglk_stream_set_current(i7process_t *proc, i7word_t id) {
characters are written to. The following implements |glk_put_char_stream|.
= (text to inform7_clib.h)
void i7_mg_put_to_stream(i7process_t *proc, i7word_t rock, inchar32_t c);
void i7_mg_put_to_stream(i7process_t *proc, i7word_t rock, wchar_t c);
void i7_miniglk_put_char_stream(i7process_t *proc, i7word_t stream_id, i7word_t x);
=
= (text to inform7_clib.c)
void i7_mg_put_to_stream(i7process_t *proc, i7word_t rock, inchar32_t c) {
void i7_mg_put_to_stream(i7process_t *proc, i7word_t rock, wchar_t c) {
i7_mg_stream_t *S =
&(proc->miniglk->memory_streams[proc->state.current_output_stream_ID]);
if (proc->receiver == NULL) fputc(c, stdout);
@ -665,7 +665,7 @@ void i7_miniglk_put_char_stream(i7process_t *proc, i7word_t stream_id, i7word_t
if (S->memory_used >= S->memory_capacity) {
size_t needed = 4*S->memory_capacity;
if (needed == 0) needed = 1024;
inchar32_t *new_data = (inchar32_t *) calloc(needed, sizeof(inchar32_t));
wchar_t *new_data = (wchar_t *) calloc(needed, sizeof(wchar_t));
if (new_data == NULL) {
fprintf(stderr, "Out of memory\n"); i7_fatal_exit(proc);
}
@ -673,7 +673,7 @@ void i7_miniglk_put_char_stream(i7process_t *proc, i7word_t stream_id, i7word_t
free(S->to_memory);
S->to_memory = new_data;
}
S->to_memory[S->memory_used++] = (inchar32_t) x;
S->to_memory[S->memory_used++] = (wchar_t) x;
}
}
@ -880,7 +880,7 @@ i7word_t i7_miniglk_request_line_event(i7process_t *proc, i7word_t window_id,
e.win_id = window_id;
e.val1 = 1;
e.val2 = 0;
inchar32_t c; int pos = init_len;
wchar_t c; int pos = init_len;
if (proc->sender == NULL) i7_benign_exit(proc);
char *s = (proc->sender)(proc->send_count++);
int i = 0;
@ -907,7 +907,7 @@ i7word_t i7_miniglk_request_line_event_uni(i7process_t *proc, i7word_t window_id
e.win_id = window_id;
e.val1 = 1;
e.val2 = 0;
inchar32_t c; int pos = init_len;
wchar_t c; int pos = init_len;
if (proc->sender == NULL) i7_benign_exit(proc);
char *s = (proc->sender)(proc->send_count++);
int i = 0;

View file

@ -418,7 +418,7 @@ typedef struct i7process_t {
int snapshot_pos;
jmp_buf execution_env;
int termination_code;
void (*receiver)(int id, inchar32_t c, char *style);
void (*receiver)(int id, wchar_t c, char *style);
int send_count;
char *(*sender)(int count);
void (*stylist)(struct i7process_t *proc, i7word_t which, i7word_t what);
@ -479,7 +479,7 @@ a new process, so we must define those:
= (text to inform7_clib.h)
char *i7_default_sender(int count);
void i7_default_receiver(int id, inchar32_t c, char *style);
void i7_default_receiver(int id, wchar_t c, char *style);
=
The receiver and sender functions allow our textual I/O to be managed by external
@ -493,7 +493,7 @@ The sender supplies us with textual commands. By default, it takes a typed (or
of course piped) single line of text from the C |stdin| stream.
= (text to inform7_clib.c)
/* The receiver installed by default. This is shown in diff form: the
   |inchar32_t| signature is the line being replaced and the |wchar_t| one is
   its replacement. Writes |c| to |stdout| with |fputc|, but only when |id| is
   |I7_BODY_TEXT_ID|; text sent with any other |id| is dropped, and |style| is
   not consulted.
   NOTE(review): |fputc| converts its argument to |unsigned char|, so a |c|
   above 255 presumably does not come out as the intended character -- confirm
   whether callers are expected to UTF-8 encode before reaching here. */
void i7_default_receiver(int id, inchar32_t c, char *style) {
void i7_default_receiver(int id, wchar_t c, char *style) {
if (id == I7_BODY_TEXT_ID) fputc(c, stdout);
}
@ -537,13 +537,13 @@ but may in between the two supply its own receiver or sender:
= (text to inform7_clib.h)
void i7_set_process_receiver(i7process_t *proc,
void (*receiver)(int id, inchar32_t c, char *style), int UTF8);
void (*receiver)(int id, wchar_t c, char *style), int UTF8);
void i7_set_process_sender(i7process_t *proc, char *(*sender)(int count));
=
= (text to inform7_clib.c)
/* Replaces the receiver callback of |proc| with |receiver| and stores the
   |UTF8| argument in |proc->use_UTF8|. This is shown in diff form: the
   parameter line using |inchar32_t| is the one being replaced, and the line
   using |wchar_t| is its replacement. No check is made that |receiver| is
   non-|NULL|. */
void i7_set_process_receiver(i7process_t *proc,
void (*receiver)(int id, inchar32_t c, char *style), int UTF8) {
void (*receiver)(int id, wchar_t c, char *style), int UTF8) {
proc->receiver = receiver;
proc->use_UTF8 = UTF8;
}

View file

@ -296,8 +296,8 @@ void DocReferences::doc_fragment_to(OUTPUT_STREAM, text_stream *fn) {
int i = 0;
p[0] = 0;
while (TRUE) {
int c = TextFiles::utf8_fgetc(FRAGMENTS, NULL, NULL);
if (c == EOF) break;
inchar32_t c = TextFiles::utf8_fgetc(FRAGMENTS, NULL, NULL);
if (c == CH32EOF) break;
if (c == 0xFEFF) continue; /* the Unicode BOM non-character */
if (i == MAX_EXTENT_OF_FRAGMENTS) break;
p[i++] = (char) c;

View file

@ -87,26 +87,26 @@ int Localisation::stock_from_file(filename *localisation_file, localisation_dict
}
int col = 1, line = 1, nwsol = FALSE; /* "non white space on line" */
unicode_file_buffer ufb = TextFiles::create_ufb();
int cr; /* note that on some platforms |inchar32_t| is unable to hold |EOF| */
inchar32_t cr;
TEMPORARY_TEXT(key)
TEMPORARY_TEXT(value)
do {
@<Read next character@>;
if (cr == EOF) break;
if (cr == CH32EOF) break;
if ((cr == '#') && (nwsol == FALSE)) @<Read up to end of line as a comment@>
else if ((cr == '%') && (nwsol == FALSE)) @<Read up to the next white space as a key@>
else if (Characters::is_whitespace((inchar32_t) cr) == FALSE) nwsol = TRUE;
if (cr == EOF) break;
else if (Characters::is_whitespace(cr) == FALSE) nwsol = TRUE;
if (cr == CH32EOF) break;
if (Str::len(key) > 0) {
if ((Characters::is_whitespace((inchar32_t) cr) == FALSE) || (Str::len(value) > 0))
PUT_TO(value, (inchar32_t) cr);
if ((Characters::is_whitespace(cr) == FALSE) || (Str::len(value) > 0))
PUT_TO(value, cr);
} else {
if (Characters::is_whitespace((inchar32_t) cr) == FALSE) {
if (Characters::is_whitespace(cr) == FALSE) {
Localisation::error(localisation_file, line, col,
I"extraneous matter appears before first %key");
}
}
} while (cr != EOF);
} while (cr != CH32EOF);
if (Str::len(key) > 0) @<Write key-value pair@>;
DISCARD_TEXT(key)
DISCARD_TEXT(value)
@ -125,13 +125,13 @@ int Localisation::stock_from_file(filename *localisation_file, localisation_dict
Str::clear(value);
while (TRUE) {
@<Read next character@>;
if ((cr == '=') || (cr == EOF)) break;
if (Characters::is_whitespace((inchar32_t) cr) == FALSE) PUT_TO(key, (inchar32_t) cr);
if ((cr == '=') || (cr == CH32EOF)) break;
if (Characters::is_whitespace(cr) == FALSE) PUT_TO(key, cr);
}
if (cr == '=') {
while (TRUE) {
@<Read next character@>;
if (Characters::is_whitespace((inchar32_t) cr)) continue;
if (Characters::is_whitespace(cr)) continue;
break;
}
}

View file

@ -195,7 +195,7 @@ But we want to restore the more natural spacing.
@<Restore inter-word spaces unless this would be unnatural@> =
if ((i>from)
&& ((p[1] != 0) || (Lexer::is_punctuation((int) p[0]) == FALSE) ||
&& ((p[1] != 0) || (Lexer::is_punctuation(p[0]) == FALSE) ||
(p[0] == '(') || (p[0] == '{') || (p[0] == '}'))
&& (compare_word(i-1, OPENBRACKET_V)==FALSE))
PasteButtons::put_code_char(OUT, ' ');

View file

@ -57,10 +57,10 @@ wording Feeds::feed_C_string_full(inchar32_t *text, int expand, inchar32_t *nons
@<Set up the lexer@>;
lexer_break_at_slashes = break_at_slashes;
for (int i=0; text[i] != 0; i++) {
int last_cr, cr, next_cr;
if (i > 0) last_cr = (int) text[i-1]; else last_cr = EOF;
cr = (int) text[i];
if (cr != 0) next_cr = (int) text[i+1]; else next_cr = EOF;
inchar32_t last_cr, cr, next_cr;
if (i > 0) last_cr = text[i-1]; else last_cr = CH32EOF;
cr = text[i];
if (cr != 0) next_cr = text[i+1]; else next_cr = CH32EOF;
Lexer::feed_triplet(last_cr, cr, next_cr);
}
@<Extract results from the lexer@>;
@ -69,10 +69,10 @@ wording Feeds::feed_C_string_full(inchar32_t *text, int expand, inchar32_t *nons
wording Feeds::feed_text_full(text_stream *text, int expand, inchar32_t *nonstandard) {
@<Set up the lexer@>;
for (int i=0, L=Str::len(text); i<L; i++) {
int last_cr, cr, next_cr;
if (i > 0) last_cr = (int) Str::get_at(text, i-1); else last_cr = EOF;
cr = (int) Str::get_at(text, i);
if (cr != 0) next_cr = (int) Str::get_at(text, i+1); else next_cr = EOF;
inchar32_t last_cr, cr, next_cr;
if (i > 0) last_cr = Str::get_at(text, i-1); else last_cr = CH32EOF;
cr = Str::get_at(text, i);
if (cr != 0) next_cr = Str::get_at(text, i+1); else next_cr = CH32EOF;
Lexer::feed_triplet(last_cr, cr, next_cr);
}
@<Extract results from the lexer@>;

View file

@ -91,7 +91,7 @@ characters cause word divisions, or signal literals.
@d INFORM6_ESCAPE_END_1 '-'
@d INFORM6_ESCAPE_END_2 ')'
@d PARAGRAPH_BREAK U"|__" /* Inserted as a special word to mark paragraph breaks */
@d UNICODE_CHAR_IN_STRING ((inchar32_t) 0x1b) /* To represent awkward characters in metadata only */
@d UNICODE_CHAR_IN_STRING 0x1bu /* To represent awkward characters in metadata only */
@ This is the standard set used for parsing source text.
@ -374,9 +374,9 @@ As we have seen, the question of whether something is a punctuation mark
or not depends slightly on the context:
=
/* Returns |TRUE| if |c| equals any entry of |lexer_punctuation_marks|, which
   is scanned up to its zero terminator, and |FALSE| otherwise. This is shown
   in diff form: the |int c| signature and the comparison carrying an |(int)|
   cast are the lines being replaced; the |inchar32_t c| signature and the
   cast-free comparison are their replacements. */
int Lexer::is_punctuation(int c) {
int Lexer::is_punctuation(inchar32_t c) {
for (int i=0; lexer_punctuation_marks[i]; i++)
if (c == (int) lexer_punctuation_marks[i])
if (c == lexer_punctuation_marks[i])
return TRUE;
return FALSE;
}
@ -490,12 +490,10 @@ The current situation of the lexer is specified by the collective values
of all of the following. First, the start of the current word being
recorded, and the current high water mark -- those are defined above.
Second, we need the feeder machinery to maintain a variable telling us
the previous character in the raw, un-respaced source. We need to be a
little careful about the type of this: it needs to be an |int| so that it
can on occasion hold the pseudo-character value |EOF|.
the previous character in the raw, un-respaced source.
=
int lxs_previous_char_in_raw_feed; /* Preceding character in raw file read */
inchar32_t lxs_previous_char_in_raw_feed; /* Preceding character in raw file read */
@ There are four kinds of word: ordinary words, [comments in square brackets],
"strings in double quotes," and |(- I6_inclusion_text -)|. The latter
@ -541,7 +539,7 @@ always being "off").
=
void Lexer::reset_lexer(void) {
lexer_word = lexer_hwm;
lxs_previous_char_in_raw_feed = EOF;
lxs_previous_char_in_raw_feed = CH32EOF;
/* reset the external states */
lexer_wait_for_dashes = FALSE;
@ -673,9 +671,9 @@ int Lexer::detect_tear_off(void) {
}
@ The feeder routine is required to send us a triple each time: |cr|
must be a valid character (see above) and may not be |EOF|; |last_cr| must
be the previous one or else perhaps |EOF| at the start of feed;
while |next_cr| must be the next or else perhaps |EOF| at the end of feed.
must be a valid character (see above) and may not be |CH32EOF|; |last_cr| must
be the previous one or else perhaps |CH32EOF| at the start of feed;
while |next_cr| must be the next or else perhaps |CH32EOF| at the end of feed.
Spaces, often redundant, are inserted around punctuation unless one of the
following exceptions holds:
@ -697,7 +695,7 @@ Where the character following is a slash. (This is done essentially to make
most common URLs glue up as single words.)
=
void Lexer::feed_triplet(int last_cr, int cr, int next_cr) {
void Lexer::feed_triplet(inchar32_t last_cr, inchar32_t cr, inchar32_t next_cr) {
lxs_previous_char_in_raw_feed = last_cr;
int space = FALSE;
if (Lexer::is_punctuation(cr)) space = TRUE;
@ -706,9 +704,9 @@ void Lexer::feed_triplet(int last_cr, int cr, int next_cr) {
if (next_cr == '/') space = FALSE;
else {
int lc = 0, nc = 0;
if (Characters::isdigit((inchar32_t) last_cr)) lc = 1;
if (Characters::isdigit(last_cr)) lc = 1;
if ((last_cr >= 'a') && (last_cr <= 'z')) lc = 2;
if (Characters::isdigit((inchar32_t) next_cr)) nc = 1;
if (Characters::isdigit(next_cr)) nc = 1;
if (next_cr == '-') nc = 1;
if ((next_cr >= 'a') && (next_cr <= 'z')) nc = 2;
if ((lc == 1) && (nc == 1)) space = FALSE;
@ -754,7 +752,7 @@ surviving marbles is the sequence of characters starting at |lexer_word| and
extending to |lexer_hwm-1|.
=
void Lexer::feed_char_into_lexer(int c) {
void Lexer::feed_char_into_lexer(inchar32_t c) {
Lexer::ensure_lexer_hwm_can_be_raised_by(MAX_WORD_LENGTH, TRUE);
if (lxs_literal_mode) {
@ -774,7 +772,7 @@ void Lexer::feed_char_into_lexer(int c) {
}
/* otherwise record the current character as part of the word being built */
*(lexer_hwm++) = (inchar32_t) c;
*(lexer_hwm++) = c;
if (lxs_scanning_text_substitution) {
@<Force string division at the end of a text substitution, if necessary@>;
@ -834,7 +832,7 @@ discarded. A paragraph break is converted into a special "divider" word.
@<Line break outside a literal@> =
if (lxs_this_line_is_empty_so_far) {
for (int i=0; PARAGRAPH_BREAK[i]; i++)
Lexer::feed_char_into_lexer((int) PARAGRAPH_BREAK[i]);
Lexer::feed_char_into_lexer(PARAGRAPH_BREAK[i]);
Lexer::feed_char_into_lexer(' ');
}
lxs_this_line_is_empty_so_far = TRUE;
@ -852,7 +850,7 @@ Inform print a paragraph break at run-time.
@<Soak up whitespace around line breaks inside a literal string@> =
if (lxs_string_soak_up_spaces_mode) {
switch(c) {
case ' ': case '\t': c = (int) *(lexer_hwm-1); lexer_hwm--; break;
case ' ': case '\t': c = *(lexer_hwm-1); lexer_hwm--; break;
case '\n':
*(lexer_hwm-1) = NEWLINE_IN_STRING;
c = NEWLINE_IN_STRING;
@ -1028,7 +1026,7 @@ finished.
case STRING_KW:
if (c == STRING_END) {
lxs_string_soak_up_spaces_mode = FALSE;
*(lexer_hwm++) = (inchar32_t) c; /* record the |STRING_END| character as part of the word */
*(lexer_hwm++) = c; /* record the |STRING_END| character as part of the word */
lxs_literal_mode = FALSE;
}
break;

View file

@ -45,7 +45,8 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
sf->body_text = Str::new();
sf->torn_off_documentation = Str::new();
source_location top_of_file;
int cr, last_cr, next_cr, read_cr, newline_char = 0, torn_off = FALSE;
inchar32_t cr, last_cr, next_cr, read_cr, newline_char = 0;
int torn_off = FALSE;
unicode_file_buffer ufb = TextFiles::create_filtered_ufb(mode);
@ -57,8 +58,8 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
last_cr = ' '; cr = ' '; next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb);
if (next_cr == 0xFEFF) next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb); /* Unicode BOM code */
if (next_cr != EOF)
while (((read_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb)), next_cr) != EOF) {
if (next_cr != CH32EOF)
while (((read_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb)), next_cr) != CH32EOF) {
last_cr = cr; cr = next_cr; next_cr = read_cr;
switch(cr) {
case '\x0a':
@ -78,9 +79,9 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
break;
}
if (torn_off) {
PUT_TO(sf->torn_off_documentation, (inchar32_t) cr);
PUT_TO(sf->torn_off_documentation, cr);
} else {
PUT_TO(sf->body_text, (inchar32_t) cr);
PUT_TO(sf->body_text, cr);
Lexer::feed_triplet(last_cr, cr, next_cr);
torn_off = Lexer::detect_tear_off();
}
@ -157,7 +158,7 @@ int TextFromFiles::word_count(int wc) {
/* outside quoted text, each lexer word not wholly composed of punctuation scores 1 */
if (Lexer::word(wc) != PARBREAK_V)
for (; *p != 0; p++)
if ((Lexer::is_punctuation((int) *p) == FALSE) && (*p != '|')) {
if ((Lexer::is_punctuation(*p) == FALSE) && (*p != '|')) {
N++;
break;
}