Skip to content

Commit a41ddc7

Browse files
committed
1. Use generators and iterators to scan data (pieces of text) in a stream-like
fashion. 2. A rambda-like functional compose, implemented for processing text data as a stream.
1 parent 8988990 commit a41ddc7

File tree

6 files changed

+211
-129
lines changed

6 files changed

+211
-129
lines changed

src/IToken.ts

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import { IModule, IModuleEnums } from './module';
2-
2+
import { propsExist } from './helpers'
33
export interface ISimpleToken {
44
f: number;
55
}
66
export interface IRangeToken extends ISimpleToken {
77
t: number;
88
}
9-
export interface IToken extends IRangeToken {
9+
export interface INameToken extends IRangeToken {
1010
name: string; // name of token
1111
}
1212

@@ -19,13 +19,23 @@ export interface ILines {
1919
vlines(): string[];
2020
vline(n: number): string;
2121
}
22-
export interface IWhiteSpaces {
23-
_module: IModule; // reference, not a copy
24-
_tokens: IToken[];
25-
onLine(n: number): IToken[];
26-
onvLine(n: number): IToken[];
27-
}
2822

23+
// Guard built from propsExist: true when both 'f' and 't' props are present (a range token).
export const isRangeToken = propsExist('f', 't')
24+
// True when an 'f' prop is present; NOTE(review): range tokens also satisfy this check.
export const isSimpleToken = propsExist('f')
25+
26+
// A token is either a simple (point) token or a range token.
export type IToken = ISimpleToken|IRangeToken
27+
28+
export function sortTokenFAscTDesc(t1:IToken, t2:IToken){
29+
if (t1.f > t2.f) return 1
30+
if (t1.f < t2.f) return -1
31+
if (isRangeToken(t1) && isRangeToken(t2)){
32+
const t1t = <IRangeToken>t1
33+
const t2t = <IRangeToken>t2
34+
if (t1t < t2t) return 1
35+
if (t1t > t2t) return -1
36+
}
37+
return 0
38+
}
2939

3040

3141
// stream (system)
@@ -43,13 +53,3 @@ export interface IWhiteSpaces {
4353
// -> comments
4454

4555
// channel is just a collection of IToken objects
46-
47-
export function processLines(module: IModule): void {
48-
49-
50-
}
51-
52-
53-
export function createVirtualLines(lines: ILines): void {
54-
55-
}

src/channel.ts

Lines changed: 157 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import * as debug from 'debug'
22

3-
import { ISimpleToken, IRangeToken } from './IToken';
3+
import { ISimpleToken, IRangeToken, sortTokenFAscTDesc, IToken, isRangeToken, isSimpleToken } from './IToken';
44
import { IModule, IModuleEnums } from './module'
55
import { IMatcher, IMatcherState, createClassMatcher } from './matchers'
66
import { ITokenEmitter } from './tokenProducers'
7-
import { isComment, isContinue, binarySearch, last } from './helpers'
8-
import { ws } from './classes'
7+
import { isContinue, binarySearch, last, propsExist } from './helpers'
8+
import { ws, TestFunc } from './classes'
99
import { createTokenEmitter, rangeProducer } from './tokenProducers'
10+
import { Stream } from 'stream';
1011

1112
const printer = debug('IChannel')
1213

@@ -21,8 +22,8 @@ export interface Snippet {
2122
}
2223

2324
export interface Processed {
24-
snippets: Snippet[];
25-
tokens: IRangeToken[];
25+
snippet: Snippet;
26+
token: IRangeToken;
2627
}
2728

2829
function compare(a: any, b: any): 0 | -1 | 1 {
@@ -42,6 +43,37 @@ const wsMatcher = createClassMatcher(ws, '>1')
4243
const wsEmitter = createTokenEmitter(rangeProducer, wsMatcher)
4344

4445

46+
// Wrap a RegExp as a line tester: returns the match array, or null when no match.
const regexp = (pattern: RegExp) => {
    return line => line.match(pattern)
}
47+
const isComment = line => {
48+
49+
if ('*Cc'.includes(line[0])) {
50+
const found: RegExpMatchArray = [line]
51+
found.index = 0
52+
found.input = line
53+
return found
54+
}
55+
return null
56+
}
57+
58+
const isNotComment = line => {
59+
if (!isComment(line)) {
60+
const found: RegExpMatchArray = [line]
61+
found.index = 0
62+
found.input = line
63+
return found
64+
}
65+
return null
66+
}
67+
68+
// compose() specialized for Processed results: each stage's leftover snippet
// is fed into the next transformer in the pipeline.
const chain = compose((a: Processed) => a.snippet)
69+
70+
// Processor for continuation lines: a newline followed by 5 whitespace chars and a non-space.
export const processLineContinuation = createProcessor(regexp(/\n\s{5}[^\s]/))
71+
// Processor matching any line that is not a comment (see isNotComment).
export const processNonComments = createProcessor(isNotComment)
72+
// Processor matching runs of whitespace (the \t in the class is already covered by \s).
export const processWS = createProcessor(regexp(/[\s\t]+/))
73+
// Processor matching comment lines (first char '*', 'C' or 'c' — see isComment).
export const processComments = createProcessor(isComment)
74+
75+
76+
4577
export interface IChannel<T extends ISimpleToken> {
4678
mod: IModule;
4779
name: string;
@@ -97,7 +129,7 @@ export function createLogicalEOLChannel<T extends ISimpleToken>(ch: IChannel<T>)
97129
tokens,
98130
name: 'vlf',
99131
process() {
100-
tokens = []
132+
tokens.splice(0)
101133
const lftok = ch.tokens.slice(0)
102134
const raw = ch.mod.raw
103135
let prev = 0
@@ -134,142 +166,178 @@ export function createCommentsChannel(ch: IChannel<ISimpleToken>): IChannel<IRan
134166
const _lf = vlf || lf
135167
const tokens: IRangeToken[] = []
136168
const raw = _lf.mod.raw
169+
170+
const pipeLine = chain(processComments)
171+
137172
const comm: IChannel<IRangeToken> = {
138173
mod: ch.mod,
139174
tokens,
140175
name: 'comments',
141176
process() {
142177
tokens.splice(0)
143178
const lftok = _lf.tokens.slice(0) //copy
144-
let prev = 0
145-
for (let i = 0; i < lftok.length; i++) {
146-
const pos = lftok[i].f
147-
const line = raw.slice(prev, pos)
148-
if (isComment(line)) {
149-
tokens.push({ f: prev, t: pos - 1 })
150-
}
151-
prev = pos + 1
152-
}
153-
const lastf = last(lftok).f
154-
if (lastf < raw.length - 1) {
155-
const line = raw.slice(lastf + 1)
156-
if (isComment(line)) {
157-
tokens.push({ f: lastf + 1, t: raw.length - 1 })
158-
}
179+
for (const processed of pipeLine(createSnippetsUsingTokens(raw, lftok))) {
180+
tokens.push(processed.token)
159181
}
160182
}
161183
}
162184
ch.mod.channels.set(comm.name, comm)
163185
return comm
164186
}
165187

166-
export function createSourceChannel(ch: IChannel<ISimpleToken>): IChannel<IRangeToken> {
188+
export function createChannelExcluding(name: string, ...ch: IChannel<IToken>[]): IChannel<IRangeToken> {
167189

168-
const vlf = ch.mod.channels.get('vlf')
169-
const comms = ch.mod.channels.get('comments') as IChannel<IRangeToken>
170-
if (vlf !== ch) {
171-
throw new TypeError(`source "vlf" channel is not registered with a module`)
190+
if (ch.length === 0) {
191+
throw new Error(`Illegal Arguments, no arguments given`)
172192
}
173-
if (comms === undefined) {
174-
throw new TypeError(`source "comments" channel is not registered with a module`)
193+
const foundErrMod = ch.find(fch => fch.mod !== ch[0].mod)
194+
if (foundErrMod) {
195+
throw new Error(`Channels dont come from the same module`)
175196
}
197+
// merge and sort all the tokens from the channels
176198
const tokens: IRangeToken[] = []
177-
178-
const source: IChannel<IRangeToken> = {
179-
mod: ch.mod,
180-
tokens, //vtokens
181-
name: 'source',
199+
const raw = ch[0].mod.raw
200+
const rc: IChannel<IRangeToken> = {
201+
mod: ch[0].mod,
202+
tokens,
203+
name,
182204
process() {
183-
tokens.splice(0) // delete in palce
184-
const lftok = vlf.tokens.slice(0) //copy
185-
const raw = vlf.mod.raw
205+
const excludeTokens = ch.map(c => c.tokens).reduce((col, arr) => {
206+
col.push(...arr)
207+
return col
208+
}, [])
209+
excludeTokens.sort(sortTokenFAscTDesc)
210+
tokens.splice(0)
186211
let prev = 0
187-
const lastf = last(lftok).f
188-
for (let i = 0; i < lftok.length; i++) {
189-
const pos = lftok[i].f
190-
const line = raw.slice(prev, pos)
191-
if (!isComment(line)) {
192-
tokens.push({ f: prev, t: pos - 1 })
193-
}
194-
prev = pos + 1
212+
if (excludeTokens.length === 0) {
213+
tokens.push({ f: 0, t: raw.length - 1 })
214+
return
195215
}
196-
if (lastf < raw.length - 1) {
197-
const line = raw.slice(lastf + 1)
198-
if (!isComment(line)) {
199-
tokens.push({ f: lastf + 1, t: raw.length - 1 })
216+
for (const token of excludeTokens) {
217+
if (token.f <= prev) { // we skipped ahead temp
218+
prev = Math.max(token.f + 1, prev)
219+
if ((<IRangeToken>token).t) {
220+
prev = Math.max(prev, (<IRangeToken>token).t + 1)
221+
}
222+
continue
200223
}
224+
tokens.push({ f: prev, t: token.f - 1 })
225+
prev = isRangeToken(token) ?
226+
(<IRangeToken>token).t + 1 :
227+
token.f + 1
228+
}
229+
const lastToken = last(excludeTokens)
230+
if ((<IRangeToken>lastToken).t &&
231+
(<IRangeToken>lastToken).t < raw.length - 1) {
232+
tokens.push({ f: (<IRangeToken>lastToken).t + 1, t: raw.length - 1 })
233+
}
234+
else if (lastToken.f < raw.length - 1) {
235+
tokens.push({ f: lastToken.f + 1, t: raw.length - 1 })
201236
}
202237
}
203238
}
204-
ch.mod.channels.set(source.name, source)
205-
return source
239+
ch[0].mod.channels.set(name, rc)
240+
return rc
206241
}
207242

208243
export function createWSChannel(ch: IChannel<IRangeToken>): IChannel<IRangeToken> {
209244

210-
const vlf = ch.mod.channels.get('vlf') as IChannel<ISimpleToken>
211245
const source = ch.mod.channels.get('source') as IChannel<IRangeToken>
212-
if (vlf !== ch) {
213-
throw new TypeError(`source "vlf" channel is not registered with a module`)
214-
}
215246
if (source === undefined) {
216247
throw new TypeError(`source "comments" channel is not registered with a module`)
217248
}
218-
const raw = vlf.mod.raw
249+
const raw = ch.mod.raw
250+
const pipeLine = chain(processLineContinuation, processWS)
219251
const tokens: IRangeToken[] = []
220-
const nonWSSource: Snippet[] = []
221252
const ws: IChannel<IRangeToken> = {
222253
mod: ch.mod,
223-
tokens: [], //vtokens
254+
tokens, //vtokens
224255
name: 'ws',
225256
process() {
226257
tokens.splice(0)
227-
nonWSSource.splice(0)
228-
const srctok = source.tokens.slice(0) //copy
229-
for (let i = 0; i < srctok.length; i++) {
230-
const { f, t } = srctok[i]
231-
let snip = { line: raw.slice(f, t + 1), f, t }
232-
// split out continueation lines
233-
const { snippets, tokens: _tokens } = processLineContinuation(snip)
234-
tokens.splice(0, 0, ..._tokens)
235-
snippets.map(processWS).forEach(({ snippets: snips, tokens: toks }) => {
236-
tokens.splice(0, 0, ...toks)
237-
nonWSSource.splice(0, 0, ...snips)
238-
})
239-
// here the ws token need to be extracted from line
258+
const tok = source.tokens.slice(0) //copy
259+
for (const processed of pipeLine(createSnippetsUsingTokens(raw, tok))) {
260+
tokens.push(processed.token)
240261
}
241-
//sort ws tokens because there will be continue line stuff here!!
262+
tokens.sort((t1, t2) => t1.f - t2.f)
242263
}
243264
}
244-
ch.mod.channels.set(source.name, source)
245-
return source
265+
ch.mod.channels.set(ws.name, ws)
266+
return ws
246267
}
247268

248-
export function createProcessor(regex: RegExp) {
269+
export function createProcessor(matcher: TestFunc) {
249270

250-
return function process(s: Snippet): Processed {
271+
return function* processor(s: Snippet): IterableIterator<Processed> {
251272
const { line, f, t } = s
252-
const found = line.match(regex);
253-
const rc = {
254-
snippets: [s],
255-
tokens: []
256-
}
257-
273+
const found = matcher(line)
258274
if (found) {
259275
const first = line.slice(0, found.index)
260276
const second = line.slice(found.index + found[0].length)
261-
rc.snippets[0] = { line: first, f, t: f + first.length - 1 }
262-
rc.tokens[0] = { f: f + found.index, t: f + found.index + found[0].length - 1 }
277+
yield {
278+
snippet: { line: first, f, t: f + first.length - 1 },
279+
token: { f: f + found.index, t: f + found.index + found[0].length - 1 }
280+
}
263281
if (second) {
264-
const rv = process({ line: second, f: f + found.index + found[0].length, t })
265-
rc.tokens.splice(0, 0, ...rv.tokens)
266-
rc.snippets.splice(0, 0, ...rv.snippets)
282+
yield* processor({ line: second, f: f + found.index + found[0].length, t })
267283
}
268284
}
269-
return rc
270285
}
271286
}
272287

273-
export const processLineContinuation = createProcessor(/\n\s{5}[^\s]/)
274-
export const processWS = createProcessor(/[\s\t]+/)
288+
function* createSnippetsUsingTokens(raw: string, tokens: (ISimpleToken | IRangeToken)[]): IterableIterator<Snippet> {
289+
if (!(raw || '').trim()) {
290+
return
291+
}
292+
let prev = 0
293+
294+
for (const token of tokens) {
295+
if (isRangeToken(token)) {// range token
296+
const { f, t } = <IRangeToken>token
297+
yield { line: raw.slice(f, t + 1), f, t }
298+
prev = t + 1
299+
}
300+
else if (isSimpleToken(token)) {//simpletoken
301+
const { f } = <ISimpleToken>token
302+
yield { line: raw.slice(prev, f), f: prev, t: f - 1 }
303+
prev = f + 1
304+
305+
}
306+
else {
307+
throw new Error(`token is not a SimpleToken or a RangeToken, i.e: [${JSON.stringify(token)}]`)
308+
}
309+
}
310+
const lastToken = last(tokens)
311+
if (
312+
isSimpleToken(lastToken) //slicer token
313+
|| lastToken === undefined //source code has only one-liner?
314+
) {
315+
const f = lastToken && lastToken.f || 0
316+
if (raw.length - 1 > f) {
317+
yield { line: raw.slice(f + 1, raw.length), f: f + 1, t: raw.length - 1 }
318+
}
319+
}
320+
}
321+
322+
323+
export function compose<T, K>(convert: (a: K) => T) {
324+
325+
return function chain(...transformers: ((s: T) => IterableIterator<K>)[]) {
275326

327+
function* stream(data: T, ...fns: ((s: T) => IterableIterator<K>)[]) {
328+
const [fn, ...others] = fns
329+
for (const elt of fn(data)) {
330+
yield elt
331+
if (others.length) {
332+
yield* stream(convert(elt), ...others)
333+
}
334+
}
335+
}
336+
337+
return function* activate(gen: IterableIterator<T>): IterableIterator<K> {
338+
for (const elt of gen) {
339+
yield* stream(elt, ...transformers)
340+
}
341+
}
342+
}
343+
}

0 commit comments

Comments
 (0)